16 #define TACHYON_INTERNAL 1    43   if (scene->shader == NULL) {
    59 #if defined(USECPUAFFINITY)    67   cpuaffinity = parms->
tid / 4;
    70 #if 0 && defined(_ARCH_PPC64)     74   cpuaffinity = parms->
tid;
    77   if (cpuaffinity > 0) {
    80     if (scene->verbosemode && scene->mynode == 0) {
    81       printf(
"Thread[%d] setting affinity to %d\n", parms->
tid, cpuaffinity);
   103 #if defined(MPI) && defined(THR)   110   int sched_dynamic = 0; 
   117   if (scene->ambocc.numsamples > 0 || scene->antialiasing > 4) {
   121   sched_dynamic = (getenv(
"SCHED_DYNAMIC") != NULL);
   137 #if defined(MPI) && defined(THR)   139   numrowbars = scene->vres;
   142   for (row=0; row<numrowbars; row++) {
   148   for (thr=0; thr<scene->numthreads; thr++) {
   150     parms[thr].
nthr=scene->numthreads;
   151     parms[thr].
scene=scene;
   156 #if !defined(DISABLEMBOX)   157       (
unsigned long *) calloc(
sizeof(
unsigned long)*scene->objgroup.numobjects + 32, 1);
   170     if (scene->nodes == 1) {
   172       parms[thr].
stopx  = scene->hres;
   174       parms[thr].
starty = thr + 1;
   175       parms[thr].
stopy  = scene->vres;
   176       parms[thr].
yinc   = scene->numthreads;
   178       parms[thr].
startx = thr + 1;
   179       parms[thr].
stopx  = scene->hres;
   180       parms[thr].
xinc   = scene->numthreads;
   181       parms[thr].
starty = scene->mynode + 1;
   182       parms[thr].
stopy  = scene->vres;
   183       parms[thr].
yinc   = scene->nodes;
   187     parms[thr].sched_dynamic = sched_dynamic;
   188     parms[thr].pixelsched = pixelsched;
   191 #if defined(MPI) && defined(THR)   192     parms[thr].numrowbars = numrowbars;
   193     parms[thr].rowbars = rowbars;
   194     parms[thr].rowsdone = rowsdone;
   198   scene->threadparms = (
void *) parms;
   199   scene->threads = (
void *) threads;
   201   for (thr=1; thr < scene->numthreads; thr++) 
   214 #if defined(MPI) && defined(THR)   218   if (scene->threads != NULL) {
   223     for (thr=1; thr<parms[0].
nthr; thr++) 
   229     free(scene->threads);
   232   if (scene->threadparms != NULL) {
   238     for (thr=0; thr < parms[0].
nthr; thr++) {
   239       if (parms[thr].local_mbox != NULL) 
   240         free(parms[thr].local_mbox);
   246     free(parms[0].pixelsched);
   249 #if defined(MPI) && defined(THR)   251     for (row=0; row<parms[0].numrowbars; row++) {
   255     free(parms[0].rowbars);
   256     free(parms[0].rowsdone);
   259     free(scene->threadparms);
   262   scene->threads = NULL;
   263   scene->threadparms = NULL;
   277   if (scene->verbosemode && scene->mynode == 0) {
   283     memset(msgtxt, 0, 
sizeof(msgtxt));
   284     if ((scene->nodes == 1) && (scene->cpuinfo[0].cpucaps != NULL)) {
   287       strcpy(msgtxt, 
"  CPU features: ");
   289 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))   291         strcat(msgtxt, 
"SSE2 ");
   293         strcat(msgtxt, 
"SSE4.1 ");
   295         strcat(msgtxt, 
"AVX ");
   297         strcat(msgtxt, 
"AVX2 ");
   299         strcat(msgtxt, 
"FMA ");
   301         strcat(msgtxt, 
"F16 ");
   304         strcat(msgtxt, 
"KNL:AVX-512F+CD+ER+PF ");
   307           strcat(msgtxt, 
"AVX512F ");
   309           strcat(msgtxt, 
"AVX512CD ");
   311           strcat(msgtxt, 
"AVX512ER ");
   313           strcat(msgtxt, 
"AVX512PF ");
   317         strcat(msgtxt, 
"HT ");
   325 #if (defined(__ARM_ARCH_ISA_A64) || defined(__ARM_NEON))   327         strcat(msgtxt, 
"FP ");
   329         strcat(msgtxt, 
"SVE ");
   332         strcat(msgtxt, 
"ASIMD ");
   334         strcat(msgtxt, 
"ASIMDHP ");
   336         strcat(msgtxt, 
"ASIMDRDM ");
   338         strcat(msgtxt, 
"ASIMDDP ");
   340         strcat(msgtxt, 
"ASIMDFHM ");
   343         strcat(msgtxt, 
"AES ");
   345         strcat(msgtxt, 
"CRC32 ");
   347         strcat(msgtxt, 
"SHA1 ");
   349         strcat(msgtxt, 
"SHA2 ");
   351         strcat(msgtxt, 
"SHA3 ");
   353         strcat(msgtxt, 
"SHA512 ");
   355 #if defined(VMDCPUDISPATCH) && defined(__ARM_FEATURE_SVE)   358         sprintf(msgtxt, 
"  ARM64 SVE vector lengths  32-bit: %d,  64-bit: %d",
   359                 arm_sve_vecsize_32bits(), arm_sve_vecsize_64bits());
   369     for (i=0; i<scene->nodes; i++) {
   371             "  Node %4d: %2d CPUs, CPU Speed %4.2f, Node Speed %6.2f Name: %s",
   372             i, scene->cpuinfo[i].numcpus, scene->cpuinfo[i].cpuspeed,
   373             scene->cpuinfo[i].nodespeed, scene->cpuinfo[i].machname);
   376       totalcpus += scene->cpuinfo[i].numcpus;
   377       totalspeed += scene->cpuinfo[i].nodespeed;
   380     sprintf(msgtxt, 
"  Total CPUs: %d", totalcpus);
   382     sprintf(msgtxt, 
"  Total Speed: %f\n", totalspeed);
   397   if (scene->cliplist != NULL) {
   398     scene->flags |= RT_SHADE_CLIPPING;
   402   if (scene->imginternal && (scene->img != NULL)) {
   408   if (scene->img == NULL) {
   409     scene->imginternal = 1;
   410     if (scene->verbosemode && scene->mynode == 0) { 
   415     if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
   416       scene->img = malloc(scene->hres * scene->vres * 3);
   417     } 
else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
   418       scene->img = malloc(
sizeof(
float) * scene->hres * scene->vres * 3);
   423     if (scene->img == NULL) {
   424       scene->imginternal = 0;
   429 #if defined(RT_ACCUMULATE_ON)   433     int bufsz = 
sizeof(float) * scene->hres * scene->vres * 3;
   436     if (scene->accum_buf != NULL) {
   437       free(scene->accum_buf);
   438       scene->accum_buf = NULL;
   441     if (scene->accum_buf == NULL) {
   442       scene->accum_buf = calloc(1, bufsz);  
   444       scene->accum_count = 0;               
   448       int bufsz = 
sizeof(float) * scene->hres * scene->vres * 3;
   449       memset(scene->accum_buf, 0, bufsz);   
   450       scene->accum_count = 0;               
   470   scene->scenecheck = 0;
   477   if (scene->mynode == 0) {
   479     sprintf(msgtxt, 
"Preprocessing Time: %10.4f seconds",runtime);
   496   if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
   497     if (scene->imgprocess & RT_IMAGE_NORMALIZE) {
   502     if (scene->imgprocess & RT_IMAGE_GAMMA) {
   503       gamma_rgb96f(scene->hres, scene->vres, (
float *) scene->img, 
   507   } 
else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
   508     if (scene->imgprocess & (RT_IMAGE_NORMALIZE | RT_IMAGE_GAMMA))
   514     writeimage(scene->outfilename, scene->hres, scene->vres, 
   515                scene->img, scene->imgbufformat, scene->imgfileformat);
   518     if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
   521                                   scene->imgcrop.xres, scene->imgcrop.yres, 
   522                                   scene->imgcrop.xstart, scene->imgcrop.ystart);
   523       writeimage(scene->outfilename, scene->imgcrop.xres, scene->imgcrop.yres,
   524                  imgcrop, scene->imgbufformat, scene->imgfileformat);
   526     } 
else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
   527       unsigned char *imgcrop;
   529                                  scene->imgcrop.xres, scene->imgcrop.yres, 
   530                                  scene->imgcrop.xstart, scene->imgcrop.ystart);
   531       writeimage(scene->outfilename, scene->imgcrop.xres, scene->imgcrop.yres,
   532                  imgcrop, scene->imgbufformat, scene->imgfileformat);
   541   sprintf(msgtxt, 
"    Image I/O Time: %10.4f seconds", iotime);
   557   if (scene->scenecheck)
   560 #if defined(RT_ACCUMULATE_ON)   563     int bufsz = 
sizeof(float) * scene->hres * scene->vres * 3;
   564     memset(scene->accum_buf, 0, bufsz);   
   565     scene->accum_count = 0;               
   568   scene->accum_count++;               
   571   if (scene->mynode == 0) 
   594 #if defined(MPI) && defined(THR)   628   if (scene->mynode == 0) {
   633     sprintf(msgtxt, 
"\n  Ray Tracing Time: %10.4f seconds", runtime);
   636     if (scene->writeimagefile) 
 #define RT_CROP_DISABLED
Image cropping disabled. 
 
int stopx
ending X pixel index 
 
#define CPU_ARM64_AES
AES insns avail. 
 
double rt_timer_time(rt_timerhandle v)
 
void rt_par_barrier_sync(rt_parhandle voidhandle)
 
static void renderio(scenedef *scene)
 
void rt_timer_destroy(rt_timerhandle v)
 
int rt_thread_barrier(rt_barrier_t *barrier, int increment)
synchronize on counting barrier primitive 
 
#define CPU_ARM64_CRC32
CRC32 insns avail. 
 
#define CPU_ARM64_FP
FP insns avail. 
 
#define CPU_ARM64_SVE
Scalable Vector Extns avail. 
 
int nthr
total number of worker threads 
 
void rt_thread_barrier_destroy(rt_barrier_t *barrier)
destroy counting barrier primitive 
 
void rt_timer_start(rt_timerhandle v)
 
scenedef * scene
scene handle 
 
#define CPU_ARM64_SHA2
SHA-2 insns avail. 
 
#define CPU_ARM64_ASIMD
Advanced SIMD avail. 
 
rt_barrier_t * runbar
sleeping thread pool barrier 
 
int rt_thread_set_self_cpuaffinity(int cpu)
set the CPU affinity of the current thread (if allowed by host system) 
 
rt_barrier_t * rt_thread_barrier_init(int n_clients)
initialize counting barrier primitive 
 
#define CPU_HT
x86 Hyperthreading detected 
 
#define CPU_SSE2
SSE2 SIMD avail. 
 
void rt_ui_message(int level, char *msg)
 
static void rendercheck(scenedef *scene)
 
unsigned char * image_crop_rgb24(int xres, int yres, unsigned char *img, int szx, int szy, int sx, int sy)
 
void rt_par_start_scanlinereceives(rt_parhandle voidparhandle, rt_parbuf voidhandle)
 
#define CPU_AVX
AVX SIMD avail. 
 
int starty
starting Y pixel index 
 
#define CPU_ARM64_SHA512
SHA-512 insns avail. 
 
void rt_timer_stop(rt_timerhandle v)
 
#define CPU_ARM64_SHA1
SHA-1 insns avail. 
 
int rt_atomic_int_init(rt_atomic_int_t *atomp, int val)
initialize an atomic int variable 
 
#define RT_ACCUMULATE_ON
accum. 
 
void rt_par_waitscanlines(rt_parhandle voidparhandle, rt_parbuf voidhandle)
 
Tachyon cross-platform thread creation and management, atomic operations, and CPU feature query APIs...
 
static void rt_autoshader(scenedef *scene)
 
int startx
starting X pixel index 
 
#define CPU_ARM64_ASIMDRDM
Advanced SIMD RDM avail. 
 
int writeimage(char *name, int xres, int yres, void *img, int imgbufferformat, int fileformat)
 
double flt
generic floating point number, using double 
 
#define RT_BOUNDING_ENABLED
Enable spatial subdivision/bounding. 
 
void * thread_trace(thr_parms *t)
 
#define CPU_ARM64_ASIMDHP
Advanced SIMD HP avail. 
 
#define RT_ACCUMULATE_CLEAR
accum. 
 
#define CPU_ARM64_SHA3
SHA-3 insns avail. 
 
unsigned long * local_mbox
grid acceleration mailbox structure 
 
void * thread_worker(void *voidparms)
 
#define CPU_AVX512CD
AVX-512CD SIMD avail. 
 
Tachyon cross-platform timers, special math function wrappers, and RNGs. 
 
#define CPU_AVX2
AVX2 SIMD avail. 
 
color full_shader(ray *incident)
 
#define CPU_AVX512F
AVX-512F SIMD avail. 
 
#define CPU_AVX512ER
AVX-512ER SIMD avail. 
 
void normalize_rgb96f(int xres, int yres, float *fimg)
 
#define CPU_ARM64_ASIMDDP
Advanced SIMD DP avail. 
 
atomic int structure with padding to prevent false sharing 
 
void create_render_threads(scenedef *scene)
 
int engrid_scene(scenedef *scene, int boundthresh)
 
int rt_thread_join(rt_thread_t thr, void **stat)
join (wait for completion of, and merge with) a thread 
 
void camera_init(scenedef *scene)
 
unsigned long serialno
ray mailbox test serial number 
 
int rt_atomic_int_destroy(rt_atomic_int_t *atomp)
destroy an atomic int variable 
 
#define CPU_FMA
FMA insns avail. 
 
#define CPU_KNL
Intel KNL. 
 
barrier sync object with padding to prevent false sharing 
 
#define CPU_AVX512PF
AVX-512PF SIMD avail. 
 
void renderscene(scenedef *scene)
 
int rt_atomic_int_set(rt_atomic_int_t *atomp, int val)
set an atomic int variable 
 
#define CPU_SSE4_1
SSE4.1 SIMD avail. 
 
void * rt_par_init_scanlinereceives(rt_parhandle voidhandle, scenedef *scene)
 
rt_timerhandle rt_timer_create(void)
 
void rt_ui_progress(int percent)
 
float * image_crop_rgb96f(int xres, int yres, float *fimg, int szx, int szy, int sx, int sy)
 
Tachyon public API function prototypes and declarations used to drive the ray tracing engine...
 
void gamma_rgb96f(int xres, int yres, float *fimg, float gamma)
 
#define CPU_HYPERVISOR
VM/Hypervisor environment. 
 
#define CPU_F16C
F16C insns avail. 
 
int tid
worker thread index 
 
int stopy
ending Y pixel index 
 
#define CPU_ARM64_ASIMDFHM
Advanced SIMD FHM avail. 
 
int rt_thread_create(rt_thread_t *thr, void *fctn(void *), void *arg)
create a new child thread 
 
void destroy_render_threads(scenedef *scene)