16 #define TACHYON_INTERNAL 1 43 if (scene->shader == NULL) {
59 #if defined(USECPUAFFINITY) 67 cpuaffinity = parms->
tid / 4;
70 #if 0 && defined(_ARCH_PPC64) 74 cpuaffinity = parms->
tid;
77 if (cpuaffinity > 0) {
80 if (scene->verbosemode && scene->mynode == 0) {
81 printf(
"Thread[%d] setting affinity to %d\n", parms->
tid, cpuaffinity);
103 #if defined(MPI) && defined(THR) 110 int sched_dynamic = 0;
117 if (scene->ambocc.numsamples > 0 || scene->antialiasing > 4) {
121 sched_dynamic = (getenv(
"SCHED_DYNAMIC") != NULL);
137 #if defined(MPI) && defined(THR) 139 numrowbars = scene->vres;
142 for (row=0; row<numrowbars; row++) {
148 for (thr=0; thr<scene->numthreads; thr++) {
150 parms[thr].
nthr=scene->numthreads;
151 parms[thr].
scene=scene;
156 #if !defined(DISABLEMBOX) 157 (
unsigned long *) calloc(
sizeof(
unsigned long)*scene->objgroup.numobjects + 32, 1);
170 if (scene->nodes == 1) {
172 parms[thr].
stopx = scene->hres;
174 parms[thr].
starty = thr + 1;
175 parms[thr].
stopy = scene->vres;
176 parms[thr].
yinc = scene->numthreads;
178 parms[thr].
startx = thr + 1;
179 parms[thr].
stopx = scene->hres;
180 parms[thr].
xinc = scene->numthreads;
181 parms[thr].
starty = scene->mynode + 1;
182 parms[thr].
stopy = scene->vres;
183 parms[thr].
yinc = scene->nodes;
187 parms[thr].sched_dynamic = sched_dynamic;
188 parms[thr].pixelsched = pixelsched;
191 #if defined(MPI) && defined(THR) 192 parms[thr].numrowbars = numrowbars;
193 parms[thr].rowbars = rowbars;
194 parms[thr].rowsdone = rowsdone;
198 scene->threadparms = (
void *) parms;
199 scene->threads = (
void *) threads;
201 for (thr=1; thr < scene->numthreads; thr++)
214 #if defined(MPI) && defined(THR) 218 if (scene->threads != NULL) {
223 for (thr=1; thr<parms[0].
nthr; thr++)
229 free(scene->threads);
232 if (scene->threadparms != NULL) {
238 for (thr=0; thr < parms[0].
nthr; thr++) {
239 if (parms[thr].local_mbox != NULL)
240 free(parms[thr].local_mbox);
246 free(parms[0].pixelsched);
249 #if defined(MPI) && defined(THR) 251 for (row=0; row<parms[0].numrowbars; row++) {
255 free(parms[0].rowbars);
256 free(parms[0].rowsdone);
259 free(scene->threadparms);
262 scene->threads = NULL;
263 scene->threadparms = NULL;
277 if (scene->verbosemode && scene->mynode == 0) {
283 memset(msgtxt, 0,
sizeof(msgtxt));
284 if ((scene->nodes == 1) && (scene->cpuinfo[0].cpucaps != NULL)) {
287 strcpy(msgtxt,
" CPU features: ");
289 #if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64)) 291 strcat(msgtxt,
"SSE2 ");
293 strcat(msgtxt,
"SSE4.1 ");
295 strcat(msgtxt,
"AVX ");
297 strcat(msgtxt,
"AVX2 ");
299 strcat(msgtxt,
"FMA ");
301 strcat(msgtxt,
"F16 ");
304 strcat(msgtxt,
"KNL:AVX-512F+CD+ER+PF ");
307 strcat(msgtxt,
"AVX512F ");
309 strcat(msgtxt,
"AVX512CD ");
311 strcat(msgtxt,
"AVX512ER ");
313 strcat(msgtxt,
"AVX512PF ");
317 strcat(msgtxt,
"HT ");
325 #if (defined(__ARM_ARCH_ISA_A64) || defined(__ARM_NEON)) 327 strcat(msgtxt,
"FP ");
329 strcat(msgtxt,
"SVE ");
332 strcat(msgtxt,
"ASIMD ");
334 strcat(msgtxt,
"ASIMDHP ");
336 strcat(msgtxt,
"ASIMDRDM ");
338 strcat(msgtxt,
"ASIMDDP ");
340 strcat(msgtxt,
"ASIMDFHM ");
343 strcat(msgtxt,
"AES ");
345 strcat(msgtxt,
"CRC32 ");
347 strcat(msgtxt,
"SHA1 ");
349 strcat(msgtxt,
"SHA2 ");
351 strcat(msgtxt,
"SHA3 ");
353 strcat(msgtxt,
"SHA512 ");
355 #if defined(VMDCPUDISPATCH) && defined(__ARM_FEATURE_SVE) 358 sprintf(msgtxt,
" ARM64 SVE vector lengths 32-bit: %d, 64-bit: %d",
359 arm_sve_vecsize_32bits(), arm_sve_vecsize_64bits());
369 for (i=0; i<scene->nodes; i++) {
371 " Node %4d: %2d CPUs, CPU Speed %4.2f, Node Speed %6.2f Name: %s",
372 i, scene->cpuinfo[i].numcpus, scene->cpuinfo[i].cpuspeed,
373 scene->cpuinfo[i].nodespeed, scene->cpuinfo[i].machname);
376 totalcpus += scene->cpuinfo[i].numcpus;
377 totalspeed += scene->cpuinfo[i].nodespeed;
380 sprintf(msgtxt,
" Total CPUs: %d", totalcpus);
382 sprintf(msgtxt,
" Total Speed: %f\n", totalspeed);
397 if (scene->cliplist != NULL) {
398 scene->flags |= RT_SHADE_CLIPPING;
402 if (scene->imginternal && (scene->img != NULL)) {
408 if (scene->img == NULL) {
409 scene->imginternal = 1;
410 if (scene->verbosemode && scene->mynode == 0) {
415 if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
416 scene->img = malloc(scene->hres * scene->vres * 3);
417 }
else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
418 scene->img = malloc(
sizeof(
float) * scene->hres * scene->vres * 3);
423 if (scene->img == NULL) {
424 scene->imginternal = 0;
429 #if defined(RT_ACCUMULATE_ON) 433 int bufsz =
sizeof(float) * scene->hres * scene->vres * 3;
436 if (scene->accum_buf != NULL) {
437 free(scene->accum_buf);
438 scene->accum_buf = NULL;
441 if (scene->accum_buf == NULL) {
442 scene->accum_buf = calloc(1, bufsz);
444 scene->accum_count = 0;
448 int bufsz =
sizeof(float) * scene->hres * scene->vres * 3;
449 memset(scene->accum_buf, 0, bufsz);
450 scene->accum_count = 0;
470 scene->scenecheck = 0;
477 if (scene->mynode == 0) {
479 sprintf(msgtxt,
"Preprocessing Time: %10.4f seconds",runtime);
496 if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
497 if (scene->imgprocess & RT_IMAGE_NORMALIZE) {
502 if (scene->imgprocess & RT_IMAGE_GAMMA) {
503 gamma_rgb96f(scene->hres, scene->vres, (
float *) scene->img,
507 }
else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
508 if (scene->imgprocess & (RT_IMAGE_NORMALIZE | RT_IMAGE_GAMMA))
514 writeimage(scene->outfilename, scene->hres, scene->vres,
515 scene->img, scene->imgbufformat, scene->imgfileformat);
518 if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
521 scene->imgcrop.xres, scene->imgcrop.yres,
522 scene->imgcrop.xstart, scene->imgcrop.ystart);
523 writeimage(scene->outfilename, scene->imgcrop.xres, scene->imgcrop.yres,
524 imgcrop, scene->imgbufformat, scene->imgfileformat);
526 }
else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
527 unsigned char *imgcrop;
529 scene->imgcrop.xres, scene->imgcrop.yres,
530 scene->imgcrop.xstart, scene->imgcrop.ystart);
531 writeimage(scene->outfilename, scene->imgcrop.xres, scene->imgcrop.yres,
532 imgcrop, scene->imgbufformat, scene->imgfileformat);
541 sprintf(msgtxt,
" Image I/O Time: %10.4f seconds", iotime);
557 if (scene->scenecheck)
560 #if defined(RT_ACCUMULATE_ON) 563 int bufsz =
sizeof(float) * scene->hres * scene->vres * 3;
564 memset(scene->accum_buf, 0, bufsz);
565 scene->accum_count = 0;
568 scene->accum_count++;
571 if (scene->mynode == 0)
594 #if defined(MPI) && defined(THR) 628 if (scene->mynode == 0) {
633 sprintf(msgtxt,
"\n Ray Tracing Time: %10.4f seconds", runtime);
636 if (scene->writeimagefile)
#define RT_CROP_DISABLED
Image cropping disabled.
int stopx
ending X pixel index
#define CPU_ARM64_AES
AES insns avail.
double rt_timer_time(rt_timerhandle v)
void rt_par_barrier_sync(rt_parhandle voidhandle)
static void renderio(scenedef *scene)
void rt_timer_destroy(rt_timerhandle v)
int rt_thread_barrier(rt_barrier_t *barrier, int increment)
synchronize on counting barrier primitive
#define CPU_ARM64_CRC32
CRC32 insns avail.
#define CPU_ARM64_FP
FP insns avail.
#define CPU_ARM64_SVE
Scalable Vector Extns avail.
int nthr
total number of worker threads
void rt_thread_barrier_destroy(rt_barrier_t *barrier)
destroy counting barrier primitive
void rt_timer_start(rt_timerhandle v)
scenedef * scene
scene handle
#define CPU_ARM64_SHA2
SHA-2 insns avail.
#define CPU_ARM64_ASIMD
Advanced SIMD avail.
rt_barrier_t * runbar
sleeping thread pool barrier
int rt_thread_set_self_cpuaffinity(int cpu)
set the CPU affinity of the current thread (if allowed by host system)
rt_barrier_t * rt_thread_barrier_init(int n_clients)
initialize counting barrier primitive
#define CPU_HT
x86 Hyperthreading detected
#define CPU_SSE2
SSE2 SIMD avail.
void rt_ui_message(int level, char *msg)
static void rendercheck(scenedef *scene)
unsigned char * image_crop_rgb24(int xres, int yres, unsigned char *img, int szx, int szy, int sx, int sy)
void rt_par_start_scanlinereceives(rt_parhandle voidparhandle, rt_parbuf voidhandle)
#define CPU_AVX
AVX SIMD avail.
int starty
starting Y pixel index
#define CPU_ARM64_SHA512
SHA-512 insns avail.
void rt_timer_stop(rt_timerhandle v)
#define CPU_ARM64_SHA1
SHA-1 insns avail.
int rt_atomic_int_init(rt_atomic_int_t *atomp, int val)
initialize an atomic int variable
#define RT_ACCUMULATE_ON
accum.
void rt_par_waitscanlines(rt_parhandle voidparhandle, rt_parbuf voidhandle)
Tachyon cross-platform thread creation and management, atomic operations, and CPU feature query APIs...
static void rt_autoshader(scenedef *scene)
int startx
starting X pixel index
#define CPU_ARM64_ASIMDRDM
Advanced SIMD RDM avail.
int writeimage(char *name, int xres, int yres, void *img, int imgbufferformat, int fileformat)
double flt
generic floating point number, using double
#define RT_BOUNDING_ENABLED
Enable spatial subdivision/bounding.
void * thread_trace(thr_parms *t)
#define CPU_ARM64_ASIMDHP
Advanced SIMD HP avail.
#define RT_ACCUMULATE_CLEAR
accum.
#define CPU_ARM64_SHA3
SHA-3 insns avail.
unsigned long * local_mbox
grid acceleration mailbox structure
void * thread_worker(void *voidparms)
#define CPU_AVX512CD
AVX-512CD SIMD avail.
Tachyon cross-platform timers, special math function wrappers, and RNGs.
#define CPU_AVX2
AVX2 SIMD avail.
color full_shader(ray *incident)
#define CPU_AVX512F
AVX-512F SIMD avail.
#define CPU_AVX512ER
AVX-512ER SIMD avail.
void normalize_rgb96f(int xres, int yres, float *fimg)
#define CPU_ARM64_ASIMDDP
Advanced SIMD DP avail.
atomic int structure with padding to prevent false sharing
void create_render_threads(scenedef *scene)
int engrid_scene(scenedef *scene, int boundthresh)
int rt_thread_join(rt_thread_t thr, void **stat)
join (wait for completion of, and merge with) a thread
void camera_init(scenedef *scene)
unsigned long serialno
ray mailbox test serial number
int rt_atomic_int_destroy(rt_atomic_int_t *atomp)
destroy an atomic int variable
#define CPU_FMA
FMA insns avail.
#define CPU_KNL
Intel KNL.
barrier sync object with padding to prevent false sharing
#define CPU_AVX512PF
AVX-512PF SIMD avail.
void renderscene(scenedef *scene)
int rt_atomic_int_set(rt_atomic_int_t *atomp, int val)
set an atomic int variable
#define CPU_SSE4_1
SSE4.1 SIMD avail.
void * rt_par_init_scanlinereceives(rt_parhandle voidhandle, scenedef *scene)
rt_timerhandle rt_timer_create(void)
void rt_ui_progress(int percent)
float * image_crop_rgb96f(int xres, int yres, float *fimg, int szx, int szy, int sx, int sy)
Tachyon public API function prototypes and declarations used to drive the ray tracing engine...
void gamma_rgb96f(int xres, int yres, float *fimg, float gamma)
#define CPU_HYPERVISOR
VM/Hypervisor environment.
#define CPU_F16C
F16C insns avail.
int tid
worker thread index
int stopy
ending Y pixel index
#define CPU_ARM64_ASIMDFHM
Advanced SIMD FHM avail.
int rt_thread_create(rt_thread_t *thr, void *fctn(void *), void *arg)
create a new child thread
void destroy_render_threads(scenedef *scene)