render.c
/*
 * render.c - This file contains the main program and driver for the raytracer.
 *
 * (C) Copyright 1994-2022 John E. Stone
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * $Id: render.c,v 1.123 2022/02/18 17:55:28 johns Exp $
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#define TACHYON_INTERNAL 1
#include "tachyon.h"
#include "macros.h"
#include "threads.h"
#include "parallel.h"
#include "imageio.h"
#include "trace.h"
#include "render.h"
#include "util.h"
#include "shade.h"
#include "ui.h"
#include "grid.h"
#include "camera.h"
#include "intersect.h"
/*
 * Determine which shader to use based on the list of capabilities
 * needed to render the scene at full quality. Ideally we'll avoid
 * using anything more sophisticated than is actually needed to render
 * a scene.
 */
static void rt_autoshader(scenedef * scene) {
  /*
   * If the user has already specified a particular shader
   * then we use what they asked for, otherwise we determine
   * which shader to use ourselves.
   */
  if (scene->shader == NULL) {
    /* No logic yet, just use max quality */
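    /* full_shader() takes a ray pointer (see shade.c), so it is cast    */
    /* here to the generic shader function-pointer type that the scene   */
    /* structure stores.                                                 */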
    scene->shader = (color (*)(void *)) full_shader;
  }
}

/*
 * All of the threads in the pool wait on a barrier until
 * they are told to wake up and do some work. At present,
 * the only actions they can take are to render the scene
 * or to terminate by returning to the master.
 */
void * thread_worker(void * voidparms) {
  thr_parms * parms = (thr_parms *) voidparms;

#if defined(USECPUAFFINITY)
  /* Optionally set CPU affinity mask for each thread */
  int cpuaffinity = -1;

#if defined(__MIC__)
  /* On the MIC platform, we want 4 threads per CPU, with a hard-coded  */
  /* mapping that puts neighboring workers on neighboring CPUs with the */
  /* hope of better L1/L2 cache sharing                                 */
  cpuaffinity = parms->tid / 4;
#endif

#if 0 && defined(_ARCH_PPC64)
  /* On POWER7/8 platforms, CPUs are numbered sequentially including    */
  /* indices for all per-core SMT threads which may be enabled/disabled */
  /* in a consecutive sequence.                                         */
  cpuaffinity = parms->tid;
#endif

  if (cpuaffinity > 0) {
    rt_thread_set_self_cpuaffinity(cpuaffinity);
#if 0
    if (parms->scene->verbosemode && parms->scene->mynode == 0) {
      printf("Thread[%d] setting affinity to %d\n", parms->tid, cpuaffinity);
    }
#endif
  }
#endif

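  /* The barrier result doubles as a run/exit flag: renderscene() enters */
  /* the barrier with an increment of 1 to run another frame, while      */
  /* destroy_render_threads() enters with 0, so the loop below falls     */
  /* through and the worker exits.                                       */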
  while (rt_thread_barrier(parms->runbar, 0)) {
    thread_trace(parms);
  }
  return NULL;
}


/*
 * Create the pool of rendering threads, initialize all of the
 * state variables they need, and start them waiting on the barrier.
 */
void create_render_threads(scenedef * scene) {
  int thr;
  thr_parms * parms;
  rt_thread_t * threads;
  rt_barrier_t * bar;
#if defined(MPI) && defined(THR)
  int row, numrowbars;
  rt_atomic_int_t * rowbars;
  rt_atomic_int_t * rowsdone;
#endif
#if defined(THR)
  rt_atomic_int_t * pixelsched;
  int sched_dynamic = 0; /* leave dynamic pixel scheduling off by default */

#if 1
  /* determine whether to enable dynamic pixel scheduling based on     */
  /* whether the scene uses any particularly costly rendering features */
  /* such as ambient occlusion lighting, or more than 4 antialiasing   */
  /* samples per pixel...                                              */
  if (scene->ambocc.numsamples > 0 || scene->antialiasing > 4) {
    sched_dynamic = 1;
  }
#else
  sched_dynamic = (getenv("SCHED_DYNAMIC") != NULL);
#endif
#endif
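
  /* With dynamic scheduling, workers draw pixel indices on demand from */
  /* the shared atomic counter allocated below; otherwise each worker   */
  /* keeps the fixed scanline interleaving assigned further down.       */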

  /* allocate and initialize thread parameter buffers */
  threads = (rt_thread_t *) malloc(scene->numthreads * sizeof(rt_thread_t));
  parms = (thr_parms *) malloc(scene->numthreads * sizeof(thr_parms));

  bar = rt_thread_barrier_init(scene->numthreads);

#if defined(THR)
  /* initialize atomic pixel scheduler used for dynamic load balancing */
  pixelsched = (rt_atomic_int_t *) calloc(1, sizeof(rt_atomic_int_t));
  rt_atomic_int_init(pixelsched, 0);
#endif

#if defined(MPI) && defined(THR)
  /* initialize row barriers for MPI builds */
  numrowbars = scene->vres;
  rowbars = (rt_atomic_int_t *) calloc(1, numrowbars * sizeof(rt_atomic_int_t));
  rowsdone = (rt_atomic_int_t *) calloc(1, sizeof(rt_atomic_int_t));
  for (row=0; row<numrowbars; row++) {
    rt_atomic_int_init(&rowbars[row], 0);
  }
  rt_atomic_int_init(rowsdone, 0);
#endif

  for (thr=0; thr<scene->numthreads; thr++) {
    parms[thr].tid=thr;
    parms[thr].nthr=scene->numthreads;
    parms[thr].scene=scene;

    /* the sizes of these arrays are padded to avoid cache aliasing */
    /* and false sharing between threads.                           */
    parms[thr].local_mbox =
#if !defined(DISABLEMBOX)
      (unsigned long *) calloc(sizeof(unsigned long)*scene->objgroup.numobjects + 32, 1);
#else
      NULL;
#endif

    parms[thr].serialno = 1;
    parms[thr].runbar = bar;

    /* For a threads-only build (or MPI nodes == 1), we distribute */
    /* work round-robin by scanlines. For MPI-only builds, we also */
    /* distribute by scanlines. For mixed MPI+threads builds, we   */
    /* distribute work to nodes by scanline, and to the threads    */
    /* within a node on a pixel-by-pixel basis.                    */
    if (scene->nodes == 1) {
      parms[thr].startx = 1;
      parms[thr].stopx = scene->hres;
      parms[thr].xinc = 1;
      parms[thr].starty = thr + 1;
      parms[thr].stopy = scene->vres;
      parms[thr].yinc = scene->numthreads;
    } else {
      parms[thr].startx = thr + 1;
      parms[thr].stopx = scene->hres;
      parms[thr].xinc = scene->numthreads;
      parms[thr].starty = scene->mynode + 1;
      parms[thr].stopy = scene->vres;
      parms[thr].yinc = scene->nodes;
    }
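
    /* For example, with nodes == 1 and 4 threads, thread 2 shades the   */
    /* full width of scanlines 3, 7, 11, ... (starty = tid + 1, stepping */
    /* by yinc = numthreads).                                            */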

#if defined(THR)
    parms[thr].sched_dynamic = sched_dynamic;
    parms[thr].pixelsched = pixelsched;
#endif

#if defined(MPI) && defined(THR)
    parms[thr].numrowbars = numrowbars;
    parms[thr].rowbars = rowbars;
    parms[thr].rowsdone = rowsdone;
#endif
  }

  scene->threadparms = (void *) parms;
  scene->threads = (void *) threads;

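  /* Spawn workers for slots 1..N-1 only; slot 0 belongs to the calling  */
  /* thread, which runs thread_trace(&parms[0]) itself in renderscene(). */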
  for (thr=1; thr < scene->numthreads; thr++)
    rt_thread_create(&threads[thr], thread_worker, (void *) (&parms[thr]));

}


/*
 * Shut down all of the worker threads and free up their resources
 */
void destroy_render_threads(scenedef * scene) {
  thr_parms * parms = (thr_parms *) scene->threadparms;
  rt_thread_t * threads = (rt_thread_t *) scene->threads;
  int thr;
#if defined(MPI) && defined(THR)
  int row;
#endif

  if (scene->threads != NULL) {
    /* wake up sleepers and tell them to exit */
    rt_thread_barrier(parms[0].runbar, 0);

    /* wait for all sleepers to exit */
    for (thr=1; thr<parms[0].nthr; thr++)
      rt_thread_join(threads[thr], NULL);

    /* destroy the thread barrier */
    rt_thread_barrier_destroy(parms[0].runbar);

    free(scene->threads);
  }

  if (scene->threadparms != NULL) {
    /* deallocate thread parameter buffers
     * NOTE: This has to use the remembered number of threads stored in the
     *       thread parameter area for thread 0, since the one in the scene
     *       may have changed on us.
     */
    for (thr=0; thr < parms[0].nthr; thr++) {
      if (parms[thr].local_mbox != NULL)
        free(parms[thr].local_mbox);
    }

#if defined(THR)
    /* destroy the atomic pixel scheduler counter */
    rt_atomic_int_destroy(parms[0].pixelsched);
    free(parms[0].pixelsched);
#endif

#if defined(MPI) && defined(THR)
    /* destroy and free row barriers for MPI builds */
    for (row=0; row<parms[0].numrowbars; row++) {
      rt_atomic_int_destroy(&parms[0].rowbars[row]);
    }
    rt_atomic_int_destroy(parms[0].rowsdone);
    free(parms[0].rowbars);
    free(parms[0].rowsdone);
#endif

    free(scene->threadparms);
  }

  scene->threads = NULL;
  scene->threadparms = NULL;
}



/*
 * Check the scene to determine whether or not any parameters that affect
 * the thread pool, the persistent message passing primitives, or other
 * infrastructure need to be reconfigured before rendering commences.
 */
static void rendercheck(scenedef * scene) {
  flt runtime;
  rt_timerhandle stth; /* setup time timer handle */

  if (scene->verbosemode && scene->mynode == 0) {
    char msgtxt[1024];
    int i, totalcpus;
    flt totalspeed;

    rt_ui_message(MSG_0, "CPU Information:");
    memset(msgtxt, 0, sizeof(msgtxt));
    if ((scene->nodes == 1) && (scene->cpuinfo[0].cpucaps != NULL)) {
      rt_cpu_caps_t *cpucaps = (rt_cpu_caps_t *) scene->cpuinfo[0].cpucaps;

      strcpy(msgtxt, " CPU features: ");

#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_AMD64))
      if (cpucaps->flags & CPU_SSE2)
        strcat(msgtxt, "SSE2 ");
      if (cpucaps->flags & CPU_SSE4_1)
        strcat(msgtxt, "SSE4.1 ");
      if (cpucaps->flags & CPU_AVX)
        strcat(msgtxt, "AVX ");
      if (cpucaps->flags & CPU_AVX2)
        strcat(msgtxt, "AVX2 ");
      if (cpucaps->flags & CPU_FMA)
        strcat(msgtxt, "FMA ");
      if (cpucaps->flags & CPU_F16C)
        strcat(msgtxt, "F16 ");

      if ((cpucaps->flags & CPU_KNL) == CPU_KNL) {
        strcat(msgtxt, "KNL:AVX-512F+CD+ER+PF ");
      } else {
        if (cpucaps->flags & CPU_AVX512F)
          strcat(msgtxt, "AVX512F ");
        if (cpucaps->flags & CPU_AVX512CD)
          strcat(msgtxt, "AVX512CD ");
        if (cpucaps->flags & CPU_AVX512ER)
          strcat(msgtxt, "AVX512ER ");
        if (cpucaps->flags & CPU_AVX512PF)
          strcat(msgtxt, "AVX512PF ");
      }

      if (cpucaps->flags & CPU_HT)
        strcat(msgtxt, "HT ");

      if (cpucaps->flags & CPU_HYPERVISOR) {
        rt_ui_message(MSG_0, msgtxt);
        rt_ui_message(MSG_0, " Detected VM or hypervisor execution environment");
      }
#endif

#if (defined(__ARM_ARCH_ISA_A64) || defined(__ARM_NEON))
      if (cpucaps->flags & CPU_ARM64_FP)
        strcat(msgtxt, "FP ");
      if (cpucaps->flags & CPU_ARM64_SVE)
        strcat(msgtxt, "SVE ");

      if (cpucaps->flags & CPU_ARM64_ASIMD)
        strcat(msgtxt, "ASIMD ");
      if (cpucaps->flags & CPU_ARM64_ASIMDHP)
        strcat(msgtxt, "ASIMDHP ");
      if (cpucaps->flags & CPU_ARM64_ASIMDRDM)
        strcat(msgtxt, "ASIMDRDM ");
      if (cpucaps->flags & CPU_ARM64_ASIMDDP)
        strcat(msgtxt, "ASIMDDP ");
      if (cpucaps->flags & CPU_ARM64_ASIMDFHM)
        strcat(msgtxt, "ASIMDFHM ");

      if (cpucaps->flags & CPU_ARM64_AES)
        strcat(msgtxt, "AES ");
      if (cpucaps->flags & CPU_ARM64_CRC32)
        strcat(msgtxt, "CRC32 ");
      if (cpucaps->flags & CPU_ARM64_SHA1)
        strcat(msgtxt, "SHA1 ");
      if (cpucaps->flags & CPU_ARM64_SHA2)
        strcat(msgtxt, "SHA2 ");
      if (cpucaps->flags & CPU_ARM64_SHA3)
        strcat(msgtxt, "SHA3 ");
      if (cpucaps->flags & CPU_ARM64_SHA512)
        strcat(msgtxt, "SHA512 ");

#if defined(VMDCPUDISPATCH) && defined(__ARM_FEATURE_SVE)
      if (cpucaps->flags & CPU_ARM64_SVE) {
        rt_ui_message(MSG_0, msgtxt);
        sprintf(msgtxt, " ARM64 SVE vector lengths 32-bit: %d, 64-bit: %d",
                arm_sve_vecsize_32bits(), arm_sve_vecsize_64bits());
      }
#endif
#endif

      rt_ui_message(MSG_0, msgtxt);
    }

    totalspeed = 0.0;
    totalcpus = 0;
    for (i=0; i<scene->nodes; i++) {
      sprintf(msgtxt,
              " Node %4d: %2d CPUs, CPU Speed %4.2f, Node Speed %6.2f Name: %s",
              i, scene->cpuinfo[i].numcpus, scene->cpuinfo[i].cpuspeed,
              scene->cpuinfo[i].nodespeed, scene->cpuinfo[i].machname);
      rt_ui_message(MSG_0, msgtxt);

      totalcpus += scene->cpuinfo[i].numcpus;
      totalspeed += scene->cpuinfo[i].nodespeed;
    }

    sprintf(msgtxt, " Total CPUs: %d", totalcpus);
    rt_ui_message(MSG_0, msgtxt);
    sprintf(msgtxt, " Total Speed: %f\n", totalspeed);
    rt_ui_message(MSG_0, msgtxt);
  }

  rt_par_barrier_sync(scene->parhnd); /* synchronize all nodes at this point */
  stth=rt_timer_create();
  rt_timer_start(stth); /* Time the preprocessing of the scene database    */
  rt_autoshader(scene); /* Adapt to the shading features needed at runtime */

  /* Hierarchical grid ray tracing acceleration scheme */
  if (scene->boundmode == RT_BOUNDING_ENABLED)
    engrid_scene(scene, scene->boundthresh);
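  /* engrid_scene() sorts scene objects into the grid structure;        */
  /* boundthresh presumably sets the object-count threshold above which */
  /* a group gets subdivided further (see grid.c).                      */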

  /* if any clipping groups exist, we have to use appropriate */
  /* intersection testing logic                               */
  if (scene->cliplist != NULL) {
    scene->flags |= RT_SHADE_CLIPPING;
  }

  /* if there was a preexisting image, free it before continuing */
  if (scene->imginternal && (scene->img != NULL)) {
    free(scene->img);
    scene->img = NULL;
  }

  /* Allocate a new image buffer if necessary */
  if (scene->img == NULL) {
    scene->imginternal = 1;
    if (scene->verbosemode && scene->mynode == 0) {
      rt_ui_message(MSG_0, "Allocating Image Buffer.");
    }

    /* allocate the image buffer according to pixel format */
    if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
      scene->img = malloc(scene->hres * scene->vres * 3);
    } else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
      scene->img = malloc(sizeof(float) * scene->hres * scene->vres * 3);
    } else {
      rt_ui_message(MSG_0, "Illegal image buffer format specifier!");
    }

    if (scene->img == NULL) {
      scene->imginternal = 0;
      rt_ui_message(MSG_0, "Warning: Failed To Allocate Image Buffer!");
    }
  }

#if defined(RT_ACCUMULATE_ON)
  /* Allocate the accumulation buffer if necessary */
  if ((scene->accum_mode == RT_ACCUMULATE_ON) ||
      (scene->accum_mode == RT_ACCUMULATE_CLEAR)) {
    int bufsz = sizeof(float) * scene->hres * scene->vres * 3;

    /* handle resize events */
    if (scene->accum_buf != NULL) {
      free(scene->accum_buf);
      scene->accum_buf = NULL;
    }

    if (scene->accum_buf == NULL) {
      scene->accum_buf = calloc(1, bufsz);  /* allocate and clear buffer */
      scene->accum_mode = RT_ACCUMULATE_ON; /* reset to on from clear    */
      scene->accum_count = 0;               /* reset accumulation count  */
    }

    if (scene->accum_mode == RT_ACCUMULATE_CLEAR) {
      int bufsz = sizeof(float) * scene->hres * scene->vres * 3;
      memset(scene->accum_buf, 0, bufsz);   /* clear accumulation buffer */
      scene->accum_count = 0;               /* reset accumulation count  */
      scene->accum_mode = RT_ACCUMULATE_ON; /* reset to on from clear    */
    }
  }
#endif

  /* if any threads are leftover from a previous scene, and the scene  */
  /* has changed significantly, we have to collect and respawn the     */
  /* worker threads, since lots of things may have changed which would */
  /* affect them.                                                      */
  destroy_render_threads(scene);
  create_render_threads(scene);

  /* allocate and initialize persistent scanline receive buffers */
  /* which are used by the parallel message passing code.        */
  scene->parbuf = rt_par_init_scanlinereceives(scene->parhnd, scene);

  /* the scene has been successfully prepared for rendering;   */
  /* unless it gets modified in certain ways, we don't need to */
  /* pre-process it ever again.                                */
  scene->scenecheck = 0;

  rt_timer_stop(stth); /* Preprocessing is finished, stop timing */
  runtime=rt_timer_time(stth);
  rt_timer_destroy(stth);

  /* Print out relevant timing info */
  if (scene->mynode == 0) {
    char msgtxt[256];
    sprintf(msgtxt, "Preprocessing Time: %10.4f seconds", runtime);
    rt_ui_message(MSG_0, msgtxt);
  }
}


/*
 * Save the rendered image to disk.
 */
static void renderio(scenedef * scene) {
  flt iotime;
  char msgtxt[256];
  rt_timerhandle ioth; /* I/O timer handle */

  ioth=rt_timer_create();
  rt_timer_start(ioth);

  if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
    if (scene->imgprocess & RT_IMAGE_NORMALIZE) {
      normalize_rgb96f(scene->hres, scene->vres, (float *) scene->img);
      rt_ui_message(MSG_0, "Post-processing: normalizing pixel values.");
    }

    if (scene->imgprocess & RT_IMAGE_GAMMA) {
      gamma_rgb96f(scene->hres, scene->vres, (float *) scene->img,
                   scene->imggamma);
      rt_ui_message(MSG_0, "Post-processing: gamma correcting pixel values.");
    }
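    /* gamma_rgb96f() presumably applies the standard power-law mapping */
    /* out = in^(1/gamma) to each float color channel (see imageio.c).  */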
  } else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
    if (scene->imgprocess & (RT_IMAGE_NORMALIZE | RT_IMAGE_GAMMA))
      rt_ui_message(MSG_0, "Can't post-process 24-bit integer image data");
  }

  /* support cropping of output images for SPECMPI benchmarks */
  if (scene->imgcrop.cropmode == RT_CROP_DISABLED) {
    writeimage(scene->outfilename, scene->hres, scene->vres,
               scene->img, scene->imgbufformat, scene->imgfileformat);
  } else {
    /* crop image before writing if necessary */
    if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
      float *imgcrop;
      imgcrop = image_crop_rgb96f(scene->hres, scene->vres, scene->img,
                                  scene->imgcrop.xres, scene->imgcrop.yres,
                                  scene->imgcrop.xstart, scene->imgcrop.ystart);
      writeimage(scene->outfilename, scene->imgcrop.xres, scene->imgcrop.yres,
                 imgcrop, scene->imgbufformat, scene->imgfileformat);
      free(imgcrop);
    } else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
      unsigned char *imgcrop;
      imgcrop = image_crop_rgb24(scene->hres, scene->vres, scene->img,
                                 scene->imgcrop.xres, scene->imgcrop.yres,
                                 scene->imgcrop.xstart, scene->imgcrop.ystart);
      writeimage(scene->outfilename, scene->imgcrop.xres, scene->imgcrop.yres,
                 imgcrop, scene->imgbufformat, scene->imgfileformat);
      free(imgcrop);
    }
  }

  rt_timer_stop(ioth);
  iotime = rt_timer_time(ioth);
  rt_timer_destroy(ioth);

  sprintf(msgtxt, " Image I/O Time: %10.4f seconds", iotime);
  rt_ui_message(MSG_0, msgtxt);
}


/*
 * Render the scene
 */
void renderscene(scenedef * scene) {
  flt runtime;
  rt_timerhandle rtth; /* render time timer handle */

  /* if certain key aspects of the scene parameters have been changed */
  /* since the last frame rendered, or when rendering the scene the   */
  /* first time, various setup, initialization and memory allocation  */
  /* routines need to be run in order to prepare for rendering.       */
  if (scene->scenecheck)
    rendercheck(scene);

#if defined(RT_ACCUMULATE_ON)
  /* update accumulation buffer state on every frame */
  if (scene->accum_mode == RT_ACCUMULATE_CLEAR) {
    int bufsz = sizeof(float) * scene->hres * scene->vres * 3;
    memset(scene->accum_buf, 0, bufsz);   /* clear accumulation buffer */
    scene->accum_count = 0;               /* reset accumulation count  */
    scene->accum_mode = RT_ACCUMULATE_ON; /* reset to on from clear    */
  }
  scene->accum_count++; /* increment accumulation count */
#endif
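  /* Each frame's float pixels are summed into accum_buf; accum_count */
  /* presumably lets downstream normalization divide the sum by the   */
  /* number of accumulated frames for progressive refinement.         */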

  if (scene->mynode == 0)
    rt_ui_progress(0); /* print 0% progress at start of rendering */

  /*
   * Core Ray Tracing Code
   *
   * Ideally, as little as possible other than this code should be
   * executed for rendering a frame. Most if not all memory allocations
   * should be done outside of the core code, and all setup should be
   * done outside of here. This will give the best speed when rendering
   * walk-throughs and similar things.
   */

  rtth=rt_timer_create(); /* create/init rendering timer */
  rt_timer_start(rtth);   /* start ray tracing timer     */

  camera_init(scene);     /* Initialize all aspects of camera system */

#if defined(THR)
  /* reset the pixel counter for this frame */
  rt_atomic_int_set(((thr_parms *) scene->threadparms)[0].pixelsched, 0);
#endif

#if defined(MPI) && defined(THR)
  /* reset the rows counter for this frame */
  rt_atomic_int_set(((thr_parms *) scene->threadparms)[0].rowsdone, 0);
#endif

#ifdef THR
  /* if using threads, wake up the child threads... */
  rt_thread_barrier(((thr_parms *) scene->threadparms)[0].runbar, 1);
#endif

#ifdef MPI
  /* if using message passing, start persistent receives */
  rt_par_start_scanlinereceives(scene->parhnd, scene->parbuf);
#endif

  /* Actually Ray Trace The Image */
  thread_trace(&((thr_parms *) scene->threadparms)[0]);

#ifdef MPI
  /* wait for all scanlines to recv/send */
  rt_par_waitscanlines(scene->parhnd, scene->parbuf);
#endif

  rt_timer_stop(rtth); /* stop timer for ray tracing runtime */
  runtime=rt_timer_time(rtth);
  rt_timer_destroy(rtth);

  /*
   * End of Core Ray Tracing Code
   *
   * Anything after here should be UI, tear-down, or reset code
   */

  if (scene->mynode == 0) {
    char msgtxt[256];

    rt_ui_progress(100); /* print 100% progress when finished rendering */

    sprintf(msgtxt, "\n Ray Tracing Time: %10.4f seconds", runtime);
    rt_ui_message(MSG_0, msgtxt);

    if (scene->writeimagefile)
      renderio(scene);
  }
} /* end of renderscene() */

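For context, the sketch below shows how an application typically reaches renderscene() through Tachyon's public API. It is a minimal, hedged example: the calls (rt_initialize, rt_newscene, rt_resolution, rt_outputfile, rt_renderscene, rt_deletescene, rt_finalize) come from the public tachyon.h interface, but their exact signatures should be verified against that header, and the scene-population step is elided.

/* driver_sketch.c -- hypothetical minimal driver; assumes the public API
 * declared in tachyon.h. rt_renderscene() invokes renderscene() above,
 * which runs rendercheck() the first time the scene is rendered. */
#include "tachyon.h"

int main(int argc, char **argv) {
  SceneHandle scene;

  rt_initialize(&argc, &argv);      /* start the runtime (threads/MPI)  */

  scene = rt_newscene();            /* allocate an empty scene          */
  rt_resolution(scene, 512, 512);   /* set hres x vres                  */
  rt_outputfile(scene, "out.tga");  /* file written by renderio()       */

  /* ... define the camera, lights, and objects here ... */

  rt_renderscene(scene);            /* trace the frame, write the image */

  rt_deletescene(scene);            /* release scene resources          */
  rt_finalize();                    /* shut down the runtime            */
  return 0;
}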