Tachyon (current)  Current Main Branch
TachyonOptiX.cu
Go to the documentation of this file.
1 /*
2  * TachyonOptiX.cu - OptiX host-side RT engine implementation
3  *
4  * (C) Copyright 2013-2022 John E. Stone
5  * SPDX-License-Identifier: BSD-3-Clause
6  *
7  * $Id: TachyonOptiX.cu,v 1.89 2022/04/19 03:04:52 johns Exp $
8  *
9  */
10 
22 //
23 // This is a second generation of the Tachyon implementation for OptiX.
24 // The new implementation favors the strengths of OptiX 7, and uses
25 // OptiX ray payload registers, direct CUDA interoperability and advanced
26 // CUDA features for both performance and maintainability.
27 //
28 // This software and its line of antecedants are described in:
29 // "Multiscale modeling and cinematic visualization of photosynthetic
30 // energy conversion processes from electronic to cell scales"
31 // M. Sener, S. Levy, J. E. Stone, AJ Christensen, B. Isralewitz,
32 // R. Patterson, K. Borkiewicz, J. Carpenter, C. N. Hunter,
33 // Z. Luthey-Schulten, D. Cox.
34 // J. Parallel Computing, 102, pp. 102698, 2021.
35 // https://doi.org/10.1016/j.parco.2020.102698
36 //
37 // "Omnidirectional Stereoscopic Projections for VR"
38 // J. E. Stone. In, William R. Sherman, editor,
39 // VR Developer Gems, Taylor and Francis / CRC Press, Chapter 24, 2019.
40 // https://www.taylorfrancis.com/chapters/edit/10.1201/b21598-24/omnidirectional-stereoscopic-projections-vr-john-stone
41 //
42 // "Interactive Ray Tracing Techniques for
43 // High-Fidelity Scientific Visualization"
44 // J. E. Stone. In, Eric Haines and Tomas Akenine-Möller, editors,
45 // Ray Tracing Gems, Apress, Chapter 27, pp. 493-515, 2019.
46 // https://link.springer.com/book/10.1007/978-1-4842-4427-2
47 //
48 // "A Planetarium Dome Master Camera"
49 // J. E. Stone. In, Eric Haines and Tomas Akenine-Möller, editors,
50 // Ray Tracing Gems, Apress, Chapter 4, pp. 49-60, 2019.
51 // https://link.springer.com/book/10.1007/978-1-4842-4427-2
52 //
53 // "Immersive Molecular Visualization with Omnidirectional
54 // Stereoscopic Ray Tracing and Remote Rendering"
55 // J. E. Stone, W. R. Sherman, and K. Schulten.
56 // High Performance Data Analysis and Visualization Workshop,
57 // 2016 IEEE International Parallel and Distributed Processing
58 // Symposium Workshops (IPDPSW), pp. 1048-1057, 2016.
59 // http://dx.doi.org/10.1109/IPDPSW.2016.121
60 //
61 // "Atomic Detail Visualization of Photosynthetic Membranes with
62 // GPU-Accelerated Ray Tracing"
63 // J. E. Stone, M. Sener, K. L. Vandivort, A. Barragan, A. Singharoy,
64 // I. Teo, J. V. Ribeiro, B. Isralewitz, B. Liu, B.-C. Goh, J. C. Phillips,
65 // C. MacGregor-Chatwin, M. P. Johnson, L. F. Kourkoutis, C. N. Hunter,
66 // K. Schulten
67 // J. Parallel Computing, 55:17-27, 2016.
68 // http://dx.doi.org/10.1016/j.parco.2015.10.015
69 //
70 // "GPU-Accelerated Molecular Visualization on
71 // Petascale Supercomputing Platforms"
72 // J. E. Stone, K. L. Vandivort, and K. Schulten.
73 // UltraVis'13: Proceedings of the 8th International Workshop on
74 // Ultrascale Visualization, pp. 6:1-6:8, 2013.
75 // http://dx.doi.org/10.1145/2535571.2535595
76 //
77 // "An Efficient Library for Parallel Ray Tracing and Animation"
78 // John E. Stone. Master's Thesis, University of Missouri-Rolla,
79 // Department of Computer Science, April 1998
80 // https://scholarsmine.mst.edu/masters_theses/1747
81 //
82 // "Rendering of Numerical Flow Simulations Using MPI"
83 // J. Stone and M. Underwood.
84 // Second MPI Developers Conference, pages 138-141, 1996.
85 // http://dx.doi.org/10.1109/MPIDC.1996.534105
86 //
87 
88 #define TACHYON_INTERNAL 1
89 #include "TachyonOptiX.h"
90 #include <optix_stubs.h>
91 #include <optix_function_table_definition.h>
92 
93 
94 #include "ProfileHooks.h"
95 //#include "/home/johns/graphics/tachyon/src/ProfileHooks.h"
96 
97 #if 0
98 #define DBG()
99 #else
100 #define DBG() if (verbose == RT_VERB_DEBUG) { printf("TachyonOptiX) %s\n", __func__); }
101 #endif
102 
103 #define CUERR { cudaError_t err; \
104  if ((err = cudaGetLastError()) != cudaSuccess) { \
105  printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__); \
106  }}
107 
108 
109 
110 //
111 // CUDA kernels for post-processing denoiser results
112 //
113 #if defined(TACHYON_OPTIXDENOISER)
114 
115 __global__ static void post_denoise_rgba4u(uchar4 *rgba4u,
116  float4 *rgba4f,
117  int tonemap_mode,
118  float tonemap_exposure,
119  int colorspace,
120  int xres, int yres) {
121  unsigned int imgsz = xres * yres;
122  unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
123 
124  //
125  // Here we copy/convert the output RGBA color buffer to uchar4
126  // format and the final output colorspace.
127  //
128  // If denoising is enabled, then we have to invert the operations
129  // performed prior to denoising so that we present the followup
130  // tone mapping operators with expected inputs.
131  //
132  if (idx < imgsz) {
133  // read in denoised sRGB approximation
134  float4 sRGB_approx20 = rgba4f[idx];
135 
136  // convert from the sRGB approximation back to linear
137  float4 lin = sRGB_to_linear_approx_20(sRGB_approx20);
138 
139  // invert range pre-scaling operation
140  lin *= 1.25f; // this also inverts the modification to alpha
141 
142  // HDR tone mapping operators need to be applied after denoising
143  // has been completed. If we use tone mapping on an LDR input,
144  // we may have to revert from sRGB to linear before applying the
145  // TMO, and then convert back to sRGB.
146  // Also performs color space conversion if required
147  float4 tonedcol;
148  tonedcol = tonemap_color(lin, tonemap_mode, tonemap_exposure, colorspace);
149 
150  float4 outcol;
151  if (colorspace == RT_COLORSPACE_sRGB)
152  outcol = linear_to_sRGB(tonedcol);
153  else
154  outcol = tonedcol;
155 
156  // clamping is applied during conversion to uchar4
157  rgba4u[idx] = make_color_rgb4u(outcol);
158  }
159 }
160 
161 #endif
162 
163 
164 //
165 // Main RT engine class
166 //
167 
169  verbose = RT_VERB_DEBUG; // ensure debug macro produces output first time
170  DBG();
171 
172  PROFILE_PUSH_RANGE("TachyonOptiX::TachyonOptiX()", RTPROF_GENERAL);
173  rt_timer = wkf_timer_create(); // create and initialize timer
174  wkf_timer_start(rt_timer);
175 
176  lasterr = OPTIX_SUCCESS; // begin with no error state set
177 
178  context_created = 0; // no context yet
179  cuda_ctx = 0; // take over current CUDA context, if not set
180  stream = 0; // stream 0
181  optix_ctx = nullptr; // no valid context yet
182  pipe = nullptr; // no valid pipeline
183  general_module = nullptr; // no module has been loaded/created
184  curve_module = nullptr; // no module has been loaded/created
185  scene_created = 0; // scene has not been created
186 
187  // set default shader path for runtime demand-loading
188  strcpy(shaderpath, "TachyonOptiXShaders.ptx");
189 
190  // clear timers
191  time_ctx_setup = 0.0;
192  time_ctx_validate = 0.0;
193  time_ctx_AS_build = 0.0;
194  time_ray_tracing = 0.0;
195  time_image_io = 0.0;
196 
197  memset((void *) &sbt, 0, sizeof(sbt)); // clear SBT record
198 
199  // clear host-side rtLaunch OptiX launch parameter buffer
200  memset(&rtLaunch, 0, sizeof(rtLaunch));
201 
202  // set default scene background state
203  scene_background_mode = RT_BACKGROUND_TEXTURE_SOLID;
204  memset(scene_bg_color, 0, sizeof(scene_bg_color));
205  memset(scene_bg_grad_top, 0, sizeof(scene_bg_grad_top));
206  memset(scene_bg_grad_bot, 0, sizeof(scene_bg_grad_bot));
207  memset(scene_bg_grad_updir, 0, sizeof(scene_bg_grad_updir));
208  scene_bg_grad_topval = 1.0f;
209  scene_bg_grad_botval = -scene_bg_grad_topval;
210  // this has to be recomputed prior to rendering when topval/botval change
211  scene_bg_grad_invrange = 1.0f / (scene_bg_grad_topval - scene_bg_grad_botval);
212 
213  camera_type = RT_PERSPECTIVE;
214  float tmp_pos[3] = { 0.0f, 0.0f, -1.0f };
215  float tmp_U[3] = { 1.0f, 0.0f, 0.0f };
216  float tmp_V[3] = { 0.0f, 1.0f, 0.0f };
217  float tmp_W[3] = { 0.0f, 0.0f, 1.0f };
218  memcpy(cam_pos, tmp_pos, sizeof(cam_pos));
219  memcpy(cam_U, tmp_U, sizeof(cam_U));
220  memcpy(cam_V, tmp_V, sizeof(cam_V));
221  memcpy(cam_W, tmp_W, sizeof(cam_W));
222  cam_zoom = 1.0f; // default field of view
223 
224  cam_dof_enabled = 0; // disable DoF by default
225  cam_dof_focal_dist = 2.0f; // default focal plane dist
226  cam_dof_fnumber = 64.0f; // default focal ratio
227 
228  cam_stereo_enabled = 0; // disable stereo by default
229  cam_stereo_eyesep = 0.06f; // default eye separation
230  cam_stereo_convergence_dist = 2.0f; // default convergence
231 
232  clipview_mode = RT_VIEWCLIP_NONE; // VR HMD fade+clipping plane/sphere
233  clipview_start = 1.0f; // VR HMD fade+clipping radial start dist
234  clipview_end = 0.2f; // VR HMD fade+clipping radial end dist
235 
236  headlight_mode = RT_HEADLIGHT_OFF; // VR HMD headlight disabled by default
237 
238  denoiser_enabled = RT_DENOISER_OFF; // disable denoiser by default
239  shadows_enabled = RT_SHADOWS_OFF; // disable shadows by default
240 
241  aa_samples = 0; // no AA samples by default
242 
243  ao_samples = 0; // no AO samples by default
244  ao_direct = 0.3f; // AO direct contribution is 30%
245  ao_ambient = 0.7f; // AO ambient contribution is 70%
246  ao_maxdist = RT_DEFAULT_MAX; // default is no max occlusion distance
247 
248  fog_mode = RT_FOG_NONE; // fog/cueing disabled by default
249  fog_start = 0.0f; // default fog start at camera
250  fog_end = 10.0f; // default fog end
251  fog_density = 0.32f; // default exp^2 fog density
252 
253  scene_max_depth = 21; // set reasonable default ray depth
254  scene_max_trans = scene_max_depth; // set max trans crossings to match depth
255  scene_epsilon = 5.e-5f * 50; // set default scene epsilon
256 
257  lasterr = OPTIX_SUCCESS; // clear any error state
258  width = 1024; // default
259  height = 1024; // default
260 
261  verbose = RT_VERB_MIN; // quiet console by default
262  check_verbose_env(); // check for user-overridden verbose flag
263 
264  regen_optix_pipeline=1; // force regen of pipeline
265  regen_optix_sbt=1; // force regen of SBT
266  regen_optix_lights=1; // force regen of lights
267 
268  create_context(); // create CUDA/OptiX hardware contexts
269  destroy_scene(); // zero obj counters, ready for rendering
270 
272 }
273 
274 
275 // destructor...
277  DBG();
278  PROFILE_PUSH_RANGE("TachyonOptiX::~TachyonOptiX()", RTPROF_GENERAL);
279 
280  cudaDeviceSynchronize(); CUERR;
281 
282  if (context_created) {
283  destroy_context();
284  }
285 
286 #if 0
287  // XXX this is only for use with memory debugging tools!
288  cudaDeviceReset();
289 #endif
290 
291  wkf_timer_destroy(rt_timer);
292 
294 }
295 
296 
297 // Global OptiX logging callback
298 static void TachyonOptixLogCallback(unsigned int level,
299  const char* tag,
300  const char* message,
301  void* cbdata) {
302  if (cbdata != NULL) {
303  TachyonOptiX *tcy = (TachyonOptiX *) cbdata;
304  tcy->log_callback(level, tag, message);
305  }
306 }
307 
308 
309 void TachyonOptiX::log_callback(unsigned int level,
310  const char *tag, const char *msg) {
311  // Log callback levels:
312  // 1: fatal non-recoverable error, context needs to be destroyed
313  // 2: recoverable error, invalid call params, etc.
314  // 3: warning hints about slow perf, etc.
315  // 4: print status or progress messages
316  if ((verbose == RT_VERB_DEBUG) || (level < 4))
317  printf("TachyonOptiX) [%s]: %s\n", tag, msg);
318 }
319 
320 
321 // check environment for verbose timing/debugging output flags
324  char *verbstr = getenv("TACHYONOPTIXVERBOSE");
325  if (verbstr != NULL) {
326 // printf("TachyonOptiX) verbosity config request: '%s'\n", verbstr);
327  if (!strcasecmp(verbstr, "MIN")) {
328  myverbosity = TachyonOptiX::RT_VERB_MIN;
329  if (inform)
330  printf("TachyonOptiX) verbose setting: minimum\n");
331  } else if (!strcasecmp(verbstr, "TIMING")) {
332  myverbosity = TachyonOptiX::RT_VERB_TIMING;
333  if (inform)
334  printf("TachyonOptiX) verbose setting: timing data\n");
335  } else if (!strcasecmp(verbstr, "DEBUG")) {
336  myverbosity = TachyonOptiX::RT_VERB_DEBUG;
337  if (inform)
338  printf("TachyonOptiX) verbose setting: full debugging data\n");
339  }
340  }
341  return myverbosity;
342 }
343 
344 
345 int TachyonOptiX::device_list(int **devlist, char ***devnames) {
347  if (dl_verbose == RT_VERB_DEBUG)
348  printf("TachyonOptiX::device_list()\n");
349 
350  int devcount = 0;
351  cudaGetDeviceCount(&devcount);
352  return devcount;
353 }
354 
355 
358  if (dl_verbose == RT_VERB_DEBUG)
359  printf("TachyonOptiX::device_count()\n");
360 
361  return device_list(NULL, NULL);
362 }
363 
364 
365 unsigned int TachyonOptiX::optix_version(void) {
367  if (dl_verbose == RT_VERB_DEBUG)
368  printf("TachyonOptiX::optix_version()\n");
369 
374 
375  unsigned int version=OPTIX_VERSION;
376 
377  return version;
378 }
379 
380 
381 void TachyonOptiX::check_verbose_env() {
382  verbose = get_verbose_flag(1);
383 }
384 
385 
386 void TachyonOptiX::create_context() {
387  DBG();
388  time_ctx_create = 0;
389  if (context_created)
390  return;
391 
392  PROFILE_PUSH_RANGE("TachyonOptiX::create_context()", RTPROF_GENERAL);
393 
394  double starttime = wkf_timer_timenow(rt_timer);
395 
396  if (verbose == RT_VERB_DEBUG)
397  printf("TachyonOptiX) creating context...\n");
398 
399  if (lasterr == OPTIX_SUCCESS) {
400  rt_ptx_code_string = NULL;
401 
402  if (verbose == RT_VERB_DEBUG) {
403  printf("TachyonOptiX) Loading PTX src from compilation...\n");
404  }
405  rt_ptx_code_string = internal_compiled_ptx_src();
406  if (!rt_ptx_code_string) {
407  if (verbose == RT_VERB_DEBUG) {
408  printf("TachyonOptiX) Loading PTX src from disk\n");
409  }
410  if (read_ptx_src(shaderpath, &rt_ptx_code_string) != 0) {
411  printf("TachyonOptiX) Failed to load PTX shaders: '%s'\n", shaderpath);
412  return;
413  }
414  }
415  }
416 
417  //
418  // initialize CUDA for this thread if not already
419  //
420 #if 0
421  cudaSetDevice(0); // XXX hack for dev/testing on 'eclipse'
422 #endif
423  cudaFree(0); // initialize CUDA
424 
425  lasterr = optixInit();
426  if (lasterr == OPTIX_ERROR_UNSUPPORTED_ABI_VERSION) {
427  //
428  // Correspondence of OptiX versions with driver ABI versions:
429  // OptiX: 7.0.0 7.1.0 7.2.0 7.3.0 7.4.0
430  // ABI: 22 36 41 47 55
431  //
432  printf("TachyonOptiX) OptiX initialization failed driver is too old.\n");
433  printf("TachyonOptiX) Driver does not support ABI version %d\n",
434  OPTIX_ABI_VERSION);
435  return;
436  }
437 
438  cudaStreamCreate(&stream);
439 
440  OptixDeviceContextOptions options = {};
441  optixDeviceContextCreate(cuda_ctx, &options, &optix_ctx);
442 
443  lasterr = optixDeviceContextSetLogCallback(optix_ctx,
445  this,
446  4); // enable all levels
447 
448 
449  if (lasterr == OPTIX_SUCCESS)
450  context_create_module();
451 
452  double time_ptxsrc = wkf_timer_timenow(rt_timer);
453  if (verbose >= RT_VERB_TIMING) {
454  printf("TachyonOptiX) load PTX shader src %.1f secs\n", time_ptxsrc - starttime);
455  }
456 
457  if (lasterr == OPTIX_SUCCESS)
458  context_create_pipeline();
459 
460  //
461  // Preallocate various performance-critical buffers and device-side
462  // storage required during the performance-critical phases of rendering.
463  // These buffers are either fixed-size for the entire run
464  // or they are buffers that will be repeatedly reused, so we grow
465  // their size but do not free or shrink them, except after a call to
466  // minimize_memory_use().
467  //
468 
469  // pre-allocate compacted size buffer so we don't have to allocate
470  // during in-flight AS creation
471  compactedSizeBuffer.set_size(sizeof(uint64_t));
472 
473  // pre-allocate a reasonable size scene IAS buffer to avoid
474  // later runtime overheads.
475  IASBuffer.set_size(8L * 1024L * 1024L);
476 
477  // pre-allocate rtLaunch-sized launchParamsBuffer
478  launchParamsBuffer.set_size(sizeof(rtLaunch));
479 
480 #if defined(TACHYON_OPTIXDENOISER)
481  // initialize denoiser and allocate buffers
482  context_create_denoiser();
483 #endif
484 
485  double time_pipeline = wkf_timer_timenow(rt_timer);
486  if (verbose >= RT_VERB_TIMING) {
487  printf("TachyonOptiX) create RT pipeline %.1f secs\n", time_pipeline - time_ptxsrc);
488  }
489 
490  time_ctx_create = wkf_timer_timenow(rt_timer) - starttime;
491 
492  if (verbose == RT_VERB_TIMING || verbose == RT_VERB_DEBUG) {
493  printf("TachyonOptiX) context creation time: %.2f\n", time_ctx_create);
494  }
495 
496  context_created = 1;
497 
499 }
500 
501 
503  if (!context_created)
504  return;
505 
506  // free internal temporary buffers use for AS builds
507  // or other purposes, but don't free the existing scene
508  ASTempBuffer.free();
509 }
510 
511 
512 
513 int TachyonOptiX::read_ptx_src(const char *ptxfilename, char **ptxstring) {
514  DBG();
515  FILE *ptxfp = fopen(ptxfilename, "r");
516  if (ptxfp == NULL) {
517  return -1;
518  }
519 
520  // find size and load RT PTX source
521  fseek(ptxfp, 0, SEEK_END);
522  long ptxsize = ftell(ptxfp);
523  fseek(ptxfp, 0, SEEK_SET);
524  *ptxstring = (char *) calloc(1, ptxsize + 1);
525  if (fread(*ptxstring, ptxsize, 1, ptxfp) != 1) {
526  return -1;
527  }
528 
529  return 0;
530 }
531 
532 
533 
534 void TachyonOptiX::context_create_denoiser() {
535 #if defined(TACHYON_OPTIXDENOISER)
536  denoiser_ctx = nullptr;
537  memset(&denoiser_options, 0, sizeof(denoiser_options));
538  optixDenoiserCreate(optix_ctx, OPTIX_DENOISER_MODEL_KIND_LDR,
539  &denoiser_options, &denoiser_ctx);
540 
541  denoiser_resize_update();
542 #endif
543 }
544 
545 
546 void TachyonOptiX::context_destroy_denoiser() {
547 #if defined(TACHYON_OPTIXDENOISER)
548  if (denoiser_ctx) {
549  optixDenoiserDestroy(denoiser_ctx);
550  denoiser_ctx = nullptr;
551 
552  denoiser_scratch.free();
553  denoiser_state.free();
554  denoiser_colorbuffer.free();
555  denoiser_denoisedbuffer.free();
556  }
557 #endif
558 }
559 
560 
561 void TachyonOptiX::denoiser_resize_update() {
562 #if defined(TACHYON_OPTIXDENOISER)
563  if (denoiser_ctx) {
564  optixDenoiserComputeMemoryResources(denoiser_ctx, width, height,
565  &denoiser_sizes);
566 
567  long newsz = max(denoiser_sizes.withOverlapScratchSizeInBytes,
568  denoiser_sizes.withoutOverlapScratchSizeInBytes);
569  denoiser_scratch.set_size(newsz);
570 
571  denoiser_state.set_size(denoiser_sizes.stateSizeInBytes);
572 
573  optixDenoiserSetup(denoiser_ctx, stream, width, height,
574  denoiser_state.cu_dptr(), denoiser_state.get_size(),
575  denoiser_scratch.cu_dptr(), denoiser_scratch.get_size());
576 
577  int fbsz = width * height * sizeof(float4);
578  denoiser_colorbuffer.set_size(fbsz, stream);
579  denoiser_denoisedbuffer.set_size(fbsz, stream);
580  }
581 #endif
582 }
583 
584 
585 void TachyonOptiX::denoiser_launch() {
586 #if defined(TACHYON_OPTIXDENOISER)
587  // run denoiser on color buffer, then post-convert to uchar4 output buffer
588  if (denoiser_ctx && denoiser_enabled) {
589  OptixDenoiserParams denoiser_params = {};
590  denoiser_params.denoiseAlpha = 1;
591  denoiser_params.hdrIntensity = (CUdeviceptr)0;
592 
593  // blend between input and denoised output.
594  // subframe_index will be set to a non-zero value before we get here.
595  // Since SNR increases w/ sqrt(N) samples, we use N^0.5, N^0.33, or N^0.25
596  // curves to gradually blend in less of the denoiser output as sample
597  // counts rise.
598  denoiser_params.blendFactor =
599  1.0f - (1.0f / powf(rtLaunch.frame.subframe_index, 0.5f));
600 
601  if (verbose == RT_VERB_DEBUG) {
602  printf("TachyonOptiX) Accum. Buf AI Denoising Blend Factor: %.2f\n",
603  denoiser_params.blendFactor);
604  }
605 
606  OptixImage2D input_layer = {};
607  input_layer.data = denoiser_colorbuffer.cu_dptr();
608  input_layer.width = width;
609  input_layer.height = height;
610  input_layer.rowStrideInBytes = width * sizeof(float4);
611  input_layer.pixelStrideInBytes = sizeof(float4);
612  input_layer.format = OPTIX_PIXEL_FORMAT_FLOAT4;
613 
614  OptixImage2D output_layer = {};
615  output_layer.data = denoiser_denoisedbuffer.cu_dptr();
616  output_layer.width = width;
617  output_layer.height = height;
618  output_layer.rowStrideInBytes = width * sizeof(float4);
619  output_layer.pixelStrideInBytes = sizeof(float4);
620  output_layer.format = OPTIX_PIXEL_FORMAT_FLOAT4;
621 
622  OptixDenoiserGuideLayer denoiser_guidelayer = {};
623  OptixDenoiserLayer denoiser_layer = {};
624  denoiser_layer.input = input_layer;
625  denoiser_layer.output = output_layer;
626 
627  optixDenoiserInvoke(denoiser_ctx, stream, &denoiser_params,
628  denoiser_state.cu_dptr(), denoiser_state.get_size(),
629  &denoiser_guidelayer, &denoiser_layer,
630  1, // only one layer used
631  0, 0, // not a tiled denoising run
632  denoiser_scratch.cu_dptr(), denoiser_scratch.get_size());
633 
634  // copy+convert denoised image to the uchar4 output framebuffer
635  dim3 Bsz(128, 1, 1);
636  dim3 Gsz((width*height + Bsz.x - 1)/Bsz.x, 1, 1);
637  post_denoise_rgba4u<<<Gsz, Bsz, 0, stream>>>(
638  (uchar4 *) framebuffer.cu_dptr(),
639  (float4 *) denoiser_denoisedbuffer.cu_dptr(),
643  width, height);
644  }
645 #endif
646 }
647 
648 
649 
650 
651 
652 void TachyonOptiX::context_create_exception_pgms() {
653  DBG();
654  exceptionPGs.resize(1);
655 
656  OptixProgramGroupOptions pgOpts = {};
657  OptixProgramGroupDesc pgDesc = {};
658  pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_EXCEPTION;
659  pgDesc.raygen.module = general_module;
660 
661  pgDesc.raygen.entryFunctionName="__exception__all";
662 
663  char log[2048];
664  size_t sizeof_log = sizeof(log);
665  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
666  log, &sizeof_log, &exceptionPGs[0]);
667 
668  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
669  printf("TachyonOptiX) exception construction log:\n %s\n", log);
670  }
671 }
672 
673 
674 void TachyonOptiX::context_destroy_exception_pgms() {
675  DBG();
676  for (auto &pg : exceptionPGs)
677  optixProgramGroupDestroy(pg);
678  exceptionPGs.clear();
679 }
680 
681 
682 void TachyonOptiX::context_create_raygen_pgms() {
683  DBG();
684  PROFILE_PUSH_RANGE("TachyonOptiX::context_create_raygen_pgms()", RTPROF_GENERAL);
685 
686  raygenPGs.resize(1);
687 
688  OptixProgramGroupOptions pgOpts = {};
689  OptixProgramGroupDesc pgDesc = {};
690  pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
691  pgDesc.raygen.module = general_module;
692 
693  // Assign the raygen program according to the active camera
694  const char *raygenfctn=nullptr;
695  switch (camera_type) {
696  case RT_PERSPECTIVE:
697  if (cam_dof_enabled) {
698  if (cam_stereo_enabled)
699  raygenfctn = "__raygen__camera_perspective_stereo_dof";
700  else
701  raygenfctn = "__raygen__camera_perspective_dof";
702  } else {
703  if (cam_stereo_enabled)
704  raygenfctn = "__raygen__camera_perspective_stereo";
705  else
706  raygenfctn = "__raygen__camera_perspective";
707  }
708  break;
709 
710  case RT_ORTHOGRAPHIC:
711  if (cam_dof_enabled) {
712  if (cam_stereo_enabled)
713  raygenfctn = "__raygen__camera_orthographic_stereo_dof";
714  else
715  raygenfctn = "__raygen__camera_orthographic_dof";
716  } else {
717  if (cam_stereo_enabled)
718  raygenfctn = "__raygen__camera_orthographic_stereo";
719  else
720  raygenfctn = "__raygen__camera_orthographic";
721  }
722  break;
723 
724  case RT_CUBEMAP:
725  if (cam_dof_enabled) {
726  if (cam_stereo_enabled)
727  raygenfctn = "__raygen__camera_cubemap_stereo_dof";
728  else
729  raygenfctn = "__raygen__camera_cubemap_dof";
730  } else {
731  if (cam_stereo_enabled)
732  raygenfctn = "__raygen__camera_cubemap_stereo";
733  else
734  raygenfctn = "__raygen__camera_cubemap";
735  }
736  break;
737 
738  case RT_DOME_MASTER:
739  if (cam_dof_enabled) {
740  if (cam_stereo_enabled)
741  raygenfctn = "__raygen__camera_dome_master_stereo_dof";
742  else
743  raygenfctn = "__raygen__camera_dome_master_dof";
744  } else {
745  if (cam_stereo_enabled)
746  raygenfctn = "__raygen__camera_dome_master_stereo";
747  else
748  raygenfctn = "__raygen__camera_dome_master";
749  }
750  break;
751 
752  case RT_EQUIRECTANGULAR:
753  if (cam_dof_enabled) {
754  if (cam_stereo_enabled)
755  raygenfctn = "__raygen__camera_equirectangular_stereo_dof";
756  else
757  raygenfctn = "__raygen__camera_equirectangular_dof";
758  } else {
759  if (cam_stereo_enabled)
760  raygenfctn = "__raygen__camera_equirectangular_stereo";
761  else
762  raygenfctn = "__raygen__camera_equirectangular";
763  }
764  break;
765 
766  case RT_OCTAHEDRAL:
767  if (cam_dof_enabled) {
768  if (cam_stereo_enabled)
769  raygenfctn = "__raygen__camera_octahedral_stereo_dof";
770  else
771  raygenfctn = "__raygen__camera_octahedral_dof";
772  } else {
773  if (cam_stereo_enabled)
774  raygenfctn = "__raygen__camera_octahedral_stereo";
775  else
776  raygenfctn = "__raygen__camera_octahedral";
777  }
778  break;
779 
780  case RT_OCULUS_RIFT:
781  if (cam_dof_enabled) {
782  if (cam_stereo_enabled)
783  raygenfctn = "__raygen__camera_oculus_rift_stereo_dof";
784  else
785  raygenfctn = "__raygen__camera_oculus_rift_dof";
786  } else {
787  if (cam_stereo_enabled)
788  raygenfctn = "__raygen__camera_oculus_rift_stereo";
789  else
790  raygenfctn = "__raygen__camera_oculus_rift";
791  }
792  break;
793  }
794  pgDesc.raygen.entryFunctionName=raygenfctn;
795  if (verbose == RT_VERB_DEBUG)
796  printf("TachyonOptiX) raygen: '%s'\n", raygenfctn);
797 
798  char log[2048];
799  size_t sizeof_log = sizeof(log);
800  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
801  log, &sizeof_log, &raygenPGs[0]);
802 
803  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
804  printf("TachyonOptiX) raygen construction log:\n %s\n", log);
805  }
806 
808 }
809 
810 
811 void TachyonOptiX::context_destroy_raygen_pgms() {
812  DBG();
813  for (auto &pg : raygenPGs)
814  optixProgramGroupDestroy(pg);
815  raygenPGs.clear();
816 }
817 
818 
819 void TachyonOptiX::context_create_miss_pgms() {
820  DBG();
821  missPGs.resize(RT_RAY_TYPE_COUNT);
822 
823  char log[2048];
824  size_t sizeof_log = sizeof(log);
825 
826  OptixProgramGroupOptions pgOpts = {};
827  OptixProgramGroupDesc pgDesc = {};
828  pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
829  pgDesc.miss.module = general_module;
830 
831  //
832  // radiance rays
833  //
834 
835  // Assign the miss program according to the active background mode
836  const char *missfctn=nullptr;
837  switch (scene_background_mode) {
839  missfctn = "__miss__radiance_gradient_bg_sky_sphere";
840  break;
841 
843  missfctn = "__miss__radiance_gradient_bg_sky_plane";
844  break;
845 
847  default:
848  missfctn = "__miss__radiance_solid_bg";
849  break;
850  }
851  pgDesc.miss.entryFunctionName=missfctn;
852  if (verbose == RT_VERB_DEBUG)
853  printf("TachyonOptiX) miss: '%s'\n", missfctn);
854 
855  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
856  log, &sizeof_log,
857  &missPGs[RT_RAY_TYPE_RADIANCE]);
858  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
859  printf("TachyonOptiX) miss radiance construction log:\n %s\n", log);
860  }
861 
862  // shadow rays
863  pgDesc.miss.entryFunctionName = "__miss__shadow_nop";
864  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
865  log, &sizeof_log,
866  &missPGs[RT_RAY_TYPE_SHADOW]);
867  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
868  printf("TachyonOptiX) miss shadow construction log:\n %s\n", log);
869  }
870 }
871 
872 
873 void TachyonOptiX::context_destroy_miss_pgms() {
874  DBG();
875  for (auto &pg : missPGs)
876  optixProgramGroupDestroy(pg);
877  missPGs.clear();
878 }
879 
880 
881 void TachyonOptiX::context_create_curve_hitgroup_pgms() {
882  DBG();
883  curvePGs.resize(RT_RAY_TYPE_COUNT);
884 
885  char log[2048];
886  size_t sizeof_log = sizeof( log );
887 
888  OptixProgramGroupOptions pgOpts = {};
889  OptixProgramGroupDesc pgDesc = {};
890  pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
891  auto &hg = pgDesc.hitgroup;
892 
893  hg.moduleCH = general_module;
894  hg.moduleAH = general_module;
895 
896  // Assign intersection fctn from the OptiX-internal module
897  hg.moduleIS = curve_module;
898  hg.entryFunctionNameIS = 0; // automatically supplied for built-in module
899 
900  // radiance rays
901  hg.entryFunctionNameCH = "__closesthit__radiance_general";
902  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
903  if (verbose == RT_VERB_DEBUG) {
904  printf("TachyonOptiX) curve anyhit: %s\n", hg.entryFunctionNameAH);
905  printf("TachyonOptiX) curve closesthit: %s\n", hg.entryFunctionNameCH);
906  }
907  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
908  log, &sizeof_log,
909  &curvePGs[RT_RAY_TYPE_RADIANCE]);
910  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
911  printf("TachyonOptiX) curve hitgroup radiance construction log:\n"
912  " %s\n", log);
913  }
914 
915  // shadow rays
916  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
917  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
918  // XXX if we ever want to do two-pass shadows, we might use ray masks
919  // and intersect opaque geometry first and do transmissive geom last
920  // hg.entryFunctionNameAH = "__anyhit__shadow_opaque";
921 
922  if (verbose == RT_VERB_DEBUG) {
923  printf("TachyonOptiX) curve anyhit: %s\n", hg.entryFunctionNameAH);
924  printf("TachyonOptiX) curve closesthit: %s\n", hg.entryFunctionNameCH);
925  }
926  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
927  log, &sizeof_log,
928  &curvePGs[RT_RAY_TYPE_SHADOW]);
929  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
930  printf("TachyonOptiX) curve hitgroup shadow construction log:\n %s\n", log);
931  }
932 }
933 
934 
935 void TachyonOptiX::context_destroy_curve_hitgroup_pgms() {
936  DBG();
937  for (auto &pg : curvePGs)
938  optixProgramGroupDestroy(pg);
939  curvePGs.clear();
940 }
941 
942 
943 void TachyonOptiX::context_create_hwtri_hitgroup_pgms() {
944  DBG();
945  trimeshPGs.resize(RT_RAY_TYPE_COUNT);
946 
947  char log[2048];
948  size_t sizeof_log = sizeof( log );
949 
950  OptixProgramGroupOptions pgOpts = {};
951  OptixProgramGroupDesc pgDesc = {};
952  pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
953  auto &hg = pgDesc.hitgroup;
954 
955  hg.moduleCH = general_module;
956  hg.moduleAH = general_module;
957 
958  // radiance rays
959  hg.entryFunctionNameCH = "__closesthit__radiance_general";
960  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
961  if (verbose == RT_VERB_DEBUG) {
962  printf("TachyonOptiX) triangle anyhit: %s\n", hg.entryFunctionNameAH);
963  printf("TachyonOptiX) triangle closesthit: %s\n", hg.entryFunctionNameCH);
964  }
965  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
966  log, &sizeof_log,
967  &trimeshPGs[RT_RAY_TYPE_RADIANCE]);
968  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
969  printf("TachyonOptiX) triangle hitgroup radiance construction log:\n"
970  " %s\n", log);
971  }
972 
973  // shadow rays
974  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
975  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
976  // XXX if we ever want to do two-pass shadows, we might use ray masks
977  // and intersect opaque geometry first and do transmissive geom last
978  // hg.entryFunctionNameAH = "__anyhit__shadow_opaque";
979 
980  if (verbose == RT_VERB_DEBUG) {
981  printf("TachyonOptiX) triangle anyhit: %s\n", hg.entryFunctionNameAH);
982  printf("TachyonOptiX) triangle closesthit: %s\n", hg.entryFunctionNameCH);
983  }
984  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
985  log, &sizeof_log,
986  &trimeshPGs[RT_RAY_TYPE_SHADOW]);
987  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
988  printf("TachyonOptiX) triangle hitgroup shadow construction log:\n"
989  " %s\n", log);
990  }
991 }
992 
993 
994 void TachyonOptiX::context_destroy_hwtri_hitgroup_pgms() {
995  DBG();
996  for (auto &pg : trimeshPGs)
997  optixProgramGroupDestroy(pg);
998  trimeshPGs.clear();
999 }
1000 
1001 
1002 void TachyonOptiX::context_create_intersection_pgms() {
1003  DBG();
1004  custprimPGs.resize(RT_CUST_PRIM_COUNT * RT_RAY_TYPE_COUNT);
1005 
1006  char log[2048];
1007  size_t sizeof_log = sizeof(log);
1008 
1009  OptixProgramGroupOptions pgOpts = {};
1010  OptixProgramGroupDesc pgDesc = {};
1011  pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
1012  auto &hg = pgDesc.hitgroup;
1013  hg.moduleIS = general_module;
1014  hg.moduleCH = general_module;
1015  hg.moduleAH = general_module;
1016 
1017  //
1018  // Cones
1019  //
1020  const int conePG = RT_CUST_PRIM_CONE * RT_RAY_TYPE_COUNT;
1021 
1022  // radiance rays
1023  hg.entryFunctionNameIS = "__intersection__cone_array_color";
1024  hg.entryFunctionNameCH = "__closesthit__radiance_general";
1025  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
1026  if (verbose == RT_VERB_DEBUG) {
1027  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1028  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1029  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1030  }
1031  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1032  log, &sizeof_log,
1033  &custprimPGs[conePG + RT_RAY_TYPE_RADIANCE]);
1034  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1035  printf("TachyonOptiX) cone radiance intersection construction log:\n"
1036  " %s\n", log);
1037  }
1038 
1039  // shadow rays
1040  hg.entryFunctionNameIS = "__intersection__cone_array_color";
1041  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
1042  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
1043  if (verbose == RT_VERB_DEBUG) {
1044  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1045  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1046  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1047  }
1048  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1049  log, &sizeof_log,
1050  &custprimPGs[conePG + RT_RAY_TYPE_SHADOW]);
1051  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1052  printf("TachyonOptiX) cone shadow intersection construction log:\n"
1053  " %s\n", log);
1054  }
1055 
1056 
1057  //
1058  // Cylinders
1059  //
1060  const int cylPG = RT_CUST_PRIM_CYLINDER * RT_RAY_TYPE_COUNT;
1061 
1062  // radiance rays
1063  hg.entryFunctionNameIS = "__intersection__cylinder_array_color";
1064  hg.entryFunctionNameCH = "__closesthit__radiance_general";
1065  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
1066  if (verbose == RT_VERB_DEBUG) {
1067  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1068  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1069  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1070  }
1071  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1072  log, &sizeof_log,
1073  &custprimPGs[cylPG + RT_RAY_TYPE_RADIANCE]);
1074  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1075  printf("TachyonOptiX) cylinder radiance intersection construction log:\n"
1076  " %s\n", log);
1077  }
1078 
1079  // shadow rays
1080  hg.entryFunctionNameIS = "__intersection__cylinder_array_color";
1081  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
1082  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
1083  if (verbose == RT_VERB_DEBUG) {
1084  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1085  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1086  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1087  }
1088  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1089  log, &sizeof_log,
1090  &custprimPGs[cylPG + RT_RAY_TYPE_SHADOW]);
1091  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1092  printf("TachyonOptiX) cylinder shadow intersection construction log:\n"
1093  " %s\n", log);
1094  }
1095 
1096 
1097  //
1098  // Quad mesh
1099  //
1100  const int quadPG = RT_CUST_PRIM_QUAD * RT_RAY_TYPE_COUNT;
1101 
1102  // radiance rays
1103  hg.entryFunctionNameIS = "__intersection__quadmesh";
1104  hg.entryFunctionNameCH = "__closesthit__radiance_general";
1105  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
1106  if (verbose == RT_VERB_DEBUG) {
1107  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1108  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1109  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1110  }
1111  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1112  log, &sizeof_log,
1113  &custprimPGs[quadPG + RT_RAY_TYPE_RADIANCE]);
1114  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1115  printf("TachyonOptiX) quad radiance intersection construction log:\n"
1116  " %s\n", log);
1117  }
1118 
1119  // shadow rays
1120  hg.entryFunctionNameIS = "__intersection__quadmesh";
1121  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
1122  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
1123  if (verbose == RT_VERB_DEBUG) {
1124  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1125  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1126  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1127  }
1128  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1129  log, &sizeof_log,
1130  &custprimPGs[quadPG + RT_RAY_TYPE_SHADOW]);
1131  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1132  printf("TachyonOptiX) quad shadow intersection construction log:\n"
1133  " %s\n", log);
1134  }
1135 
1136 
1137  //
1138  // Rings
1139  //
1140  const int ringPG = RT_CUST_PRIM_RING * RT_RAY_TYPE_COUNT;
1141 
1142  // radiance rays
1143  hg.entryFunctionNameIS = "__intersection__ring_array";
1144  hg.entryFunctionNameCH = "__closesthit__radiance_general";
1145  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
1146  if (verbose == RT_VERB_DEBUG) {
1147  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1148  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1149  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1150  }
1151  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1152  log, &sizeof_log,
1153  &custprimPGs[ringPG + RT_RAY_TYPE_RADIANCE]);
1154  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1155  printf("TachyonOptiX) ring radiance intersection construction log:\n"
1156  " %s\n", log);
1157  }
1158 
1159  // shadow rays
1160  hg.entryFunctionNameIS = "__intersection__ring_array";
1161  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
1162  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
1163  if (verbose == RT_VERB_DEBUG) {
1164  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1165  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1166  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1167  }
1168  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1169  log, &sizeof_log,
1170  &custprimPGs[ringPG + RT_RAY_TYPE_SHADOW]);
1171  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1172  printf("TachyonOptiX) ring shadow intersection construction log:\n"
1173  " %s\n", log);
1174  }
1175 
1176 
1177  //
1178  // Spheres
1179  //
1180  const int spherePG = RT_CUST_PRIM_SPHERE * RT_RAY_TYPE_COUNT;
1181 
1182  // radiance rays
1183  hg.entryFunctionNameIS = "__intersection__sphere_array";
1184  hg.entryFunctionNameCH = "__closesthit__radiance_general";
1185  hg.entryFunctionNameAH = "__anyhit__radiance_nop";
1186  if (verbose == RT_VERB_DEBUG) {
1187  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1188  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1189  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1190  }
1191  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1192  log, &sizeof_log,
1193  &custprimPGs[spherePG + RT_RAY_TYPE_RADIANCE]);
1194  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1195  printf("TachyonOptiX) sphere radiance intersection construction log:\n"
1196  " %s\n", log);
1197  }
1198 
1199  // shadow rays
1200  hg.entryFunctionNameIS = "__intersection__sphere_array";
1201  hg.entryFunctionNameCH = "__closesthit__shadow_nop";
1202  hg.entryFunctionNameAH = "__anyhit__shadow_transmission";
1203  if (verbose == RT_VERB_DEBUG) {
1204  printf("TachyonOptiX) anyhit: %s\n", hg.entryFunctionNameAH);
1205  printf("TachyonOptiX) closesthit: %s\n", hg.entryFunctionNameCH);
1206  printf("TachyonOptiX) intersection: %s\n", hg.entryFunctionNameIS);
1207  }
1208  lasterr = optixProgramGroupCreate(optix_ctx, &pgDesc, 1, &pgOpts,
1209  log, &sizeof_log,
1210  &custprimPGs[spherePG + RT_RAY_TYPE_SHADOW]);
1211  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1212  printf("TachyonOptiX) sphere shadow intersection construction log:\n"
1213  " %s\n", log);
1214  }
1215 }
1216 
1217 
1218 void TachyonOptiX::context_destroy_intersection_pgms() {
1219  DBG();
1220  for (auto &pg : custprimPGs)
1221  optixProgramGroupDestroy(pg);
1222  custprimPGs.clear();
1223 }
1224 
1225 
1226 void TachyonOptiX::context_create_module() {
1227  DBG();
1228 
1229  OptixModuleCompileOptions moduleCompOpts = {};
1230 // moduleCompOpts.maxRegisterCount = 50;
1231  moduleCompOpts.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT;
1232  moduleCompOpts.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT;
1233 
1234  // NOTE: lineinfo is required for profiling tools like nsight compute.
1235  // OptiX RT PTX must also be compiled using the "--generate-line-info" flag.
1236 #if OPTIX_VERSION >= 70400
1237  // OptiX 7.4 has renamed the debug level enums/macros according to
1238  // their runtime performance impact
1239  moduleCompOpts.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL;
1240 #else
1241  moduleCompOpts.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO;
1242 #endif
1243 
1244  pipeCompOpts.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
1245  pipeCompOpts.usesMotionBlur = false;
1246 
1247  // XXX OptiX 7.4 deprecates the use of a single global payload count
1248  // in favor of per-program module compile options that indicate both
1249  // the number of payload values and their read/write usage, to better
1250  // optimized register use. When using the OptiX >= 7.4 payloadType
1251  // data, the global pipeline options should set numPayloadValues
1252  // to zero here.
1253  // See:
1254  // https://raytracing-docs.nvidia.com/optix7/guide/index.html#payload#payload
1255  //
1256  pipeCompOpts.numPayloadValues = 3;
1257  pipeCompOpts.numAttributeValues = 2;
1258 
1259  // XXX enable exceptions full-time during development/testing
1260  if ((getenv("TACHYONOPTIXDEBUG") != NULL)) {
1261  pipeCompOpts.exceptionFlags = OPTIX_EXCEPTION_FLAG_DEBUG |
1262  OPTIX_EXCEPTION_FLAG_TRACE_DEPTH |
1263  OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW;
1264  } else {
1265  pipeCompOpts.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
1266  }
1267  pipeCompOpts.pipelineLaunchParamsVariableName = "rtLaunch";
1268 
1269 #if (OPTIX_VERSION >= 70100)
1270  pipeCompOpts.usesPrimitiveTypeFlags =
1271  OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM | OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE |
1272  OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR;
1273 // pipeCompOpts.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE;
1274 // pipeCompOpts.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE;
1275 #endif
1276 
1277  char log[2048];
1278  size_t sizeof_log = sizeof(log);
1279  lasterr = optixModuleCreateFromPTX(optix_ctx, &moduleCompOpts, &pipeCompOpts,
1280  rt_ptx_code_string,
1281  strlen(rt_ptx_code_string),
1282  log, &sizeof_log, &general_module);
1283 
1284  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1285  printf("TachyonOptiX) general_module construction log:\n %s\n", log);
1286  }
1287 
1288 #if OPTIX_VERSION >= 70100
1289  //
1290  // Lookup OptiX built-in intersection pgm/module for curves
1291  //
1292  OptixBuiltinISOptions ISopts = {};
1293  ISopts.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR;
1294 // ISopts.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE;
1295 // ISopts.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE;
1296 #if OPTIX_VERSION >= 70400
1297  ISopts.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE;
1298  ISopts.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT;
1299 #endif
1300  ISopts.usesMotionBlur = false;
1301  optixBuiltinISModuleGet(optix_ctx, &moduleCompOpts, &pipeCompOpts,
1302  &ISopts, &curve_module);
1303 #endif
1304 
1305 
1306 }
1307 
1308 
1309 void TachyonOptiX::context_destroy_module() {
1310  DBG();
1311 
1312  if (general_module)
1313  optixModuleDestroy(general_module);
1314 
1315  if (curve_module)
1316  optixModuleDestroy(curve_module);
1317 }
1318 
1319 
1320 void TachyonOptiX::context_create_pipeline() {
1321  DBG();
1322  PROFILE_PUSH_RANGE("TachyonOptiX::context_create_pipeline()", RTPROF_GENERAL);
1323 
1324  if (lasterr == OPTIX_SUCCESS)
1325  context_create_exception_pgms();
1326 
1327  if (lasterr == OPTIX_SUCCESS)
1328  context_create_raygen_pgms();
1329  if (lasterr == OPTIX_SUCCESS)
1330  context_create_miss_pgms();
1331  if (lasterr == OPTIX_SUCCESS)
1332  context_create_curve_hitgroup_pgms();
1333  if (lasterr == OPTIX_SUCCESS)
1334  context_create_hwtri_hitgroup_pgms();
1335  if (lasterr == OPTIX_SUCCESS)
1336  context_create_intersection_pgms();
1337 
1338  std::vector<OptixProgramGroup> programGroups;
1339  for (auto &pg : exceptionPGs)
1340  programGroups.push_back(pg);
1341  for (auto &pg : raygenPGs)
1342  programGroups.push_back(pg);
1343  for (auto &pg : missPGs)
1344  programGroups.push_back(pg);
1345  for (auto &pg : curvePGs)
1346  programGroups.push_back(pg);
1347  for (auto &pg : trimeshPGs)
1348  programGroups.push_back(pg);
1349  for (auto &pg : custprimPGs)
1350  programGroups.push_back(pg);
1351 
1352  if (verbose == RT_VERB_DEBUG) {
1353  printf("TachyonOptiX) creating complete pipeline...\n");
1354  }
1355 
1356  char log[2048];
1357  size_t sizeof_log = sizeof(log);
1358  OptixPipelineLinkOptions pipeLinkOpts = {};
1359  pipeLinkOpts.maxTraceDepth = 21; // OptiX recursion limit is 31
1360  lasterr = optixPipelineCreate(optix_ctx, &pipeCompOpts, &pipeLinkOpts,
1361  programGroups.data(), (int)programGroups.size(),
1362  log, &sizeof_log, &pipe);
1363  if ((verbose == RT_VERB_DEBUG) && (sizeof_log > 1)) {
1364  printf("TachyonOptiX) pipeline construction log:\n %s\n", log);
1365  }
1366 
1367  // max allowed stack sz appears to be 64kB per category
1368  optixPipelineSetStackSize(pipe,
1369  8*1024,
1370  8*1024,
1371  8*1024,
1372  1);
1373 
1374  regen_optix_pipeline=0;
1375  regen_optix_sbt=1;
1376 
1378 }
1379 
1380 
1381 void TachyonOptiX::context_destroy_pipeline() {
1382  DBG();
1383  cudaDeviceSynchronize(); CUERR;
1384 
1385  SBT_destroy();
1386 
1387  custprimsGASBuffer.free();
1388 #if OPTIX_VERSION >= 70100
1389  curvesGASBuffer.free();
1390 #endif
1391  trimeshesGASBuffer.free();
1392 
1393  if (pipe != nullptr) {
1394  if (verbose == RT_VERB_DEBUG)
1395  printf("TachyonOptiX) destroying existing pipeline...\n");
1396 
1397  optixPipelineDestroy(pipe);
1398  pipe=nullptr;
1399  }
1400 
1401  context_destroy_raygen_pgms();
1402  context_destroy_miss_pgms();
1403  context_destroy_curve_hitgroup_pgms();
1404  context_destroy_hwtri_hitgroup_pgms();
1405  context_destroy_intersection_pgms();
1406  context_destroy_exception_pgms();
1407 
1408  regen_optix_pipeline=1;
1409  regen_optix_sbt=1;
1410 }
1411 
1412 
1413 
1414 void TachyonOptiX::SBT_create_programs() {
1415  DBG();
1416  PROFILE_PUSH_RANGE("TachyonOptiX::SBT_create_programs()", RTPROF_SBT);
1417 
1418  PROFILE_PUSH_RANGE("Upload SBT PGM Recs", RTPROF_SBT);
1419 
1420  // build exception records
1421  std::vector<ExceptionRecord> exceptionRecords;
1422  for (int i=0; i<exceptionPGs.size(); i++) {
1423  ExceptionRecord rec = {};
1424  optixSbtRecordPackHeader(exceptionPGs[i], &rec);
1425  rec.data = nullptr;
1426  exceptionRecords.push_back(rec);
1427  }
1428  exceptionRecordsBuffer.resize_upload(exceptionRecords, stream);
1429  sbt.exceptionRecord = exceptionRecordsBuffer.cu_dptr();
1430 
1431  // build raygen records
1432  std::vector<RaygenRecord> raygenRecords;
1433  for (int i=0; i<raygenPGs.size(); i++) {
1434  RaygenRecord rec = {};
1435  optixSbtRecordPackHeader(raygenPGs[i], &rec);
1436  rec.data = nullptr;
1437  raygenRecords.push_back(rec);
1438  }
1439  raygenRecordsBuffer.resize_upload(raygenRecords, stream);
1440  sbt.raygenRecord = raygenRecordsBuffer.cu_dptr();
1441 
1442  // build miss records
1443  std::vector<MissRecord> missRecords;
1444  for (int i=0; i<missPGs.size(); i++) {
1445  MissRecord rec = {};
1446  optixSbtRecordPackHeader(missPGs[i], &rec);
1447  rec.data = nullptr;
1448  missRecords.push_back(rec);
1449  }
1450  missRecordsBuffer.resize_upload(missRecords, stream);
1451  sbt.missRecordBase = missRecordsBuffer.cu_dptr();
1452  sbt.missRecordStrideInBytes = sizeof(MissRecord);
1453  sbt.missRecordCount = (int) missRecords.size();
1454 
1456 }
1457 
1458 
1459 void TachyonOptiX::SBT_create_hitgroups() {
1460  DBG();
1461  PROFILE_PUSH_RANGE("TachyonOptiX::SBT_create_hitgroups()", RTPROF_SBT);
1462 
1463  PROFILE_PUSH_RANGE("Upload SBT PGM Recs", RTPROF_SBT);
1464  // beginning of geometry-associated processing
1465  PROFILE_PUSH_RANGE("Pack Geom Recs", RTPROF_GEOM);
1466 
1467  // build hitgroup records
1468  // Note: SBT must not contain any NULLs, stubs must exist at least
1469  std::vector<HGRecordGroup> HGRecGroups;
1470 
1471  // Add cone arrays to SBT
1472  const int conePG = RT_CUST_PRIM_CONE * RT_RAY_TYPE_COUNT;
1473  int numCones = (int) conearrays.size();
1474  for (int objID=0; objID<numCones; objID++) {
1475  HGRecordGroup rec = {};
1476 
1477  // set primitive array data on the first SBT hitgroup record of the group
1478  auto &p = rec.radiance.data;
1479  p.cone.base = (float3 *) coneBaseBuffers[objID].cu_dptr();
1480  p.cone.apex = (float3 *) coneApexBuffers[objID].cu_dptr();
1481  p.cone.baserad = (float *) coneBaseRadBuffers[objID].cu_dptr();
1482  p.cone.apexrad = (float *) coneApexRadBuffers[objID].cu_dptr();
1483 
1484  // common geometry params
1485  p.prim_color = (float3 *) conePrimColorBuffers[objID].cu_dptr();
1486  p.uniform_color = conearrays[objID].uniform_color;
1487  p.materialindex = conearrays[objID].materialindex;
1488  p.geomflags = 0; // initialize geomflags to empty until updated later
1489 
1490  // replicate data to all records in the group
1491  rec.shadow = rec.radiance;
1492 
1493  // write record headers
1494  optixSbtRecordPackHeader(custprimPGs[conePG + RT_RAY_TYPE_RADIANCE], &rec.radiance);
1495  optixSbtRecordPackHeader(custprimPGs[conePG + RT_RAY_TYPE_SHADOW], &rec.shadow);
1496  HGRecGroups.push_back(rec);
1497  }
1498 
1499 
1500  // Add cylinder arrays to SBT
1501  const int cylPG = RT_CUST_PRIM_CYLINDER * RT_RAY_TYPE_COUNT;
1502  int numCyls = (int) cyarrays.size();
1503  for (int objID=0; objID<numCyls; objID++) {
1504  HGRecordGroup rec = {};
1505 
1506  // set primitive array data on the first SBT hitgroup record of the group
1507  auto &p = rec.radiance.data;
1508  p.cyl.start = (float3 *) cyStartBuffers[objID].cu_dptr();
1509  p.cyl.end = (float3 *) cyEndBuffers[objID].cu_dptr();
1510  p.cyl.radius = (float *) cyRadiusBuffers[objID].cu_dptr();
1511 
1512  // common geometry params
1513  p.prim_color = (float3 *) cyPrimColorBuffers[objID].cu_dptr();
1514  p.uniform_color = cyarrays[objID].uniform_color;
1515  p.materialindex = cyarrays[objID].materialindex;
1516  p.geomflags = 0; // initialize geomflags to empty until updated later
1517 
1518  // replicate data to all records in the group
1519  rec.shadow = rec.radiance;
1520 
1521  // write record headers
1522  optixSbtRecordPackHeader(custprimPGs[cylPG + RT_RAY_TYPE_RADIANCE], &rec.radiance);
1523  optixSbtRecordPackHeader(custprimPGs[cylPG + RT_RAY_TYPE_SHADOW], &rec.shadow);
1524  HGRecGroups.push_back(rec);
1525  }
1526 
1527 
1528  // Add quad meshes to SBT
1529  const int quadPG = RT_CUST_PRIM_QUAD * RT_RAY_TYPE_COUNT;
1530  int numQuads = (int) quadmeshes.size();
1531  for (int objID=0; objID<numQuads; objID++) {
1532  HGRecordGroup rec = {};
1533 
1534  // set primitive array data on the first SBT hitgroup record of the group
1535  auto &q = rec.radiance.data.quadmesh;
1536  q.vertices = (float3 *) quadMeshVertBuffers[objID].cu_dptr();
1537  q.indices = (int4 *) quadMeshIdxBuffers[objID].cu_dptr();
1538  q.normals = (float3 *) quadMeshVertNormalBuffers[objID].cu_dptr();
1539  q.packednormals = (uint4 *) quadMeshVertPackedNormalBuffers[objID].cu_dptr();
1540  q.vertcolors3f = (float3 *) quadMeshVertColor3fBuffers[objID].cu_dptr();
1541  q.vertcolors4u = (uchar4 *) quadMeshVertColor4uBuffers[objID].cu_dptr();
1542 
1543  // common geometry params
1544  auto &p = rec.radiance.data;
1545  p.prim_color = (float3 *) quadMeshPrimColorBuffers[objID].cu_dptr();
1546  p.uniform_color = quadmeshes[objID].uniform_color;
1547  p.materialindex = quadmeshes[objID].materialindex;
1548  p.geomflags = 0; // initialize geomflags to empty until updated later
1549 
1550  // replicate data to all records in the group
1551  rec.shadow = rec.radiance;
1552 
1553  // write record headers
1554  optixSbtRecordPackHeader(custprimPGs[quadPG + RT_RAY_TYPE_RADIANCE], &rec.radiance);
1555  optixSbtRecordPackHeader(custprimPGs[quadPG + RT_RAY_TYPE_SHADOW], &rec.shadow);
1556  HGRecGroups.push_back(rec);
1557  }
1558 
1559 
1560  // Add ring arrays to SBT
1561  const int ringPG = RT_CUST_PRIM_RING * RT_RAY_TYPE_COUNT;
1562  int numRings = (int) riarrays.size();
1563  for (int objID=0; objID<numRings; objID++) {
1564  HGRecordGroup rec = {};
1565 
1566  // set primitive array data on the first SBT hitgroup record of the group
1567  auto &p = rec.radiance.data;
1568  p.ring.center = (float3 *) riCenterBuffers[objID].cu_dptr();
1569  p.ring.norm = (float3 *) riNormalBuffers[objID].cu_dptr();
1570  p.ring.inrad = (float *) riInRadiusBuffers[objID].cu_dptr();
1571  p.ring.outrad = (float *) riOutRadiusBuffers[objID].cu_dptr();
1572 
1573  // common geometry params
1574  p.prim_color = (float3 *) riPrimColorBuffers[objID].cu_dptr();
1575  p.uniform_color = riarrays[objID].uniform_color;
1576  p.materialindex = riarrays[objID].materialindex;
1577  p.geomflags = 0; // initialize geomflags to empty until updated later
1578 
1579  // replicate data to all records in the group
1580  rec.shadow = rec.radiance;
1581 
1582  // write record headers
1583  optixSbtRecordPackHeader(custprimPGs[ringPG + RT_RAY_TYPE_RADIANCE], &rec.radiance);
1584  optixSbtRecordPackHeader(custprimPGs[ringPG + RT_RAY_TYPE_SHADOW], &rec.shadow);
1585  HGRecGroups.push_back(rec);
1586  }
1587 
1588 
1589  // Add sphere arrays to SBT
1590  const int spherePG = RT_CUST_PRIM_SPHERE * RT_RAY_TYPE_COUNT;
1591  int numSpheres = (int) sparrays.size();
1592  for (int objID=0; objID<numSpheres; objID++) {
1593  HGRecordGroup rec = {};
1594 
1595  // set primitive array data on the first SBT hitgroup record of the group
1596  auto &p = rec.radiance.data;
1597  p.sphere.PosRadius = (float4 *) spPosRadiusBuffers[objID].cu_dptr();
1598 
1599  // common geometry params
1600  p.prim_color = (float3 *) spPrimColorBuffers[objID].cu_dptr();
1601  p.uniform_color = sparrays[objID].uniform_color;
1602  p.materialindex = sparrays[objID].materialindex;
1603  p.geomflags = 0; // initialize geomflags to empty until updated later
1604 
1605  // replicate data to all records in the group
1606  rec.shadow = rec.radiance;
1607 
1608  // write record headers
1609  optixSbtRecordPackHeader(custprimPGs[spherePG + RT_RAY_TYPE_RADIANCE], &rec.radiance);
1610  optixSbtRecordPackHeader(custprimPGs[spherePG + RT_RAY_TYPE_SHADOW], &rec.shadow);
1611  HGRecGroups.push_back(rec);
1612  }
1613 
1614 
1615 #if OPTIX_VERSION >= 70100
1616  // Add curve arrays to SBT
1617  int numCurves = (int) curvearrays.size();
1618  for (int objID=0; objID<numCurves; objID++) {
1619  HGRecordGroup rec = {};
1620 
1621  // set primitive array data on the first SBT hitgroup record of the group
1622  auto &p = rec.radiance.data;
1623  p.curve.vertices = (float3 *) curveVertBuffers[objID].cu_dptr();
1624  p.curve.vertradii = (float *) curveVertRadBuffers[objID].cu_dptr();
1625  p.curve.segindices = (int *) curveSegIdxBuffers[objID].cu_dptr();
1626 
1627  // common geometry params
1628  p.prim_color = (float3 *) curvePrimColorBuffers[objID].cu_dptr();
1629  p.uniform_color = curvearrays[objID].uniform_color;
1630  p.materialindex = curvearrays[objID].materialindex;
1631  p.geomflags = 0; // initialize geomflags to empty until updated later
1632 
1633  // replicate data to all records in the group
1634  rec.shadow = rec.radiance;
1635 
1636  // write record headers
1637  optixSbtRecordPackHeader(curvePGs[RT_RAY_TYPE_RADIANCE], &rec.radiance);
1638  optixSbtRecordPackHeader(curvePGs[RT_RAY_TYPE_SHADOW], &rec.shadow);
1639  HGRecGroups.push_back(rec);
1640  }
1641 #endif
1642 
1643 
1644  // Add triangle meshes to SBT
1645  int numTrimeshes = (int) trimeshes.size();
1646  for (int objID=0; objID<numTrimeshes; objID++) {
1647  HGRecordGroup rec = {};
1648 
1649  // set primitive array data on the first SBT hitgroup record of the group
1650  auto &t = rec.radiance.data.trimesh;
1651  t.vertices = (float3 *) triMeshVertBuffers[objID].cu_dptr();
1652  t.indices = (int3 *) triMeshIdxBuffers[objID].cu_dptr();
1653  t.normals = (float3 *) triMeshVertNormalBuffers[objID].cu_dptr();
1654  t.packednormals = (uint4 *) triMeshVertPackedNormalBuffers[objID].cu_dptr();
1655  t.vertcolors3f = (float3 *) triMeshVertColor3fBuffers[objID].cu_dptr();
1656  t.vertcolors4u = (uchar4 *) triMeshVertColor4uBuffers[objID].cu_dptr();
1657  t.tex2d = (float2 *) triMeshTex2dBuffers[objID].cu_dptr();
1658  t.tex3d = (float3 *) triMeshTex3dBuffers[objID].cu_dptr();
1659 
1660  // common geometry params
1661  auto &p = rec.radiance.data;
1662  p.prim_color = (float3 *) triMeshPrimColorBuffers[objID].cu_dptr();
1663  p.uniform_color = trimeshes[objID].uniform_color;
1664  p.materialindex = trimeshes[objID].materialindex;
1665  p.geomflags = 0; // initialize geomflags to empty until updated later
1666 
1667  // replicate data to all records in the group
1668  rec.shadow = rec.radiance;
1669 
1670  // write record headers
1671  optixSbtRecordPackHeader(trimeshPGs[RT_RAY_TYPE_RADIANCE], &rec.radiance);
1672  optixSbtRecordPackHeader(trimeshPGs[RT_RAY_TYPE_SHADOW], &rec.shadow);
1673  HGRecGroups.push_back(rec);
1674  }
1675 
1676  PROFILE_STREAM_SYNC_PRETTY(stream); // sync only for clearer profile traces
1677 
1678  // end of geometry-associated work
1680 
1681  PROFILE_PUSH_RANGE("Upload SBT", RTPROF_GEOM);
1682 
1683  // upload and set the final SBT hitgroup array
1684  int hgsz = hitgroupRecordGroups.size();
1685  int hgrgsz = HGRecGroups.size();
1686  if (hgrgsz > 0) {
1687  // temporarily append the contents of HGRecGroups to hitgroupRecordGroups
1688  // so they are also included in the SBT
1689  // pre-grow hitgroupRecordGroups to final size prior to append loop...
1690  if (hitgroupRecordGroups.capacity() < (hgsz+hgrgsz))
1691  hitgroupRecordGroups.reserve(hgsz+hgrgsz);
1692 
1693  // append HGRecGroups and upload the final HG record list to the GPU
1694  for (auto &r: HGRecGroups) {
1695  hitgroupRecordGroups.push_back(r);
1696  }
1697  }
1698 
1699  // update SBT hitgroup geomflags from materials before upload
1700  SBT_update_hitgroup_geomflags();
1701 
1702  hitgroupRecordsBuffer.resize_upload(hitgroupRecordGroups, stream);
1703  sync_hitgroupRecordGroups = 0;
1704 
1705  sbt.hitgroupRecordBase = hitgroupRecordsBuffer.cu_dptr();
1706  sbt.hitgroupRecordStrideInBytes = sizeof(HGRecord);
1707 
1708  // Each HGRecordGroup contains RT_RAY_TYPE_COUNT HGRecords, so we multiply
1709  // the vector size by RT_RAY_TYPE_COUNT to get the total HG record count
1710  sbt.hitgroupRecordCount = (int) hitgroupRecordGroups.size()*RT_RAY_TYPE_COUNT;
1711 
1712  if (hgrgsz > 0) {
1713  // delete temporarily appended HGRecGroups records
1714  hitgroupRecordGroups.erase(hitgroupRecordGroups.begin()+hgsz,
1715  hitgroupRecordGroups.end());
1716  }
1717 
1718  cudaStreamSynchronize(stream);
1719  regen_optix_sbt=0;
1720 
1723 }
1724 
1725 
1726 void TachyonOptiX::SBT_clear() {
1727  DBG();
1728  PROFILE_PUSH_RANGE("TachyonOptix::SBT_clear", RTPROF_SBT);
1729 
1730  // set the host-side buffer sizes to zero length, but retain the
1731  // GPU device-side memory allocations so they can be reused rather
1732  // than forcing complete reallocation in the (very likely) cases that
1733  // the new contents are very close or identical in size.
1734  exceptionRecordsBuffer.clear_persist_allocation();
1735  raygenRecordsBuffer.clear_persist_allocation();
1736  missRecordsBuffer.clear_persist_allocation();
1737  hitgroupRecordsBuffer.clear_persist_allocation();
1738 
1739  regen_optix_sbt=1;
1740 
1741  PROFILE_STREAM_SYNC_PRETTY(stream); // sync only for clearer profile traces
1742 
1744 }
1745 
1746 
1747 void TachyonOptiX::SBT_destroy() {
1748  DBG();
1749  PROFILE_PUSH_RANGE("TachyonOptix::SBT_destroy", RTPROF_SBT);
1750 
1751  // actually free all GPU device memory allocations
1752  exceptionRecordsBuffer.free(stream);
1753  raygenRecordsBuffer.free(stream);
1754  missRecordsBuffer.free(stream);
1755  hitgroupRecordsBuffer.free(stream);
1756 
1757  // clear to all zeroes to ensure no possibility of accidental reuse
1758  memset((void *) &sbt, 0, sizeof(sbt));
1759  regen_optix_sbt=1;
1760 
1761  PROFILE_STREAM_SYNC_PRETTY(stream); // sync only for clearer profile traces
1762 
1764 }
1765 
1766 
1767 void TachyonOptiX::SBT_update_hitgroup_geomflags() {
1768  DBG();
1769  PROFILE_PUSH_RANGE("TachyonOptix::SBT_update_geomflags", RTPROF_SBT);
1770 
1771 #if defined(TACHYON_USE_GEOMFLAGS)
1772  // update every hitgroup record's geomflags from the latest
1773  // material list, along with geometry color/texture mode encodings, etc.
1774  for (auto &g: hitgroupRecordGroups) {
1775  // copy material flags into lowest bits of geomflags
1776  int matidx = g.radiance.data.materialindex;
1777  int matflags = materialcache[matidx].matflags;
1778  g.radiance.data.geomflags = matflags;
1779  g.shadow.data.geomflags = matflags;
1780  }
1781 
1782  sync_hitgroupRecordGroups = 1;
1783 #endif
1784 
1786 }
1787 
1788 
1789 
1790 void TachyonOptiX::AABB_cone_array(CUMemBuf &aabbBuffer,
1791  const float3 *base, const float3 *apex,
1792  const float *brad, const float *arad,
1793  int primcnt) {
1794  // XXX AABB calcs should be done in CUDA on the GPU...
1795  std::vector<OptixAabb> hostAabb(primcnt); // temp array for aabb generation
1796  for (int i=0; i<primcnt; i++) {
1797  auto &b = base[i];
1798  auto &a = apex[i];
1799  float baserad = brad[i];
1800  float apexrad = arad[i];
1801 
1802  hostAabb[i].minX = fminf(b.x - baserad, a.x - apexrad);
1803  hostAabb[i].minY = fminf(b.y - baserad, a.y - apexrad);
1804  hostAabb[i].minZ = fminf(b.z - baserad, a.z - apexrad);
1805  hostAabb[i].maxX = fmaxf(b.x + baserad, a.x + apexrad);
1806  hostAabb[i].maxY = fmaxf(b.y + baserad, a.y + apexrad);
1807  hostAabb[i].maxZ = fmaxf(b.z + baserad, a.z + apexrad);
1808  }
1809 
1810  aabbBuffer.resize_upload(hostAabb);
1811 }
1812 
1813 
1814 void TachyonOptiX::AABB_cylinder_array(CUMemBuf &aabbBuffer,
1815  const float3 *base, const float3 *apex,
1816  const float *rads, int primcnt) {
1817  // XXX AABB calcs should be done in CUDA on the GPU...
1818  std::vector<OptixAabb> hostAabb(primcnt); // temp array for aabb generation
1819  for (int i=0; i<primcnt; i++) {
1820  auto &b = base[i];
1821  auto &a = apex[i];
1822  float rad = rads[i];
1823 
1824  hostAabb[i].minX = fminf(b.x - rad, a.x - rad);
1825  hostAabb[i].minY = fminf(b.y - rad, a.y - rad);
1826  hostAabb[i].minZ = fminf(b.z - rad, a.z - rad);
1827  hostAabb[i].maxX = fmaxf(b.x + rad, a.x + rad);
1828  hostAabb[i].maxY = fmaxf(b.y + rad, a.y + rad);
1829  hostAabb[i].maxZ = fmaxf(b.z + rad, a.z + rad);
1830  }
1831 
1832  aabbBuffer.resize_upload(hostAabb);
1833 }
1834 
1835 
1836 void TachyonOptiX::AABB_quadmesh(CUMemBuf &aabbBuffer, const float3 *verts,
1837  const int4 *indices, int primcnt) {
1838  // XXX AABB calcs should be done in CUDA on the GPU...
1839  std::vector<OptixAabb> hostAabb(primcnt); // temp array for aabb generation
1840  if (indices == NULL) {
1841  for (int i=0; i<primcnt; i++) {
1842  int idx4 = i*4;
1843  OptixAabb bbox;
1844  float3 tmp = verts[idx4];
1845  bbox.minX = tmp.x;
1846  bbox.minY = tmp.y;
1847  bbox.minZ = tmp.z;
1848  bbox.maxX = tmp.x;
1849  bbox.maxY = tmp.y;
1850  bbox.maxZ = tmp.z;
1851 
1852  tmp = verts[idx4+1];
1853  bbox.minX = fminf(bbox.minX, tmp.x);
1854  bbox.minY = fminf(bbox.minY, tmp.y);
1855  bbox.minZ = fminf(bbox.minZ, tmp.z);
1856  bbox.maxX = fmaxf(bbox.maxX, tmp.x);
1857  bbox.maxY = fmaxf(bbox.maxY, tmp.y);
1858  bbox.maxZ = fmaxf(bbox.maxZ, tmp.z);
1859 
1860  tmp = verts[idx4+2];
1861  bbox.minX = fminf(bbox.minX, tmp.x);
1862  bbox.minY = fminf(bbox.minY, tmp.y);
1863  bbox.minZ = fminf(bbox.minZ, tmp.z);
1864  bbox.maxX = fmaxf(bbox.maxX, tmp.x);
1865  bbox.maxY = fmaxf(bbox.maxY, tmp.y);
1866  bbox.maxZ = fmaxf(bbox.maxZ, tmp.z);
1867 
1868  tmp = verts[idx4+3];
1869  bbox.minX = fminf(bbox.minX, tmp.x);
1870  bbox.minY = fminf(bbox.minY, tmp.y);
1871  bbox.minZ = fminf(bbox.minZ, tmp.z);
1872  bbox.maxX = fmaxf(bbox.maxX, tmp.x);
1873  bbox.maxY = fmaxf(bbox.maxY, tmp.y);
1874  bbox.maxZ = fmaxf(bbox.maxZ, tmp.z);
1875 
1876  hostAabb[i] = bbox;
1877  }
1878  } else {
1879  for (int i=0; i<primcnt; i++) {
1880  int4 index = indices[i];
1881  OptixAabb bbox;
1882  float3 tmp = verts[index.x];
1883  bbox.minX = tmp.x;
1884  bbox.minY = tmp.y;
1885  bbox.minZ = tmp.z;
1886  bbox.maxX = tmp.x;
1887  bbox.maxY = tmp.y;
1888  bbox.maxZ = tmp.z;
1889 
1890  tmp = verts[index.y];
1891  bbox.minX = fminf(bbox.minX, tmp.x);
1892  bbox.minY = fminf(bbox.minY, tmp.y);
1893  bbox.minZ = fminf(bbox.minZ, tmp.z);
1894  bbox.maxX = fmaxf(bbox.maxX, tmp.x);
1895  bbox.maxY = fmaxf(bbox.maxY, tmp.y);
1896  bbox.maxZ = fmaxf(bbox.maxZ, tmp.z);
1897 
1898  tmp = verts[index.z];
1899  bbox.minX = fminf(bbox.minX, tmp.x);
1900  bbox.minY = fminf(bbox.minY, tmp.y);
1901  bbox.minZ = fminf(bbox.minZ, tmp.z);
1902  bbox.maxX = fmaxf(bbox.maxX, tmp.x);
1903  bbox.maxY = fmaxf(bbox.maxY, tmp.y);
1904  bbox.maxZ = fmaxf(bbox.maxZ, tmp.z);
1905 
1906  tmp = verts[index.w];
1907  bbox.minX = fminf(bbox.minX, tmp.x);
1908  bbox.minY = fminf(bbox.minY, tmp.y);
1909  bbox.minZ = fminf(bbox.minZ, tmp.z);
1910  bbox.maxX = fmaxf(bbox.maxX, tmp.x);
1911  bbox.maxY = fmaxf(bbox.maxY, tmp.y);
1912  bbox.maxZ = fmaxf(bbox.maxZ, tmp.z);
1913 
1914  hostAabb[i] = bbox;
1915  }
1916  }
1917 
1918  aabbBuffer.resize_upload(hostAabb);
1919 }
1920 
1921 
1922 void TachyonOptiX::AABB_ring_array(CUMemBuf &aabbBuffer,
1923  const float3 *pos, const float *rads,
1924  int primcnt) {
1925  // XXX AABB calcs should be done in CUDA on the GPU...
1926  std::vector<OptixAabb> hostAabb(primcnt); // temp array for aabb generation
1927  for (int i=0; i<primcnt; i++) {
1928  float rad = rads[i];
1929  hostAabb[i].minX = pos[i].x - rad;
1930  hostAabb[i].minY = pos[i].y - rad;
1931  hostAabb[i].minZ = pos[i].z - rad;
1932  hostAabb[i].maxX = pos[i].x + rad;
1933  hostAabb[i].maxY = pos[i].y + rad;
1934  hostAabb[i].maxZ = pos[i].z + rad;
1935  }
1936 
1937  aabbBuffer.resize_upload(hostAabb);
1938 }
1939 
1940 
1941 void TachyonOptiX::AABB_sphere_array(CUMemBuf &aabbBuffer,
1942  const float3 *pos, const float *rads,
1943  int primcnt) {
1944  // XXX AABB calcs should be done in CUDA on the GPU...
1945  std::vector<OptixAabb> hostAabb(primcnt); // temp array for aabb generation
1946  for (int i=0; i<primcnt; i++) {
1947  float rad = rads[i];
1948  hostAabb[i].minX = pos[i].x - rad;
1949  hostAabb[i].minY = pos[i].y - rad;
1950  hostAabb[i].minZ = pos[i].z - rad;
1951  hostAabb[i].maxX = pos[i].x + rad;
1952  hostAabb[i].maxY = pos[i].y + rad;
1953  hostAabb[i].maxZ = pos[i].z + rad;
1954  }
1955 
1956  aabbBuffer.resize_upload(hostAabb);
1957 }
1958 
1959 
1960 
1961 void TachyonOptiX::AS_buildinp_AABB(OptixBuildInput &asInp,
1962  CUdeviceptr *aabbptr,
1963  uint32_t *flagptr, int primcnt) {
1964  asInp = {};
1965  asInp.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1966 
1967  // device custom primitive buffers
1968 #if (OPTIX_VERSION >= 70100)
1969  auto &primArray = asInp.customPrimitiveArray;
1970 #else
1971  auto &primArray = asInp.aabbArray;
1972 #endif
1973 
1974  primArray.aabbBuffers = aabbptr;
1975  primArray.numPrimitives = primcnt;
1976  primArray.strideInBytes = 0; // tight-packed, sizeof(OptixAabb)
1977 
1978  // Ensure that anyhit is called only once for transparency handling
1979  *flagptr = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1980  primArray.flags = flagptr;
1981 
1982  primArray.numSbtRecords = 1;
1983  primArray.sbtIndexOffsetBuffer = 0; // No per-primitive record
1984  primArray.sbtIndexOffsetSizeInBytes = 0;
1985  primArray.sbtIndexOffsetStrideInBytes = 0;
1986  primArray.primitiveIndexOffset = 0;
1987 }
1988 
1989 
1990 int TachyonOptiX::build_GAS(std::vector<OptixBuildInput> asInp,
1991  CUMemBuf &ASTmpBuf,
1992  CUMemBuf &GASbuffer,
1993  uint64_t *d_ASCompactedSize,
1994  OptixTraversableHandle &tvh,
1995  cudaStream_t GASstream) {
1996  PROFILE_PUSH_RANGE("TachyonOptiX::build_GAS()", RTPROF_ACCEL);
1997  const int arrayCount = asInp.size();
1998 
1999  // BLAS setup
2000  OptixAccelBuildOptions asOpts = {};
2001  asOpts.motionOptions.numKeys = 1;
2002  asOpts.buildFlags = OPTIX_BUILD_FLAG_NONE |
2003  OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
2004  asOpts.operation = OPTIX_BUILD_OPERATION_BUILD;
2005 
2006  OptixAccelBufferSizes blasBufSizes = {};
2007  optixAccelComputeMemoryUsage(optix_ctx, &asOpts, asInp.data(),
2008  arrayCount, &blasBufSizes);
2009 
2010  // prepare compaction
2011  OptixAccelEmitDesc emitDesc = {};
2012  emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
2013  emitDesc.result = (CUdeviceptr) d_ASCompactedSize; // uint64_t in GPU device memory
2014 
2015  //
2016  // execute build (main stage)
2017  //
2018 
2019  // If we already have an existing temp buffer of required size
2020  // we use it as-is to avoid paying the reallocation time cost
2021  if (ASTmpBuf.get_size() < blasBufSizes.tempSizeInBytes)
2022  ASTmpBuf.set_size(blasBufSizes.tempSizeInBytes, GASstream);
2023 
2024  CUMemBuf outputBuffer;
2025  outputBuffer.set_size(blasBufSizes.outputSizeInBytes, GASstream);
2026 
2027  if (verbose == RT_VERB_DEBUG) {
2028  printf("TachyonOptiX) GAS/BLAS buffer sizes: temp %d output %d\n",
2029  blasBufSizes.tempSizeInBytes, blasBufSizes.outputSizeInBytes);
2030  }
2031 
2032  tvh = {}; // clear traversable handle
2033  optixAccelBuild(optix_ctx, GASstream, &asOpts, asInp.data(), arrayCount,
2034  ASTmpBuf.cu_dptr(), ASTmpBuf.get_size(),
2035  outputBuffer.cu_dptr(), outputBuffer.get_size(),
2036  &tvh, &emitDesc, 1);
2037 
2038  cudaStreamSynchronize(GASstream);
2039 
2040  // XXX compaction should only be performed when (compactedSize < outputSize)
2041  // to avoid the extra compacting pass when it is not beneficial,
2042  // but this still requires the GPU-Host copy of compactedSize,
2043  // and we'll have to be able to swap device pointers easily.
2044 
2045  // fetch compactedSize back to host
2046  uint64_t compactedSize = 0;
2047  cudaMemcpyAsync(&compactedSize, d_ASCompactedSize, sizeof(uint64_t),
2048  cudaMemcpyDeviceToHost, GASstream);
2049  if (verbose == RT_VERB_DEBUG) {
2050  cudaStreamSynchronize(GASstream);
2051  printf("TachyonOptiX) GAS/BLAS compacted size: %ld\n", compactedSize);
2052  }
2053 
2054  // perform compaction
2055  GASbuffer.set_size(compactedSize, GASstream);
2056  optixAccelCompact(optix_ctx, GASstream, tvh,
2057  GASbuffer.cu_dptr(), GASbuffer.get_size(), &tvh);
2058 
2059  // at this point, the final compacted AS is stored in the final Buffer
2060  // and the returned traversable--ephemeral data can be destroyed...
2061  outputBuffer.free(GASstream);
2062 
2063  cudaStreamSynchronize(GASstream);
2064 // PROFILE_STREAM_SYNC_PRETTY(GASstream); // sync only for clearer profile traces
2065 
2067  return 0;
2068 }
2069 
2070 
2071 int TachyonOptiX::build_IAS(std::vector<OptixBuildInput> asInp,
2072  CUMemBuf &ASTmpBuf,
2073  CUMemBuf &IASbuf,
2074  OptixTraversableHandle &tvh,
2075  cudaStream_t IASstream) {
2076  const int arrayCount = asInp.size();
2077  PROFILE_PUSH_RANGE("TachyonOptiX::build_IAS()", RTPROF_ACCEL);
2078 
2079  // TLAS setup
2080  OptixAccelBuildOptions asOpts = {};
2081  asOpts.buildFlags = OPTIX_BUILD_FLAG_NONE;
2082  asOpts.operation = OPTIX_BUILD_OPERATION_BUILD;
2083 
2084  OptixAccelBufferSizes tlasBufSizes = {};
2085  optixAccelComputeMemoryUsage(optix_ctx, &asOpts, asInp.data(),
2086  arrayCount, &tlasBufSizes);
2087 
2088  // execute build (main stage)
2089 
2090  // If we already have an existing temp buffer of required size
2091  // we use it as-is to avoid paying the reallocation time cost
2092  if (ASTmpBuf.get_size() < tlasBufSizes.tempSizeInBytes)
2093  ASTmpBuf.set_size(tlasBufSizes.tempSizeInBytes, IASstream);
2094 
2095  // If we already have an existing IAS buffer of required size
2096  // we use it as-is to avoid paying the reallocation time cost
2097  if (IASbuf.get_size() < tlasBufSizes.outputSizeInBytes)
2098  IASbuf.set_size(tlasBufSizes.outputSizeInBytes, IASstream);
2099 
2100  if (verbose == RT_VERB_DEBUG) {
2101  printf("TachyonOptiX) IAS/TLAS buffer sizes: temp %d output %d\n",
2102  tlasBufSizes.tempSizeInBytes, tlasBufSizes.outputSizeInBytes);
2103  }
2104 
2105  tvh = {}; // clear traversable handle
2106  optixAccelBuild(optix_ctx, IASstream, &asOpts, asInp.data(), arrayCount,
2107  ASTmpBuf.cu_dptr(), ASTmpBuf.get_size(),
2108  IASbuf.cu_dptr(), IASbuf.get_size(),
2109  &tvh, nullptr, 0);
2110 
2111  cudaStreamSynchronize(IASstream);
2112 // PROFILE_STREAM_SYNC_PRETTY(IASstream); // sync only for clearer profile traces
2113 
2115  return 0;
2116 }
2117 
2118 
2119 OptixTraversableHandle TachyonOptiX::build_curves_GAS() {
2120  DBG();
2121  PROFILE_PUSH_RANGE("TachyonOptiX::build_curves_GAS()", RTPROF_ACCEL);
2122 
2123  OptixTraversableHandle asHandle { 0 };
2124 
2125 #if OPTIX_VERSION >= 70100
2126  const int arrayCount = curvearrays.size();
2127 
2128  // RTX triangle inputs, preset vector sizes
2129  // AS build will consume device pointers, so when these
2130  // are freed, we should be destroying the associated AS
2131  curveVertBuffers.resize(arrayCount);
2132  curveVertRadBuffers.resize(arrayCount);
2133  curveSegIdxBuffers.resize(arrayCount);
2134 // curveVertColor3fBuffers.resize(arrayCount);
2135 // curveVertColor4uBuffers.resize(arrayCount);
2136  curvePrimColorBuffers.resize(arrayCount);
2137 
2138  // store per-curve data in arrays so we can dereference and
2139  // submit as a single-element array below
2140  std::vector<OptixBuildInput> asCurveInp(arrayCount);
2141  std::vector<CUdeviceptr> d_vertices(arrayCount);
2142  std::vector<CUdeviceptr> d_vertrads(arrayCount);
2143  std::vector<uint32_t> asCurveInpFlags(arrayCount);
2144 
2145  // loop over geom buffers and incorp into AS build...
2146  // Uploads each curve to the GPU before building AS,
2147  // stores resulting device pointers in lists, and
2148  // prepares OptixBuildInput records containing the
2149  // resulting device pointers, primitive counts, and flags.
2150  for (int i=0; i<arrayCount; i++) {
2151  CurveArray &model = curvearrays[i];
2152  curveVertBuffers[i].resize_upload(model.vertices);
2153  curveVertRadBuffers[i].resize_upload(model.vertradii);
2154  curveSegIdxBuffers[i].resize_upload(model.segindices);
2155 
2156  // optional buffers
2157 // curveVertColor3fBuffers[i].free();
2158 // curveVertColor4uBuffers[i].free();
2159  curvePrimColorBuffers[i].resize_upload(model.primcolors3f);
2160 
2161  asCurveInp[i] = {};
2162  asCurveInp[i].type = OPTIX_BUILD_INPUT_TYPE_CURVES;
2163 
2164  d_vertices[i] = curveVertBuffers[i].cu_dptr(); // host array of dev ptrs...
2165  d_vertrads[i] = curveVertRadBuffers[i].cu_dptr(); // host array of dev ptrs...
2166 
2167  // device curve buffers
2168  auto &curveArray = asCurveInp[i].curveArray;
2169  curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR;
2170 #if OPTIX_VERSION >= 70400
2171 // curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
2172 #endif
2173 
2174  // device curve vertex buffer
2175 // curveArray.numPrimitives = (int)model.segindices.size();
2176  curveArray.numPrimitives = 1; // num segments
2177  curveArray.vertexBuffers = &d_vertices[i];
2178  curveArray.numVertices = 2; // (int)model.vertices.size();
2179  curveArray.vertexStrideInBytes = sizeof(float3);
2180 
2181  // device curve width/radii buffer
2182  curveArray.widthBuffers = &d_vertrads[i];
2183  curveArray.widthStrideInBytes = sizeof(float);
2184 
2185  // device curve normal buffer
2186  // normal buffers are unused in OptiX versions <= 7.4
2187  curveArray.normalBuffers = NULL;
2188  curveArray.normalStrideInBytes = 0;
2189 
2190  // device curve index buffer
2191  curveArray.indexBuffer = curveSegIdxBuffers[i].cu_dptr();
2192  curveArray.indexStrideInBytes = sizeof(int);
2193 
2194  // Ensure that anyhit is called only once for transparency handling
2195  asCurveInpFlags[i] = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
2196  curveArray.flag = asCurveInpFlags[i];
2197  curveArray.primitiveIndexOffset = 0;
2198 
2199 #if OPTIX_VERSION >= 70400
2200  curveArray.endcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT;
2201 // curveArray.endcapFlags = OPTIX_CURVE_ENDCAP_ON;
2202 #endif
2203  }
2204 
2205  build_GAS(asCurveInp, ASTempBuffer, curvesGASBuffer,
2206  (uint64_t *) compactedSizeBuffer.cu_dptr(), asHandle, stream);
2207 #endif
2208 
2210  return asHandle;
2211 }
2212 
2213 
2214 OptixTraversableHandle TachyonOptiX::build_trimeshes_GAS() {
2215  DBG();
2216  PROFILE_PUSH_RANGE("TachyonOptiX::build_trimeshes_GAS()", RTPROF_ACCEL);
2217 
2218  PROFILE_PUSH_RANGE("Trimesh Upload, AS Input", RTPROF_GEOM);
2219  const int arrayCount = trimeshes.size();
2220 
2221  // RTX triangle inputs, preset vector sizes
2222  // AS build will consume device pointers, so when these
2223  // are freed, we should be destroying the associated AS
2224  triMeshVertBuffers.resize(arrayCount);
2225  triMeshIdxBuffers.resize(arrayCount);
2226  triMeshVertNormalBuffers.resize(arrayCount);
2227  triMeshVertPackedNormalBuffers.resize(arrayCount);
2228  triMeshVertColor3fBuffers.resize(arrayCount);
2229  triMeshVertColor4uBuffers.resize(arrayCount);
2230  triMeshPrimColorBuffers.resize(arrayCount);
2231  triMeshTex2dBuffers.resize(arrayCount);
2232  triMeshTex3dBuffers.resize(arrayCount);
2233 
2234  std::vector<OptixBuildInput> asTriInp(arrayCount);
2235  std::vector<CUdeviceptr> d_vertices(arrayCount);
2236  std::vector<uint32_t> asTriInpFlags(arrayCount);
2237 
2238  // loop over geom buffers and incorp into AS build...
2239  // Uploads each mesh to the GPU before building AS,
2240  // stores resulting device pointers in lists, and
2241  // prepares OptixBuildInput records containing the
2242  // resulting device pointers, primitive counts, and flags.
2243  for (int i=0; i<arrayCount; i++) {
2244  TriangleMesh &model = trimeshes[i];
2245  triMeshVertBuffers[i].resize_upload(model.vertices, stream);
2246  triMeshIdxBuffers[i].resize_upload(model.indices, stream); // optional
2247 
2248  // optional buffers
2249  triMeshVertNormalBuffers[i].resize_upload(model.normals, stream);
2250  triMeshVertPackedNormalBuffers[i].resize_upload(model.packednormals, stream);
2251  triMeshVertColor3fBuffers[i].resize_upload(model.vertcolors3f, stream);
2252  triMeshVertColor4uBuffers[i].resize_upload(model.vertcolors4u, stream);
2253  triMeshPrimColorBuffers[i].resize_upload(model.primcolors3f, stream);
2254  triMeshTex2dBuffers[i].resize_upload(model.tex2d, stream);
2255  triMeshTex3dBuffers[i].resize_upload(model.tex3d, stream);
2256  cudaStreamSynchronize(stream);
2257 
2258  asTriInp[i] = {};
2259  asTriInp[i].type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
2260 
2261  d_vertices[i] = triMeshVertBuffers[i].cu_dptr(); // host array of dev ptrs...
2262 
2263  // device triangle mesh buffers
2264  auto &triArray = asTriInp[i].triangleArray;
2265 
2266  // device trimesh vertex buffer
2267  triArray.vertexBuffers = &d_vertices[i];
2268  triArray.numVertices = (int)model.vertices.size();
2269  triArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
2270  triArray.vertexStrideInBytes = sizeof(float3);
2271 
2272  // optional device trimesh index buffer
2273  if (model.indices.size() > 0) {
2274  triArray.indexBuffer = triMeshIdxBuffers[i].cu_dptr();
2275  triArray.numIndexTriplets = (int)model.indices.size();
2276  triArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
2277  triArray.indexStrideInBytes = sizeof(int3);
2278  } else {
2279  triArray.indexBuffer = 0;
2280  triArray.numIndexTriplets = 0;
2281 #if OPTIX_VERSION >= 70100
2282  triArray.indexFormat = OPTIX_INDICES_FORMAT_NONE;
2283 #endif
2284  triArray.indexStrideInBytes = 0;
2285  }
2286  triArray.preTransform = 0; // no xform matrix
2287 
2288  // Ensure that anyhit is called only once for transparency handling
2289  asTriInpFlags[i] = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
2290  triArray.flags = &asTriInpFlags[i];
2291 
2292  triArray.numSbtRecords = 1;
2293  triArray.sbtIndexOffsetBuffer = 0;
2294  triArray.sbtIndexOffsetSizeInBytes = 0;
2295  triArray.sbtIndexOffsetStrideInBytes = 0;
2296  triArray.primitiveIndexOffset = 0;
2297  }
2298 
2300 
2301  OptixTraversableHandle asHandle;
2302  build_GAS(asTriInp, ASTempBuffer, trimeshesGASBuffer,
2303  (uint64_t *) compactedSizeBuffer.cu_dptr(), asHandle, stream);
2304 
2306  return asHandle;
2307 }
2308 
2309 
2310 OptixTraversableHandle TachyonOptiX::build_custprims_GAS() {
2311  DBG();
2312  PROFILE_PUSH_RANGE("TachyonOptiX::build_custprims_GAS()", RTPROF_ACCEL);
2313 
2314  // RTX custom primitive inputs
2315  // AS build will consume device pointers, so when these
2316  // are freed, we should be destroying the associated AS
2317 
2318  const int coneCount = conearrays.size();
2319  coneBaseBuffers.resize(coneCount);
2320  coneApexBuffers.resize(coneCount);
2321  coneBaseRadBuffers.resize(coneCount);
2322  coneApexRadBuffers.resize(coneCount);
2323  conePrimColorBuffers.resize(coneCount);
2324  coneAabbBuffers.resize(coneCount);
2325 
2326  const int cyCount = cyarrays.size();
2327  cyStartBuffers.resize(cyCount);
2328  cyEndBuffers.resize(cyCount);
2329  cyRadiusBuffers.resize(cyCount);
2330  cyPrimColorBuffers.resize(cyCount);
2331  cyAabbBuffers.resize(cyCount);
2332 
2333  const int quadCount = quadmeshes.size();
2334  quadMeshVertBuffers.resize(quadCount);
2335  quadMeshIdxBuffers.resize(quadCount);
2336  quadMeshVertNormalBuffers.resize(quadCount);
2337  quadMeshVertPackedNormalBuffers.resize(quadCount);
2338  quadMeshVertColor3fBuffers.resize(quadCount);
2339  quadMeshVertColor4uBuffers.resize(quadCount);
2340  quadMeshPrimColorBuffers.resize(quadCount);
2341  quadMeshAabbBuffers.resize(quadCount);
2342 
2343  const int riCount = riarrays.size();
2344  riCenterBuffers.resize(riCount);
2345  riNormalBuffers.resize(riCount);
2346  riInRadiusBuffers.resize(riCount);
2347  riOutRadiusBuffers.resize(riCount);
2348  riPrimColorBuffers.resize(riCount);
2349  riAabbBuffers.resize(riCount);
2350 
2351  const int spCount = sparrays.size();
2352  spPosRadiusBuffers.resize(spCount);
2353  spPrimColorBuffers.resize(spCount);
2354  spAabbBuffers.resize(spCount);
2355 
2356  const int arrayCount = coneCount + cyCount + quadCount + riCount + spCount;
2357 
2358  std::vector<OptixBuildInput> asInp(arrayCount);
2359  std::vector<CUdeviceptr> d_aabb(arrayCount);
2360  std::vector<uint32_t> asInpFlags(arrayCount);
2361 
2362  // loop over geom buffers and incorp into AS build...
2363  // Uploads each mesh to the GPU before building AS,
2364  // stores resulting device pointers in lists, and
2365  // prepares OptixBuildInput records containing the
2366  // resulting device pointers, primitive counts, and flags.
2367  int bufIdx = 0;
2368 
2369  // Cones...
2370  for (int i=0; i<coneCount; i++) {
2371  ConeArray &m = conearrays[i];
2372  coneBaseBuffers[i].resize_upload(m.base);
2373  coneApexBuffers[i].resize_upload(m.apex);
2374  coneBaseRadBuffers[i].resize_upload(m.baserad);
2375  coneApexRadBuffers[i].resize_upload(m.apexrad);
2376  conePrimColorBuffers[i].resize_upload(m.primcolors3f);
2377 
2378  int primcnt = m.base.size();
2379  AABB_cone_array(coneAabbBuffers[i], m.base.data(), m.apex.data(),
2380  m.baserad.data(), m.apexrad.data(), primcnt);
2381  int bidx = bufIdx + i;
2382  d_aabb[bidx] = coneAabbBuffers[i].cu_dptr();
2383  AS_buildinp_AABB(asInp[bidx], &d_aabb[bidx], &asInpFlags[bidx], primcnt);
2384  }
2385  bufIdx += coneCount;
2386 
2387  // Cylinders...
2388  for (int i=0; i<cyCount; i++) {
2389  CylinderArray &m = cyarrays[i];
2390  cyStartBuffers[i].resize_upload(m.start);
2391  cyEndBuffers[i].resize_upload(m.end);
2392  cyRadiusBuffers[i].resize_upload(m.radius);
2393  cyPrimColorBuffers[i].resize_upload(m.primcolors3f);
2394 
2395  int primcnt = m.radius.size();
2396  AABB_cylinder_array(cyAabbBuffers[i], m.start.data(), m.end.data(),
2397  m.radius.data(), primcnt);
2398  int bidx = bufIdx + i;
2399  d_aabb[bidx] = cyAabbBuffers[i].cu_dptr();
2400  AS_buildinp_AABB(asInp[bidx], &d_aabb[bidx], &asInpFlags[bidx], primcnt);
2401  }
2402  bufIdx += cyCount;
2403 
2404  // Quads...
2405  for (int i=0; i<quadCount; i++) {
2406  QuadMesh &m = quadmeshes[i];
2407  quadMeshVertBuffers[i].resize_upload(m.vertices);
2408  quadMeshIdxBuffers[i].resize_upload(m.indices);
2409  quadMeshVertNormalBuffers[i].resize_upload(m.normals);
2410  quadMeshVertPackedNormalBuffers[i].resize_upload(m.packednormals);
2411  quadMeshVertColor3fBuffers[i].resize_upload(m.vertcolors3f);
2412  quadMeshVertColor4uBuffers[i].resize_upload(m.vertcolors4u);
2413  quadMeshPrimColorBuffers[i].resize_upload(m.primcolors3f);
2414 
2415  int primcnt = (m.indices.size() > 0) ? m.indices.size() : (m.vertices.size() / 4);
2416  AABB_quadmesh(quadMeshAabbBuffers[i], m.vertices.data(), m.indices.data(), primcnt);
2417  int bidx = bufIdx + i;
2418  d_aabb[bidx] = quadMeshAabbBuffers[i].cu_dptr();
2419  AS_buildinp_AABB(asInp[bidx], &d_aabb[bidx], &asInpFlags[bidx], primcnt);
2420  }
2421  bufIdx += quadCount;
2422 
2423  // Rings...
2424  for (int i=0; i<riCount; i++) {
2425  RingArray &m = riarrays[i];
2426  riCenterBuffers[i].resize_upload(m.center);
2427  riNormalBuffers[i].resize_upload(m.normal);
2428  riInRadiusBuffers[i].resize_upload(m.inrad);
2429  riOutRadiusBuffers[i].resize_upload(m.outrad);
2430  riPrimColorBuffers[i].resize_upload(m.primcolors3f);
2431 
2432  int primcnt = m.outrad.size();
2433  AABB_ring_array(riAabbBuffers[i], m.center.data(),
2434  m.outrad.data(), primcnt);
2435  int bidx = bufIdx + i;
2436  d_aabb[bidx] = riAabbBuffers[i].cu_dptr();
2437  AS_buildinp_AABB(asInp[bidx], &d_aabb[bidx], &asInpFlags[bidx], primcnt);
2438  }
2439  bufIdx += riCount;
2440 
2441  // Spheres...
2442  for (int i=0; i<spCount; i++) {
2443  SphereArray &m = sparrays[i];
2444  int sz = m.radius.size();
2445  std::vector<float4 PINALLOCS(float4)> tmp(sz);
2446  for (int j=0; j<sz; j++) {
2447  tmp[j] = make_float4(m.center[j], m.radius[i]);
2448  }
2449  spPosRadiusBuffers[i].resize_upload(tmp);
2450  spPrimColorBuffers[i].resize_upload(m.primcolors3f);
2451 
2452  int primcnt = m.radius.size();
2453  AABB_sphere_array(spAabbBuffers[i], m.center.data(), m.radius.data(), primcnt);
2454  int bidx = bufIdx + i;
2455  d_aabb[bidx] = spAabbBuffers[i].cu_dptr();
2456  AS_buildinp_AABB(asInp[bidx], &d_aabb[bidx], &asInpFlags[bidx], primcnt);
2457  }
2458  bufIdx += spCount;
2459 
2460  OptixTraversableHandle asHandle;
2461  build_GAS(asInp, ASTempBuffer, custprimsGASBuffer,
2462  (uint64_t *) compactedSizeBuffer.cu_dptr(), asHandle, stream);
2463 
2465  return asHandle;
2466 }
2467 
2468 
2469 void TachyonOptiX::build_scene_IAS() {
2470  DBG();
2471  PROFILE_PUSH_RANGE("TachyonOptiX::build_scene_IAS()", RTPROF_ACCEL);
2472 
2473  OptixTraversableHandle trimeshesGAS = {};
2474  OptixTraversableHandle curvesGAS = {};
2475  OptixTraversableHandle custprimsGAS = {};
2476 
2477  // zero out host-side array sizes, but retain GPU-side allocation to
2478  // avoid costly reallocations unless absolutely necessary
2479  custprimsGASBuffer.clear_persist_allocation();
2480 #if OPTIX_VERSION >= 70100
2481  curvesGASBuffer.clear_persist_allocation();
2482 #endif
2483  trimeshesGASBuffer.clear_persist_allocation();
2484 
2485  //
2486  // (re)build GASes for each geometry class
2487  //
2488  int custprimcount = (conearrays.size() + cyarrays.size() +
2489  quadmeshes.size() + riarrays.size() +
2490  sparrays.size());
2491  if (custprimcount > 0) {
2492  custprimsGAS = build_custprims_GAS();
2493  }
2494 
2495  if (curvearrays.size() > 0) {
2496  curvesGAS = build_curves_GAS();
2497  }
2498 
2499  if (trimeshes.size() > 0) {
2500  trimeshesGAS = build_trimeshes_GAS();
2501  }
2502 
2503  int sbtOffset = 0;
2504  std::vector<OptixInstance> instances;
2505 
2506  OptixInstance tmpInst = {};
2507  auto &i = tmpInst;
2508  float identity_xform3x4[12] = {
2509  1.0f, 0.0f, 0.0f, 0.0f,
2510  0.0f, 1.0f, 0.0f, 0.0f,
2511  0.0f, 0.0f, 1.0f, 0.0f
2512  };
2513 
2514  // populate instance
2515  memcpy(i.transform, identity_xform3x4, sizeof(identity_xform3x4));
2516  i.instanceId = 0;
2517  i.sbtOffset = 0;
2518  i.visibilityMask = 0xFF;
2519  i.flags = OPTIX_INSTANCE_FLAG_NONE;
2520 
2521  if (custprimsGAS) {
2522  i.traversableHandle = custprimsGAS;
2523  i.sbtOffset = sbtOffset;
2524  instances.push_back(i);
2525 
2526  sbtOffset += RT_RAY_TYPE_COUNT * custprimcount;
2527  }
2528 
2529  if (curvesGAS) {
2530  i.traversableHandle = curvesGAS;
2531  i.sbtOffset = sbtOffset;
2532  instances.push_back(i);
2533 
2534  sbtOffset += RT_RAY_TYPE_COUNT * curvearrays.size();
2535  }
2536 
2537  if (trimeshesGAS) {
2538  i.traversableHandle = trimeshesGAS;
2539  i.sbtOffset = sbtOffset;
2540  instances.push_back(i);
2541 
2542  sbtOffset += RT_RAY_TYPE_COUNT * trimeshes.size();
2543  }
2544 
2545 #if 0
2546  printf("TachyonOptiX) custprimsGAS: %p\n", custprimsGAS);
2547  printf("TachyonOptiX) curvesGAS: %p\n", curvesGAS);
2548  printf("TachyonOptiX) trimeshesGAS: %p\n", trimeshesGAS);
2549  printf("TachyonOptiX) i.traversable: %p\n", i.traversableHandle);
2550  printf("TachyonOptiX) instance[0].traversable: %p\n", instances[0].traversableHandle);
2551 #endif
2552 
2553  CUMemBuf devinstances;
2554  devinstances.resize_upload(instances, stream);
2555 
2556  std::vector<OptixBuildInput> asInstInp(1);
2557  asInstInp[0] = {};
2558  asInstInp[0].type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
2559  asInstInp[0].instanceArray.instances = devinstances.cu_dptr();
2560  asInstInp[0].instanceArray.numInstances = (int) instances.size();
2561 
2562  OptixTraversableHandle asHandle { 0 };
2563  build_IAS(asInstInp, ASTempBuffer, IASBuffer, asHandle, stream);
2564  rtLaunch.traversable = asHandle;
2565 
2566  devinstances.free(stream);
2567 
2568  PROFILE_STREAM_SYNC_PRETTY(stream); // sync only for clearer profile traces
2569 
2571 }
2572 
2573 
2574 void TachyonOptiX::destroy_context() {
2575  DBG();
2576  if (!context_created)
2577  return;
2578 
2579  destroy_scene();
2580 
2581  // free the normally-persistent, and globally used
2582  // AS temp buffer, IAS, and compacted size buffers
2583  ASTempBuffer.free();
2584  compactedSizeBuffer.free();
2585  IASBuffer.free();
2586 
2587  context_destroy_pipeline();
2588  context_destroy_module();
2590 #if defined(TACHYON_OPTIXDENOISER)
2591  context_destroy_denoiser();
2592 #endif
2593 
2594  // launch params buffer refers to materials/lights buffers
2595  // so we destroy it first...
2596  launchParamsBuffer.free();
2597  materialsBuffer.free();
2598  directionalLightsBuffer.free();
2599  positionalLightsBuffer.free();
2600 
2601  optixDeviceContextDestroy(optix_ctx);
2602 
2603  regen_optix_pipeline=1;
2604  regen_optix_sbt=1;
2605  regen_optix_lights=1;
2606 }
2607 
2608 
2609 
2610 //
2611 // Images, Textures, Materials
2612 //
2613 
2615  return userindex; // XXX short-term hack
2616 }
2617 
2618 
2619 int TachyonOptiX::add_tex2d_rgba4u(const unsigned char *img,
2620  int xres, int yres,
2621  int texflags, int userindex) {
2622 // DBG();
2623 
2624  int oldtexcount = texturecache.size();
2625  if (oldtexcount <= userindex) {
2626  rt_texture t;
2627 
2628  // XXX do something noticable so we see that we got a bad entry...
2629  memset(&t, 0, sizeof(t));
2630  t.userindex = -1; // negative user index indicates an unused or bad entry
2631 
2632  texturecache.resize(userindex+1);
2633  for (int i=oldtexcount; i<=userindex; i++) {
2634  texturecache[i]=t;
2635  }
2636  }
2637 
2638  if (texturecache[userindex].userindex > 0) {
2639  return userindex;
2640  } else {
2641  if (verbose == RT_VERB_DEBUG) printf("TachyonOptiX) Adding texture[%d]\n", userindex);
2642 
2643  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar4>();
2644  cudaArray_t texArray;
2645  cudaMallocArray(&texArray, &channelDesc, xres, yres);
2646 
2647  // Set pitch of the source (the width in memory in bytes of the 2D array
2648  // pointed to by src, including padding), we dont have any padding
2649  const size_t spitch = xres * sizeof(float);
2650  cudaMemcpy2DToArray(texArray, 0, 0, img, spitch, xres * sizeof(float),
2651  yres, cudaMemcpyHostToDevice);
2652 
2653  cudaResourceDesc resDesc;
2654  memset(&resDesc, 0, sizeof(resDesc));
2655  resDesc.resType = cudaResourceTypeArray;
2656  resDesc.res.array.array = texArray;
2657 
2658  cudaTextureDesc texDesc;
2659  memset(&texDesc, 0, sizeof(texDesc));
2660  texDesc.addressMode[0] = cudaAddressModeWrap;
2661  texDesc.addressMode[1] = cudaAddressModeWrap;
2662  texDesc.filterMode = cudaFilterModeLinear;
2663  texDesc.readMode = cudaReadModeNormalizedFloat;
2664  texDesc.normalizedCoords = 1;
2665  if (texflags & RT_TEX_COLORSPACE_sRGB)
2666  texDesc.sRGB = 1;
2667  else
2668  texDesc.sRGB = 0;
2669 
2670 
2671  // Create texture object
2672  cudaTextureObject_t texObj = 0;
2673  cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
2674 
2675  texturecache[userindex].texflags = texflags;
2676  texturecache[userindex].d_img = texArray;
2677  texturecache[userindex].tex = texObj;
2678  texturecache[userindex].userindex=userindex;
2679  }
2680 
2681  return userindex;
2682 }
2683 
2684 
2685 int TachyonOptiX::add_tex3d_rgba4u(const unsigned char *img,
2686  int xres, int yres, int zres,
2687  int texflags, int userindex) {
2688 // DBG();
2689 
2690  int oldtexcount = texturecache.size();
2691  if (oldtexcount <= userindex) {
2692  rt_texture t;
2693 
2694  // XXX do something noticable so we see that we got a bad entry...
2695  memset(&t, 0, sizeof(t));
2696  t.userindex = -1; // negative user index indicates an unused or bad entry
2697 
2698  texturecache.resize(userindex+1);
2699  for (int i=oldtexcount; i<=userindex; i++) {
2700  texturecache[i]=t;
2701  }
2702  }
2703 
2704  if (texturecache[userindex].userindex > 0) {
2705  return userindex;
2706  } else {
2707  if (verbose == RT_VERB_DEBUG) printf("TachyonOptiX) Adding texture[%d]\n", userindex);
2708 
2709  // Compute grid extents and channel description for the 3-D array
2710  cudaExtent gridExtent = make_cudaExtent(xres, yres, zres);
2711  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar4>();
2712 
2713  cudaArray_t texArray;
2714  cudaMalloc3DArray(&texArray, &channelDesc, gridExtent);
2715 
2716  cudaMemcpy3DParms copyParams = {0};
2717  copyParams.srcPtr = make_cudaPitchedPtr((void*)img,
2718  gridExtent.width*sizeof(uchar4),
2719  gridExtent.width,
2720  gridExtent.height);
2721  copyParams.dstArray = texArray;
2722  copyParams.extent = gridExtent;
2723  copyParams.kind = cudaMemcpyHostToDevice;
2724  cudaMemcpy3D(&copyParams);
2725 
2726  cudaResourceDesc resDesc;
2727  memset(&resDesc, 0, sizeof(resDesc));
2728  resDesc.resType = cudaResourceTypeArray;
2729  resDesc.res.array.array = texArray;
2730 
2731  cudaTextureDesc texDesc;
2732  memset(&texDesc, 0, sizeof(texDesc));
2733  texDesc.addressMode[0] = cudaAddressModeClamp;
2734  texDesc.addressMode[1] = cudaAddressModeClamp;
2735  texDesc.addressMode[2] = cudaAddressModeClamp;
2736  texDesc.filterMode = cudaFilterModeLinear;
2737  texDesc.readMode = cudaReadModeNormalizedFloat;
2738  texDesc.normalizedCoords = 1;
2739  if (texflags & RT_TEX_COLORSPACE_sRGB)
2740  texDesc.sRGB = 1;
2741  else
2742  texDesc.sRGB = 0;
2743 
2744  // Create texture object
2745  cudaTextureObject_t texObj = 0;
2746  cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
2747 
2748  texturecache[userindex].texflags = texflags;
2749  texturecache[userindex].d_img = texArray;
2750  texturecache[userindex].tex = texObj;
2751  texturecache[userindex].userindex=userindex;
2752  }
2753 
2754  return userindex;
2755 }
2756 
2757 
2759  return userindex; // XXX short-term hack
2760 }
2761 
2762 int TachyonOptiX::add_material(float ambient, float diffuse, float specular,
2763  float shininess, float reflectivity,
2764  float opacity, float outline, float outlinewidth,
2765  int transmode, int userindex) {
2766  return add_material_textured(ambient, diffuse, specular, shininess,
2767  reflectivity, opacity, outline, outlinewidth,
2768  transmode, -1, userindex);
2769 }
2770 
2771 int TachyonOptiX::add_material_textured(float ambient, float diffuse,
2772  float specular, float shininess,
2773  float reflectivity, float opacity,
2774  float outline, float outlinewidth,
2775  int transmode,
2776  int texindex, int userindex) {
2777 // DBG();
2778 
2779  int oldmatcount = materialcache.size();
2780  if (oldmatcount <= userindex) {
2781  rt_material m;
2782 
2783  // XXX do something noticable so we see that we got a bad entry...
2784  m.ambient = 0.5f;
2785  m.diffuse = 0.7f;
2786  m.specular = 0.0f;
2787  m.shininess = 10.0f;
2788  m.reflectivity = 0.0f;
2789  m.opacity = 1.0f;
2790  m.transmode = 0;
2791  m.tex = 0;
2792  m.matflags = 0;
2793  m.userindex = -1; // negative user index indicates an unused or bad entry
2794 
2795  materialcache.resize(userindex+1);
2796  for (int i=oldmatcount; i<=userindex; i++) {
2797  materialcache[i]=m;
2798  }
2799  }
2800 
2801  if (materialcache[userindex].userindex > 0) {
2802  return userindex;
2803  } else {
2804  if (verbose == RT_VERB_DEBUG) printf("TachyonOptiX) Adding material[%d]\n", userindex);
2805 
2806  materialcache[userindex].ambient = ambient;
2807  materialcache[userindex].diffuse = diffuse;
2808  materialcache[userindex].specular = specular;
2809  materialcache[userindex].shininess = shininess;
2810  materialcache[userindex].reflectivity = reflectivity;
2811  materialcache[userindex].opacity = opacity;
2812  materialcache[userindex].outline = outline;
2813  materialcache[userindex].outlinewidth = outlinewidth;
2814  materialcache[userindex].transmode = transmode;
2815  materialcache[userindex].tex = 0;
2816  materialcache[userindex].matflags = 0;
2817  materialcache[userindex].userindex=userindex;
2818 
2819  if (opacity < 1.0f) {
2820  materialcache[userindex].matflags |= RT_MAT_ALPHA;
2821  }
2822 
2823  // set texture object
2824  if (texindex >= 0) {
2825  materialcache[userindex].tex = texturecache[texindex].tex;
2826 #if 0
2827  printf("mat[%d] texid: %llu\n", userindex, texturecache[texindex].tex);
2828 #endif
2829 
2830  // set flags when texture alpha or cutout transparency is in use
2831  if (texturecache[texindex].texflags & RT_TEX_ALPHA) {
2832  materialcache[userindex].matflags |= RT_MAT_TEXALPHA;
2833 #if 0
2834  printf("mat[%d] uses cutout alpha texture, matflags: %08x\n",
2835  userindex, materialcache[userindex].matflags);
2836 #endif
2837  }
2838  }
2839 
2840  regen_optix_materials=1; // force a fresh material table upload to the GPU
2841  }
2842 
2843  return userindex;
2844 }
2845 
2846 
2848  if (verbose == RT_VERB_DEBUG) printf("TachyonOptiX) init_materials()\n");
2849 
2850  materialcache.clear();
2851  regen_optix_materials=1; // force a fresh material table upload to the GPU
2852 }
2853 
2854 
2855 void TachyonOptiX::add_directional_light(const float *dir, const float *color) {
2857  l.dir = normalize(make_float3(dir[0], dir[1], dir[2]));
2858 // l.color = make_float3(color[0], color[1], color[2]);
2859  directional_lights.push_back(l);
2860  regen_optix_lights=1;
2861 }
2862 
2863 
2864 void TachyonOptiX::add_positional_light(const float *pos, const float *color) {
2866  l.pos = make_float3(pos[0], pos[1], pos[2]);
2867 // l.color = make_float3(color[0], color[1], color[2]);
2868  positional_lights.push_back(l);
2869  regen_optix_lights=1;
2870 }
2871 
2872 
2874  DBG();
2875  double starttime = wkf_timer_timenow(rt_timer);
2876  time_ctx_destroy_scene = 0;
2877 
2878  // zero out all object counters
2879  cylinder_array_cnt = 0;
2880  cylinder_array_color_cnt = 0;
2881  ring_array_color_cnt = 0;
2882  sphere_array_cnt = 0;
2883  sphere_array_color_cnt = 0;
2884  tricolor_cnt = 0;
2885  trimesh_c4u_n3b_v3f_cnt = 0;
2886  trimesh_n3b_v3f_cnt = 0;
2887  trimesh_n3f_v3f_cnt = 0;
2888  trimesh_v3f_cnt = 0;
2889 
2890  if (!context_created)
2891  return;
2892 
2893  // XXX this renderer class isn't tracking scene state yet
2894  scene_created = 1;
2895  if (scene_created) {
2897  destroy_lights();
2898 
2899  for (auto &&buf : coneAabbBuffers) buf.free();
2900  coneAabbBuffers.clear();
2901  for (auto &&buf : coneBaseBuffers) buf.free();
2902  coneBaseBuffers.clear();
2903  for (auto &&buf : coneApexBuffers) buf.free();
2904  coneApexBuffers.clear();
2905  for (auto &&buf : coneBaseRadBuffers) buf.free();
2906  coneBaseRadBuffers.clear();
2907  for (auto &&buf : coneApexRadBuffers) buf.free();
2908  coneApexRadBuffers.clear();
2909  for (auto &&buf : conePrimColorBuffers) buf.free();
2910  conePrimColorBuffers.clear();
2911  conearrays.clear();
2912 
2913  for (auto &&buf : curveVertBuffers) buf.free();
2914  curveVertBuffers.clear();
2915  for (auto &&buf : curveVertRadBuffers) buf.free();
2916  curveVertRadBuffers.clear();
2917  for (auto &&buf : curveSegIdxBuffers) buf.free();
2918  curveSegIdxBuffers.clear();
2919  for (auto &&buf : curvePrimColorBuffers) buf.free();
2920  curvePrimColorBuffers.clear();
2921  curvearrays.clear();
2922 
2923  for (auto &&buf : cyAabbBuffers) buf.free();
2924  cyAabbBuffers.clear();
2925  for (auto &&buf : cyStartBuffers) buf.free();
2926  cyStartBuffers.clear();
2927  for (auto &&buf : cyEndBuffers) buf.free();
2928  cyEndBuffers.clear();
2929  for (auto &&buf : cyRadiusBuffers) buf.free();
2930  cyRadiusBuffers.clear();
2931  for (auto &&buf : cyPrimColorBuffers) buf.free();
2932  cyPrimColorBuffers.clear();
2933  cyarrays.clear();
2934 
2935  for (auto &&buf : quadMeshAabbBuffers) buf.free();
2936  quadMeshAabbBuffers.clear();
2937  for (auto &&buf : quadMeshVertBuffers) buf.free();
2938  quadMeshVertBuffers.clear();
2939  for (auto &&buf : quadMeshIdxBuffers) buf.free();
2940  quadMeshIdxBuffers.clear();
2941  for (auto &&buf : quadMeshVertNormalBuffers) buf.free();
2942  quadMeshVertNormalBuffers.clear();
2943  for (auto &&buf : quadMeshVertPackedNormalBuffers) buf.free();
2944  quadMeshVertPackedNormalBuffers.clear();
2945  for (auto &&buf : quadMeshVertColor3fBuffers) buf.free();
2946  quadMeshVertColor3fBuffers.clear();
2947  for (auto &&buf : quadMeshVertColor4uBuffers) buf.free();
2948  quadMeshVertColor4uBuffers.clear();
2949  for (auto &&buf : quadMeshPrimColorBuffers) buf.free();
2950  quadMeshPrimColorBuffers.clear();
2951  quadmeshes.clear();
2952 
2953  for (auto &&buf : riAabbBuffers) buf.free();
2954  riAabbBuffers.clear();
2955  for (auto &&buf : riCenterBuffers) buf.free();
2956  riCenterBuffers.clear();
2957  for (auto &&buf : riNormalBuffers) buf.free();
2958  riNormalBuffers.clear();
2959  for (auto &&buf : riInRadiusBuffers) buf.free();
2960  riInRadiusBuffers.clear();
2961  for (auto &&buf : riOutRadiusBuffers) buf.free();
2962  riOutRadiusBuffers.clear();
2963  for (auto &&buf : riPrimColorBuffers) buf.free();
2964  riPrimColorBuffers.clear();
2965  riarrays.clear();
2966 
2967  for (auto &&buf : spAabbBuffers) buf.free();
2968  spAabbBuffers.clear();
2969  for (auto &&buf : spPosRadiusBuffers) buf.free();
2970  spPosRadiusBuffers.clear();
2971  for (auto &&buf : spPrimColorBuffers) buf.free();
2972  spPrimColorBuffers.clear();
2973  sparrays.clear();
2974 
2975  for (auto &&buf : triMeshVertBuffers) buf.free();
2976  triMeshVertBuffers.clear();
2977  for (auto &&buf : triMeshIdxBuffers) buf.free();
2978  triMeshIdxBuffers.clear();
2979  for (auto &&buf : triMeshVertNormalBuffers) buf.free();
2980  triMeshVertNormalBuffers.clear();
2981  for (auto &&buf : triMeshVertPackedNormalBuffers) buf.free();
2982  triMeshVertPackedNormalBuffers.clear();
2983  for (auto &&buf : triMeshVertColor3fBuffers) buf.free();
2984  triMeshVertColor3fBuffers.clear();
2985  for (auto &&buf : triMeshVertColor4uBuffers) buf.free();
2986  triMeshVertColor4uBuffers.clear();
2987  for (auto &&buf : triMeshPrimColorBuffers) buf.free();
2988  triMeshPrimColorBuffers.clear();
2989  for (auto &&buf : triMeshTex2dBuffers) buf.free();
2990  triMeshTex2dBuffers.clear();
2991  for (auto &&buf : triMeshTex3dBuffers) buf.free();
2992  triMeshTex3dBuffers.clear();
2993  trimeshes.clear();
2994 
2995  SBT_clear(); // only zero out the SBT, don't free GPU mem
2996 
2997  // zero out host-side array sizes, but retain GPU-side allocation to
2998  // avoid costly reallocations unless absolutely necessary
2999  custprimsGASBuffer.clear_persist_allocation();
3000 #if OPTIX_VERSION >= 70100
3001  curvesGASBuffer.clear_persist_allocation();
3002 #endif
3003  trimeshesGASBuffer.clear_persist_allocation();
3004  }
3005 
3006  double endtime = wkf_timer_timenow(rt_timer);
3007  time_ctx_destroy_scene = endtime - starttime;
3008 
3009  scene_created = 0; // scene has been destroyed
3010 }
3011 
3012 
3013 void TachyonOptiX::set_camera_lookat(const float *at, const float *upV) {
3014  // force position update to be committed to the rtLaunch struct too...
3015  rtLaunch.cam.pos = make_float3(cam_pos[0], cam_pos[1], cam_pos[2]);
3016  float3 lookat = make_float3(at[0], at[1], at[2]);
3017  float3 V = make_float3(upV[0], upV[1], upV[2]);
3018  rtLaunch.cam.W = normalize(lookat - rtLaunch.cam.pos);
3021 
3022  // copy new ONB vectors back to top level data structure
3023  cam_U[0] = rtLaunch.cam.U.x;
3024  cam_U[1] = rtLaunch.cam.U.y;
3025  cam_U[2] = rtLaunch.cam.U.z;
3026 
3027  cam_V[0] = rtLaunch.cam.V.x;
3028  cam_V[1] = rtLaunch.cam.V.y;
3029  cam_V[2] = rtLaunch.cam.V.z;
3030 
3031  cam_W[0] = rtLaunch.cam.W.x;
3032  cam_W[1] = rtLaunch.cam.W.y;
3033  cam_W[2] = rtLaunch.cam.W.z;
3034 }
3035 
3036 
3037 void TachyonOptiX::framebuffer_config(int fbwidth, int fbheight,
3038  int interactive) {
3039  DBG();
3040  if (!context_created)
3041  return;
3042 
3043  framebuffer_resize(fbwidth, fbheight);
3044 
3045  // do anything special for interactive...
3046 }
3047 
3049  colorspace=colspace;
3050 }
3051 
3052 void TachyonOptiX::framebuffer_resize(int fbwidth, int fbheight) {
3053  DBG();
3054  if (!context_created)
3055  return;
3056  PROFILE_PUSH_RANGE("TachyonOptiX::framebuffer_resize()", RTPROF_GENERAL);
3057 
3058  width = fbwidth;
3059  height = fbheight;
3060 
3061  int fbsz = width * height * sizeof(uchar4);
3062  framebuffer.set_size(fbsz, stream);
3063 
3064  int acsz = width * height * sizeof(float4);
3065  accumulation_buffer.set_size(acsz, stream);
3066 
3067 #if defined(TACHYON_OPTIXDENOISER)
3068  denoiser_resize_update();
3069 #endif
3070 
3071 #if defined(TACHYON_RAYSTATS)
3072  int assz = width * height * sizeof(uint4);
3073  raystats1_buffer.set_size(assz, stream);
3074  raystats2_buffer.set_size(assz, stream);
3075 #endif
3076 
3078 
3079  if (verbose == RT_VERB_DEBUG)
3080  printf("TachyonOptiX) framebuffer_resize(%d x %d)\n", width, height);
3081 
3083 }
3084 
3085 
3087  rtLaunch.frame.subframe_index = 0; // only reset when accum buf cleared
3088  rtLaunch.frame.accum_normalize = 1.0f; // only reset when accum buf cleared
3089 
3090 #if 1
3091  //
3092  // only clear the FBs as part of 1st rendering pass
3093  //
3094  rtLaunch.frame.fb_clearall = 1; // clear all bufs during rendering
3095 #else
3096  //
3097  // force-clear the FBs irrespective of rendering
3098  //
3099  auto fbsz = framebuffer.get_size();
3100  cudaMemsetAsync(framebuffer.dptr(), 0, fbsz, stream);
3101 
3102  auto acsz = accumulation_buffer.get_size();
3103  cudaMemsetAsync(accumulation_buffer.dptr(), 0, acsz, stream);
3104 
3105 #if defined(TACHYON_RAYSTATS)
3106  // clear stats buffers
3107  auto assz = raystats1_buffer.get_size();
3108  cudaMemsetAsync(raystats1_buffer.dptr(), 0, assz, stream);
3109  cudaMemsetAsync(raystats2_buffer.dptr(), 0, assz, stream);
3110 #endif
3111 
3112  cudaStreamSynchronize(stream);
3113 #endif
3114 
3115  if (verbose == RT_VERB_DEBUG)
3116  printf("TachyonOptiX) framebuffer_clear(%d x %d)\n", width, height);
3117 }
3118 
3119 
3120 void TachyonOptiX::framebuffer_download_rgb4u(unsigned char *imgrgb4u) {
3121  DBG();
3122  framebuffer.download(imgrgb4u, width * height * sizeof(int));
3123 }
3124 
3125 
3127  DBG();
3128  if (!context_created)
3129  return;
3130 
3131  framebuffer.free();
3132  accumulation_buffer.free();
3133 #if defined(TACHYON_RAYSTATS)
3134  raystats1_buffer.free();
3135  raystats2_buffer.free();
3136 #endif
3137 }
3138 
3139 
3140 void TachyonOptiX::render_compile_and_validate(void) {
3141  DBG();
3142  if (!context_created)
3143  return;
3144 
3145  //
3146  // finalize context validation, compilation, and AS generation
3147  //
3148  double startctxtime = wkf_timer_timenow(rt_timer);
3149 
3150  PROFILE_PUSH_RANGE("TachyonOptiX::render_compile_and_validate()", RTPROF_RENDER);
3151 
3152  // (re)build OptiX raygen/hitgroup/miss program pipeline
3153  if (regen_optix_pipeline) {
3154  if (pipe != nullptr)
3155  context_destroy_pipeline();
3156  context_create_pipeline();
3157 
3158  if ((lasterr != OPTIX_SUCCESS) /* && (verbose == RT_VERB_DEBUG) */ )
3159  printf("TachyonOptiX) An error occured during pipeline regen!\n");
3160  }
3161 
3162  double start_AS_build = wkf_timer_timenow(rt_timer);
3163 
3164  // start IAS + SBT (re)builds
3165  build_scene_IAS();
3166  if ((lasterr != OPTIX_SUCCESS) /* && (verbose == RT_VERB_DEBUG) */ )
3167  printf("TachyonOptiX) An error occured during AS regen!\n");
3168 
3169  // (re)build SBT
3170  if (regen_optix_sbt) {
3171  SBT_clear(); // only zero out the SBT, don't free GPU mem
3172  SBT_create_programs();
3173  SBT_create_hitgroups();
3174 #if 0
3175  } else if (regen_optix_materials) {
3176  // XXX alpha/opacity optimization:
3177  // Update hitgroup-flattened opacity information used to improve
3178  // anyhit performance by avoiding pointer/index chasing.
3179  // XXX update hitgroup records with new geomflags from materials, etc.
3180  SBT_update_hitgroup_geomflags();
3181  hitgroupRecordsBuffer.resize_upload(h, stream);
3182 #endif
3183  }
3184 
3185 
3186  if ((lasterr != OPTIX_SUCCESS) /* && (verbose == RT_VERB_DEBUG) */ )
3187  printf("TachyonOptiX) An error occured during SBT regen!\n");
3188 
3189  time_ctx_AS_build = wkf_timer_timenow(rt_timer) - start_AS_build;
3190 
3191  // upload current materials
3192  if (regen_optix_materials) {
3193  materialsBuffer.resize_upload(materialcache);
3194  regen_optix_materials=0; // no need to re-upload until a change occurs
3195  }
3196 
3197  // upload current lights
3198  if (regen_optix_lights) {
3199  directionalLightsBuffer.resize_upload(directional_lights);
3200  positionalLightsBuffer.resize_upload(positional_lights);
3201  regen_optix_lights=0; // no need to re-upload until a change occurs
3202  }
3203 
3204  if ((lasterr != OPTIX_SUCCESS) /* && (verbose == RT_VERB_DEBUG) */ )
3205  printf("TachyonOptiX) An error occured during materials/lights regen!\n");
3206 
3207  //
3208  // update the launch parameters that we'll pass to the optix launch:
3209  //
3210  rtLaunch.frame.size = make_int2(width, height);
3211  rtLaunch.frame.colorspace = colorspace;
3212 
3213  // XXX tone mapping params are hard-coded
3216 
3217  rtLaunch.frame.framebuffer = (uchar4*) framebuffer.cu_dptr();
3218  rtLaunch.frame.accum_buffer = (float4*) accumulation_buffer.cu_dptr();
3219 
3220 #if defined(TACHYON_OPTIXDENOISER)
3221  rtLaunch.frame.denoiser_colorbuffer = (float4*) denoiser_colorbuffer.cu_dptr();
3222  rtLaunch.frame.denoiser_enabled = denoiser_enabled;
3223 #endif
3224 
3225 #if defined(TACHYON_RAYSTATS)
3226  rtLaunch.frame.raystats1_buffer = (uint4*) raystats1_buffer.cu_dptr();
3227  rtLaunch.frame.raystats2_buffer = (uint4*) raystats2_buffer.cu_dptr();
3228 #endif
3229 
3230  // update material table pointer
3231  rtLaunch.materials = (rt_material *) materialsBuffer.cu_dptr();
3232 
3233  // finalize camera parms
3234  rtLaunch.cam.pos = make_float3(cam_pos[0], cam_pos[1], cam_pos[2]);
3235  rtLaunch.cam.U = make_float3(cam_U[0], cam_U[1], cam_U[2]);
3236  rtLaunch.cam.V = make_float3(cam_V[0], cam_V[1], cam_V[2]);
3237  rtLaunch.cam.W = make_float3(cam_W[0], cam_W[1], cam_W[2]);
3238  rtLaunch.cam.zoom = cam_zoom;
3239 
3240  rtLaunch.cam.dof_enabled = cam_dof_enabled;
3241  rtLaunch.cam.dof_focal_dist = cam_dof_focal_dist;
3242  rtLaunch.cam.dof_aperture_rad = cam_dof_focal_dist / (2.0f * cam_zoom * cam_dof_fnumber);
3243 
3244  rtLaunch.cam.stereo_enabled = cam_stereo_enabled;
3245  rtLaunch.cam.stereo_eyesep = cam_stereo_eyesep;
3246  rtLaunch.cam.stereo_convergence_dist = cam_stereo_convergence_dist;
3247 
3248 
3249  // populate rtLaunch scene data
3250  rtLaunch.scene.bg_color = make_float3(scene_bg_color[0],
3251  scene_bg_color[1],
3252  scene_bg_color[2]);
3253  rtLaunch.scene.bg_color_grad_top = make_float3(scene_bg_grad_top[0],
3254  scene_bg_grad_top[1],
3255  scene_bg_grad_top[2]);
3256  rtLaunch.scene.bg_color_grad_bot = make_float3(scene_bg_grad_bot[0],
3257  scene_bg_grad_bot[1],
3258  scene_bg_grad_bot[2]);
3259  rtLaunch.scene.bg_grad_updir = make_float3(scene_bg_grad_updir[0],
3260  scene_bg_grad_updir[1],
3261  scene_bg_grad_updir[2]);
3262  rtLaunch.scene.bg_grad_topval = scene_bg_grad_topval;
3263  rtLaunch.scene.bg_grad_botval = scene_bg_grad_botval;
3264 
3265  // this has to be recomputed prior to rendering when topval/botval change
3266  scene_bg_grad_invrange = 1.0f / (scene_bg_grad_topval - scene_bg_grad_botval);
3267  rtLaunch.scene.bg_grad_invrange = scene_bg_grad_invrange;
3268 
3269  // Add noise to gradient backgrounds to prevent Mach banding effects,
3270  // particularly noticable in video streams or movie renderings.
3271  // Compute the delta between the top and bottom gradient colors and
3272  // calculate the noise magnitude required, such that by adding it to the
3273  // scalar interpolation parameter we get more than +/-1ulp in the
3274  // resulting interpolated color, as represented in an 8bpp framebuffer.
3276 
3277  // Ideally the noise mag calc would take into account both max color delta
3278  // and launch_dim.y to avoid banding even with very subtle gradients.
3279  rtLaunch.scene.bg_grad_noisemag = (3.0f/256.0f) / (maxcoldelta + 0.0005);
3280 
3281  rtLaunch.scene.fog_mode = fog_mode;
3282  rtLaunch.scene.fog_start = fog_start;
3283  rtLaunch.scene.fog_end = fog_end;
3284  rtLaunch.scene.fog_density = fog_density;
3285 
3286  rtLaunch.scene.epsilon = scene_epsilon;
3287  rtLaunch.max_depth = scene_max_depth;
3288  rtLaunch.max_trans = scene_max_trans;
3289 
3290  rtLaunch.aa_samples = 1; // aa_samples;
3291 
3292  rtLaunch.lights.shadows_enabled = shadows_enabled;
3293  rtLaunch.lights.ao_samples = ao_samples;
3294  if (ao_samples)
3295  rtLaunch.lights.ao_lightscale = 2.0f / ao_samples;
3296  else
3297  rtLaunch.lights.ao_lightscale = 0.0f;
3298 
3299  rtLaunch.lights.ao_ambient = ao_ambient;
3300  rtLaunch.lights.ao_direct = ao_direct;
3301  rtLaunch.lights.ao_maxdist = ao_maxdist;
3302  rtLaunch.lights.headlight_mode = headlight_mode;
3303 
3304  rtLaunch.lights.num_dir_lights = directional_lights.size();
3305  rtLaunch.lights.dir_lights = (float3 *) directionalLightsBuffer.cu_dptr();
3306  rtLaunch.lights.num_pos_lights = positional_lights.size();
3307  rtLaunch.lights.pos_lights = (float3 *) positionalLightsBuffer.cu_dptr();
3308 
3309  time_ctx_validate = wkf_timer_timenow(rt_timer) - startctxtime;
3310 
3311  if (verbose == RT_VERB_DEBUG) {
3312  printf("TachyonOptiX) launching render: %d x %d\n", width, height);
3313  }
3314 
3316 }
3317 
3318 
3320  DBG();
3321  if (!context_created)
3322  return;
3323 }
3324 
3325 
3327  DBG();
3328  if (!context_created)
3329  return;
3330 
3331  wkf_timer_start(rt_timer);
3332  double rendstarttime = wkf_timer_timenow(rt_timer);
3333 
3334  PROFILE_PUSH_RANGE("TachyonOptiX::render()", RTPROF_RENDER);
3335 
3337  render_compile_and_validate();
3338  double starttime = wkf_timer_timenow(rt_timer);
3339 
3340  //
3341  // run the renderer
3342  //
3343  if (lasterr == OPTIX_SUCCESS) {
3344  // Render only to the accumulation buffer for one less than the
3345  // total required number of passes
3347 
3348  int samples_per_pass = 1;
3349  rtLaunch.aa_samples = samples_per_pass;
3350 
3351  PROFILE_PUSH_RANGE("TachyonOptiX--Render Loop", RTPROF_RENDERRT);
3352  for (int p=0; p<aa_samples; p+=samples_per_pass) {
3353  PROFILE_PUSH_RANGE("TachyonOptiX--launchParamsBuffer.upload()", RTPROF_TRANSFER);
3354 
3355  // advance to next subrame index, needed by both RNGs and
3356  // for accumulation_buffer handling
3357  // calc normalization factor for the final subframe index we'll have
3358  // just as we return and copy out the color buffer
3359  rtLaunch.frame.subframe_index += samples_per_pass;
3361 
3362  // copy the accumulation buffer image data to the framebuffer and perform
3363  // type conversion and normaliztion on the image data when we reach
3364  // the last subframe in this internal rendering loop.
3365  if (p >= (aa_samples - samples_per_pass)) {
3367  }
3368 
3369  // update launch params with current buffer and subframe info
3370  launchParamsBuffer.upload(&rtLaunch, 1, stream);
3371  PROFILE_STREAM_SYNC_PRETTY(stream); // sync only for clearer profile traces
3373 
3374  PROFILE_PUSH_RANGE("TachyonOptiX--optixLaunch()", RTPROF_RENDERRT);
3375  lasterr = optixLaunch(pipe, stream,
3376  launchParamsBuffer.cu_dptr(),
3377  launchParamsBuffer.get_size(),
3378  &sbt,
3379  rtLaunch.frame.size.x,
3380  rtLaunch.frame.size.y,
3381  1);
3382 
3383  cudaStreamSynchronize(stream);
3385 
3386  // ensure framebuffer clear is disabled after the first rendering pass
3388  }
3389 
3390  if (lasterr != OPTIX_SUCCESS) {
3391  printf("TachyonOptiX) Error during rendering. Rendering aborted.\n");
3392  }
3393 
3394  double rtendtime = wkf_timer_timenow(rt_timer);
3395  time_ray_tracing = rtendtime - starttime;
3396  double totalrendertime = rtendtime - rendstarttime;
3397 
3398  //
3399  // Perform denoising if enabled and available
3400  //
3401  PROFILE_PUSH_RANGE("TachyonOptiX--denoiser_launch()", RTPROF_RENDERRT);
3402  denoiser_launch();
3403  cudaStreamSynchronize(stream);
3404  double denoiseendtime = wkf_timer_timenow(rt_timer);
3405  double denoise_time = denoiseendtime - rtendtime;
3407 
3408  if (lasterr != OPTIX_SUCCESS) {
3409  printf("TachyonOptiX) Error during denoising. Rendering aborted.\n");
3410  }
3411 
3412  if (verbose == RT_VERB_TIMING || verbose == RT_VERB_DEBUG) {
3413  printf("TachyonOptiX) Render Time: %.2fms, %.1fFPS\n",
3414  totalrendertime * 1.0e3, 1.0 / totalrendertime);
3415  printf("TachyonOptiX) (AS %.2fms, RT %.2fms, DN %.2fms, io %.2fms)\n",
3416  time_ctx_AS_build * 1.0e3, time_ray_tracing * 1.0e3,
3417  denoise_time * 1.0e3, time_image_io * 1.0e3);
3418  }
3419  } else {
3420  printf("TachyonOptiX) An error occured prior to rendering. Rendering aborted.\n");
3421  }
3422 
3425 }
3426 
3427 
3428 
3429 //
3430 // Report ray tracing performance statistics
3431 //
3433 #if defined(TACHYON_RAYSTATS)
3434  // no stats data
3435  if (rtLaunch.frame.size.x < 1 || rtLaunch.frame.size.y < 1) {
3436  printf("TachyonOptiX) No data in ray stats buffers!\n");
3437  return;
3438  }
3439  CUERR
3440 
3441  int framesz = rtLaunch.frame.size.x * rtLaunch.frame.size.y;
3442  size_t bufsz = framesz * sizeof(uint4);
3443  uint4 *raystats1 = (uint4 *) calloc(1, bufsz);
3444  uint4 *raystats2 = (uint4 *) calloc(1, bufsz);
3445  raystats1_buffer.download(raystats1, framesz);
3446  raystats2_buffer.download(raystats2, framesz);
3447  CUERR
3448 
3449  // no stats data
3450  if (rtLaunch.frame.size.x < 1 || rtLaunch.frame.size.y < 1) {
3451  printf("TachyonOptiX) No data in ray stats buffers!\n");
3452  return;
3453  }
3454 
3455  // collect and sum all per-pixel ray stats
3456  unsigned long misses=0, transkips=0, primaryrays=0, shadowlights=0,
3457  shadowao=0, transrays=0, reflrays=0;
3458 
3459  // accumulate per-pixel ray stats into totals
3460  for (int i=0; i<framesz; i++) {
3461  primaryrays += raystats1[i].x;
3462  shadowlights += raystats1[i].y;
3463  shadowao += raystats1[i].z;
3464  misses += raystats1[i].w;
3465  transrays += raystats2[i].x;
3466  transkips += raystats2[i].y;
3467  // XXX raystats2[i].z unused at present...
3468  reflrays += raystats2[i].w;
3469  }
3470  unsigned long totalrays = primaryrays + shadowlights + shadowao
3471  + transrays + reflrays;
3472 
3473  printf("TachyonOptiX)\n");
3474  printf("TachyonOptiX) TachyonOptiX Scene Ray Tracing Statistics:\n");
3475  printf("TachyonOptiX) ----------------------------------------\n");
3476  printf("TachyonOptiX) Image resolution: %d x %d \n",
3478  printf("TachyonOptiX) Pixel count: %d\n", framesz);
3479  printf("TachyonOptiX) ----------------------------------------\n");
3480  printf("TachyonOptiX) Misses: %lu\n", misses);
3481  printf("TachyonOptiX) Transmission Any-Hit Skips: %lu\n", transkips);
3482  printf("TachyonOptiX) ----------------------------------------\n");
3483  printf("TachyonOptiX) Primary Rays: %lu\n", primaryrays);
3484  printf("TachyonOptiX) Dir-Light Shadow Rays: %lu\n", shadowlights);
3485  printf("TachyonOptiX) AO Shadow Rays: %lu\n", shadowao);
3486  printf("TachyonOptiX) Transmission Rays: %lu\n", transrays);
3487  printf("TachyonOptiX) Reflection Rays: %lu\n", reflrays);
3488  printf("TachyonOptiX) ----------------------------------------\n");
3489  printf("TachyonOptiX) Total Rays: %lu\n", totalrays);
3490  printf("TachyonOptiX) Total Rays: %g\n", totalrays * 1.0);
3491  if (time_ray_tracing > 0.0) {
3492  printf("TachyonOptiX) Pure ray tracing rays/sec: %g\n", totalrays / time_ray_tracing);
3493  }
3494  double totalruntime = time_ray_tracing + time_ctx_AS_build;
3495  if (totalruntime > 0.0) {
3496  printf("TachyonOptiX) Overall effective rays/sec: %g\n", totalrays / totalruntime);
3497  }
3498  printf("TachyonOptiX)\n");
3499 
3500  free(raystats1);
3501  free(raystats2);
3502 #else
3503  printf("TachyonOptiX) Compiled without ray stats buffers!\n");
3504 #endif
3505 }
3506 
3507 
3508 
3509 
3510 //
3511 // A few structure padding/alignment/size diagnostic helper routines
3512 //
3514  printf("TachyonOptiX) internal data structure information\n");
3515 
3516  printf("TachyonOptiX) Hitgroup SBT record info:\n");
3517  printf(" SBT rec align size: %d b\n", OPTIX_SBT_RECORD_ALIGNMENT);
3518  printf(" total size: %d b\n", sizeof(HGRecord));
3519  printf(" header size: %d b\n", sizeof(((HGRecord*)0)->header));
3520  printf(" data offset: %d b\n", offsetof(HGRecord, data));
3521  printf(" data size: %d b\n", sizeof(((HGRecord*)0)->data));
3522  printf(" material size: %d b\n", offsetof(HGRecord, data.cone) - offsetof(HGRecord, data.prim_color));
3523  printf(" geometry size: %d b\n", sizeof(HGRecord) - offsetof(HGRecord, data.trimesh));
3524  printf("\n");
3525  printf(" prim_color offset: %d b\n", offsetof(HGRecord, data.prim_color));
3526  printf(" uniform_color offset: %d b\n", offsetof(HGRecord, data.uniform_color));
3527  printf(" materialindex offset: %d b\n", offsetof(HGRecord, data.materialindex));
3528  printf(" geometry offset: %d b\n", offsetof(HGRecord, data.cone));
3529 
3530  printf("\n");
3531  printf(" geometry union size: %d b\n", sizeof(HGRecord) - offsetof(HGRecord, data.trimesh));
3532  printf(" cone sz: %d b\n", sizeof(((HGRecord*)0)->data.cone ));
3533  printf(" cyl sz: %d b\n", sizeof(((HGRecord*)0)->data.cyl ));
3534  printf(" ring sz: %d b\n", sizeof(((HGRecord*)0)->data.ring ));
3535  printf(" sphere sz: %d b\n", sizeof(((HGRecord*)0)->data.sphere ));
3536  printf(" trimesh sz: %d b\n", sizeof(((HGRecord*)0)->data.trimesh));
3537  printf(" WASTED hitgroup sz: %d b\n", sizeof(HGRecord) - (sizeof(((HGRecord*)0)->header) + sizeof(((HGRecord*)0)->data)));
3538  printf("\n");
3539 }
3540 
3541 
3542 
3543 //
3544 // geometry instance group management
3545 //
3547  TachyonInstanceGroup g = {};
3548  sceneinstancegroups.push_back(g);
3549  return int(sceneinstancegroups.size()) - 1;
3550 }
3551 
3553  TachyonInstanceGroup &g = sceneinstancegroups[idx];
3554  return 0;
3555 }
3556 
3557 
3559  return 0;
3560 }
3561 
3562 
3563 #if 0
3564 int TachyonOptiX::set_geom_instance_group_xforms(int idx, int n, float [][16]) {
3565  return 0;
3566 }
3567 #endif
3568 
3569 
3570 //
3571 // XXX short-term host API hacks to facilitate early bring-up and testing
3572 //
3573 void TachyonOptiX::add_conearray(ConeArray & newmodel, int materialidx) {
3574  if (!context_created)
3575  return;
3576 
3577  newmodel.materialindex = materialidx; // XXX overwrite hack...
3578 
3579  conearrays.push_back(newmodel);
3580  regen_optix_sbt=1;
3581 }
3582 
3583 void TachyonOptiX::add_curvearray(CurveArray & newmodel, int materialidx) {
3584  if (!context_created)
3585  return;
3586 
3587  newmodel.materialindex = materialidx; // XXX overwrite hack...
3588 
3589  curvearrays.push_back(newmodel);
3590  regen_optix_sbt=1;
3591 }
3592 
3593 void TachyonOptiX::add_cylarray(CylinderArray & newmodel, int materialidx) {
3594  if (!context_created)
3595  return;
3596 
3597  newmodel.materialindex = materialidx; // XXX overwrite hack...
3598 
3599  cyarrays.push_back(newmodel);
3600  regen_optix_sbt=1;
3601 }
3602 
3603 
3604 void TachyonOptiX::add_quadmesh(QuadMesh & newmodel, int materialidx) {
3605  if (!context_created)
3606  return;
3607 
3608  newmodel.materialindex = materialidx; // XXX overwrite hack...
3609 
3610  quadmeshes.push_back(newmodel);
3611  regen_optix_sbt=1;
3612 }
3613 
3614 
3615 void TachyonOptiX::add_ringarray(RingArray & newmodel, int materialidx) {
3616  if (!context_created)
3617  return;
3618 
3619  newmodel.materialindex = materialidx; // XXX overwrite hack...
3620 
3621  riarrays.push_back(newmodel);
3622  regen_optix_sbt=1;
3623 }
3624 
3625 
3626 void TachyonOptiX::add_spherearray(SphereArray & newmodel, int materialidx) {
3627  if (!context_created)
3628  return;
3629 
3630  newmodel.materialindex = materialidx; // XXX overwrite hack...
3631 
3632  sparrays.push_back(newmodel);
3633  regen_optix_sbt=1;
3634 }
3635 
3636 
3637 void TachyonOptiX::add_trimesh(TriangleMesh & newmodel, int materialidx) {
3638  if (!context_created)
3639  return;
3640 
3641  newmodel.materialindex = materialidx; // XXX overwrite hack...
3642 
3643  trimeshes.push_back(newmodel);
3644  regen_optix_sbt=1;
3645 }
3646 
3647 
3648 //
3649 // Compiled-in PTX src, if available
3650 //
3651 char *TachyonOptiX::internal_compiled_ptx_src(void) {
3652 #if 1 && defined(TACHYON_INTERNAL_COMPILED_SRC)
3653  const char *ptxsrc =
3654  #include "TachyonOptiXShaders.ptxinc"
3655  ;
3656 
3657  int len = strlen(ptxsrc);
3658  char *ptxstring = (char *) calloc(1, len + 1);
3659  strcpy(ptxstring, ptxsrc);
3660  return ptxstring;
3661 #else
3662  return NULL;
3663 #endif
3664 }
3665 
3666 
3667 
3668 
3669 
__host__ __device__ float3 normalize(const float3 &v)
Normalize input vector to unit length.
float3 U
camera orthonormal U (right) axis
Several OptiX APIs make use of CUDA driver API pointer types (CUdevicepointer) so it becomes worthwhi...
Definition: TachyonOptiX.h:328
void minimize_memory_use(void)
reduce active memory footprint without destroying the scene by freeing internal temporary buffers use...
std::vector< float3 PINALLOCS(float3)> center
Definition: TachyonOptiX.h:226
std::vector< float3 PINALLOCS(float3)> primcolors3f
Definition: TachyonOptiX.h:263
omnidirectional octahedral
Definition: TachyonOptiX.h:643
std::vector< uint4 PINALLOCS(uint4) > packednormals
Definition: TachyonOptiX.h:289
std::vector< float3 PINALLOCS(float3)> center
Definition: TachyonOptiX.h:245
uchar4 * framebuffer
8-bit unorm RGBA framebuffer
float accum_normalize
precalc 1.0f / subframe_index
#define CUERR
std::vector< float3 PINALLOCS(float3)> vertcolors3f
Definition: TachyonOptiX.h:217
int matflags
alpha/cutout transparency flags
int materialindex
Definition: TachyonOptiX.h:184
int headlight_mode
Extra VR camera-located headlight.
void free()
free allocated memory
Definition: TachyonOptiX.h:433
int add_tex3d_rgba4u(const unsigned char *img, int xres, int yres, int zres, int texflags, int userindex)
define image to be used in a texture map
float bg_grad_invrange
miss background gradient inverse range
float3 bg_grad_updir
miss background gradient up direction
int create_geom_instance_group()
Create geometry instance group.
std::vector< float PINALLOCS(float)> radius
Definition: TachyonOptiX.h:199
__constant__ tachyonLaunchParams rtLaunch
launch parameters in constant memory, filled by optixLaunch)
int update_colorbuffer
accumulation copyout flag
planetarium dome master
Definition: TachyonOptiX.h:641
int add_tex2d_rgba4u(const unsigned char *img, int xres, int yres, int texflags, int userindex)
define image to be used in a texture map
cudaTextureObject_t tex
texture, non-zero if valid
static TachyonOptiX::Verbosity get_verbose_flag(int inform=0)
int tonemap_mode
output tone mapping mode
int add_material_textured(float ambient, float diffuse, float specular, float shininess, float reflectivity, float opacity, float outline, float outlinewidth, int transmode, int textureindex, int userindex)
float ao_lightscale
2.0f/float(ao_samples)
std::vector< int PINALLOCS(int)> segindices
Definition: TachyonOptiX.h:261
float dof_aperture_rad
DoF (defocus blur) aperture radius.
std::vector< float3 PINALLOCS(float3)> apex
Definition: TachyonOptiX.h:179
float reflectivity
mirror reflectance coefficient
float shininess
specular highlight size (exponential)
void print_internal_struct_info(void)
diagnostic info routines
float ao_maxdist
AO maximum occlusion distance.
__host__ __device__ float4 make_float4(const float3 &a, const float &b)
size_t get_size(void)
query current buffer size in bytes
Definition: TachyonOptiX.h:350
ring SBT index multiplier
__host__ __device__ float3 make_float3(const float s)
void add_directional_light(const float *dir, const float *color)
float3 W
camera orthonormal W (view) axis
__host__ __device__ float3 fmaxf(const float3 &a, const float3 &b)
float3 pos
camera position
std::vector< float3 PINALLOCS(float3)> base
Definition: TachyonOptiX.h:178
std::vector< float3 PINALLOCS(float3)> primcolors3f
Definition: TachyonOptiX.h:200
int max_trans
max transparent surface crossing count
omnidirectional lat/long
Definition: TachyonOptiX.h:642
float tonemap_exposure
tone mapping exposure gain parameter
cylinder SBT index multiplier
shadows disabled
std::vector< float3 PINALLOCS(float3) > tex3d
Definition: TachyonOptiX.h:294
int userindex
material user index, positive if valid
float ao_direct
AO direct lighting scaling factor.
#define RT_DEFAULT_MAX
int add_material(float ambient, float diffuse, float specular, float shininess, float reflectivity, float opacity, float outline, float outlinewidth, int transmode, int userindex)
add a material with an associated user-provided index
std::vector< float PINALLOCS(float)> apexrad
Definition: TachyonOptiX.h:181
void add_curvearray(CurveArray &model, int matidx)
CPU and GPU profiling utility macros/routines.
int fb_clearall
clear/overwrite all FB components
struct tachyonLaunchParams::@4 lights
__host__ __device__ float3 fabsf(const float3 &a)
#define PROFILE_PUSH_RANGE(name, cid)
Pushes a time range annotation onto the profiler&#39;s trace stack, beginning at the time of submission...
Definition: ProfileHooks.h:275
shadow probe/AO rays
std::vector< float PINALLOCS(float)> radius
Definition: TachyonOptiX.h:246
int fog_mode
fog type (or off)
void framebuffer_resize(int fbwidth, int fbheight)
std::vector< float3 PINALLOCS(float3) > normals
Definition: TachyonOptiX.h:288
std::vector< float3 PINALLOCS(float3)> primcolors3f
Definition: TachyonOptiX.h:182
int materialindex
Definition: TachyonOptiX.h:232
float3 pos
point light position
static int device_count(void)
static GPU device query
void set_camera_lookat(const float *at, const float *V)
set camera orientation to look "at" a point in space, with a given "up" direction (camera ONB "V" vec...
void destroy_scene(void)
void destroy_materials()
CUdeviceptr cu_dptr() const
Definition: TachyonOptiX.h:341
int destroy_geom_instance_group(int idx)
void print_raystats_info(void)
report performance statistics
float bg_grad_topval
miss background gradient top value
std::vector< float3 PINALLOCS(float3) > vertices
Definition: TachyonOptiX.h:286
float dof_focal_dist
DoF focal plane distance.
void framebuffer_destroy(void)
enable cutout/transparency
static int device_list(int **, char ***)
static methods for querying OptiX-supported GPU hardware independent of whether we actually have an a...
std::vector< float3 PINALLOCS(float3)> start
Definition: TachyonOptiX.h:197
Adobe sRGB (gamma 2.2)
#define RTPROF_RENDER
trace color for overall rendering
Definition: TachyonOptiX.h:172
structure containing Tachyon texture (only used on host side)
Output timing/perf data only.
Definition: TachyonOptiX.h:649
float3 dir
directional light direction
conventional orthographic
Definition: TachyonOptiX.h:639
float fog_end
radial/linear fog end/max distance
#define RTPROF_GEOM
trace color for geometry processing
Definition: TachyonOptiX.h:175
void framebuffer_clear(void)
int shadows_enabled
global shadow flag
int transmode
transparency behavior
int stereo_enabled
stereo rendering on/off
void add_spherearray(SphereArray &model, int matidx)
float3 * dir_lights
list of directional light directions
void update_rendering_state(int interactive)
float bg_grad_botval
miss background gradient bottom value
void add_ringarray(RingArray &model, int matidx)
struct tachyonLaunchParams::@3 scene
conventional perspective
Definition: TachyonOptiX.h:638
float fog_start
radial/linear fog start distance
No console output.
Definition: TachyonOptiX.h:648
float specular
specular reflectance coefficient
enable alpha transparency
std::vector< float PINALLOCS(float)> inrad
Definition: TachyonOptiX.h:228
no frustum clipping
Definition: TachyonOptiX.h:619
std::vector< float3 PINALLOCS(float3) > vertcolors3f
Definition: TachyonOptiX.h:290
void add_conearray(ConeArray &model, int matidx)
#define PROFILE_POP_RANGE()
Pops the innermost time range off of the profiler&#39;s trace stack, at the time of execution.
Definition: ProfileHooks.h:279
only clamp the color values [0,1]
std::vector< float3 PINALLOCS(float3)> primcolors3f
Definition: TachyonOptiX.h:230
#define RTPROF_RENDERRT
trace color for specifically for RT
Definition: TachyonOptiX.h:173
void log_callback(unsigned int level, const char *tag, const char *msg)
console output logging callback
float3 V
camera orthonormal V (up) axis
std::vector< float3 PINALLOCS(float3)> primcolors3f
Definition: TachyonOptiX.h:219
void set_size(size_t newsize)
(re)allocate buffer of requested size
Definition: TachyonOptiX.h:355
std::vector< float2 PINALLOCS(float2) > tex2d
Definition: TachyonOptiX.h:293
std::vector< float3 PINALLOCS(float3)> vertices
Definition: TachyonOptiX.h:213
void * dptr() const
Definition: TachyonOptiX.h:333
std::vector< float3 PINALLOCS(float3)> normal
Definition: TachyonOptiX.h:227
#define RTPROF_GENERAL
trace color for general operations
Definition: TachyonOptiX.h:169
std::vector< int4 PINALLOCS(int4)> indices
Definition: TachyonOptiX.h:214
std::vector< uchar4 PINALLOCS(uchar4)> vertcolors4u
Definition: TachyonOptiX.h:218
float stereo_convergence_dist
stereo convergence distance (world)
int2 size
framebuffer size
total count of SBT geometric multipliers
float3 bg_color_grad_bot
miss background gradient (bottom)
static void TachyonOptixLogCallback(unsigned int level, const char *tag, const char *message, void *cbdata)
void framebuffer_download_rgb4u(unsigned char *imgrgb4u)
std::vector< float3 PINALLOCS(float3) > primcolors3f
Definition: TachyonOptiX.h:292
std::vector< float3 PINALLOCS(float3)> normals
Definition: TachyonOptiX.h:215
int dof_enabled
DoF (defocus blur) on/off.
int ao_samples
number of AO samples per AA ray
int subframe_index
accumulation subframe index
normal radiance rays
__host__ __device__ float3 cross(const float3 &a, const float3 &b)
calculate the cross product between vectors a and b.
int finalize_geom_instance_group(int idx)
denoiser disabled
int userindex
material user index, positive if valid
float bg_grad_noisemag
miss background gradient noise magnitude
int aa_samples
AA samples per launch.
void download(T *t, size_t cnt)
Synchronous download from GPU device memory.
Definition: TachyonOptiX.h:560
cone SBT index multiplier
void framebuffer_config(int fbwidth, int fbheight, int interactive)
int materialindex
Definition: TachyonOptiX.h:265
std::vector< float3 PINALLOCS(float3)> vertices
Definition: TachyonOptiX.h:259
OptixTraversableHandle traversable
global OptiX scene traversable handle
float3 bg_color_grad_top
miss background gradient (top)
#define RTPROF_SBT
trace color for SBT construction
Definition: TachyonOptiX.h:171
quad SBT index multiplier
struct tachyonLaunchParams::@2 frame
std::vector< uint4 PINALLOCS(uint4)> packednormals
Definition: TachyonOptiX.h:216
void upload(const T *t, size_t cnt)
Synchronous upload to GPU device memory.
Definition: TachyonOptiX.h:554
Store all hitgroup records for a given geometry together for simpler dynamic updates.
void add_positional_light(const float *pos, const float *color)
float opacity
surface opacity
sphere SBT index multiplier
std::vector< float PINALLOCS(float)> outrad
Definition: TachyonOptiX.h:229
#define PROFILE_STREAM_SYNC_PRETTY(stream)
Helper macro that can conditionally insert extra calls to cudaStreamSynchronize() into an application...
Definition: ProfileHooks.h:289
static unsigned int optix_version(void)
static OptiX version query
int colorspace
output colorspace
std::vector< int3 PINALLOCS(int3) > indices
Definition: TachyonOptiX.h:287
struct tachyonLaunchParams::@5 cam
void destroy_lights()
int num_dir_lights
directional light count
float3 * pos_lights
list of positional light positions
std::vector< float PINALLOCS(float)> vertradii
Definition: TachyonOptiX.h:260
#define DBG()
Output fully verbose debug info.
Definition: TachyonOptiX.h:650
void clear_persist_allocation(void)
clear "used" size to zero, but keep existing device allocation
Definition: TachyonOptiX.h:424
std::vector< float PINALLOCS(float)> baserad
Definition: TachyonOptiX.h:180
int materialindex
Definition: TachyonOptiX.h:221
#define RTPROF_TRANSFER
trace color for host-GPU DMA
Definition: TachyonOptiX.h:174
#define RTPROF_ACCEL
trace color for RT AS builds
Definition: TachyonOptiX.h:170
static __forceinline__ __device__ float4 sRGB_to_linear_approx_20(const float4 &rgba)
structure containing Tachyon material properties
rt_material * materials
device memory material array
float diffuse
diffuse reflectance coefficient
void framebuffer_colorspace(int colspace)
int material_index_from_user_index(int userindex)
locate material via user index
Tachyon ray tracing host side routines and internal APIs that provide the core ray OptiX-based RTX-ac...
void add_cylarray(CylinderArray &model, int matidx)
float fog_density
exponential fog density
float3 bg_color
miss background color
~TachyonOptiX(void)
int num_pos_lights
positional light count
float ambient
constant ambient light factor
omnidirectional cubemap
Definition: TachyonOptiX.h:640
float ao_ambient
AO ambient factor.
float4 * accum_buffer
32-bit FP RGBA accumulation buffer
std::vector< float3 PINALLOCS(float3)> primcolors3f
Definition: TachyonOptiX.h:247
void add_trimesh(TriangleMesh &model, int matidx)
float stereo_eyesep
stereo eye separation, in world coords
Adobe sRGB (gamma 2.2)
std::vector< float3 PINALLOCS(float3)> end
Definition: TachyonOptiX.h:198
int max_depth
global max ray tracing recursion depth
enable tex cutout transparency
float epsilon
global epsilon value
void add_quadmesh(QuadMesh &model, int matidx)
std::vector< uchar4 PINALLOCS(uchar4) > vertcolors4u
Definition: TachyonOptiX.h:291
total count of ray types
Definition: util.c:161
void resize_upload(const std::vector< T > &vecT)
Combination of a buffer resize with synchronous upload to GPU device memory.
Definition: TachyonOptiX.h:471
static __forceinline__ __device__ float4 linear_to_sRGB(const float4 &lin)
int image_index_from_user_index(int userindex)
locate texture via user index
float zoom
camera zoom factor