Tachyon (current)  Current Main Branch
TachyonOptiXShaders.cu
Go to the documentation of this file.
1 /*
2  * TachyonOptiXShaders.cu - OptiX PTX shading and ray intersection routines
3  *
4  * (C) Copyright 2013-2022 John E. Stone
5  * SPDX-License-Identifier: BSD-3-Clause
6  *
7  * $Id: TachyonOptiXShaders.cu,v 1.112 2022/04/20 03:30:38 johns Exp $
8  *
9  */
10 
18 //
19 // This is a second generation of the Tachyon implementation for OptiX.
20 // The new implementation favors the strengths of OptiX 7, and uses
21 // OptiX ray payload registers, direct CUDA interoperability and advanced
22 // CUDA features for both performance and maintainability.
23 //
24 // This software and its line of antecedents are described in:
25 // "Multiscale modeling and cinematic visualization of photosynthetic
26 // energy conversion processes from electronic to cell scales"
27 // M. Sener, S. Levy, J. E. Stone, AJ Christensen, B. Isralewitz,
28 // R. Patterson, K. Borkiewicz, J. Carpenter, C. N. Hunter,
29 // Z. Luthey-Schulten, D. Cox.
30 // J. Parallel Computing, 102, pp. 102698, 2021.
31 // https://doi.org/10.1016/j.parco.2020.102698
32 //
33 // "Omnidirectional Stereoscopic Projections for VR"
34 // J. E. Stone. In, William R. Sherman, editor,
35 // VR Developer Gems, Taylor and Francis / CRC Press, Chapter 24, 2019.
36 // https://www.taylorfrancis.com/chapters/edit/10.1201/b21598-24/omnidirectional-stereoscopic-projections-vr-john-stone
37 //
38 // "Interactive Ray Tracing Techniques for
39 // High-Fidelity Scientific Visualization"
40 // J. E. Stone. In, Eric Haines and Tomas Akenine-Möller, editors,
41 // Ray Tracing Gems, Apress, Chapter 27, pp. 493-515, 2019.
42 // https://link.springer.com/book/10.1007/978-1-4842-4427-2
43 //
44 // "A Planetarium Dome Master Camera"
45 // J. E. Stone. In, Eric Haines and Tomas Akenine-Möller, editors,
46 // Ray Tracing Gems, Apress, Chapter 4, pp. 49-60, 2019.
47 // https://link.springer.com/book/10.1007/978-1-4842-4427-2
48 //
49 // "Immersive Molecular Visualization with Omnidirectional
50 // Stereoscopic Ray Tracing and Remote Rendering"
51 // J. E. Stone, W. R. Sherman, and K. Schulten.
52 // High Performance Data Analysis and Visualization Workshop,
53 // 2016 IEEE International Parallel and Distributed Processing
54 // Symposium Workshops (IPDPSW), pp. 1048-1057, 2016.
55 // http://dx.doi.org/10.1109/IPDPSW.2016.121
56 //
57 // "Atomic Detail Visualization of Photosynthetic Membranes with
58 // GPU-Accelerated Ray Tracing"
59 // J. E. Stone, M. Sener, K. L. Vandivort, A. Barragan, A. Singharoy,
60 // I. Teo, J. V. Ribeiro, B. Isralewitz, B. Liu, B.-C. Goh, J. C. Phillips,
61 // C. MacGregor-Chatwin, M. P. Johnson, L. F. Kourkoutis, C. N. Hunter,
62 // K. Schulten
63 // J. Parallel Computing, 55:17-27, 2016.
64 // http://dx.doi.org/10.1016/j.parco.2015.10.015
65 //
66 // "GPU-Accelerated Molecular Visualization on
67 // Petascale Supercomputing Platforms"
68 // J. E. Stone, K. L. Vandivort, and K. Schulten.
69 // UltraVis'13: Proceedings of the 8th International Workshop on
70 // Ultrascale Visualization, pp. 6:1-6:8, 2013.
71 // http://dx.doi.org/10.1145/2535571.2535595
72 //
73 // "An Efficient Library for Parallel Ray Tracing and Animation"
74 // John E. Stone. Master's Thesis, University of Missouri-Rolla,
75 // Department of Computer Science, April 1998
76 // https://scholarsmine.mst.edu/masters_theses/1747
77 //
78 // "Rendering of Numerical Flow Simulations Using MPI"
79 // J. Stone and M. Underwood.
80 // Second MPI Developers Conference, pages 138-141, 1996.
81 // http://dx.doi.org/10.1109/MPIDC.1996.534105
82 //
83 
84 
85 #include <optix.h>
86 //#include <optix_device.h>
87 #include <stdint.h>
88 
89 #define TACHYON_INTERNAL 1
90 #include "TachyonOptiXShaders.h"
91 
92 // Macros related to ray origin epsilon stepping to prevent
93 // self-intersections with the surface we're leaving
94 // This is a cheesy way of avoiding self-intersection
95 // but it ameliorates the problem.
96 // Since changing the scene epsilon even to large values does not
97 // always cure the problem, this workaround is still required.
98 #define TACHYON_USE_RAY_STEP 1
99 #define TACHYON_TRANS_USE_INCIDENT 1
100 #define TACHYON_RAY_STEP N*rtLaunch.scene.epsilon*4.0f
101 #define TACHYON_RAY_STEP2 ray_direction*rtLaunch.scene.epsilon*4.0f
102 
103 // reverse traversal of any-hit rays for shadows/AO
104 #define REVERSE_RAY_STEP (scene_epsilon*10.0f)
105 #define REVERSE_RAY_LENGTH 3.0f
106 
107 // Macros to enable particular ray-geometry intersection variants that
108 // optimize for speed, or some combination of speed and accuracy
109 #define TACHYON_USE_SPHERES_HEARNBAKER 1
110 
111 
// Compute the row-major 1-D framebuffer index for the current thread
// from the OptiX launch index and launch dimensions.
static __device__ __inline__ int tachyon1DLaunchIndex(void) {
  const uint3 li = optixGetLaunchIndex();
  const uint3 ld = optixGetLaunchDimensions();
  return li.y * ld.x + li.x;
}
120 
// Overload: row-major 1-D index computed from caller-provided launch
// dimensions and launch index, avoiding redundant intrinsic calls.
static __device__ __inline__ int tachyon1DLaunchIndex(uint3 dim, uint3 index) {
  const int rowbase = index.y * dim.x;
  return rowbase + index.x;
}
126 
127 
128 
129 //
130 // OptiX ray processing programs
131 //
132 
135 
// Reassemble a 64-bit pointer from two 32-bit payload register halves:
// i0 carries the high 32 bits, i1 the low 32 bits (see packPointer()).
static __forceinline__ __device__
void *unpackPointer( uint32_t i0, uint32_t i1 ) {
  const uint64_t combined = (static_cast<uint64_t>(i0) << 32) | i1;
  return reinterpret_cast<void*>(combined);
}
142 
// Split a 64-bit pointer into two 32-bit halves suitable for OptiX
// payload registers: high bits into i0, low bits into i1.
static __forceinline__ __device__
void packPointer( void* ptr, uint32_t& i0, uint32_t& i1 ) {
  const uint64_t bits = reinterpret_cast<uint64_t>(ptr);
  i0 = static_cast<uint32_t>(bits >> 32);
  i1 = static_cast<uint32_t>(bits);
}
149 
// Retrieve the per-ray data pointer previously packed into OptiX
// payload registers 0 and 1 by packPointer() at optixTrace() time.
template<typename T>
static __forceinline__ __device__ T *getPRD() {
  const uint32_t p0 = optixGetPayload_0();
  const uint32_t p1 = optixGetPayload_1();
  return reinterpret_cast<T*>( unpackPointer( p0, p1 ) );
}
156 
157 
158 
159 //
160 // Per-ray data "PRD"
161 //
162 
// radiance PRD data is used by closest-hit and miss programs
// NOTE(review): the struct declaration line was lost in this copy of
// the file (gap in the original numbering); restored here.
struct PerRayData_radiance {
  float3 result;     // final shaded surface color
  float alpha;       // alpha value to back-propagate to framebuffer
  float importance;  // importance of recursive ray tree
  int depth;         // current recursion depth
  int transcnt;      // transmission ray surface count/depth
};
171 
// Payload register 2 carries the current antialiasing sample index,
// passed by the raygen program's optixTrace() call for CH/MISS RNG use.
static __forceinline__ __device__ uint32_t getPayloadAAsample() {
  return optixGetPayload_2();
}
176 
177 
#if 0
// XXX we currently use ray payload registers for shadow PRD
// but this is maintained for the inevitable future revisions
// that bring more sophisticated shadow filtering
// (disabled: kept for reference only, not compiled)
struct PerRayData_shadow {
  // attenuation is set to 0.0f at 100% shadow, and 1.0f no occlusion
  float attenuation; // grayscale light filtered by transmissive surfaces
};
#endif
187 
// Read the shadow attenuation factor from payload register 0.
// optixGetPayload_0() returns uint32_t, so reinterpret the raw bits with
// the unsigned intrinsic variant to avoid a signed/unsigned conversion.
static __forceinline__ __device__ float getPayloadShadowAttenuation() {
  return __uint_as_float(optixGetPayload_0());
}
193 
// Store the shadow attenuation factor into payload register 0 as a raw
// bit pattern; __float_as_uint matches the uint32_t payload type and
// the unsigned reinterpret used by the getter.
static __forceinline__ __device__ void setPayloadShadowAttenuation(const float attenuation) {
  optixSetPayload_0(__float_as_uint(attenuation));
}
199 
200 
// Returns the index of the current accumulation subframe, read from the
// launch parameters; used to decorrelate RNG seeds across subframes.
static int __forceinline__ __device__ subframe_count() {
// return (accumCount + progressiveSubframeIndex);
  return rtLaunch.frame.subframe_index;
}
205 
206 
207 
208 //
209 // Device functions for clipping rays by geometric primitives
210 //
211 
212 // fade_start: onset of fading
213 // fade_end: fully transparent, begin clipping of geometry
__device__ void sphere_fade_and_clip(const float3 &hit_point,
                                     const float3 &cam_pos,
                                     float fade_start, float fade_end,
                                     float &alpha) {
  // distance from the camera to the hit point being shaded
  const float dist_to_cam = length(hit_point - cam_pos);

  // Modulate alpha by the hit point's normalized position within the
  // [fade_start, fade_end] interval.  The explicit distance test is
  // omitted since __saturatef() clamps the factor to [0,1] regardless.
  const float fade_range = fade_start - fade_end;
  alpha *= __saturatef((dist_to_cam - fade_start) / fade_range);
}
226 
227 
__device__ void ray_sphere_clip_interval(float3 ray_origin,
                                         float3 ray_direction, float3 center,
                                         float rad, float2 &tinterval) {
  // standard quadratic ray-sphere test on the vector to the center
  const float3 oc = center - ray_origin;
  const float b = dot(oc, ray_direction);
  float disc = b*b + rad*rad - dot(oc, oc);

  if (disc > 0.0f) {
    // positive discriminant: the ray hits; report [near, far] distances
    disc = sqrtf(disc);
    tinterval.x = b - disc;
    tinterval.y = b + disc;
  } else {
    // no hit: report an interval spanning the entire ray extent
    tinterval.x = -RT_DEFAULT_MAX;
    tinterval.y = RT_DEFAULT_MAX;
  }
}
245 
246 
// Clip the ray's parametric interval [tmin, tmax] against the half-space
// defined by 'plane' (xyz = normal, w = offset); points p are kept where
// dot((p,1), plane) >= 0, as shown by the culling test below.
__device__ void clip_ray_by_plane(float3 ray_origin,
                                  float3 ray_direction,
                                  float &tmin, float &tmax,
                                  const float4 plane) {
  float3 n = make_float3(plane);
  float dt = dot(ray_direction, n);
  // parametric distance to the ray/plane intersection; for rays parallel
  // to the plane dt==0 yields +/-inf, which fails the interval test below
  float t = (-plane.w - dot(n, ray_origin))/dt;
  if(t > tmin && t < tmax) {
    // intersection lies within the active interval: shrink the interval
    // on the side determined by the ray direction relative to the normal
    if (dt <= 0) {
      tmax = t;
    } else {
      tmin = t;
    }
  } else {
    // ray interval lies completely on one side of the plane. Test one point.
    float3 p = ray_origin + tmin * ray_direction;
    if (dot(make_float4(p.x, p.y, p.z, 1.0f), plane) < 0) {
      tmin = tmax = RT_DEFAULT_MAX; // cull geometry
    }
  }
}
268 
269 
270 
271 //
272 // Default Tachyon exception handling program
273 // Any OptiX state on the stack will be gone post-exception, so if we
274 // want to store anything it would need to be written to a global
275 // memory allocation.
276 // When printing exception info, all output should be emitted with a single
277 // printf() call or equivalent to ensure correct output ordering.
278 //
// Default exception handler: report the exception type and launch index
// with a single printf() call per case (to keep output ordered), then
// blank the affected framebuffer pixel.
extern "C" __global__ void __exception__all() {
  const int code = optixGetExceptionCode();
  const uint3 launch_index = optixGetLaunchIndex();

  switch (code) {
    case OPTIX_EXCEPTION_CODE_STACK_OVERFLOW:
      printf("TachyonOptiX) Stack overflow, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      break;

    case OPTIX_EXCEPTION_CODE_TRACE_DEPTH_EXCEEDED:
      printf("TachyonOptiX) Max trace depth exceeded, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      break;

    case OPTIX_EXCEPTION_CODE_TRAVERSAL_DEPTH_EXCEEDED:
      printf("TachyonOptiX) Max traversal depth exceeded, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      break;

    case OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_MISS_SBT:
      printf("TachyonOptiX) Invalid miss SBT record idx, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      // optixGetExceptionInvalidSbtOffset()
      break;

    case OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_HIT_SBT:
      printf("TachyonOptiX) Invalid hit SBT record idx, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      // optixGetExceptionInvalidSbtOffset()
      break;

#if OPTIX_VERSION >= 70100
    case OPTIX_EXCEPTION_CODE_BUILTIN_IS_MISMATCH:
      printf("TachyonOptiX) Built-in IS mismatch, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      break;

    case OPTIX_EXCEPTION_CODE_INVALID_RAY:
      printf("TachyonOptiX) Trace call contains Inf/NaN, launch idx (%u,%u):\n"
             "TachyonOptiX) @ %s\n",
             launch_index.x, launch_index.y, optixGetExceptionLineInfo());
      // optixGetExceptionInvalidRay()
      break;

    case OPTIX_EXCEPTION_CODE_CALLABLE_PARAMETER_MISMATCH:
      // fixed format specifiers: launch_index components are unsigned,
      // matching the %u used by every other case above
      printf("TachyonOptiX) Callable param mismatch, launch idx (%u,%u)\n",
             launch_index.x, launch_index.y);
      // optixGetExceptionParameterMismatch()
      break;
#endif

    case OPTIX_EXCEPTION_CODE_TRAVERSAL_INVALID_TRAVERSABLE:
    default:
      printf("TachyonOptiX) Caught exception 0x%X (%d) at launch idx (%u,%u)\n",
             code, code, launch_index.x, launch_index.y );
      break;
  }

  // and write a black pixel to the frame buffer ...
  const int idx = launch_index.x + launch_index.y*rtLaunch.frame.size.x;
  rtLaunch.frame.framebuffer[idx] = make_color_rgb4u(make_float3(0.f, 0.f, 0.f));
}
342 
343 
344 //
345 // Shadow ray programs
346 //
347 // The shadow PRD attenuation factor represents what fraction of the light
348 // is visible. An attenuation factor of 0 indicates full shadow, no light
349 // makes it to the surface. An attenuation factor of 1.0 indicates a
350 // complete lack of shadow.
351 //
352 
// Closest-hit for shadow rays: intentionally empty, since shadow
// attenuation is handled entirely by the any-hit programs below.
extern "C" __global__ void __closesthit__shadow_nop() {
  // no-op
}
356 
357 
358 //
359 // Shadow miss program for any kind of geometry
360 // Regardless what type of geometry we're rendering, if we end up running
361 // the miss program, then we know we didn't hit an occlusion, so the
362 // light attenuation factor should be 1.0.
extern "C" __global__ void __miss__shadow_nop() {
  // For scenes with either opaque or transmissive objects,
  // a "miss" always indicates that there was no (further) occlusion,
  // and thus no shadow.

  // no-op: the shadow attenuation payload retains its pre-launch value
}
370 
371 
372 // Shadow AH program for purely opaque geometry
373 // If we encounter an opaque object during an anyhit traversal,
374 // it sets the light attenuation factor to 0 (full shadow) and
375 // immediately terminates the shadow traversal.
extern "C" __global__ void __anyhit__shadow_opaque() {
  // this material is opaque, so it fully attenuates all shadow rays
  // NOTE(review): this payload write was lost in this copy of the file
  // (gap in the original numbering); without it the attenuation payload
  // keeps its pre-launch init value and no shadow is recorded.
  setPayloadShadowAttenuation(0.0f); // 100% full shadow

  // full shadow should cause us to early-terminate AH search
  optixTerminateRay();
}
383 
384 
385 // Shadow programs for scenes containing a mix of both opaque and transparent
386 // objects. In the case that a scene contains a mix of both fully opaque
387 // and transparent objects, we have two different types of AH programs
388 // for the two cases. Prior to launching the shadow rays, the PRD attenuation
389 // factor is set to 1.0 to indicate no shadowing, and it is subsequently
390 // modified by the AH programs associated with the different objects in
391 // the scene.
392 //
393 // To facilitate best performance in scenes that contain a mix of
394 // fully opaque and transparent geometry, we could run an anyhit against
395 // opaque geometry first, and only if we had a miss, we could continue with
396 // running anyhit traversal on just the transmissive geometry.
397 //
398 
399 // Any hit program required for shadow filtering through transparent materials
// Any hit program required for shadow filtering through transparent materials.
// Accumulates (1 - opacity) attenuation into the shadow payload per surface
// crossed, terminating early once the shadow is effectively total.
extern "C" __global__ void __anyhit__shadow_transmission() {
#if defined(TACHYON_USE_GEOMFLAGS)
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());

  int geomflags = sbtHG.geomflags;
  if (!(geomflags & (RT_MAT_ALPHA | RT_MAT_TEXALPHA))) {
    // we hit something fully opaque
    setPayloadShadowAttenuation(0.0f); // 100% full shadow
    optixTerminateRay();
  } else {
    // use a VERY simple shadow filtering scheme based on opacity
    float opacity = rtLaunch.materials[sbtHG.materialindex].opacity;

#if 1
    // incorporate alpha cutout textures into any-hit if necessary
    if (geomflags & RT_MAT_TEXALPHA) {
      auto & tmesh = sbtHG.trimesh;
      if (tmesh.tex2d != nullptr) {
        const int primID = optixGetPrimitiveIndex();

        // triangle vertex indices: either implicit (flat array) or indexed
        int3 index;
        if (tmesh.indices == NULL) {
          int idx3 = primID*3;
          index = make_int3(idx3, idx3+1, idx3+2);
        } else {
          index = tmesh.indices[primID];
        }

        const float2 barycentrics = optixGetTriangleBarycentrics();

        float2 txc0 = tmesh.tex2d[index.x];
        float2 txc1 = tmesh.tex2d[index.y];
        float2 txc2 = tmesh.tex2d[index.z];

        // interpolate tex coord from triangle barycentrics
        float2 texcoord = (txc0 * (1.0f - barycentrics.x - barycentrics.y) +
                           txc1 * barycentrics.x + txc2 * barycentrics.y);

        // XXX need to implement ray differentials for tex filtering
        int matidx = sbtHG.materialindex;
        const auto &mat = rtLaunch.materials[matidx];
        float4 tx = tex2D<float4>(mat.tex, texcoord.x, texcoord.y);

        // modulate opacity by the texture's alpha channel
        opacity *= tx.w;
      }
    }
#endif

    // this material could be translucent, so it may attenuate shadow rays
    float attenuation = getPayloadShadowAttenuation();
    attenuation *= (1.0f - opacity);
    setPayloadShadowAttenuation(attenuation);
    // check to see if we've hit 100% shadow or not
    if (attenuation < 0.001f) {
      optixTerminateRay();
    } else {
#if defined(TACHYON_RAYSTATS)
      const int idx = tachyon1DLaunchIndex();
      rtLaunch.frame.raystats2_buffer[idx].y++; // increment trans ray skip count
#endif
      optixIgnoreIntersection();
    }
  }

#else

  // use a VERY simple shadow filtering scheme based on opacity
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  float opacity = rtLaunch.materials[sbtHG.materialindex].opacity;

#if 0
  const uint3 launch_index = optixGetLaunchIndex();
  if (launch_index.x == 994) {
    printf("AH xy:%d %d mat[%d] diffuse: %g opacity: %g atten: %g\n",
           launch_index.x, launch_index.y,
           sbtHG.materialindex, mat.diffuse, mat.opacity,
           prd.attenuation);
  }
#endif

  // this material could be translucent, so it may attenuate shadow rays
  float attenuation = getPayloadShadowAttenuation();
  attenuation *= (1.0f - opacity);
  setPayloadShadowAttenuation(attenuation);
  // check to see if we've hit 100% shadow or not
  if (attenuation < 0.001f) {
    optixTerminateRay();
  } else {
#if defined(TACHYON_RAYSTATS)
    const int idx = tachyon1DLaunchIndex();
    rtLaunch.frame.raystats2_buffer[idx].y++; // increment trans ray skip count
#endif
    optixIgnoreIntersection();
  }
#endif
}
496 
497 
498 
499 // Any hit program required for shadow filtering when an
500 // HMD/camera fade-and-clip is active, through both
501 // solid and transparent materials
// Any hit program required for shadow filtering when an HMD/camera
// fade-and-clip is active, through both solid and transparent materials.
extern "C" __global__ void any_hit_shadow_clip_sphere() {
  const float3 ray_origin = optixGetWorldRayOrigin();
  const float3 ray_direction = optixGetWorldRayDirection();
  const float t_hit = optixGetRayTmax();
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  float opacity = rtLaunch.materials[sbtHG.materialindex].opacity;

  // compute world space hit point for use in evaluating fade/clip effect
  float3 hit_point = ray_origin + t_hit * ray_direction;

  // compute additional attenuation from clipping sphere if enabled
  float clipalpha = 1.0f;
  if (rtLaunch.clipview_mode == 2) {
    // NOTE(review): this call line was lost in this copy of the file
    // (gap in the original numbering); restored to match the dangling
    // argument line — confirm field names against TachyonOptiXShaders.h
    sphere_fade_and_clip(hit_point, rtLaunch.cam.pos, rtLaunch.clipview_start,
                         rtLaunch.clipview_end, clipalpha);
  }


  // use a VERY simple shadow filtering scheme based on opacity
  // this material could be translucent, so it may attenuate shadow rays
  float attenuation = getPayloadShadowAttenuation();
  attenuation *= (1.0f - (clipalpha * opacity));
  setPayloadShadowAttenuation(attenuation);
  // check to see if we've hit 100% shadow or not
  if (attenuation < 0.001f) {
    optixTerminateRay();
  } else {
#if defined(TACHYON_RAYSTATS)
    const int idx = tachyon1DLaunchIndex();
    rtLaunch.frame.raystats2_buffer[idx].y++; // increment trans ray skip count
#endif
    optixIgnoreIntersection();
  }
}
536 
537 
538 //
539 // OptiX anyhit program for radiance rays, a no-op
540 //
541 
// Any-hit for radiance rays: intentionally empty; radiance shading is
// handled by closest-hit/miss programs.
extern "C" __global__ void __anyhit__radiance_nop() {
  // no-op
}
545 
546 
547 //
548 // OptiX miss programs for drawing the background color or
549 // background color gradient when no objects are hit
550 //
551 
552 // Miss program for solid background
extern "C" __global__ void __miss__radiance_solid_bg() {
  // Fog overrides the background color if we're using
  // Tachyon radial fog, but not for OpenGL style fog.
  PerRayData_radiance &prd = *getPRD<PerRayData_radiance>();
  // NOTE(review): the result assignment was lost in this copy of the file
  // (gap in the original numbering); restored — confirm field name
  prd.result = rtLaunch.scene.bg_color; // solid background color
  prd.alpha = 0.0f; // alpha of background is 0.0f;

#if defined(TACHYON_RAYSTATS)
  const int idx = tachyon1DLaunchIndex();
  rtLaunch.frame.raystats1_buffer[idx].w++; // increment miss counter
#endif
}
565 
566 
567 // Miss program for gradient background with perspective projection.
568 // Fog overrides the background color if we're using
569 // Tachyon radial fog, but not for OpenGL style fog.
extern "C" __global__ void __miss__radiance_gradient_bg_sky_sphere() {
  PerRayData_radiance &prd = *getPRD<PerRayData_radiance>();

  // project ray onto the gradient "up" direction, and compute the
  // scalar color interpolation parameter
  float IdotG = dot(optixGetWorldRayDirection(), rtLaunch.scene.bg_grad_updir);
  // NOTE(review): continuation line lost in this copy of the file;
  // restored as the inverse-range scale factor — confirm field name
  float val = (IdotG - rtLaunch.scene.bg_grad_botval) *
              rtLaunch.scene.bg_grad_invrange;

  // Compute and add random noise to the background gradient to
  // avoid banding artifacts, particularly in compressed video.
  // Noise RNG depends only on pixel index, with no sample/subframe
  // contribution, so that dither pattern won't average out.
  const int idx = tachyon1DLaunchIndex();
#if 1
  float u = squares_rng<2>(idx, SQUARES_RNG_KEY1) * UINT32_RAND_MAX_INV;
#else
  float u = tea<4>(idx, idx) * UINT32_RAND_MAX_INV;
#endif
  float noise = rtLaunch.scene.bg_grad_noisemag * (u - 0.5f);
  val += noise; // add the noise to the interpolation parameter

  val = __saturatef(val); // clamp the interpolation param to [0:1]
  float3 col = val * rtLaunch.scene.bg_color_grad_top +
               (1.0f - val) * rtLaunch.scene.bg_color_grad_bot;
  prd.result = col;
  prd.alpha = 0.0f; // alpha of background is 0.0f;

#if defined(TACHYON_RAYSTATS)
  rtLaunch.frame.raystats1_buffer[idx].w++; // increment miss counter
#endif
}
602 
603 
604 // Miss program for gradient background with orthographic projection.
605 // Fog overrides the background color if we're using
606 // Tachyon radial fog, but not for OpenGL style fog.
extern "C" __global__ void __miss__radiance_gradient_bg_sky_plane() {
  PerRayData_radiance &prd = *getPRD<PerRayData_radiance>();

  // project ray onto the gradient "up" direction, and compute the
  // scalar color interpolation parameter
  float IdotG = dot(optixGetWorldRayDirection(), rtLaunch.scene.bg_grad_updir);
  // NOTE(review): continuation line lost in this copy of the file;
  // restored as the inverse-range scale factor — confirm field name
  float val = (IdotG - rtLaunch.scene.bg_grad_botval) *
              rtLaunch.scene.bg_grad_invrange;

  // Compute and add random noise to the background gradient to
  // avoid banding artifacts, particularly in compressed video.
  // Noise RNG depends only on pixel index, with no sample/subframe
  // contribution, so that dither pattern won't average out.
  const int idx = tachyon1DLaunchIndex();
#if 1
  float u = squares_rng<2>(idx, SQUARES_RNG_KEY1) * UINT32_RAND_MAX_INV;
#else
  float u = tea<4>(idx, idx) * UINT32_RAND_MAX_INV;
#endif
  float noise = rtLaunch.scene.bg_grad_noisemag * (u - 0.5f);
  val += noise; // add the noise to the interpolation parameter

  val = __saturatef(val); // clamp the interpolation param to [0:1]
  float3 col = val * rtLaunch.scene.bg_color_grad_top +
               (1.0f - val) * rtLaunch.scene.bg_color_grad_bot;
  prd.result = col;
  prd.alpha = 0.0f; // alpha of background is 0.0f;

#if defined(TACHYON_RAYSTATS)
  rtLaunch.frame.raystats1_buffer[idx].w++; // increment miss counter
#endif
}
639 
640 
641 
642 //
643 // Ray gen accumulation buffer helper routines
644 //
// Accumulate one subframe's RGBA contribution for pixel 'idx' into the
// accumulation buffer, then (when requested) normalize, optionally hand
// off to the denoiser, tone map, and write the displayable framebuffer.
// NOTE(review): several lines were lost in this copy of the file (gaps
// in the original numbering: the color-buffer guard, the tonemap_color
// arguments, and the colorspace test); they are reconstructed below and
// marked — confirm field/constant names against TachyonOptiXShaders.h.
static void __inline__ __device__ accumulate_color(int idx, float4 colrgba4f) {
  // accumulate with existing contents except during a "clear"
  if (!rtLaunch.frame.fb_clearall) {
    float4 rgba = rtLaunch.frame.accum_buffer[idx];
    colrgba4f += rgba;
  }

  // always update the accumulation buffer
  rtLaunch.frame.accum_buffer[idx] = colrgba4f;

  // update the color buffer only when we're told to
  if (rtLaunch.frame.update_colorbuffer) {   // NOTE(review): reconstructed guard — confirm flag name
    colrgba4f *= rtLaunch.frame.accum_normalize;

#if defined(TACHYON_OPTIXDENOISER)
    // When running on LDR inputs, AI denoiser consumes images in sRGB
    // colorspace or at least using a gamma of 2.2, with floating point
    // values range-clamped to [0,1].

    if (rtLaunch.frame.denoiser_enabled) {
      // pre-scale RGBA inputs to avoid excessive clamping before denoising
      colrgba4f *= 0.80f; // alpha is modified, but we revert it later

      // RGBA value range clamping can't be inverted, so LDR denoising
      // has some flexibility downsides for subsequent steps.
      colrgba4f = clamp_float4(colrgba4f); // clamp values [0,1]

      // use a cheap gamma 2.0 approximation which is trivially inverted
      // to please the LDR denoiser input stage
      float4 sRGB_approx20 = linear_to_sRGB_approx_20(colrgba4f);
      rtLaunch.frame.denoiser_colorbuffer[idx] = sRGB_approx20;

      // When denoising, we early-exit here. The remaining steps in
      // the image pipeline are done within a separate CUDA kernel,
      // launched only after denoising has completed
      return;
    }
#endif

    //
    // The remaining steps here are only done when denoising is off,
    //

    // HDR tone mapping operators need to be applied after denoising
    // has been completed. If we use tone mapping on an LDR input,
    // we may have to revert from sRGB to linear before applying the
    // TMO, and then convert back to sRGB.
    // Also performs color space conversion if required
    float4 tonedcol;
    tonedcol = tonemap_color(colrgba4f,
                             rtLaunch.frame.tonemap_mode,      // NOTE(review): reconstructed args —
                             rtLaunch.frame.tonemap_exposure); // confirm names

    if (rtLaunch.frame.colorspace == RT_COLORSPACE_sRGB)       // NOTE(review): reconstructed test — confirm
      colrgba4f = linear_to_sRGB(tonedcol);
    else
      colrgba4f = tonedcol;

    // clamping is applied during conversion to uchar4
    rtLaunch.frame.framebuffer[idx] = make_color_rgb4u(colrgba4f);
  }
}
708 
709 
#if defined(TACHYON_RAYSTATS)
// Reset or increment the per-pixel primary-ray counter for pixel 'idx',
// depending on whether the framebuffer is being cleared this subframe.
static void __inline__ __device__ raystats_clear_incr(unsigned int idx) {
  if (rtLaunch.frame.fb_clearall) {
    // assign ray stats immediately to cut register use later
    uint4 s=make_uint4(rtLaunch.aa_samples, 0, 0, 0); // set primary ray counter
    rtLaunch.frame.raystats1_buffer[idx]=s;
  } else {
    // increment ray stats immediately to cut register use later
    rtLaunch.frame.raystats1_buffer[idx].x+=rtLaunch.aa_samples; // increment primary ray counter
  }
}
#endif
722 
723 
724 //
725 // OptiX programs that implement the camera models and ray generation code
726 //
727 
728 
729 //
730 // CUDA device function for computing the new ray origin
731 // and ray direction, given the radius of the circle of confusion disc,
732 // and an orthonormal basis for each ray.
733 //
#if 1

// Compute a depth-of-field ray: jitter the ray origin within the
// circle-of-confusion disc (radius cam_dof_aperture_rad) spanned by
// 'up' and 'right', then re-aim the ray at the original focal point so
// geometry at cam_dof_focal_dist remains in sharp focus.
static __device__ __inline__
void dof_ray(const float cam_dof_focal_dist, const float cam_dof_aperture_rad,
             const float3 &ray_origin_orig, float3 &ray_origin,
             const float3 &ray_direction_orig, float3 &ray_direction,
             unsigned int &randseed, const float3 &up, const float3 &right) {
  float3 focuspoint = ray_origin_orig + ray_direction_orig * cam_dof_focal_dist;
  float2 dofjxy;
  jitter_disc2f(randseed, dofjxy, cam_dof_aperture_rad);
  ray_origin = ray_origin_orig + dofjxy.x*right + dofjxy.y*up;
  ray_direction = normalize(focuspoint - ray_origin);
}

#else

// use low-discrepancy sequences for sampling the circle of confusion disc
// (alternate variant: takes a quasi-random 2-D sample instead of an RNG seed)
static __device__ __inline__
void dof_ray(const float cam_dof_focal_dist, const float cam_dof_aperture_rad,
             const float3 &ray_origin_orig, float3 &ray_origin,
             const float3 &ray_direction_orig, float3 &ray_direction,
             float2 &qrnxy, const float3 &up, const float3 &right) {
  float3 focuspoint = ray_origin_orig + ray_direction_orig * cam_dof_focal_dist;
  float2 dofjxy;
  jitter_disc2f_qrn(qrnxy, dofjxy, cam_dof_aperture_rad);
  ray_origin = ray_origin_orig + dofjxy.x*right + dofjxy.y*up;
  ray_direction = normalize(focuspoint - ray_origin);
}

#endif
764 
765 
766 //
767 // Templated perspective camera ray generation code
768 //
// Templated perspective-camera ray generation: shoots rtLaunch.aa_samples
// jittered primary rays per pixel and accumulates the shaded results.
// STEREO_ON renders over/under left/right eye views into a double-high
// framebuffer; DOF_ON enables depth-of-field origin/direction jittering.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_perspective_general() {
  // NOTE(review): the signature line above was lost in this copy of the
  // file (gap in the original numbering); restored to match the
  // tachyon_camera_perspective_general<S,D>() instantiations below.
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // Stereoscopic rendering is provided by rendering in an over/under
  // format with the left eye image into the top half of a double-high
  // framebuffer, and the right eye into the lower half. The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // with simple pointer offset arithmetic.
  float3 eyepos;
  uint viewport_sz_y, viewport_idx_y;
  if (STEREO_ON) {
    // render into a double-high framebuffer when stereo is enabled
    viewport_sz_y = launch_dim.y >> 1;
    if (launch_index.y >= viewport_sz_y) {
      // right image
      viewport_idx_y = launch_index.y - viewport_sz_y;
      eyepos = cam.pos + cam.U * cam.stereo_eyesep * 0.5f;
    } else {
      // left image
      viewport_idx_y = launch_index.y;
      eyepos = cam.pos - cam.U * cam.stereo_eyesep * 0.5f;
    }
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_y = launch_dim.y;
    viewport_idx_y = launch_index.y;
    eyepos = cam.pos;
  }


  //
  // general primary ray calculations
  //
  float2 aspect = make_float2(float(launch_dim.x) / float(viewport_sz_y), 1.0f) * cam.zoom;
  float2 viewportscale = 1.0f / make_float2(launch_dim.x, viewport_sz_y);
  float2 d = make_float2(launch_index.x, viewport_idx_y) * viewportscale * aspect * 2.f - aspect; // center of pixel in image plane

  unsigned int randseed = tea<4>(idx, subframe_count());

  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  float3 ray_origin = eyepos;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    float2 jxy;
    jitter_offset2f(randseed, jxy);

    jxy = jxy * viewportscale * aspect * 2.f + d;
    float3 ray_direction = normalize(jxy.x*cam.U + jxy.y*cam.V + cam.W);

    // compute new ray origin and ray direction
    if (DOF_ON) {
      dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
              eyepos, ray_origin, ray_direction, ray_direction,
              randseed, cam.V, cam.U);
    }

    // trace the new ray...
    PerRayData_radiance prd; // NOTE(review): PRD declaration restored; lost in this copy
    prd.result = make_float3(0.0f);
    prd.alpha = 1.f;
    prd.importance = 1.f;
    prd.depth = 0;
    prd.transcnt = rtLaunch.max_trans; // NOTE(review): restored — confirm field name

    uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
    packPointer(&prd, p0, p1);

    optixTrace(rtLaunch.traversable,
               ray_origin,
               ray_direction,
               0.0f,                          // tmin
               RT_DEFAULT_MAX,                // tmax
               0.0f,                          // ray time
               OptixVisibilityMask( 255 ),
               OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
               RT_RAY_TYPE_RADIANCE,          // SBT offset
               RT_RAY_TYPE_COUNT,             // SBT stride
               RT_RAY_TYPE_RADIANCE,          // missSBTIndex
               p0, p1,                        // PRD ptr in 2x uint32
               s);                            // use aasample in CH/MISS RNGs

    col += prd.result;
    alpha += prd.alpha;
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
874 
// Raygen entry points: template instantiations of the general
// perspective camera for each stereo / depth-of-field combination.
extern "C" __global__ void __raygen__camera_perspective() {
  // mono, no depth-of-field
  tachyon_camera_perspective_general<0, 0>();
}

extern "C" __global__ void __raygen__camera_perspective_dof() {
  // mono, with depth-of-field
  tachyon_camera_perspective_general<0, 1>();
}

extern "C" __global__ void __raygen__camera_perspective_stereo() {
  // over/under stereo, no depth-of-field
  tachyon_camera_perspective_general<1, 0>();
}

extern "C" __global__ void __raygen__camera_perspective_stereo_dof() {
  // over/under stereo, with depth-of-field
  tachyon_camera_perspective_general<1, 1>();
}
890 
891 
892 
893 
894 //
895 // Templated orthographic camera ray generation code
896 //
// Templated orthographic camera ray generation.  STEREO_ON selects
// over/under stereoscopic rendering, DOF_ON enables depth-of-field
// ray perturbation; both are resolved at compile time.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_orthographic_general(void) {
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // Stereoscopic rendering is provided by rendering in an over/under
  // format with the left eye image into the top half of a double-high
  // framebuffer, and the right eye into the lower half.  The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // with simple pointer offset arithmetic.
  float3 eyepos;
  uint viewport_sz_y, viewport_idx_y;
  float3 view_direction;
  if (STEREO_ON) {
    // render into a double-high framebuffer when stereo is enabled
    viewport_sz_y = launch_dim.y >> 1;
    if (launch_index.y >= viewport_sz_y) {
      // right image
      viewport_idx_y = launch_index.y - viewport_sz_y;
      eyepos = cam.pos + cam.U * cam.stereo_eyesep * 0.5f;
    } else {
      // left image
      viewport_idx_y = launch_index.y;
      eyepos = cam.pos - cam.U * cam.stereo_eyesep * 0.5f;
    }
    view_direction = normalize(cam.pos-eyepos + normalize(cam.W) * cam.stereo_convergence_dist);
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_y = launch_dim.y;
    viewport_idx_y = launch_index.y;
    eyepos = cam.pos;
    view_direction = normalize(cam.W);
  }

  //
  // general primary ray calculations
  //
  float2 aspect = make_float2(float(launch_dim.x) / float(viewport_sz_y), 1.0f) * cam.zoom;
  float2 viewportscale = 1.0f / make_float2(launch_dim.x, viewport_sz_y);

  float2 d = make_float2(launch_index.x, viewport_idx_y) * viewportscale * aspect * 2.f - aspect; // center of pixel in image plane

  unsigned int randseed = tea<4>(idx, subframe_count());

  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  float3 ray_direction = view_direction;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    float2 jxy;
    jitter_offset2f(randseed, jxy);
    jxy = jxy * viewportscale * aspect * 2.f + d;
    // orthographic rays all share one direction; jitter moves the origin
    // within the image plane spanned by cam.U/cam.V
    float3 ray_origin = eyepos + jxy.x*cam.U + jxy.y*cam.V;

    // compute new ray origin and ray direction
    if (DOF_ON) {
      dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
              ray_origin, ray_origin, view_direction, ray_direction,
              randseed, cam.V, cam.U);
    }

    // trace the new ray...
    PerRayData_radiance prd;
    prd.alpha = 1.f;
    prd.importance = 1.f;
    prd.depth = 0;
    prd.transcnt = rtLaunch.max_trans;

    uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
    packPointer(&prd, p0, p1);

    optixTrace(rtLaunch.traversable,
               ray_origin,
               ray_direction,
               0.0f,                          // tmin
               RT_DEFAULT_MAX,                // tmax
               0.0f,                          // ray time
               OptixVisibilityMask( 255 ),
               OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
               RT_RAY_TYPE_RADIANCE,          // SBT offset
               RT_RAY_TYPE_COUNT,             // SBT stride
               RT_RAY_TYPE_RADIANCE,          // missSBTIndex
               p0, p1,                        // PRD ptr in 2x uint32
               s);                            // use aasample in CH/MISS RNGs

    col += prd.result;
    alpha += prd.alpha;
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
1003 
// Raygen entry points instantiating the templated orthographic camera
// for each compile-time <STEREO_ON, DOF_ON> combination.
extern "C" __global__ void __raygen__camera_orthographic() {
  tachyon_camera_orthographic_general<0, 0>();   // mono, no depth-of-field
}

extern "C" __global__ void __raygen__camera_orthographic_dof() {
  tachyon_camera_orthographic_general<0, 1>();   // mono, depth-of-field
}

extern "C" __global__ void __raygen__camera_orthographic_stereo() {
  tachyon_camera_orthographic_general<1, 0>();   // stereo, no depth-of-field
}

extern "C" __global__ void __raygen__camera_orthographic_stereo_dof() {
  tachyon_camera_orthographic_general<1, 1>();   // stereo, depth-of-field
}
1019 
1020 
1021 
1022 
1023 //
1024 // 360-degree stereoscopic cube map image format for use with
1025 // Oculus, Google Cardboard, and similar VR headsets
1026 //
1027 // ORBX player format:
1028 // all faces are left-right mirror images vs. what viewer sees from inside
1029 // top is also rotated 90 degrees right, bottom is rotated 90 degrees left
1030 // Faces are ordered Back, Front, Top, Bottom, Left, Right
1031 // Stereo layout has left eye images on the left, right eye images
1032 // on the right, all within the same row
1033 //
1034 // Unity cube map format:
1035 // https://docs.unity3d.com/Manual/class-Cubemap.html
1036 // https://vrandarchitecture.com/2016/07/19/stereoscopic-renders-in-unity3d-for-gearvr/
1037 // https://mheavers.medium.com/implementing-a-stereo-skybox-into-unity-for-virtual-reality-e427cf338b06
1038 // https://forum.unity.com/threads/camera-rendertocubemap-including-orientation.534469/
1039 // https://www.videopoetics.com/tutorials/capturing-stereoscopic-panoramas-unity/
1040 //
1041 // Unreal Engine cube map format:
1042 // https://docs.unrealengine.com/4.27/en-US/RenderingAndGraphics/Textures/Cubemaps/CreatingCubemaps/
1043 //
// Templated cubemap camera ray generation.  Renders six 90-degree FoV
// square faces side-by-side in one framebuffer row (face selected by
// the X launch index).  STEREO_ON selects side-by-side stereoscopic
// rendering, DOF_ON enables depth-of-field; both resolved at compile time.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_cubemap_general(void) {
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // compute which cubemap face we're drawing by the X index.
  uint facesz = launch_dim.y; // square cube faces, equal to image height
  uint face = (launch_index.x / facesz) % 6;
  uint2 face_idx = make_uint2(launch_index.x % facesz, launch_index.y);

  // For the OTOY ORBX viewer, Oculus VR software, and some of the
  // related apps, the cubemap image is stored with the X axis oriented
  // such that when viewed as a 2-D image, they are all mirror images.
  // The mirrored left-right orientation used here corresponds to what is
  // seen standing outside the cube, whereas the ray tracer shoots
  // rays from the inside, so we flip the X-axis pixel storage order.
  // The top face of the cubemap has both the left-right and top-bottom
  // orientation flipped also.
  // Set per-face orthonormal basis for camera
  float3 face_U, face_V, face_W;
  switch (face) {
    case 0: // back face, left-right mirror
      face_U =  cam.U;
      face_V =  cam.V;
      face_W = -cam.W;
      break;

    case 1: // front face, left-right mirror
      face_U = -cam.U;
      face_V =  cam.V;
      face_W =  cam.W;
      break;

    case 2: // top face, left-right mirrored, rotated 90 degrees right
      face_U = -cam.W;
      face_V =  cam.U;
      face_W =  cam.V;
      break;

    case 3: // bottom face, left-right mirrored, rotated 90 degrees left
      face_U = -cam.W;
      face_V = -cam.U;
      face_W = -cam.V;
      break;

    case 4: // left face, left-right mirrored
      face_U = -cam.W;
      face_V =  cam.V;
      face_W = -cam.U;
      break;

    case 5: // right face, left-right mirrored
      face_U =  cam.W;
      face_V =  cam.V;
      face_W =  cam.U;
      break;
  }

  // Stereoscopic rendering is provided by rendering in a side-by-side
  // format with the left eye image into the left half of a double-wide
  // framebuffer, and the right eye into the right half.  The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // into an efficient cubemap texture.
  uint viewport_sz_x; // , viewport_idx_x;
  float eyeshift;
  if (STEREO_ON) {
    // render into a double-wide framebuffer when stereo is enabled
    viewport_sz_x = launch_dim.x >> 1;
    if (launch_index.x >= viewport_sz_x) {
      // right image
//    viewport_idx_x = launch_index.x - viewport_sz_x;
      eyeshift =  0.5f * cam.stereo_eyesep;
    } else {
      // left image
//    viewport_idx_x = launch_index.x;
      eyeshift = -0.5f * cam.stereo_eyesep;
    }
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_x = launch_dim.x;
//  viewport_idx_x = launch_index.x;
    eyeshift = 0.0f;
  }

  //
  // general primary ray calculations, locked to 90-degree FoV per face...
  //
  float facescale = 1.0f / facesz;
  float2 d = make_float2(face_idx.x, face_idx.y) * facescale * 2.f - 1.0f; // center of pixel in image plane

  unsigned int randseed = tea<4>(idx, subframe_count());

  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    float2 jxy;
    jitter_offset2f(randseed, jxy);
    jxy = jxy * facescale * 2.f + d;
    float3 ray_direction = normalize(jxy.x*face_U + jxy.y*face_V + face_W);

    float3 ray_origin = cam.pos;
    if (STEREO_ON) {
      // shift the eye perpendicular to the per-sample view direction
      ray_origin += eyeshift * cross(ray_direction, cam.V);
    }

    // compute new ray origin and ray direction
    if (DOF_ON) {
      dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
              ray_origin, ray_origin, ray_direction, ray_direction,
              randseed, face_V, face_U);
    }

    // trace the new ray...
    PerRayData_radiance prd;
    prd.alpha = 1.f;
    prd.importance = 1.f;
    prd.depth = 0;
    prd.transcnt = rtLaunch.max_trans;

    uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
    packPointer(&prd, p0, p1);

    optixTrace(rtLaunch.traversable,
               ray_origin,
               ray_direction,
               0.0f,                          // tmin
               RT_DEFAULT_MAX,                // tmax
               0.0f,                          // ray time
               OptixVisibilityMask( 255 ),
               OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
               RT_RAY_TYPE_RADIANCE,          // SBT offset
               RT_RAY_TYPE_COUNT,             // SBT stride
               RT_RAY_TYPE_RADIANCE,          // missSBTIndex
               p0, p1,                        // PRD ptr in 2x uint32
               s);                            // use aasample in CH/MISS RNGs

    col += prd.result;
    alpha += prd.alpha;
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
1202 
1203 
// Raygen entry points instantiating the templated cubemap camera
// for each compile-time <STEREO_ON, DOF_ON> combination.
extern "C" __global__ void __raygen__camera_cubemap() {
  tachyon_camera_cubemap_general<0, 0>();   // mono, no depth-of-field
}

extern "C" __global__ void __raygen__camera_cubemap_dof() {
  tachyon_camera_cubemap_general<0, 1>();   // mono, depth-of-field
}

extern "C" __global__ void __raygen__camera_cubemap_stereo() {
  tachyon_camera_cubemap_general<1, 0>();   // stereo, no depth-of-field
}

extern "C" __global__ void __raygen__camera_cubemap_stereo_dof() {
  tachyon_camera_cubemap_general<1, 1>();   // stereo, depth-of-field
}
1219 
1220 
1221 
1222 
1223 //
1224 // Camera ray generation code for planetarium dome display
1225 // Generates a fisheye style frame with ~180 degree FoV
1226 //
1227 // A variation of this implementation is described here:
1228 // A Planetarium Dome Master Camera. John E. Stone.
1229 // In, Eric Haines and Tomas Akenine-Möller, editors,
1230 // Ray Tracing Gems, Apress, Chapter 4, pp. 49-60, 2019.
1231 // https://doi.org/10.1007/978-1-4842-4427-2_4
1232 //
// Templated planetarium dome master camera ray generation (fisheye,
// ~180-degree FoV).  STEREO_ON selects over/under stereoscopic
// rendering, DOF_ON enables depth-of-field; both resolved at compile time.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_dome_general(void) {
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // Stereoscopic rendering is provided by rendering in an over/under
  // format with the left eye image into the top half of a double-high
  // framebuffer, and the right eye into the lower half.  The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // with simple pointer offset arithmetic.
  uint viewport_sz_y, viewport_idx_y;
  float eyeshift;
  if (STEREO_ON) {
    // render into a double-high framebuffer when stereo is enabled
    viewport_sz_y = launch_dim.y >> 1;
    if (launch_index.y >= viewport_sz_y) {
      // left image
      viewport_idx_y = launch_index.y - viewport_sz_y;
      eyeshift = -0.5f * cam.stereo_eyesep;
    } else {
      // right image
      viewport_idx_y = launch_index.y;
      eyeshift =  0.5f * cam.stereo_eyesep;
    }
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_y = launch_dim.y;
    viewport_idx_y = launch_index.y;
    eyeshift = 0.0f;
  }

  float fov = M_PIf; // dome FoV in radians

  // half FoV in radians, pixels beyond this distance are outside
  // of the field of view of the projection, and are set black
  float thetamax = 0.5f * fov;   // keep literal in float: avoid double promotion

  // The dome angle from center of the projection is proportional
  // to the image-space distance from the center of the viewport.
  // viewport_sz contains the viewport size, radperpix contains the
  // radians/pixel scaling factors in X/Y, and viewport_mid contains
  // the midpoint coordinate of the viewport used to compute the
  // distance from center.
  float2 viewport_sz = make_float2(launch_dim.x, viewport_sz_y);
  float2 radperpix = fov / viewport_sz;
  float2 viewport_mid = viewport_sz * 0.5f;

  unsigned int randseed = tea<4>(idx, subframe_count());

  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    // compute the jittered image plane sample coordinate
    float2 jxy;
    jitter_offset2f(randseed, jxy);
    float2 viewport_idx = make_float2(launch_index.x, viewport_idx_y) + jxy;

    // compute the ray angles in X/Y and total angular distance from center
    float2 p = (viewport_idx - viewport_mid) * radperpix;
    float theta = hypotf(p.x, p.y);

    // pixels outside the dome FoV are treated as black by not
    // contributing to the color accumulator
    if (theta < thetamax) {
      float3 ray_direction;
      float3 ray_origin = cam.pos;

      if (theta == 0) {
        // handle center of dome where azimuth is undefined by
        // setting the ray direction to the zenith
        ray_direction = cam.W;
      } else {
        float sintheta, costheta;
        sincosf(theta, &sintheta, &costheta);
        float rsin = sintheta / theta; // normalize component
        ray_direction = cam.U*rsin*p.x + cam.V*rsin*p.y + cam.W*costheta;
        if (STEREO_ON) {
          // assumes a flat dome, where cam.W also points in the
          // audience "up" direction
          ray_origin += eyeshift * cross(ray_direction, cam.W);
        }

        if (DOF_ON) {
          float rcos = costheta / theta; // normalize component
          float3 ray_up    = -cam.U*rcos*p.x -cam.V*rcos*p.y + cam.W*sintheta;
          float3 ray_right =  cam.U*(p.y/theta) + cam.V*(-p.x/theta);
          dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
                  ray_origin, ray_origin, ray_direction, ray_direction,
                  randseed, ray_up, ray_right);
        }
      }

      // trace the new ray...
      PerRayData_radiance prd;
      prd.alpha = 1.f;
      prd.importance = 1.f;
      prd.depth = 0;
      prd.transcnt = rtLaunch.max_trans;

      uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
      packPointer(&prd, p0, p1);

      optixTrace(rtLaunch.traversable,
                 ray_origin,
                 ray_direction,
                 0.0f,                          // tmin
                 RT_DEFAULT_MAX,                // tmax
                 0.0f,                          // ray time
                 OptixVisibilityMask( 255 ),
                 OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
                 RT_RAY_TYPE_RADIANCE,          // SBT offset
                 RT_RAY_TYPE_COUNT,             // SBT stride
                 RT_RAY_TYPE_RADIANCE,          // missSBTIndex
                 p0, p1,                        // PRD ptr in 2x uint32
                 s);                            // use aasample in CH/MISS RNGs

      col += prd.result;
      alpha += prd.alpha;
    }
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
1372 
1373 
// Raygen entry points instantiating the templated dome master camera
// for each compile-time <STEREO_ON, DOF_ON> combination.
extern "C" __global__ void __raygen__camera_dome_master() {
  tachyon_camera_dome_general<0, 0>();   // mono, no depth-of-field
}

extern "C" __global__ void __raygen__camera_dome_master_dof() {
  tachyon_camera_dome_general<0, 1>();   // mono, depth-of-field
}

extern "C" __global__ void __raygen__camera_dome_master_stereo() {
  tachyon_camera_dome_general<1, 0>();   // stereo, no depth-of-field
}

extern "C" __global__ void __raygen__camera_dome_master_stereo_dof() {
  tachyon_camera_dome_general<1, 1>();   // stereo, depth-of-field
}
1389 
1390 
1391 //
1392 // Camera ray generation code for 360 degree FoV
1393 // equirectangular (lat/long) projection suitable
1394 // for use as a texture map for a sphere, e.g. for
1395 // immersive VR HMDs, other spheremap-based projections.
1396 //
1397 // A variation of this implementation is described here:
1398 // Omnidirectional Stereoscopic Projections for VR. John E. Stone.
1399 // In, William R. Sherman, editor, VR Developer Gems,
1400 // Taylor and Francis / CRC Press, Chapter 24, pp. 423-436, 2019.
1401 // https://www.taylorfrancis.com/chapters/edit/10.1201/b21598-24/omnidirectional-stereoscopic-projections-vr-john-stone
1402 //
1403 // Paul Bourke's page:
1404 // http://paulbourke.net/stereographics/ODSPmaths/
1405 //
1406 // Google:
1407 // https://developers.google.com/vr/jump/rendering-ods-content.pdf
1408 //
// Templated 360-degree equirectangular (lat/long) camera ray generation.
// STEREO_ON selects over/under stereoscopic rendering, DOF_ON enables
// depth-of-field; both resolved at compile time.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_equirectangular_general(void) {
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // The Samsung GearVR OTOY ORBX players have the left eye image on top,
  // and the right eye image on the bottom.
  // Stereoscopic rendering is provided by rendering in an over/under
  // format with the left eye image into the top half of a double-high
  // framebuffer, and the right eye into the lower half.  The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // with simple pointer offset arithmetic.
  uint viewport_sz_y, viewport_idx_y;
  float eyeshift;
  if (STEREO_ON) {
    // render into a double-high framebuffer when stereo is enabled
    viewport_sz_y = launch_dim.y >> 1;
    if (launch_index.y >= viewport_sz_y) {
      // left image
      viewport_idx_y = launch_index.y - viewport_sz_y;
      eyeshift = -0.5f * cam.stereo_eyesep;
    } else {
      // right image
      viewport_idx_y = launch_index.y;
      eyeshift =  0.5f * cam.stereo_eyesep;
    }
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_y = launch_dim.y;
    viewport_idx_y = launch_index.y;
    eyeshift = 0.0f;
  }

  // 2*pi radians across X (longitude), pi radians across Y (latitude)
  float2 viewport_sz = make_float2(launch_dim.x, viewport_sz_y);
  float2 radperpix = M_PIf / viewport_sz * make_float2(2.0f, 1.0f);
  float2 viewport_mid = viewport_sz * 0.5f;

  unsigned int randseed = tea<4>(idx, subframe_count());

  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    float2 jxy;
    jitter_offset2f(randseed, jxy);

    float2 viewport_idx = make_float2(launch_index.x, viewport_idx_y) + jxy;
    float2 rangle = (viewport_idx - viewport_mid) * radperpix;

    float sin_ax, cos_ax, sin_ay, cos_ay;
    sincosf(rangle.x, &sin_ax, &cos_ax);
    sincosf(rangle.y, &sin_ay, &cos_ay);

    float3 ray_direction = normalize(cos_ay * (cos_ax * cam.W + sin_ax * cam.U) + sin_ay * cam.V);

    float3 ray_origin = cam.pos;
    if (STEREO_ON) {
      // shift the eye perpendicular to the per-sample view direction
      ray_origin += eyeshift * cross(ray_direction, cam.V);
    }

    // compute new ray origin and ray direction
    if (DOF_ON) {
      float3 ray_right = normalize(cos_ay * (-sin_ax * cam.W - cos_ax * cam.U) + sin_ay * cam.V);
      float3 ray_up = cross(ray_direction, ray_right);
      dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
              ray_origin, ray_origin, ray_direction, ray_direction,
              randseed, ray_up, ray_right);
    }

    // trace the new ray...
    PerRayData_radiance prd;
    prd.alpha = 1.f;
    prd.importance = 1.f;
    prd.depth = 0;
    prd.transcnt = rtLaunch.max_trans;

    uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
    packPointer(&prd, p0, p1);

    optixTrace(rtLaunch.traversable,
               ray_origin,
               ray_direction,
               0.0f,                          // tmin
               RT_DEFAULT_MAX,                // tmax
               0.0f,                          // ray time
               OptixVisibilityMask( 255 ),
               OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
               RT_RAY_TYPE_RADIANCE,          // SBT offset
               RT_RAY_TYPE_COUNT,             // SBT stride
               RT_RAY_TYPE_RADIANCE,          // missSBTIndex
               p0, p1,                        // PRD ptr in 2x uint32
               s);                            // use aasample in CH/MISS RNGs

    col += prd.result;
    alpha += prd.alpha;
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
1523 
// Raygen entry points instantiating the templated equirectangular camera
// for each compile-time <STEREO_ON, DOF_ON> combination.
extern "C" __global__ void __raygen__camera_equirectangular() {
  tachyon_camera_equirectangular_general<0, 0>();   // mono, no depth-of-field
}

extern "C" __global__ void __raygen__camera_equirectangular_dof() {
  tachyon_camera_equirectangular_general<0, 1>();   // mono, depth-of-field
}

extern "C" __global__ void __raygen__camera_equirectangular_stereo() {
  tachyon_camera_equirectangular_general<1, 0>();   // stereo, no depth-of-field
}

extern "C" __global__ void __raygen__camera_equirectangular_stereo_dof() {
  tachyon_camera_equirectangular_general<1, 1>();   // stereo, depth-of-field
}
1539 
1540 
1541 
1542 //
1543 // Octahedral panoramic camera, defined for a square image:
1544 // Essential Ray Generation Shaders. Morgan McGuire and Zander Majercik,
1545 // In Adam Marrs, Peter Shirley, Ingo Wald, editors,
1546 // Ray Tracing Gems II, Apress, Chapter 3, pp. 40-64, 2021.
1547 // https://link.springer.com/content/pdf/10.1007%2F978-1-4842-7185-8.pdf
1548 //
// Templated octahedral panoramic camera ray generation (square image,
// planar pixel coords decoded to sphere directions via OctDecode).
// STEREO_ON selects over/under stereoscopic rendering, DOF_ON enables
// depth-of-field; both resolved at compile time.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_octahedral_general(void) {
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // The Samsung GearVR OTOY ORBX players have the left eye image on top,
  // and the right eye image on the bottom.
  // Stereoscopic rendering is provided by rendering in an over/under
  // format with the left eye image into the top half of a double-high
  // framebuffer, and the right eye into the lower half.  The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // with simple pointer offset arithmetic.
  uint viewport_sz_y, viewport_idx_y;
  float eyeshift;
  if (STEREO_ON) {
    // render into a double-high framebuffer when stereo is enabled
    viewport_sz_y = launch_dim.y >> 1;
    if (launch_index.y >= viewport_sz_y) {
      // left image
      viewport_idx_y = launch_index.y - viewport_sz_y;
      eyeshift = -0.5f * cam.stereo_eyesep;
    } else {
      // right image
      viewport_idx_y = launch_index.y;
      eyeshift =  0.5f * cam.stereo_eyesep;
    }
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_y = launch_dim.y;
    viewport_idx_y = launch_index.y;
    eyeshift = 0.0f;
  }

  float2 viewport_sz = make_float2(launch_dim.x, viewport_sz_y);
  float2 viewport_sz_inv = make_float2(1.0f / launch_dim.x,
                                       1.0f / viewport_sz_y);

  unsigned int randseed = tea<4>(idx, subframe_count());

  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    float2 jxy;
    jitter_offset2f(randseed, jxy);

    float2 viewport_idx = make_float2(launch_index.x, viewport_idx_y) + jxy;

    // remap pixel coordinate to [-1, 1] on both axes
    float2 px = viewport_idx * viewport_sz_inv;
    px = (px - make_float2(0.5f, 0.5f)) * 2.0f;

    // convert planar pixel coordinate to a spherical direction
    float3 ray_direction = OctDecode<1>(px);

    float3 ray_origin = cam.pos;
    if (STEREO_ON) {
      // shift the eye perpendicular to the per-sample view direction
      ray_origin += eyeshift * cross(ray_direction, cam.V);
    }

    // compute new ray origin and ray direction
    if (DOF_ON) {
      float3 ray_right = normalize(cross(ray_direction, cam.V));
      float3 ray_up = cross(ray_direction, ray_right);
      dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
              ray_origin, ray_origin, ray_direction, ray_direction,
              randseed, ray_up, ray_right);
    }

    // trace the new ray...
    PerRayData_radiance prd;
    prd.alpha = 1.f;
    prd.importance = 1.f;
    prd.depth = 0;
    prd.transcnt = rtLaunch.max_trans;

    uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
    packPointer(&prd, p0, p1);

    optixTrace(rtLaunch.traversable,
               ray_origin,
               ray_direction,
               0.0f,                          // tmin
               RT_DEFAULT_MAX,                // tmax
               0.0f,                          // ray time
               OptixVisibilityMask( 255 ),
               OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
               RT_RAY_TYPE_RADIANCE,          // SBT offset
               RT_RAY_TYPE_COUNT,             // SBT stride
               RT_RAY_TYPE_RADIANCE,          // missSBTIndex
               p0, p1,                        // PRD ptr in 2x uint32
               s);                            // use aasample in CH/MISS RNGs

    col += prd.result;
    alpha += prd.alpha;
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
1662 
// Raygen entry points instantiating the templated octahedral camera
// for each compile-time <STEREO_ON, DOF_ON> combination.
extern "C" __global__ void __raygen__camera_octahedral() {
  tachyon_camera_octahedral_general<0, 0>();   // mono, no depth-of-field
}

extern "C" __global__ void __raygen__camera_octahedral_dof() {
  tachyon_camera_octahedral_general<0, 1>();   // mono, depth-of-field
}

extern "C" __global__ void __raygen__camera_octahedral_stereo() {
  tachyon_camera_octahedral_general<1, 0>();   // stereo, no depth-of-field
}

extern "C" __global__ void __raygen__camera_octahedral_stereo_dof() {
  tachyon_camera_octahedral_general<1, 1>();   // stereo, depth-of-field
}
1678 
1679 
1680 
1681 //
1682 // Templated Oculus Rift perspective camera ray generation code
1683 //
// Templated Oculus Rift style perspective camera ray generation with
// barrel distortion pre-correction.  STEREO_ON selects side-by-side
// stereoscopic rendering, DOF_ON enables depth-of-field; both resolved
// at compile time.
template<int STEREO_ON, int DOF_ON>
static __device__ __inline__
void tachyon_camera_oculus_rift_general(void) {
#if defined(TACHYON_TIME_COLORING)
  clock_t t0 = clock(); // start per-pixel RT timer
#endif

  const uint3 launch_dim = optixGetLaunchDimensions();
  const uint3 launch_index = optixGetLaunchIndex();
  const int idx = tachyon1DLaunchIndex(launch_dim, launch_index);
#if defined(TACHYON_RAYSTATS)
  // clear/increment ray stats immediately to cut register use later
  raystats_clear_incr(idx);
#endif

  const auto &cam = rtLaunch.cam;

  // Stereoscopic rendering is provided by rendering in a side-by-side
  // format with the left eye image in the left half of a double-wide
  // framebuffer, and the right eye in the right half.  The subsequent
  // OpenGL drawing code can trivially unpack and draw the two images
  // with simple pointer offset arithmetic.
  uint viewport_sz_x, viewport_idx_x;
  float eyeshift;
  if (STEREO_ON) {
    // render into a double-wide framebuffer when stereo is enabled
    viewport_sz_x = launch_dim.x >> 1;
    if (launch_index.x >= viewport_sz_x) {
      // right image
      viewport_idx_x = launch_index.x - viewport_sz_x;
      eyeshift =  0.5f * cam.stereo_eyesep;
    } else {
      // left image
      viewport_idx_x = launch_index.x;
      eyeshift = -0.5f * cam.stereo_eyesep;
    }
  } else {
    // render into a normal size framebuffer if stereo is not enabled
    viewport_sz_x = launch_dim.x;
    viewport_idx_x = launch_index.x;
    eyeshift = 0.0f;
  }

  //
  // general primary ray calculations
  //
  float2 aspect = make_float2(float(viewport_sz_x) / float(launch_dim.y), 1.0f) * cam.zoom;
  float2 viewportscale = 1.0f / make_float2(viewport_sz_x, launch_dim.y);
  float2 d = make_float2(viewport_idx_x, launch_index.y) * viewportscale * aspect * 2.f - aspect; // center of pixel in image plane


  // Compute barrel distortion required to correct for the pincushion inherent
  // in the plano-convex optics in the Oculus Rift, Google Cardboard, etc.
  // Barrel distortion involves computing distance of the pixel from the
  // center of the eye viewport, and then scaling this distance by a factor
  // based on the original distance:
  //   rnew = 0.24 * r^4 + 0.22 * r^2 + 1.0
  // Since we are only using even powers of r, we can use efficient
  // squared distances everywhere.
  // The current implementation doesn't discard rays that would have fallen
  // outside of the original viewport FoV like most OpenGL implementations do.
  // The current implementation computes the distortion for the initial ray
  // but doesn't apply these same corrections to antialiasing jitter, to
  // depth-of-field jitter, etc, so this leaves something to be desired if
  // we want best quality, but this raygen code is really intended for
  // interactive display on an Oculus Rift or Google Cardboard type viewer,
  // so I err on the side of simplicity/speed for now.
  float2 cp = make_float2(viewport_sz_x >> 1, launch_dim.y >> 1) * viewportscale * aspect * 2.f - aspect;
  float2 dr = d - cp;
  float r2 = dr.x*dr.x + dr.y*dr.y;
  float r = 0.24f*r2*r2 + 0.22f*r2 + 1.0f;
  d = r * dr;

  int subframecount = subframe_count();
  unsigned int randseed = tea<4>(idx, subframecount);

  float3 eyepos = cam.pos;
  if (STEREO_ON) {
    eyepos += eyeshift * cam.U;
  }

  float3 ray_origin = eyepos;
  float3 col = make_float3(0.0f);
  float alpha = 0.0f;
  for (uint32_t s=0; s<rtLaunch.aa_samples; s++) {
    float2 jxy;
    jitter_offset2f(randseed, jxy);

    // don't jitter the first sample, since when using an HMD we often run
    // with only one sample per pixel unless the user wants higher fidelity
    jxy *= (subframecount > 0 || s > 0);

    jxy = jxy * viewportscale * aspect * 2.f + d;
    float3 ray_direction = normalize(jxy.x*cam.U + jxy.y*cam.V + cam.W);

    // compute new ray origin and ray direction
    if (DOF_ON) {
      dof_ray(cam.dof_focal_dist, cam.dof_aperture_rad,
              eyepos, ray_origin, ray_direction, ray_direction,
              randseed, cam.V, cam.U);
    }

    // trace the new ray...
    PerRayData_radiance prd;
    prd.alpha = 1.f;
    prd.importance = 1.f;
    prd.depth = 0;
    prd.transcnt = rtLaunch.max_trans;

    uint32_t p0, p1; // pack PRD pointer into p0,p1 payload regs
    packPointer(&prd, p0, p1);

    optixTrace(rtLaunch.traversable,
               ray_origin,
               ray_direction,
               0.0f,                          // tmin
               RT_DEFAULT_MAX,                // tmax
               0.0f,                          // ray time
               OptixVisibilityMask( 255 ),
               OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
               RT_RAY_TYPE_RADIANCE,          // SBT offset
               RT_RAY_TYPE_COUNT,             // SBT stride
               RT_RAY_TYPE_RADIANCE,          // missSBTIndex
               p0, p1,                        // PRD ptr in 2x uint32
               s);                            // use aasample in CH/MISS RNGs

    col += prd.result;
    alpha += prd.alpha;
  }

#if defined(TACHYON_TIME_COLORING)
  accumulate_time_coloring(col, t0);
#else
  accumulate_color(idx, make_float4(col, alpha));
#endif
}
1820 
// Raygen entry point: Oculus Rift camera, mono, no depth-of-field.
// Instantiates the shared template with <STEREO_ON=0, DOF_ON=0>.
extern "C" __global__ void __raygen__camera_oculus_rift() {
  tachyon_camera_oculus_rift_general<0, 0>();
}
1824 
// Raygen entry point: Oculus Rift camera, mono, with depth-of-field.
// Instantiates the shared template with <STEREO_ON=0, DOF_ON=1>.
extern "C" __global__ void __raygen__camera_oculus_rift_dof() {
  tachyon_camera_oculus_rift_general<0, 1>();
}
1828 
// Raygen entry point: Oculus Rift camera, stereoscopic, no depth-of-field.
// Instantiates the shared template with <STEREO_ON=1, DOF_ON=0>.
extern "C" __global__ void __raygen__camera_oculus_rift_stereo() {
  tachyon_camera_oculus_rift_general<1, 0>();
}
1832 
// Raygen entry point: Oculus Rift camera, stereoscopic, with depth-of-field.
// Instantiates the shared template with <STEREO_ON=1, DOF_ON=1>.
extern "C" __global__ void __raygen__camera_oculus_rift_stereo_dof() {
  tachyon_camera_oculus_rift_general<1, 1>();
}
1836 
1837 
1838 //
1839 // Existing versions of Nsight Systems don't show the correct
1840 // OptiX raygen program name in the CUDA API trace line.
1841 // Rather than showing the correct raygen program name, the traces
1842 // show the name of the last raygen program defined in the loaded PTX code.
1843 // We add this here for the time being so that any traces we capture
1844 // with the existing broken behavior clearly indicate that we don't really
1845 // know which raygen program was actually run.
1846 //
// Placeholder raygen program that is never intentionally launched; it
// exists only so that Nsight Systems traces affected by the mis-reported
// "last raygen program" name clearly show an unknown/invalid program.
extern "C" __global__ void __raygen__UNKNOWN() {
  printf("This should never happen!\n");
}
1850 
1851 
1852 //
1853 // Shared utility functions needed by custom geometry intersection or
1854 // shading helper functions.
1855 //
1856 
// normal calc routine needed only to simplify the macro to produce the
// complete combinatorial expansion of template-specialized
// closest hit radiance functions
static __inline__ __device__
float3 calc_ffworld_normal(const float3 &Nshading, const float3 &Ngeometric) {
  // transform both normals from object space into world space, then
  // flip the shading normal so it faces against the incoming ray
  const float3 ray_dir = optixGetWorldRayDirection();
  const float3 wNshade = normalize(optixTransformNormalFromObjectToWorldSpace(Nshading));
  const float3 wNgeom = normalize(optixTransformNormalFromObjectToWorldSpace(Ngeometric));
  return faceforward(wNshade, -ray_dir, wNgeom);
}
1867 
1868 
1869 
1870 //
1871 // Object and/or vertex/color/normal buffers...
1872 //
1873 
1874 
//
// Color-per-cone array primitive
//
// Custom intersection program for truncated (round-to-round) cones,
// evaluated in object space from per-primitive base/apex points and radii.
// Reports only the nearest hit on the cone body.
extern "C" __global__ void __intersection__cone_array_color() {
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  const float3 ray_origin = optixGetObjectRayOrigin();
  const float3 obj_ray_direction = optixGetObjectRayDirection();
  const int primID = optixGetPrimitiveIndex();

  float3 base = sbtHG.cone.base[primID];
  float3 apex = sbtHG.cone.apex[primID];
  float baserad = sbtHG.cone.baserad[primID];
  float apexrad = sbtHG.cone.apexrad[primID];

  float3 axis = (apex - base);
  float3 obase = ray_origin - base;
  float m0 = dot(axis, axis);
  float m1 = dot(obase, axis);
  float m2 = dot(obj_ray_direction, axis);
  float m3 = dot(obj_ray_direction, obase);
  float m5 = dot(obase, obase);

  // caps...  (XXX end-cap handling is not present in this routine)

  // quadratic coefficients for the ray vs. truncated-cone body test
  float rr = baserad - apexrad;
  float hy = m0 + rr*rr;
  float k2 = m0*m0 - m2*m2*hy;
  float k1 = m0*m0*m3 - m1*m2*hy + m0*baserad*(rr*m2);
  float k0 = m0*m0*m5 - m1*m1*hy + m0*baserad*(rr*m1*2.0f - m0*baserad);
  float h = k1*k1 - k2*k0;
  if (h < 0.0f)
    return; // no intersection with the infinite cone

  // nearest root; sqrtf() keeps the computation in single precision
  float t = (-k1 - sqrtf(h)) / k2;
  float y = m1 + t*m2;
  if (y < 0.0f || y > m0)
    return; // hit point lies outside the base/apex axial extent

  optixReportIntersection(t, RT_HIT_CONE);
}
1917 
1918 
// Shading-variable helper for the cone array primitive: re-derives the
// cone-body hit from the same quadratic used by
// __intersection__cone_array_color(), then produces the forward-facing
// world-space surface normal.
static __host__ __device__ __inline__
void get_shadevars_cone_array(const GeomSBTHG &sbtHG, float3 &shading_normal) {
  const float3 ray_origin = optixGetWorldRayOrigin();
  const float3 ray_direction = optixGetWorldRayDirection();
  const int primID = optixGetPrimitiveIndex();

  // compute geometric and shading normals:
  float3 base = sbtHG.cone.base[primID];
  float3 apex = sbtHG.cone.apex[primID];
  float baserad = sbtHG.cone.baserad[primID];
  float apexrad = sbtHG.cone.apexrad[primID];

  float3 axis = (apex - base);
  float3 obase = ray_origin - base;
  float m0 = dot(axis, axis);
  float m1 = dot(obase, axis);
  float m2 = dot(ray_direction, axis);
  float m3 = dot(ray_direction, obase);
  float m5 = dot(obase, obase);

  // quadratic coefficients, identical to the intersection program
  float rr = baserad - apexrad;
  float hy = m0 + rr*rr;
  float k2 = m0*m0 - m2*m2*hy;
  float k1 = m0*m0*m3 - m1*m2*hy + m0*baserad*(rr*m2);
  float k0 = m0*m0*m5 - m1*m1*hy + m0*baserad*(rr*m1*2.0f - m0*baserad);
  float h = k1*k1 - k2*k0;
  // no miss rejection needed here: this helper only runs for a hit that
  // the intersection program already committed

  // nearest root; sqrtf() keeps the computation in single precision
  float t = (-k1 - sqrtf(h)) / k2;
  float y = m1 + t*m2;

  float3 hit = t * ray_direction;
  float3 Ng = normalize(m0*(m0*(obase + hit) + rr*axis*baserad) - axis*hy*y);

  shading_normal = calc_ffworld_normal(Ng, Ng);
}
1964 
1965 
1966 
//
// Color-per-cylinder array primitive
//
// Custom intersection program for finite cylinders, computed in object
// space from per-primitive start/end points and radius.  May report both
// the entry and exit hits; OptiX culls candidates outside [tmin,tmax].
// XXX not yet handling Obj vs. World coordinate xforms
extern "C" __global__ void __intersection__cylinder_array_color() {
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  const float3 ray_origin = optixGetObjectRayOrigin();
  const float3 obj_ray_direction = optixGetObjectRayDirection();
  const int primID = optixGetPrimitiveIndex();

  float3 start = sbtHG.cyl.start[primID];
  float3 end = sbtHG.cyl.end[primID];
  float radius = sbtHG.cyl.radius[primID];

  float3 axis = (end - start);
  float3 rc = ray_origin - start;
  float3 n = cross(obj_ray_direction, axis);
  float lnsq = dot(n, n);

  // check if ray is parallel to cylinder
  if (lnsq == 0.0f) {
    return; // ray is parallel, we missed or went through the "hole"
  }
  float invln = rsqrtf(lnsq);
  n *= invln;

  // shortest distance between the ray and the cylinder axis
  float d = fabsf(dot(rc, n));

  // check for cylinder intersection
  if (d <= radius) {
    float3 O = cross(rc, axis);
    float t = -dot(O, n) * invln;
    O = cross(n, axis);
    O = normalize(O);
    float s = dot(obj_ray_direction, O);
    // fabsf() (not fabs()) for consistency with the single-precision
    // math used throughout this file
    s = fabsf(sqrtf(radius*radius - d*d) / s);
    float axlen = length(axis);
    float3 axis_u = normalize(axis);

    // test hit point against cylinder ends
    float tin = t - s;
    float3 hit = ray_origin + obj_ray_direction * tin;
    float3 tmp2 = hit - start;
    float tmp = dot(tmp2, axis_u);
    if ((tmp > 0.0f) && (tmp < axlen)) {
      optixReportIntersection(tin, RT_HIT_CYLINDER);
    }

    // continue with second test...
    float tout = t + s;
    hit = ray_origin + obj_ray_direction * tout;
    tmp2 = hit - start;
    tmp = dot(tmp2, axis_u);
    if ((tmp > 0.0f) && (tmp < axlen)) {
      optixReportIntersection(tout, RT_HIT_CYLINDER);
    }
  }
}
2024 
2025 
// Shading-variable helper for the cylinder array primitive: rebuilds the
// hit point from the committed t value, projects it onto the cylinder
// axis, and derives the forward-facing world-space surface normal.
static __host__ __device__ __inline__
void get_shadevars_cylinder_array(const GeomSBTHG &sbtHG, float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();
  const float3 origin = optixGetWorldRayOrigin();
  const float3 dir = optixGetWorldRayDirection();
  const float thit = optixGetRayTmax();

  // normal points radially outward from the axis through the hit point
  const float3 start = sbtHG.cyl.start[primID];
  const float3 end = sbtHG.cyl.end[primID];
  const float3 axis_u = normalize(end - start);
  const float3 hitpoint = origin + dir * thit;
  const float axdist = dot(hitpoint - start, axis_u);
  const float3 Ng = normalize(hitpoint - (axdist * axis_u + start));
  shading_normal = calc_ffworld_normal(Ng, Ng);
}
2043 
2044 
2045 
2046 #if 0
2047 
// XXX dead code: legacy OptiX 6.x AABB bounds program for cylinder arrays,
// compiled out by the surrounding "#if 0"; retained for reference only.
extern "C" __global__ void cylinder_array_color_bounds(int primIdx, float result[6]) {
  const float3 start = cylinder_buffer[primIdx].start;
  const float3 end = start + cylinder_buffer[primIdx].axis;
  const float3 rad = make_float3(cylinder_buffer[primIdx].radius);
  optix::Aabb* aabb = (optix::Aabb*)result;

  // only emit a valid AABB for cylinders with a positive, finite radius
  if (rad.x > 0.0f && !isinf(rad.x)) {
    aabb->m_min = fminf(start - rad, end - rad);
    aabb->m_max = fmaxf(start + rad, end + rad);
  } else {
    aabb->invalidate();
  }
}
2061 
2062 #endif
2063 
2064 
2065 //
2066 // Quadrilateral mesh primitive
2067 //
2068 // Based on the ray-quad approach by Ares Lagae and Philip Dutré,
2069 // "An efficient ray-quadrilateral intersection test"
2070 // Journal of graphics tools, 10(4):23-32, 2005
2071 // https://graphics.cs.kuleuven.be/publications/LD05ERQIT/LD05ERQIT_paper.pdf
2072 // https://github.com/erich666/jgt-code/blob/master/Volume_10/Number_4/Lagae2005/erqit.cpp
2073 //
2074 // Note: The 2-D projection scheme used by Inigo Quilez is probably also worthy
2075 // of a look, since the later stages work in 2-D and might therefore involve
2076 // fewer FLOPS and registers:
2077 // https://www.shadertoy.com/view/XtlBDs
2078 //
2079 
2080 // #define QUAD_VERTEX_REORDERING 1
2081 
// Custom intersection program for quadrilateral meshes using the
// Lagae-Dutré ray-quad test (see references cited above).  On a hit,
// the barycentric alpha/beta values are passed to the hit programs as
// intersection attributes 0 and 1.
extern "C" __global__ void __intersection__quadmesh() {
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  const float3 ray_origin = optixGetWorldRayOrigin();
  const float3 ray_direction = optixGetWorldRayDirection();
  const int primID = optixGetPrimitiveIndex();
  // degenerate-determinant threshold, scaled well below the scene epsilon
  const float quadepsilon = rtLaunch.scene.epsilon * 1.0e-2f;

  auto & qmesh = sbtHG.quadmesh;

  // support both implicitly indexed (indices==NULL, four consecutive
  // vertices per quad) and explicitly indexed quad meshes
  int4 index;
  if (qmesh.indices == NULL) {
    int idx4 = primID*4;
    index = make_int4(idx4, idx4+1, idx4+2, idx4+3);
  } else {
    index = qmesh.indices[primID];
  }

  // use key variable names as per Lagae and Dutré paper
  const float3 &v00 = qmesh.vertices[index.x];
  const float3 &v10 = qmesh.vertices[index.y];
  const float3 &v11 = qmesh.vertices[index.z];
  const float3 &v01 = qmesh.vertices[index.w];

  // first edge-based test against the (v00, v10, v01) corner
  float3 e01 = v10 - v00;
  float3 e03 = v01 - v00;
  float3 P = cross(ray_direction, e03);
  float det = dot(e01, P);
  if (fabsf(det) < quadepsilon)
    return; // ray is (nearly) parallel to the quad plane

#if 0
  float inv_det = __frcp_rn(det);
#else
  float inv_det = 1.0f / det;
#endif
  float3 T = ray_origin - v00;
  float alpha = dot(T, P) * inv_det;
  if (alpha < 0.0f)
    return;
#if defined(QUAD_VERTEX_REORDERING)
  if (alpha > 1.0f)
    return; // uncomment if vertex reordering is used
#endif
  float3 Q = cross(T, e01);
  float beta = dot(ray_direction, Q) * inv_det;
  if (beta < 0.0f)
    return;
#if defined(QUAD_VERTEX_REORDERING)
  if (beta > 1.0f)
    return;
#endif

  if ((alpha + beta) > 1.0f) {
    // reject rays that intersect plane Q
    // to the left of v11v10 or to the right of v11v01
    float3 e23 = v01 - v11;
    float3 e21 = v10 - v11;
    float3 P_prime = cross(ray_direction, e21);
    float det_prime = dot(e23, P_prime);
    if (fabsf(det_prime) < quadepsilon)
      return;
#if 0
    float inv_det_prime = __frcp_rn(det_prime);
#else
    float inv_det_prime = 1.0f / det_prime;
#endif
    float3 T_prime = ray_origin - v11;
    float alpha_prime = dot(T_prime, P_prime) * inv_det_prime;
    if (alpha_prime < 0.0f)
      return;
    float3 Q_prime = cross(T_prime, e23);
    float beta_prime = dot(ray_direction, Q_prime) * inv_det_prime;
    if (beta_prime < 0.0f)
      return;
  }

  float t = dot(e03, Q) * inv_det;

  // do we really need to screen this for positive t-values here?
  if (t > 0.0f) {
    // report intersection t value, and the alpha/beta values needed to
    // interpolate colors and normals during shading
    optixReportIntersection(t, RT_HIT_QUAD,
                            __float_as_int(alpha),  // report alpha as attrib 0
                            __float_as_int(beta));  // report beta as attrib 1
  }
}
2169 
2170 
// calculate barycentrics for vertex v11 -- can be precomputed and stored
static __host__ __device__ __inline__
void quad_calc_barycentrics_v11(const GeomSBTHG &sbtHG,
                                float &alpha11, float &beta11) {
  const int primID = optixGetPrimitiveIndex();

  auto & qmesh = sbtHG.quadmesh;

  // support both implicitly indexed (indices==NULL, four consecutive
  // vertices per quad) and explicitly indexed quad meshes, matching
  // __intersection__quadmesh() and get_shadevars_quadmesh(); previously
  // an implicitly indexed mesh would have dereferenced a NULL pointer
  int4 index;
  if (qmesh.indices == NULL) {
    int idx4 = primID*4;
    index = make_int4(idx4, idx4+1, idx4+2, idx4+3);
  } else {
    index = qmesh.indices[primID];
  }

  // use key variable names and vertex order per Lagae and Dutré paper
  // vertices are listed in counterclockwise order (v00, v10, v11, v01)
  const float3 &v00 = qmesh.vertices[index.x];
  const float3 &v10 = qmesh.vertices[index.y];
  const float3 &v11 = qmesh.vertices[index.z];
  const float3 &v01 = qmesh.vertices[index.w];

  float3 e01 = v10 - v00; // also calced during isect tests
  float3 e02 = v11 - v00;
  float3 e03 = v01 - v00; // also calced during isect tests
  float3 n = cross(e01, e03);

  // solve on the dominant axis of the quad normal for the best
  // numerical conditioning of the 2-D barycentric system
  float3 absn = make_float3(fabsf(n.x), fabsf(n.y), fabsf(n.z));
  if ((absn.x >= absn.y) && (absn.x >= absn.z)) {
    alpha11 = ((e02.y * e03.z) - (e02.z * e03.y)) / n.x;
    beta11 = ((e01.y * e02.z) - (e01.z * e02.y)) / n.x;
  } else if ((absn.y >= absn.x) && (absn.y >= absn.z)) {
    alpha11 = ((e02.z * e03.x) - (e02.x * e03.z)) / n.y;
    beta11 = ((e01.z * e02.x) - (e01.x * e02.z)) / n.y;
  } else {
    alpha11 = ((e02.x * e03.y) - (e02.y * e03.x)) / n.z;
    beta11 = ((e01.x * e02.y) - (e01.y * e02.x)) / n.z;
  }
}
2205 
2206 
// calculate bilinear interpolation parameters u and v given the
// barycentric coordinates alpha/beta obtained during intersection,
// and the barycentric coords for vertex v11, alpha11/beta11, which
// can either be computed on-demand or stored in advance
static __host__ __device__ __inline__
void quad_calc_bilinear_coords(const GeomSBTHG &sbtHG,
                               const float alpha, const float beta,
                               const float &alpha11, const float &beta11,
                               float &u, float &v) {
  // NOTE(review): sbtHG is presently unused here; it is kept for
  // signature symmetry with the other quad helpers and their call sites
  const float quadepsilon = rtLaunch.scene.epsilon * 1.0e-2f;
  const float alpha11minus1 = alpha11 - 1.0f;
  const float beta11minus1 = beta11 - 1.0f;

  if (fabsf(alpha11minus1) < quadepsilon) {
    // quad is a trapezium
    u = alpha;
    if (fabsf(beta11minus1) < quadepsilon) {
      v = beta; // quad is a parallelogram
    } else {
      v = beta / ((u * beta11minus1) + 1.0f); // quad is a trapezium
    }
  } else if (fabsf(beta11minus1) < quadepsilon) {
    // quad is a trapezium
    v = beta;
    u = alpha / ((v * alpha11minus1) + 1.0f);
  } else {
    // general quad: solve the quadratic A*u^2 + B*u + C = 0 for u,
    // using the cancellation-avoiding form of the quadratic formula
    float A = -beta11minus1;
    float B = (alpha * beta11minus1) - (beta * alpha11minus1) - 1.0f;
    float C = alpha;
    float D = (B * B) - (4.0f * A * C);
    float Q = -0.5f * (B + (((B < 0.0f) ? -1.0f : 1.0f) * sqrtf(D)));
    u = Q / A;
    if ((u < 0.0f) || (u > 1.0f))
      u = C / Q;
    v = beta / ((u * beta11minus1) + 1.0f);
  }
}
2244 
2245 
// Shading-variable helper for the quad mesh primitive: recovers the
// barycentric attributes reported by __intersection__quadmesh(),
// converts them to bilinear u/v, and produces the interpolated
// shading normal and hit color.
static __host__ __device__ __inline__
void get_shadevars_quadmesh(const GeomSBTHG &sbtHG, float3 &hit_color,
                            float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();

  auto & qmesh = sbtHG.quadmesh;

  // support both implicitly indexed (indices==NULL, four consecutive
  // vertices per quad) and explicitly indexed quad meshes
  int4 index;
  if (qmesh.indices == NULL) {
    int idx4 = primID*4;
    index = make_int4(idx4, idx4+1, idx4+2, idx4+3);
  } else {
    index = qmesh.indices[primID];
  }

  // barycentric alpha/beta reported as intersection attributes 0/1
  float alpha = __int_as_float(optixGetAttribute_0());
  float beta = __int_as_float(optixGetAttribute_1());

  // calc barycentric coords of vertex v11
  // XXX could be precomputed and stored
  float alpha11, beta11;
  quad_calc_barycentrics_v11(sbtHG, alpha11, beta11);

  // calc bilinear interpolation parameters u and v
  float u, v;
  quad_calc_bilinear_coords(sbtHG, alpha, beta, alpha11, beta11, u, v);

#if 0
  // in practice we can get u/v values beyond 1.0 with not-quite-planar
  // quads, which will lead to interpolation problems,
  // so we must clamp them to the range 0->1 before use
  if (u > 1.0f || v > 1.0f)
    printf("quad: u:%.2f v:%.2f a:%.2f b:%.2f a11:%.2f b11:%.2f\n",
           u, v, alpha, beta, alpha11, beta11);
#endif

  // clamp u/v to [0,1] to guard against not-quite-planar quads
  u = __saturatef(u);
  v = __saturatef(v);

  // compute geometric and shading normals:
  float3 Ng, Ns;
  if (qmesh.normals != nullptr) {
    const float3 &v00 = qmesh.vertices[index.x];
    const float3 &v10 = qmesh.vertices[index.y];
//    const float3 &v11 = qmesh.vertices[index.z];
    const float3 &v01 = qmesh.vertices[index.w];
    Ng = normalize(cross(v01-v00, v10-v00));

    const float3& n00 = qmesh.normals[index.x];
    const float3& n10 = qmesh.normals[index.y];
    const float3& n11 = qmesh.normals[index.z];
    const float3& n01 = qmesh.normals[index.w];

    // interpolate quad normal using bilinear params u and v
    Ns = normalize(((n00 * (1.0f - u)) + (n10 * u)) * (1.0f - v) +
                   ((n01 * (1.0f - u)) + (n11 * u)) * v);
  } else {
    // flat-shaded quad: the facet normal serves as both normals
    const float3 &v00 = qmesh.vertices[index.x];
    const float3 &v10 = qmesh.vertices[index.y];
//    const float3 &v11 = qmesh.vertices[index.z];
    const float3 &v01 = qmesh.vertices[index.w];
    Ns = Ng = normalize(cross(v01-v00, v10-v00));
  }
  shading_normal = calc_ffworld_normal(Ns, Ng);

  // Assign vertex-interpolated, per-primitive or uniform color
  if (qmesh.vertcolors3f != nullptr) {
    const float3 c00 = qmesh.vertcolors3f[index.x];
    const float3 c10 = qmesh.vertcolors3f[index.y];
    const float3 c11 = qmesh.vertcolors3f[index.z];
    const float3 c01 = qmesh.vertcolors3f[index.w];

    // interpolate quad color using bilinear params u and v
    hit_color = ((c00 * (1.0f - u)) + (c10 * u)) * (1.0f - v) +
                ((c01 * (1.0f - u)) + (c11 * u)) * v;
  } else if (qmesh.vertcolors4u != nullptr) {
    // unsigned byte colors are scaled to floats in [0,1]
    const float ci2f = 1.0f / 255.0f;
    const float3 c00 = qmesh.vertcolors4u[index.x] * ci2f;
    const float3 c10 = qmesh.vertcolors4u[index.y] * ci2f;
    const float3 c11 = qmesh.vertcolors4u[index.z] * ci2f;
    const float3 c01 = qmesh.vertcolors4u[index.w] * ci2f;

    // interpolate quad color using bilinear params u and v
    hit_color = ((c00 * (1.0f - u)) + (c10 * u)) * (1.0f - v) +
                ((c01 * (1.0f - u)) + (c11 * u)) * v;
  } else if (sbtHG.prim_color != nullptr) {
    hit_color = sbtHG.prim_color[primID];
  } else {
    hit_color = sbtHG.uniform_color;
  }
}
2337 
2338 
2339 
//
// Ring array primitive
//
// Custom intersection program for flat annular rings: intersect the ray
// with the ring's plane, then accept hits whose distance from the ring
// center lies strictly between the inner and outer radii.
extern "C" __global__ void __intersection__ring_array() {
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  const float3 obj_ray_origin = optixGetObjectRayOrigin();
  const float3 obj_ray_direction = optixGetObjectRayDirection();
  const int primID = optixGetPrimitiveIndex();

  const float3 center = sbtHG.ring.center[primID];
  const float3 norm = sbtHG.ring.norm[primID];
  const float inrad = sbtHG.ring.inrad[primID];
  const float outrad = sbtHG.ring.outrad[primID];

  // ray-plane intersection
  const float denom = dot(norm, obj_ray_direction);
  if (denom == 0.0f)
    return; // ray is parallel to the ring's plane

  const float plane_d = -dot(center, norm);
  const float tplane = -(plane_d + dot(norm, obj_ray_origin)) / denom;
  if (tplane < 0.0f)
    return; // plane lies behind the ray origin

  // radial containment test against the annulus
  const float3 hitpoint = obj_ray_origin + tplane * obj_ray_direction;
  const float hitdist = length(hitpoint - center);
  if ((hitdist > inrad) && (hitdist < outrad)) {
    optixReportIntersection(tplane, RT_HIT_RING);
  }
}
2368 
2369 
// Shading-variable helper for the ring array primitive: the stored
// per-ring plane normal serves as both the geometric and shading normal,
// flipped toward the viewer in world space.
static __host__ __device__ __inline__
void get_shadevars_ring_array(const GeomSBTHG &sbtHG, float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();
  const float3 ringnorm = sbtHG.ring.norm[primID];
  shading_normal = calc_ffworld_normal(ringnorm, ringnorm);
}
2378 
2379 
2380 
2381 #if 0
2382 
// XXX dead code: legacy OptiX 6.x AABB bounds program for ring arrays,
// compiled out by the surrounding "#if 0"; retained for reference only.
extern "C" __global__ void ring_array_color_bounds(int primIdx, float result[6]) {
  const float3 center = ring_buffer[primIdx].center;
  const float3 rad = make_float3(ring_buffer[primIdx].outrad);
  optix::Aabb* aabb = (optix::Aabb*)result;

  // only emit a valid AABB for rings with a positive, finite outer radius
  if (rad.x > 0.0f && !isinf(rad.x)) {
    aabb->m_min = center - rad;
    aabb->m_max = center + rad;
  } else {
    aabb->invalidate();
  }
}
2395 
2396 #endif
2397 
2398 
2399 
2400 #if defined(TACHYON_USE_SPHERES_HEARNBAKER)
2401 
// Ray-sphere intersection method with improved floating point precision
// for cases where the sphere size is small relative to the distance
// from the camera to the sphere. This implementation is based on
// Eq. 10-72, p.603 of "Computer Graphics with OpenGL", 3rd Ed., by
// Donald Hearn and Pauline Baker, 2004, Eq. 10, p.639 in the 4th edition
// (Hearn, Baker, Carithers), and in Ray Tracing Gems,
// Precision Improvements for Ray/Sphere Intersection, pp. 87-94, 2019.
static __host__ __device__ __inline__
void sphere_intersect_hearn_baker(float3 center, float rad) {
  const float3 ray_origin = optixGetObjectRayOrigin();
  const float3 obj_ray_direction = optixGetObjectRayDirection();

  // if a scaling xform has been applied, the ray length won't be normalized,
  // so we have to scale the resulting t hitpoints to world coords
  float ray_invlen;
  const float3 ray_direction = normalize_len(obj_ray_direction, ray_invlen);

  // the "remedy term" formulation reduces cancellation error when the
  // sphere radius is small compared to the origin-to-center distance
  float3 deltap = center - ray_origin;
  float ddp = dot(ray_direction, deltap);
  float3 remedyTerm = deltap - ddp * ray_direction;
  float disc = rad*rad - dot(remedyTerm, remedyTerm);
  if (disc >= 0.0f) {
    float disc_root = sqrtf(disc);

#if 0 && defined(FASTONESIDEDSPHERES)
    // disabled fast path: report only the nearest intersection
    float t1 = ddp - disc_root;
    t1 *= ray_invlen; // transform t value back to world coordinates
    optixReportIntersection(t1, RT_HIT_SPHERE);
#else
    // report both candidate hits; OptiX culls any outside [tmin,tmax]
    float t1 = ddp - disc_root;
    t1 *= ray_invlen; // transform t value back to world coordinates
    optixReportIntersection(t1, RT_HIT_SPHERE);

    float t2 = ddp + disc_root;
    t2 *= ray_invlen; // transform t value back to world coordinates
    optixReportIntersection(t2, RT_HIT_SPHERE);
#endif
  }
}
2441 
2442 #else
2443 
//
// Ray-sphere intersection using standard geometric solution approach
//
static __host__ __device__ __inline__
void sphere_intersect_classic(float3 center, float rad) {
  const float3 ray_origin = optixGetObjectRayOrigin();
  const float3 obj_ray_direction = optixGetObjectRayDirection();

  // if a scaling xform has been applied, the ray length won't be normalized,
  // so we have to scale the resulting t hitpoints to world coords
  float ray_invlen;
  const float3 ray_direction = normalize_len(obj_ray_direction, ray_invlen);

  // classic quadratic discriminant for the ray-sphere test
  float3 deltap = center - ray_origin;
  float ddp = dot(ray_direction, deltap);
  float disc = ddp*ddp + rad*rad - dot(deltap, deltap);
  if (disc > 0.0f) {
    float disc_root = sqrtf(disc);

#if 0 && defined(FASTONESIDEDSPHERES)
    // only calculate the nearest intersection, for speed
    float t1 = ddp - disc_root;
    t1 *= ray_invlen; // transform t value back to world coordinates
    optixReportIntersection(t1, RT_HIT_SPHERE);
#else
    // report both candidate hits; OptiX culls any outside [tmin,tmax]
    float t2 = ddp + disc_root;
    t2 *= ray_invlen; // transform t value back to world coordinates
    optixReportIntersection(t2, RT_HIT_SPHERE);

    float t1 = ddp - disc_root;
    t1 *= ray_invlen; // transform t value back to world coordinates
    optixReportIntersection(t1, RT_HIT_SPHERE);
#endif
  }
}
2479 
2480 #endif
2481 
2482 
//
// Sphere array primitive
//
// Dispatches to one of the two ray-sphere intersection routines above,
// selected at compile time by TACHYON_USE_SPHERES_HEARNBAKER.
extern "C" __global__ void __intersection__sphere_array() {
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());
  const int primID = optixGetPrimitiveIndex();

  // spheres are stored packed in a float4: xyz center + w radius
  const float4 spdata = sbtHG.sphere.PosRadius[primID];
  const float3 spcenter = make_float3(spdata);
  const float sprad = spdata.w;

#if defined(TACHYON_USE_SPHERES_HEARNBAKER)
  sphere_intersect_hearn_baker(spcenter, sprad);
#else
  sphere_intersect_classic(spcenter, sprad);
#endif
}
2499 
2500 
// Shading-variable helper for the sphere array primitive: derives the
// unit surface normal as (hitpoint - center) / radius, without explicitly
// forming the world-space hit point.
static __host__ __device__ __inline__
void get_shadevars_sphere_array(const GeomSBTHG &sbtHG, float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();
  const float3 origin = optixGetWorldRayOrigin();
  const float3 dir = optixGetWorldRayDirection();
  const float thit = optixGetRayTmax();

  // unpack sphere center + radius, stored packed in a float4
  const float4 spdata = sbtHG.sphere.PosRadius[primID];
  const float3 spcenter = make_float3(spdata);
  const float sprad = spdata.w;

  // compute geometric and shading normals:
  const float3 deltap = spcenter - origin;
  const float3 Ng = (thit * dir - deltap) * (1.0f / sprad);
  shading_normal = calc_ffworld_normal(Ng, Ng);
}
2516 
2517 
2518 #if 0
// XXX dead code: legacy OptiX 6.x AABB bounds program for sphere arrays,
// compiled out by the surrounding "#if 0"; retained for reference only.
extern "C" __global__ void sphere_array_bounds(int primIdx, float result[6]) {
  const float3 cen = sphere_buffer[primIdx].center;
  const float3 rad = make_float3(sphere_buffer[primIdx].radius);
  optix::Aabb* aabb = (optix::Aabb*)result;

  // only emit a valid AABB for spheres with a positive, finite radius
  if (rad.x > 0.0f && !isinf(rad.x)) {
    aabb->m_min = cen - rad;
    aabb->m_max = cen + rad;
  } else {
    aabb->invalidate();
  }
}
2531 #endif
2532 
2533 
2534 #if 0
2535 // OptiX 6.x bounds code
// XXX dead code: legacy OptiX 6.x AABB bounds program for color-per-sphere
// arrays, compiled out by the surrounding "#if 0"; retained for reference.
extern "C" __global__ void sphere_array_color_bounds(int primIdx, float result[6]) {
  const float3 cen = sphere_color_buffer[primIdx].center;
  const float3 rad = make_float3(sphere_color_buffer[primIdx].radius);
  optix::Aabb* aabb = (optix::Aabb*)result;

  // only emit a valid AABB for spheres with a positive, finite radius
  if (rad.x > 0.0f && !isinf(rad.x)) {
    aabb->m_min = cen - rad;
    aabb->m_max = cen + rad;
  } else {
    aabb->invalidate();
  }
}
2548 #endif
2549 
2550 
2551 
2552 //
2553 // Curve array primitives
2554 //
2555 #if OPTIX_VERSION >= 70100
// Shading-variable helper for linear curve primitives (OptiX >= 7.1).
// XXX unfinished stub: currently returns the uniform color and a fixed
// -Z shading normal until curve shading is properly implemented.
static __host__ __device__ __inline__
void get_shadevars_curves_linear(const GeomSBTHG &sbtHG, float3 &hit_color,
                                 float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();

  // XXX unfinished
  hit_color = sbtHG.uniform_color;
  shading_normal = make_float3(0.0f, 0.0f, -1.0f);
}
2565 #endif
2566 
2567 #if OPTIX_VERSION >= 70400
// Shading-variable helper for Catmull-Rom curve primitives (OptiX >= 7.4).
// XXX unfinished stub: currently returns the uniform color and a fixed
// -Z shading normal until curve shading is properly implemented.
static __host__ __device__ __inline__
void get_shadevars_curves_catmullrom(const GeomSBTHG &sbtHG, float3 &hit_color,
                                     float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();

  // XXX unfinished
  hit_color = sbtHG.uniform_color;
  shading_normal = make_float3(0.0f, 0.0f, -1.0f);
}
2577 #endif
2578 
2579 
2580 //
2581 // Triangle mesh/array primitives
2582 //
2583 
// Shading-variable helper for triangle mesh primitives: interpolates the
// shading normal and resolves the hit color from whichever per-vertex,
// per-primitive, texture, or uniform color source the mesh provides.
// hit_alpha is only overwritten when a texture supplies an alpha channel.
static __host__ __device__ __inline__
void get_shadevars_trimesh(const GeomSBTHG &sbtHG, float3 &hit_color,
                           float &hit_alpha, float3 &shading_normal) {
  const int primID = optixGetPrimitiveIndex();

  auto & tmesh = sbtHG.trimesh;

  // support both implicitly indexed (indices==NULL, three consecutive
  // vertices per triangle) and explicitly indexed triangle meshes
  int3 index;
  if (tmesh.indices == NULL) {
    int idx3 = primID*3;
    index = make_int3(idx3, idx3+1, idx3+2);
  } else {
    index = tmesh.indices[primID];
  }

  const float2 barycentrics = optixGetTriangleBarycentrics();

  // compute geometric and shading normals:
  float3 Ng, Ns;
  if (tmesh.packednormals != nullptr) {

#if 1
    // XXX packed normals currently only work for implicit indexed buffers
    Ng = unpackNormal(tmesh.packednormals[primID].x);
    const float3& n0 = unpackNormal(tmesh.packednormals[primID].y);
    const float3& n1 = unpackNormal(tmesh.packednormals[primID].z);
    const float3& n2 = unpackNormal(tmesh.packednormals[primID].w);
#else
    // XXX we can't use indexing for uint4 packed normals
    Ng = unpackNormal(tmesh.packednormals[index].x);
    const float3& n0 = unpackNormal(tmesh.packednormals[index.x].y);
    const float3& n1 = unpackNormal(tmesh.packednormals[index.y].z);
    const float3& n2 = unpackNormal(tmesh.packednormals[index.z].w);
#endif

    // interpolate triangle normal from barycentrics
    Ns = normalize(n0 * (1.0f - barycentrics.x - barycentrics.y) +
                   n1 * barycentrics.x + n2 * barycentrics.y);
  } else if (tmesh.normals != nullptr) {
    // geometric normal from the triangle's edge cross product
    const float3 &A = tmesh.vertices[index.x];
    const float3 &B = tmesh.vertices[index.y];
    const float3 &C = tmesh.vertices[index.z];
    Ng = normalize(cross(B-A, C-A));

    const float3& n0 = tmesh.normals[index.x];
    const float3& n1 = tmesh.normals[index.y];
    const float3& n2 = tmesh.normals[index.z];

    // interpolate triangle normal from barycentrics
    Ns = normalize(n0 * (1.0f - barycentrics.x - barycentrics.y) +
                   n1 * barycentrics.x + n2 * barycentrics.y);
  } else {
    // flat-shaded triangle: the facet normal serves as both normals
    const float3 &A = tmesh.vertices[index.x];
    const float3 &B = tmesh.vertices[index.y];
    const float3 &C = tmesh.vertices[index.z];
    Ns = Ng = normalize(cross(B-A, C-A));
  }
  shading_normal = calc_ffworld_normal(Ns, Ng);

  // Assign texture, vertex-interpolated, per-primitive or uniform color
  if (tmesh.vertcolors4u != nullptr) {
    // unsigned byte colors are scaled to floats in [0,1]
    const float ci2f = 1.0f / 255.0f;
    const float3 c0 = tmesh.vertcolors4u[index.x] * ci2f;
    const float3 c1 = tmesh.vertcolors4u[index.y] * ci2f;
    const float3 c2 = tmesh.vertcolors4u[index.z] * ci2f;

    // interpolate triangle color from barycentrics
    hit_color = (c0 * (1.0f - barycentrics.x - barycentrics.y) +
                 c1 * barycentrics.x + c2 * barycentrics.y);
  } else if (tmesh.vertcolors3f != nullptr) {
    const float3 c0 = tmesh.vertcolors3f[index.x];
    const float3 c1 = tmesh.vertcolors3f[index.y];
    const float3 c2 = tmesh.vertcolors3f[index.z];

    // interpolate triangle color from barycentrics
    hit_color = (c0 * (1.0f - barycentrics.x - barycentrics.y) +
                 c1 * barycentrics.x + c2 * barycentrics.y);
  } else if (sbtHG.prim_color != nullptr) {
    hit_color = sbtHG.prim_color[primID];
  } else if (tmesh.tex2d != nullptr) {
    float2 txc0 = tmesh.tex2d[index.x];
    float2 txc1 = tmesh.tex2d[index.y];
    float2 txc2 = tmesh.tex2d[index.z];

    // interpolate tex coord from triangle barycentrics
    float2 texcoord = (txc0 * (1.0f - barycentrics.x - barycentrics.y) +
                       txc1 * barycentrics.x + txc2 * barycentrics.y);

    // XXX need to implement ray differentials for tex filtering
    int matidx = sbtHG.materialindex;
    const auto &mat = rtLaunch.materials[matidx];
    float4 tx = tex2D<float4>(mat.tex, texcoord.x, texcoord.y);
    hit_color = make_float3(tx);
    hit_alpha = tx.w; // overwrite hit_alpha when available
  } else if (tmesh.tex3d != nullptr) {
    float3 txc0 = tmesh.tex3d[index.x];
    float3 txc1 = tmesh.tex3d[index.y];
    float3 txc2 = tmesh.tex3d[index.z];

    // interpolate tex coord from triangle barycentrics
    float3 texcoord = (txc0 * (1.0f - barycentrics.x - barycentrics.y) +
                       txc1 * barycentrics.x + txc2 * barycentrics.y);

    // XXX need to implement ray differentials for tex filtering
    int matidx = sbtHG.materialindex;
    const auto &mat = rtLaunch.materials[matidx];
    float4 tx = tex3D<float4>(mat.tex, texcoord.x, texcoord.y, texcoord.z);
    hit_color = make_float3(tx);
    hit_alpha = tx.w; // overwrite hit_alpha when available
  } else {
    hit_color = sbtHG.uniform_color;
  }
}
2697 
2698 
2699 
2700 #if 0
2701 
// Compute a conservative axis-aligned bounding box for a triangle.
// Degenerate triangles (zero area or non-finite extent) are invalidated
// so the acceleration structure builder culls them.
__device__ __inline__ void generic_tri_bounds(optix::Aabb *aabb,
                                              float3 v0, float3 v1, float3 v2) {
#if 1
  // conventional paranoid implementation that culls degenerate triangles:
  // twice the triangle area, via the cross-product parallelogram
  const float twicearea = length(cross(v1 - v0, v2 - v0));
  const bool usable = (twicearea > 0.0f) && !isinf(twicearea);
  if (!usable) {
    aabb->invalidate();
  } else {
    aabb->m_min = fminf(fminf(v0, v1), v2);
    aabb->m_max = fmaxf(fmaxf(v0, v1), v2);
  }
#else
  // don't cull any triangles, even if they might be degenerate
  aabb->m_min = fminf(fminf(v0, v1), v2);
  aabb->m_max = fmaxf(fmaxf(v0, v1), v2);
#endif
}
2720 
2721 
//
// triangle mesh with vertices, geometric normal, uniform color
//
// Bug fix: v0 was previously fetched from stri_buffer (the smooth-shaded
// triangle array) while v1/v2 came from tri_buffer, so the intersection
// test mixed vertices from two unrelated primitive arrays.
extern "C" __global__ void ort_tri_intersect(int primIdx) {
  float3 v0 = tri_buffer[primIdx].v0;
  float3 v1 = tri_buffer[primIdx].v1;
  float3 v2 = tri_buffer[primIdx].v2;

  // Intersect ray with triangle
  float3 n;
  float t, beta, gamma;
  if (intersect_triangle(ray, v0, v1, v2, n, t, beta, gamma)) {
    if (rtPotentialIntersection(t)) {
      // flat shading: the geometric normal doubles as the shading normal
      shading_normal = geometric_normal = normalize(n);

      // uniform color for the entire object
      prim_color = uniform_color;
      rtReportIntersection(0);
    }
  }
}
2743 
// Bounds program for flat-shaded triangles: fetch the three vertices and
// delegate AABB computation to the shared triangle-bounds helper.
extern "C" __global__ void ort_tri_bounds(int primIdx, float result[6]) {
  const float3 va = tri_buffer[primIdx].v0;
  const float3 vb = tri_buffer[primIdx].v1;
  const float3 vc = tri_buffer[primIdx].v2;
  generic_tri_bounds((optix::Aabb *) result, va, vb, vc);
}
2752 
2753 
//
// triangle mesh with vertices, smoothed normals, uniform color
//
extern "C" __global__ void ort_stri_intersect(int primIdx) {
  const float3 va = stri_buffer[primIdx].v0;
  const float3 vb = stri_buffer[primIdx].v1;
  const float3 vc = stri_buffer[primIdx].v2;

  // Intersect ray with triangle
  float3 ngeom;
  float thit, b, g;
  if (intersect_triangle(ray, va, vb, vc, ngeom, thit, b, g)) {
    if (rtPotentialIntersection(thit)) {
      // interpolate per-vertex normals using the barycentric weights
      const float3 na = stri_buffer[primIdx].n0;
      const float3 nb = stri_buffer[primIdx].n1;
      const float3 nc = stri_buffer[primIdx].n2;
      shading_normal = normalize(nb*b + nc*g + na*(1.0f-b-g));
      geometric_normal = normalize(ngeom);

      // uniform color for the entire object
      prim_color = uniform_color;
      rtReportIntersection(0);
    }
  }
}
2779 
// Bounds program for smooth-shaded triangles: fetch the three vertices and
// delegate AABB computation to the shared triangle-bounds helper.
extern "C" __global__ void ort_stri_bounds(int primIdx, float result[6]) {
  const float3 va = stri_buffer[primIdx].v0;
  const float3 vb = stri_buffer[primIdx].v1;
  const float3 vc = stri_buffer[primIdx].v2;
  generic_tri_bounds((optix::Aabb *) result, va, vb, vc);
}
2788 
2789 
//
// triangle mesh with vertices, smoothed normals, colors
//
extern "C" __global__ void ort_vcstri_intersect(int primIdx) {
  const float3 va = vcstri_buffer[primIdx].v0;
  const float3 vb = vcstri_buffer[primIdx].v1;
  const float3 vc = vcstri_buffer[primIdx].v2;

  // Intersect ray with triangle
  float3 ngeom;
  float thit, b, g;
  if (intersect_triangle(ray, va, vb, vc, ngeom, thit, b, g)) {
    if (rtPotentialIntersection(thit)) {
      // interpolate per-vertex normals using the barycentric weights
      const float3 na = vcstri_buffer[primIdx].n0;
      const float3 nb = vcstri_buffer[primIdx].n1;
      const float3 nc = vcstri_buffer[primIdx].n2;
      shading_normal = normalize(nb*b + nc*g + na*(1.0f-b-g));
      geometric_normal = normalize(ngeom);

      // interpolate per-vertex colors using the same weights
      const float3 ca = vcstri_buffer[primIdx].c0;
      const float3 cb = vcstri_buffer[primIdx].c1;
      const float3 cc = vcstri_buffer[primIdx].c2;
      prim_color = cb*b + cc*g + ca*(1.0f-b-g);
      rtReportIntersection(0);
    }
  }
}
2817 
// Bounds program for vertex-colored smooth triangles: fetch the three
// vertices and delegate AABB computation to the shared helper.
extern "C" __global__ void ort_vcstri_bounds(int primIdx, float result[6]) {
  const float3 va = vcstri_buffer[primIdx].v0;
  const float3 vb = vcstri_buffer[primIdx].v1;
  const float3 vc = vcstri_buffer[primIdx].v2;
  generic_tri_bounds((optix::Aabb *) result, va, vb, vc);
}
2826 
2827 #endif
2828 
2829 
2830 
2831 //
2832 // Support functions for closest hit and any hit programs for radiance rays
2833 //
2834 
// Fog implementation.
// Returns a fog modulation factor in [0,1]: 1.0 means no fog at the hit
// point, 0.0 means fully fogged.  Computes planar fog (e.g. to match
// OpenGL) by projecting the hit distance onto the camera view direction
// to yield a planar depth value.
// NOTE: hit_point is currently unused; depth comes from the ray t value.
static __device__ __forceinline__ float fog_coord(float3 hit_point) {
  const float3 raydir = optixGetWorldRayDirection();
  const float thit = optixGetRayTmax();
  const auto &scene = rtLaunch.scene;

  // planar depth: hit distance projected onto the camera view axis
  const float depth = dot(raydir, rtLaunch.cam.W) * thit;

  float fogval = 1.0f;
  if (scene.fog_mode == 1) {
    // RT_FOG_LINEAR
    fogval = (scene.fog_end - depth) / (scene.fog_end - scene.fog_start);
  } else if (scene.fog_mode == 2) {
    // RT_FOG_EXP
    // XXX Tachyon needs to allow fog_start to be non-zero for
    // exponential fog, but fixed-function OpenGL does not...
    // float d = fog_density * (depth - fog_start);
    const float d = scene.fog_density * depth;
    fogval = expf(-d);
  } else if (scene.fog_mode == 3) {
    // RT_FOG_EXP2
    // XXX Tachyon needs to allow fog_start to be non-zero for
    // exponential fog, but fixed-function OpenGL does not...
    // float d = fog_density * (depth - fog_start);
    const float d = scene.fog_density * depth;
    fogval = expf(-d * d);
  }
  // RT_FOG_NONE (0) and unrecognized modes leave fogval at 1.0f

  return __saturatef(fogval); // clamp to [0,1]
}
2874 
2875 
// Blend the shaded surface color with the scene background color according
// to the fog modulation factor from fog_coord() (1.0 = no fog).
static __device__ __forceinline__ float3 fog_color(float fogmod, float3 hit_col) {
  const float bgweight = 1.0f - fogmod;
  const float3 blended = (fogmod * hit_col) + (bgweight * rtLaunch.scene.bg_color);
  return blended;
}
2880 
2881 
2882 
//
// trivial ambient occlusion implementation
//
// Estimates hemispherical visibility at 'hit' by tracing
// rtLaunch.lights.ao_samples shadow rays in jittered directions flipped
// into the hemisphere around normal N, returning the accumulated
// cosine-weighted visibility scaled by ao_lightscale.
// NOTE(review): the 'aoimportance' parameter is unused in this body —
// sampling ignores importance (see the loop comment below); confirm
// whether it is intended for future importance-sampled AO.
static __device__ float shade_ambient_occlusion(float3 hit, float3 N, float aoimportance) {
  float inten = 0.0f;

  // Improve AO RNG seed generation when more than one AA sample is run
  // per rendering pass. The classic OptiX 6 formulation doesn't work
  // as well now that we do our own subframe counting, and with RTX hardware
  // we often want multiple AA samples per pass now unlike before.
  unsigned int aas = 1+getPayloadAAsample(); // add one to prevent a zero
  int teabits1 = aas * subframe_count() * 313331337;
  unsigned int randseed = tea<2>(teabits1, teabits1);

  // do all the samples requested, with no observance of importance
  for (int s=0; s<rtLaunch.lights.ao_samples; s++) {
    float3 dir;
    jitter_sphere3f(randseed, dir); // random direction on the unit sphere
    float ndotambl = dot(N, dir);

    // flip the ray so it's in the same hemisphere as the surface normal
    if (ndotambl < 0.0f) {
      ndotambl = -ndotambl;
      dir = -dir;
    }

    float3 aoray_origin, aoray_direction;
    float tmax=rtLaunch.lights.ao_maxdist; // limit AO occlusion distance
#ifdef USE_REVERSE_SHADOW_RAYS
    if (shadows_enabled == RT_SHADOWS_ON_REVERSE) {
      // reverse any-hit ray traversal direction for increased perf
      // XXX We currently hard-code REVERSE_RAY_LENGTH in such a way that
      // it works well for scenes that fall within the view volume,
      // given the relationship between the model and camera coordinate
      // systems, but this would be best computed by the diagonal of the
      // AABB for the full scene, and then scaled into camera coordinates.
      // The REVERSE_RAY_STEP size is computed to avoid self intersection
      // with the surface we're shading.
      // NOTE(review): one original source line appears to be missing here
      // in this copy of the file — confirm against the VCS original.
      aoray_origin = hit + dir * REVERSE_RAY_LENGTH;
      aoray_direction = -dir;
    } else
#endif
    {
#if defined(TACHYON_USE_RAY_STEP)
      // step the origin off the surface to avoid self-intersection
      aoray_origin = hit + TACHYON_RAY_STEP;
#else
      aoray_origin = hit;
#endif
      aoray_direction = dir;
    }

    // initialize per-ray shadow attenuation as "no-occlusion"
    uint32_t p0 = __float_as_int(1.0f);

    optixTrace(rtLaunch.traversable,
               aoray_origin,
               aoray_direction,
               0.0f, // tmin
               tmax, // tmax
               0.0f, // ray time
               OptixVisibilityMask( 255 ),
#if 1
               OPTIX_RAY_FLAG_NONE,
#elif 1
               // Hard shadows only, no opacity filtering.
               // For shadow rays skip any/closest hit and terminate
               // on first intersection with anything.
               OPTIX_RAY_FLAG_DISABLE_ANYHIT
               | OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT
               | OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT,
#endif
               RT_RAY_TYPE_SHADOW, // SBT offset
               RT_RAY_TYPE_COUNT,  // SBT stride
               RT_RAY_TYPE_SHADOW, // missSBTIndex
               p0); // send attenuation in payload register 0

    inten += ndotambl * __int_as_float(p0); // fetch attenuation from p0
  }

  // unweighted non-importance-sampled scaling factor
  return inten * rtLaunch.lights.ao_lightscale;
}
2966 
2967 
2968 
//
// Compute direct lighting (Lambertian diffuse plus Blinn halfway-vector
// specular) from a single light, optionally attenuated by a shadow ray.
// Diffuse lighting is accumulated into 'result', specular highlights
// into 'phongcol'.
//
//   SHADOWS_ON:  compile-time flag enabling shadow ray traversal
//   hit_point:   world-space surface hit point
//   N:           shading normal
//   L:           unit direction from the hit point toward the light
//   p_Kd, p_Ks, p_phong_exp: material diffuse/specular/shininess terms
//   col:         surface color at the hit point
//   shadow_tmax: max shadow ray length (light distance for positional
//                lights, RT_DEFAULT_MAX for directional lights)
//
template<int SHADOWS_ON>
static __device__ __inline__ void shade_light(float3 &result,
                                              float3 &hit_point,
                                              float3 &N, float3 &L,
                                              float p_Kd,
                                              float p_Ks,
                                              float p_phong_exp,
                                              float3 &col,
                                              float3 &phongcol,
                                              float shadow_tmax) {
  float inten = dot(N, L);

  // cast shadow ray
  float light_attenuation = static_cast<float>(inten > 0.0f);
  if (SHADOWS_ON && rtLaunch.lights.shadows_enabled && inten > 0.0f) {

    float3 shadowray_origin, shadowray_direction;
    float tmax=shadow_tmax;
#ifdef USE_REVERSE_SHADOW_RAYS
    if (shadows_enabled == RT_SHADOWS_ON_REVERSE) {
      // reverse any-hit ray traversal direction for increased perf
      // XXX We currently hard-code REVERSE_RAY_LENGTH in such a way that
      // it works well for scenes that fall within the view volume,
      // given the relationship between the model and camera coordinate
      // systems, but this would be best computed by the diagonal of the
      // AABB for the full scene, and then scaled into camera coordinates.
      // The REVERSE_RAY_STEP size is computed to avoid self intersection
      // with the surface we're shading.
      // Bug fix: this normally-disabled branch previously failed to
      // compile (missing ';' after "-L", unbalanced ')' in fminf()).
      shadowray_origin = hit_point + L * REVERSE_RAY_LENGTH;
      shadowray_direction = -L;
      tmax = fminf(tmax, shadow_tmax);
    }
    else
#endif
    {
      // step the origin off the surface to avoid self-intersection
      shadowray_origin = hit_point + TACHYON_RAY_STEP;
      shadowray_direction = L;
    }

    // initialize per-ray shadow attenuation as "no-occlusion"
    uint32_t p0 = __float_as_int(1.0f);

    optixTrace(rtLaunch.traversable,
               shadowray_origin,
               shadowray_direction,
               0.0f, // tmin
               tmax, // tmax
               0.0f, // ray time
               OptixVisibilityMask( 255 ),
#if 1
               OPTIX_RAY_FLAG_NONE,
#else
               // Hard shadows only, no opacity filtering.
               // For shadow rays skip any/closest hit and terminate
               // on first intersection with anything.
               OPTIX_RAY_FLAG_DISABLE_ANYHIT
               | OPTIX_RAY_FLAG_TERMINATE_ON_FIRST_HIT
               | OPTIX_RAY_FLAG_DISABLE_CLOSESTHIT,
#endif
               RT_RAY_TYPE_SHADOW, // SBT offset
               RT_RAY_TYPE_COUNT,  // SBT stride
               RT_RAY_TYPE_SHADOW, // missSBTIndex
               p0); // send attenuation in payload register 0
    light_attenuation = __int_as_float(p0); // get attenuation from p0

#if defined(TACHYON_RAYSTATS)
    const int idx = tachyon1DLaunchIndex();
    rtLaunch.frame.raystats1_buffer[idx].y++; // increment shadow ray counter
#endif
  }

  // If not completely shadowed, light the hit point.
  // When shadows are disabled, the light can't possibly be attenuated.
  if (!SHADOWS_ON || light_attenuation > 0.0f) {
    result += col * p_Kd * inten * light_attenuation;

    // add specular hightlight using Blinn's halfway vector approach
    const float3 ray_direction = optixGetWorldRayDirection();
    float3 H = normalize(L - ray_direction);
    float nDh = dot(N, H);
    if (nDh > 0) {
      float power = powf(nDh, p_phong_exp);
      phongcol += make_float3(p_Ks) * power * light_attenuation;
    }
  }
}
3056 
3057 
3058 
3059 
//
// Partial re-implementation of the key portions of Tachyon's "full" shader.
//
// This shader has been written to be expanded into a large set of
// fully specialized shaders generated through combinatorial expansion
// of each of the major shader features associated with scene-wide or
// material-specific shading properties.
// At present, there are three scene-wide properties (fog, shadows, AO),
// and three material-specific properties (outline, reflection, transmission).
// There can be a performance cost for OptiX work scheduling of disparate
// materials if too many unique materials are used in a scene.
// Although there are 8 combinations of scene-wide parameters and
// 8 combinations of material-specific parameters (64 in total),
// the scene-wide parameters are uniform for the whole scene.
// We will therefore only have at most 8 different shader variants
// in use in a given scene, due to the 8 possible combinations
// of material-specific (outline, reflection, transmission) properties.
//
// The macros that generate the full set of 64 possible shader variants
// are at the very end of this source file.
//
// Parameters: prim_color is the surface color at the hit point, N the
// shading normal; p_Ka/p_Kd/p_Ks/p_phong_exp are the material
// ambient/diffuse/specular/shininess coefficients; p_reflectivity,
// p_opacity, p_outline, p_outlinewidth and p_transmode come from the
// material record.  The shaded color is returned through the per-ray
// payload (prd.result), not a return value.
template<int CLIP_VIEW_ON,
         int HEADLIGHT_ON,
         int FOG_ON,
         int SHADOWS_ON,
         int AO_ON,
         int OUTLINE_ON,
         int REFLECTION_ON,
         int TRANSMISSION_ON>
static __device__ void shader_template(float3 prim_color, float3 N,
                                       float p_Ka, float p_Kd, float p_Ks,
                                       float p_phong_exp, float p_reflectivity,
                                       float p_opacity,
                                       float p_outline, float p_outlinewidth,
                                       int p_transmode) {
  PerRayData_radiance &prd = *getPRD<PerRayData_radiance>();
  const float3 ray_origin = optixGetWorldRayOrigin();
  const float3 ray_direction = optixGetWorldRayDirection();
  const float t_hit = optixGetRayTmax();

  float3 hit_point = ray_origin + t_hit * ray_direction;
  float3 result = make_float3(0.0f);   // accumulated diffuse shading
  float3 phongcol = make_float3(0.0f); // accumulated specular highlights

  // add depth cueing / fog if enabled
  // use fog coordinate to modulate importance for AO rays, etc.
  float fogmod = 1.0f;
  if (FOG_ON && rtLaunch.scene.fog_mode != 0) {
    fogmod = fog_coord(hit_point);
  }

#if defined(TACHYON_RAYSTATS)
  // compute and reuse 1-D pixel index
  const int idx = tachyon1DLaunchIndex();
#endif

#if 1
  // don't render transparent surfaces if we've reached the max count
  // this implements the same logic as the -trans_max_surfaces argument
  // in the CPU version of Tachyon, and is described in:
  // Interactive Ray Tracing Techniques for High-Fidelity
  // Scientific Visualization. John E. Stone.
  // In, Eric Haines and Tomas Akenine-Möller, editors,
  // Ray Tracing Gems, Apress, Chapter 27, pp. 493-515, 2019.
  // https://doi.org/10.1007/978-1-4842-4427-2_27
  //
  if ((p_opacity < 1.0f) && (prd.transcnt < 1)) {
    // shoot a transmission ray
    PerRayData_radiance new_prd;
    new_prd.importance = prd.importance * (1.0f - p_opacity);
    new_prd.alpha = 1.0f;
    new_prd.result = rtLaunch.scene.bg_color;

    // For correct operation with the RTX runtime strategy and its
    // associated stack management scheme, we MUST increment the
    // ray recursion depth counter when performing transparent surface
    // peeling, otherwise we could go beyond the max recursion depth
    // that we previously requested from OptiX. This will work less well
    // than the former approach in terms of visual outcomes, but we presently
    // have no alternative and must avoid issues with runtime stack overruns.
    new_prd.depth = prd.depth + 1;
    new_prd.transcnt = 0; // don't decrement further since unsigned int type

    if (new_prd.importance >= 0.001f && new_prd.depth < rtLaunch.max_depth) {
      float3 transray_direction = ray_direction;
      float3 transray_origin;
#if defined(TACHYON_USE_RAY_STEP)
#if defined(TACHYON_TRANS_USE_INCIDENT)
      // step the ray in the incident ray direction
      transray_origin = hit_point + TACHYON_RAY_STEP2;
#else
      // step the ray in the direction opposite the surface normal (going in)
      // rather than out, for transmission rays...
      transray_origin = hit_point - TACHYON_RAY_STEP;
#endif
#else
      transray_origin = hit_point;
#endif

      // the values we store the PRD pointer in:
      uint32_t p0, p1;
      packPointer( &new_prd, p0, p1 );
      uint32_t s = getPayloadAAsample(); // use aasample in CH/MISS RNGs

      optixTrace(rtLaunch.traversable,
                 transray_origin,
                 transray_direction,
                 0.0f, // tmin
                 RT_DEFAULT_MAX, // tmax
                 0.0f, // ray time
                 OptixVisibilityMask( 255 ),
                 OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
                 RT_RAY_TYPE_RADIANCE, // SBT offset
                 RT_RAY_TYPE_COUNT, // SBT stride
                 RT_RAY_TYPE_RADIANCE, // missSBTIndex
                 p0, p1, // PRD ptr in 2x uint32
                 s); // use aasample in CH/MISS RNGs

#if defined(TACHYON_RAYSTATS)
      rtLaunch.frame.raystats2_buffer[idx].x++; // increment trans ray counter
#endif
    }
    prd.result = new_prd.result;
    return; // early-exit
  }
#endif

  // execute the object's texture function
  float3 col = prim_color; // XXX no texturing implemented yet

  // compute lighting from directional lights
  for (int i=0; i < rtLaunch.lights.num_dir_lights; i++) {
    float3 L = rtLaunch.lights.dir_lights[i];
    shade_light<SHADOWS_ON>(result, hit_point, N, L, p_Kd, p_Ks, p_phong_exp,
                            col, phongcol, RT_DEFAULT_MAX);
  }

  // compute lighting from positional lights
  for (int i=0; i < rtLaunch.lights.num_pos_lights; i++) {
    float3 Lpos = rtLaunch.lights.pos_lights[i];
    float3 L = Lpos - hit_point;
    float shadow_tmax;
    L = normalize_len(L, shadow_tmax); // normalize and compute shadow tmax
    shade_light<SHADOWS_ON>(result, hit_point, N, L, p_Kd, p_Ks, p_phong_exp,
                            col, phongcol, shadow_tmax);
  }

  // add point light for camera headlight need for Oculus Rift HMDs,
  // equirectangular panorama images, and planetarium dome master images
  if (HEADLIGHT_ON && (rtLaunch.lights.headlight_mode != 0)) {
    float3 L = rtLaunch.cam.pos - hit_point;
    float shadow_tmax;
    L = normalize_len(L, shadow_tmax); // normalize and compute shadow tmax
    shade_light<SHADOWS_ON>(result, hit_point, N, L, p_Kd, p_Ks, p_phong_exp,
                            col, phongcol, shadow_tmax);
  }

  // add ambient occlusion diffuse lighting, if enabled
  if (AO_ON && rtLaunch.lights.ao_samples > 0) {
    result *= rtLaunch.lights.ao_direct;
    result += rtLaunch.lights.ao_ambient * col * p_Kd * shade_ambient_occlusion(hit_point, N, fogmod * p_opacity);

#if defined(TACHYON_RAYSTATS)
    rtLaunch.frame.raystats1_buffer[idx].z+=rtLaunch.lights.ao_samples; // increment AO shadow ray counter
#endif
  }

  // add edge shading if applicable
  if (OUTLINE_ON && p_outline > 0.0f) {
    // darken grazing-angle pixels to produce a silhouette outline
    float edgefactor = dot(N, ray_direction);
    edgefactor *= edgefactor;
    edgefactor = 1.0f - edgefactor;
    edgefactor = 1.0f - powf(edgefactor, (1.0f - p_outlinewidth) * 32.0f);
    float outlinefactor = __saturatef((1.0f - p_outline) + (edgefactor * p_outline));
    result *= outlinefactor;
  }

  result += make_float3(p_Ka); // white ambient contribution
  result += phongcol; // add phong highlights

  //
  // spawn reflection rays if necessary
  //
  if (REFLECTION_ON && p_reflectivity > 0.0f) {
    // ray tree attenuation
    PerRayData_radiance new_prd;
    new_prd.importance = prd.importance * p_reflectivity;
    new_prd.depth = prd.depth + 1;
    new_prd.transcnt = prd.transcnt;

    // shoot a reflection ray
    // NOTE(review): depth test here uses '<=' while the transcnt-peeling
    // branch above uses '<' — confirm whether this asymmetry is intended.
    if (new_prd.importance >= 0.001f && new_prd.depth <= rtLaunch.max_depth) {
      float3 reflray_direction = reflect(ray_direction, N);

      float3 reflray_origin;
#if defined(TACHYON_USE_RAY_STEP)
      reflray_origin = hit_point + TACHYON_RAY_STEP;
#else
      reflray_origin = hit_point;
#endif

      // the values we store the PRD pointer in:
      uint32_t p0, p1;
      packPointer( &new_prd, p0, p1 );
      uint32_t s = getPayloadAAsample(); // use aasample in CH/MISS RNGs

      optixTrace(rtLaunch.traversable,
                 reflray_origin,
                 reflray_direction,
                 0.0f, // tmin
                 RT_DEFAULT_MAX, // tmax
                 0.0f, // ray time
                 OptixVisibilityMask( 255 ),
                 OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
                 RT_RAY_TYPE_RADIANCE, // SBT offset
                 RT_RAY_TYPE_COUNT, // SBT stride
                 RT_RAY_TYPE_RADIANCE, // missSBTIndex
                 p0, p1, // PRD ptr in 2x uint32
                 s); // use aasample in CH/MISS RNGs

#if defined(TACHYON_RAYSTATS)
      rtLaunch.frame.raystats2_buffer[idx].w++; // increment refl ray counter
#endif
      result += p_reflectivity * new_prd.result;
    }
  }

  //
  // spawn transmission rays if necessary
  //
  float alpha = p_opacity;
#if 1
  // NOTE(review): the argument list of this sphere_fade_and_clip() call is
  // truncated in this copy of the file (a continuation line is missing) —
  // restore the remaining arguments from the original source before use.
  if (CLIP_VIEW_ON && (rtLaunch.clipview_mode == 2))
    sphere_fade_and_clip(hit_point, rtLaunch.cam.pos,
#else
  if (CLIP_VIEW_ON && (rtLaunch.clipview_mode == 2)) {
    // draft implementation of a smooth "fade-out-and-clip sphere"
    float fade_start = 1.00f; // onset of fading
    float fade_end = 0.20f; // fully transparent
    float camdist = length(hit_point - rtLaunch.cam.pos);

    // XXX we can omit the distance test since alpha modulation value is clamped
    // if (1 || camdist < fade_start) {
    float fade_len = fade_start - fade_end;
    alpha *= __saturatef((camdist - fade_start) / fade_len);
    // }
  }
#endif

#if 1
  // TRANSMISSION_ON: handles transparent surface shading, test is only
  // performed when the geometry has a known-transparent material
  // CLIP_VIEW_ON: forces check of alpha value for all geom as per transparent
  // material, since all geometry may become tranparent with the
  // fade+clip sphere active
  if ((TRANSMISSION_ON || CLIP_VIEW_ON) && alpha < 0.999f ) {
    // Emulate Tachyon/Raster3D's angle-dependent surface opacity if enabled
    if (p_transmode) {
      alpha = 1.0f + cosf(3.1415926f * (1.0f-alpha) * dot(N, ray_direction));
      alpha = alpha*alpha * 0.25f;
    }

    result *= alpha; // scale down current lighting by opacity

    // shoot a transmission ray
    PerRayData_radiance new_prd;
    new_prd.importance = prd.importance * (1.0f - alpha);
    new_prd.alpha = 1.0f;
    new_prd.result = rtLaunch.scene.bg_color;
    new_prd.depth = prd.depth + 1;
    new_prd.transcnt = max(1, prd.transcnt) - 1; // prevent uint wraparound
    if (new_prd.importance >= 0.001f && new_prd.depth <= rtLaunch.max_depth) {
      float3 transray_direction = ray_direction;
      float3 transray_origin;
#if defined(TACHYON_USE_RAY_STEP)
#if defined(TACHYON_TRANS_USE_INCIDENT)
      // step the ray in the incident ray direction
      transray_origin = hit_point + TACHYON_RAY_STEP2;
#else
      // step the ray in the direction opposite the surface normal (going in)
      // rather than out, for transmission rays...
      transray_origin = hit_point - TACHYON_RAY_STEP;
#endif
#else
      transray_origin = hit_point;
#endif

      // the values we store the PRD pointer in:
      uint32_t p0, p1;
      packPointer( &new_prd, p0, p1 );
      uint32_t s = getPayloadAAsample(); // use aasample in CH/MISS RNGs

      optixTrace(rtLaunch.traversable,
                 transray_origin,
                 transray_direction,
                 0.0f, // tmin
                 RT_DEFAULT_MAX, // tmax
                 0.0f, // ray time
                 OptixVisibilityMask( 255 ),
                 OPTIX_RAY_FLAG_DISABLE_ANYHIT, // Only want CH
                 RT_RAY_TYPE_RADIANCE, // SBT offset
                 RT_RAY_TYPE_COUNT, // SBT stride
                 RT_RAY_TYPE_RADIANCE, // missSBTIndex
                 p0, p1, // PRD ptr in 2x uint32
                 s); // use aasample in CH/MISS RNGs

#if defined(TACHYON_RAYSTATS)
      rtLaunch.frame.raystats2_buffer[idx].x++; // increment trans ray counter
#endif
    }
    result += (1.0f - alpha) * new_prd.result;
    prd.alpha = alpha + (1.0f - alpha) * new_prd.alpha;
  }
#endif

  // add depth cueing / fog if enabled
  if (FOG_ON && fogmod < 1.0f) {
    result = fog_color(fogmod, result);
  }

  prd.result = result; // pass the color back up the tree
}
3383 
3384 
3385 
3386 //
3387 // OptiX closest hit and anyhit programs for radiance rays
3388 //
3389 //#define TACHYON_FLATTEN_CLOSESTHIT_DISPATCH 1
3390 //#define TACHYON_MERGED_CLOSESTHIT_DISPATCH 1
3391 
// general-purpose closest-hit program, with all template options enabled,
// intended for shader debugging and comparison with the original
// Tachyon full_shade() code.
// Dispatches on the OptiX hit kind / primitive type to compute the
// shading normal, hit color and alpha, then invokes the fully-enabled
// shader_template instantiation with the hit material's parameters.
extern "C" __global__ void __closesthit__radiance_general() {
  // per-hit-group SBT record: geometry buffers, colors, material index
  const GeomSBTHG &sbtHG = *reinterpret_cast<const GeomSBTHG*>(optixGetSbtDataPointer());

  // shading variables that need to be computed/set by primitive-specific code
  float3 shading_normal;
  float3 hit_color;
  float hit_alpha=1.0f; // tex alpha|cutout transparency, mult w/ mat opacity
  int vertexcolorset=0; // nonzero when a get_shadevars_* helper set hit_color


#if defined(TACHYON_MERGED_CLOSESTHIT_DISPATCH)
  unsigned int hit_kind = optixGetHitKind();
  unsigned int hit_prim_type = 0;
#if OPTIX_VERSION >= 70100
  hit_prim_type = optixGetPrimitiveType(hit_kind);
#endif

  // merge kind+type by left shifting OptiX "0x25XX" value range so it can
  // be directly bitwise-ORed with the hit kind from our custom prims, or
  // OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE or OPTIX_HIT_KIND_TRIANGLE_BACK_FACE.
  // If we strip off the low bit from hit kind, and ensure that our
  // custom prim types start with indices >= 2, we can handle the merged
  // triangle front/back faces as a single case (they are coded as
  // 0xFF and 0xFE in the OptiX headers)
  unsigned int mergeprimtype = (hit_prim_type << 16) | (0xFE & hit_kind);
  switch (mergeprimtype) {
    case RT_PRM_CONE:
      get_shadevars_cone_array(sbtHG, shading_normal);
      break;

    case RT_PRM_CYLINDER:
      get_shadevars_cylinder_array(sbtHG, shading_normal);
      break;

    case RT_PRM_QUAD:
      get_shadevars_quadmesh(sbtHG, hit_color, shading_normal);
      vertexcolorset=1;
      break;

    case RT_PRM_RING:
      get_shadevars_ring_array(sbtHG, shading_normal);
      break;

    case RT_PRM_SPHERE:
      get_shadevars_sphere_array(sbtHG, shading_normal);
      break;

    case RT_PRM_TRIANGLE:
      get_shadevars_trimesh(sbtHG, hit_color, hit_alpha, shading_normal);
      vertexcolorset=1;
      break;

#if OPTIX_VERSION >= 70400
    case RT_PRM_CATMULLROM:
      get_shadevars_curves_catmullrom(sbtHG, hit_color, shading_normal);
      break;
#endif
#if OPTIX_VERSION >= 70200
    case RT_PRM_LINEAR:
      get_shadevars_curves_linear(sbtHG, hit_color, shading_normal);
      break;
#endif

#if 0
    default:
      printf("Unrecognized merged prim: %08x\n", mergeprimtype);
      break;
#endif
  }
  // open a block so both preprocessor paths share the closing brace below
  {

#else // !defined(TACHYON_MERGED_CLOSESTHIT_DISPATCH)

  // Handle normal and color computations according to primitive type
  unsigned int hit_kind = optixGetHitKind();

#if !defined(TACHYON_FLATTEN_CLOSESTHIT_DISPATCH)
#if OPTIX_VERSION >= 70100
// OptixPrimitiveType hit_prim_type = optixGetPrimitiveType(hit_kind);
  unsigned int hit_prim_type = optixGetPrimitiveType(hit_kind);

  // XXX It would be more desirable to have a full switch block
  // for triangles/curves/custom rather than chained if/else
  if (hit_prim_type == OPTIX_PRIMITIVE_TYPE_TRIANGLE) {
    get_shadevars_trimesh(sbtHG, hit_color, hit_alpha, shading_normal);
  } else if (hit_prim_type == OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR) {
    get_shadevars_curves_linear(sbtHG, hit_color, shading_normal);
#if OPTIX_VERSION >= 70400
  } else if (hit_prim_type == OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM) {
    get_shadevars_curves_catmullrom(sbtHG, hit_color, shading_normal);
#endif
  } else
#endif
#endif // TACHYON_FLATTEN_CLOSESTHIT_DISPATCH
  {
    // For OPTIX_PRIMITIVE_TYPE_CUSTOM we check the lowest 7 bits of
    // hit_kind to determine our user-defined primitive type.
    // For peak traversal performance, calculation of surface normals
    // colors, etc is deferred until CH/AH shading herein.
    switch (hit_kind) {
#if (OPTIX_VERSION < 70100) || defined(TACHYON_FLATTEN_CLOSESTHIT_DISPATCH)
      // For OptiX 7.0.0 we handle triangle hits here too since
      // it lacked optixGetPrimitiveType() etc...
      // Built-in primitive hit == (hit_kind & 0x80)
      case OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE: // front-face of triangle hit
      case OPTIX_HIT_KIND_TRIANGLE_BACK_FACE: // back-face of triangle hit
        get_shadevars_trimesh(sbtHG, hit_color, hit_alpha, shading_normal);
        vertexcolorset=1;
        break;
#endif

      case RT_HIT_CONE:
        get_shadevars_cone_array(sbtHG, shading_normal);
        break;

      case RT_HIT_CYLINDER:
        get_shadevars_cylinder_array(sbtHG, shading_normal);
        break;

      case RT_HIT_QUAD:
        get_shadevars_quadmesh(sbtHG, hit_color, shading_normal);
        vertexcolorset=1;
        break;

      case RT_HIT_RING:
        get_shadevars_ring_array(sbtHG, shading_normal);
        break;

      case RT_HIT_SPHERE:
        get_shadevars_sphere_array(sbtHG, shading_normal);
        break;

#if defined(TACHYON_FLATTEN_CLOSESTHIT_DISPATCH)
#if OPTIX_VERSION >= 70100
      default:
        {
          OptixPrimitiveType hit_prim_type = optixGetPrimitiveType(hit_kind);
          // At this point we know it must be a curve or one of the
          // other built-in types
          if (hit_prim_type == OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR) {
            get_shadevars_curves_linear(sbtHG, hit_color, shading_normal);
#if OPTIX_VERSION >= 70400
          } else if (hit_prim_type == OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM) {
            get_shadevars_curves_catmullrom(sbtHG, hit_color, shading_normal);
#endif
          }
        }
        break;
#endif
#endif

    }
#endif // !defined(TACHYON_MERGED_CLOSESTHIT_DISPATCH)

    // Assign either per-primitive or uniform color
    if (!vertexcolorset) {
      if (sbtHG.prim_color != nullptr) {
        const int primID = optixGetPrimitiveIndex();
        hit_color = sbtHG.prim_color[primID];
      } else {
        hit_color = sbtHG.uniform_color;
      }
    }
  }

  // VR CLIP_VIEW and HEADLIGHT modes are locked out for normal renderings
  // for the time being, so they don't harm performance measurements for
  // the more typical non-VR use cases.
  int matidx = sbtHG.materialindex; // common for all geometry
  const auto &mat = rtLaunch.materials[matidx];
  shader_template<0, 0, 1, 1, 1, 1, 1, 1>(hit_color, shading_normal,
                                          mat.ambient, mat.diffuse,
                                          mat.specular, mat.shininess,
                                          mat.reflectivity,
                                          mat.opacity * hit_alpha,
                                          mat.outline, mat.outlinewidth,
                                          mat.transmode);
}
3573 
3574 
3575 
3576 
__global__ void __closesthit__radiance_general()
__host__ __device__ float3 normalize(const float3 &v)
Normalize input vector to unit length.
static __host__ __device__ __inline__ void quad_calc_barycentrics_v11(const GeomSBTHG &sbtHG, float &alpha11, float &beta11)
__global__ void __raygen__camera_dome_master_dof()
static __forceinline__ __device__ void packPointer(void *ptr, uint32_t &i0, uint32_t &i1)
__global__ void __exception__all()
static __inline__ __device__ float3 calc_ffworld_normal(const float3 &Nshading, const float3 &Ngeometric)
__global__ void __raygen__camera_orthographic_stereo()
uchar4 * framebuffer
8-bit unorm RGBA framebuffer
float accum_normalize
precalc 1.0f / subframe_index
#define REVERSE_RAY_LENGTH
static __device__ __inline__ void tachyon_camera_perspective_general()
int headlight_mode
Extra VR camera-located headlight.
static __device__ __inline__ void jitter_offset2f(unsigned int &pval, float2 &xy)
float bg_grad_invrange
miss background gradient inverse range
static __device__ __inline__ void dof_ray(const float cam_dof_focal_dist, const float cam_dof_aperture_rad, const float3 &ray_origin_orig, float3 &ray_origin, const float3 &ray_direction_orig, float3 &ray_direction, unsigned int &randseed, const float3 &up, const float3 &right)
float4 * PosRadius
X,Y,Z,Radius packed for coalescing.
static __forceinline__ __device__ void * unpackPointer(uint32_t i0, uint32_t i1)
float3 bg_grad_updir
miss background gradient up direction
__constant__ tachyonLaunchParams rtLaunch
launch parameters in constant memory, filled by optixLaunch()
int update_colorbuffer
accumulation copyout flag
static __host__ __device__ __inline__ void sphere_intersect_hearn_baker(float3 center, float rad)
static __device__ __inline__ void jitter_disc2f_qrn(float2 &qrnxy, float2 &xy, float radius)
int tonemap_mode
output tone mapping mode
__host__ __device__ float3 normalize_len(const float3 v, float &l)
Normalize input vector to unit length, and return its original length.
#define REVERSE_RAY_STEP
float ao_lightscale
2.0f/float(ao_samples)
static __device__ float shade_ambient_occlusion(float3 hit, float3 N, float aoimportance)
static __forceinline__ __device__ float getPayloadShadowAttenuation()
any-hit programs read-modify-update shadow attenuation value carried in ray payload register 0 ...
QuadMeshSBT quadmesh
__global__ void __raygen__camera_perspective()
static __device__ __inline__ void tachyon_camera_octahedral_general()
__global__ void __raygen__camera_equirectangular_stereo_dof()
float ao_maxdist
AO maximum occlusion distance.
__host__ __device__ float4 make_float4(const float3 &a, const float &b)
__host__ __device__ float3 make_float3(const float s)
float3 * prim_color
optional per-primitive color array
float3 W
camera orthonormal W (view) axis
static void __inline__ __device__ accumulate_color(int idx, float4 colrgba4f)
__global__ void __intersection__sphere_array()
__host__ __device__ float3 fmaxf(const float3 &a, const float3 &b)
float3 pos
camera position
__global__ void __intersection__cone_array_color()
__device__ void clip_ray_by_plane(float3 ray_origin, float3 ray_direction, float &tmin, float &tmax, const float4 plane)
static __device__ __forceinline__ float3 fog_color(float fogmod, float3 hit_col)
int max_trans
max transparent surface crossing count
float tonemap_exposure
tone mapping exposure gain parameter
Tachyon ray tracing engine core routines and data structures compiled to PTX for runtime JIT to build...
float ao_direct
AO direct lighting scaling factor.
__global__ void __raygen__camera_oculus_rift()
#define SQUARES_RNG_KEY1
int materialindex
material index for this array
__global__ void __raygen__camera_oculus_rift_stereo_dof()
__host__ __device__ float3 faceforward(const float3 &n, const float3 &i, const float3 &nref)
Ensure that an interpolated surface normal n faces in the same direction as dictated by a geometric n...
static __host__ __device__ __inline__ void quad_calc_bilinear_coords(const GeomSBTHG &sbtHG, const float alpha, const float beta, const float &alpha11, const float &beta11, float &u, float &v)
#define RT_DEFAULT_MAX
Tachyon OptiX global launch parameter structure containing the active camera, framebuffer, materials, and any global scene parameters required for shading.
void jitter_sphere3f(rng_frand_handle *rngh, float *dir)
Definition: util.c:779
int fb_clearall
clear/overwrite all FB components
struct tachyonLaunchParams::@4 lights
__host__ __device__ float3 fabsf(const float3 &a)
custom prim ring
shadow probe/AO rays
int fog_mode
fog type (or off)
static __host__ __device__ __inline__ void get_shadevars_ring_array(const GeomSBTHG &sbtHG, float3 &shading_normal)
__global__ void __closesthit__shadow_nop()
__global__ void any_hit_shadow_clip_sphere()
static __forceinline__ __device__ T * getPRD()
__global__ void __raygen__UNKNOWN()
static __host__ __device__ __inline__ void get_shadevars_trimesh(const GeomSBTHG &sbtHG, float3 &hit_color, float &hit_alpha, float3 &shading_normal)
__global__ void __raygen__camera_octahedral_stereo()
__host__ __device__ float3 reflect(const float3 &i, const float3 &n)
calculate reflection direction from incident direction i, and surface normal n.
__global__ void __miss__radiance_solid_bg()
TriMeshSBT trimesh
static __device__ __inline__ void jitter_disc2f(unsigned int &pval, float2 &xy, float radius)
__global__ void __intersection__cylinder_array_color()
__global__ void __raygen__camera_equirectangular_stereo()
Adobe sRGB (gamma 2.2)
custom prim cylinder
SphereArraySBT sphere
__global__ void __anyhit__shadow_transmission()
static __host__ __device__ __inline__ float3 unpackNormal(uint packed)
static __forceinline__ __device__ void setPayloadShadowAttenuation(const float attenuation)
any-hit programs read-modify-update shadow attenuation value carried in ray payload register 0 ...
int shadows_enabled
global shadow flag
__global__ void __raygen__camera_dome_master_stereo_dof()
static __device__ __forceinline__ float fog_coord(float3 hit_point)
__global__ void __anyhit__shadow_opaque()
__global__ void __raygen__camera_perspective_stereo_dof()
__global__ void __raygen__camera_dome_master()
float3 * dir_lights
list of directional light directions
custom prim cylinder
float bg_grad_botval
miss background gradient bottom value
struct tachyonLaunchParams::@3 scene
static int __forceinline__ __device__ subframe_count()
__device__ void sphere_fade_and_clip(const float3 &hit_point, const float3 &cam_pos, float fade_start, float fade_end, float &alpha)
enable alpha transparency
__host__ __device__ float dot(const float3 &a, const float3 &b)
static __device__ __inline__ void tachyon_camera_dome_general()
__global__ void __raygen__camera_equirectangular_dof()
__global__ void __raygen__camera_cubemap()
static __host__ __device__ __inline__ void get_shadevars_sphere_array(const GeomSBTHG &sbtHG, float3 &shading_normal)
#define TACHYON_RAY_STEP
__global__ void __intersection__ring_array()
CylinderArraySBT cyl
#define TACHYON_RAY_STEP2
static __device__ void shader_template(float3 prim_color, float3 N, float p_Ka, float p_Kd, float p_Ks, float p_phong_exp, float p_reflectivity, float p_opacity, float p_outline, float p_outlinewidth, int p_transmode)
material-specific shading property
int2 size
framebuffer size
float3 bg_color_grad_bot
miss background gradient (bottom)
static __host__ __device__ __inline__ void get_shadevars_quadmesh(const GeomSBTHG &sbtHG, float3 &hit_color, float3 &shading_normal)
static __forceinline__ __device__ float4 linear_to_sRGB_approx_20(const float4 &linear)
__global__ void __raygen__camera_cubemap_dof()
any-hit traversal reversal
__global__ void __miss__radiance_gradient_bg_sky_sphere()
__global__ void __anyhit__radiance_nop()
static __forceinline__ __device__ uint32_t getPayloadAAsample()
radiance PRD aasample count is stored in ray payload register 2
ConeArraySBT cone
__global__ void __raygen__camera_orthographic_stereo_dof()
int ao_samples
number of AO samples per AA ray
__global__ void __intersection__quadmesh()
static __host__ __device__ __inline__ void get_shadevars_cone_array(const GeomSBTHG &sbtHG, float3 &shading_normal)
__global__ void __raygen__camera_orthographic_dof()
int subframe_index
accumulation subframe index
normal radiance rays
__global__ void __raygen__camera_perspective_dof()
__host__ __device__ float3 cross(const float3 &a, const float3 &b)
calculate the cross product between vectors a and b.
__global__ void __raygen__camera_octahedral()
float bg_grad_noisemag
miss background gradient noise magnitude
int aa_samples
AA samples per launch.
static __device__ __inline__ void shade_light(float3 &result, float3 &hit_point, float3 &N, float3 &L, float p_Kd, float p_Ks, float p_phong_exp, float3 &col, float3 &phongcol, float shadow_tmax)
scene-wide shading property
static __device__ __inline__ void tachyon_camera_equirectangular_general()
OptixTraversableHandle traversable
global OptiX scene traversable handle
__global__ void __raygen__camera_equirectangular()
float3 bg_color_grad_top
miss background gradient (top)
float clipview_end
clipping sphere/plane end coord
static __device__ __inline__ void tachyon_camera_oculus_rift_general()
struct tachyonLaunchParams::@2 frame
custom prim sphere
__host__ __device__ float length(const float3 &v)
float clipview_start
clipping sphere/plane start coord
static __device__ __inline__ void tachyon_camera_orthographic_general()
float opacity
surface opacity
__device__ void ray_sphere_clip_interval(float3 ray_origin, float3 ray_direction, float3 center, float rad, float2 &tinterval)
int colorspace
output colorspace
struct tachyonLaunchParams::@5 cam
int num_dir_lights
directional light count
float3 * pos_lights
list of positional light positions
#define UINT32_RAND_MAX_INV
__global__ void __miss__shadow_nop()
custom prim quadrilateral
__global__ void __raygen__camera_octahedral_dof()
__global__ void __raygen__camera_dome_master_stereo()
__global__ void __raygen__camera_cubemap_stereo()
rt_material * materials
device memory material array
#define M_PIf
custom prim cone
__global__ void __raygen__camera_oculus_rift_stereo()
__global__ void __raygen__camera_perspective_stereo()
custom prim quadrilateral
float3 bg_color
miss background color
custom prim cone
__global__ void __raygen__camera_oculus_rift_dof()
int num_pos_lights
positional light count
custom prim ring
float ao_ambient
AO ambient factor.
float3 uniform_color
uniform color for entire sphere array
float4 * accum_buffer
32-bit FP RGBA accumulation buffer
__global__ void __raygen__camera_octahedral_stereo_dof()
float stereo_eyesep
stereo eye separation, in world coords
__global__ void __miss__radiance_gradient_bg_sky_plane()
int clipview_mode
VR clipping view on/off.
static __device__ __inline__ int tachyon1DLaunchIndex(void)
Helper function to return 1-D framebuffer offset computed from the current thread's launch_index...
int max_depth
global max ray tracing recursion depth
enable tex cutout transparency
__global__ void __raygen__camera_orthographic()
static __device__ __inline__ void tachyon_camera_cubemap_general()
custom prim sphere
float epsilon
global epsilon value
RingArraySBT ring
total count of ray types
__global__ void __raygen__camera_cubemap_stereo_dof()
static __host__ __device__ __inline__ void get_shadevars_cylinder_array(const GeomSBTHG &sbtHG, float3 &shading_normal)
static __forceinline__ __device__ float4 linear_to_sRGB(const float4 &lin)