raytracer/doxygen/TachyonOptiXShaders_8h_source.html

 /*
  * TachyonOptiXShaders.h - prototypes for OptiX PTX shader routines
  *
  * (C) Copyright 2013-2022 John E. Stone
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * $Id: TachyonOptiXShaders.h,v 1.67 2022/04/19 02:54:24 johns Exp $
  *
  */

 //
 // This is a second generation of the Tachyon implementation for OptiX.
 // The new implementation favors the strengths of OptiX 7, and uses
 // OptiX ray payload registers, direct CUDA interoperability and advanced
 // CUDA features for both performance and maintainability.
 //
 // This software and its line of antecedents are described in:
 //   "Multiscale modeling and cinematic visualization of photosynthetic
 //    energy conversion processes from electronic to cell scales"
 //    M. Sener, S. Levy, J. E. Stone, AJ Christensen, B. Isralewitz,
 //    R. Patterson, K. Borkiewicz, J. Carpenter, C. N. Hunter,
 //    Z. Luthey-Schulten, D. Cox.
 //    J. Parallel Computing, 102, pp. 102698, 2021.
 //    https://doi.org/10.1016/j.parco.2020.102698
 //
 //   "Omnidirectional Stereoscopic Projections for VR"
 //    J. E. Stone.  In, William R. Sherman, editor,
 //    VR Developer Gems, Taylor and Francis / CRC Press, Chapter 24, 2019.
 //    https://www.taylorfrancis.com/chapters/edit/10.1201/b21598-24/omnidirectional-stereoscopic-projections-vr-john-stone
 //
 //   "Interactive Ray Tracing Techniques for
 //    High-Fidelity Scientific Visualization"
 //    J. E. Stone. In, Eric Haines and Tomas Akenine-Möller, editors,
 //    Ray Tracing Gems, Apress, Chapter 27, pp. 493-515, 2019.
 //    https://link.springer.com/book/10.1007/978-1-4842-4427-2
 //
 //   "A Planetarium Dome Master Camera"
 //    J. E. Stone.  In, Eric Haines and Tomas Akenine-Möller, editors,
 //    Ray Tracing Gems, Apress, Chapter 4, pp. 49-60, 2019.
 //    https://link.springer.com/book/10.1007/978-1-4842-4427-2
 //
 //   "Immersive Molecular Visualization with Omnidirectional
 //    Stereoscopic Ray Tracing and Remote Rendering"
 //    J. E. Stone, W. R. Sherman, and K. Schulten.
 //    High Performance Data Analysis and Visualization Workshop,
 //    2016 IEEE International Parallel and Distributed Processing
 //    Symposium Workshops (IPDPSW), pp. 1048-1057, 2016.
 //    http://dx.doi.org/10.1109/IPDPSW.2016.121
 //
 //   "Atomic Detail Visualization of Photosynthetic Membranes with
 //    GPU-Accelerated Ray Tracing"
 //    J. E. Stone, M. Sener, K. L. Vandivort, A. Barragan, A. Singharoy,
 //    I. Teo, J. V. Ribeiro, B. Isralewitz, B. Liu, B.-C. Goh, J. C. Phillips,
 //    C. MacGregor-Chatwin, M. P. Johnson, L. F. Kourkoutis, C. N. Hunter,
 //    K. Schulten
 //    J. Parallel Computing, 55:17-27, 2016.
 //    http://dx.doi.org/10.1016/j.parco.2015.10.015
 //
 //   "GPU-Accelerated Molecular Visualization on
 //    Petascale Supercomputing Platforms"
 //    J. E. Stone, K. L. Vandivort, and K. Schulten.
 //    UltraVis'13: Proceedings of the 8th International Workshop on
 //    Ultrascale Visualization, pp. 6:1-6:8, 2013.
 //    http://dx.doi.org/10.1145/2535571.2535595
 //
 //    "An Efficient Library for Parallel Ray Tracing and Animation"
 //    John E. Stone.  Master's Thesis, University of Missouri-Rolla,
 //    Department of Computer Science, April 1998
 //    https://scholarsmine.mst.edu/masters_theses/1747
 //
 //    "Rendering of Numerical Flow Simulations Using MPI"
 //    J. Stone and M. Underwood.
 //    Second MPI Developers Conference, pages 138-141, 1996.
 //    http://dx.doi.org/10.1109/MPIDC.1996.534105
 //

 #ifndef TACHYONOPTIXSHADERS_H
 #define TACHYONOPTIXSHADERS_H

 // Ray statistics support is compiled out by default; change the "#if 0"
 // to enable the raystats buffers guarded by TACHYON_RAYSTATS below.
 #if 0
 #define TACHYON_RAYSTATS 1
 #endif

 // Denoiser-related fields are only compiled when OPTIX_VERSION >= 70300
 // (presumably OptiX 7.3.0 -- confirm against the OptiX version encoding).
 #if OPTIX_VERSION >= 70300
 #define TACHYON_OPTIXDENOISER 1
 #endif

 // enable use of geometry flags to accelerate various work
 #define TACHYON_USE_GEOMFLAGS 1

 //
 // Constants shared by both host and device code
 //
 #define RT_DEFAULT_MAX 1e27f

 //
 // Beginning of OptiX data structures
 //

 // Enable reversed traversal of any-hit rays for shadows/AO.
 // This optimization yields a 20% performance gain in many cases.
 // #define USE_REVERSE_SHADOW_RAYS 1

 // Use reverse rays by default rather than only when enabled interactively
 // #define USE_REVERSE_SHADOW_RAYS_DEFAULT 1
 // Shadow mode selector: off, on, or on with reversed shadow rays
 // (see the USE_REVERSE_SHADOW_RAYS notes above).
 enum RtShadowMode {
   RT_SHADOWS_OFF=0,
   RT_SHADOWS_ON=1,
   RT_SHADOWS_ON_REVERSE=2
 };

 // Denoiser on/off selector
 enum RtDenoiserMode {
   RT_DENOISER_OFF=0,
   RT_DENOISER_ON=1,
 };

 // Tone mapping operator selector.  Values are consecutive starting at 0,
 // so RT_TONEMAP_COUNT equals the number of operators listed before it.
 enum RtTonemapMode {
   RT_TONEMAP_CLAMP=0,
   RT_TONEMAP_ACES,
   RT_TONEMAP_REINHARD,
   RT_TONEMAP_REINHARD_EXT,
   RT_TONEMAP_REINHARD_EXT_L,
   RT_TONEMAP_COUNT
 };

 // Ray type indices; RT_RAY_TYPE_COUNT (2) is the number of ray types.
 enum RayType {
   RT_RAY_TYPE_RADIANCE=0,
   RT_RAY_TYPE_SHADOW=1,
   RT_RAY_TYPE_COUNT
 };

 //
 // OptiX 7.x geometry type-associated "hit kind" enums
 //
 // Hit kind codes.  The hardware triangle kind is 1 and the custom
 // primitive kinds follow consecutively (CONE=2 ... CURVE=7); these values
 // are OR'd into RtMergedPrimKind below.
 enum RtHitKind {
   RT_HIT_HWTRIANGLE=1,

   // XXX custom prims offset to start at 2 (see below)
   RT_HIT_CONE,
   RT_HIT_CYLINDER,
   RT_HIT_QUAD,
   RT_HIT_RING,
   RT_HIT_SPHERE,
   RT_HIT_CURVE,
 };


 // simplify runtime code for OptiX 7.0.0
 // When OPTIX_PRIMITIVE_TYPE_CUSTOM is available as a preprocessor symbol,
 // the OptiX primitive type is placed in the bits above bit 15 so it can be
 // merged with a hit kind in the low bits; otherwise fall back to the
 // OptiX 7.0.0 values.
 #if defined(OPTIX_PRIMITIVE_TYPE_CUSTOM)
 #define RT_CUSTPRIM    (OPTIX_PRIMITIVE_TYPE_CUSTOM << 16)
 #define RT_TRI_BUILTIN (OPTIX_PRIMITIVE_TYPE_TRIANGLE << 16)
 #else
 #define RT_CUSTPRIM    0 // OptiX 7.0.0
 #define RT_TRI_BUILTIN OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE
 #endif

 // Merged "primitive type | hit kind" codes: the primitive type macro
 // (RT_CUSTPRIM / RT_TRI_BUILTIN, or an OptiX curve type shifted left by 16)
 // combined with the RtHitKind value in the low bits.
 enum RtMergedPrimKind {
   //
   // Tachyon custom primitives:
   //   XXX to prevent the triangle front/back hit kind low-bit masking scheme
   //       (see below) from interfering with the custom prim types,
   //       the lowest byte of their enums must start at values above 0x02
   RT_PRM_CONE       = RT_CUSTPRIM | RT_HIT_CONE,
   RT_PRM_CYLINDER   = RT_CUSTPRIM | RT_HIT_CYLINDER,
   RT_PRM_QUAD       = RT_CUSTPRIM | RT_HIT_QUAD,
   RT_PRM_RING       = RT_CUSTPRIM | RT_HIT_RING,
   RT_PRM_SPHERE     = RT_CUSTPRIM | RT_HIT_SPHERE,

   //
   // OptiX 7.x built-in primitives
   //
   // XXX we handle both front+back face triangles with a single case by
   //     masking off the low bit from the hit kind value and the enums:
   RT_PRM_TRIANGLE   = RT_TRI_BUILTIN |
                        (0xFE & OPTIX_HIT_KIND_TRIANGLE_FRONT_FACE),

   // curve primitive kinds are only defined for sufficiently new OptiX
 #if OPTIX_VERSION >= 70400
   RT_PRM_CATMULLROM = (OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM << 16),
 #endif
 #if OPTIX_VERSION >= 70200
   RT_PRM_LINEAR     = (OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR << 16),
 #endif
 };


 // Enums used for custom primitive PGM indexing in SBT + GAS
 // Zero-based, consecutive indices for the custom primitive types;
 // RT_CUST_PRIM_COUNT (5) is the number of custom primitive types.
 enum RtCustPrim {
   RT_CUST_PRIM_CONE=0,
   RT_CUST_PRIM_CYLINDER,
   RT_CUST_PRIM_QUAD,
   RT_CUST_PRIM_RING,
   RT_CUST_PRIM_SPHERE,
   RT_CUST_PRIM_COUNT
 };

 // Color space selector; RT_COLORSPACE_COUNT (2) is the number of entries.
 enum RtColorSpace {
   RT_COLORSPACE_LINEAR=0,
   RT_COLORSPACE_sRGB=1,
   RT_COLORSPACE_COUNT
 };

 // Texture flag bits.  Note that RT_TEX_NONE and RT_TEX_COLORSPACE_LINEAR
 // share the value 0, so "linear" is the absence of the sRGB bit.
 enum RtTexFlags {
   RT_TEX_NONE=0,
   RT_TEX_COLORSPACE_LINEAR = 0,
   RT_TEX_COLORSPACE_sRGB   = 0x1,
   RT_TEX_ALPHA             = 0x2
 };

 // Material flag bits (distinct single-bit values that may be OR'd)
 enum RtMatFlags {
   RT_MAT_NONE     = 0,
   RT_MAT_ALPHA    = 0x1,
   RT_MAT_TEXALPHA = 0x2,
 };


 //
 // Images, Materials, Textures...
 //

 // Texture record: flags, a texture coordinate generation frame
 // (origin plus u/v/w axes), and the CUDA array / texture object handles.
 typedef struct {
   int texflags;              // presumably RtTexFlags bits -- TODO confirm
   float3 texgen_origin;
   float3 texgen_uaxis;
   float3 texgen_vaxis;
   float3 texgen_waxis;
   cudaArray_t d_img;         // CUDA array handle for the image data
   cudaTextureObject_t tex;   // CUDA texture object handle
   int userindex;
 } rt_texture;


 // Material record: scalar shading coefficients, an optional texture
 // object handle, and flag/index fields.  tachyonLaunchParams::materials
 // points to an array of these.
 typedef struct {
   float opacity;
   float ambient;
   float diffuse;
   float specular;
   float shininess;
   float reflectivity;
   float outline;
   float outlinewidth;
   int transmode;
   cudaTextureObject_t tex;
   int matflags;              // presumably RtMatFlags bits -- TODO confirm
   int userindex;
 } rt_material;


 //
 // Lighting data structures
 //
 // Directional light: only a direction vector is stored at present.
 typedef struct {
   float3 dir;
 //  float3 color; // not yet used
 } rt_directional_light;

 // Positional light: only a position is stored at present.
 typedef struct {
   float3 pos;
 //  float3 color; // not yet used
 } rt_positional_light;


 //
 // Shader Binding Table (SBT) Data Structures
 //
 // Per-geometry-type pointer bundles stored in the hit group SBT data
 // (see the union in GeomSBTHG).  Each struct holds only raw pointers;
 // the array lengths are not recorded here.

 // Cone array: base/apex points and their radii
 struct ConeArraySBT {
   float3 *base;
   float3 *apex;
   float  *baserad;
   float  *apexrad;
 };

 // Curve array: vertices, per-vertex radii, and segment indices
 struct CurveArraySBT {
   float3 *vertices;
   float  *vertradii;
   int    *segindices;
 };

 // Cylinder array: start/end points and radii
 struct CylinderArraySBT {
   float3 *start;
   float3 *end;
   float  *radius;
 };

 // Quad mesh: vertices, int4 indices, and optional per-vertex attributes
 // in either float or packed form
 struct QuadMeshSBT {
   float3 *vertices;
   int4   *indices;
   float3 *normals;
   uint4  *packednormals;
   float3 *vertcolors3f;
   uchar4 *vertcolors4u;
 };

 // Ring array: centers, normals, and inner/outer radii
 struct RingArraySBT {
   float3 *center;
   float3 *norm;
   float  *inrad;
   float  *outrad;
 };

 // Sphere array: a single float4 per sphere (by name, xyz position plus
 // radius in w -- TODO confirm against the intersection program)
 struct SphereArraySBT {
   float4 *PosRadius;
 };

 // Triangle mesh: vertices, int3 indices, optional per-vertex attributes
 // in float or packed form, and 2-D / 3-D texture coordinates
 struct TriMeshSBT {
   float3 *vertices;
   int3   *indices;
   float3 *normals;
   uint4  *packednormals;
   float3 *vertcolors3f;
   uchar4 *vertcolors4u;
   float2 *tex2d;
   float3 *tex3d;
 };

 // Hit group SBT payload: optional geometry flags, color information,
 // a material index, and a union holding the pointer bundle for exactly
 // one geometry type.
 struct GeomSBTHG {
 #if defined(TACHYON_USE_GEOMFLAGS)
   // XXX alpha/opacity AH optimization flags to skip material fetching
   int geomflags;
 #endif
   float3 *prim_color;      // color array pointer
   float3 uniform_color;    // single color value
   int materialindex;       // presumably indexes tachyonLaunchParams::materials

   // Only one member is meaningful for a given record; the record itself
   // does not store which one.
   union {
     ConeArraySBT cone;
     CurveArraySBT curve;
     CylinderArraySBT cyl;
     QuadMeshSBT quadmesh;
     RingArraySBT ring;
     SphereArraySBT sphere;
     TriMeshSBT trimesh;
   };
 };


 // Hit group SBT record: an OPTIX_SBT_RECORD_HEADER_SIZE-byte header
 // followed by the GeomSBTHG payload, aligned to
 // OPTIX_SBT_RECORD_ALIGNMENT.
 struct __align__( OPTIX_SBT_RECORD_ALIGNMENT ) HGRecord {
   __align__( OPTIX_SBT_RECORD_ALIGNMENT ) char header[OPTIX_SBT_RECORD_HEADER_SIZE];
   GeomSBTHG data;
 };

 // Pair of hit group records, one per ray type; the member order
 // (radiance, shadow) matches the RayType enum order.
 struct HGRecordGroup {
   HGRecord radiance;
   HGRecord shadow;
 };


 // Exception program SBT record: header plus an unused placeholder pointer
 struct __align__( OPTIX_SBT_RECORD_ALIGNMENT ) ExceptionRecord {
   __align__( OPTIX_SBT_RECORD_ALIGNMENT ) char header[OPTIX_SBT_RECORD_HEADER_SIZE];
   void *data; // dummy value
 };

 // Ray generation program SBT record: header plus an unused placeholder
 struct __align__( OPTIX_SBT_RECORD_ALIGNMENT ) RaygenRecord {
   __align__( OPTIX_SBT_RECORD_ALIGNMENT ) char header[OPTIX_SBT_RECORD_HEADER_SIZE];
   void *data; // dummy value
 };

 // Miss program SBT record: header plus an unused placeholder pointer
 struct __align__( OPTIX_SBT_RECORD_ALIGNMENT ) MissRecord {
   __align__( OPTIX_SBT_RECORD_ALIGNMENT ) char header[OPTIX_SBT_RECORD_HEADER_SIZE];
   void *data; // dummy value
 };


 // Launch parameter block, grouped into frame, scene, lights, and camera
 // sub-structs plus a few top-level rendering controls.
 // NOTE: the layout depends on TACHYON_OPTIXDENOISER and TACHYON_RAYSTATS,
 // so every translation unit using this struct must agree on those macros.
 struct tachyonLaunchParams {
   // framebuffer, accumulation buffer, and output conversion state
   struct {
     int2 size;
     int subframe_index;
     int update_colorbuffer;
     int fb_clearall;
     int colorspace;          // presumably an RtColorSpace value -- TODO confirm
     int tonemap_mode;        // presumably an RtTonemapMode value -- TODO confirm
     float tonemap_exposure;
     uchar4 *framebuffer;

 #if defined(TACHYON_OPTIXDENOISER)
     // buffers required for denoising
     float4 *denoiser_colorbuffer;
     int denoiser_enabled;
 #endif

     float accum_normalize;
     float4 *accum_buffer;

 #if defined(TACHYON_RAYSTATS)
     uint4 *raystats1_buffer;
     uint4 *raystats2_buffer;
 #endif
   } frame;

   // background color/gradient, fog, and intersection epsilon parameters
   struct {
     float3 bg_color;
     float3 bg_color_grad_top;
     float3 bg_color_grad_bot;
     float3 bg_grad_updir;
     float  bg_grad_topval;
     float  bg_grad_botval;
     float  bg_grad_invrange;
     float  bg_grad_noisemag;
     int    fog_mode;
     float  fog_start;
     float  fog_end;
     float  fog_density;
     float  epsilon;
   } scene;

   // shadow, ambient occlusion, and light list parameters
   struct {
     int shadows_enabled;     // presumably an RtShadowMode value -- TODO confirm
     int ao_samples;
     float ao_lightscale;
     float ao_ambient;
     float ao_direct;
     float ao_maxdist;
     int headlight_mode;
     int num_dir_lights;
     float3 *dir_lights;      // stored as bare float3, not rt_directional_light
     int num_pos_lights;
     float3 *pos_lights;      // stored as bare float3, not rt_positional_light
   } lights;

   // camera position/basis and depth-of-field / stereo parameters
   struct {
     float3 pos;
     float3 U;
     float3 V;
     float3 W;
     float zoom;
     int   dof_enabled;
     float dof_aperture_rad;
     float dof_focal_dist;
     int   stereo_enabled;
     float stereo_eyesep;
     float stereo_convergence_dist;
   } cam;

   // VR HMD fade+clipping plane/sphere
   int clipview_mode;
   float clipview_start;
   float clipview_end;

   rt_material *materials;    // pointer to the material array

   int max_depth;
   int max_trans;
   int aa_samples;

   OptixTraversableHandle traversable;
 };


 // Provide pi only if the math headers have not already defined it:
 // M_PI is a double-precision literal, M_PIf a single-precision literal.
 #ifndef M_PI
 #define M_PI 3.14159265358979323846
 #endif
 #ifndef M_PIf
 #define M_PIf 3.14159265358979323846f
 #endif

 //
 // Eliminate compiler warnings about any unused functions
 //
 //#pragma push
 // suppress "function was declared but never referenced warning"
 //#pragma nv_diag_suppress 177


 //
 // Vector math helper routines
 //

 //
 // float2 vector operators
 //
 // component-wise sum of two float2 values
 inline __host__ __device__ float2 operator+(const float2& a, const float2& b) {
   float2 r;
   r.x = a.x + b.x;
   r.y = a.y + b.y;
   return r;
 }

 // add a scalar to both components
 inline __host__ __device__ float2 operator+(const float2& a, const float s) {
   float2 r;
   r.x = a.x + s;
   r.y = a.y + s;
   return r;
 }

 // component-wise difference of two float2 values
 inline __host__ __device__ float2 operator-(const float2& a, const float2& b) {
   float2 r;
   r.x = a.x - b.x;
   r.y = a.y - b.y;
   return r;
 }

 // subtract a scalar from both components
 inline __host__ __device__ float2 operator-(const float2& a, const float s) {
   float2 r;
   r.x = a.x - s;
   r.y = a.y - s;
   return r;
 }

 // subtract both components from a scalar
 inline __host__ __device__ float2 operator-(const float s, const float2& a) {
   float2 r;
   r.x = s - a.x;
   r.y = s - a.y;
   return r;
 }

 // component-wise product of two float2 values
 inline __host__ __device__ float2 operator*(const float2& a, const float2& b) {
   float2 r;
   r.x = a.x * b.x;
   r.y = a.y * b.y;
   return r;
 }

 // scalar * float2
 inline __host__ __device__ float2 operator*(const float s, const float2& a) {
   float2 r;
   r.x = a.x * s;
   r.y = a.y * s;
   return r;
 }

 // float2 * scalar
 inline __host__ __device__ float2 operator*(const float2& a, const float s) {
   float2 r;
   r.x = a.x * s;
   r.y = a.y * s;
   return r;
 }

 // in-place scale of both components
 inline __host__ __device__ void operator*=(float2& a, const float s) {
   a.x = a.x * s;
   a.y = a.y * s;
 }

 // divide a scalar by each component
 inline __host__ __device__ float2 operator/(const float s, const float2& a) {
   float2 r;
   r.x = s / a.x;
   r.y = s / a.y;
   return r;
 }


 //
 // float3 vector operators
 //
 // broadcast a scalar into all three components
 inline __host__ __device__ float3 make_float3(const float s) {
   float3 r;
   r.x = s;
   r.y = s;
   r.z = s;
   return r;
 }

 // take the xyz components of a float4, dropping w
 inline __host__ __device__ float3 make_float3(const float4& a) {
   float3 r;
   r.x = a.x;
   r.y = a.y;
   r.z = a.z;
   return r;
 }

 // component-wise sum
 inline __host__ __device__ float3 operator+(float3 a, float3 b) {
   float3 r;
   r.x = a.x + b.x;
   r.y = a.y + b.y;
   r.z = a.z + b.z;
   return r;
 }

 // component-wise difference
 inline __host__ __device__ float3 operator-(const float3& a, const float3 &b) {
   float3 r;
   r.x = a.x-b.x;
   r.y = a.y-b.y;
   r.z = a.z-b.z;
   return r;
 }

 // unary negation
 inline __host__ __device__ float3 operator-(const float3& a) {
   float3 r;
   r.x = -a.x;
   r.y = -a.y;
   r.z = -a.z;
   return r;
 }

 // in-place component-wise accumulate
 inline __host__ __device__ void operator+=(float3& a, const float3& b) {
   a.x = a.x + b.x;
   a.y = a.y + b.y;
   a.z = a.z + b.z;
 }

 // add a scalar to every component
 inline __host__ __device__ float3 operator+(const float3& a, const float &b) {
   float3 r;
   r.x = a.x + b;
   r.y = a.y + b;
   r.z = a.z + b;
   return r;
 }

 // component-wise product
 inline __host__ __device__ float3 operator*(const float3& a, const float3 &b) {
   float3 r;
   r.x = a.x * b.x;
   r.y = a.y * b.y;
   r.z = a.z * b.z;
   return r;
 }

 // scalar * float3
 inline __host__ __device__ float3 operator*(float s, const float3 &a) {
   float3 r;
   r.x = s * a.x;
   r.y = s * a.y;
   r.z = s * a.z;
   return r;
 }

 // float3 * scalar
 inline __host__ __device__ float3 operator*(const float3 &a, const float s) {
   float3 r;
   r.x = a.x * s;
   r.y = a.y * s;
   r.z = a.z * s;
   return r;
 }

 // in-place uniform scale
 inline __host__ __device__ void operator*=(float3& a, const float s) {
   a.x = a.x * s;
   a.y = a.y * s;
   a.z = a.z * s;
 }

 // in-place component-wise scale
 inline __host__ __device__ void operator*=(float3& a, const float3 &b) {
   a.x = a.x * b.x;
   a.y = a.y * b.y;
   a.z = a.z * b.z;
 }

 // component-wise quotient
 inline __host__ __device__ float3 operator/(const float3 &a, const float3 &b) {
   float3 r;
   r.x = a.x / b.x;
   r.y = a.y / b.y;
   r.z = a.z / b.z;
   return r;
 }


 //
 // float4 vector operators
 //
 // build a float4 from a float3 (xyz) and a scalar (w)
 inline __host__ __device__ float4 make_float4(const float3 &a, const float &b) {
   float4 r;
   r.x = a.x;
   r.y = a.y;
   r.z = a.z;
   r.w = b;
   return r;
 }

 // broadcast a scalar into all four components
 inline __host__ __device__ float4 make_float4(const float a) {
   float4 r;
   r.x = a;
   r.y = a;
   r.z = a;
   r.w = a;
   return r;
 }

 // in-place component-wise accumulate
 inline __host__ __device__ void operator+=(float4& a, const float4& b) {
   a.x = a.x + b.x;
   a.y = a.y + b.y;
   a.z = a.z + b.z;
   a.w = a.w + b.w;
 }

 // float4 * scalar
 inline __host__ __device__ float4 operator*(const float4& a, const float s) {
   float4 r;
   r.x = a.x * s;
   r.y = a.y * s;
   r.z = a.z * s;
   r.w = a.w * s;
   return r;
 }

 // in-place uniform scale
 inline __host__ __device__ void operator*=(float4& a, const float &b) {
   a.x = a.x * b;
   a.y = a.y * b;
   a.z = a.z * b;
   a.w = a.w * b;
 }


 //
 // operators with subsequent type conversions
 //
 // scale the xyz components of a char4 into a float3; w is ignored
 inline __host__ __device__ float3 operator*(char4 a, const float s) {
   float3 r;
   r.x = s * a.x;
   r.y = s * a.y;
   r.z = s * a.z;
   return r;
 }

 // scale the xyz components of a uchar4 into a float3; w is ignored
 inline __host__ __device__ float3 operator*(uchar4 a, const float s) {
   float3 r;
   r.x = s * a.x;
   r.y = s * a.y;
   r.z = s * a.z;
   return r;
 }


 //
 // math fctns...
 //
 // component-wise absolute value
 inline __host__ __device__ float3 fabsf(const float3& a) {
   float3 r;
   r.x = fabsf(a.x);
   r.y = fabsf(a.y);
   r.z = fabsf(a.z);
   return r;
 }

 // component-wise maximum of two vectors
 inline __host__ __device__ float3 fmaxf(const float3& a, const float3& b) {
   float3 r;
   r.x = fmaxf(a.x, b.x);
   r.y = fmaxf(a.y, b.y);
   r.z = fmaxf(a.z, b.z);
   return r;
 }

 // largest of the three components
 inline __host__ __device__ float fmaxf(const float3& a) {
   const float maxxy = fmaxf(a.x, a.y);
   return fmaxf(maxxy, a.z);
 }

 // 3-component dot product
 inline __host__ __device__ float dot(const float3 & a, const float3 & b) {
   const float d = a.x*b.x + a.y*b.y + a.z*b.z;
   return d;
 }

 // 4-component dot product
 inline __host__ __device__ float dot(const float4 & a, const float4 & b) {
   const float d = a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w;
   return d;
 }

 // Euclidean length of a float3
 inline __host__ __device__ float length(const float3 & v) {
   const float lensq = dot(v, v);
   return sqrtf(lensq);
 }


 // Return v scaled by the reciprocal of its length.  No guard is applied
 // for a zero-length input.
 inline __host__ __device__ float3 normalize(const float3 & v) {
   const float lensq = dot(v, v);
 #if defined(__CUDACC__) || defined(__NVCC__)
   const float scale = rsqrtf(lensq);
 #else
   const float scale = 1.0f / sqrtf(lensq);
 #endif
   return make_float3(v.x * scale, v.y * scale, v.z * scale);
 }


 // Return v scaled to unit length and store the original length in l.
 // No guard is applied for a zero-length input.
 inline __host__ __device__ float3 normalize_len(const float3 v, float &l) {
   l = length(v);
   const float scale = 1.0f / l;
   return make_float3(v.x * scale, v.y * scale, v.z * scale);
 }


 // Return v scaled to unit length and store the reciprocal of the
 // original length in invlen.  No guard for a zero-length input.
 inline __host__ __device__ float3 normalize_invlen(const float3 v, float &invlen) {
   const float lensq = dot(v, v);
 #if defined(__CUDACC__) || defined(__NVCC__)
   invlen = rsqrtf(lensq);
 #else
   invlen = 1.0f / sqrtf(lensq);
 #endif
   return make_float3(v.x * invlen, v.y * invlen, v.z * invlen);
 }


 // 3-D cross product a x b
 inline __host__ __device__ float3 cross(const float3 & a, const float3 & b) {
   const float cx =  a.y * b.z - b.y * a.z;
   const float cy = -a.x * b.z + b.x * a.z;
   const float cz =  a.x * b.y - b.x * a.y;
   return make_float3(cx, cy, cz);
 }


 // Reflect incident vector i about normal n: i - 2 n (n . i)
 inline __host__ __device__ float3 reflect(const float3& i, const float3& n) {
   const float ndoti = dot(n, i);
   return i - 2.0f * n * ndoti;
 }


 // Return n multiplied by +1 or -1, taking the sign of dot(i, nref)
 inline __host__ __device__ float3 faceforward(const float3& n, const float3& i,
                                               const float3& nref) {
   const float sgn = copysignf(1.0f, dot(i, nref));
   return n * sgn;
 }


 //
 // PRNGs
 //

 //
 // Various random number routines
 //   https://en.wikipedia.org/wiki/List_of_random_number_generators
 //

 // 4294967296 is 2^32; the second constant is its reciprocal, used to
 // scale a uint32 random value into a float.
 #define UINT32_RAND_MAX     4294967296.0f      // max uint32 random value
 #define UINT32_RAND_MAX_INV 2.3283064365e-10f  // normalize uint32 RNs

 //
 // Survey of parallel RNGS suited to GPUs, by L'Ecuyer et al.:
 //   Random numbers for parallel computers: Requirements and methods,
 //   with emphasis on GPUs.
 //   Pierre L'Ecuyer, David Munger, Boris Oreshkin, and Richard Simard.
 //   Mathematics and Computers in Simulation 135:3-17, 2017.
 //   https://doi.org/10.1016/j.matcom.2016.05.005
 //
 // Counter-based RNGs introduced by Salmon @ D.E. Shaw Research:
 //   "Parallel random numbers: as easy as 1, 2, 3", by Salmon et al.,
 //    D. E. Shaw Research:
 //   http://doi.org/10.1145/2063384.2063405
 //   https://www.thesalmons.org/john/random123/releases/latest/docs/index.html
 //   https://en.wikipedia.org/wiki/Counter-based_random_number_generator_(CBRNG)
 //


 //
 // Quick and dirty 32-bit LCG random number generator [Fishman 1990]:
 //   A=1099087573 B=0 M=2^32
 //   Period: 10^9
 // Fastest gun in the west, but fails many tests after 10^6 samples,
 // and fails all statistics tests after 10^7 samples.
 // It fares better than the Numerical Recipes LCG.  This is the fastest
 // power of two rand, and has the best multiplier for 2^32, found by
 // brute force[Fishman 1990].  Test results:
 //   http://www.iro.umontreal.ca/~lecuyer/myftp/papers/testu01.pdf
 //   http://www.shadlen.org/ichbin/random/
 //
 // Advance the multiplicative LCG state in idum (mod 2^32 via uint32
 // wraparound) and return the new state.  A state of 0 stays 0.
 static __host__ __device__ __inline__
 uint32_t qnd_rng(uint32_t &idum) {
   const uint32_t lcg_mult = 1099087573u;
   idum = idum * lcg_mult; // uint32 arithmetic wraps, no masking needed
   return idum;
 }


 //
 // Middle Square Weyl Sequence ("msws")
 //   This is an improved variant of von Neumann's middle square RNG
 //   that uses Weyl sequences to provide a long period.  Claimed as
 //   fastest traditional seeded RNG that passes statistical tests.
 //   V5: Bernard Widynski, May 2020.
 //   https://arxiv.org/abs/1704.00358
 //
 //   Additional notes and commentary:
 //     https://en.wikipedia.org/wiki/Middle-square_method
 //     https://pthree.org/2018/07/30/middle-square-weyl-sequence-prng/
 //
 //   Reported to pass both BigCrush and PractRand tests:
 //     "An Empirical Study of Non-Cryptographically Secure
 //      Pseudorandom Number Generators," M. Singh, P. Singh and P. Kumar,
 //      2020 International Conference on Computer Science, Engineering
 //      and Applications (ICCSEA), 2020,
 //      http://doi.org/10.1109/ICCSEA49143.2020.9132873
 //
 // Advance the (x, w) generator state in place and return the low
 // 32 bits of the updated x.
 static __host__ __device__ __inline__
 uint32_t msws_rng(uint64_t &x, uint64_t &w) {
   const uint64_t weyl_incr = 0xb5ad4eceda1ce2a9;
   w += weyl_incr;                    // step the Weyl sequence
   const uint64_t sq = x * x + w;     // square, then add the Weyl term
   x = (sq << 32) | (sq >> 32);       // swap 32-bit halves ("middle square")
   return (uint32_t) x;               // low 32 bits are the result
 }


 //
 // Squares: A Fast Counter-Based RNG
 //   This is a counter-based RNG based on John von Neumann's
 //   Middle Square RNG, with the Weyl sequence added to provide a long period.
 //   V3: Bernard Widynski, Nov 2020.
 //   https://arxiv.org/abs/2004.06278
 //
 // This RNG claims to outperform all of the original counter-based RNGs
 // in "Parallel random numbers: as easy as 1, 2, 3",
 //   by Salmon et al., http://doi.org/10.1145/2063384.2063405
 //   https://en.wikipedia.org/wiki/Counter-based_random_number_generator_(CBRNG)
 // That being said, key generation technique is important in this case.
 //
 // Predefined 64-bit key constants, suitable as the "key" argument of
 // squares_rng() below.
 #define SQUARES_RNG_KEY1 0x1235d7fcb4dfec21  // a few good keys...
 #define SQUARES_RNG_KEY2 0x418627e323f457a1  // a few good keys...
 #define SQUARES_RNG_KEY3 0x83fc79d43614975f  // a few good keys...
 #define SQUARES_RNG_KEY4 0xc62f73498cb654e3  // a few good keys...

 // Template to allow compile-time selection of number of rounds (2, 3, 4).
 // Roughly 5 integer ALU operations per round, 4 rounds is standard.
 // Stateless counter-based generator: returns a 32-bit value computed
 // from (counter, key).  ROUNDS of 2 or 3 stop early; any other value
 // runs all 4 rounds.
 template<unsigned int ROUNDS> static __host__ __device__ __inline__
 uint32_t squares_rng(uint64_t counter, uint64_t key) {
   const uint64_t weyl_y = counter * key;   // Weyl term for odd rounds
   const uint64_t weyl_z = weyl_y + key;    // Weyl term for even rounds
   uint64_t s = weyl_y;

   s = s*s + weyl_y;                        // round 1
   s = (s>>32) | (s<<32);

   s = s*s + weyl_z;                        // round 2
   if (ROUNDS != 2) {
     s = (s>>32) | (s<<32);

     s = s*s + weyl_y;                      // round 3
     if (ROUNDS != 3) {
       s = (s>>32) | (s<<32);

       s = s*s + weyl_z;                    // round 4
     }
   }

   // the final rotation is folded into taking the upper 32 bits
   return (uint32_t) (s >> 32);
 }


 //
 // Hashing based PRNGs
 //


 //
 // TEA, a tiny encryption algorithm.
 // D. Wheeler and R. Needham, 2nd Intl. Workshop Fast Software Encryption,
 // LNCS, pp. 363-366, 1994.
 //
 // GPU Random Numbers via the Tiny Encryption Algorithm
 // F. Zafar, M. Olano, and A. Curtis.
 // HPG '10 Proceedings of the Conference on High Performance Graphics,
 // pp. 133-141, 2010.
 // https://dl.acm.org/doi/10.5555/1921479.1921500
 //
 // Tea has avalanche effect in output from one bit input delta after 6 rounds
 //
 // Hash the pair (val0, val1) with ROUNDS iterations of the TEA mixing
 // step and return the first mixed word.  ROUNDS == 0 returns val0.
 template<unsigned int ROUNDS> static __host__ __device__ __inline__
 unsigned int tea(uint32_t val0, uint32_t val1) {
   uint32_t lo = val0;
   uint32_t hi = val1;
   uint32_t sum = 0;

   for (unsigned int left = ROUNDS; left != 0; --left) {
     sum += 0x9e3779b9;
     lo += ((hi<<4)+0xa341316c)^(hi+sum)^((hi>>5)+0xc8013ea4);
     hi += ((lo<<4)+0xad90777d)^(lo+sum)^((lo>>5)+0x7e95761e);
   }

   return lo;
 }


 //
 // QRNGs
 //


 //
 // Low discrepancy sequences based on the Golden Ratio, described in
 // Golden Ratio Sequences for Low-Discrepancy Sampling,
 // Colas Schretter and Leif Kobbelt, pp. 95-104, JGT 16(2), 2012.
 //
 // Other useful online references:
 //   http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/
 //

 // compute Nth value in 1-D sequence
 // Return the fractional part of (0.5 + n/g), g being the golden ratio
 // constant below; the arithmetic is done in double precision.
 static __device__ __inline__
 float goldenratioseq1d(int n) {
   const double golden = 1.61803398874989484820458683436563;
   const double step = 1.0 / golden;
   const double t = (0.5 + (step * n));
   return t - trunc(t);
 }


 // incremental formulation to obtain the next value in the sequence
 // Advance x in place: add 1/g (g = golden ratio constant below) and keep
 // only the fractional part, computed in single precision.
 static __device__ __inline__
 void goldenratioseq1d_incr(float &x) {
   const double golden = 1.61803398874989484820458683436563;
   const double step = 1.0 / golden;
   const float stepped = x + step;   // sum is rounded to float here
   x = stepped - truncf(stepped);
 }


 // compute Nth point in 2-D sequence
 // Write the n-th 2-D sequence point into xy: each coordinate is the
 // fractional part of (0.5 + n * a_k), with a_k = 1/g^k for the constant
 // g below.  Arithmetic is done in double and narrowed to float.
 static __device__ __inline__
 void goldenratioseq2d(int n, float2 &xy) {
   const double phi2 = 1.32471795724474602596;
   const double step_x = 1.0 / phi2;
   const double step_y = 1.0 / (phi2*phi2);
   const double start = 0.5;

   const double tx = (start + (step_x * n));
   const double ty = (start + (step_y * n));

   xy.x = (float) (tx - trunc(tx));
   xy.y = (float) (ty - trunc(ty));
 }


 // incremental formulation to obtain the next value in the sequence
 // Advance a 2-D sequence point in place: add the per-axis step
 // (1/g and 1/g^2) and keep the fractional part of each coordinate.
 // The constants are held in single precision here.
 static __device__ __inline__
 void goldenratioseq2d_incr(float2 &xy) {
   const float phi2 = 1.32471795724474602596;
   const float step_x = 1.0 / phi2;
   const float step_y = 1.0 / (phi2*phi2);

   const float nx = xy.x + step_x;
   const float ny = xy.y + step_y;

   xy.x = (nx - trunc(nx));
   xy.y = (ny - trunc(ny));
 }


 // compute Nth point in 3-D sequence
 // Write the n-th 3-D sequence point into xyz: each coordinate is the
 // fractional part of (0.5 + n * a_k), with a_k = 1/g^k for the constant
 // g below.  Arithmetic is done in double and narrowed to float.
 static __device__ __inline__
 void goldenratioseq3d(int n, float3 &xyz) {
   const double phi3 = 1.22074408460575947536;
   const double step_x = 1.0 / phi3;
   const double step_y = 1.0 / (phi3*phi3);
   const double step_z = 1.0 / (phi3*phi3*phi3);
   const double start = 0.5;

   const double tx = (start + (step_x * n));
   const double ty = (start + (step_y * n));
   const double tz = (start + (step_z * n));

   xyz.x = (float) (tx - trunc(tx));
   xyz.y = (float) (ty - trunc(ty));
   xyz.z = (float) (tz - trunc(tz));
 }


 // incremental formulation to obtain the next value in the sequence
 // Advance a 3-D sequence point in place: add the per-axis step
 // (1/g, 1/g^2, 1/g^3) and keep the fractional part of each coordinate.
 // The constants are held in single precision here.
 static __device__ __inline__
 void goldenratioseq3d_incr(float3 &xyz) {
   const float phi3 = 1.22074408460575947536;
   const float step_x = 1.0 / phi3;
   const float step_y = 1.0 / (phi3*phi3);
   const float step_z = 1.0 / (phi3*phi3*phi3);

   const float nx = xyz.x + step_x;
   const float ny = xyz.y + step_y;
   const float nz = xyz.z + step_z;

   xyz.x = (nx - trunc(nx));
   xyz.y = (ny - trunc(ny));
   xyz.z = (nz - trunc(nz));
 }


 // compute Nth point in 4-D sequence
 // Write the n-th 4-D sequence point into (xy1.x, xy1.y, xy2.x, xy2.y):
 // each coordinate is the fractional part of (0.5 + n * a_k), with
 // a_k = 1/g^k for the constant g below.  Arithmetic is done in double
 // and narrowed to float.
 static __device__ __inline__
 void goldenratioseq4d(int n, float2 &xy1, float2 &xy2) {
   const double phi4 = 1.167303978261418740;
   const double step1 = 1.0 / phi4;
   const double step2 = 1.0 / (phi4*phi4);
   const double step3 = 1.0 / (phi4*phi4*phi4);
   const double step4 = 1.0 / (phi4*phi4*phi4*phi4);
   const double start = 0.5;

   const double t1 = (start + (step1 * n));
   const double t2 = (start + (step2 * n));
   const double t3 = (start + (step3 * n));
   const double t4 = (start + (step4 * n));

   xy1.x = (float) (t1 - trunc(t1));
   xy1.y = (float) (t2 - trunc(t2));
   xy2.x = (float) (t3 - trunc(t3));
   xy2.y = (float) (t4 - trunc(t4));
 }


 // incremental formulation to obtain the next value in the sequence
 // Advance a 4-D sequence point in place: add the per-axis step
 // (1/g ... 1/g^4) and keep the fractional part of each coordinate.
 // The base constant is double; the steps are narrowed to float.
 static __device__ __inline__
 void goldenratioseq4d_incr(float2 &xy1, float2 &xy2) {
   const double phi4 = 1.167303978261418740;
   const float step1 = 1.0 / phi4;
   const float step2 = 1.0 / (phi4*phi4);
   const float step3 = 1.0 / (phi4*phi4*phi4);
   const float step4 = 1.0 / (phi4*phi4*phi4*phi4);

   const float n1 = xy1.x + step1;
   const float n2 = xy1.y + step2;
   const float n3 = xy2.x + step3;
   const float n4 = xy2.y + step4;

   xy1.x = (n1 - trunc(n1));
   xy1.y = (n2 - trunc(n2));
   xy2.x = (n3 - trunc(n3));
   xy2.y = (n4 - trunc(n4));
 }


 //
 // stochastic sampling helper routines
 //

 // Generate an offset to jitter AA samples in the image plane
 // Draw two values from qnd_rng (advancing pval twice, x first) and
 // store them in xy, each scaled by UINT32_RAND_MAX_INV and shifted
 // down by 0.5.
 static __device__ __inline__
 void jitter_offset2f(unsigned int &pval, float2 &xy) {
   float2 jit;
   jit.x = (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 0.5f;
   jit.y = (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 0.5f;
   xy = jit;
 }


 // Generate an offset to jitter DoF samples in the Circle of Confusion
 // pval:   RNG state, advanced by two qnd_rng() draws in the active branch
 // xy:     output offset
 // radius: scale factor applied to the generated offset
 static __device__ __inline__
 void jitter_disc2f(unsigned int &pval, float2 &xy, float radius) {
 #if 1
   // Since the GPU RT currently uses super cheap/sleazy LCG RNGs,
   // it is best to avoid using sample picking, which can fail if
   // we use a multiply-only RNG and we hit a zero in the PRN sequence.
   // The special functions are slow, but have bounded runtime and
   // minimal branch divergence.
   float   r=(qnd_rng(pval) * UINT32_RAND_MAX_INV);
   float phi=(qnd_rng(pval) * UINT32_RAND_MAX_INV) * 2.0f * M_PIf;
   __sincosf(phi, &xy.x, &xy.y); // fast approximation
   // the sqrt of r sets the distance from the center, scaled by radius
   xy *= sqrtf(r) * radius;
 #else
   // Pick uniform samples that fall within the disc --
   // this scheme can hang in an endless loop if a poor quality
   // RNG is used and it gets stuck in a short PRN sub-sequence
   do {
     xy.x = 2.0f * (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 1.0f;
     xy.y = 2.0f * (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 1.0f;
   } while ((xy.x*xy.x + xy.y*xy.y) > 1.0f);
   xy *= radius;
 #endif
 }


 // Generate an offset to jitter AA samples in the image plane using
 // a low-discrepancy sequence
 static __device__ __inline__
 void jitter_offset2f_qrn(float2 qrnxy, float2 &xy) {
   xy = qrnxy - make_float2(0.5f, 0.5f);
 }


 // Generate an offset to jitter DoF samples in the Circle of Confusion,
 // using low-discrepancy sequences based on the Golden Ratio
 static __device__ __inline__
 void jitter_disc2f_qrn(float2 &qrnxy, float2 &xy, float radius) {
   goldenratioseq2d_incr(qrnxy);
   float   r=qrnxy.x;
   float phi=qrnxy.y * 2.0f * M_PIf;
   __sincosf(phi, &xy.x, &xy.y); // fast approximation
   xy *= sqrtf(r) * radius;
 }


 //
 // Protect functions that are only GPU-callable, e.g., those that
 // use GPU-specific intrinsics such as __saturatef() or others.
 //
 #if defined(TACHYON_INTERNAL)

 // Generate a randomly oriented ray
 // pval: RNG state, advanced by two qnd_rng() draws in the active branch
 // dir:  output direction; z is drawn first, then x/y are placed on the
 //       circle of radius sqrt(1 - z*z)
 // Only the first "#if 1" branch is compiled; the other two variants
 // are kept for reference.
 static __device__ __inline__
 void jitter_sphere3f(unsigned int &pval, float3 &dir) {
 #if 1
   //
   // Use GPU fast/approximate math routines
   //
   /* Archimedes' cylindrical projection scheme       */
   /* generate a point on a unit cylinder and project */
   /* back onto the sphere.  This approach is likely  */
   /* faster for SIMD hardware, despite the use of    */
   /* transcendental functions.                       */
   float u1 = qnd_rng(pval) * UINT32_RAND_MAX_INV;
   dir.z = 2.0f * u1 - 1.0f;
   // NOTE(review): the _rn suffix suggests a round-to-nearest sqrt
   // intrinsic rather than an approximation -- confirm in the CUDA docs
   float R = __fsqrt_rn(1.0f - dir.z*dir.z);  // fast approximation
   float u2 = qnd_rng(pval) * UINT32_RAND_MAX_INV;
   float phi = 2.0f * M_PIf * u2;
   float sinphi, cosphi;
   __sincosf(phi, &sinphi, &cosphi); // fast approximation
   dir.x = R * cosphi;
   dir.y = R * sinphi;
 #elif 1
   /* Archimedes' cylindrical projection scheme       */
   /* generate a point on a unit cylinder and project */
   /* back onto the sphere.  This approach is likely  */
   /* faster for SIMD hardware, despite the use of    */
   /* transcendental functions.                       */
   // same scheme as above, using the standard sqrtf/sincosf routines
   float u1 = qnd_rng(pval) * UINT32_RAND_MAX_INV;
   dir.z = 2.0f * u1 - 1.0f;
   float R = sqrtf(1.0f - dir.z*dir.z);

   float u2 = qnd_rng(pval) * UINT32_RAND_MAX_INV;
   float phi = 2.0f * M_PIf * u2;
   float sinphi, cosphi;
   sincosf(phi, &sinphi, &cosphi);
   dir.x = R * cosphi;
   dir.y = R * sinphi;
 #else
   /* Marsaglia's uniform sphere sampling scheme           */
   /* In order to correctly sample a sphere, using rays    */
   /* generated randomly within a cube we must throw out   */
   /* direction vectors longer than 1.0, otherwise we'll   */
   /* oversample the corners of the cube relative to       */
   /* a true sphere.                                       */
   float len;
   float3 d;
   // candidates lie in [-0.5, 0.5]^3, so the rejection threshold on the
   // squared length is 0.5^2 = 0.25
   do {
     d.x = (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 0.5f;
     d.y = (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 0.5f;
     d.z = (qnd_rng(pval) * UINT32_RAND_MAX_INV) - 0.5f;
     len = dot(d, d);
   } while (len > 0.250f);
   float invlen = rsqrtf(len);

   /* finish normalizing the direction vector */
   dir = d * invlen;
 #endif
 }


 //
 // Spherical Fibonacci pattern to create a uniformly
 // distributed sample pattern on a sphere.
 //   Spherical Fibonacci mapping.
 //   B. Keinert, M. Innmann, M. Sänger, and M. Stamminger.
 //   ACM Transactions on Graphics, 34:193:1-193:7, 2015.
 //   http://doi.org/10.1145/2816795.2818131
 //
 //
 // Return point i of a totaln-point spherical Fibonacci pattern as a
 // unit-length direction (x, y, z) = (cos(phi)*sin(theta),
 // sin(phi)*sin(theta), cos(theta)).
 //   i:      sample index, passed as a float
 //   totaln: total number of points in the pattern, must be non-zero
 //
 static __device__ __inline__
 float3 sphericalFibonacci(float i, float totaln) {
   // golden ratio (1 + sqrt(5)) / 2
   const float PHI = sqrtf(5.0f) * 0.5f + 0.5f;

   // fractional part of i * (PHI - 1) gives the azimuthal position
   const float turns = i * (PHI - 1.0f);
   const float fraction = turns - floorf(turns);

   // single-precision constant and sqrtf() keep the math out of double
   const float phi = 2.0f * M_PIf * fraction;
   const float cosTheta = 1.0f - (2.0f * i + 1.0f) * (1.0f / totaln);
   const float sinTheta = sqrtf(__saturatef(1.0f - cosTheta * cosTheta));

   // sincosf() stores the sine through its second argument and the
   // cosine through its third
   float sinPhi, cosPhi;
   sincosf(phi, &sinPhi, &cosPhi);
   return make_float3(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);
 }

 #endif // TACHYON_INTERNAL


 //
 // Convert between 2-D planar coordinates and an octahedral mapping.
 // This is useful for both omnidirectional cameras and image formats,
 // and for surface normal compression/quantization.
 //
 // This implementation follows the method described here:
 //   "A Survey of Efficient Representations for Independent Unit Vectors",
 //   Cigolle et al., J. Computer Graphics Techniques 3(2), 2014.
 //   http://jcgt.org/published/0003/02/01/
 //
 // UNORM: convert internal SNORM output range [-1,1] to UNORM [0,1] range
 //        UNORM mode costs extra instructions
 //
 template <int UNORM>
 static __host__ __device__ __inline__ float2 OctEncode(float3 n) {
   // reciprocal of the L1 norm, used to project n onto the octahedron
   const float invL1Norm = 1.0f / (fabsf(n.x) + fabsf(n.y) + fabsf(n.z));

   float2 oct;
   if (n.z < 0.0f) {
     // lower hemisphere: fold over the diagonals, keeping the signs of n.x/n.y
     oct.x = copysignf(1.0f - fabsf(n.y) * invL1Norm, n.x);
     oct.y = copysignf(1.0f - fabsf(n.x) * invL1Norm, n.y);
   } else {
     // upper hemisphere: direct projection onto the XY plane
     oct = make_float2(n.x * invL1Norm, n.y * invL1Norm);
   }

   // optional remap of the SNORM result [-1,1] to the UNORM range [0,1]
   if (UNORM)
     oct = oct * 0.5f + 0.5f;

   return oct;
 }


 //
 // XXX TODO: implement a high-precision OctPEncode() variant, based on
 //           floored snorms and an error minimization scheme using a
 //           comparison of internally decoded values for least error
 //

 //
 // Direct adaptation from Cigolle et al, with optional UNORM mode.
 //
 // UNORM: convert from UNORM input domain [0,1] to internal SNORM [-1,1] domain
 //        UNORM mode costs extra instructions
 //
 //
 // Decode a 2-D octahedral coordinate back into a 3-D direction.
 //   projected: octahedral coordinate, SNORM [-1,1] when UNORM == 0,
 //              UNORM [0,1] when UNORM != 0
 //   returns:   decoded direction; it is not renormalized here
 //
 template <int UNORM>
 static __host__ __device__ __inline__ float3 OctDecode(float2 projected) {
   // convert from UNORM input domain to native SNORM internal domain;
   // written as an explicit multiply-then-subtract, since
   // "projected *= 2.0f - 1.0f" would only multiply by 1.0f
   if (UNORM)
     projected = projected * 2.0f - 1.0f; // convert to SNORM range [-1,1]

   float3 n = make_float3(projected.x,
                          projected.y,
                          1.0f - (fabsf(projected.x) + fabsf(projected.y)));

   // negative z marks the lower hemisphere, which was folded over the
   // diagonals during encoding; undo the fold while keeping the x/y signs
   if (n.z < 0.0f) {
     float oldX = n.x;
     n.x = copysignf(1.0f - fabsf(n.y), oldX);
     n.y = copysignf(1.0f - fabsf(oldX), n.y);
   }

   return n;
 }


 //
 // Protect functions that are only GPU-callable, e.g., those that
 // use GPU-specific intrinsics such as __saturatef() or others.
 //
 #if defined(TACHYON_INTERNAL)

 //
 // Faster version by Rune Stubbe (2017) that avoids branching in decode:
 //   https://twitter.com/Stubbesaurus/status/937994790553227264
 //   https://twitter.com/Stubbesaurus/status/937994790553227264/photo/1
 // https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding
 // Another variant:
 // http://johnwhite3d.blogspot.com/2017/10/signed-octahedron-normal-encoding.html
 // UNORM: convert from UNORM input domain [0,1] to internal SNORM [-1,1] domain
 //        UNORM mode costs extra instructions
 //
 //
 // Branch-free octahedral decode (GPU only, uses __saturatef()).
 //   projected: octahedral coordinate, SNORM [-1,1] when UNORM == 0,
 //              UNORM [0,1] when UNORM != 0
 //   returns:   decoded direction; it is not renormalized here
 //
 template <int UNORM>
 static __device__ __inline__ float3 OctDecode_fast(float2 projected) {
   // convert from UNORM input domain to native SNORM internal domain;
   // written as an explicit multiply-then-subtract, since
   // "projected *= 2.0f - 1.0f" would only multiply by 1.0f
   if (UNORM)
     projected = projected * 2.0f - 1.0f; // convert to SNORM range [-1,1]

   float3 n = make_float3(projected.x,
                          projected.y,
                          1.0f - fabsf(projected.x) - fabsf(projected.y));

   // t is zero for the upper hemisphere (n.z >= 0) and -n.z otherwise;
   // moving x and y toward zero by t undoes the lower-hemisphere fold
   float t = __saturatef(-n.z); // or max(-n.z, 0.0)
   n.x += (n.x >= 0.0f) ? -t : t;
   n.y += (n.y >= 0.0f) ? -t : t;

   return n;
 }

 #endif // TACHYON_INTERNAL


 //
 // Methods for packing normals into a 4-byte quantity, such as a
 // [u]int or [u]char4, and similar.  See JCGT article by Cigolle et al.,
 // "A Survey of Efficient Representations for Independent Unit Vectors",
 // J. Computer Graphics Techniques 3(2), 2014.
 // http://jcgt.org/published/0003/02/01/
 //
 static __host__ __device__ __inline__ uint convfloat2uint32(float2 f2) {
   // remap both components from [-1,1] to [0,1]
   const float2 unorm = f2 * 0.5f + 0.5f;

   // quantize each component by truncation to 16 bits;
   // x occupies the low half of the word, y the high half
   const uint lo = (uint) (unorm.x * 65535);
   const uint hi = (uint) (unorm.y * 65535);
   return lo | (hi << 16);
 }

 static __host__ __device__ __inline__ float2 convuint32float2(uint packed) {
   // split the word into its two 16-bit fields: x is low, y is high
   const uint lo = (packed      ) & 0x0000ffff;
   const uint hi = (packed >> 16) & 0x0000ffff;

   // rescale each field to [0,1]
   float2 unorm;
   unorm.x = (float) lo / 65535;
   unorm.y = (float) hi / 65535;

   // remap [0,1] to [-1,1]
   return unorm * 2.0f - 1.0f;
 }


 #if 1

 //
 // oct32: 32-bit octahedral normal encoding using [su]norm16x2 quantization
 // Meyer et al., "On Floating Point Normal Vectors", In Proc. 21st
 // Eurographics Conference on Rendering.
 //   http://dx.doi.org/10.1111/j.1467-8659.2010.01737.x
 //
 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
   // octahedral encode in SNORM mode, then quantize to two 16-bit fields
   return convfloat2uint32(OctEncode<0>(normal));
 }

 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
   // expand the two 16-bit fields to SNORM floats, then octahedral decode
   return OctDecode<0>(convuint32float2(packed));
 }

 #elif 0

 //
 // snorm10x3: signed 10-bit-per-component scalar unit real representation
 // Better representation than unorm.
 // Supported by most fixed-function graphics hardware.
 // https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_snorm.txt
 //   i=round(clamp(r,-1,1) * (2^(b-1) - 1))
 //   r=clamp(i/(2^(b-1) - 1), -1, 1)
 //

 #elif 1

 // OpenGL GLbyte signed quantization scheme
 //   i = (r * (2^b - 1) - 1) / 2
 //   r = (2i + 1)/(2^b - 1)
 // This variant is not compiled while the "#if 1" oct32 branch of this
 // preprocessor chain is enabled.
 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
   // conversion to GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
   // with b = 8: i = r * 127.5 - 0.5
   const float3 N = normal * 127.5f - 0.5f;
   // the fourth byte is unused and stored as zero
   const char4 packed = make_char4(N.x, N.y, N.z, 0);
   // reinterpret the four bytes as one 32-bit word
   return *((uint *) &packed);
 }

 // Inverse of the GLbyte packNormal() variant: expand three signed bytes
 // back into a float3.  Not compiled while the "#if 1" oct32 branch of
 // this preprocessor chain is enabled.
 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
   // reinterpret the 32-bit word as four signed bytes
   char4 c4norm = *((char4 *) &packed);

   // conversion from GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
   // float = (2c+1)/(2^8-1)
   const float ci2f = 1.0f / 255.0f; // constant term 1/(2^8-1)
   const float cn2f = 1.0f / 127.5f; // per-count scale 2/(2^8-1)
   // NOTE(review): relies on a char4 * float operator producing a float3,
   // which is not visible in this part of the file -- confirm before enabling
   float3 N = c4norm * cn2f + ci2f;

   return N;
 }
 #endif


 //
 // Device functions to convert between linear and sRGB colorspaces
 //
 // It's important to note that accurate conversions between
 // linear and sRGB color spaces require the use of
 // floating point or deep bit depth integer arithmetic.
 // We use the CUDA texturing hardware to perform sRGB to
 // linear color conversion during texture sampling.
 //
 // Some useful example results from improper conversion techniques:
 //   https://blog.demofox.org/2018/03/10/dont-convert-srgb-u8-to-linear-u8/
 //


 //
 // Conversion between sRGB and linear using the official equations
 //
 static __forceinline__ __device__
 float4 sRGB_to_linear(const float4 &rgba) {
   // piecewise decode: linear segment at or below the breakpoint,
   // offset power curve with exponent 2.4 above it
   const float breakpoint = 0.0404482362771082f;
   const float toescale = 0.0773993f; // multiply rather than divide by 12.92f

   float4 lin;
   lin.x = (rgba.x <= breakpoint) ? rgba.x * toescale
                                  : powf(((rgba.x + 0.055f)/1.055f), 2.4f);
   lin.y = (rgba.y <= breakpoint) ? rgba.y * toescale
                                  : powf(((rgba.y + 0.055f)/1.055f), 2.4f);
   lin.z = (rgba.z <= breakpoint) ? rgba.z * toescale
                                  : powf(((rgba.z + 0.055f)/1.055f), 2.4f);

   // alpha is passed through unchanged, it is linear in either color space
   lin.w = rgba.w;
   return lin;
 }


 //
 // Conversion between linear and sRGB using the official equations
 //
 static __forceinline__ __device__
 float4 linear_to_sRGB(const float4 &lin) {
   // piecewise encode: offset power curve with exponent 1/2.4 above the
   // breakpoint, linear segment with slope 12.92 at or below it
   const float breakpoint = 0.0031308f;

   float4 rgba;
   rgba.x = (lin.x > breakpoint)
              ? 1.055f * (powf(lin.x, (1.0f / 2.4f))) - 0.055f
              : 12.92f * lin.x;
   rgba.y = (lin.y > breakpoint)
              ? 1.055f * (powf(lin.y, (1.0f / 2.4f))) - 0.055f
              : 12.92f * lin.y;
   rgba.z = (lin.z > breakpoint)
              ? 1.055f * (powf(lin.z, (1.0f / 2.4f))) - 0.055f
              : 12.92f * lin.z;

   // alpha is passed through unchanged, it is linear in either color space
   rgba.w = lin.w;
   return rgba;
 }


 //
 // Fast, approximate conversion between linear and sRGB:
 //   https://excamera.com/sphinx/article-srgb.html
 //   http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
 //
 static __forceinline__ __device__
 float4 sRGB_to_linear_approx(const float4 &rgba) {
   const float3 s = make_float3(rgba);

   // cubic polynomial in s with no constant term, evaluated in nested form
   float3 poly = s * 0.305306011f + 0.682171111f;
   poly = s * poly + 0.012522878f;

   return make_float4(s * poly, rgba.w); // alpha is copied through unchanged
 }


 //
 // Fast, approximate conversion between sRGB and linear:
 //   https://excamera.com/sphinx/article-srgb.html
 //   http://chilliant.blogspot.com/2012/08/srgb-approximations-for-hlsl.html
 //
 static __forceinline__ __device__
 float4 linear_to_sRGB_approx(const float4 &linear) {
   const float3 lin = make_float3(linear);

   // successive square roots: lin^(1/2), lin^(1/4), lin^(1/8)
   const float3 root2 = make_float3(sqrtf(lin.x), sqrtf(lin.y), sqrtf(lin.z));
   const float3 root4 = make_float3(sqrtf(root2.x), sqrtf(root2.y),
                                    sqrtf(root2.z));
   const float3 root8 = make_float3(sqrtf(root4.x), sqrtf(root4.y),
                                    sqrtf(root4.z));

   // weighted combination of the roots and the linear input
   const float3 sRGB = 0.662002687f * root2 + 0.684122060f * root4
                       - 0.323583601f * root8 - 0.0225411470f * lin;

   return make_float4(sRGB, linear.w); // alpha is copied through unchanged
 }


 //
 // Fastest low-approximate conversion between linear and sRGB (gamma 2.0):
 //
 static __forceinline__ __device__
 float4 sRGB_to_linear_approx_20(const float4 &rgba) {
   // gamma 2.0 decode: square each color component
   const float3 s = make_float3(rgba);
   const float3 lin = s * s;
   return make_float4(lin, rgba.w); // alpha is copied through unchanged
 }


 //
 // Fastest low-approximate conversion between sRGB and linear (gamma 2.0):
 //
 static __forceinline__ __device__
 float4 linear_to_sRGB_approx_20(const float4 &linear) {
   // gamma 2.0 encode: square root of each color component
   const float3 lin = make_float3(linear);
   return make_float4(make_float3(sqrtf(lin.x), sqrtf(lin.y), sqrtf(lin.z)),
                      linear.w); // alpha is copied through unchanged
 }


 //
 // Tone mapping and color grading device functions.
 // Useful references:
 //   Photographic Tone Reproduction for Digital Images
 //   E. Reinhard, M. Stark, P. Shirley, J. Ferwerda
 //   ACM Transactions on Graphics, 21(3) pp. 267-276, 2002.
 //   https://doi.org/10.1145/566654.566575
 //
 //   Tone Mapping of HDR Images: A Review
 //   Y. Salih, W. Md-Esa, A. Malik, N. Saad.
 //   http://doi.org/10.1109/ICIAS.2012.6306220
 //
 // Others:
 // http://filmicworlds.com/blog/filmic-tonemapping-operators/
 // http://filmicworlds.com/blog/filmic-tonemapping-with-piecewise-power-curves/
 // http://filmicworlds.com/blog/minimal-color-grading-tools/
 // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
 // https://bartwronski.com/2022/02/28/exposure-fusion-local-tonemapping-for-real-time-rendering/
 //   https://bartwronski.github.io/local_tonemapping_js_demo/


 //
 // Calculate relative luminance from linear RGB w/ perceptual coefficients:
 //   https://en.wikipedia.org/wiki/Relative_luminance
 //
 static __device__ __inline__
 float luminance(float3 c) {
   // per-channel weights applied to red, green and blue
   const float3 weights = make_float3(0.2126f, 0.7152f, 0.0722f);
   return dot(c, weights);
 }


 //
 // Rescale RGB colors to achieve desired luminance
 //
 //
 // Scale color c uniformly so that its luminance becomes newluminance.
 //   c:            input linear RGB color
 //   newluminance: luminance the returned color should have
 //   returns:      c * (newluminance / luminance(c)), or black when
 //                 luminance(c) is exactly zero
 //
 static __device__ __inline__
 float3 rescale_luminance(float3 c, float newluminance) {
   const float l = luminance(c);

   // A zero-luminance input (e.g. a pure black pixel) cannot be rescaled:
   // dividing by it would produce Inf or NaN components, so return black.
   if (l == 0.0f)
     return make_float3(0.0f);

   return c * (newluminance / l);
 }


 //
 // ACES filmic tone mapping approximations:
 //   https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
 //   https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl
 //
 static __device__ __inline__
 float3 ACES_TMO(float3 c) {
   // rational curve: quadratic numerator over quadratic denominator,
   // evaluated per color component
   const float3 numer = c * (2.51f * c + make_float3(0.03f));
   const float3 denom = c * (2.43f * c + make_float3(0.59f))
                        + make_float3(0.14f);

   // the result is returned unclamped, clamping is deferred
   return numer / denom;
 }


 //
 // Reinhard style tone mapping
 //
 static __device__ __inline__
 float3 reinhard_TMO(float3 c) {
   // per-component c / (1 + c)
   const float3 denom = make_float3(1.0f) + c;
   return c / denom;
 }


 //
 // Extended Reinhard style tone mapping:
 //   https://64.github.io/tonemapping/
 //
 static __device__ __inline__
 float3 reinhard_extended_TMO(float3 c, float maxwhite) {
   // per-component c * (1 + c / maxwhite^2) / (1 + c)
   const float3 one = make_float3(1.0f);
   const float3 whitesq = make_float3(maxwhite * maxwhite);
   const float3 numer = c * (one + (c / whitesq));
   return numer / (one + c);
 }


 //
 // Extended Reinhard style tone mapping applied to luminance:
 //   https://64.github.io/tonemapping/
 //
 static __device__ __inline__
 float3 reinhard_extended_luminance_TMO(float3 c, float maxL) {
   // apply the extended Reinhard curve to the luminance only ...
   const float lumIn = luminance(c);
   const float numer = lumIn * (1.0f + (lumIn / (maxL * maxL)));
   const float lumOut = numer / (1.0f + lumIn);

   // ... then scale the color to the new luminance
   return rescale_luminance(c, lumOut);
 }


 //
 // Protect functions that are only GPU-callable, e.g., those that
 // use GPU-specific intrinsics such as __saturatef() or others.
 //
 #if defined(TACHYON_INTERNAL)

 // clamp vector to range [0,1] using __saturatef() intrinsic
 static __device__ __inline__ float3 clamp_float3(const float3 &a) {
   // saturate each component independently
   float3 r;
   r.x = __saturatef(a.x);
   r.y = __saturatef(a.y);
   r.z = __saturatef(a.z);
   return r;
 }

 // clamp vector to range [0,1] using __saturatef() intrinsic
 static __device__ __inline__ float4 clamp_float4(const float4 &a) {
   // saturate each component independently, alpha included
   float4 r;
   r.x = __saturatef(a.x);
   r.y = __saturatef(a.y);
   r.z = __saturatef(a.z);
   r.w = __saturatef(a.w);
   return r;
 }


 //
 // Color conversion operations
 //

 static __device__ __inline__ uchar4 make_color_rgb4u(const float3& c) {
   // saturate each channel to [0,1], scale by 255.99 and truncate to a byte
   const unsigned char r = static_cast<unsigned char>(__saturatef(c.x)*255.99f);
   const unsigned char g = static_cast<unsigned char>(__saturatef(c.y)*255.99f);
   const unsigned char b = static_cast<unsigned char>(__saturatef(c.z)*255.99f);

   // no alpha in the input: the output alpha byte is fixed at 255
   return make_uchar4(r, g, b, 255u);
 }

 static __device__ __inline__ uchar4 make_color_rgb4u(const float4& c) {
   // saturate each channel to [0,1], scale by 255.99 and truncate to a byte;
   // alpha is converted the same way as the color channels
   const unsigned char r = static_cast<unsigned char>(__saturatef(c.x)*255.99f);
   const unsigned char g = static_cast<unsigned char>(__saturatef(c.y)*255.99f);
   const unsigned char b = static_cast<unsigned char>(__saturatef(c.z)*255.99f);
   const unsigned char a = static_cast<unsigned char>(__saturatef(c.w)*255.99f);
   return make_uchar4(r, g, b, a);
 }


 //
 // HDR tone mapping
 //
 //
 // Apply exposure gain and the selected tone mapping operator to a color.
 //   colrgba4f:        input color; the alpha channel is copied through
 //   tonemap_mode:     one of the RT_TONEMAP_* values
 //   tonemap_exposure: gain multiplied into the color before tone mapping
 //   colorspace:       accepted but not referenced by this function
 // The returned color is not range clamped.
 //
 static __inline__ __device__
 float4 tonemap_color(const float4 & colrgba4f, int tonemap_mode,
                      float tonemap_exposure, int colorspace) {
   // exposure gain applies to the color channels only
   float3 rgb = make_float3(colrgba4f) * tonemap_exposure;

   if (tonemap_mode == RT_TONEMAP_ACES) {
     rgb = ACES_TMO(rgb);
   } else if (tonemap_mode == RT_TONEMAP_REINHARD) {
     rgb = reinhard_TMO(rgb);
   } else if (tonemap_mode == RT_TONEMAP_REINHARD_EXT) {
     rgb = reinhard_extended_TMO(rgb, 1.0f);
   } else if (tonemap_mode == RT_TONEMAP_REINHARD_EXT_L) {
     rgb = reinhard_extended_luminance_TMO(rgb, 1.0f);
   }
   // RT_TONEMAP_CLAMP and any other mode leave the color as it is

   // range clamping is deferred until storage format conversion
   return make_float4(rgb, colrgba4f.w);
 }

 #endif // TACHYON_INTERNAL


 //
 // End of potentially unreferenced functions
 //
 //#pragma pop


 #endif // TACHYONOPTIXSHADERS_H
tea
static __host__ __device__ __inline__ unsigned int tea(uint32_t val0, uint32_t val1)
Definition: TachyonOptiXShaders.h:876

normalize
__host__ __device__ float3 normalize(const float3 &v)
Normalize input vector to unit length.
Definition: TachyonOptiXShaders.h:663

tachyonLaunchParams::U
float3 U
camera orthonormal U (right) axis
Definition: TachyonOptiXShaders.h:450

RtMatFlags
RtMatFlags
Definition: TachyonOptiXShaders.h:221

rescale_luminance
static __device__ __inline__ float3 rescale_luminance(float3 c, float newluminance)
Definition: TachyonOptiXShaders.h:1543

TriMeshSBT::vertcolors4u
uchar4 * vertcolors4u
unsigned char color representation
Definition: TachyonOptiXShaders.h:325

rt_texture::texgen_vaxis
float3 texgen_vaxis
world coordinate texgen V axis
Definition: TachyonOptiXShaders.h:237

QuadMeshSBT::packednormals
uint4 * packednormals
packed normals: ng [n0 n1 n2]
Definition: TachyonOptiXShaders.h:303

rt_material::outlinewidth
float outlinewidth
width of outline shading effect
Definition: TachyonOptiXShaders.h:254

goldenratioseq4d
static __device__ __inline__ void goldenratioseq4d(int n, float2 &xy1, float2 &xy2)
Definition: TachyonOptiXShaders.h:1004

tachyonLaunchParams::framebuffer
uchar4 * framebuffer
8-bit unorm RGBA framebuffer
Definition: TachyonOptiXShaders.h:401

tachyonLaunchParams::accum_normalize
float accum_normalize
precalc 1.0f / subframe_index
Definition: TachyonOptiXShaders.h:409

RT_DENOISER_ON
denoiser on, std. impl.
Definition: TachyonOptiXShaders.h:126

rt_texture
void * rt_texture(SceneHandle sc, apitexture *apitex)
Translate a texture definition into the internal format used by Tachyon, and returns an opaque pointe...
Definition: api.c:933

RT_SHADOWS_ON
shadows on, std. impl.
Definition: TachyonOptiXShaders.h:120

rt_material::matflags
int matflags
alpha/cutout transparency flags
Definition: TachyonOptiXShaders.h:257

tachyonLaunchParams::headlight_mode
int headlight_mode
Extra VR camera-located headlight.
Definition: TachyonOptiXShaders.h:441

goldenratioseq1d
static __device__ __inline__ float goldenratioseq1d(int n)
Definition: TachyonOptiXShaders.h:908

jitter_offset2f
static __device__ __inline__ void jitter_offset2f(unsigned int &pval, float2 &xy)
Definition: TachyonOptiXShaders.h:1058

tachyonLaunchParams::bg_grad_invrange
float bg_grad_invrange
miss background gradient inverse range
Definition: TachyonOptiXShaders.h:425

SphereArraySBT::PosRadius
float4 * PosRadius
X,Y,Z,Radius packed for coalescing.
Definition: TachyonOptiXShaders.h:316

TriMeshSBT::vertcolors3f
float3 * vertcolors3f
Definition: TachyonOptiXShaders.h:324

tachyonLaunchParams::bg_grad_updir
float3 bg_grad_updir
miss background gradient up direction
Definition: TachyonOptiXShaders.h:422

RT_MAT_NONE
default behavior
Definition: TachyonOptiXShaders.h:222

tachyonLaunchParams::update_colorbuffer
int update_colorbuffer
accumulation copyout flag
Definition: TachyonOptiXShaders.h:396

rt_material::tex
cudaTextureObject_t tex
texture, non-zero if valid
Definition: TachyonOptiXShaders.h:256

CylinderArraySBT::start
float3 * start
Definition: TachyonOptiXShaders.h:294

jitter_disc2f_qrn
static __device__ __inline__ void jitter_disc2f_qrn(float2 &qrnxy, float2 &xy, float radius)
Definition: TachyonOptiXShaders.h:1101

tachyonLaunchParams::tonemap_mode
int tonemap_mode
output tone mapping mode
Definition: TachyonOptiXShaders.h:399

normalize_len
__host__ __device__ float3 normalize_len(const float3 v, float &l)
Normalize input vector to unit length, and return its original length.
Definition: TachyonOptiXShaders.h:678

QuadMeshSBT
Definition: TachyonOptiXShaders.h:299

tachyonLaunchParams::ao_lightscale
float ao_lightscale
2.0f/float(ao_samples)
Definition: TachyonOptiXShaders.h:437

GeomSBTHG::quadmesh
QuadMeshSBT quadmesh
Definition: TachyonOptiXShaders.h:343

tachyonLaunchParams::dof_aperture_rad
float dof_aperture_rad
DoF (defocus blur) aperture radius.
Definition: TachyonOptiXShaders.h:455

operator+
__host__ __device__ float2 operator+(const float2 &a, const float2 &b)
Definition: TachyonOptiXShaders.h:500

CylinderArraySBT::end
float3 * end
Definition: TachyonOptiXShaders.h:295

msws_rng
static __host__ __device__ __inline__ uint32_t msws_rng(uint64_t &x, uint64_t &w)
Definition: TachyonOptiXShaders.h:798

rt_material::reflectivity
float reflectivity
mirror reflectance coefficient
Definition: TachyonOptiXShaders.h:252

rt_material::shininess
float shininess
specular highlight size (exponential)
Definition: TachyonOptiXShaders.h:251

tachyonLaunchParams::ao_maxdist
float ao_maxdist
AO maximum occlusion distance.
Definition: TachyonOptiXShaders.h:440

goldenratioseq3d_incr
static __device__ __inline__ void goldenratioseq3d_incr(float3 &xyz)
Definition: TachyonOptiXShaders.h:984

make_float4
__host__ __device__ float4 make_float4(const float3 &a, const float &b)
Definition: TachyonOptiXShaders.h:601

RT_CUST_PRIM_RING
ring SBT index multiplier
Definition: TachyonOptiXShaders.h:203

make_float3
__host__ __device__ float3 make_float3(const float s)
Definition: TachyonOptiXShaders.h:545

GeomSBTHG::prim_color
float3 * prim_color
optional per-primitive color array
Definition: TachyonOptiXShaders.h:335

RT_HIT_CURVE
OptiX 7.x built-in curve prims.
Definition: TachyonOptiXShaders.h:156

TriMeshSBT::tex2d
float2 * tex2d
2-D texture coordinate buffer
Definition: TachyonOptiXShaders.h:326

TriMeshSBT::packednormals
uint4 * packednormals
packed normals: ng [n0 n1 n2]
Definition: TachyonOptiXShaders.h:323

tachyonLaunchParams::W
float3 W
camera orthonormal W (view) axis
Definition: TachyonOptiXShaders.h:452

GeomSBTHG
Definition: TachyonOptiXShaders.h:330

fmaxf
__host__ __device__ float3 fmaxf(const float3 &a, const float3 &b)
Definition: TachyonOptiXShaders.h:641

RtShadowMode
RtShadowMode
Definition: TachyonOptiXShaders.h:118

RtTexFlags
RtTexFlags
Definition: TachyonOptiXShaders.h:214

tachyonLaunchParams::pos
float3 pos
camera position
Definition: TachyonOptiXShaders.h:449

RingArraySBT::outrad
float * outrad
Definition: TachyonOptiXShaders.h:312

tachyonLaunchParams::max_trans
int max_trans
max transparent surface crossing count
Definition: TachyonOptiXShaders.h:470

RtMergedPrimKind
RtMergedPrimKind
Definition: TachyonOptiXShaders.h:169

tachyonLaunchParams::tonemap_exposure
float tonemap_exposure
tone mapping exposure gain parameter
Definition: TachyonOptiXShaders.h:400

RT_CUST_PRIM_CYLINDER
cylinder SBT index multiplier
Definition: TachyonOptiXShaders.h:201

RT_SHADOWS_OFF
shadows disabled
Definition: TachyonOptiXShaders.h:119

ConeArraySBT::base
float3 * base
Definition: TachyonOptiXShaders.h:281

CurveArraySBT::vertradii
float * vertradii
Definition: TachyonOptiXShaders.h:289

rt_texture::texgen_uaxis
float3 texgen_uaxis
world coordinate texgen U axis
Definition: TachyonOptiXShaders.h:236

ConeArraySBT::apex
float3 * apex
Definition: TachyonOptiXShaders.h:282

rt_material::userindex
int userindex
material user index, positive if valid
Definition: TachyonOptiXShaders.h:258

CurveArraySBT
Definition: TachyonOptiXShaders.h:287

tachyonLaunchParams::ao_direct
float ao_direct
AO direct lighting scaling factor.
Definition: TachyonOptiXShaders.h:439

GeomSBTHG::materialindex
int materialindex
material index for this array
Definition: TachyonOptiXShaders.h:337

TriMeshSBT::normals
float3 * normals
Definition: TachyonOptiXShaders.h:322

faceforward
__host__ __device__ float3 faceforward(const float3 &n, const float3 &i, const float3 &nref)
Ensure that an interpolated surface normal n faces in the same direction as dictated by a geometric n...
Definition: TachyonOptiXShaders.h:724

operator*=
__host__ __device__ void operator*=(float2 &a, const float s)
Definition: TachyonOptiXShaders.h:532

TriMeshSBT::vertices
float3 * vertices
Definition: TachyonOptiXShaders.h:320

RingArraySBT::norm
float3 * norm
Definition: TachyonOptiXShaders.h:310

tachyonLaunchParams
Tachyon OptiX global launch parameter structure containing the active camera, framebuffer, materials, and any global scene parameters required for shading.
Definition: TachyonOptiXShaders.h:392

jitter_sphere3f
void jitter_sphere3f(rng_frand_handle *rngh, float *dir)
Definition: util.c:779

tachyonLaunchParams::fb_clearall
int fb_clearall
clear/overwrite all FB components
Definition: TachyonOptiXShaders.h:397

tachyonLaunchParams::lights
struct tachyonLaunchParams::@4 lights

fabsf
__host__ __device__ float3 fabsf(const float3 &a)
Definition: TachyonOptiXShaders.h:637

RT_HIT_RING
custom prim ring
Definition: TachyonOptiXShaders.h:154

RT_RAY_TYPE_SHADOW
shadow probe/AO rays
Definition: TachyonOptiXShaders.h:140

tachyonLaunchParams::fog_mode
int fog_mode
fog type (or off)
Definition: TachyonOptiXShaders.h:427

sRGB_to_linear
static __forceinline__ __device__ float4 sRGB_to_linear(const float4 &rgba)
Definition: TachyonOptiXShaders.h:1398

rt_positional_light::pos
float3 pos
point light position
Definition: TachyonOptiXShaders.h:271

rt_texture::tex
cudaTextureObject_t tex
texture, non-zero if valid
Definition: TachyonOptiXShaders.h:240

M_PI
#define M_PI
Definition: TachyonOptiXShaders.h:479

reflect
__host__ __device__ float3 reflect(const float3 &i, const float3 &n)
calculate reflection direction from incident direction i, and surface normal n.
Definition: TachyonOptiXShaders.h:717

goldenratioseq4d_incr
static __device__ __inline__ void goldenratioseq4d_incr(float2 &xy1, float2 &xy2)
Definition: TachyonOptiXShaders.h:1029

RT_PRM_TRIANGLE
Definition: TachyonOptiXShaders.h:186

tachyonLaunchParams::bg_grad_topval
float bg_grad_topval
miss background gradient top value
Definition: TachyonOptiXShaders.h:423

operator/
__host__ __device__ float2 operator/(const float s, const float2 &a)
Definition: TachyonOptiXShaders.h:536

RT_TEX_NONE
default behavior
Definition: TachyonOptiXShaders.h:215

tachyonLaunchParams::dof_focal_dist
float dof_focal_dist
DoF focal plane distance.
Definition: TachyonOptiXShaders.h:456

__align__
struct __align__(OPTIX_SBT_RECORD_ALIGNMENT) HGRecord
SBT record for a hitgroup program.
Definition: TachyonOptiXShaders.h:353

RT_COLORSPACE_LINEAR
linear rgba, gamma 1.0
Definition: TachyonOptiXShaders.h:209

packNormal
static __host__ __device__ __inline__ uint packNormal(const float3 &normal)
Definition: TachyonOptiXShaders.h:1332

RT_TEX_ALPHA
enable cutout/transparency
Definition: TachyonOptiXShaders.h:218

RT_TEX_COLORSPACE_LINEAR
linear rgba, gamma 1.0
Definition: TachyonOptiXShaders.h:216

GeomSBTHG::trimesh
TriMeshSBT trimesh
Definition: TachyonOptiXShaders.h:346

jitter_disc2f
static __device__ __inline__ void jitter_disc2f(unsigned int &pval, float2 &xy, float radius)
Definition: TachyonOptiXShaders.h:1066

squares_rng
static __host__ __device__ __inline__ uint32_t squares_rng(uint64_t counter, uint64_t key)
Definition: TachyonOptiXShaders.h:830

goldenratioseq2d_incr
static __device__ __inline__ void goldenratioseq2d_incr(float2 &xy)
Definition: TachyonOptiXShaders.h:947

operator+=
__host__ __device__ void operator+=(float3 &a, const float3 &b)
Definition: TachyonOptiXShaders.h:565

rt_material::outline
float outline
outline shading coefficient
Definition: TachyonOptiXShaders.h:253

RT_COLORSPACE_sRGB
Adobe sRGB (gamma 2.2)
Definition: TachyonOptiXShaders.h:210

RT_PRM_CYLINDER
custom prim cylinder
Definition: TachyonOptiXShaders.h:176

OctEncode
static __host__ __device__ __inline__ float2 OctEncode(float3 n)
Definition: TachyonOptiXShaders.h:1216

rt_directional_light::dir
float3 dir
directional light direction
Definition: TachyonOptiXShaders.h:266

GeomSBTHG::sphere
SphereArraySBT sphere
Definition: TachyonOptiXShaders.h:345

unpackNormal
static __host__ __device__ __inline__ float3 unpackNormal(uint packed)
Definition: TachyonOptiXShaders.h:1337

rt_texture::texgen_waxis
float3 texgen_waxis
world coordinate texgen W axis
Definition: TachyonOptiXShaders.h:238

tachyonLaunchParams::fog_end
float fog_end
radial/linear fog end/max distance
Definition: TachyonOptiXShaders.h:429

CurveArraySBT::segindices
int * segindices
Definition: TachyonOptiXShaders.h:290

tachyonLaunchParams::shadows_enabled
int shadows_enabled
global shadow flag
Definition: TachyonOptiXShaders.h:435

rt_material::transmode
int transmode
transparency behavior
Definition: TachyonOptiXShaders.h:255

rt_directional_light
void * rt_directional_light(SceneHandle voidscene, void *tex, apivector dir)
Define a directional light with associated texture and direction.
Definition: api.c:1077

goldenratioseq2d
static __device__ __inline__ void goldenratioseq2d(int n, float2 &xy)
Definition: TachyonOptiXShaders.h:930

RT_CUSTPRIM
#define RT_CUSTPRIM
Definition: TachyonOptiXShaders.h:165

tachyonLaunchParams::stereo_enabled
int stereo_enabled
stereo rendering on/off
Definition: TachyonOptiXShaders.h:457

tachyonLaunchParams::dir_lights
float3 * dir_lights
list of directional light directions
Definition: TachyonOptiXShaders.h:443

HGRecordGroup::radiance
HGRecord radiance
Definition: TachyonOptiXShaders.h:365

reinhard_TMO
static __device__ __inline__ float3 reinhard_TMO(float3 c)
Definition: TachyonOptiXShaders.h:1568

RT_HIT_CYLINDER
custom prim cylinder
Definition: TachyonOptiXShaders.h:152

QuadMeshSBT::indices
int4 * indices
Definition: TachyonOptiXShaders.h:301

tachyonLaunchParams::bg_grad_botval
float bg_grad_botval
miss background gradient bottom value
Definition: TachyonOptiXShaders.h:424

ConeArraySBT::baserad
float * baserad
Definition: TachyonOptiXShaders.h:283

tachyonLaunchParams::scene
struct tachyonLaunchParams::@3 scene

tachyonLaunchParams::fog_start
float fog_start
radial/linear fog start distance
Definition: TachyonOptiXShaders.h:428

QuadMeshSBT::vertices
float3 * vertices
Definition: TachyonOptiXShaders.h:300

rt_material::specular
float specular
specular reflectance coefficient
Definition: TachyonOptiXShaders.h:250

RT_MAT_ALPHA
enable alpha transparency
Definition: TachyonOptiXShaders.h:223

dot
__host__ __device__ float dot(const float3 &a, const float3 &b)
Definition: TachyonOptiXShaders.h:649

luminance
static __device__ __inline__ float luminance(float3 c)
Definition: TachyonOptiXShaders.h:1534

RT_TONEMAP_CLAMP
only clamp the color values [0,1]
Definition: TachyonOptiXShaders.h:130

rt_texture::d_img
cudaArray_t d_img
GPU allocated image buffer.
Definition: TachyonOptiXShaders.h:239

TriMeshSBT::tex3d
float3 * tex3d
3-D texture coordinate buffer
Definition: TachyonOptiXShaders.h:327

tachyonLaunchParams::V
float3 V
camera orthonormal V (up) axis
Definition: TachyonOptiXShaders.h:451

RT_TONEMAP_REINHARD_EXT
"Extended" Reinhard style, color
Definition: TachyonOptiXShaders.h:133

ConeArraySBT::apexrad
float * apexrad
Definition: TachyonOptiXShaders.h:284

ACES_TMO
static __device__ __inline__ float3 ACES_TMO(float3 c)
Definition: TachyonOptiXShaders.h:1555

GeomSBTHG::cyl
CylinderArraySBT cyl
Definition: TachyonOptiXShaders.h:342

tachyonLaunchParams::stereo_convergence_dist
float stereo_convergence_dist
stereo convergence distance (world)
Definition: TachyonOptiXShaders.h:459

tachyonLaunchParams::size
int2 size
framebuffer size
Definition: TachyonOptiXShaders.h:394

RT_CUST_PRIM_COUNT
total count of SBT geometric multipliers
Definition: TachyonOptiXShaders.h:205

CylinderArraySBT
Definition: TachyonOptiXShaders.h:293

tachyonLaunchParams::bg_color_grad_bot
float3 bg_color_grad_bot
miss background gradient (bottom)
Definition: TachyonOptiXShaders.h:421

linear_to_sRGB_approx_20
static __forceinline__ __device__ float4 linear_to_sRGB_approx_20(const float4 &linear)
Definition: TachyonOptiXShaders.h:1500

RT_SHADOWS_ON_REVERSE
any-hit traversal reversal
Definition: TachyonOptiXShaders.h:121

GeomSBTHG::curve
CurveArraySBT curve
Definition: TachyonOptiXShaders.h:341

RtCustPrim
RtCustPrim
Definition: TachyonOptiXShaders.h:199

GeomSBTHG::cone
ConeArraySBT cone
Definition: TachyonOptiXShaders.h:340

QuadMeshSBT::vertcolors3f
float3 * vertcolors3f
Definition: TachyonOptiXShaders.h:304

qnd_rng
static __host__ __device__ __inline__ uint32_t qnd_rng(uint32_t &idum)
Definition: TachyonOptiXShaders.h:772

tachyonLaunchParams::dof_enabled
int dof_enabled
DoF (defocus blur) on/off.
Definition: TachyonOptiXShaders.h:454

tachyonLaunchParams::ao_samples
int ao_samples
number of AO samples per AA ray
Definition: TachyonOptiXShaders.h:436

OctDecode
static __host__ __device__ __inline__ float3 OctDecode(float2 projected)
Definition: TachyonOptiXShaders.h:1248

tachyonLaunchParams::subframe_index
int subframe_index
accumulation subframe index
Definition: TachyonOptiXShaders.h:395

RT_RAY_TYPE_RADIANCE
normal radiance rays
Definition: TachyonOptiXShaders.h:139

cross
__host__ __device__ float3 cross(const float3 &a, const float3 &b)
calculate the cross product between vectors a and b.
Definition: TachyonOptiXShaders.h:706

RT_DENOISER_OFF
denoiser disabled
Definition: TachyonOptiXShaders.h:125

rt_texture::userindex
int userindex
material user index, positive if valid
Definition: TachyonOptiXShaders.h:241

QuadMeshSBT::vertcolors4u
uchar4 * vertcolors4u
unsigned char color representation
Definition: TachyonOptiXShaders.h:305

rt_positional_light
Definition: TachyonOptiXShaders.h:270

tachyonLaunchParams::bg_grad_noisemag
float bg_grad_noisemag
miss background gradient noise magnitude
Definition: TachyonOptiXShaders.h:426

operator-
__host__ __device__ float2 operator-(const float2 &a, const float2 &b)
Definition: TachyonOptiXShaders.h:508

tachyonLaunchParams::aa_samples
int aa_samples
AA samples per launch.
Definition: TachyonOptiXShaders.h:471

convfloat2uint32
static __host__ __device__ __inline__ uint convfloat2uint32(float2 f2)
Definition: TachyonOptiXShaders.h:1308

RT_CUST_PRIM_CONE
cone SBT index multiplier
Definition: TachyonOptiXShaders.h:200

linear_to_sRGB_approx
static __forceinline__ __device__ float4 linear_to_sRGB_approx(const float4 &linear)
Definition: TachyonOptiXShaders.h:1474

tachyonLaunchParams::traversable
OptixTraversableHandle traversable
global OptiX scene traversable handle
Definition: TachyonOptiXShaders.h:473

CurveArraySBT::vertices
float3 * vertices
Definition: TachyonOptiXShaders.h:288

tachyonLaunchParams::bg_color_grad_top
float3 bg_color_grad_top
miss background gradient (top)
Definition: TachyonOptiXShaders.h:420

tachyonLaunchParams::clipview_end
float clipview_end
clipping sphere/plane end coord
Definition: TachyonOptiXShaders.h:465

RT_CUST_PRIM_QUAD
quad SBT index multiplier
Definition: TachyonOptiXShaders.h:202

operator*
__host__ __device__ float2 operator*(const float2 &a, const float2 &b)
Definition: TachyonOptiXShaders.h:520

tachyonLaunchParams::frame
struct tachyonLaunchParams::@2 frame

reinhard_extended_TMO
static __device__ __inline__ float3 reinhard_extended_TMO(float3 c, float maxwhite)
Definition: TachyonOptiXShaders.h:1578

HGRecordGroup
Store all hitgroup records for a given geometry together for simpler dynamic updates.
Definition: TachyonOptiXShaders.h:364

QuadMeshSBT::normals
float3 * normals
Definition: TachyonOptiXShaders.h:302

RT_PRM_SPHERE
custom prim sphere
Definition: TachyonOptiXShaders.h:179

length
__host__ __device__ float length(const float3 &v)
Definition: TachyonOptiXShaders.h:657

tachyonLaunchParams::clipview_start
float clipview_start
clipping sphere/plane start coord
Definition: TachyonOptiXShaders.h:464

RT_TRI_BUILTIN
#define RT_TRI_BUILTIN
Definition: TachyonOptiXShaders.h:166

ConeArraySBT
Definition: TachyonOptiXShaders.h:280

RtTonemapMode
RtTonemapMode
Definition: TachyonOptiXShaders.h:129

rt_material::opacity
float opacity
surface opacity
Definition: TachyonOptiXShaders.h:247

RT_CUST_PRIM_SPHERE
sphere SBT index multiplier
Definition: TachyonOptiXShaders.h:204

TriMeshSBT
Definition: TachyonOptiXShaders.h:319

RtColorSpace
RtColorSpace
Definition: TachyonOptiXShaders.h:208

CylinderArraySBT::radius
float * radius
Definition: TachyonOptiXShaders.h:296

GeomSBTHG::geomflags
int geomflags
Definition: TachyonOptiXShaders.h:333

tachyonLaunchParams::colorspace
int colorspace
output colorspace
Definition: TachyonOptiXShaders.h:398

tachyonLaunchParams::cam
struct tachyonLaunchParams::@5 cam

tachyonLaunchParams::num_dir_lights
int num_dir_lights
directional light count
Definition: TachyonOptiXShaders.h:442

RT_TONEMAP_COUNT
total count of tonemap modes
Definition: TachyonOptiXShaders.h:135

tachyonLaunchParams::pos_lights
float3 * pos_lights
list of positional light positions
Definition: TachyonOptiXShaders.h:445

UINT32_RAND_MAX_INV
#define UINT32_RAND_MAX_INV
Definition: TachyonOptiXShaders.h:740

jitter_offset2f_qrn
static __device__ __inline__ void jitter_offset2f_qrn(float2 qrnxy, float2 &xy)
Definition: TachyonOptiXShaders.h:1093

RingArraySBT
Definition: TachyonOptiXShaders.h:308

rt_texture::texflags
int texflags
linear/sRGB colorspace | texturing flags
Definition: TachyonOptiXShaders.h:234

RT_COLORSPACE_COUNT
total count of available colorspaces
Definition: TachyonOptiXShaders.h:211

RT_HIT_QUAD
custom prim quadrilateral
Definition: TachyonOptiXShaders.h:153

RT_HIT_HWTRIANGLE
RTX triangle.
Definition: TachyonOptiXShaders.h:148

sRGB_to_linear_approx_20
static __forceinline__ __device__ float4 sRGB_to_linear_approx_20(const float4 &rgba)
Definition: TachyonOptiXShaders.h:1490

rt_material
structure containing Tachyon material properties
Definition: TachyonOptiXShaders.h:246

tachyonLaunchParams::materials
rt_material * materials
device memory material array
Definition: TachyonOptiXShaders.h:467

rt_material::diffuse
float diffuse
diffuse reflectance coefficient
Definition: TachyonOptiXShaders.h:249

goldenratioseq3d
static __device__ __inline__ void goldenratioseq3d(int n, float3 &xyz)
Definition: TachyonOptiXShaders.h:963

RT_TONEMAP_REINHARD
Reinhard style, color.
Definition: TachyonOptiXShaders.h:132

M_PIf
#define M_PIf
Definition: TachyonOptiXShaders.h:482

RT_HIT_CONE
custom prim cone
Definition: TachyonOptiXShaders.h:151

SphereArraySBT
Definition: TachyonOptiXShaders.h:315

normalize_invlen
__host__ __device__ float3 normalize_invlen(const float3 v, float &invlen)
Normalize input vector to unit length, and return the reciprocal of its original length.
Definition: TachyonOptiXShaders.h:691

tachyonLaunchParams::fog_density
float fog_density
exponential fog density
Definition: TachyonOptiXShaders.h:430

RT_TONEMAP_ACES
ACES style approximation.
Definition: TachyonOptiXShaders.h:131

RtHitKind
RtHitKind
Definition: TachyonOptiXShaders.h:147

RT_PRM_QUAD
custom prim quadrilateral
Definition: TachyonOptiXShaders.h:177

tachyonLaunchParams::bg_color
float3 bg_color
miss background color
Definition: TachyonOptiXShaders.h:419

RT_PRM_CONE
custom prim cone
Definition: TachyonOptiXShaders.h:175

rt_texture::texgen_origin
float3 texgen_origin
world coordinate texgen origin
Definition: TachyonOptiXShaders.h:235

RingArraySBT::center
float3 * center
Definition: TachyonOptiXShaders.h:309

tachyonLaunchParams::num_pos_lights
int num_pos_lights
positional light count
Definition: TachyonOptiXShaders.h:444

HGRecordGroup::shadow
HGRecord shadow
Definition: TachyonOptiXShaders.h:366

sRGB_to_linear_approx
static __forceinline__ __device__ float4 sRGB_to_linear_approx(const float4 &rgba)
Definition: TachyonOptiXShaders.h:1461

RT_PRM_RING
custom prim ring
Definition: TachyonOptiXShaders.h:178

rt_material::ambient
float ambient
constant ambient light factor
Definition: TachyonOptiXShaders.h:248

reinhard_extended_luminance_TMO
static __device__ __inline__ float3 reinhard_extended_luminance_TMO(float3 c, float maxL)
Definition: TachyonOptiXShaders.h:1589

convuint32float2
static __host__ __device__ __inline__ float2 convuint32float2(uint packed)
Definition: TachyonOptiXShaders.h:1315

tachyonLaunchParams::ao_ambient
float ao_ambient
AO ambient factor.
Definition: TachyonOptiXShaders.h:438

GeomSBTHG::uniform_color
float3 uniform_color
uniform color for entire geometry array
Definition: TachyonOptiXShaders.h:336

tachyonLaunchParams::accum_buffer
float4 * accum_buffer
32-bit FP RGBA accumulation buffer
Definition: TachyonOptiXShaders.h:410

goldenratioseq1d_incr
static __device__ __inline__ void goldenratioseq1d_incr(float &x)
Definition: TachyonOptiXShaders.h:920

RtDenoiserMode
RtDenoiserMode
Definition: TachyonOptiXShaders.h:124

RayType
RayType
Definition: TachyonOptiXShaders.h:138

TriMeshSBT::indices
int3 * indices
Definition: TachyonOptiXShaders.h:321

tachyonLaunchParams::stereo_eyesep
float stereo_eyesep
stereo eye separation, in world coords
Definition: TachyonOptiXShaders.h:458

RT_TEX_COLORSPACE_sRGB
sRGB (approx. gamma 2.2)
Definition: TachyonOptiXShaders.h:217

tachyonLaunchParams::clipview_mode
int clipview_mode
VR clipping view on/off.
Definition: TachyonOptiXShaders.h:463

tachyonLaunchParams::max_depth
int max_depth
global max ray tracing recursion depth
Definition: TachyonOptiXShaders.h:469

RT_MAT_TEXALPHA
enable tex cutout transparency
Definition: TachyonOptiXShaders.h:224

RT_TONEMAP_REINHARD_EXT_L
"Extended" Reinhard style, luminance
Definition: TachyonOptiXShaders.h:134

RT_HIT_SPHERE
custom prim sphere
Definition: TachyonOptiXShaders.h:155

tachyonLaunchParams::epsilon
float epsilon
global epsilon value
Definition: TachyonOptiXShaders.h:431

GeomSBTHG::ring
RingArraySBT ring
Definition: TachyonOptiXShaders.h:344

RT_RAY_TYPE_COUNT
total count of ray types
Definition: TachyonOptiXShaders.h:141

RingArraySBT::inrad
float * inrad
Definition: TachyonOptiXShaders.h:311

linear_to_sRGB
static __forceinline__ __device__ float4 linear_to_sRGB(const float4 &lin)
Definition: TachyonOptiXShaders.h:1428

tachyonLaunchParams::zoom
float zoom
camera zoom factor
Definition: TachyonOptiXShaders.h:453