OptiXShaders.h Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010 * RCS INFORMATION:
00011 *
00012 *      $RCSfile: OptiXShaders.h,v $
00013 *      $Author: johns $      $Locker:  $               $State: Exp $
00014 *      $Revision: 1.41 $         $Date: 2020/02/26 03:51:31 $
00015 *
00016 ***************************************************************************/
00079 #ifndef OPTIXSHADERS
00080 #define OPTIXSHADERS
00081 #include <optixu/optixu_vector_types.h>
00082 
00083 // Compile-time flag for collection and reporting of ray statistics
00084 #if 0
00085 #define ORT_RAYSTATS 1
00086 #endif
00087 
00088 // Compile-time flag to enable the use of RTX hardware ray tracing 
00089 // acceleration APIs in OptiX
00090 #if OPTIX_VERSION >= 60000
00091 #define ORT_USERTXAPIS 1
00092 #endif
00093 
00094 // When compiling with OptiX 3.8 or greater, we use the new
00095 // progressive rendering APIs rather than our previous hand-coded
00096 // progressive renderer.
00097 #if (defined(VMDOPTIX_VCA) || (OPTIX_VERSION >= 3080)) // && !defined(VMDUSEOPENHMD)
00098 #define VMDOPTIX_PROGRESSIVEAPI 1
00099 #endif
00100 
00101 #if 1 || defined(VMDOPTIX_PROGRESSIVEAPI)
00102 #define VMDOPTIX_LIGHTUSEROBJS 1
00103 #endif
00104 
00105 #if defined(VMDOPTIX_LIGHTUSEROBJS)
00106 #include "Scene.h" // for DISP_LIGHTS macro
00107 #endif
00108 
00109 // "*" operator
00110 inline __host__ __device__ float3 operator*(char4 a, float b) {
00111   return make_float3(b * a.x, b * a.y, b * a.z);
00112 }
00113 
00114 inline __host__ __device__ float3 operator*(uchar4 a, float b) {
00115   return make_float3(b * a.x, b * a.y, b * a.z);
00116 }
00117 
00118 #if defined(__cplusplus)
00119   typedef optix::float3 float3;
00120 #endif
00121 
00122 
00123 // XXX OptiX 4.0 and later versions have a significant performance impact
00124 //     on VMD startup if we use 256-way combinatorial shader specialization.
00125 //     Shader template specialization had very little impact on
00126 //     OptiX versions 3.[789].x previously.  The new LLVM based compiler 
00127 //     back-end used in recent versions of OptiX has much more overhead
00128 //     when processing large numbers of shaders in a single PTX file.  
00129 //     If we want to retain the template specialization approach, 
00130 //     we will have to generate shader code and store it in many separate 
00131 //     PTX files to mitigate overheads in back-end compiler infrastructure.
00132 #if OPTIX_VERSION < 40000
00133 // this macro enables or disables the use of an array of
00134 // template-specialized shaders for every combination of
00135 // scene-wide and material-specific shader features.
00136 #define ORT_USE_TEMPLATE_SHADERS 1
00137 #endif
00138 
00139 
00140 // Enable reversed traversal of any-hit rays for shadows/AO.
00141 // This optimization yields a 20% performance gain in many cases.
00142 // #define USE_REVERSE_SHADOW_RAYS 1
00143  
00144 // Use reverse rays by default rather than only when enabled interactively
00145 // #define USE_REVERSE_SHADOW_RAYS_DEFAULT 1
// Shadow rendering mode selector.
enum RtShadowMode {
  RT_SHADOWS_OFF        = 0,  // shadows disabled
  RT_SHADOWS_ON         = 1,  // shadows enabled
  RT_SHADOWS_ON_REVERSE = 2   // shadows enabled; presumably selects the
                              // reversed shadow-ray traversal -- confirm
};
00150 
00151 
00152 //
00153 // Lighting data structures
00154 //
00155 #if defined(VMDOPTIX_LIGHTUSEROBJS)
00156 typedef struct {
00157   int num_lights;
00158   float3 dirs[DISP_LIGHTS+1];  
00159 } DirectionalLightList;
00160 
00161 typedef struct {
00162   int num_lights;
00163   float3 posns[DISP_LIGHTS+1]; 
00164 } PositionalLightList;
00165 #endif
00166 
00167 typedef struct {
00168   float3 dir;
00169   int    padding; // pad to next power of two
00170 } DirectionalLight;
00171 
00172 typedef struct {
00173   float3 pos;
00174   int    padding; // pad to next power of two
00175 } PositionalLight;
00176 
00177 
00178 //
00179 // Cylinders
00180 //
00181 
00182 // XXX memory layout is likely suboptimal
00183 typedef struct {
00184   float3 start;
00185   float radius;
00186   float3 axis;
00187   float pad;
00188 } vmd_cylinder;
00189 
00190 // XXX memory layout is likely suboptimal
00191 typedef struct {
00192   float3 start;
00193   float radius;
00194   float3 axis;
00195   float3 color;
00196 } vmd_cylinder_color;
00197 
00198 //
00199 // Rings (annular or otherwise)
00200 //
00201 
00202 // XXX memory layout is likely suboptimal, but its size is a multiple of float4
00203 typedef struct {
00204   float3 center;
00205   float3 norm;
00206   float inrad;
00207   float outrad;
00208   float3 color;
00209   float pad;
00210 } vmd_ring_color;
00211 
00212 
00213 //
00214 // Spheres
00215 //
00216 
00217 typedef struct {
00218   float3 center;
00219   float  radius;
00220 } vmd_sphere;
00221 
00222 // XXX memory layout is likely suboptimal
00223 typedef struct {
00224   float3 center;
00225   float  radius;
00226   float3 color;
00227   float  pad;
00228 } vmd_sphere_color;
00229 
00230 
00231 //
00232 // Triangle meshes of various kinds
00233 //
00234 
00235 // XXX memory layout is definitely suboptimal
00236 typedef struct {
00237   float3 v0;
00238   float3 v1;
00239   float3 v2;
00240   float3 n0;
00241   float3 n1;
00242   float3 n2;
00243   float3 c0;
00244   float3 c1;
00245   float3 c2;
00246 } vmd_tricolor;
00247 
00248 typedef struct {
00249   uchar4 c0;
00250   uchar4 c1;
00251   uchar4 c2;
00252   char4  n0;
00253   char4  n1;
00254   char4  n2;
00255   float3 v0;
00256   float3 v1;
00257   float3 v2;
00258 } vmd_trimesh_c4u_n3b_v3f;
00259 
00260 typedef struct {
00261   float3 n0;
00262   float3 n1;
00263   float3 n2;
00264   float3 v0;
00265   float3 v1;
00266   float3 v2;
00267 } vmd_trimesh_n3f_v3f;
00268 
00269 typedef struct {
00270   char4  n0;
00271   char4  n1;
00272   char4  n2;
00273   float3 v0;
00274   float3 v1;
00275   float3 v2;
00276 } vmd_trimesh_n3b_v3f;
00277 
00278 typedef struct {
00279   float3 v0;
00280   float3 v1;
00281   float3 v2;
00282 } vmd_trimesh_v3f;
00283 
00284 
00285 
00286 //
00287 // Methods for packing normals into a 4-byte quantity, such as a 
00288 // [u]int or [u]char4, and similar.  See JCGT article by Cigolle et al.,
00289 // "A Survey of Efficient Representations for Independent Unit Vectors",
00290 // J. Computer Graphics Techniques 3(2), 2014.
00291 // http://jcgt.org/published/0003/02/01/
00292 //
00293 #if defined(ORT_USERTXAPIS)
00294 #include <optixu/optixu_math_namespace.h> // for make_xxx() fctns
00295 
00296 #if 1
00297 
00298 //
00299 // oct32: 32-bit octahedral normal encoding using [su]norm16x2 quantization
00300 // Meyer et al., "On Floating Point Normal Vectors", In Proc. 21st
00301 // Eurographics Conference on Rendering.
00302 //   http://dx.doi.org/10.1111/j.1467-8659.2010.01737.x
00303 // Others:
00304 // https://twitter.com/Stubbesaurus/status/937994790553227264
00305 // https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding
00306 //
00307 static __host__ __device__ __inline__ float3 OctDecode(float2 projected) {
00308   float3 n = make_float3(projected.x, 
00309                          projected.y, 
00310                          1.0f - (fabsf(projected.x) + fabsf(projected.y)));
00311   if (n.z < 0.0f) {
00312     float oldX = n.x;
00313     n.x = copysignf(1.0f - fabsf(n.y), oldX);
00314     n.y = copysignf(1.0f - fabsf(oldX), n.y);
00315   }
00316 
00317   return n;
00318 }
00319 
00320 //
00321 // XXX TODO: implement a high-precision OctPEncode() variant, based on 
00322 //           floored snorms and an error minimization scheme using a 
00323 //           comparison of internally decoded values for least error
00324 //
00325 
00326 static __host__ __device__ __inline__ float2 OctEncode(float3 n) {
00327   const float invL1Norm = 1.0f / (fabsf(n.x) + fabsf(n.y) + fabsf(n.z));
00328   float2 projected;
00329   if (n.z < 0.0f) {
00330     projected = 1.0f - make_float2(fabsf(n.y), fabsf(n.x)) * invL1Norm;
00331     projected.x = copysignf(projected.x, n.x);
00332     projected.y = copysignf(projected.y, n.y);
00333   } else {
00334     projected = make_float2(n.x, n.y) * invL1Norm;
00335   }
00336 
00337   return projected;
00338 }
00339  
00340 
00341 static __host__ __device__ __inline__ uint convfloat2uint32(float2 f2) {
00342   f2 = f2 * 0.5f + 0.5f;
00343   uint packed;
00344   packed = ((uint) (f2.x * 65535)) | ((uint) (f2.y * 65535) << 16);
00345   return packed;
00346 }
00347 
00348 static __host__ __device__ __inline__ float2 convuint32float2(uint packed) {
00349   float2 f2;
00350   f2.x = (float)((packed      ) & 0x0000ffff) / 65535;
00351   f2.y = (float)((packed >> 16) & 0x0000ffff) / 65535;
00352   return f2 * 2.0f - 1.0f;
00353 }
00354 
00355 
00356 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
00357   float2 octf2 = OctEncode(normal);
00358   return convfloat2uint32(octf2);
00359 }
00360 
00361 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
00362   float2 octf2 = convuint32float2(packed);
00363   return OctDecode(octf2);
00364 }
00365 
00366 #elif 1
00367 
00368 // 
00369 // unorm10x3: unsigned 10-bit-per-component scalar unit real representation
00370 // Not quite as good as 'snorm' representations
00371 // This is largely equivalent to OpenGL's UNSIGNED_INT_2_10_10_10_REV 
00372 // Described in the GLSL 4.20 specification, J. Kessenich 2011
00373 //   i=round(clamp(r,0,1) * (2^b - 1))
00374 //   r=i/(2^b - 1)
00375 //
00376 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
00377   const float3 N = normal * 0.5f + 0.5f;
00378   const uint packed = ((uint) (N.x * 1023)) |
00379                       ((uint) (N.y * 1023) << 10) |
00380                       ((uint) (N.z * 1023) << 20);
00381   return packed;
00382 }
00383 
00384 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
00385   float3 N;
00386   N.x = (float)(packed & 0x000003ff) / 1023;
00387   N.y = (float)(((packed >> 10) & 0x000003ff)) / 1023;
00388   N.z = (float)(((packed >> 20) & 0x000003ff)) / 1023;
00389   return N * 2.0f - 1.0f;
00390 }
00391 
00392 #elif 0
00393 
00394 // 
00395 // snorm10x3: signed 10-bit-per-component scalar unit real representation
00396 // Better representation than unorm.  
00397 // Supported by most fixed-function graphics hardware.
00398 // https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_texture_snorm.txt
00399 //   i=round(clamp(r,-1,1) * (2^(b-1) - 1))
00400 //   r=clamp(i/(2^(b-1) - 1), -1, 1)
00401 //
00402 
00403 #elif 1
00404 
00405 // OpenGL GLbyte signed quantization scheme
00406 //   i = r * (2^b - 1) - 0.5;
00407 //   r = (2i + 1)/(2^b - 1)
00408 static __host__ __device__ __inline__ uint packNormal(const float3& normal) {
00409   // conversion to GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
00410   const float3 N = normal * 127.5f - 0.5f;
00411   const char4 packed = make_char4(N.x, N.y, N.z, 0);
00412   return *((uint *) &packed);
00413 }
00414 
00415 static __host__ __device__ __inline__ float3 unpackNormal(uint packed) {
00416   char4 c4norm = *((char4 *) &packed);
00417 
00418   // conversion from GLbyte format, Table 2.6, p. 44 of OpenGL spec 1.2.1
00419   // float = (2c+1)/(2^8-1)
00420   const float ci2f = 1.0f / 255.0f;
00421   const float cn2f = 1.0f / 127.5f;
00422   float3 N = c4norm * cn2f + ci2f;
00423 
00424   return N;
00425 }
00426 
00427 #endif
00428 #endif
00429 
00430 
00431 #endif
00432