ComputeNonbondedCUDAKernel.h

#ifdef NAMD_CUDA
// This type is defined in multiple files.
typedef float GBReal;

void cuda_errcheck(const char *msg);

#ifndef __CUDACC__
#undef __align__
#define __align__(X)
#endif


// Number of warps per block for the non-bonded CUDA kernel
#define NUM_WARP 4
#define WARPSIZE 32

// Exclusion mask: a set bit (1) means the atom pair is excluded
struct exclmask {
  unsigned int excl[32];
};
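
Each exclmask holds one 32-bit word per atom of a 32-atom tile, and a set bit marks an excluded pair. Purely as an illustration (not part of this header), a device-side test for a pair (i, j) could look like the sketch below; the word/bit convention chosen here is an assumption.

// Illustrative sketch only: test whether atoms i and j of a 32-atom tile are
// excluded.  Mapping i to a word and j to a bit within it is assumed here.
__device__ bool pair_is_excluded_example(const exclmask &m, int i, int j) {
  return (m.excl[i] >> j) & 1u;   // bit j of word i set => pair (i, j) excluded
}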

struct __align__(16) patch_pair {
  float3 offset;
  int patch1_start;      // Coordinate/force start for this patch
  int patch1_size;       // Size of the patch
  int patch2_start;
  int patch2_size;
  int patch1_ind;        // Patch index
  int patch2_ind;
  int patch1_num_pairs;  // Number of pairs that involve this patch
  int patch2_num_pairs;
  union {
    bool patch_done[2];      // Temporary storage in shared memory, used after the GPU computation
    struct {
      int plist_start;       // Pair list start
      int plist_size;        // Pair list size
    };
  };
  int exclmask_start;    // Exclusion mask start
  int patch1_free_size;  // Number of free atoms in patch 1
  int patch2_free_size;  // Number of free atoms in patch 2
//  int pad1, pad2;
};

#define PATCH_PAIR_SIZE (sizeof(patch_pair)/4)
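
PATCH_PAIR_SIZE above gives the struct size in 32-bit words. As a hedged illustration (not part of the original header), compile-time checks that the __align__(16) request and the word count stay consistent might read:

// Illustrative checks only; they assume the kernels move patch_pair data in
// whole 32-bit words (PATCH_PAIR_SIZE of them) and 16-byte aligned chunks.
static_assert(sizeof(patch_pair) % 16 == 0,
              "patch_pair is expected to fill a whole number of 16-byte chunks");
static_assert(PATCH_PAIR_SIZE * 4 == sizeof(patch_pair),
              "PATCH_PAIR_SIZE counts 32-bit words");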

struct __align__(16) atom {  // must be multiple of 16!
  float3 position;
  float charge;
};

struct __align__(16) atom_param {  // must be multiple of 16!
  int vdw_type;
  int index;
  int excl_index;
  int excl_maxdiff;  // maxdiff == 0 -> only excluded from self
};
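
Both structs ask for a total size that is a multiple of 16 bytes so that 16-byte loads line up. A minimal compile-time check, added here only as an illustration, would be:

// Illustrative checks only: float3 + float is 16 bytes and four ints are 16
// bytes, so both structs should already meet the "multiple of 16" requirement.
static_assert(sizeof(atom) % 16 == 0, "atom must be a multiple of 16 bytes");
static_assert(sizeof(atom_param) % 16 == 0, "atom_param must be a multiple of 16 bytes");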

#define COPY_ATOM( DEST, SOURCE ) { \
  DEST.position.x = SOURCE.position.x; \
  DEST.position.y = SOURCE.position.y; \
  DEST.position.z = SOURCE.position.z; \
  DEST.charge = SOURCE.charge; \
  }

#define COPY_PARAM( DEST, SOURCE ) { \
  DEST.sqrt_epsilon = SOURCE.sqrt_epsilon; \
  DEST.half_sigma = SOURCE.half_sigma; \
  DEST.index = SOURCE.index; \
  DEST.excl_index = SOURCE.excl_index; \
  DEST.excl_maxdiff = SOURCE.excl_maxdiff; \
  }

#define COPY_ATOM_TO_SHARED( ATOM, PARAM, SHARED ) { \
    COPY_ATOM( SHARED, ATOM ) \
    COPY_PARAM( SHARED, PARAM ) \
  }

#define COPY_ATOM_FROM_SHARED( ATOM, PARAM, SHARED ) { \
    COPY_ATOM( ATOM, SHARED ) \
    COPY_PARAM( PARAM, SHARED ) \
  }

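Note that COPY_PARAM reads sqrt_epsilon and half_sigma, which are not members of atom_param above, so these macros evidently target a different shared-memory record defined elsewhere. As a small, hedged illustration of the copy pattern using only types from this header:

// Illustration only: COPY_ATOM does a plain field-by-field copy, shown here
// between two "atom" records.  The destination layout expected by COPY_PARAM
// (with sqrt_epsilon/half_sigma members) is assumed to be declared elsewhere.
inline void copy_atom_example(atom &dst, const atom &src) {
  COPY_ATOM(dst, src);   // copies position.x/y/z and charge
}
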
// 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
// 2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
#define MAX_EXCLUSIONS (1<<27)
#define MAX_CONST_EXCLUSIONS 2048  // cache size is 8k

void cuda_bind_exclusions(const unsigned int *t, int n);

void cuda_bind_lj_table(const float2 *t, int _lj_table_size);

// #define FORCE_TABLE_SIZE 512
// maximum size of CUDA array 1D texture reference is 2^13 = 8192
// #define FORCE_TABLE_SIZE 8192
// CUDA docs lie, older devices can only handle 4096
#define FORCE_TABLE_SIZE 4096

void cuda_bind_force_table(const float4 *t, const float4 *et);

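The binding functions above take host pointers; FORCE_TABLE_SIZE gives the expected length of the force and energy tables, while the LJ table layout is not spelled out here. A hedged host-side sketch (the function name and the assumed square LJ table shape are illustrative, not from this header):

// Host-side sketch, illustration only: bind the Lennard-Jones parameter table
// (a square lj_table_size x lj_table_size layout of float2 is assumed) and the
// FORCE_TABLE_SIZE-entry force/energy interpolation tables.
void bind_tables_example(const float2 *lj_tbl, int lj_table_size,
                         const float4 *force_tbl, const float4 *energy_tbl) {
  cuda_bind_lj_table(lj_tbl, lj_table_size);
  cuda_bind_force_table(force_tbl, energy_tbl);
}
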
void cuda_init();

void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs,
                           int npatches, int natoms, int nexclmask, int plist_len);

void cuda_bind_atom_params(const atom_param *t);
void cuda_bind_vdw_types(const int *t);

void cuda_bind_atoms(const atom *a);

void cuda_bind_forces(float4 *f, float4 *f_slow);

void cuda_bind_virials(float *v, int *queue, int *blockorder);

void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc,
                           float cutoff2, float plcutoff2,
                           int cbegin, int ccount, int ctotal,
                           int doSlow, int doEnergy, int usePairlists, int savePairlists,
                           int doStreaming, int saveOrder, cudaStream_t &strm);

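Taken together, the declarations above suggest a host-side sequence: bind the per-step data, launch the non-bonded kernels on a stream, then poll cuda_stream_finished() (declared further below). The ordering, argument choices, and return-value interpretation in this sketch are assumptions drawn only from the signatures in this header, not from the implementation:

// Illustrative host-side driver sketch; argument values are placeholders and
// the exact call order is an assumption based on the declarations above.
void run_nonbonded_step_example(patch_pair *pairs, int npairs, int npatches,
                                int natoms, int nexclmask, int plist_len,
                                const atom *atoms, const atom_param *params,
                                float4 *f, float4 *f_slow,
                                float3 lata, float3 latb, float3 latc,
                                float cutoff2, float plcutoff2,
                                cudaStream_t &stream) {
  cuda_bind_patch_pairs(pairs, npairs, npatches, natoms, nexclmask, plist_len);
  cuda_bind_atom_params(params);
  cuda_bind_atoms(atoms);
  cuda_bind_forces(f, f_slow);

  // Launch over all patch pairs (cbegin = 0, ccount = ctotal = npairs is an
  // assumption), computing slow forces and energies and saving pairlists.
  cuda_nonbonded_forces(lata, latb, latc, cutoff2, plcutoff2,
                        0, npairs, npairs,
                        /*doSlow*/ 1, /*doEnergy*/ 1,
                        /*usePairlists*/ 1, /*savePairlists*/ 1,
                        /*doStreaming*/ 0, /*saveOrder*/ 0, stream);

  // Poll until the kernels have finished; a nonzero return is taken here to
  // mean "finished", which is an assumption about the return value.
  while (!cuda_stream_finished()) {
    /* overlap other host work here */
  }
}
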
// GBIS methods
void cuda_GBIS_P1(
  int cbegin,
  int ccount,
  int pbegin,
  int pcount,
  float a_cut,
  float rho_0,
  float3 lata,
  float3 latb,
  float3 latc,
  cudaStream_t &strm
  );
void cuda_GBIS_P2(
  int cbegin,
  int ccount,
  int pbegin,
  int pcount,
  float a_cut,
  float r_cut,
  float scaling,
  float kappa,
  float smoothDist,
  float epsilon_p,
  float epsilon_s,
  float3 lata,
  float3 latb,
  float3 latc,
  int doEnergy,
  int doFullElec,
  cudaStream_t &strm
  );
void cuda_GBIS_P3(
  int cbegin,
  int ccount,
  int pbegin,
  int pcount,
  float a_cut,
  float rho_0,
  float scaling,
  float3 lata,
  float3 latb,
  float3 latc,
  cudaStream_t &strm
  );

void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH);
void cuda_bind_GBIS_energy(float *energy_gbis);
void cuda_bind_GBIS_psiSum(GBReal *psiSumH);
void cuda_bind_GBIS_bornRad(float *bornRadH);
void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH);
void cuda_bind_GBIS_dHdrPrefix(float *dHdrPrefixH);

// end GBIS methods
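
The GBIS (Generalized Born Implicit Solvent) entry points split the calculation into three phases, P1 through P3, with host buffers bound through the cuda_bind_GBIS_* calls. The pairing of bind calls with phases and the interpretation of each phase in this sketch are assumptions based only on the names and signatures above:

// Illustrative GBIS driver sketch; the phase ordering and the pairing of bind
// calls with phases are assumptions, and all numeric arguments are placeholders.
void run_gbis_example(int cbegin, int ccount, int pbegin, int pcount,
                      float a_cut, float r_cut, float rho_0, float scaling,
                      float kappa, float smoothDist,
                      float epsilon_p, float epsilon_s,
                      float3 lata, float3 latb, float3 latc,
                      float *intRad0H, float *intRadSH, GBReal *psiSumH,
                      float *bornRadH, GBReal *dEdaSumH, float *dHdrPrefixH,
                      float *energy_gbis, cudaStream_t &stream) {
  // Phase 1: accumulate psi sums from intrinsic radii.
  cuda_bind_GBIS_intRad(intRad0H, intRadSH);
  cuda_bind_GBIS_psiSum(psiSumH);
  cuda_GBIS_P1(cbegin, ccount, pbegin, pcount, a_cut, rho_0, lata, latb, latc, stream);

  // Phase 2: compute dEda sums (and optionally energies) from Born radii.
  cuda_bind_GBIS_bornRad(bornRadH);
  cuda_bind_GBIS_dEdaSum(dEdaSumH);
  cuda_bind_GBIS_energy(energy_gbis);
  cuda_GBIS_P2(cbegin, ccount, pbegin, pcount, a_cut, r_cut, scaling, kappa,
               smoothDist, epsilon_p, epsilon_s, lata, latb, latc,
               /*doEnergy*/ 1, /*doFullElec*/ 1, stream);

  // Phase 3: apply dHdr prefix terms to produce the final GBIS forces.
  cuda_bind_GBIS_dHdrPrefix(dHdrPrefixH);
  cuda_GBIS_P3(cbegin, ccount, pbegin, pcount, a_cut, rho_0, scaling,
               lata, latb, latc, stream);
}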

int cuda_stream_finished();

#endif  // NAMD_CUDA

