NAMD
ComputeNonbondedCUDAKernel.h
Go to the documentation of this file.
1 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2 #include "HipDefines.h"
3 //this type defined in multiple files
4 typedef float GBReal;
5 
6 void cuda_errcheck(const char *msg);
7 
8 // Number of warps per block for Non-bonded CUDA kernel
9 #define NUM_WARP 4
10 
11 // Exclusion mask: bit 1 = atom pair is excluded
12 struct exclmask {
13  unsigned int excl[32];
14 };
15 
16 struct __align__(16) patch_pair {
17  float3 offset;
18  int patch1_start; // Coordinate/force start for this patch
19  int patch1_size; // Size of the patch
20  int patch2_start;
21  int patch2_size;
22  int patch1_ind; // Patch index
23  int patch2_ind;
24  int patch1_num_pairs; // Number of pairs that involve this patch
25  int patch2_num_pairs;
26  union {
27  bool patch_done[2]; // After-GPU-computation shared memory temporary storage
28  struct {
29  int plist_start; // Pair list start
30  int plist_size; // Pair list size
31  };
32  };
33  int exclmask_start; // Exclusion mask start
34  int patch1_free_size; // Size of the free atoms in patch
35  int patch2_free_size; // Size of the free atoms in patch
36 // int pad1, pad2;
37 };
38 
39 #define PATCH_PAIR_SIZE (sizeof(patch_pair)/4)
40 
41 struct __align__(16) atom { // must be multiple of 16!
42  float3 position;
43  float charge;
44 };
45 
46 struct __align__(16) atom_param { // must be multiple of 16!
47  int vdw_type;
48  int index;
49  int excl_index;
50  int excl_maxdiff; // maxdiff == 0 -> only excluded from self
51 };
52 
53 #define COPY_ATOM( DEST, SOURCE ) { \
54  DEST.position.x = SOURCE.position.x; \
55  DEST.position.y = SOURCE.position.y; \
56  DEST.position.z = SOURCE.position.z; \
57  DEST.charge = SOURCE.charge; \
58  }
59 
60 #define COPY_PARAM( DEST, SOURCE ) { \
61  DEST.sqrt_epsilon = SOURCE.sqrt_epsilon; \
62  DEST.half_sigma = SOURCE.half_sigma; \
63  DEST.index = SOURCE.index; \
64  DEST.excl_index = SOURCE.excl_index; \
65  DEST.excl_maxdiff = SOURCE.excl_maxdiff; \
66  }
67 
68 #define COPY_ATOM_TO_SHARED( ATOM, PARAM, SHARED ) { \
69  COPY_ATOM( SHARED, ATOM ) \
70  COPY_PARAM( SHARED, PARAM ) \
71  }
72 
73 #define COPY_ATOM_FROM_SHARED( ATOM, PARAM, SHARED ) { \
74  COPY_ATOM( ATOM, SHARED ) \
75  COPY_PARAM( PARAM, SHARED ) \
76  }
77 
78 // 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
79 // 2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
80 #define MAX_EXCLUSIONS (1<<27)
81 #define MAX_CONST_EXCLUSIONS 2048 // cache size is 8k
82 
83 void cuda_bind_exclusions(const unsigned int *t, int n);
84 
85 void cuda_bind_lj_table(const float2 *t, int _lj_table_size);
86 
87 // #define FORCE_TABLE_SIZE 512
88 // maximum size of CUDA array 1D texture reference is 2^13 = 8192
89 // #define FORCE_TABLE_SIZE 8192
90 // CUDA docs lie, older devices can only handle 4096
91 #define FORCE_TABLE_SIZE 4096
92 
93 void cuda_bind_force_table(const float4 *t, const float4 *et);
94 
95 void cuda_init();
96 
97 void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs,
98  int npatches, int natoms, int nexclmask, int plist_len);
99 
100 void cuda_bind_atom_params(const atom_param *t);
101 void cuda_bind_vdw_types(const int *t);
102 
103 void cuda_bind_atoms(const atom *a);
104 
105 void cuda_bind_forces(float4 *f, float4 *f_slow);
106 
107 void cuda_bind_virials(float *v, int *queue, int *blockorder);
108 
109 void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc,
110  float cutoff2, float plcutoff2,
111  int cbegin, int ccount, int ctotal,
112  int doSlow, int doEnergy, int usePairlists, int savePairlists,
113  int doStreaming, int saveOrder, cudaStream_t &strm);
114 
115 //GBIS methods
116 void cuda_GBIS_P1(
117  int cbegin,
118  int ccount,
119  int pbegin,
120  int pcount,
121  float a_cut,
122  float rho_0,
123  float3 lata,
124  float3 latb,
125  float3 latc,
126  cudaStream_t &strm
127  );
128 void cuda_GBIS_P2(
129  int cbegin,
130  int ccount,
131  int pbegin,
132  int pcount,
133  float a_cut,
134  float r_cut,
135  float scaling,
136  float kappa,
137  float smoothDist,
138  float epsilon_p,
139  float epsilon_s,
140  float3 lata,
141  float3 latb,
142  float3 latc,
143  int doEnergy,
144  int doFullElec,
145  cudaStream_t &strm
146  );
147 void cuda_GBIS_P3(
148  int cbegin,
149  int ccount,
150  int pbegin,
151  int pcount,
152  float a_cut,
153  float rho_0,
154  float scaling,
155  float3 lata,
156  float3 latb,
157  float3 latc,
158  cudaStream_t &strm
159  );
160 
161 void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH);
163 void cuda_bind_GBIS_psiSum(GBReal *psiSumH);
164 void cuda_bind_GBIS_bornRad(float *bornRadH);
165 void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH);
167 
168 //end GBIS methods
169 
171 
172 #endif // NAMD_CUDA
173 
void cuda_bind_force_table(const float4 *t, const float4 *et)
void cuda_bind_forces(float4 *f, float4 *f_slow)
int cuda_stream_finished()
void cuda_bind_exclusions(const unsigned int *t, int n)
void cuda_bind_atoms(const atom *a)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
static __thread float * bornRadH
static __thread float * dHdrPrefixH
static __thread int plist_size
void cuda_bind_GBIS_energy(float *e)
void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float plcutoff2
void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc, float cutoff2, float plcutoff2, int cbegin, int ccount, int ctotal, int doSlow, int doEnergy, int usePairlists, int savePairlists, int doStreaming, int saveOrder, cudaStream_t &strm)
static __thread float * intRadSH
void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs, int npatches, int natoms, int plist_len, int nexclmask)
void cuda_bind_vdw_types(const int *t)
void cuda_bind_lj_table(const float2 *t, int _lj_table_size)
unsigned int excl[32]
void cuda_bind_GBIS_dHdrPrefix(float *dHdrPrefixH)
void cuda_bind_GBIS_bornRad(float *bornRadH)
void cuda_bind_virials(float *v, int *queue, int *blockorder)
void cuda_bind_GBIS_psiSum(GBReal *psiSumH)
void cuda_GBIS_P3(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float scaling, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
void cuda_GBIS_P1(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
void cuda_init()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH)
static __thread float * energy_gbis
struct __align__(16) patch_pair
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
void cuda_GBIS_P2(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float r_cut, float scaling, float kappa, float smoothDist, float epsilon_p, float epsilon_s, float3 lata, float3 latb, float3 latc, int doEnergy, int doFullElec, cudaStream_t &strm)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc
void cuda_bind_atom_params(const atom_param *t)
static __thread float * intRad0H
float GBReal
Definition: ComputeGBIS.inl:17