NAMD
CudaComputeNonbondedKernel.hip.h
#ifndef CUDACOMPUTENONBONDEDKERNEL_HIP_H
#define CUDACOMPUTENONBONDEDKERNEL_HIP_H
#include "CudaUtils.h"
#include "CudaRecord.h"
#include "CudaNonbondedTables.h"
#include "HipDefines.h"
#if defined(NAMD_HIP)

//! Alchemical data structure that holds the lambda-relevant parameters for FEP/TI.
struct AlchData {
  float scaling;
  float switchdist2;
  float cutoff2;
  float switchfactor;
  float alchVdwShiftCoeff;
  float alchLambda;
  float lambdaUp;
  float lambdaDown;
  float elecLambdaUp;
  float elecLambdaDown;
  float vdwLambdaUp;
  float vdwLambdaDown;

  float lambda2Up;
  float lambda2Down;
  float elecLambda2Up;
  float elecLambda2Down;
  float vdwLambda2Up;
  float vdwLambda2Down;

  float vdwShiftUp;
  float vdwShift2Up;
  float vdwShiftDown;
  float vdwShift2Down;
  bool alchDecouple;
};

class CudaComputeNonbondedKernel {
private:

  const int deviceID;
  CudaNonbondedTables& cudaNonbondedTables;
  const bool doStreaming;

  // Exclusions
  // JM: nvcc seems to zero-initialize this pointer by default, but clang doesn't;
  // we rely on it being NULL so we can initialize it in setExclusionsByAtom() later.
  int2 *d_exclusionsByAtom = NULL;
  unsigned int* overflowExclusions;
  size_t overflowExclusionsSize;

  int2* exclIndexMaxDiff;
  size_t exclIndexMaxDiffSize;

  // Atom indices
  int* atomIndex;
  size_t atomIndexSize;

  // VdW types
  int* vdwTypes;
  size_t vdwTypesSize;

  unsigned int* patchNumCount;
  size_t patchNumCountSize;

  int* patchReadyQueue;
  size_t patchReadyQueueSize;

  float *force_x, *force_y, *force_z, *force_w;
  size_t forceSize;
  float *forceSlow_x, *forceSlow_y, *forceSlow_z, *forceSlow_w;
  size_t forceSlowSize;

  // Drude/NbThole
  int *isDrude;
  size_t isDrudeSize;
  float* drudeAtomAlpha;
  size_t drudeAtomAlphaSize;
public:
  CudaComputeNonbondedKernel(int deviceID, CudaNonbondedTables& cudaNonbondedTables, bool doStreaming);

  static __device__ __host__ __forceinline__ int
  computeNumTiles(const int numAtoms, const int tilesize = BOUNDINGBOXSIZE) {
    return (numAtoms + tilesize - 1) / tilesize;
  }

  static __device__ __host__ __forceinline__ int
  computeAtomPad(const int numAtoms, const int tilesize = BOUNDINGBOXSIZE) {
    return computeNumTiles(numAtoms, tilesize) * tilesize;
  }
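
  // Illustrative example, assuming BOUNDINGBOXSIZE (defined in CudaUtils.h) is 32:
  // computeNumTiles(100) == (100 + 31) / 32 == 4 tiles, and
  // computeAtomPad(100)  == 4 * 32         == 128 padded atom slots,
  // so per-atom storage always covers a whole number of bounding-box tiles.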

  void updateVdwTypesExcl(const int atomStorageSize, const int* h_vdwTypes,
    const int2* h_exclIndexMaxDiff, const int* h_atomIndex, cudaStream_t stream);

  void updateVdwTypesExclOnGPU(CudaTileListKernel& tlKernel,
    const int numPatches, const int atomStorageSize, const bool alchOn,
    CudaLocalRecord* localRecords,
    const int* d_vdwTypes, const int* d_id, const int* d_sortOrder,
    const int* d_partition, cudaStream_t stream);

  void nonbondedForce(CudaTileListKernel& tlKernel,
    const int atomStorageSize, const bool atomsChanged,
    const bool doMinimize, const bool doPairlist,
    const bool doEnergy, const bool doVirial, const bool doSlow, const bool doAlch,
    const bool doAlchVdwForceSwitching,
    const bool doFEP, const bool doTI,
    const bool doNbThole, const bool doTable,
    const float3 lata, const float3 latb, const float3 latc,
    const float4* h_xyzq, const float cutoff2,
    const CudaNBConstants nbConstants,
    float4* d_forces, float4* d_forcesSlow,
    float4* h_forces, float4* h_forcesSlow, AlchData *srcFlags,
    bool lambdaWindowUpdated, char *part,
    bool CUDASOAintegratorOn, bool useDeviceMigration,
    const float drudeNbtholeCut2,
    cudaStream_t stream);

  void reduceVirialEnergy(CudaTileListKernel& tlKernel,
    const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const bool doGBIS,
    float4* d_forces, float4* d_forcesSlow,
    VirialEnergy* d_virialEnergy, cudaStream_t stream);

  void getVirialEnergy(VirialEnergy* h_virialEnergy, cudaStream_t stream);

  void bindExclusions(int numExclusions, unsigned int* exclusion_bits);

  int* getPatchReadyQueue();

  void reallocate_forceSOA(int atomStorageSize);

  void setExclusionsByAtom(int2* h_data, const int num_atoms);

  void updateDrudeData(const int atomStorageSize, const float* h_drudeAtomAlpha, const int* h_isDrude, cudaStream_t stream);
};

#endif // NAMD_HIP
#endif // CUDACOMPUTENONBONDEDKERNEL_HIP_H
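
A minimal usage sketch for the tile-padding helpers and updateVdwTypesExcl(): the caller rounds its per-atom storage up to a whole number of bounding-box tiles with computeAtomPad() before handing host buffers to the kernel object. The helper uploadAtomData and its arguments are hypothetical, not part of NAMD; the host buffers are assumed to be allocated to the padded size.

#include "CudaComputeNonbondedKernel.hip.h"

// Hypothetical caller: pad the atom count so the device copy in
// updateVdwTypesExcl() covers whole bounding-box tiles.
void uploadAtomData(CudaComputeNonbondedKernel& kernel,
                    const int numAtoms,
                    const int* h_vdwTypes,           // per-atom VdW type indices
                    const int2* h_exclIndexMaxDiff,  // per-atom exclusion index / max-diff pairs
                    const int* h_atomIndex,          // per-atom global indices
                    cudaStream_t stream) {
  // Round the atom count up to a multiple of BOUNDINGBOXSIZE.
  const int atomStorageSize = CudaComputeNonbondedKernel::computeAtomPad(numAtoms);
  kernel.updateVdwTypesExcl(atomStorageSize, h_vdwTypes,
                            h_exclIndexMaxDiff, h_atomIndex, stream);
}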