NAMD
CudaComputeNonbonded.h
Go to the documentation of this file.
1 #ifndef CUDACOMPUTENONBONDED_H
2 #define CUDACOMPUTENONBONDED_H
3 
4 #ifdef NAMD_CUDA
5 #include <cuda.h>
6 #endif
7 #ifdef NAMD_HIP
8 #include <hip/hip_runtime.h>
9 #endif
10 
11 #include <vector>
12 #include "Compute.h"
13 #include "Box.h"
14 #include "PatchTypes.h"
15 #include "CudaUtils.h"
16 #include "ComputeNonbondedUtil.h"
17 #include "CudaNonbondedTables.h"
18 #include "CudaTileListKernel.h"
19 #include "CudaTileListKernel.hip.h"
22 #include "CudaComputeGBISKernel.h"
23 #include "ComputeMgr.h"
24 #include "HipDefines.h"
25 
26 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
27 // 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
28 // 2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
29 #define MAX_EXCLUSIONS (1<<27)
30 
32 public:
33  struct ComputeRecord {
36  // Index to patches[] -array
37  int patchInd[2];
39  };
40 
41  struct PatchRecord {
43  patch = NULL;
44  compAtom = NULL;
45  results = NULL;
46  positionBox = NULL;
47  forceBox = NULL;
48  intRadBox = NULL;
49  psiSumBox = NULL;
50  bornRadBox = NULL;
51  dEdaSumBox = NULL;
52  dHdrPrefixBox = NULL;
53  }
56  int numAtoms;
58  int atomStart;
59  // Pe where the patch was registered
60  int pe;
61  // For priority sorting
64  bool isSameNode;
65  // Storage for open positionBox
67  // Storage for open forceBox
69  // Boxes
72  Box<Patch,Real> *intRadBox; //5 GBIS Boxes
77  Real *intRad; //5 GBIS arrays
// Strict weak ordering by patchID only — lets std::sort order PatchRecord containers.
82  bool operator < (const PatchRecord& pr) const {
83  return (patchID < pr.patchID);
84  }
// Equality is defined solely by patchID; no other fields participate in the comparison.
85  bool operator == (const PatchRecord& pr) const {
86  return (patchID == pr.patchID);
87  }
88  };
89 
90 private:
91  SimParameters *params; // convenience
92  // This variable is set in atomUpdate() by any Pe
93  bool atomsChangedIn;
94  // This variable is set in doWork() by masterPe
95  bool atomsChanged;
96  int npairlists;
97 
98  bool computesChanged;
99 
100  const int deviceID;
101  size_t maxShmemPerBlock;
102  cudaStream_t stream;
103 
104  // PME and VdW CUDA kernels
105  CudaComputeNonbondedKernel nonbondedKernel;
106 
107  // GBIS kernel
108  CudaComputeGBISKernel GBISKernel;
109 
110  // Tile list CUDA kernels
111  CudaTileListKernel tileListKernel;
112 
113  // Exclusions
114  int2 *exclusionsByAtom;
115 
116  // VdW-types
117  // Pinned host memory
118  int* vdwTypes;
119  size_t vdwTypesSize;
120 
121  // Maximum number of tiles per tile list
122  int maxTileListLen;
123 
124  // Pinned host memory
125  int2* exclIndexMaxDiff;
126  size_t exclIndexMaxDiffSize;
127 
128  // Pinned host memory
129  int* atomIndex;
130  size_t atomIndexSize;
131 
132  // Required (xyzq, vdwTypes) storage
133  int atomStorageSize;
134 
135  // Atom and charge storage
136  // Pinned host memory
137  CudaAtom* atoms;
138  size_t atomsSize;
139 
140  char *part;
141  size_t partSize;
142 
143  // Force storage
144  float4* h_forces;
145  size_t h_forcesSize;
146  float4* h_forcesSlow;
147  size_t h_forcesSlowSize;
148 
149  float4* d_forces;
150  size_t d_forcesSize;
151  float4* d_forcesSlow;
152  size_t d_forcesSlowSize;
153 
154  // Virial and energy storage
155  VirialEnergy* h_virialEnergy;
156  VirialEnergy* d_virialEnergy;
157 
158  // GBIS storage
159  //--------------
160  // Pinned host memory
161  float* intRad0H;
162  size_t intRad0HSize;
163  // Pinned host memory
164  float* intRadSH;
165  size_t intRadSHSize;
166  // Mapped host memory
167  GBReal* psiSumH;
168  size_t psiSumHSize;
169  // Pinned host memory
170  float* bornRadH;
171  size_t bornRadHSize;
172  // Mapped host memory
173  GBReal* dEdaSumH;
174  size_t dEdaSumHSize;
175  // Pinned host memory
176  float* dHdrPrefixH;
177  size_t dHdrPrefixHSize;
178 
179  // Event and sanity check flag for making sure event was actually recorded
180  cudaEvent_t forceDoneEvent;
181  bool forceDoneEventRecord;
182  // Check counter for event polling
183  int checkCount;
184 
185  // Node lock
186  CmiNodeLock lock;
187  // List of local PEs that have patches
188  std::vector<int> pes;
189  // List of patch indices on each rank
190  std::vector< std::vector<int> > rankPatches;
191  // Master Pe = Pe where this Compute and reduction lives
192  int masterPe;
193 
194  // Are we in skip?
195  bool doSkip;
196 
197  // Device-wide patch and compute records, and the list of patches
198  std::vector<ComputeRecord> computes;
199  std::vector<PatchRecord> patches;
200 
201  // CUDA versions of patches
202  // Pinned host memory
203  CudaPatchRecord* cudaPatches;
204 
205  SubmitReduction *reduction;
206  NodeReduction *nodeReduction;
207 
208  // Pair lists
209  int pairlistsValid;
210  float pairlistTolerance;
211  int usePairlists;
212  int savePairlists;
213  float plcutoff2;
214 
215  bool reSortDone;
216 
217  // Flags
218  bool doSlow;
219  bool doEnergy;
220  bool doVirial;
221  bool doAlch;
222  bool doMinimize;
223 
224  AlchData alchFlags;
225  bool lambdaWindowUpdated;
226  // Walltime for force compute start
227  double beforeForceCompute;
228 
229  static inline void updateVdwTypesExclLoop(int first, int last, void *result, int paraNum, void *param);
230  void updateVdwTypesExclSubset(int first, int last);
231 
232  static inline void copyAtomsLoop(int first, int last, void *result, int paraNum, void *param);
233  void copyAtomsSubset(int first, int last);
234 
235  void addPatch(PatchID pid);
236  void addCompute(ComputeID cid, PatchID pid1, PatchID pid2, Vector offset);
237  void updatePatches();
238  int calcNumTileLists();
239  void getMaxMovementTolerance(float& maxAtomMovement, float& maxPatchTolerance);
240  void updateVdwTypesExcl();
241  void buildNeighborlist();
242  void skip();
243  void doGBISphase1();
244  void doGBISphase2();
245  void doGBISphase3();
246  void doForce();
247  void finishSetOfPatchesOnPe(std::vector<int>& patchSet);
248  void finishGBISPhase(int i);
249  void finishTimers();
250  void forceDone();
251  static void forceDoneCheck(void *arg, double walltime);
252  void forceDoneSetCallback();
253  void updateComputes();
254  void buildExclusions();
255  void skipPatch(int i);
256  void openBox(int i);
257  void reallocateArrays();
258 #ifdef NODEGROUP_FORCE_REGISTER
259  void updatePatchRecord();
260 #endif
261  void copyGBISphase(int i);
262  void updatePatch(int i);
263  int findPid(PatchID pid);
264  void assignPatch(int i);
265  ComputeMgr* computeMgr;
266  int patchesCounter;
267 
268  const bool doStreaming;
269  int* patchReadyQueue;
270  int patchReadyQueueNext, patchReadyQueueLen;
271 
272  void finishPatch(int i);
273  void unregisterBox(int i);
274 
275  // void writeId(const char* filename);
276  // void writeXYZ(const char* filename);
277 
278 public:
279  CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables& cudaNonbondedTables, bool doStreaming);
282  void registerComputePair(ComputeID cid, PatchID* pid, int* trans);
283  void assignPatches(ComputeMgr* computeMgrIn);
284  virtual void initialize();
285  virtual void atomUpdate();
286  virtual int noWork();
287  virtual void doWork();
288  void launchWork();
289  void finishReductions();
290  void unregisterBoxesOnPe();
291  void assignPatchesOnPe();
292  void openBoxesOnPe();
293  void skipPatchesOnPe();
294  void finishPatchesOnPe();
295  void finishPatchOnPe(int i);
296  void finishPatches();
297  void messageEnqueueWork();
298  virtual void patchReady(PatchID, int doneMigration, int seq);
299  virtual void gbisP2PatchReady(PatchID, int seq);
300  virtual void gbisP3PatchReady(PatchID, int seq);
301  void reSortTileLists();
302 
303  void updatePatchOrder(const std::vector<CudaLocalRecord>& data);
304  std::vector<PatchRecord>& getPatches() { return patches; }
305 
306  // Utility function to compute nonbonded parameters, used by ComputeBondedCUDAKernel as well
308  // Utility function to determine if force table will be used, used by ComputeBondedCUDAKernel as well
309  static bool getDoTable(SimParameters* params, const bool doSlow, const bool doVirial);
310 };
311 
312 #endif // NAMD_CUDA
313 #endif // CUDACOMPUTENONBONDED_H
bool operator==(const PatchRecord &pr) const
Alchemical data structure that holds the lambda-relevant parameters for FEP/TI.
bool operator<(const PatchRecord &pr) const
int32 ComputeID
Definition: NamdTypes.h:278
Definition: Vector.h:72
virtual void gbisP2PatchReady(PatchID, int seq)
float Real
Definition: common.h:118
std::vector< PatchRecord > & getPatches()
Definition: Patch.h:35
static CudaNBConstants getNonbondedCoef(SimParameters *params)
virtual void gbisP3PatchReady(PatchID, int seq)
CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables &cudaNonbondedTables, bool doStreaming)
void registerComputeSelf(ComputeID cid, PatchID pid)
virtual void patchReady(PatchID, int doneMigration, int seq)
Box< Patch, CompAtom > * positionBox
void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
void assignPatches(ComputeMgr *computeMgrIn)
int32 PatchID
Definition: NamdTypes.h:277
const ComputeID cid
Definition: Compute.h:43
static bool getDoTable(SimParameters *params, const bool doSlow, const bool doVirial)
float GBReal
Definition: ComputeGBIS.inl:17
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)