NAMD — CudaComputeNonbonded.h (Doxygen-generated source listing)
Go to the documentation of this file.
1 #ifndef CUDACOMPUTENONBONDED_H
2 #define CUDACOMPUTENONBONDED_H
3 #include <vector>
4 #include "Compute.h"
5 #include "Box.h"
6 #include "PatchTypes.h"
7 #include "CudaUtils.h"
8 #include "ComputeNonbondedUtil.h"
9 #include "CudaNonbondedTables.h"
10 #include "CudaTileListKernel.h"
12 #include "CudaComputeGBISKernel.h"
13 #include "ComputeMgr.h"
14 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
15 #ifdef NAMD_CUDA
16 #include <cuda.h>
17 #endif
18 #ifdef NAMD_HIP
19 #include <hip/hip_runtime.h>
20 #include "HipDefines.h"
21 #endif
22 
23 // 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
24 // 2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
25 #define MAX_EXCLUSIONS (1<<27)
26 
28 public:
 // Record for one nonbonded compute object assigned to this device.
 // NOTE(review): this Doxygen listing elides source lines 30-31 and 34,
 // so some ComputeRecord members (presumably the ComputeID and the
 // patch-pair / offset fields) are not visible here -- confirm against
 // the original header before relying on this struct's full layout.
29  struct ComputeRecord {
32  // Index to patches[] -array
33  int patchInd[2];
35  };
36 
37  struct PatchRecord {
 // Constructor: stores the patch ID and nulls every cached pointer
 // (patch, atom/result views, and all proxy boxes, including the five
 // GBIS boxes zeroed below).
 // NOTE(review): the listing elides lines 50-51, 53, 58-77 except for a
 // few kept comments, so the Patch*/CompAtom*/Results* members, the
 // positionBox/forceBox and remaining GBIS box/array declarations are
 // not visible here; the page trailer shows e.g.
 // Box<Patch,CompAtom>* positionBox -- confirm against the real header.
38  PatchRecord(PatchID patchID) : patchID(patchID) {
39  patch = NULL;
40  compAtom = NULL;
41  results = NULL;
42  positionBox = NULL;
43  forceBox = NULL;
44  intRadBox = NULL;
45  psiSumBox = NULL;
46  bornRadBox = NULL;
47  dEdaSumBox = NULL;
48  dHdrPrefixBox = NULL;
49  }
 // Number of atoms in this patch (set when the patch is opened/updated).
52  int numAtoms;
 // Start index of this patch's atoms in the device-wide atom arrays.
54  int atomStart;
55  // Pe where the patch was registered
56  int pe;
57  // For priority sorting
60  bool isSameNode;
61  // Storage for open positionBox
63  // Storage for open forceBox
65  // Boxes
68  Box<Patch,Real> *intRadBox; //5 GBIS Boxes
73  Real *intRad; //5 GBIS arrays
 // Records are ordered and compared by patchID only, so a
 // std::vector<PatchRecord> can be sorted/searched by patch ID.
78  bool operator < (const PatchRecord& pr) const {
79  return (patchID < pr.patchID);
80  }
81  bool operator == (const PatchRecord& pr) const {
82  return (patchID == pr.patchID);
83  }
84  };
85 
86 private:
87  // This variable is set in atomUpdate() by any Pe
88  bool atomsChangedIn;
89  // This variable is set in doWork() by masterPe
90  bool atomsChanged;
91 
92  bool computesChanged;
93 
 // Device and stream this compute is bound to; deviceID is fixed at
 // construction (const).
94  const int deviceID;
95  size_t maxShmemPerBlock;
96  cudaStream_t stream;
97 
98  // PME and VdW CUDA kernels
99  CudaComputeNonbondedKernel nonbondedKernel;
100 
101  // GBIS kernel
102  CudaComputeGBISKernel GBISKernel;
103 
104  // Tile list CUDA kernels
105  CudaTileListKernel tileListKernel;
106 
107  // Exclusions
108  int2 *exclusionsByAtom;
109 
110  // VdW-types
111  // Pinned host memory
112  int* vdwTypes;
113  int vdwTypesSize;
114 
115  // Maximum number of tiles per tile list
116  int maxTileListLen;
117 
118  // Pinned host memory
119  int2* exclIndexMaxDiff;
120  int exclIndexMaxDiffSize;
121 
122  // Pinned host memory
123  int* atomIndex;
124  int atomIndexSize;
125 
126  // Required (xyzq, vdwTypes) storage
127  int atomStorageSize;
128 
129  // Atom and charge storage
130  // Pinned host memory
131  CudaAtom* atoms;
132  int atomsSize;
133 
 // Host (h_) and device (d_) force buffers; the "Slow" variants hold the
 // slow (long-range) force component.
134  // Force storage
135  float4* h_forces;
136  int h_forcesSize;
137  float4* h_forcesSlow;
138  int h_forcesSlowSize;
139 
140  float4* d_forces;
141  int d_forcesSize;
142  float4* d_forcesSlow;
143  int d_forcesSlowSize;
144 
145  // Virial and energy storage
146  VirialEnergy* h_virialEnergy;
147  VirialEnergy* d_virialEnergy;
148 
149  // GBIS storage
150  //--------------
151  // Pinned host memory
152  float* intRad0H;
153  int intRad0HSize;
154  // Pinned host memory
155  float* intRadSH;
156  int intRadSHSize;
157  // Mapped host memory
158  GBReal* psiSumH;
159  int psiSumHSize;
160  // Pinned host memory
161  float* bornRadH;
162  int bornRadHSize;
163  // Mapped host memory
164  GBReal* dEdaSumH;
165  int dEdaSumHSize;
166  // Pinned host memory
167  float* dHdrPrefixH;
168  int dHdrPrefixHSize;
169 
170  // Event and sanity check flag for making sure event was actually recorded
171  cudaEvent_t forceDoneEvent;
172  bool forceDoneEventRecord;
173  // Check counter for event polling
174  int checkCount;
175 
176  // Node lock
177  CmiNodeLock lock;
178  // List of local PEs that have patches
179  std::vector<int> pes;
180  // List of patch indices on each rank
181  std::vector< std::vector<int> > rankPatches;
182  // Master Pe = Pe where this Compute and reduction lives
183  int masterPe;
184 
185  // Are we in skip?
186  bool doSkip;
187 
188  // Device-wide patch and compute records, and the list of patches
189  std::vector<ComputeRecord> computes;
190  std::vector<PatchRecord> patches;
191 
192  // CUDA versions of patches
193  // Pinned host memory
194  CudaPatchRecord* cudaPatches;
195 
196  SubmitReduction *reduction;
197 
198  // Pair lists
199  int pairlistsValid;
200  float pairlistTolerance;
201  int usePairlists;
202  int savePairlists;
203  float plcutoff2;
204 
205  bool reSortDone;
206 
207  // Flags
208  bool doSlow;
209  bool doEnergy;
210  bool doVirial;
211 
212  // Walltime for force compute start
213  double beforeForceCompute;
214 
 // (first, last, result, paraNum, param) signatures suggest these static
 // Loop functions parallelize the corresponding *Subset work over a
 // worker-loop framework -- NOTE(review): presumably CkLoop; confirm in
 // the .C implementation file.
215  static inline void updateVdwTypesExclLoop(int first, int last, void *result, int paraNum, void *param);
216  void updateVdwTypesExclSubset(int first, int last);
217 
218  static inline void copyAtomsLoop(int first, int last, void *result, int paraNum, void *param);
219  void copyAtomsSubset(int first, int last);
220 
221  void addPatch(PatchID pid);
222  void addCompute(ComputeID cid, PatchID pid1, PatchID pid2, Vector offset);
223  void updatePatches();
224  int calcNumTileLists();
225  void getMaxMovementTolerance(float& maxAtomMovement, float& maxPatchTolerance);
226  void updateVdwTypesExcl();
227  void buildNeighborlist();
228  void skip();
 // Three phases of the GBIS (implicit solvent) calculation; see the
 // GBIS boxes/arrays above and the gbisP2/gbisP3 callbacks below.
229  void doGBISphase1();
230  void doGBISphase2();
231  void doGBISphase3();
232  void doForce();
233  void finishSetOfPatchesOnPe(std::vector<int>& patchSet);
234  void finishPatches();
235  void finishGBISPhase(int i);
236  void finishTimers();
237  void reSortTileLists();
238  void forceDone();
 // Static so it can be registered as a timer callback; pairs with
 // forceDoneEvent/checkCount above for polling GPU completion --
 // NOTE(review): exact polling protocol lives in the .C file; confirm.
239  static void forceDoneCheck(void *arg, double walltime);
240  void forceDoneSetCallback();
241  void updateComputes();
242  void buildExclusions();
243  void skipPatch(int i);
244  void openBox(int i);
245  void reallocateArrays();
246  void copyGBISphase(int i);
247  void updatePatch(int i);
248  int findPid(PatchID pid);
249  void assignPatch(int i);
250  ComputeMgr* computeMgr;
251  int patchesCounter;
252 
253  const bool doStreaming;
254  int* patchReadyQueue;
255  int patchReadyQueueNext, patchReadyQueueLen;
256 
257  void finishPatch(int i);
258  void unregisterBox(int i);
259 
260  // void writeId(const char* filename);
261  // void writeXYZ(const char* filename);
262 
263 public:
264  CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables& cudaNonbondedTables, bool doStreaming);
 // NOTE(review): the listing elides source lines 265-266; the page
 // trailer cross-references show
 // void registerComputeSelf(ComputeID cid, PatchID pid)
 // which is presumably declared there (likely alongside the destructor)
 // -- confirm against the original header.
267  void registerComputePair(ComputeID cid, PatchID* pid, int* trans);
268  void assignPatches(ComputeMgr* computeMgrIn);
 // Compute interface overrides (see Compute.h).
269  virtual void initialize();
270  virtual void atomUpdate();
271  virtual int noWork();
272  virtual void doWork();
273  void launchWork();
274  void finishReductions();
 // Per-Pe entry points, named *OnPe; dispatched to the Pes listed in
 // the private 'pes'/'rankPatches' bookkeeping.
275  void unregisterBoxesOnPe();
276  void assignPatchesOnPe();
277  void openBoxesOnPe();
278  void skipPatchesOnPe();
279  void finishPatchesOnPe();
280  void finishPatchOnPe(int i);
281  void messageEnqueueWork();
282  virtual void patchReady(PatchID, int doneMigration, int seq);
283  virtual void gbisP2PatchReady(PatchID, int seq);
284  virtual void gbisP3PatchReady(PatchID, int seq);
285 };
286 
287 #endif // NAMD_CUDA
288 #endif // CUDACOMPUTENONBONDED_H
Cross-references (Doxygen hover text for symbols used in the listing above):
- int ComputeID — Definition: NamdTypes.h:183
- Vector — Definition: Vector.h:64
- virtual void gbisP2PatchReady(PatchID, int seq)
- float Real — Definition: common.h:109
- Patch — Definition: Patch.h:35
- virtual void gbisP3PatchReady(PatchID, int seq)
- CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables &cudaNonbondedTables, bool doStreaming)
- int PatchID — Definition: NamdTypes.h:182
- void registerComputeSelf(ComputeID cid, PatchID pid)
- bool operator<(const PatchRecord &pr) const
- virtual void patchReady(PatchID, int doneMigration, int seq)
- Box< Patch, CompAtom > * positionBox
- void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
- void assignPatches(ComputeMgr *computeMgrIn)
- const ComputeID cid — Definition: Compute.h:43
- float GBReal — Definition: ComputeGBIS.inl:17
- bool operator==(const PatchRecord &pr) const