NAMD
ComputeBondedCUDA.h
#ifndef COMPUTEBONDEDCUDA_H
#define COMPUTEBONDEDCUDA_H
#include "Compute.h"
#include "ComputeMap.h"
#include "CudaNonbondedTables.h"
#include "ComputeBondedCUDAKernel.h"
#include "ComputeHomeTuples.h"
#if defined(NAMD_CUDA) || defined(NAMD_HIP)

#ifdef BONDED_CUDA

#include <vector>
#include <array>
#include <list>

class ComputeBondedCUDA : public Compute {

public:

  static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];

private:
  bool initializeCalled;

  // Device ID and stream
  const int deviceID;
  cudaStream_t stream;

  // Master PE for this compute
  const int masterPe;

  // List of all patch IDs on this object
  std::vector<int> allPatchIDs;

  // List of tuple patches for the entire compute (i.e. across all PEs)
  TuplePatchList tuplePatchList;

  // For every PE, list of patches that it has registered
  std::vector< std::vector<int> > patchIDsPerRank;

  // List of PEs involved in the computation
  std::vector<int> pes;

  // Self compute
  struct SelfCompute {
    int type;
    std::vector<int> patchIDs;
    Tuples* tuples;
    SelfCompute(int type=-1) : type(type), tuples(NULL) {}
    int operator==(const SelfCompute &elem) const {
      return (elem.type == type);
    }
  };
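
  // operator== compares only the tuple type, so a SelfCompute of a given
  // type can be located with the standard algorithms. A minimal sketch
  // (assuming <algorithm> and a ComputeRecord "c" as declared below):
  //   auto it = std::find(c.selfComputes.begin(), c.selfComputes.end(),
  //                       SelfCompute(type));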

  // Home compute, each PE has one
  struct HomeCompute {
    std::vector<char> isBasePatch;
    std::vector<int> patchIDs;
    // Multiple tuples per PE, each of a different kind
    std::vector< Tuples* > tuples;
  };

  // Computes for each PE
  struct ComputeRecord {
    HomeCompute homeCompute;
    // Self computes, organized by type
    std::vector< SelfCompute > selfComputes;
  };

  // Collection of all computes for each PE
  std::vector< ComputeRecord > computes;

  // For every tuple type, list of tuples
  // NOTE: These are pointers to the data recorded in "computes" and
  // are here to make it easier to traverse across all tuples of a certain kind
  std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;
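
  // For example, all tuples of one kind can be visited directly, without
  // walking the per-PE "computes" records (a sketch; "tupletype" is an
  // illustrative index):
  //   for (Tuples* t : tupleList[tupletype]) { /* ... */ }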

  int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];

  AtomMap atomMap;
  std::vector< AtomMapper* > atomMappers;

  struct PatchRecord {
    int atomStart;
    int numAtoms;
  };
  std::vector<PatchRecord> patches;

  // Patch "patchID" is found in patches[patchIndex[patchID]]
  std::vector<int> patchIndex;
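
  // A sketch of the indirection (assuming "pid" is a valid patch ID):
  //   const PatchRecord& pr = patches[patchIndex[pid]];
  // after which the patch's atoms presumably occupy the contiguous range
  // [pr.atomStart, pr.atomStart + pr.numAtoms) of the flat atom arrays.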

  // Maps multiplicity indices
  std::vector<int> dihedralMultMap;
  std::vector<int> improperMultMap;

  // Number of exclusions per rank, separated into modified and non-modified
  struct NumExcl {
    int numModifiedExclusions;
    int numExclusions;
  };
  std::vector<NumExcl> numExclPerRank;

  // Flags that indicate whether this GPU has exclusions and modified exclusions
  bool hasExclusions;
  bool hasModifiedExclusions;

  // All tuple data
  char* tupleData;
  int tupleDataSize;

  // Bonded CUDA kernel
  ComputeBondedCUDAKernel bondedKernel;

  // Pointer to computeMgr that created this object
  ComputeMgr* computeMgr;

  // Node-wide counter for patches.
  int patchesCounter;

  // "Force done event" for event polling
  cudaEvent_t forceDoneEvent;

  // Check counter for event polling
  int checkCount;
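
  // A sketch of the polling scheme implied by the two members above:
  // forceDoneSetCallback() presumably registers forceDoneCheck() as a
  // timed callback that polls forceDoneEvent (e.g. with cudaEventQuery)
  // and re-arms itself until the forces are ready, with checkCount
  // guarding against polling forever.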

  // Node lock
  CmiNodeLock lock;

  // This variable is set in atomUpdate() by any PE
  bool atomsChangedIn;
  // This variable is set in doWork() by masterPe
  bool atomsChanged;
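
  // Presumably atomsChangedIn is the flag any PE may raise (hence the
  // node lock above), and the master PE latches it into atomsChanged once
  // per step so the rest of the step sees a stable value.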

  // Reduction
  SubmitReduction *reduction;

  // Required storage
  int atomStorageSize;

  // Flags pointer
  Flags* flags;

  // Lattice and energy and virial booleans
  Lattice lattice;
  bool doEnergy;
  bool doVirial;
  bool doSlow;
  bool doMolly;

  // Walltime for force compute start
  double beforeForceCompute;

  bool accelMDdoDihe;

  // Atom storage in pinned host memory
  CudaAtom* atoms;
  int atomsSize;

  // Force storage in pinned host memory
  FORCE_TYPE* forces;
  int forcesSize;

  double* energies_virials;

  void mapAtoms();
  void unmapAtoms();

  void updatePatches();

  static void forceDoneCheck(void *arg, double walltime);
  void forceDoneSetCallback();

  void finishPatches();

  // ------------ For copyTupleData -------------------
  struct TupleCopyWork {
    int tupletype;
    int ntuples;
    void* tupleElemList;
    int tupleDataPos;
  };

  std::vector<TupleCopyWork> tupleCopyWorkList;

  int exclusionStartPos;
  int exclusionStartPos2;
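
  // A sketch of the packing implied by the members above: each
  // TupleCopyWork entry presumably tells tupleCopyWorker() where a run of
  // "ntuples" tuples of "tupletype" begins (tupleElemList) and at which
  // offset of the flat tupleData buffer to pack it (tupleDataPos).
  // Exclusions appear to be split into two destination regions, starting
  // at exclusionStartPos and exclusionStartPos2 (cf. the dst1/dst2 and
  // pos/pos2 arguments of copyExclusionData below).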

  void copyBondData(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);

  void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
    const AngleValue* __restrict__ angle_array, CudaAngle* __restrict__ dst);

  template <bool doDihedral, typename T, typename P>
  void copyDihedralData(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);

  void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    CudaExclusion* __restrict__ dst1, CudaExclusion* __restrict__ dst2, int& pos, int& pos2);

  void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
    const CrosstermValue* __restrict__ crossterm_array, CudaCrossterm* __restrict__ dst);

  static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorker(int first, int last);
  // --------------------------------------------------

public:

  ComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr, int deviceID, CudaNonbondedTables& cudaNonbondedTables);
  ~ComputeBondedCUDA();
  void registerCompute(int pe, int type, PatchIDList& pids);
  void registerSelfCompute(int pe, int type, int pid);
  void unregisterBoxesOnPe();
  void assignPatchesOnPe();
  virtual void patchReady(PatchID, int doneMigration, int seq);
  virtual void initialize();
  virtual void atomUpdate();
  virtual int noWork();
  virtual void doWork();
  void messageEnqueueWork();
  // void updatePatches();
  void openBoxesOnPe();
  void loadTuplesOnPe();
  void copyTupleData();
  void launchWork();

  void finishPatchesOnPe();
  void finishReductions();

};
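
// A sketch of the per-timestep flow suggested by the public methods above
// (an inference from this header, not something the file states):
// patchReady()/doWork() lead each PE through openBoxesOnPe() and
// loadTuplesOnPe(); the master PE packs tuples in copyTupleData() and
// launches the GPU kernel in launchWork(); once forceDoneEvent signals
// completion, finishPatchesOnPe() returns forces to the patches and
// finishReductions() submits energies and virials.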

#endif // BONDED_CUDA
#endif // NAMD_CUDA
#endif // COMPUTEBONDEDCUDA_H