NAMD
PatchData.h
Go to the documentation of this file.
1 #ifndef PATCHDATA_H
2 #define PATCHDATA_H
3 #if __cplusplus < 201103L
4 #undef USING_ATOMICS
5 #else
6 #define USING_ATOMICS
7 #endif
8 
9 #ifdef USING_ATOMICS
10 #include <atomic>
11 #endif
12 #include "ComputeCUDAMgr.h"
13 #include "PatchData.decl.h"
14 #include "NamdTypes.h"
15 #include "HomePatch.h"
16 #include "Sequencer.h"
17 #include "ResizeArray.h"
18 #include "CudaUtils.h"
19 #include "CudaTileListKernel.h" // CudaPatchRecord
20 #include "CudaTileListKernel.hip.h"
21 #include "CudaRecord.h"
22 #include "ReductionMgr.h"
23 #include "Controller.h"
24 #include "TupleTypesCUDA.h"
25 #include "ComputeMgr.h"
26 
27 #ifdef NAMD_CUDA
28 #include <cuda.h>
29 #endif
30 #ifdef NAMD_HIP
31 #include <hip/hip_runtime.h>
32 #endif
33 
34 #include "HipDefines.h"
35 
36 #ifdef NODEGROUP_FORCE_REGISTER
37 #ifdef NAMD_NCCL_ALLREDUCE
38 #ifdef NAMD_CUDA
39 #include "nccl.h"
40 #endif
41 #ifdef NAMD_HIP
42 #include "rccl.h"
43 #endif
44 #endif
45 
46 
47 class CollectionMaster;
48 class Output;
49 class IMDOutput;
50 
51 // DeviceData holds the CUDAMgr pointers to force data, together with the
52 // sizes and patch bookkeeping declared alongside them. These are particular to each device.
53 struct DeviceData{
54  // NONBONDED STUFF
55  float4 *f_nbond; // nonbonded force buffer (NOTE(review): presumably device memory - confirm)
56  float4 *f_nbond_slow; // presumably the slow-force counterpart of f_nbond - TODO confirm
57  int f_nbond_size; // size associated with the f_nbond buffers (units not shown here - TODO confirm)
58  CudaPatchRecord *nbond_precord; // patch records for the nonbonded compute
59  CudaTileListKernel *nbond_tkernel; // tile-list kernel object for the nonbonded compute
60  cudaStream_t nbond_stream; // stream handle stored for the nonbonded compute
61  float4* nb_datoms; //The idea is to fill this after integration
62  size_t size_nb_datoms; // size associated with nb_datoms (elements vs. bytes not shown - TODO confirm)
63  int nb_precord_size; // presumably the number of entries in nbond_precord - TODO confirm
64 
65 
66  //BONDED STUFF
67  double *f_bond; // bonded force buffer
68  double *f_bond_nbond; // NOTE(review): looks like the nonbonded-type contribution from bonded terms - confirm
69  double *f_bond_slow; // presumably the slow-force contribution from bonded terms - TODO confirm
70  int f_bond_size; // size associated with the f_bond buffers (units not shown here - TODO confirm)
71  int bond_pr_size; // presumably the number of entries in bond_pr - TODO confirm
72  int bond_pi_size; // presumably the number of entries in bond_pi - TODO confirm
73  int forceStride; // stride used with the bonded force buffers (layout not shown here - TODO confirm)
74  PatchRecord *bond_pr; // patch records for the bonded compute
75  int *bond_pi; // integer patch array for the bonded compute (meaning not shown here - TODO confirm)
76  float4 *b_datoms; // atom buffer for the bonded compute (cf. nb_datoms above)
77 
78  // SLOW STUFF
79  CudaForce *f_slow; // slow force buffer
80  int f_slow_size; // size associated with f_slow (units not shown here - TODO confirm)
81  int* slow_patchPositions;
82  int slow_patchPositionsSize; // size paired with slow_patchPositions
83  int* slow_pencilPatchIndex;
84  int slow_pencilPatchIndexSize; // size paired with slow_pencilPatchIndex
85  int* slow_patchID;
86  int slow_patchIDSize; // size paired with slow_patchID
87  CudaAtom* s_datoms; // slow atoms that will be built for PME
88  int* s_datoms_partition; // per-atom partition data accompanying s_datoms (presumably - TODO confirm)
89 
90  bool* h_hasPatches; // NOTE(review): h_/d_ prefixes look like host/device copies of the same data - confirm
91  bool* d_hasPatches;
92 
93  int* d_globalToLocalID; // presumably maps a global patch ID to a device-local index - TODO confirm
94  int* d_patchToDeviceMap; // presumably maps a patch to the device that holds it - TODO confirm
95 
96  std::vector<HomePatch*> patches; // Pointers to HomePatches this device owns
97 
98  // Mapping data
99  std::vector<CudaLocalRecord> h_localPatches; // local patch records; d_localPatches is presumably the device copy
100  CudaLocalRecord* d_localPatches;
101 
102  std::vector<CudaPeerRecord> h_peerPatches; // peer patch records; d_peerPatches is presumably the device copy
103  CudaPeerRecord* d_peerPatches;
104 
105  int numAtomsHome; // atom count for home patches (presumably on this device - TODO confirm)
106  int numPatchesHome; // home patch count
107  int numPatchesHomeAndProxy; // home plus proxy patch count
108 
109  DeviceData(); // defined outside this header
110  ~DeviceData(); // defined outside this header
111 };
112 
113 #endif
114 
115 class PatchData : public CBase_PatchData {
116  // Shared state container. In GPU builds it holds pressure-control tensors; under
116  // NODEGROUP_FORCE_REGISTER it also holds registers of per-device SOA array pointers,
116  // per-device flags, tuple staging data and the DeviceData records (devData).
117  public:
118 
119  PatchData();
120  ~PatchData();
122 
123 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
124  // Pressure control
127  cudaTensor *d_strainRate_old; // for langevinPistonBarrier no
128  cudaTensor *d_positionRescaleFactor; // for langevinPistonBarrier no
129 #endif
130 
131  // I need "reduction-like" object here to reduce stuff on a single-node
132  // avoiding reduction objects will let me avoid calling submit() on every ULT
134  // backup reduction values for MC barostat
136 
137  // Single node flags are stored here and copied by patches
139 
140  // Controller pointer for outputting stuff
142  // Lattice from masterPe
146 
147  // Provide global (node level) access to the collection chare
152 
153  // Pointer to script
155 
156  // Flag for marking migrations (NOTE(review): no flag is declared here; the next member is a lock - comment may be stale)
157  CmiNodeLock printlock; // node lock; by its name presumably serializes printing - TODO confirm
158 #ifdef NODEGROUP_FORCE_REGISTER
159  // SOA register of all device SOA force arrays
159  // (NOTE(review): each T** below is presumably indexed by device and points at that device's array - confirm)
160  // Bonded SOA pointers
161  double** h_soa_fb_x;
162  double** h_soa_fb_y;
163  double** h_soa_fb_z;
164 
165  // Nonbonded SOA pointers
166  double** h_soa_fn_x;
167  double** h_soa_fn_y;
168  double** h_soa_fn_z;
169 
170  // Slow SOA pointers
171  double** h_soa_fs_x;
172  double** h_soa_fs_y;
173  double** h_soa_fs_z;
174 
174  // Position SOA pointers
175  double** h_soa_pos_x;
176  double** h_soa_pos_y;
177  double** h_soa_pos_z;
178 
178  // Velocity SOA pointers
179  double** h_soa_vel_x;
180  double** h_soa_vel_y;
181  double** h_soa_vel_z;
182 
183  float** h_soa_charge; // Charge SOA pointers
184 
185  // Device Migration
186  int** h_soa_sortOrder;
187  int** h_soa_unsortOrder;
188  int** h_soa_id;
189  int** h_soa_vdwType;
190  double3** h_soa_patchCenter;
191  int4** h_soa_migrationDestination;
192  int** h_soa_sortSoluteIndex;
193 
194  FullAtom** h_atomdata_AoS; // array-of-structures atom data pointers (FullAtom), in contrast to the SOA arrays above
195  CudaLocalRecord** h_peer_record; // pointers to CudaLocalRecord arrays (presumably one per peer device - TODO confirm)
196 
197  int** h_soa_partition;
198 
199  // we do need a device queue for registering work done
200  // work queue for each device
201  bool** h_devHasForces;
202  unsigned int** d_queues;
203  unsigned int* d_queueCounters;
204  std::vector<int> migrationFlagPerDevice; // Migration flag for each device
205  std::vector<int> tupleReallocationFlagPerDevice; // Tuple reallocation flag for each device (presumably - original comment was copied from the migration flag)
206  std::vector<int> atomReallocationFlagPerDevice; // Atom reallocation flag for each device (presumably - original comment was copied from the migration flag)
207  // NOTE(review): the std::atomic members below are not guarded by USING_ATOMICS, but <atomic> is only included when USING_ATOMICS is defined (C++11 or later) - confirm NODEGROUP_FORCE_REGISTER is never enabled for older standards
208  std::atomic<int> maxNumBonds;
209  std::atomic<int> maxNumAngles;
210  std::atomic<int> maxNumDihedrals;
211  std::atomic<int> maxNumImpropers;
212  std::atomic<int> maxNumModifiedExclusions;
213  std::atomic<int> maxNumExclusions;
214  std::atomic<int> maxNumCrossterms;
215  std::vector<int> devicePatchMapFlag; // Device Patch Map Creation flag per PE
216 #ifdef NAMD_NCCL_ALLREDUCE
217  ncclUniqueId ncclId; // NCCL/RCCL unique id, only present when NAMD_NCCL_ALLREDUCE is defined
218 #endif
219 
220  TupleDataStagePeer h_tupleDataStage;
221  TupleIntArraysPeer h_tupleCount;
222  TupleIntArraysPeer h_tupleOffset;
223 
229  ComputeBondedCUDA **cudaBondedList; // array of pointers to bonded compute objects (indexing not shown here - TODO confirm)
230  CudaComputeNonbonded **cudaNonbondedList; // array of pointers to nonbonded compute objects (indexing not shown here - TODO confirm)
231 
232  std::vector<DeviceData> devData; // DeviceData records (per-device force pointers and patch bookkeeping)
233  ComputeMgr *master_mgr;
234  CmiNodeLock nodeLock; // node lock (what it protects is not visible in this header)
235  std::vector<int> cbStore;
236  std::atomic<int> suspendCounter;
237 
238  std::atomic<int> updateCounter;
239 #endif
240 };
241 
242 #endif // PATCHDATA_H
Definition: PDB.h:36
CmiNodeLock printlock
Definition: PatchData.h:157
Definition: Vector.h:72
Controller * c_out
Definition: PatchData.h:141
IMDOutput * imd
Definition: PatchData.h:150
NodeReduction * reduction
Definition: PatchData.h:133
Lattice * lat
Definition: PatchData.h:143
cudaTensor * d_positionRescaleFactor
Definition: PatchData.h:128
Flags flags
Definition: PatchData.h:138
Vector * origin
Definition: PatchData.h:145
PatchData()
Definition: PatchData.C:49
Definition: Output.h:35
CollectionMaster * ptrCollectionMaster
Definition: PatchData.h:148
ScriptTcl * script
Definition: PatchData.h:154
cudaTensor * d_langevinPiston_strainRate
Definition: PatchData.h:125
cudaTensor * d_langevinPiston_origStrainRate
Definition: PatchData.h:126
PDB * pdb
Definition: PatchData.h:151
cudaTensor * d_strainRate_old
Definition: PatchData.h:127
Output * ptrOutput
Definition: PatchData.h:149
NodeReduction * nodeReductionSave
Definition: PatchData.h:135
void setDeviceKernelUpdateCounter()
Definition: PatchData.C:67
Definition: Tensor.h:15
Tensor * factor
Definition: PatchData.h:144
~PatchData()
Definition: PatchData.C:73