NAMD
PatchData.h
Go to the documentation of this file.
1 #ifndef PATCHDATA_H
2 #define PATCHDATA_H
3 #if __cplusplus < 201103L
4 #undef USING_ATOMICS
5 #else
6 #define USING_ATOMICS
7 #endif
8 
9 #ifdef USING_ATOMICS
10 #include <atomic>
11 #endif
12 #include "ComputeCUDAMgr.h"
13 #include "PatchData.decl.h"
14 #include "NamdTypes.h"
15 #include "HomePatch.h"
16 #include "Sequencer.h"
17 #include "ResizeArray.h"
18 #include "CudaUtils.h"
19 #include "CudaTileListKernel.h" // CudaPatchRecord
20 #include "CudaTileListKernel.hip.h"
21 #include "CudaRecord.h"
22 #include "ReductionMgr.h"
23 #include "BroadcastObject.h"
24 #include "Controller.h"
25 #include "TupleTypesCUDA.h"
26 #include "ComputeMgr.h"
27 
28 #ifdef NAMD_CUDA
29 #include <cuda.h>
30 #endif
31 #ifdef NAMD_HIP
32 #include <hip/hip_runtime.h>
33 #endif
34 
35 #include "HipDefines.h"
36 
37 #ifdef NODEGROUP_FORCE_REGISTER
38 #ifdef NAMD_NCCL_ALLREDUCE
39 #ifdef NAMD_CUDA
40 #include "nccl.h"
41 #endif
42 #ifdef NAMD_HIP
43 #include "rccl.h"
44 #endif
45 #endif
46 
47 
48 class CollectionMaster;
49 class Output;
50 class IMDOutput;
51 
52 // DeviceData holds the CUDAMgr pointers to force data.
53 // These are particular to each device.
// Members are grouped below into nonbonded, bonded and slow sections, followed
// by per-device patch bookkeeping and mapping records. The struct is only
// declared when NODEGROUP_FORCE_REGISTER is defined. The constructor and
// destructor are declared here and defined elsewhere.
// NOTE(review): ownership and lifetime of the raw pointers are not visible in
// this header -- confirm against the code that fills them in.
54 struct DeviceData{
55  // NONBONDED STUFF
56  float4 *f_nbond;
57  float4 *f_nbond_slow;
58  int f_nbond_size; // presumably the size of the f_nbond buffers -- TODO confirm
59  CudaPatchRecord *nbond_precord;
60  CudaTileListKernel *nbond_tkernel;
61  cudaStream_t nbond_stream;
62  float4* nb_datoms; // The idea is to fill this after integration
63  size_t size_nb_datoms; // presumably the size of nb_datoms; units not shown here -- TODO confirm
64  int nb_precord_size;
65 
66 
67  // BONDED STUFF
68  double *f_bond;
69  double *f_bond_nbond;
70  double *f_bond_slow;
71  int f_bond_size;
72  int bond_pr_size;
73  int bond_pi_size;
74  int forceStride;
75  PatchRecord *bond_pr;
76  int *bond_pi;
77  float4 *b_datoms;
78 
79  // SLOW STUFF
80  CudaForce *f_slow;
81  int f_slow_size;
82  int* slow_patchPositions;
83  int slow_patchPositionsSize;
84  int* slow_pencilPatchIndex;
85  int slow_pencilPatchIndexSize;
86  int* slow_patchID;
87  int slow_patchIDSize;
88  CudaAtom* s_datoms; // slow atoms that will be built for PME
89  int* s_datoms_partition;
90 
// NOTE(review): the h_/d_ prefixes look like host/device copies of the same
// data -- confirm against the allocation code.
91  bool* h_hasPatches;
92  bool* d_hasPatches;
93 
94  int* d_globalToLocalID;
95  int* d_patchToDeviceMap;
96 
97  std::vector<HomePatch*> patches; // Pointers to HomePatches this device owns
98 
99  // Mapping data
100  std::vector<CudaLocalRecord> h_localPatches;
101  CudaLocalRecord* d_localPatches;
102 
103  std::vector<CudaPeerRecord> h_peerPatches;
104  CudaPeerRecord* d_peerPatches;
105 
106  int numAtomsHome;
107  int numPatchesHome;
108  int numPatchesHomeAndProxy;
109 
110  DeviceData();
111  ~DeviceData();
112 };
113 
114 #endif
115 
// PatchData: class derived from CBase_PatchData that collects node-level
// pointers, flags and (under NODEGROUP_FORCE_REGISTER) per-device registers.
// NOTE(review): the line numbers embedded in this listing skip values inside
// this class (e.g. 121 -> 123, 125 -> 128, 133 -> 135, 228 -> 235), so several
// member declarations are not visible here; some comments below therefore
// appear without the declaration they describe.
116 class PatchData : public CBase_PatchData {
117 
118  public:
119 
120  PatchData();
121  ~PatchData();
123 
124 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
125  // Pressure control
128  cudaTensor *d_strainRate_old; // for "langevinPistonBarrier no" (presumably used when that option is off -- confirm)
129  cudaTensor *d_positionRescaleFactor; // for "langevinPistonBarrier no" (presumably used when that option is off -- confirm)
130 #endif
131 
132  // A "reduction-like" object is needed here to reduce values on a single node;
133  // avoiding reduction objects avoids calling submit() on every ULT
135  // backup reduction values for MC barostat
137 
138  // Make sure ColVars compiles
140 
142 
143  // Single node flags are stored here and copied by patches
145 
146  // Controller pointer for outputting stuff
148  // Lattice from masterPe
152 
153  // Provide global (node level) access to the collection chare
158 
159  // Pointer to script
161 
162  // Flag for marking migrations
163  CmiNodeLock printlock; // NOTE(review): declared as a CmiNodeLock, not a flag -- the comment above looks stale
164 #ifdef NODEGROUP_FORCE_REGISTER
165  // SOA register of all device SOA force arrays
166  // Bonded SOA pointers
167  double** h_soa_fb_x;
168  double** h_soa_fb_y;
169  double** h_soa_fb_z;
170 
171  // Nonbonded SOA pointers
172  double** h_soa_fn_x;
173  double** h_soa_fn_y;
174  double** h_soa_fn_z;
175 
176  // Slow SOA pointers
177  double** h_soa_fs_x;
178  double** h_soa_fs_y;
179  double** h_soa_fs_z;
180 
// Position SOA pointers (per the pos_ naming -- TODO confirm)
181  double** h_soa_pos_x;
182  double** h_soa_pos_y;
183  double** h_soa_pos_z;
184 
// Velocity SOA pointers (per the vel_ naming -- TODO confirm)
185  double** h_soa_vel_x;
186  double** h_soa_vel_y;
187  double** h_soa_vel_z;
188 
189  float** h_soa_charge;
190 
191  // Device Migration
192  int** h_soa_sortOrder;
193  int** h_soa_unsortOrder;
194  int** h_soa_id;
195  int** h_soa_vdwType;
196  double3** h_soa_patchCenter;
197  int4** h_soa_migrationDestination;
198  int** h_soa_sortSoluteIndex;
199 
200  FullAtom** h_atomdata_AoS;
201  CudaLocalRecord** h_peer_record;
202 
203  int** h_soa_partition;
204 
205  // A device queue is needed for registering work done:
206  // one work queue for each device
207  bool** h_devHasForces;
208  unsigned int** d_queues;
209  unsigned int* d_queueCounters;
210  std::vector<int> migrationFlagPerDevice; // Migration flag for each device
211  std::vector<int> tupleReallocationFlagPerDevice; // presumably the tuple reallocation flag for each device (original comment duplicated the migration-flag text) -- confirm
212  std::vector<int> atomReallocationFlagPerDevice; // presumably the atom reallocation flag for each device (original comment duplicated the migration-flag text) -- confirm
213 
// NOTE(review): <atomic> is included at the top of this header only when
// USING_ATOMICS is defined (C++11 or newer), but the std::atomic members in
// this section are guarded only by NODEGROUP_FORCE_REGISTER -- confirm that
// NODEGROUP_FORCE_REGISTER builds always use C++11 or newer.
214  std::atomic<int> maxNumBonds;
215  std::atomic<int> maxNumAngles;
216  std::atomic<int> maxNumDihedrals;
217  std::atomic<int> maxNumImpropers;
218  std::atomic<int> maxNumModifiedExclusions;
219  std::atomic<int> maxNumExclusions;
220  std::atomic<int> maxNumCrossterms;
221  std::vector<int> devicePatchMapFlag; // Device Patch Map Creation flag per PE
222 #ifdef NAMD_NCCL_ALLREDUCE
223  ncclUniqueId ncclId; // only present when NAMD_NCCL_ALLREDUCE is defined
224 #endif
225 
226  TupleDataStagePeer h_tupleDataStage;
227  TupleIntArraysPeer h_tupleCount;
228  TupleIntArraysPeer h_tupleOffset;
229 
235  ComputeBondedCUDA **cudaBondedList;
236  CudaComputeNonbonded **cudaNonbondedList;
237 
238  std::vector<DeviceData> devData; // NOTE(review): presumably one DeviceData entry per device -- confirm
239  ComputeMgr *master_mgr;
240  CmiNodeLock nodeLock;
241  std::vector<int> cbStore;
242  std::atomic<int> suspendCounter;
243 
244  std::atomic<int> updateCounter;
245 #endif
246 };
247 
248 #endif // PATCHDATA_H
Definition: PDB.h:36
CmiNodeLock printlock
Definition: PatchData.h:163
Definition: Vector.h:72
Controller * c_out
Definition: PatchData.h:147
IMDOutput * imd
Definition: PatchData.h:156
NodeReduction * reduction
Definition: PatchData.h:139
NodeReduction * reductionBackend
Definition: PatchData.h:134
Lattice * lat
Definition: PatchData.h:149
cudaTensor * d_positionRescaleFactor
Definition: PatchData.h:129
Flags flags
Definition: PatchData.h:144
Vector * origin
Definition: PatchData.h:151
PatchData()
Definition: PatchData.C:49
Definition: Output.h:35
NodeReduction * reductionBackendSave
Definition: PatchData.h:136
CollectionMaster * ptrCollectionMaster
Definition: PatchData.h:154
ScriptTcl * script
Definition: PatchData.h:160
cudaTensor * d_langevinPiston_strainRate
Definition: PatchData.h:126
cudaTensor * d_langevinPiston_origStrainRate
Definition: PatchData.h:127
PDB * pdb
Definition: PatchData.h:157
cudaTensor * d_strainRate_old
Definition: PatchData.h:128
Output * ptrOutput
Definition: PatchData.h:155
NodeBroadcast * nodeBroadcast
Definition: PatchData.h:141
void setDeviceKernelUpdateCounter()
Definition: PatchData.C:70
Definition: Tensor.h:15
Tensor * factor
Definition: PatchData.h:150
~PatchData()
Definition: PatchData.C:76