NAMD
PatchData.h
Go to the documentation of this file.
1 #ifndef PATCHDATA_H
2 #define PATCHDATA_H
3 #if __cplusplus < 201103L
4 #undef USING_ATOMICS
5 #else
6 #define USING_ATOMICS
7 #endif
8 
9 #ifdef USING_ATOMICS
10 #include <atomic>
11 #endif
12 #include "ComputeCUDAMgr.h"
13 #include "PatchData.decl.h"
14 #include "NamdTypes.h"
15 #include "HomePatch.h"
16 #include "Sequencer.h"
17 #include "ResizeArray.h"
18 #include "CudaUtils.h"
19 #include "CudaTileListKernel.h" // CudaPatchRecord
20 #include "CudaTileListKernel.hip.h"
21 #include "CudaRecord.h"
22 #include "ReductionMgr.h"
23 #include "BroadcastObject.h"
24 #include "Controller.h"
25 #include "TupleTypesCUDA.h"
26 #include "ComputeMgr.h"
27 
28 #ifdef NAMD_CUDA
29 #include <cuda.h>
30 #endif
31 #ifdef NAMD_HIP
32 #include <hip/hip_runtime.h>
33 #endif
34 
35 #include "HipDefines.h"
36 
37 #ifdef NODEGROUP_FORCE_REGISTER
38 #ifdef NAMD_NCCL_ALLREDUCE
39 #ifdef NAMD_CUDA
40 #include "nccl.h"
41 #endif
42 #ifdef NAMD_HIP
43 #include "rccl.h"
44 #endif
45 #endif
46 
47 
48 class CollectionMaster;
49 class Output;
50 class IMDOutput;
51 
52 // DeviceData holds the CUDAMgr pointers to force data, plus the sizes and patch-mapping records declared alongside them.
53 // These are particular to each device (PatchData keeps them in a std::vector<DeviceData>).
// NOTE(review): std::vector is used below, but this header shows no direct #include <vector>;
// presumably it arrives transitively through one of the project headers -- confirm.
// NOTE(review): the h_/d_ prefixes on members presumably mark host-side and device-side copies -- confirm.
54 struct DeviceData{
55  // NONBONDED STUFF
56  float4 *f_nbond; // nonbonded force buffer (float4 elements)
57  float4 *f_nbond_slow; // presumably the slow part of the nonbonded force -- confirm
58  int f_nbond_size; // presumably the size of the f_nbond buffers -- confirm units (elements vs bytes)
59  CudaPatchRecord *nbond_precord; // CudaPatchRecord comes from CudaTileListKernel.h (see include comment)
60  CudaTileListKernel *nbond_tkernel;
61  cudaStream_t nbond_stream; // stream handle associated with the nonbonded work -- ownership not visible here
62  float4* nb_datoms; // The idea is to fill this after integration
63  size_t size_nb_datoms; // presumably the size of nb_datoms -- confirm units
64  int nb_precord_size; // presumably the length of nbond_precord -- confirm
65 
66 
67  // BONDED STUFF
68  double *f_bond;
69  double *f_bond_nbond;
70  double *f_bond_slow;
71  int f_bond_size; // presumably the size of the f_bond buffers -- confirm units
72  int bond_pr_size; // presumably the length of bond_pr -- confirm
73  int bond_pi_size; // presumably the length of bond_pi -- confirm
74  int forceStride; // NOTE(review): stride semantics are not visible in this header -- confirm against the bonded kernels
75  PatchRecord *bond_pr;
76  int *bond_pi;
77  float4 *b_datoms;
78 
79  // SLOW STUFF
80  CudaForce *f_slow;
81  int f_slow_size;
82  int* slow_patchPositions;
83  int slow_patchPositionsSize;
84  int* slow_pencilPatchIndex;
85  int slow_pencilPatchIndexSize;
86  int* slow_patchID;
87  int slow_patchIDSize;
88  CudaAtom* s_datoms; // slow atoms that will be built for PME
89  int* s_datoms_partition;
90 
91  bool* h_hasPatches; // paired with d_hasPatches below
92  bool* d_hasPatches;
93 
94  int* d_globalToLocalID; // presumably maps a global patch ID to a device-local index -- confirm
95  int* d_patchToDeviceMap; // presumably maps a patch to its owning device -- confirm
96 
97  std::vector<HomePatch*> patches; // Pointers to HomePatches this device owns
98 
99  // Mapping data
100  std::vector<CudaLocalRecord> h_localPatches; // paired with the raw pointer d_localPatches
101  CudaLocalRecord* d_localPatches;
102 
103  std::vector<CudaPeerRecord> h_peerPatches; // paired with the raw pointer d_peerPatches
104  CudaPeerRecord* d_peerPatches;
105 
106  int numAtomsHome;
107  int numPatchesHome;
108  int numPatchesHomeAndProxy;
109 
110  DeviceData(); // declared only; the definition is not in this header
111  ~DeviceData(); // declared only; NOTE(review): whether it releases the raw pointers above is not visible here
112 };
113 
114 #endif
115 
// PatchData derives from CBase_PatchData (PatchData.decl.h is included by this header).
// Per the comments below it carries single-node state: pressure-control tensors, flags copied
// by patches, and, under NODEGROUP_FORCE_REGISTER, per-device bookkeeping for GPU work.
// NOTE(review): the embedded line numbers in this listing skip (e.g. 122, 126-127, 134, 136,
// 139, 141, 144, 147, 149-151, 154-157, 160, 199-203), so several member declarations are not
// shown here; comments with nothing under them refer to those missing declarations.
116 class PatchData : public CBase_PatchData {
117 
118  public:
119 
120  PatchData(); // declared only; the definition is not in this header
121  ~PatchData(); // declared only; the definition is not in this header
123 
124 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
125  // Pressure control (only declared for CUDA or HIP builds)
128  cudaTensor *d_strainRate_old; // for langevinPistonBarrier no (presumably: used when that option is off -- confirm)
129  cudaTensor *d_positionRescaleFactor; // for langevinPistonBarrier no (presumably: used when that option is off -- confirm)
130 #endif
131 
132  // I need "reduction-like" object here to reduce stuff on a single-node
133  // avoiding reduction objects will let me avoid calling submit() on every ULT
135  // backup reduction values for MC barostat
137 
138  // Make sure ColVars compiles
140 
142 
143  // Single node flags are stored here and copied by patches
145 
146  // Controller pointer for outputting stuff
148  // Lattice from masterPe
152 
153  // Provide global (node level) access to the collection chare
158 
159  // Pointer to script
161 
162  // Flag for marking migrations
 // NOTE(review): the comment above does not describe printlock; the declaration it belongs to is not visible in this listing.
163  CmiNodeLock printlock; // node lock; the name suggests it guards printing -- confirm
164 #ifdef NODEGROUP_FORCE_REGISTER
165  int** h_soa_sortOrder;
166  int** h_soa_vdwType;
167  int** h_soa_id;
168  int4** h_soa_migrationDestination;
169 
170  CudaLocalRecord** h_peer_record;
171 
172  int** h_soa_partition;
173 
174  // we do need a device queue for registering work done
175  // work queue for each device
176  bool** h_devHasForces;
177  unsigned int** d_queues;
178  unsigned int* d_queueCounters;
179  std::vector<int> migrationFlagPerDevice; // Migration flag for each device
180  std::vector<int> tupleReallocationFlagPerDevice; // Tuple reallocation flag for each device (comment previously said "Migration"; presumably copy-paste -- confirm)
181  std::vector<int> atomReallocationFlagPerDevice; // Atom reallocation flag for each device (comment previously said "Migration"; presumably copy-paste -- confirm)
182 
 // NOTE(review): std::atomic is used here under NODEGROUP_FORCE_REGISTER only, while <atomic> is
 // included at the top of this header only when USING_ATOMICS is defined (__cplusplus >= 201103L).
183  std::atomic<int> maxNumBonds;
184  std::atomic<int> maxNumAngles;
185  std::atomic<int> maxNumDihedrals;
186  std::atomic<int> maxNumImpropers;
187  std::atomic<int> maxNumModifiedExclusions;
188  std::atomic<int> maxNumExclusions;
189  std::atomic<int> maxNumCrossterms;
190  std::vector<int> devicePatchMapFlag; // Device Patch Map Creation flag per PE
191 #ifdef NAMD_NCCL_ALLREDUCE
192  ncclUniqueId ncclId; // declared only when NAMD_NCCL_ALLREDUCE is defined (type from nccl.h / rccl.h)
193 #endif
194 
195  TupleDataStagePeer h_tupleDataStage;
196  TupleIntArraysPeer h_tupleCount;
197  TupleIntArraysPeer h_tupleOffset;
198 
204  ComputeBondedCUDA **cudaBondedList;
205  CudaComputeNonbonded **cudaNonbondedList;
206 
207  std::vector<DeviceData> devData; // one DeviceData entry per element; presumably indexed by device -- confirm
208  ComputeMgr *master_mgr;
209  CmiNodeLock nodeLock;
210  std::vector<int> cbStore;
211  std::atomic<int> suspendCounter;
212 
213  std::atomic<int> updateCounter;
214 #endif
215 };
216 
217 #endif // PATCHDATA_H
Definition: PDB.h:36
Broadcast object for intra-node GPU-resident broadcasts.
CmiNodeLock printlock
Definition: PatchData.h:163
Definition: Vector.h:72
Controller * c_out
Definition: PatchData.h:147
IMDOutput * imd
Definition: PatchData.h:156
NodeReduction * reduction
Definition: PatchData.h:139
NodeReduction * reductionBackend
Definition: PatchData.h:134
Lattice * lat
Definition: PatchData.h:149
cudaTensor * d_positionRescaleFactor
Definition: PatchData.h:129
Flags flags
Definition: PatchData.h:144
Vector * origin
Definition: PatchData.h:151
PatchData()
Definition: PatchData.C:49
Definition: Output.h:35
NodeReduction * reductionBackendSave
Definition: PatchData.h:136
CollectionMaster * ptrCollectionMaster
Definition: PatchData.h:154
ScriptTcl * script
Definition: PatchData.h:160
cudaTensor * d_langevinPiston_strainRate
Definition: PatchData.h:126
cudaTensor * d_langevinPiston_origStrainRate
Definition: PatchData.h:127
PDB * pdb
Definition: PatchData.h:157
cudaTensor * d_strainRate_old
Definition: PatchData.h:128
Output * ptrOutput
Definition: PatchData.h:155
NodeBroadcast * nodeBroadcast
Definition: PatchData.h:141
void setDeviceKernelUpdateCounter()
Definition: PatchData.C:70
Definition: Tensor.h:15
Tensor * factor
Definition: PatchData.h:150
~PatchData()
Definition: PatchData.C:76