#ifndef COMPUTEBONDEDCUDA_H
#define COMPUTEBONDEDCUDA_H

// ...

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
#ifdef BONDED_CUDA

class ComputeBondedCUDA : public Compute {
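  // Per-tuple-type element sizes for the CUDA tuple structures and their
  // staging variants (one entry per Tuples::NUM_TUPLE_TYPES).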
  static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];
  static const int CudaTupleTypeSizeStage[Tuples::NUM_TUPLE_TYPES];

  bool initializeCalled;
#ifdef NODEGROUP_FORCE_REGISTER
  std::atomic<int> tupleWorkIndex;
#endif  // NODEGROUP_FORCE_REGISTER
  std::vector<int> allPatchIDs;

  std::vector< std::vector<int> > patchIDsPerRank;
  struct SelfCompute {
    int type;
    std::vector<int> patchIDs;
    Tuples* tuples;
    SelfCompute(int type=-1) : type(type), tuples(NULL) {}
    int operator==(const SelfCompute &elem) const {
      return (elem.type == type);
    }
  };
  struct HomeCompute {
    std::vector<char> isBasePatch;
    std::vector<int> patchIDs;
    std::vector< Tuples* > tuples;
  };
  struct ComputeRecord {
    HomeCompute homeCompute;
    std::vector< SelfCompute > selfComputes;
  };

  std::vector< ComputeRecord > computes;
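  // Tuple bookkeeping: per-type tuple lists and counts collected from the
  // registered computes.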
  std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;
  int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];

  std::vector< AtomMapper* > atomMappers;

  std::vector<PatchRecord> patches;
  std::vector<int> patchIndex;

  std::vector<int> dihedralMultMap;
  std::vector<int> improperMultMap;

  int numModifiedExclusions;
  std::vector<NumExcl> numExclPerRank;
  bool hasModifiedExclusions;

  size_t tupleDataSize;
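  // Host-side staging buffers, one per bonded tuple type.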
  std::vector<CudaBondStage> bondTupleData;
  std::vector<CudaAngleStage> angleTupleData;
  std::vector<CudaDihedralStage> dihedralTupleData;
  std::vector<CudaDihedralStage> improperTupleData;
  std::vector<CudaExclusionStage> modifiedExclusionTupleData;
  std::vector<CudaExclusionStage> exclusionTupleData;
  std::vector<CudaCrosstermStage> crosstermTupleData;
  std::vector<CudaTholeStage> tholeTupleData;
  std::vector<CudaAnisoStage> anisoTupleData;
#ifdef NODEGROUP_FORCE_REGISTER
  MigrationBondedCUDAKernel migrationKernel;
#endif  // NODEGROUP_FORCE_REGISTER

  double3* h_patchMapCenter;
  double3* d_patchMapCenter;
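  // Completion tracking for the asynchronous force computation
  // (see forceDoneCheck() / forceDoneSetCallback() below).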
  cudaEvent_t forceDoneEvent;

  CmiNodeLock printLock;

  double beforeForceCompute;

  double* energies_virials;

  int pswitchTable[3*3];
  void updatePatches();
  static void forceDoneCheck(void *arg, double walltime);
  void forceDoneSetCallback();
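  // Per-chunk descriptors for tuple copying, used by the tupleCopyWorker
  // routines declared below.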
  struct TupleCopyWork {
    // ...
    int64_t tupleDataPos;
  };

  std::vector<TupleCopyWork> tupleCopyWorkList;

  int64_t exclusionStartPos;
  int64_t exclusionStartPos2;
  std::vector<CudaBondStage> hostCudaBondStage;
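  // Device-side tuple sorting and migration (NODEGROUP_FORCE_REGISTER builds only).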
#ifdef NODEGROUP_FORCE_REGISTER
  template <typename T>
  void sortTupleList(std::vector<T>& tuples, std::vector<int>& tupleCounts, std::vector<int>& tupleOffsets);
  void sortAndCopyToDevice();
  void migrateTuples(bool startup);
  template <typename T, typename P, typename D>
  void copyTupleToStage(const T& src, const P* __restrict__ p_array, D& dstval);
  template <typename T, typename P, typename D>
  void copyToStage(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, std::vector<D>& dst);
  void copyExclusionDataStage(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    std::vector<CudaExclusionStage>& dst1, std::vector<CudaExclusionStage>& dst2, int64_t& pos, int64_t& pos2);
#endif  // NODEGROUP_FORCE_REGISTER
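  // Converters from host tuple elements (BondElem, AngleElem, ...) into the
  // CUDA tuple structures consumed by the bonded kernels.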
  void copyBondData(const int ntuples, const BondElem* __restrict__ src,
    /* ... */);

  void copyBondDatafp32(const int ntuples, const BondElem* __restrict__ src,
    /* ... */);

  void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
    /* ... */);
  template <bool doDihedral, typename T, typename P>
  void copyDihedralData(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);

  template <bool doDihedral, typename T, typename P>
  void copyDihedralDatafp32(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);
  void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    /* ... */);

  void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
    /* ... */);

  void copyTholeData(const int ntuples, const TholeElem* __restrict__ src,
    /* ... */);

  void copyAnisoData(const int ntuples, const AnisoElem* __restrict__ src,
    /* ... */);
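  // Worker entry points for parallel tuple copying; the static overloads
  // follow the CkLoop worker signature (first, last, result, paraNum, param).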
  static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorker(int first, int last);
  static void tupleCopyWorkerExcl(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorkerExcl(int first, int last);
#ifdef NODEGROUP_FORCE_REGISTER
  void tupleCopyWorkerType(int tupletype);
#endif  // NODEGROUP_FORCE_REGISTER

  ~ComputeBondedCUDA();
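  // Compute registration and per-PE setup.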
  void registerCompute(int pe, int type, PatchIDList& pids);
  void registerSelfCompute(int pe, int type, int pid);
  void unregisterBoxesOnPe();
  void assignPatchesOnPe();
  void messageEnqueueWork();
  void openBoxesOnPe(int startup = 1);
  void loadTuplesOnPe(const int startup = 1);
  void copyTupleData();
  void copyTupleDataSN();
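  // Alchemical (FEP/TI) flag, parameter, and lambda updates, mirrored between
  // the host-side copies and the CUDA kernels.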
  void updateCudaAlchParameters();
  void updateHostCudaAlchFlags();
  void updateKernelCudaAlchFlags();
  void updateHostCudaAlchParameters();
  void updateKernelCudaAlchParameters();
  void updateHostCudaAlchLambdas();
  void updateKernelCudaAlchLambdas();
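  // Patch and tuple data management for NODEGROUP_FORCE_REGISTER builds.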
#ifdef NODEGROUP_FORCE_REGISTER
  void updatePatchRecords();
  void registerPointersToHost();
  void copyHostRegisterToDevice();
  void copyPatchData();
  void copyTupleDataGPU(const int startup);
  void updatePatchOrder(const std::vector<CudaLocalRecord>& data);
#endif  // NODEGROUP_FORCE_REGISTER

  void finishPatchesOnPe();
  void finishPatches();
  void finishReductions();
  std::vector<int>& getBondedPes(void) { return pes; }
  std::vector<PatchRecord>& getPatches() { return patches; }
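  // Virtual methods overridden from the Compute base class.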
  virtual void initialize();
  virtual void atomUpdate();
  virtual void patchReady(PatchID, int doneMigration, int seq);
};

#endif  // BONDED_CUDA
#endif  // NAMD_CUDA || NAMD_HIP
#endif  // COMPUTEBONDEDCUDA_H