1 #ifndef COMPUTEPMECUDAMGR_H
2 #define COMPUTEPMECUDAMGR_H
9 #include "ComputePmeCUDAMgr.decl.h"
11 #include <cuda_runtime.h>
15 #include <hip/hip_runtime.h>
18 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
30 atomIndexCapacity = 0;
34 overflowAtomCapacity = 0;
36 overflowAtomIndexCapacity = 0;
38 lock_ = CmiCreateLock();
41 CmiDestroyLock(lock_);
45 return addAtoms_(natom, src, NULL);
49 return addAtoms_(natom, src, index);
55 if (overflowEnd-overflowStart > 0) {
56 resize_((
void **)&
atom, numAtoms, atomCapacity,
sizeof(
CudaAtom));
57 if (useIndex) resize_((
void **)&
atomIndex, numAtoms, atomIndexCapacity,
sizeof(
int));
79 return patchPos.data();
83 return patchPos.size();
92 NAMD_bug(
"PmeAtomStorage::getAtomIndex, no indexing enabled");
112 int overflowAtomCapacity;
113 int overflowAtomIndexCapacity;
119 std::vector<int> patchPos;
123 int atomIndexCapacity;
// Grows a raw buffer so that it can hold at least sizeRequested elements.
//   array         - address of the buffer pointer to (re)allocate
//   sizeRequested - number of elements the caller needs
//   arrayCapacity - current capacity in elements; set to the new capacity on growth
//   sizeofType    - size of one element in bytes
// Visible behavior: returns immediately when *array is non-NULL and arrayCapacity
// already covers sizeRequested. Otherwise a buffer of (int)(sizeRequested*1.5)
// elements is requested from alloc_(), the old contents (arrayCapacity elements)
// are copied over with memcpy_() when an old buffer exists, and arrayCapacity is
// updated.
// NOTE(review): several body lines are not visible in this excerpt (presumably
// releasing the old buffer and storing newArray into *array) - confirm against
// the full file.
// NOTE(review): byte counts are computed in int arithmetic
// (sizeofType*newArrayCapacity); confirm sizes stay below INT_MAX.
126 void resize_(
void **array,
int sizeRequested,
int& arrayCapacity,
const int sizeofType) {
// Nothing to do: buffer exists and is already large enough.
128 if (*array != NULL && arrayCapacity >= sizeRequested)
return;
// Over-allocate by 50% (truncated to int) so repeated growth is less frequent.
131 int newArrayCapacity = (int)(sizeRequested*1.5);
132 void* newArray = alloc_(sizeofType*newArrayCapacity);
// Preserve the existing contents when there was a previous buffer.
134 if (*array != NULL) {
136 memcpy_(newArray, *array, arrayCapacity*sizeofType);
142 arrayCapacity = newArrayCapacity;
// Copies `size` bytes from src to dst using the standard memcpy.
// Declared virtual, so a derived class can substitute a different copy routine.
146 virtual void memcpy_(
void *dst,
const void* src,
const int size) {
147 memcpy(dst, src, size);
// Gathers natom atoms through an index table: dst[i] = src[indexSrc[i]] for
// i in [0, natom). indexSrc is dereferenced unconditionally, so it must be
// non-NULL whenever natom > 0. Declared virtual, so a derived class can
// substitute a different implementation.
150 virtual void copyWithIndex_(
CudaAtom* dst,
const CudaAtom* src,
const int natom,
const int* indexSrc) {
151 for (
int i=0;i < natom;i++) dst[i] = src[indexSrc[i]];
// Pure virtual allocation hook, to be implemented by derived classes.
// resize_() calls it with a size in bytes (element size * new capacity) and
// uses the returned pointer as the new buffer.
155 virtual void* alloc_(
const int size)=0;
// Pure virtual deallocation hook, to be implemented by derived classes.
// NOTE(review): presumably releases a buffer previously obtained from alloc_();
// the call sites are not visible in this excerpt - confirm.
158 virtual void dealloc_(
void *p)=0;
// Appends natom atoms from src to the storage; shared implementation behind
// addAtoms() (index == NULL) and addAtomsWithIndex().
//   natom - number of atoms to add
//   src   - source atoms
//   index - optional index table; passed as NULL by addAtoms()
// Visible behavior: the running end position of this patch is appended to
// patchPos. When the new total exceeds atomCapacity (or atomIndexCapacity when
// useIndex is set) the atoms are placed in the overflow buffers, which are
// grown with resize_(); otherwise they are written at atom+pos (and
// atomIndex+pos when useIndex is set).
// NOTE(review): this excerpt omits lines of the body (the definition of pos,
// the numAtoms update, the surrounding branch structure, the return
// statement, and presumably locking around the shared counters) - confirm
// against the full file.
// NOTE(review): copyWithIndex_() dereferences index; the guard that handles
// index == NULL is not visible here - confirm it exists in the omitted lines.
162 int addAtoms_(
const int natom,
const CudaAtom* src,
const int* index) {
// patchPos holds cumulative end offsets: entry i = entry i-1 + natom of patch i.
166 int patchInd = patchPos.size();
167 int ppos = (patchInd == 0) ? natom : patchPos[patchInd-1] + natom;
168 patchPos.push_back(ppos);
170 bool overflow =
false;
// Main buffers are too small for the new total: use the overflow buffers.
173 if (numAtoms > atomCapacity || (useIndex && numAtoms > atomIndexCapacity)) {
// Empty overflow range (the lines that initialise it are not visible in this excerpt).
177 if (overflowEnd-overflowStart == 0) {
181 overflowEnd += natom;
// Grow the overflow atom buffer to cover [overflowStart, overflowEnd).
182 if (overflowEnd-overflowStart > overflowAtomCapacity) {
183 resize_((
void **)&
overflowAtom, overflowEnd-overflowStart, overflowAtomCapacity,
sizeof(
CudaAtom));
// Same for the overflow index buffer, only when indexing is enabled.
185 if (useIndex && overflowEnd-overflowStart > overflowAtomIndexCapacity) {
186 resize_((
void **)&
overflowAtomIndex, overflowEnd-overflowStart, overflowAtomIndexCapacity,
sizeof(
int));
// Write the new atoms into the last natom slots of the overflow range.
189 if (useIndex) memcpy_(
overflowAtomIndex+overflowEnd-overflowStart-natom, index, natom*
sizeof(
int));
190 copyWithIndex_(
overflowAtom+overflowEnd-overflowStart-natom, src, natom, index);
// Write directly into the main buffers at offset pos.
200 if (useIndex) memcpy_(
atomIndex+pos, index, natom*
sizeof(
int));
201 copyWithIndex_(
atom+pos, src, natom, index);
270 class CProxy_ComputePmeCUDADevice;
273 CProxy_ComputePmeCUDADevice*
dev;
283 const int pencilIndexY,
const int pencilIndexZ,
const int ylo,
const int yhi,
const int zlo,
const int zhi);
291 int pencilCapacity[9+1];
296 class CProxy_ComputePmeCUDAMgr;
304 int deviceID_in,
int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in,
305 CProxy_PmeAtomFiler pmeAtomFiler_in);
334 bool doVirial, doEnergy;
340 int ylo, yhi, zlo, zhi;
342 int yNBlocks, zNBlocks;
346 int pencilIndexY, pencilIndexZ;
349 int numNeighborsExpected;
355 CmiNodeLock lock_numHomePatchesMerged;
356 CmiNodeLock lock_numPencils;
357 CmiNodeLock lock_numNeighborsRecv;
358 CmiNodeLock lock_recvAtoms;
368 std::vector< PmeAtomStorage* > pmeAtomStorage[2];
369 std::vector<bool> pmeAtomStorageAllocatedHere;
373 std::vector<int> numPencils[2];
// Small value type holding a (pp, pencilPatchIndex) pair; instances are stored
// in plList. The constructor copies both arguments into the same-named members.
// NOTE(review): the declaration of member `pp` and the meaning of both fields
// are not visible in this excerpt - confirm against the full file.
376 struct PencilLocation {
380 int pencilPatchIndex;
381 PencilLocation(
int pp,
int pencilPatchIndex) : pp(pp), pencilPatchIndex(pencilPatchIndex) {}
385 std::vector< std::vector<PencilLocation> > plList[2];
388 std::vector< PmeForceMsg* > homePatchForceMsgs[2];
393 std::vector< std::vector<int> > homePatchIndexList[2];
396 int numNeighborsRecv;
399 int numHomePatchesRecv;
402 int numHomePatchesMerged;
405 std::vector< PmeForcePencilMsg* > neighborForcePencilMsgs;
409 std::vector<int> neighborPatchIndex;
424 CProxy_ComputePmeCUDAMgr mgrProxy;
427 CProxy_PmeAtomFiler pmeAtomFiler;
430 CProxy_CudaPmePencilXYZ pmePencilXYZ;
431 CProxy_CudaPmePencilXY pmePencilXY;
432 CProxy_CudaPmePencilX pmePencilX;
435 double beforeWalltime;
// Overloads that take pencil proxies: one for the (XY, Z) pair and one for the
// (X, Y, Z) triple.
// NOTE(review): definitions are not in this header excerpt; presumably they
// store the proxies in the matching pmePencil* members - confirm.
457 void recvPencils(CProxy_CudaPmePencilXY
xy, CProxy_CudaPmePencilZ
z);
458 void recvPencils(CProxy_CudaPmePencilX
x, CProxy_CudaPmePencilY
y, CProxy_CudaPmePencilZ
z);
473 CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
474 return mgrProxy.ckLocalBranch();
479 void restrictToMaxPMEPencils();
484 int numNodesContributed;
488 std::vector<int> numHomePatchesList;
502 std::vector<int> xPes;
503 std::vector<int> yPes;
504 std::vector<int> zPes;
510 std::vector<IJ> ijPencilX;
511 std::vector<IJ> ijPencilY;
512 std::vector<IJ> ijPencilZ;
518 std::vector<NodeDevice> nodeDeviceList;
521 CProxy_PmeAtomFiler pmeAtomFiler;
524 std::vector<CProxy_ComputePmeCUDADevice> deviceProxy;
531 std::vector<ExtraDevice> extraDevices;
534 CProxy_CudaPmePencilXYZ pmePencilXYZ;
535 CProxy_CudaPmePencilXY pmePencilXY;
536 CProxy_CudaPmePencilX pmePencilX;
537 CProxy_CudaPmePencilY pmePencilY;
538 CProxy_CudaPmePencilZ pmePencilZ;
546 #endif // COMPUTEPMECUDAMGR_H
int getHomeNode(PatchID patchID)
void sendAtomsToNeighbor(int y, int z, int atomIval)
void sendForcesToNeighbors()
void recvForcesFromNeighbor(PmeForcePencilMsg *msg)
CProxy_ComputePmeCUDAMgr getMgrProxy()
void initialize(CkQdMsg *msg)
int getDeviceIDPencilX(int i, int j)
void sendForcesToPatch(PmeForceMsg *forceMsg)
void setPencilProxy(CProxy_CudaPmePencilXYZ pmePencilXYZ_in)
void getHomePencil(PatchID patchID, int &homey, int &homez)
static __thread atom * atoms
void recvAtomFiler(CProxy_PmeAtomFiler filer)
void recvAtoms(PmeAtomMsg *msg)
void mergeForcesOnPatch(int homePatchIndex)
PmeAtomStorage(const bool useIndex)
void createDevicesAndAtomFiler()
void recvDevices(RecvDeviceMsg *msg)
int getDeviceIDPencilZ(int i, int j)
__thread cudaStream_t stream
void fileAtoms(const int numAtoms, const CudaAtom *atoms, Lattice &lattice, const PmeGrid &pmeGrid, const int pencilIndexY, const int pencilIndexZ, const int ylo, const int yhi, const int zlo, const int zhi)
void initialize(PmeGrid &pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in, int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in, CProxy_PmeAtomFiler pmeAtomFiler_in)
static ComputePmeCUDAMgr * Object()
void NAMD_bug(const char *err_msg)
int getDevicePencilZ(int i, int j)
void registerRecvAtomsFromNeighbor()
void recvAtomsFromNeighbor(PmeAtomPencilMsg *msg)
virtual ~PmeAtomStorage()
void initializePatches(int numHomePatches_in)
int addAtomsWithIndex(const int natom, const CudaAtom *src, const int *index)
int addAtoms(const int natom, const CudaAtom *src)
int getDevice(int i, int j)
int getDeviceIDPencilY(int i, int j)
void sendAtomsToNeighbors()
void recvAtoms(PmeAtomMsg *msg)
void activate_pencils(CkQdMsg *msg)
ComputePmeCUDAMgr_SDAG_CODE
int getNode(int i, int j)
void initialize_pencils(CkQdMsg *msg)
int getDevicePencilY(int i, int j)
int * getAtomIndex(int p)
void recvPencils(CProxy_CudaPmePencilXYZ xyz)
CProxy_ComputePmeCUDADevice * dev
void gatherForceDoneSubset(int first, int last)
NumDevicesMsg(int numDevices)
bool isPmeDevice(int deviceID)