1 #ifndef COMPUTEPMECUDAMGR_H 2 #define COMPUTEPMECUDAMGR_H 5 #include <cuda_runtime.h> 8 #include <hip/hip_runtime.h> 16 #include "ComputePmeCUDAMgr.decl.h" 19 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 31 atomIndexCapacity = 0;
35 overflowAtomCapacity = 0;
37 overflowAtomIndexCapacity = 0;
46 lock_ = CmiCreateLock();
49 CmiDestroyLock(lock_);
// Adds natom atoms from src without an accompanying index array.
// Forwards to addAtoms_ with a NULL index pointer and returns its result
// unchanged.
52 int addAtoms(
const int natom,
const CudaAtom* src,
const std::vector<float*>& lambdaArrays) {
53 return addAtoms_(natom, src, NULL, lambdaArrays);
57 return addAtoms_(natom, src, index, lambdaArrays);
63 if (overflowEnd-overflowStart > 0) {
64 resize_((
void **)&
atom, numAtoms, atomCapacity,
sizeof(
CudaAtom));
65 if (useIndex) resize_((
void **)&
atomIndex, numAtoms, atomIndexCapacity,
sizeof(
int));
115 if (
bool(
simParams.alchElecLambdaStart)) {
131 return patchPos.data();
135 return patchPos.size();
144 NAMD_bug(
"PmeAtomStorage::getAtomIndex, no indexing enabled");
173 int overflowAtomCapacity;
174 int overflowAtomIndexCapacity;
180 std::vector<int> patchPos;
184 int atomIndexCapacity;
// Grows the buffer *array so that it can hold at least sizeRequested elements.
//   array          - address of the buffer pointer; may point to NULL
//   sizeRequested  - number of elements the caller needs
//   arrayCapacity  - current capacity in elements; updated to the new capacity
//   sizeofType     - size of one element in bytes
// NOTE(review): only part of this function is visible here; the lines that
// release the old buffer and store the new pointer are presumably among the
// missing ones - confirm against the full source.
187 void resize_(
void **array,
int sizeRequested,
int& arrayCapacity,
const size_t sizeofType) {
// Nothing to do when a buffer already exists and is large enough.
189 if (*array != NULL && arrayCapacity >= sizeRequested)
return;
// Over-allocate by a factor of 1.5 relative to the request.
// NOTE(review): the product is computed in double and truncated to int.
192 int newArrayCapacity = (int)(sizeRequested*1.5);
// Allocation goes through the pure-virtual alloc_ hook; the argument is a byte count.
193 void* newArray = alloc_(sizeofType*newArrayCapacity);
195 if (*array != NULL) {
// Carry over the old contents: arrayCapacity elements, expressed in bytes.
// NOTE(review): the size_t byte count is passed to memcpy_'s const int parameter.
197 memcpy_(newArray, *array, arrayCapacity*sizeofType);
203 arrayCapacity = newArrayCapacity;
// Copies size bytes from src to dst using the standard memcpy.
// Declared virtual, so a derived class can substitute a different copy routine.
// NOTE(review): size is a const int, while resize_ passes a size_t product
// (arrayCapacity*sizeofType); the value is narrowed at the call.
207 virtual void memcpy_(
void *dst,
const void* src,
const int size) {
208 memcpy(dst, src, size);
// Gathers natom elements from src into dst: dst[i] = src[indexSrc[i]] for
// i in [0, natom). indexSrc is dereferenced unconditionally, so it must be
// non-null whenever natom > 0.
215 template <
typename array_type>
216 void copyWithIndex_(array_type* dst,
const array_type* src,
const int natom,
const int* indexSrc) {
217 for (
int i=0;i < natom;i++) dst[i] = src[indexSrc[i]];
// Pure-virtual allocation hook. resize_ calls it with a byte count
// (sizeofType*newArrayCapacity) and uses the returned pointer as the new buffer.
221 virtual void* alloc_(
const size_t size)=0;
// Pure-virtual deallocation hook; presumably releases a pointer obtained from
// alloc_ - confirm against the implementing classes.
224 virtual void dealloc_(
void *p)=0;
// Shared implementation behind the public add-atoms entry points.
//   natom        - number of atoms being added
//   src          - source atom array
//   index        - index array passed to copyWithIndex_; the public addAtoms
//                  passes NULL here
//   lambdaArrays - per-entry float arrays; entries may be NULL (checked below)
// NOTE(review): many lines of this function are not visible here (loop headers,
// the non-overflow branch structure, the definition of pos, the return value),
// so the comments below describe only the statements that are shown.
228 int addAtoms_(
const int natom,
const CudaAtom* src,
const int* index,
const std::vector<float*>& lambdaArrays) {
// patchPos stores a running total: each new entry is the previous entry plus
// natom (or just natom for the first one).
232 int patchInd = patchPos.size();
233 int ppos = (patchInd == 0) ? natom : patchPos[patchInd-1] + natom;
234 patchPos.push_back(ppos);
236 bool overflow =
false;
// Taken when numAtoms exceeds the atom capacity, or the index capacity when
// indexing is enabled.
239 if (numAtoms > atomCapacity || (useIndex && numAtoms > atomIndexCapacity)) {
// Special handling when the overflow range [overflowStart, overflowEnd) is empty.
243 if (overflowEnd-overflowStart == 0) {
// Extend the overflow range by the atoms being added.
247 overflowEnd += natom;
// Grow the overflow buffers when the range no longer fits.
248 if (overflowEnd-overflowStart > overflowAtomCapacity) {
249 resize_((
void **)&
overflowAtom, overflowEnd-overflowStart, overflowAtomCapacity,
sizeof(
CudaAtom));
251 if (useIndex && overflowEnd-overflowStart > overflowAtomIndexCapacity) {
252 resize_((
void **)&
overflowAtomIndex, overflowEnd-overflowStart, overflowAtomIndexCapacity,
sizeof(
int));
// The new data goes into the last natom slots of the overflow buffers.
263 if (useIndex) memcpy_(
overflowAtomIndex+overflowEnd-overflowStart-natom, index, natom*
sizeof(
int));
264 copyWithIndex_(
overflowAtom+overflowEnd-overflowStart-natom, src, natom, index);
// Entries of lambdaArrays that are NULL are skipped.
267 if (lambdaArrays[i] != NULL) {
276 if (lambdaArrays[i] != NULL) {
// Writes into the main atom/atomIndex buffers at offset pos
// (pos is defined on a line not visible here).
288 if (useIndex) memcpy_(
atomIndex+pos, index, natom*
sizeof(
int));
289 copyWithIndex_(
atom+pos, src, natom, index);
292 if (lambdaArrays[i] != NULL) {
301 if (lambdaArrays[i] != NULL) {
393 class CProxy_ComputePmeCUDADevice;
396 CProxy_ComputePmeCUDADevice*
dev;
406 const int pencilIndexY,
const int pencilIndexZ,
const int ylo,
const int yhi,
const int zlo,
const int zhi);
414 int pencilCapacity[9+1];
419 class CProxy_ComputePmeCUDAMgr;
427 int deviceID_in,
int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in,
428 CProxy_PmeAtomFiler pmeAtomFiler_in);
459 bool doVirial, doEnergy;
465 int ylo, yhi, zlo, zhi;
467 int yNBlocks, zNBlocks;
471 int pencilIndexY, pencilIndexZ;
474 int numNeighborsExpected;
480 CmiNodeLock lock_numHomePatchesMerged;
481 CmiNodeLock lock_numPencils;
482 CmiNodeLock lock_numNeighborsRecv;
483 CmiNodeLock lock_recvAtoms;
493 std::vector< PmeAtomStorage* > pmeAtomStorage[2];
494 std::vector<bool> pmeAtomStorageAllocatedHere;
498 std::vector<int> numPencils[2];
501 struct PencilLocation {
505 int pencilPatchIndex;
506 PencilLocation(
int pp,
int pencilPatchIndex) : pp(pp), pencilPatchIndex(pencilPatchIndex) {}
510 std::vector< std::vector<PencilLocation> > plList[2];
513 std::vector< PmeForceMsg* > homePatchForceMsgs[2];
518 std::vector< std::vector<int> > homePatchIndexList[2];
521 int numNeighborsRecv;
524 int numHomePatchesRecv;
527 int numHomePatchesMerged;
530 std::vector< PmeForcePencilMsg* > neighborForcePencilMsgs;
534 std::vector<int> neighborPatchIndex;
548 std::array<PmeRealSpaceCompute*, NUM_GRID_MAX> pmeRealSpaceComputes;
549 std::array<bool, NUM_GRID_MAX> enabledGrid;
551 std::array<size_t, NUM_GRID_MAX> forceCapacities;
552 std::array<CudaForce*, NUM_GRID_MAX> forces;
562 std::array<int, NUM_GRID_MAX> forceReady;
565 CProxy_ComputePmeCUDAMgr mgrProxy;
568 CProxy_PmeAtomFiler pmeAtomFiler;
571 CProxy_CudaPmePencilXYZ pmePencilXYZ;
572 CProxy_CudaPmePencilXY pmePencilXY;
573 CProxy_CudaPmePencilX pmePencilX;
576 double beforeWalltime;
598 void recvPencils(CProxy_CudaPmePencilXY
xy, CProxy_CudaPmePencilZ z);
599 void recvPencils(CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z);
614 CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
615 return mgrProxy.ckLocalBranch();
620 void restrictToMaxPMEPencils();
625 int numNodesContributed;
629 std::vector<int> numHomePatchesList;
643 std::vector<int> xPes;
644 std::vector<int> yPes;
645 std::vector<int> zPes;
651 std::vector<IJ> ijPencilX;
652 std::vector<IJ> ijPencilY;
653 std::vector<IJ> ijPencilZ;
659 std::vector<NodeDevice> nodeDeviceList;
662 CProxy_PmeAtomFiler pmeAtomFiler;
665 std::vector<CProxy_ComputePmeCUDADevice> deviceProxy;
672 std::vector<ExtraDevice> extraDevices;
675 CProxy_CudaPmePencilXYZ pmePencilXYZ;
676 CProxy_CudaPmePencilXY pmePencilXY;
677 CProxy_CudaPmePencilX pmePencilX;
678 CProxy_CudaPmePencilY pmePencilY;
679 CProxy_CudaPmePencilZ pmePencilZ;
687 #endif // COMPUTEPMECUDAMGR_H int getHomeNode(PatchID patchID)
void sendAtomsToNeighbor(int y, int z, int atomIval)
void sendForcesToNeighbors()
void recvForcesFromNeighbor(PmeForcePencilMsg *msg)
CProxy_ComputePmeCUDAMgr getMgrProxy()
void initialize(CkQdMsg *msg)
void gatherForceDone(unsigned int iGrid)
int getDeviceIDPencilX(int i, int j)
void sendForcesToPatch(PmeForceMsg *forceMsg)
void setPencilProxy(CProxy_CudaPmePencilXYZ pmePencilXYZ_in)
void getHomePencil(PatchID patchID, int &homey, int &homez)
int addAtomsWithIndex(const int natom, const CudaAtom *src, const int *index, const std::vector< float *> &lambdaArrays)
void recvAtomFiler(CProxy_PmeAtomFiler filer)
void recvAtoms(PmeAtomMsg *msg)
int addAtoms(const int natom, const CudaAtom *src, const std::vector< float *> &lambdaArrays)
void setupAlch(const SimParameters &simParams)
std::vector< bool > enabledGrid
void mergeForcesOnPatch(int homePatchIndex)
PmeAtomStorage(const bool useIndex)
void createDevicesAndAtomFiler()
const unsigned int NUM_GRID_MAX
void recvDevices(RecvDeviceMsg *msg)
int getDeviceIDPencilZ(int i, int j)
std::vector< int > overflowAtomElecFactorCapacities
void fileAtoms(const int numAtoms, const CudaAtom *atoms, Lattice &lattice, const PmeGrid &pmeGrid, const int pencilIndexY, const int pencilIndexZ, const int ylo, const int yhi, const int zlo, const int zhi)
void initialize(PmeGrid &pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in, int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in, CProxy_PmeAtomFiler pmeAtomFiler_in)
static ComputePmeCUDAMgr * Object()
bool isGridEnabled(unsigned int i) const
void NAMD_bug(const char *err_msg)
int getDevicePencilZ(int i, int j)
void registerRecvAtomsFromNeighbor()
void recvAtomsFromNeighbor(PmeAtomPencilMsg *msg)
virtual ~PmeAtomStorage()
void initializePatches(int numHomePatches_in)
unsigned int totalFactorArrays
int getDevice(int i, int j)
int getDeviceIDPencilY(int i, int j)
void sendAtomsToNeighbors()
void recvAtoms(PmeAtomMsg *msg)
void activate_pencils(CkQdMsg *msg)
ComputePmeCUDAMgr_SDAG_CODE
int getNode(int i, int j)
void initialize_pencils(CkQdMsg *msg)
std::vector< float * > overflowAtomElecFactorArrays
int getDevicePencilY(int i, int j)
int * getAtomIndex(int p)
void recvPencils(CProxy_CudaPmePencilXYZ xyz)
CProxy_ComputePmeCUDADevice * dev
void gatherForceDoneSubset(int first, int last)
float * getAtomElecFactors(unsigned int iGrid)
NumDevicesMsg(int numDevices)
bool isPmeDevice(int deviceID)
std::vector< float * > atomElecFactorArrays