9 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 11 #define __thread __declspec(thread) 23 cudaPmeOneDevice = NULL;
24 cudaGlobalMasterObject =
nullptr;
32 NAMD_bug(
"ComputeCUDAMgr cannot be migrated");
36 cudaPmeOneDevice = NULL;
37 cudaGlobalMasterObject =
nullptr;
44 for (
int i=0;i < numDevices;i++) {
45 if (cudaNonbondedTablesList[i] != NULL)
delete cudaNonbondedTablesList[i];
46 if (cudaComputeNonbondedList[i] != NULL)
delete cudaComputeNonbondedList[i];
48 if (computeBondedCUDAList[i] != NULL)
delete computeBondedCUDAList[i];
64 if (msg != NULL)
delete msg;
67 #ifdef NODEGROUP_FORCE_REGISTER 68 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
69 PatchData *pdata = cpdata.ckLocalBranch();
71 pdata->devData.resize(numDevices);
75 allocate_host<bool*>(&(pdata->h_devHasForces),ndevs);
76 allocate_host<double*>(&(pdata->h_soa_fb_x), ndevs);
77 allocate_host<double*>(&(pdata->h_soa_fb_y), ndevs);
78 allocate_host<double*>(&(pdata->h_soa_fb_z), ndevs);
80 allocate_host<double*>(&(pdata->h_soa_fn_x), ndevs);
81 allocate_host<double*>(&(pdata->h_soa_fn_y), ndevs);
82 allocate_host<double*>(&(pdata->h_soa_fn_z), ndevs);
84 allocate_host<double*>(&(pdata->h_soa_fs_x), ndevs);
85 allocate_host<double*>(&(pdata->h_soa_fs_y), ndevs);
86 allocate_host<double*>(&(pdata->h_soa_fs_z), ndevs);
88 allocate_host<double*>(&(pdata->h_soa_pos_x), ndevs);
89 allocate_host<double*>(&(pdata->h_soa_pos_y), ndevs);
90 allocate_host<double*>(&(pdata->h_soa_pos_z), ndevs);
131 allocate_host<CudaExclusionStage*>(&(pdata->h_tupleDataStage.modifiedExclusion),
deviceCUDA->
getNumDevice());
137 allocate_host<unsigned int*>(&(pdata->d_queues), ndevs);
138 allocate_host<unsigned int>(&(pdata->d_queueCounters), ndevs);
140 cudaCheck(cudaMemset(pdata->d_queueCounters, 0,
sizeof(
unsigned int)*ndevs));
146 pdata->maxNumBonds.store(0);
147 pdata->maxNumAngles.store(0);
148 pdata->maxNumDihedrals.store(0);
149 pdata->maxNumImpropers.store(0);
150 pdata->maxNumModifiedExclusions.store(0);
151 pdata->maxNumExclusions.store(0);
152 pdata->maxNumCrossterms.store(0);
153 pdata->devicePatchMapFlag.resize(CkNumPes(), 0);
154 #ifdef NAMD_NCCL_ALLREDUCE 158 pdata->ncclId =
deviceCUDA->getNcclUniqueId();
164 allocate_device<double3*>(&
curSMDCOM,
sizeof(double3*)*numDevices);
174 for(
int i=0;i<
simParams->groupRestraintsCount;i++)
176 allocate_device<double3*>(&
curGrp1COM[i],
sizeof(double3*)*numDevices);
177 allocate_device<double3*>(&
curGrp2COM[i],
sizeof(double3*)*numDevices);
188 cudaNonbondedTablesList.resize(numDevices, NULL);
189 cudaComputeNonbondedList.resize(numDevices, NULL);
191 computeBondedCUDAList.resize(numDevices, NULL);
193 if (cudaPmeOneDevice != NULL)
delete cudaPmeOneDevice;
194 cudaPmeOneDevice = NULL;
208 if ( CkMyRank() )
NAMD_bug(
"ComputeCUDAMgr::update() should be called only by rank 0");
212 cudaNonbondedTablesList[deviceID]->updateTables();
218 CProxy_ComputeCUDAMgr computeCUDAMgrProxy = CkpvAccess(BOCclass_group).computeCUDAMgr;
219 ComputeCUDAMgr* computeCUDAMgr = computeCUDAMgrProxy.ckLocalBranch();
220 if (computeCUDAMgr == NULL)
221 NAMD_bug(
"getComputeCUDAMgr, unable to locate local branch of BOC entry ComputeCUDAMgr");
222 return computeCUDAMgr;
233 pmeGrid.
dim2 = pmeGrid.
K2;
234 pmeGrid.
dim3 = 2 * (pmeGrid.
K3/2 + 1);
245 #ifdef NODEGROUP_FORCE_REGISTER 249 if (cudaPmeOneDevice != NULL)
delete cudaPmeOneDevice;
251 return cudaPmeOneDevice;
255 return cudaPmeOneDevice;
263 if (cudaComputeNonbondedList.at(deviceID) != NULL)
264 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded called twice");
265 if (cudaNonbondedTablesList.at(deviceID) == NULL)
266 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
269 cudaComputeNonbondedList[deviceID] =
new CudaComputeNonbonded(c, deviceID, *cudaNonbondedTablesList[deviceID], doStreaming);
270 return cudaComputeNonbondedList[deviceID];
281 NAMD_bug(
"ComputeCUDAMgr::getCudaComputeNonbonded(), device not created yet");
289 ComputeBondedCUDA* ComputeCUDAMgr::createComputeBondedCUDA(
ComputeID c,
ComputeMgr* computeMgr) {
291 if (computeBondedCUDAList.at(deviceID) != NULL)
292 NAMD_bug(
"ComputeCUDAMgr::createComputeBondedCUDA called twice");
293 if (cudaNonbondedTablesList.at(deviceID) == NULL)
294 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
295 computeBondedCUDAList[deviceID] =
new ComputeBondedCUDA(c, computeMgr, deviceID, *cudaNonbondedTablesList[deviceID]);
296 return computeBondedCUDAList[deviceID];
302 ComputeBondedCUDA* ComputeCUDAMgr::getComputeBondedCUDA() {
305 ComputeBondedCUDA* p = computeBondedCUDAList[deviceID];
307 NAMD_bug(
"ComputeCUDAMgr::getComputeBondedCUDA(), device not created yet");
310 #endif // BONDED_CUDA 313 return cudaGlobalMasterObject;
317 iout <<
iINFO <<
"Creating CUDAGlobalMaster on PE " << CkMyPe() <<
'\n' <<
endi;
318 if (cudaGlobalMasterObject) {
319 return cudaGlobalMasterObject;
323 cudaGlobalMasterObject = std::make_shared<CudaGlobalMasterServer>(deviceID,
simParams->cudaGlobalProfilingFreq);
324 return cudaGlobalMasterObject;
327 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP) 329 #include "ComputeCUDAMgr.def.h"
std::ostream & iINFO(std::ostream &s)
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
SimParameters * simParameters
void initialize(CkQdMsg *msg)
std::ostream & endi(std::ostream &s)
int getGlobalDevice() const
void NAMD_bug(const char *err_msg)
static ComputeCUDAMgr * getComputeCUDAMgr()
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster()
int getDeviceIDbyRank(int rank)
CudaComputeNonbonded * getCudaComputeNonbonded()
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
__thread DeviceCUDA * deviceCUDA
CudaPmeOneDevice * createCudaPmeOneDevice()
CudaPmeOneDevice * getCudaPmeOneDevice()