9 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 11 #define __thread __declspec(thread) 23 cudaPmeOneDevice = NULL;
25 cudaGlobalMasterObject =
nullptr;
34 NAMD_bug(
"ComputeCUDAMgr cannot be migrated");
38 cudaPmeOneDevice = NULL;
40 cudaGlobalMasterObject =
nullptr;
48 for (
int i=0;i < numDevices;i++) {
49 if (cudaNonbondedTablesList[i] != NULL)
delete cudaNonbondedTablesList[i];
50 if (cudaComputeNonbondedList[i] != NULL)
delete cudaComputeNonbondedList[i];
52 if (computeBondedCUDAList[i] != NULL)
delete computeBondedCUDAList[i];
55 delete cudaPmeOneDevice;
63 if (msg != NULL)
delete msg;
66 #ifdef NODEGROUP_FORCE_REGISTER 67 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
68 PatchData *pdata = cpdata.ckLocalBranch();
70 pdata->devData.resize(numDevices);
74 allocate_host<bool*>(&(pdata->h_devHasForces),ndevs);
75 allocate_host<double*>(&(pdata->h_soa_fb_x), ndevs);
76 allocate_host<double*>(&(pdata->h_soa_fb_y), ndevs);
77 allocate_host<double*>(&(pdata->h_soa_fb_z), ndevs);
79 allocate_host<double*>(&(pdata->h_soa_fn_x), ndevs);
80 allocate_host<double*>(&(pdata->h_soa_fn_y), ndevs);
81 allocate_host<double*>(&(pdata->h_soa_fn_z), ndevs);
83 allocate_host<double*>(&(pdata->h_soa_fs_x), ndevs);
84 allocate_host<double*>(&(pdata->h_soa_fs_y), ndevs);
85 allocate_host<double*>(&(pdata->h_soa_fs_z), ndevs);
87 allocate_host<double*>(&(pdata->h_soa_pos_x), ndevs);
88 allocate_host<double*>(&(pdata->h_soa_pos_y), ndevs);
89 allocate_host<double*>(&(pdata->h_soa_pos_z), ndevs);
130 allocate_host<CudaExclusionStage*>(&(pdata->h_tupleDataStage.modifiedExclusion),
deviceCUDA->
getNumDevice());
136 allocate_host<unsigned int*>(&(pdata->d_queues), ndevs);
137 allocate_host<unsigned int>(&(pdata->d_queueCounters), ndevs);
139 cudaCheck(cudaMemset(pdata->d_queueCounters, 0,
sizeof(
unsigned int)*ndevs));
145 pdata->maxNumBonds.store(0);
146 pdata->maxNumAngles.store(0);
147 pdata->maxNumDihedrals.store(0);
148 pdata->maxNumImpropers.store(0);
149 pdata->maxNumModifiedExclusions.store(0);
150 pdata->maxNumExclusions.store(0);
151 pdata->maxNumCrossterms.store(0);
152 pdata->devicePatchMapFlag.resize(CkNumPes(), 0);
153 #ifdef NAMD_NCCL_ALLREDUCE 157 pdata->ncclId =
deviceCUDA->getNcclUniqueId();
162 cudaNonbondedTablesList.resize(numDevices, NULL);
163 cudaComputeNonbondedList.resize(numDevices, NULL);
165 computeBondedCUDAList.resize(numDevices, NULL);
167 if (cudaPmeOneDevice != NULL)
delete cudaPmeOneDevice;
168 cudaPmeOneDevice = NULL;
185 if ( CkMyRank() )
NAMD_bug(
"ComputeCUDAMgr::update() should be called only by rank 0");
189 cudaNonbondedTablesList[deviceID]->updateTables();
195 CProxy_ComputeCUDAMgr computeCUDAMgrProxy = CkpvAccess(BOCclass_group).computeCUDAMgr;
196 ComputeCUDAMgr* computeCUDAMgr = computeCUDAMgrProxy.ckLocalBranch();
197 if (computeCUDAMgr == NULL)
198 NAMD_bug(
"getComputeCUDAMgr, unable to locate local branch of BOC entry ComputeCUDAMgr");
199 return computeCUDAMgr;
210 pmeGrid.
dim2 = pmeGrid.
K2;
211 pmeGrid.
dim3 = 2 * (pmeGrid.
K3/2 + 1);
222 #ifdef NODEGROUP_FORCE_REGISTER 226 if (cudaPmeOneDevice != NULL)
delete cudaPmeOneDevice;
228 return cudaPmeOneDevice;
232 return cudaPmeOneDevice;
240 if (cudaComputeNonbondedList.at(deviceID) != NULL)
241 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded called twice");
242 if (cudaNonbondedTablesList.at(deviceID) == NULL)
243 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
246 cudaComputeNonbondedList[deviceID] =
new CudaComputeNonbonded(c, deviceID, *cudaNonbondedTablesList[deviceID], doStreaming);
247 return cudaComputeNonbondedList[deviceID];
258 NAMD_bug(
"ComputeCUDAMgr::getCudaComputeNonbonded(), device not created yet");
266 ComputeBondedCUDA* ComputeCUDAMgr::createComputeBondedCUDA(
ComputeID c,
ComputeMgr* computeMgr) {
268 if (computeBondedCUDAList.at(deviceID) != NULL)
269 NAMD_bug(
"ComputeCUDAMgr::createComputeBondedCUDA called twice");
270 if (cudaNonbondedTablesList.at(deviceID) == NULL)
271 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
272 computeBondedCUDAList[deviceID] =
new ComputeBondedCUDA(c, computeMgr, deviceID, *cudaNonbondedTablesList[deviceID]);
273 return computeBondedCUDAList[deviceID];
279 ComputeBondedCUDA* ComputeCUDAMgr::getComputeBondedCUDA() {
282 ComputeBondedCUDA* p = computeBondedCUDAList[deviceID];
284 NAMD_bug(
"ComputeCUDAMgr::getComputeBondedCUDA(), device not created yet");
287 #endif // BONDED_CUDA 291 return cudaGlobalMasterObject;
295 iout <<
iINFO <<
"Creating CUDAGlobalMaster on PE " << CkMyPe() <<
'\n' <<
endi;
296 if (cudaGlobalMasterObject) {
297 return cudaGlobalMasterObject;
301 cudaGlobalMasterObject = std::make_shared<CudaGlobalMasterServer>(deviceID,
simParams->cudaGlobalProfilingFreq);
302 return cudaGlobalMasterObject;
306 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP) 308 #include "ComputeCUDAMgr.def.h"
std::ostream & iINFO(std::ostream &s)
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
SimParameters * simParameters
void initialize(CkQdMsg *msg)
std::ostream & endi(std::ostream &s)
int getGlobalDevice() const
void NAMD_bug(const char *err_msg)
static ComputeCUDAMgr * getComputeCUDAMgr()
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster()
int getDeviceIDbyRank(int rank)
CudaComputeNonbonded * getCudaComputeNonbonded()
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
__thread DeviceCUDA * deviceCUDA
CudaPmeOneDevice * createCudaPmeOneDevice()
CudaPmeOneDevice * getCudaPmeOneDevice()