#include "middle-conv.h"
#define MIN_DEBUG_LEVEL 3
#include "ComputeMgr.decl.h"
#include "ProxyMgr.decl.h"
// ...

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
#define __thread __declspec(thread)
// ...
#endif

// ComputeMgr constructor: register this group in the processor-private
// BOC table and null out the optional full-electrostatics and
// global-master objects.
CkpvAccess(BOCclass_group).computeMgr = thisgroup;
// ...
computeDPMEObject = 0;
computeEwaldObject = 0;
// ...
masterServerObject = NULL;
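// ComputeMgr destructor: free the per-PE nonbonded work arrays and the
// GlobalMaster server object (if one was created on this PE), then close
// any dynamically loaded CudaGlobalMaster client libraries.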
delete computeNonbondedWorkArrays;
if (masterServerObject != NULL) delete masterServerObject;
for (auto& loader : CudaGlobalMasterClientDlloaders) {
  iout << iINFO << "Close library " << loader->LibName() << "\n" << endi;
  loader->DLCloseLib();
}
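// ComputeMgr::updateComputes(): start of the compute-map update sequence
// driven by load balancing.  Record where the completion message must be
// sent, then wait for quiescence before continuing in updateComputes2().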
updateComputesReturnEP = ep;
updateComputesReturnChareID = chareID;
updateComputesCount = CkNumPes();
// ...
NAMD_bug("updateComputes signaled on wrong Pe!");
// ...
CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0), &thishandle);
CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
// ...
if ( skipSplitting ) {
  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
} else {
  CProxy_ComputeMgr(thisgroup).splitComputes();
}
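// ComputeMgr::splitComputes(): on rank 0 of each node, walk the compute
// map and clone computes whose partition count is changing, then start
// quiescence detection so splitComputes2() can continue.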
if ( ! CkMyRank() ) {
  // ...
  for (int i=0; i<nc; i++) {
    // ...
    CkPrintf("Warning: unable to partition compute %d\n", i);
    // ...
    if (computeMap->newNode(i) == -1) {
      // ...
    }
    for ( int j=1; j<nnp; ++j ) {
      // ...
    }
  }
}
// ...
CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
CProxy_ComputeMgr(thisgroup).updateLocalComputes();
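// ComputeMgr::updateLocalComputes(): each PE scans the compute map,
// flags computes that will now run here (computeFlag), creates proxies
// for their patches, and unregisters computes that are moving away.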
CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
ProxyMgr *proxyMgr = pm.ckLocalBranch();
// ...
for (int i=0; i<nc; i++) {
  // ...
  if ( computeMap->node(i) == CkMyPe() && /* ... */ )
  // ...
  if ( computeMap->newNode(i) == CkMyPe() ) computeFlag.add(i);
  // ...
  if (computeMap->newNode(i) == CkMyPe() && computeMap->node(i) != CkMyPe())
  // ...
  for (int n=0; n < computeMap->numPids(i); n++)
  // ...
  else if (computeMap->node(i) == CkMyPe() &&
      (computeMap->newNode(i) != -1 && computeMap->newNode(i) != CkMyPe() ))
  // ...
}
// ...
CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
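// ComputeMgr::updateLocalComputes3(): rank 0 commits the new node
// assignments into the shared compute map, then every PE instantiates
// its newly flagged computes via createCompute().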
CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
ProxyMgr *proxyMgr = pm.ckLocalBranch();
// ...
if ( ! CkMyRank() ) {
  for (int i=0; i<nc; i++) {
    if (computeMap->newNode(i) != -1) {
      // ...
    }
  }
}
// ...
for (int i=0; i<computeFlag.size(); i++) createCompute(computeFlag[i], computeMap);
// ...
CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
// ...
if ( ! CkMyRank() ) {
  // ...
}
// ...
CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
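// ComputeMgr::doneUpdateLocalComputes(): the compute-map update is
// finished; send an empty message back to the entry point recorded in
// updateComputes().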
DebugM(4, "doneUpdateLocalComputes on Pe(" << CkMyPe() << ")\n");
void *msg = CkAllocMsg(0,0,0);
CkSendMsgBranch(updateComputesReturnEP, msg, 0, updateComputesReturnChareID);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
ComputeBondedCUDA* getComputeBondedCUDA() {
  // ...
}
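// ComputeMgr::createCompute(): instantiate one Compute object according
// to the type recorded in the compute map; bonded terms are registered
// with the shared CUDA bonded compute when it is enabled.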
#ifdef NODEGROUP_FORCE_REGISTER
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData *patchData = cpdata.ckLocalBranch();
suspendCounter = &(patchData->suspendCounter);
#endif
// ...
switch ( map->type(i) )
{
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
    computeNonbondedWorkArrays,
  // ...
    for (int j = 0; j < 8; j++) {
      pid8[j] = map->computeData[i].pids[j].pid;
      trans8[j] = map->computeData[i].pids[j].trans;
    }
    // ...
    computeNonbondedWorkArrays,
  // ...
    pid2[0] = map->computeData[i].pids[0].pid;
    trans2[0] = map->computeData[i].pids[0].trans;
    pid2[1] = map->computeData[i].pids[1].pid;
    trans2[1] = map->computeData[i].pids[1].trans;
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
    // ...
    computeNonbondedWorkArrays,
  // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  case computeBondedCUDAType:
    c = createComputeBondedCUDA(i, this);
#if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
    getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
    // ... (this #if/registerCompute pattern is repeated under each of the
    // nine bonded pair-compute cases: excls, bonds, angles, dihedrals,
    // impropers, Tholes, anisos, crossterms, and one-four nbTholes)
#if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
    getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
    // ... (likewise repeated under each of the nine bonded self-compute cases)
  case computeDPMTAType:
    c = new ComputeDPMTA(i);
    // ...
  case computeDPMEType:
    c = computeDPMEObject = new ComputeDPME(i, this);
    // ...
    c = new ComputePme(i, map->computeData[i].pids[0].pid);
    // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
    // ...
    c = new ComputeStir(i, map->computeData[i].pids[0].pid);
    // ...
    c = new ComputeMsmMsa(i);
    // ...
  default:
    NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
#ifdef TRACE_COMPUTE_OBJECTS
  // ...
  int adim, bdim, cdim;
  // ...
  int x1, y1, z1, x2, y2, z2;
  // ...
  memset(user_des, 0, 50);
  switch ( map->type(i) )
  { // (case labels and break statements are elided below; each sprintf
    // sits under its own case)
    sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
    sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
    // ...
    t1 = map->trans(i, 0);
    // ...
    t2 = map->trans(i, 1);
    // ...
    sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx, dy, dz);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
    case computeBondedCUDAType:
      sprintf(user_des, "computeBondedCUDAType_%d", i);
#endif
    sprintf(user_des, "computeExclsType_%d", i);
    sprintf(user_des, "computeBondsType_%d", i);
    sprintf(user_des, "computeAnglesType_%d", i);
    sprintf(user_des, "computeDihedralsType_%d", i);
    sprintf(user_des, "computeImpropersType_%d", i);
    sprintf(user_des, "computeTholeType_%d", i);
    sprintf(user_des, "computeAnisoType_%d", i);
    sprintf(user_des, "computeCrosstermsType_%d", i);
    sprintf(user_des, "computeOneFourNbTholeType_%d", i);
    sprintf(user_des, "computeSelfExclsType_%d", i);
    sprintf(user_des, "computeSelfBondsType_%d", i);
    sprintf(user_des, "computeSelfAnglesType_%d", i);
    sprintf(user_des, "computeSelfDihedralsType_%d", i);
    sprintf(user_des, "computeSelfImpropersType_%d", i);
    sprintf(user_des, "computeSelfTholeType_%d", i);
    sprintf(user_des, "computeSelfAnisoType_%d", i);
    sprintf(user_des, "computeSelfCrosstermsType_%d", i);
    sprintf(user_des, "computeSelfOneFourNbTholeType_%d", i);
    case computeDPMTAType:
      sprintf(user_des, "computeDPMTAType_%d", i);
    case computeDPMEType:
      sprintf(user_des, "computeDPMEType_%d", i);
    sprintf(user_des, "computePMEType_%d", i);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
    sprintf(user_des, "computePMECUDAType_%d", i);
#endif
    sprintf(user_des, "computeEwaldType_%d", i);
    sprintf(user_des, "computeFullDirectType_%d", i);
    sprintf(user_des, "computeGlobalType_%d", i);
    sprintf(user_des, "computeStirType_%d", i);
    sprintf(user_des, "computeExtType_%d", i);
    sprintf(user_des, "computeQMType_%d", i);
    sprintf(user_des, "computeEFieldType_%d", i);
    sprintf(user_des, "computeGridForceType_%d", i);
    sprintf(user_des, "computeSphericalBCType_%d", i);
    sprintf(user_des, "computeCylindricalBCType_%d", i);
    sprintf(user_des, "computeTclBCType_%d", i);
    sprintf(user_des, "computeRestraintsType_%d", i);
    sprintf(user_des, "computeConsForceType_%d", i);
    sprintf(user_des, "computeConsTorqueType_%d", i);
    default:
      NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
  }
  int user_des_len = strlen(user_des);
  char *user_des_cst = new char[user_des_len+1];
  memcpy(user_des_cst, user_des, user_des_len);
  user_des_cst[user_des_len] = 0;  // null-terminate the copied label
  // ...
#endif
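// ComputeMgr::createComputes(): one-time startup pass over the global
// compute map; each node (or GPU master PE, for offloaded compute types)
// instantiates only the computes assigned to it.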
int myNode = node->myid();
// ...
if ( simParams->globalForcesOn && !myNode )
// ...
DebugM(4, "Mgr running on Node " << CkMyPe() << "\n");
// ...
#ifdef NODEGROUP_FORCE_REGISTER
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData *patchData = cpdata.ckLocalBranch();
patchData->master_mgr = this;
#endif
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
#endif
// ...
for (int i=0; i < map->nComputes; i++)
{
  if ( ! ( i % 100 ) )
  {
    // ...
  }
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  switch ( map->type(i) )
  {
    // ...
    if ( ! deviceIsMine ) continue;
    // ...
    if ( ! deviceIsMine ) continue;
    // ... (the following pair of guards is repeated under each
    // bonded-term case when BONDED_CUDA is enabled)
    if ( ! deviceIsMineBonded ) continue;
    if ( map->computeData[i].node != myNode ) continue;
    // ...
    case computeBondedCUDAType:
      if ( ! deviceIsMineBonded ) continue;
      if ( map->computeData[i].node != myNode ) continue;
      // ...
#endif // BONDED_CUDA
    // ...
    if ( ! deviceIsMine ) continue;
    // ...
    if ( map->computeData[i].node != myNode ) continue;
    // ...
  }
#else // defined(NAMD_CUDA) || defined(NAMD_HIP)
  if ( map->computeData[i].node != myNode ) continue;
#endif
  // ...
  DebugM(1, "Compute " << i << '\n');
  DebugM(1, "  node = " << map->computeData[i].node << '\n');
  DebugM(1, "  type = " << map->computeData[i].type << '\n');
  DebugM(1, "  numPids = " << map->computeData[i].numPids << '\n');
  for (int j=0; j < map->computeData[i].numPids; j++)
  {
    DebugM(1, "  pid " << map->computeData[i].pids[j].pid << '\n');
  }
  DebugM(1, "\n---------------------------------------");
  DebugM(1, "---------------------------------------\n");
  createCompute(i, map);
}
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
if (deviceIsMineBonded) {
  getComputeBondedCUDA()->initialize();
}
// ...
else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
#ifdef NODEGROUP_FORCE_REGISTER
// ...
#endif
// ...
#ifdef NODEGROUP_FORCE_REGISTER
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData *patchData = cpdata.ckLocalBranch();
CmiNodeLock &nl = patchData->nodeLock;
// ...
patchData->master_mgr->recvComputeGlobalData(msg);
// ...
patchData->master_mgr->recvComputeGlobalData(msg);
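// ComputeGlobal relay: client data messages from every PE are funneled
// to the GlobalMaster server on PE 0 (ComputeMgr element 0), and result
// messages are broadcast back to the clients.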
DebugM(3, "[" << CkMyPe() << "] calling recvComputeGlobalResults\n");
// ...
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
DebugM(3, "[" << CkMyPe() << "] msg to recvComputeGlobalData\n");
cm[0].recvComputeGlobalData(msg);
#ifdef NODEGROUP_FORCE_REGISTER
// ...
#endif
DebugM(3, "[" << CkMyPe() << "] done sendComputeGlobalData\n");
if (masterServerObject)
{
  DebugM(3, "[" << CkMyPe() << "] recvComputeGlobalData calling recvData\n");
  // ...
}
else NAMD_die("ComputeMgr::masterServerObject is NULL!");
DebugM(3, "[" << CkMyPe() << "] sendComputeGlobalResults seq " << msg->seq << "\n");
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
for (int pe = 0; pe < CkMyNodeSize(); pe++) {
  if (CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe) != nullptr) {
    // ...
    delete CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe);
    // ...
  }
}
// ...
DebugM(3, "[" << CkMyPe()
       << "] ComputeMgr::sendComputeGlobalResults invoking bcast recvComputeGlobalResults\n");
thisProxy.recvComputeGlobalResults(msg);
#ifdef NODEGROUP_FORCE_REGISTER
// ...
CmiEnableUrgentSend(1);
// ...
CmiEnableUrgentSend(0);
// ...
else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
if (computeEwaldObject)
{
  // ...
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  cm[node].recvComputeEwaldData(msg);
}
else NAMD_die("ComputeMgr::computeEwaldObject is NULL!");

if (computeEwaldObject)
  // ...
else NAMD_die("ComputeMgr::computeEwaldObject in recvData is NULL!");

if (computeEwaldObject) {
  CmiEnableUrgentSend(1);
  // ...
  CmiEnableUrgentSend(0);
}
else NAMD_die("ComputeMgr::computeEwaldObject in recvResults is NULL!");
if ( computeDPMEObject )
{
  // ...
  int node = computeDPMEObject->getMasterNode();
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  cm.recvComputeDPMEData(msg, node);
}
else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");

if ( computeDPMEObject )
{
  // ...
  computeDPMEObject->recvData(msg);
}
else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");

CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
cm[node].recvComputeDPMEResults(msg);

if ( computeDPMEObject )
{
  // ...
  computeDPMEObject->recvResults(msg);
}
else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
#ifdef NODEGROUP_FORCE_REGISTER
// ...
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
cpdata.setDeviceKernelUpdateCounter();
// ...
#endif
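// ComputeMgr::recvCudaGlobalMasterCreateMsg(): on the GPU-resident
// master PE, dlopen the shared library named in args[0], instantiate
// and initialize its CudaGlobalMasterClient, and remember the loader so
// the library can be closed in the destructor.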
#if (defined(NAMD_CUDA) || defined(NAMD_HIP))
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
DebugM(3, "Call recvCudaGlobalMasterCreateMsg on master PE " << CkMyPe() << ".\n");
// ...
std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client = nullptr;
const std::string library_name = args[0];
// ...
std::shared_ptr<dlloader::DLLoader<CudaGlobalMaster::CudaGlobalMasterClient>> loader = nullptr;
for (auto it = CudaGlobalMasterClientDlloaders.begin();
     it != CudaGlobalMasterClientDlloaders.end(); ++it) {
  if ((*it)->LibName() == library_name) {
    // ...
  }
}
if (loader == nullptr) {
  // ...
}
// ...
try {
  iout << iINFO << "Loading library " << library_name
       << " on PE: " << CkMyPe() << "\n" << endi;
  loader->DLOpenLib();
  client = loader->DLGetInstance();
} catch (std::exception& e) {
  iout << iERROR << "Cannot load the shared library " << library_name
       << "\n" << endi;
  // ...
}
// ...
try {
  client->initialize(args, /* ... */);
  iout << iINFO << "CudaGlobalMaster client \"" << client->name()
       << "\" initialized\n" << endi;
} catch (std::exception& e) {
  iout << iERROR << "Cannot initialize the CudaGlobalMaster client from "
       << library_name << "\n" << endi;
  // ...
}
// ...
CudaGlobalMasterClientDlloaders.push_back(loader);
// ...
DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on master PE " << CkMyPe()
       << " that is not scheduled for GPU-resident global master.\n");
// ...
DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on non-master PE " << CkMyPe() << ".\n");
#endif // NODEGROUP_FORCE_REGISTER
// ...
NAMD_die("GPU-resident mode is not enabled.\n");
// ...
NAMD_die("GPU-resident external forces are not enabled.\n");
#endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))

// ComputeMgr::recvCudaGlobalMasterRemoveMsg(): look up the client named
// in args[0] among the CudaGlobalMasterServer's clients and detach it.
#if (defined(NAMD_CUDA) || defined(NAMD_HIP))
const std::string client_name_to_remove = args[0];
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> c = nullptr;
const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>>& clients = gm->getClients();
for (size_t i = 0; i < clients.size(); ++i) {
  if (client_name_to_remove == clients[i]->name()) {
    // ...
  }
}
// ...
gm->removeClient(c);
iout << iINFO << "CudaGlobalMasterClient \"" << client_name_to_remove
     << "\" removed\n" << endi;
// ...
const std::string error = "CudaGlobalMasterClient \"" + client_name_to_remove
                        + "\" not found";
// ...
#endif // NODEGROUP_FORCE_REGISTER
// ...
NAMD_die("GPU-resident mode is not enabled.\n");
// ...
NAMD_die("GPU-resident external forces are not enabled.\n");
#endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))

// ComputeMgr::recvCudaGlobalMasterUpdateMsg(): forward a TCL update
// command to the named client, collect its result string, and send both
// back to PE 0.
#if (defined(NAMD_CUDA) || defined(NAMD_HIP))
std::vector<std::string> result_args;
// ...
const std::string client_name_to_update = args[0];
// ...
int error_code = TCL_OK;
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> c = nullptr;
const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>>& clients = gm->getClients();
for (size_t i = 0; i < clients.size(); ++i) {
  if (client_name_to_update == clients[i]->name()) {
    // ...
  }
}
// ...
result_args.push_back(client_name_to_update);
error_code = c->updateFromTCLCommand(args);
result_args.push_back(c->getTCLUpdateResult());
iout << iINFO << "CudaGlobalMasterClient \"" << client_name_to_update
     << "\" updated\n" << endi;
// ...
const std::string error = "CudaGlobalMasterClient \"" + client_name_to_update
                        + "\" not found";
// ...
#endif // NODEGROUP_FORCE_REGISTER
// ...
NAMD_die("GPU-resident mode is not enabled.\n");
// ...
NAMD_die("GPU-resident external forces are not enabled.\n");
// ...
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
cm[0].recvCudaGlobalMasterUpdateResultMsg(error_code, result_args);
#endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))

// ComputeMgr::recvCudaGlobalMasterUpdateResultMsg(): runs on PE 0 only;
// cache the client's TCL error code and result string so the getters
// below can report them.
if (CkMyPe() == 0) {
  if (!args.empty()) {
    CudaGlobalMasterClientUpdateResults[args[0]] = tcl_error_code;
    CudaGlobalMasterClientUpdateResultStrings[args[0]] = args[1];
  }
  // ...
} else {
  const std::string error =
    "recvCudaGlobalMasterUpdateResultMsg is called on " +
    std::to_string(CkMyPe()) + " but expected on PE 0!\n";
  // ...
}
// ...
return CudaGlobalMasterClientUpdateResults.at(client_name);
// ...
return CudaGlobalMasterClientUpdateResultStrings.at(client_name);
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
cm[pe].recvYieldDevice(CkMyPe());
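// Message fan-out helpers for CudaComputeNonbonded: each send* routine
// posts a message to the listed PEs, and the matching recv* routine runs
// the corresponding phase of the compute on that PE.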
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
}
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
}
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
}
// ...
thisProxy[pe].recvFinishPatchOnPe(msg);
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
}
// ...
thisProxy[pe].recvFinishReductions(msg);
// ...
thisProxy[pe].recvMessageEnqueueWork(msg);
// ...
thisProxy[pe].recvLaunchWork(msg);
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
}
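// Equivalent fan-out helpers for ComputeBondedCUDA, using a dedicated
// Charm++ message type that carries the compute pointer.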
class ComputeBondedCUDAMsg : public CMessage_ComputeBondedCUDAMsg {
public:
  ComputeBondedCUDA* c;
  // ...
};
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
}
// ...
msg->c->assignPatchesOnPe();
// ...
ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
// ...
thisProxy[pe].recvMessageEnqueueWork(msg);
// ...
msg->c->messageEnqueueWork();
// ...
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
}
// ...
msg->c->openBoxesOnPe();
// ...
void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
  for (int i=0; i < pes.size(); i++) {
    ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
    // ...
    thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
  }
}

void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
  msg->c->loadTuplesOnPe();
}
// ...
ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
// ...
thisProxy[pe].recvLaunchWork(msg);
// ...
msg->c->launchWork();
// ...
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
}
// ...
msg->c->finishPatchesOnPe();
// ...
ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
// ...
thisProxy[pe].recvFinishReductions(msg);
// ...
msg->c->finishReductions();
// ...
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
}
// ...
msg->c->unregisterBoxesOnPe();

#endif // BONDED_CUDA

#include "ComputeMgr.def.h"