#include "middle-conv.h"
#define MIN_DEBUG_LEVEL 3
#include "ComputeMgr.decl.h"
#include "ProxyMgr.decl.h"
// ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
#define __thread __declspec(thread)
// ...

// ComputeMgr constructor: record this group and clear cached object pointers.
CkpvAccess(BOCclass_group).computeMgr = thisgroup;
// ...
computeDPMEObject = 0;
computeEwaldObject = 0;
// ...
masterServerObject = NULL;
// Cleanup (destructor): release the shared work arrays, the global-forces master
// server, and any dynamically loaded CudaGlobalMaster client libraries.
delete computeNonbondedWorkArrays;
if (masterServerObject != NULL) delete masterServerObject;
for (auto& loader : CudaGlobalMasterClientDlloaders) {
  iout << iINFO << "Close library " << loader->LibName() << "\n" << endi;
  loader->DLCloseLib();
}
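The clients being closed here were loaded at run time through a DLLoader wrapper. As a generic illustration of the dlopen/dlsym lifecycle such a wrapper manages (open the shared object, look up a factory symbol, keep the handle alive until the created object dies), here is a minimal sketch; Client, make_client, and loadClient are hypothetical names, not the DLLoader API:

// --- illustrative sketch, not part of ComputeMgr.C ---
#include <dlfcn.h>
#include <memory>
#include <stdexcept>
#include <string>

struct Client { virtual ~Client() = default; };

std::shared_ptr<Client> loadClient(const std::string &libName) {
  void *handle = dlopen(libName.c_str(), RTLD_NOW);
  if (!handle) {
    const char *err = dlerror();
    throw std::runtime_error(err ? err : "dlopen failed");
  }
  // The plugin is expected to export a C-linkage factory function.
  using factory_t = Client *(*)();
  auto make = reinterpret_cast<factory_t>(dlsym(handle, "make_client"));
  if (!make) {
    dlclose(handle);
    throw std::runtime_error("make_client not found in " + libName);
  }
  // The custom deleter keeps the library mapped until the last reference dies.
  return std::shared_ptr<Client>(make(), [handle](Client *p) {
    delete p;
    dlclose(handle);
  });
}
// --- end sketch ---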
// ComputeMgr::updateComputes(): remember where to report back, then wait for
// quiescence before starting the multi-phase update.
updateComputesReturnEP = ep;
updateComputesReturnChareID = chareID;
updateComputesCount = CkNumPes();
// ...
NAMD_bug("updateComputes signaled on wrong Pe!");
// ...
CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0), &thishandle);
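The update protocol advances phase by phase on Charm++ quiescence detection: each phase registers the next entry method with CkStartQD() and returns, and the runtime invokes it only after every outstanding message has been delivered. A minimal sketch of that chaining, assuming a hypothetical Charm++ group Phased declared in a .ci file with entry methods start() and phase2(CkQdMsg *):

// --- illustrative sketch, not part of ComputeMgr.C ---
#include "Phased.decl.h"   // hypothetical header generated from the group's .ci file

class Phased : public CBase_Phased {
public:
  void start() {
    // ... do this phase's work and send its messages ...
    // Ask the runtime to deliver phase2 only once the system is quiescent,
    // i.e. every message sent so far has been received and processed.
    CkStartQD(CkIndex_Phased::phase2((CkQdMsg *)0), &thishandle);
  }
  void phase2(CkQdMsg *msg) {
    delete msg;
    // ... safe to begin the next phase here ...
  }
};
// --- end sketch ---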
CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
// ...
// After quiescence: either skip straight to updating local computes or split first.
if ( skipSplitting ) {
  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
} else {
  CProxy_ComputeMgr(thisgroup).splitComputes();
  // ...
}
// ComputeMgr::splitComputes(): on rank 0, partition computes that request it.
if ( ! CkMyRank() ) {
  // ...
  for (int i=0; i<nc; i++) {
    // ...
    CkPrintf("Warning: unable to partition compute %d\n", i);
    // ...
    if (computeMap->newNode(i) == -1) { /* ... */ }
    // ...
    for (int j=1; j<nnp; ++j) { /* ... */ }
    // ...
  }
}
// ...
CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
// ...
// ComputeMgr::splitComputes2(): once splitting has quiesced, update local computes.
CProxy_ComputeMgr(thisgroup).updateLocalComputes();
// ComputeMgr::updateLocalComputes(): flag computes that move onto this PE and
// adjust proxies for computes that move off of it.
CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
ProxyMgr *proxyMgr = pm.ckLocalBranch();
// ...
for (int i=0; i<nc; i++) {
  // ...
  if ( computeMap->node(i) == CkMyPe() && /* ... */ ) { /* ... */ }
  // ...
  if ( computeMap->newNode(i) == CkMyPe() ) computeFlag.add(i);
  // ...
  if ( computeMap->newNode(i) == CkMyPe() && computeMap->node(i) != CkMyPe() ) {
    // ...
    for (int n=0; n < computeMap->numPids(i); n++) { /* ... */ }
    // ...
  }
  else if ( computeMap->node(i) == CkMyPe() &&
            (computeMap->newNode(i) != -1 && computeMap->newNode(i) != CkMyPe()) ) {
    // ...
  }
  // ...
}
// ...
CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
// ComputeMgr::updateLocalComputes2(): after quiescence, move to phase 3.
CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
// ...
// ComputeMgr::updateLocalComputes3(): commit the new assignments, then create
// the computes flagged for this PE.
CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
ProxyMgr *proxyMgr = pm.ckLocalBranch();
// ...
if ( ! CkMyRank() ) {
  for (int i=0; i<nc; i++) {
    // ...
    if (computeMap->newNode(i) != -1) { /* ... */ }
    // ...
  }
}
// ...
for (int i=0; i<computeFlag.size(); i++) createCompute(computeFlag[i], computeMap);
// ...
CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
// ComputeMgr::updateLocalComputes4(): move to phase 5.
CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
// ...
// ComputeMgr::updateLocalComputes5()
if ( ! CkMyRank() ) { /* ... */ }
// ...
CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
// ...
// ComputeMgr::doneUpdateLocalComputes(): report completion to the chare recorded
// in updateComputes().
DebugM(4, "doneUpdateLocalComputes on Pe(" << CkMyPe() << ")\n");
void *msg = CkAllocMsg(0,0,0);
CkSendMsgBranch(updateComputesReturnEP, msg, 0, updateComputesReturnChareID);
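doneUpdateLocalComputes() returns control to whoever called updateComputes() by replaying the entry point and chare/group ID saved at the start of the protocol. A minimal sketch of that saved-entry-point callback, where Mgr, notifyCaller, savedEP, and savedGroup are hypothetical stand-ins for the members recorded above:

// --- illustrative sketch, not part of ComputeMgr.C ---
void Mgr::notifyCaller() {
  // Allocate an empty Charm++ message: message index 0, zero bytes, no priority bits.
  void *msg = CkAllocMsg(0, 0, 0);
  // Deliver it to the saved entry method on PE 0 of the caller's group, which is
  // how the caller learns that the update protocol has finished.
  CkSendMsgBranch(savedEP, msg, 0, savedGroup);
}
// --- end sketch ---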
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
ComputeBondedCUDA* getComputeBondedCUDA() { /* ... */ }
// ...

#ifdef NODEGROUP_FORCE_REGISTER
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData *patchData = cpdata.ckLocalBranch();
suspendCounter = &(patchData->suspendCounter);
// ...

// ComputeMgr::createCompute(): instantiate the compute object described by map entry i.
switch ( map->type(i) )
{
  // Nonbonded self case:
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
  //   ..., computeNonbondedWorkArrays, ...   (constructor arguments elided)
  // ...

  // Eight-patch case (patch IDs and transforms copied from the map entry):
  for (int j = 0; j < 8; j++) {
    pid8[j] = map->computeData[i].pids[j].pid;
    trans8[j] = map->computeData[i].pids[j].trans;
  }
  //   ..., computeNonbondedWorkArrays, ...   (constructor arguments elided)
  // ...

  // Two-patch nonbonded pair case:
  pid2[0] = map->computeData[i].pids[0].pid;
  trans2[0] = map->computeData[i].pids[0].trans;
  pid2[1] = map->computeData[i].pids[1].pid;
  trans2[1] = map->computeData[i].pids[1].trans;
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
  //   ..., computeNonbondedWorkArrays, ...   (constructor arguments elided)
  // ...

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
  case computeBondedCUDAType:
    c = createComputeBondedCUDA(i, this);
    // ...

  // Bonded tuple cases (bonds, angles, dihedrals, impropers, exclusions, crossterms, ...):
  // under BONDED_CUDA each of these cases registers its tuples with the bonded CUDA
  // compute; the same two lines are repeated in every such case.
#if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
    getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
  // ...

  // Bonded self cases register the single patch instead; again the same two lines
  // are repeated in every such case.
#if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
    getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
  // ...

  case computeDPMTAType:
    c = new ComputeDPMTA(i);
    // ...
  case computeDPMEType:
    c = computeDPMEObject = new ComputeDPME(i, this);
    // ...
  // PME:
    c = new ComputePme(i, map->computeData[i].pids[0].pid);
    // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
  // Stir:
    c = new ComputeStir(i, map->computeData[i].pids[0].pid);
    // ...
  // MSM/MSA:
    c = new ComputeMsmMsa(i);
    // ...
  default:
    NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
    // ...
}
#ifdef TRACE_COMPUTE_OBJECTS
// ComputeMgr::registerUserEventsForAllComputeObjs(): build one trace user-event
// label per compute object.
// ...
int adim, bdim, cdim;
// ...
int x1, y1, z1, x2, y2, z2;
// ...
memset(user_des, 0, 50);
switch ( map->type(i) )
{
  // One case per compute type; case labels and breaks are elided below.
  sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
  sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
  // ...
  t1 = map->trans(i, 0);
  // ...
  t2 = map->trans(i, 1);
  // ...
  sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx, dy, dz);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  // ...
  case computeBondedCUDAType:
    sprintf(user_des, "computeBondedCUDAType_%d", i);
  // ...
  sprintf(user_des, "computeExclsType_%d", i);
  sprintf(user_des, "computeBondsType_%d", i);
  sprintf(user_des, "computeAnglesType_%d", i);
  sprintf(user_des, "computeDihedralsType_%d", i);
  sprintf(user_des, "computeImpropersType_%d", i);
  sprintf(user_des, "computeTholeType_%d", i);
  sprintf(user_des, "computeAnisoType_%d", i);
  sprintf(user_des, "computeCrosstermsType_%d", i);
  sprintf(user_des, "computeSelfExclsType_%d", i);
  sprintf(user_des, "computeSelfBondsType_%d", i);
  sprintf(user_des, "computeSelfAnglesType_%d", i);
  sprintf(user_des, "computeSelfDihedralsType_%d", i);
  sprintf(user_des, "computeSelfImpropersType_%d", i);
  sprintf(user_des, "computeSelfTholeType_%d", i);
  sprintf(user_des, "computeSelfAnisoType_%d", i);
  sprintf(user_des, "computeSelfCrosstermsType_%d", i);
  case computeDPMTAType:
    sprintf(user_des, "computeDPMTAType_%d", i);
  // ...
  case computeDPMEType:
    sprintf(user_des, "computeDPMEType_%d", i);
  // ...
  sprintf(user_des, "computePMEType_%d", i);
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  sprintf(user_des, "computePMECUDAType_%d", i);
  // ...
  sprintf(user_des, "computeEwaldType_%d", i);
  sprintf(user_des, "computeFullDirectType_%d", i);
  sprintf(user_des, "computeGlobalType_%d", i);
  sprintf(user_des, "computeStirType_%d", i);
  sprintf(user_des, "computeExtType_%d", i);
  sprintf(user_des, "computeQMType_%d", i);
  sprintf(user_des, "computeEFieldType_%d", i);
  sprintf(user_des, "computeGridForceType_%d", i);
  sprintf(user_des, "computeSphericalBCType_%d", i);
  sprintf(user_des, "computeCylindricalBCType_%d", i);
  sprintf(user_des, "computeTclBCType_%d", i);
  sprintf(user_des, "computeRestraintsType_%d", i);
  sprintf(user_des, "computeConsForceType_%d", i);
  sprintf(user_des, "computeConsTorqueType_%d", i);
  default:
    NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
}
// ...
int user_des_len = strlen(user_des);
char *user_des_cst = new char[user_des_len+1];
memcpy(user_des_cst, user_des, user_des_len);
user_des_cst[user_des_len] = 0;
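Each case formats a label into a fixed 50-byte buffer and then copies it to the heap. A bounded alternative using snprintf, shown only as a sketch (makeTraceLabel is a hypothetical helper, not part of the NAMD tracing API):

// --- illustrative sketch, not part of ComputeMgr.C ---
#include <cstdio>
#include <cstring>

// Build a heap-allocated trace label; snprintf bounds the write, unlike sprintf.
static char *makeTraceLabel(const char *typeName, int id) {
  char buf[64];
  snprintf(buf, sizeof(buf), "%s_%d", typeName, id);
  char *label = new char[std::strlen(buf) + 1];
  std::strcpy(label, buf);
  return label;   // caller owns the copy
}
// --- end sketch ---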
// ComputeMgr::createComputes(): set up the global-forces master on node 0,
// then create this PE's computes.
int myNode = node->myid();
// ...
if ( simParams->globalForcesOn && !myNode )
{
  // ...
  DebugM(4, "Mgr running on Node " << CkMyPe() << "\n");
  // ...
}
// ...
#ifdef NODEGROUP_FORCE_REGISTER
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData *patchData = cpdata.ckLocalBranch();
patchData->master_mgr = this;
// ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
// ...

for (int i=0; i < map->nComputes; i++)
{
  if ( ! ( i % 100 ) ) { /* ... */ }
  // ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
  switch ( map->type(i) )
  {
    // GPU-offloaded nonbonded cases: only the device's master PE creates them.
    if ( ! deviceIsMine ) continue;
    // ...
    if ( ! deviceIsMine ) continue;
    // ...
    // Bonded tuple cases: the same pair of checks appears under each such case.
    if ( ! deviceIsMineBonded ) continue;
    if ( map->computeData[i].node != myNode ) continue;
    // ...
    case computeBondedCUDAType:
      if ( ! deviceIsMineBonded ) continue;
      if ( map->computeData[i].node != myNode ) continue;
      // ...
#endif // BONDED_CUDA
    // ...
    if ( ! deviceIsMine ) continue;
    // ...
    if ( map->computeData[i].node != myNode ) continue;
    // ...
  }
#else // defined(NAMD_CUDA) || defined(NAMD_HIP)
  if ( map->computeData[i].node != myNode ) continue;
  // ...
#endif
  DebugM(1, "Compute " << i << '\n');
  DebugM(1, " node = " << map->computeData[i].node << '\n');
  DebugM(1, " type = " << map->computeData[i].type << '\n');
  DebugM(1, " numPids = " << map->computeData[i].numPids << '\n');
  // ...
  for (int j=0; j < map->computeData[i].numPids; j++)
  {
    DebugM(1, " pid " << map->computeData[i].pids[j].pid << '\n');
    // ...
  }
  DebugM(1, "\n---------------------------------------");
  DebugM(1, "---------------------------------------\n");
  // ...
  createCompute(i, map);
  // ...
}
// ...
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
if (deviceIsMineBonded) {
  getComputeBondedCUDA()->initialize();
}
// ...
// ...
else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
// ComputeMgr::sendComputeGlobalData(): deliver client data to the master on PE 0;
// with the node group available the hand-off can stay inside the node.
#ifdef NODEGROUP_FORCE_REGISTER
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
PatchData *patchData = cpdata.ckLocalBranch();
CmiNodeLock &nl = patchData->nodeLock;
// ...
patchData->master_mgr->recvComputeGlobalData(msg);
// ...
patchData->master_mgr->recvComputeGlobalData(msg);
// ...
DebugM(3, "[" << CkMyPe() << "] calling recvComputeGlobalResults\n");
// ...
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
DebugM(3, "[" << CkMyPe() << "] msg to recvComputeGlobalData\n");
cm[0].recvComputeGlobalData(msg);
#ifdef NODEGROUP_FORCE_REGISTER
// ...
DebugM(3, "[" << CkMyPe() << "] done sendComputeGlobalData\n");
// ComputeMgr::recvComputeGlobalData(): hand the message to the global-forces master server.
if (masterServerObject)
{
  DebugM(3, "[" << CkMyPe() << "] recvComputeGlobalData calling recvData\n");
  // ...
}
else NAMD_die("ComputeMgr::masterServerObject is NULL!");
// ComputeMgr::sendComputeGlobalResults(): broadcast the results message to all PEs.
DebugM(3, "[" << CkMyPe() << "] sendComputeGlobalResults seq " << msg->seq << "\n");
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
for (int pe = 0; pe < CkMyNodeSize(); pe++) {
  if (CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe) != nullptr) {
    // ...
    delete CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe);
    // ...
  }
}
// ...
DebugM(3, "[" << CkMyPe() << "] ComputeMgr::sendComputeGlobalResults invoking bcast recvComputeGlobalResults\n");
thisProxy.recvComputeGlobalResults(msg);
// ...
// On the receiving side the results are handed to the local ComputeGlobal with
// urgent sends enabled.
#ifdef NODEGROUP_FORCE_REGISTER
// ...
CmiEnableUrgentSend(1);
// ...
CmiEnableUrgentSend(0);
// ...
else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
// ComputeMgr::sendComputeEwaldData(): forward to the Ewald master node.
if (computeEwaldObject)
{
  // ...
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  cm[node].recvComputeEwaldData(msg);
}
else NAMD_die("ComputeMgr::computeEwaldObject is NULL!");
// ...
// ComputeMgr::recvComputeEwaldData()
if (computeEwaldObject) { /* ... */ }
else NAMD_die("ComputeMgr::computeEwaldObject in recvData is NULL!");
// ...
// ComputeMgr::recvComputeEwaldResults()
if (computeEwaldObject) {
  CmiEnableUrgentSend(1);
  // ...
  CmiEnableUrgentSend(0);
}
else NAMD_die("ComputeMgr::computeEwaldObject in recvResults is NULL!");
// ComputeMgr::sendComputeDPMEData(): send to the DPME master node.
if ( computeDPMEObject )
{
  // ...
  int node = computeDPMEObject->getMasterNode();
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  cm.recvComputeDPMEData(msg, node);
}
else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
// ...
// ComputeMgr::recvComputeDPMEData()
if ( computeDPMEObject )
{
  // ...
  computeDPMEObject->recvData(msg);
}
else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
// ...
// ComputeMgr::sendComputeDPMEResults()
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
cm[node].recvComputeDPMEResults(msg);
// ...
// ComputeMgr::recvComputeDPMEResults()
if ( computeDPMEObject )
{
  // ...
  computeDPMEObject->recvResults(msg);
}
else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
#ifdef NODEGROUP_FORCE_REGISTER
// ...
CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
cpdata.setDeviceKernelUpdateCounter();
// ...
// ComputeMgr::recvCudaGlobalMasterCreateMsg(): on the designated master PE, load the
// client's shared library (reusing an existing loader if it was already opened),
// instantiate the client, and initialize it.
#ifdef NODEGROUP_FORCE_REGISTER
// ...
DebugM(3, "Call recvCudaGlobalMasterCreateMsg on master PE " << CkMyPe() << ".\n");
// ...
std::shared_ptr<CudaGlobalMasterClient> client = nullptr;
const std::string library_name = args[0];
// ...
std::shared_ptr<dlloader::DLLoader<CudaGlobalMasterClient>> loader = nullptr;
for (auto it = CudaGlobalMasterClientDlloaders.begin();
     it != CudaGlobalMasterClientDlloaders.end(); ++it) {
  if ((*it)->LibName() == library_name) {
    // ...
  }
}
// ...
if (loader == nullptr) {
  // ...
}
// ...
try {
  iout << iINFO << "Loading library " << library_name
       << " on PE: " << CkMyPe() << "\n" << endi;
  loader->DLOpenLib();
  client = loader->DLGetInstance();
} catch (std::exception& e) {
  iout << iERROR << "Cannot load the shared library " << library_name << "\n" << endi;
  // ...
}
// ...
try {
  client->initialize(args, /* ... */);
  // ...
  iout << iINFO << "CudaGlobalMaster client \"" << client->name()
       << "\"" << " initialized\n" << endi;
} catch (std::exception& e) {
  iout << iERROR << "Cannot initialize the CudaGlobalMaster client from "
       << library_name << "\n" << endi;
  // ...
}
// ...
CudaGlobalMasterClientDlloaders.push_back(loader);
// ...
DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on master PE " <<
       CkMyPe() << " that is not scheduled for GPU-resident global master.\n");
// ...
DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on non-master PE " << CkMyPe() << ".\n");
// ...
#endif // NODEGROUP_FORCE_REGISTER
// ...
NAMD_die("GPU-resident mode is not enabled.\n");
// ...
NAMD_die("GPU-resident external forces are not enabled.\n");
// ComputeMgr::recvCudaGlobalMasterRemoveMsg(): find the named client and detach it.
const std::string client_name_to_remove = args[0];
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
std::shared_ptr<CudaGlobalMasterClient> c = nullptr;
const std::vector<std::shared_ptr<CudaGlobalMasterClient>>& clients = gm->getClients();
for (size_t i = 0; i < clients.size(); ++i) {
  if (client_name_to_remove == clients[i]->name()) {
    // ...
  }
}
// ...
gm->removeClient(c);
iout << iINFO << "CudaGlobalMasterClient \""
     << client_name_to_remove << "\" removed\n" << endi;
// ...
const std::string error = "CudaGlobalMasterClient \""
                        + client_name_to_remove + "\" not found";
// ...
#endif // NODEGROUP_FORCE_REGISTER
// ...
NAMD_die("GPU-resident mode is not enabled.\n");
// ...
NAMD_die("GPU-resident external forces are not enabled.\n");
// ComputeMgr::recvCudaGlobalMasterUpdateMsg(): forward a TCL update to the named
// client and send its result string back to PE 0.
std::vector<std::string> result_args;
// ...
const std::string client_name_to_update = args[0];
// ...
#ifdef NODEGROUP_FORCE_REGISTER
// ...
std::shared_ptr<CudaGlobalMasterClient> c = nullptr;
const std::vector<std::shared_ptr<CudaGlobalMasterClient>>& clients = gm->getClients();
for (size_t i = 0; i < clients.size(); ++i) {
  if (client_name_to_update == clients[i]->name()) {
    // ...
  }
}
// ...
result_args.push_back(client_name_to_update);
result_args.push_back(c->updateFromTCLCommand(args));
iout << iINFO << "CudaGlobalMasterClient \""
     << client_name_to_update << "\" updated\n" << endi;
// ...
const std::string error = "CudaGlobalMasterClient \""
                        + client_name_to_update + "\" not found";
// ...
#endif // NODEGROUP_FORCE_REGISTER
// ...
NAMD_die("GPU-resident mode is not enabled.\n");
// ...
NAMD_die("GPU-resident external forces are not enabled.\n");
// ...
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
cm[0].recvCudaGlobalMasterUpdateResultMsg(result_args);
// ComputeMgr::recvCudaGlobalMasterUpdateResultMsg(): PE 0 caches the result keyed
// by client name.
if (CkMyPe() == 0) {
  if (!args.empty()) {
    CudaGlobalMasterClientUpdateResults[args[0]] = args[1];
  }
  // ...
} else {
  const std::string error =
      "recvCudaGlobalMasterUpdateResultMsg is called on " +
      std::to_string(CkMyPe()) + " but expected on PE 0!\n";
  // ...
}
// ...
// ComputeMgr::getCudaGlobalMasterUpdateResult()
return CudaGlobalMasterClientUpdateResults.at(client_name);
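Update results travel from the PE that owns the client back to PE 0, which caches the latest result per client name for getCudaGlobalMasterUpdateResult() to return. A small self-contained sketch of that cache; the std::map type and the helper names are assumptions, not the actual member declarations:

// --- illustrative sketch, not part of ComputeMgr.C ---
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

static std::map<std::string, std::string> updateResults;   // client name -> last result

// Store args = { client_name, result_string }, as the PE-0 handler above does.
void storeUpdateResult(const std::vector<std::string> &args) {
  if (args.size() >= 2) updateResults[args[0]] = args[1];
}

// Mirrors the .at() lookup above: throws std::out_of_range for an unknown client.
std::string getUpdateResult(const std::string &client) {
  return updateResults.at(client);
}
// --- end sketch ---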
// ComputeMgr::sendYieldDevice()
CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
cm[pe].recvYieldDevice(CkMyPe());
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
// ...
// send*/recv* helpers for CudaComputeNonbonded: each sender wraps the compute
// pointer in a CudaComputeNonbondedMsg and forwards it to the listed PEs.
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
}
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
}
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
}
// ...
thisProxy[pe].recvFinishPatchOnPe(msg);
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
}
// ...
thisProxy[pe].recvFinishReductions(msg);
// ...
thisProxy[pe].recvMessageEnqueueWork(msg);
// ...
thisProxy[pe].recvLaunchWork(msg);
// ...
for (int i=0; i < pes.size(); i++) {
  // ...
  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
}
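Every send*/recv* pair above follows the same shape: wrap the compute pointer in a small Charm++ message, send it to each listed PE through the group proxy, and have the receiving entry method call the corresponding per-PE step. A sketch of that fan-out with hypothetical names (WorkMsg, ComputeMgrSketch, sendWork, recvWork) standing in for the message and entry methods that would be declared in the module's .ci file:

// --- illustrative sketch, not part of ComputeMgr.C ---
#include <vector>
// Assumes a .ci declaration along the lines of:
//   message WorkMsg;  entry void recvWork(WorkMsg *);
class WorkMsg : public CMessage_WorkMsg {
public:
  CudaComputeNonbonded *c;   // raw pointer: valid only when the target PEs share the sender's address space
};

void ComputeMgrSketch::sendWork(std::vector<int> &pes, CudaComputeNonbonded *c) {
  for (size_t i = 0; i < pes.size(); i++) {
    WorkMsg *msg = new WorkMsg;          // Charm++ message, freed by the receiver
    msg->c = c;
    thisProxy[pes[i]].recvWork(msg);     // asynchronous delivery on the target PE
  }
}

void ComputeMgrSketch::recvWork(WorkMsg *msg) {
  msg->c->assignPatchesOnPe();           // run the per-PE step of the compute
  delete msg;
}
// --- end sketch ---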
// The same pattern for ComputeBondedCUDA: a small message type carries the compute pointer.
class ComputeBondedCUDAMsg : public CMessage_ComputeBondedCUDAMsg {
public:
  ComputeBondedCUDA* c;
  // ...
};

// sendAssignPatchesOnPe / recvAssignPatchesOnPe
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
}
// ...
msg->c->assignPatchesOnPe();
// ...
// sendMessageEnqueueWork / recvMessageEnqueueWork
ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
// ...
thisProxy[pe].recvMessageEnqueueWork(msg);
// ...
msg->c->messageEnqueueWork();
// ...
// sendOpenBoxesOnPe / recvOpenBoxesOnPe (priority-carrying message)
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
}
// ...
msg->c->openBoxesOnPe();
// ...
void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
  for (int i=0; i < pes.size(); i++) {
    ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
    // ...
    thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
  }
}
// ...
void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
  msg->c->loadTuplesOnPe();
  // ...
}
// ...
// sendLaunchWork / recvLaunchWork
ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
// ...
thisProxy[pe].recvLaunchWork(msg);
// ...
msg->c->launchWork();
// ...
// sendFinishPatchesOnPe / recvFinishPatchesOnPe
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
}
// ...
msg->c->finishPatchesOnPe();
// ...
// sendFinishReductions / recvFinishReductions
ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
// ...
thisProxy[pe].recvFinishReductions(msg);
// ...
msg->c->finishReductions();
// ...
// sendUnregisterBoxesOnPe / recvUnregisterBoxesOnPe
for (int i=0; i < pes.size(); i++) {
  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
  // ...
  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
}
// ...
msg->c->unregisterBoxesOnPe();
#endif // BONDED_CUDA
// ...
#include "ComputeMgr.def.h"