11 #define MIN_DEBUG_LEVEL 1
#include <new>
#include "ComputeMgr.decl.h"
#include "ProxyMgr.decl.h"
102 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
104 #define __thread __declspec(thread)
111 CkpvAccess(BOCclass_group).computeMgr = thisgroup;
115 computeDPMEObject = 0;
116 computeEwaldObject = 0;
117 computeNonbondedCUDAObject = 0;
118 computeNonbondedMICObject = 0;
121 masterServerObject = NULL;
123 #if defined(NAMD_MIC)
126 int numPEs = CkNumPes();
127 int numInts = ((numPEs + (
sizeof(int)*8-1)) & (~(
sizeof(
int)*8-1))) / (
sizeof(
int)*8);
128 micPEData =
new int[numInts];
129 if (micPEData == NULL) {
NAMD_die(
"Unable to allocate memory for micPEData"); }
130 memset(micPEData, 0,
sizeof(
int) * numInts);
138 delete computeNonbondedWorkArrays;
139 if (masterServerObject != NULL)
delete masterServerObject;
144 updateComputesReturnEP = ep;
145 updateComputesReturnChareID = chareID;
146 updateComputesCount = CkNumPes();
150 NAMD_bug(
"updateComputes signaled on wrong Pe!");
153 CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0),&thishandle);
160 CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
167 if ( skipSplitting ) {
168 CProxy_ComputeMgr(thisgroup).updateLocalComputes();
170 CProxy_ComputeMgr(thisgroup).splitComputes();
177 if ( ! CkMyRank() ) {
181 for (
int i=0; i<nc; i++) {
185 CkPrintf(
"Warning: unable to partition compute %d\n", i);
191 if (computeMap->
newNode(i) == -1) {
194 for (
int j=1; j<nnp; ++j ) {
205 CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
212 CProxy_ComputeMgr(thisgroup).updateLocalComputes();
218 CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
219 ProxyMgr *proxyMgr = pm.ckLocalBranch();
225 for (
int i=0; i<nc; i++) {
227 if ( computeMap->
node(i) == CkMyPe() &&
233 if ( computeMap->
newNode(i) == CkMyPe() ) computeFlag.
add(i);
235 if (computeMap->
newNode(i) == CkMyPe() && computeMap->
node(i) != CkMyPe())
238 for (
int n=0; n < computeMap->
numPids(i); n++)
243 else if (computeMap->
node(i) == CkMyPe() &&
244 (computeMap->
newNode(i) != -1 && computeMap->
newNode(i) != CkMyPe() ))
254 CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
262 CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
269 CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
270 ProxyMgr *proxyMgr = pm.ckLocalBranch();
276 if ( ! CkMyRank() ) {
277 for (
int i=0; i<nc; i++) {
279 if (computeMap->
newNode(i) != -1) {
286 for(
int i=0; i<computeFlag.
size(); i++) createCompute(computeFlag[i], computeMap);
293 CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
301 CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
318 if ( ! CkMyRank() ) {
345 CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
352 DebugM(4,
"doneUpdateLocalComputes on Pe("<<CkMyPe()<<
")\n");
353 void *msg = CkAllocMsg(0,0,0);
354 CkSendMsgBranch(updateComputesReturnEP,msg,0,updateComputesReturnChareID);
358 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
369 ComputeBondedCUDA* getComputeBondedCUDA() {
392 switch ( map->
type(i) )
395 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
401 #elif defined(NAMD_MIC)
402 if (map->directToDevice(i) == 0) {
404 computeNonbondedWorkArrays,
414 computeNonbondedWorkArrays,
422 for (
int j = 0; j < 8; j++) {
423 pid8[j] = map->computeData[i].pids[j].pid;
424 trans8[j] = map->computeData[i].pids[j].trans;
427 computeNonbondedWorkArrays,
435 pid2[0] = map->computeData[i].pids[0].pid;
436 trans2[0] = map->computeData[i].pids[0].trans;
437 pid2[1] = map->computeData[i].pids[1].pid;
438 trans2[1] = map->computeData[i].pids[1].trans;
439 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
445 #elif defined(NAMD_MIC)
446 if (map->directToDevice(i) == 0) {
448 computeNonbondedWorkArrays,
458 computeNonbondedWorkArrays,
465 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
486 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
488 case computeBondedCUDAType:
489 c = createComputeBondedCUDA(i,
this);
495 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
499 getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
510 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
514 getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
525 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
529 getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
540 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
544 getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
555 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
559 getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
582 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
586 getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
610 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
613 getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
623 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
626 getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
636 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
639 getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
649 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
652 getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
662 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
665 getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
685 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
688 getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
698 case computeDPMTAType:
699 c =
new ComputeDPMTA(i);
705 case computeDPMEType:
706 c = computeDPMEObject =
new ComputeDPME(i,
this);
712 c =
new ComputePme(i,map->computeData[i].pids[0].pid);
716 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
741 c =
new ComputeStir(i,map->computeData[i].pids[0].pid);
772 c =
new ComputeMsmMsa(i);
825 NAMD_bug(
"Unknown compute type in ComputeMgr::createCompute().");
832 #ifdef TRACE_COMPUTE_OBJECTS
837 int adim, bdim, cdim;
839 int x1, y1, z1, x2, y2, z2;
843 memset(user_des, 0, 50);
844 switch ( map->
type(i) )
847 sprintf(user_des,
"computeNonBondedSelfType_%d_pid_%d", i, map->
pid(i,0));
850 sprintf(user_des,
"computeLCPOType_%d_pid_%d", i, map->
pid(i,0));
857 t1 = map->
trans(i, 0);
862 t2 = map->
trans(i, 1);
869 sprintf(user_des,
"computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
871 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
873 case computeBondedCUDAType:
874 sprintf(user_des,
"computeBondedCUDAType_%d", i);
879 sprintf(user_des,
"computeExclsType_%d", i);
882 sprintf(user_des,
"computeBondsType_%d", i);
885 sprintf(user_des,
"computeAnglesType_%d", i);
888 sprintf(user_des,
"computeDihedralsType_%d", i);
891 sprintf(user_des,
"computeImpropersType_%d", i);
894 sprintf(user_des,
"computeTholeType_%d", i);
897 sprintf(user_des,
"computeAnisoType_%d", i);
900 sprintf(user_des,
"computeCrosstermsType_%d", i);
903 sprintf(user_des,
"computeSelfExclsType_%d", i);
906 sprintf(user_des,
"computeSelfBondsType_%d", i);
909 sprintf(user_des,
"computeSelfAnglesType_%d", i);
912 sprintf(user_des,
"computeSelfDihedralsType_%d", i);
915 sprintf(user_des,
"computeSelfImpropersType_%d", i);
918 sprintf(user_des,
"computeSelfTholeType_%d", i);
921 sprintf(user_des,
"computeSelfAnisoType_%d", i);
924 sprintf(user_des,
"computeSelfCrosstermsType_%d", i);
927 case computeDPMTAType:
928 sprintf(user_des,
"computeDPMTAType_%d", i);
932 case computeDPMEType:
933 sprintf(user_des,
"computeDPMEType_%d", i);
937 sprintf(user_des,
"computePMEType_%d", i);
939 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
941 sprintf(user_des,
"computePMECUDAType_%d", i);
945 sprintf(user_des,
"computeEwaldType_%d", i);
948 sprintf(user_des,
"computeFullDirectType_%d", i);
951 sprintf(user_des,
"computeGlobalType_%d", i);
954 sprintf(user_des,
"computeStirType_%d", i);
957 sprintf(user_des,
"computeExtType_%d", i);
960 sprintf(user_des,
"computeQMType_%d", i);
963 sprintf(user_des,
"computeEFieldType_%d", i);
967 sprintf(user_des,
"computeGridForceType_%d", i);
971 sprintf(user_des,
"computeSphericalBCType_%d", i);
974 sprintf(user_des,
"computeCylindricalBCType_%d", i);
977 sprintf(user_des,
"computeTclBCType_%d", i);
980 sprintf(user_des,
"computeRestraintsType_%d", i);
983 sprintf(user_des,
"computeConsForceType_%d", i);
986 sprintf(user_des,
"computeConsTorqueType_%d", i);
989 NAMD_bug(
"Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
992 int user_des_len = strlen(user_des);
993 char *user_des_cst =
new char[user_des_len+1];
994 memcpy(user_des_cst, user_des, user_des_len);
995 user_des_cst[user_des_len] = 0;
1016 int myNode = node->
myid();
1020 DebugM(4,
"Mgr running on Node "<<CkMyPe()<<
"\n");
1032 if (simParams->
SMDOn)
1045 if (simParams->
TMDOn)
1061 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1072 bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
1080 for (
int i=0; i < map->nComputes; i++)
1082 if ( ! ( i % 100 ) )
1086 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1087 switch ( map->
type(i) )
1089 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1097 if ( ! deviceIsMine )
continue;
1102 if ( ! deviceIsMine )
continue;
1110 if ( ! deviceIsMineBonded )
continue;
1113 if ( map->computeData[i].node != myNode )
continue;
1120 if ( ! deviceIsMineBonded )
continue;
1123 if ( map->computeData[i].node != myNode )
continue;
1130 if ( ! deviceIsMineBonded )
continue;
1133 if ( map->computeData[i].node != myNode )
continue;
1140 if ( ! deviceIsMineBonded )
continue;
1143 if ( map->computeData[i].node != myNode )
continue;
1150 if ( ! deviceIsMineBonded )
continue;
1153 if ( map->computeData[i].node != myNode )
continue;
1160 if ( ! deviceIsMineBonded )
continue;
1163 if ( map->computeData[i].node != myNode )
continue;
1167 case computeBondedCUDAType:
1168 if ( ! deviceIsMineBonded )
continue;
1169 if ( map->computeData[i].node != myNode )
continue;
1177 if (map->directToDevice(i) != 0) {
1178 if ( ! deviceIsMine )
continue;
1181 if (map->computeData[i].node != myNode) {
continue; }
1186 if (map->directToDevice(i)) {
1187 if ( ! deviceIsMine )
continue;
1190 if (map->computeData[i].node != myNode) {
continue; }
1196 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1203 if ( ! deviceIsMine )
continue;
1205 if ( map->computeData[i].node != myNode )
continue;
1207 #else // defined(NAMD_CUDA) || defined(NAMD_MIC)
1208 if ( map->computeData[i].node != myNode )
continue;
1210 DebugM(1,
"Compute " << i <<
'\n');
1211 DebugM(1,
" node = " << map->computeData[i].node <<
'\n');
1212 DebugM(1,
" type = " << map->computeData[i].type <<
'\n');
1213 DebugM(1,
" numPids = " << map->computeData[i].numPids <<
'\n');
1215 for (
int j=0; j < map->computeData[i].numPids; j++)
1217 DebugM(1,
" pid " << map->computeData[i].pids[j].pid <<
'\n');
1221 DebugM(1,
"\n---------------------------------------");
1222 DebugM(1,
"---------------------------------------\n");
1224 createCompute(i, map);
1228 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1235 if ( computeNonbondedCUDAObject ) {
1241 if (deviceIsMineBonded) {
1242 getComputeBondedCUDA()->initialize();
1248 if ( computeNonbondedMICObject ) {
1268 else NAMD_die(
"ComputeMgr::computeGlobalObject is NULL!");
1274 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1275 cm[0].recvComputeGlobalData(msg);
1280 if (masterServerObject)
1284 else NAMD_die(
"ComputeMgr::masterServerObject is NULL!");
1290 thisProxy.recvComputeGlobalResults(msg);
1311 CmiEnableUrgentSend(1);
1313 CmiEnableUrgentSend(0);
1319 else NAMD_die(
"ComputeMgr::computeGlobalObject is NULL!");
1327 if (computeEwaldObject)
1330 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1331 cm[node].recvComputeEwaldData(msg);
1335 CkPrintf(
"skipping message on Pe(%d)\n", CkMyPe());
1338 else NAMD_die(
"ComputeMgr::computeEwaldObject is NULL!");
1343 if (computeEwaldObject)
1345 else NAMD_die(
"ComputeMgr::computeEwaldObject in recvData is NULL!");
1355 if (computeEwaldObject) {
1356 CmiEnableUrgentSend(1);
1358 CmiEnableUrgentSend(0);
1361 else NAMD_die(
"ComputeMgr::computeEwaldObject in recvResults is NULL!");
1366 if ( computeDPMEObject )
1369 int node = computeDPMEObject->getMasterNode();
1370 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1371 cm.recvComputeDPMEData(msg,node);
1375 else NAMD_die(
"ComputeMgr::computeDPMEObject is NULL!");
1380 if ( computeDPMEObject )
1383 computeDPMEObject->recvData(msg);
1387 else NAMD_die(
"ComputeMgr::computeDPMEObject is NULL!");
1392 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1393 cm[node].recvComputeDPMEResults(msg);
1398 if ( computeDPMEObject )
1401 computeDPMEObject->recvResults(msg);
1405 else NAMD_die(
"ComputeMgr::computeDPMEObject is NULL!");
1435 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1436 cm[pe].recvYieldDevice(CkMyPe());
1440 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1449 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1450 int pe = CkNodeFirst(CkMyNode());
1451 int end = pe + CkNodeSize(CkMyNode());
1452 for( ; pe != end; ++pe ) {
1453 cm[pe].recvBuildCudaExclusions();
1457 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1462 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1468 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1469 int pe = CkNodeFirst(CkMyNode());
1470 int end = pe + CkNodeSize(CkMyNode());
1471 for( ; pe != end; ++pe ) {
1472 cm[pe].recvBuildCudaForceTable();
1476 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1481 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1487 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1488 int pe = CkNodeFirst(CkMyNode());
1489 int end = pe + CkNodeSize(CkMyNode());
1490 for( ; pe != end; ++pe ) {
1491 cm[pe].recvBuildMICForceTable();
1496 void build_mic_force_table();
1501 build_mic_force_table();
1513 msg->
master = computeNonbondedCUDAObject;
1515 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1516 cm[pe].recvCreateNonbondedCUDASlave(msg);
1520 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1526 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1527 cm[pe].recvNonbondedCUDASlaveReady(np,ac,seq);
1531 for (
int i=0; i<np; ++i ) {
1532 computeNonbondedCUDAObject->
patchReady(-1,ac,seq);
1544 thisProxy[pe].recvNonbondedCUDASlaveSkip(msg);
1548 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1558 int type = c->
type();
1561 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1562 wdProxy[pe].enqueueCUDA(msg);
1569 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1570 wdProxy[pe].finishCUDAPatch(msg);
1579 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1587 for (
int i=0;i < pes.size();i++) {
1590 thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1600 for (
int i=0;i < pes.size();i++) {
1603 thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
1613 for (
int i=0;i < pes.size();i++) {
1617 thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1631 thisProxy[pe].recvFinishPatchOnPe(msg);
1640 for (
int i=0;i < pes.size();i++) {
1644 thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1656 thisProxy[pe].recvFinishReductions(msg);
1667 thisProxy[pe].recvMessageEnqueueWork(msg);
1678 thisProxy[pe].recvLaunchWork(msg);
1687 for (
int i=0;i < pes.size();i++) {
1690 thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1701 class ComputeBondedCUDAMsg :
public CMessage_ComputeBondedCUDAMsg {
1703 ComputeBondedCUDA* c;
1708 for (
int i=0;i < pes.size();i++) {
1709 ComputeBondedCUDAMsg *msg =
new ComputeBondedCUDAMsg;
1711 thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1716 msg->c->assignPatchesOnPe();
1721 ComputeBondedCUDAMsg *msg =
new ComputeBondedCUDAMsg;
1723 thisProxy[pe].recvMessageEnqueueWork(msg);
1727 msg->c->messageEnqueueWork();
1732 for (
int i=0;i < pes.size();i++) {
1733 ComputeBondedCUDAMsg *msg =
new (
PRIORITY_SIZE) ComputeBondedCUDAMsg;
1736 thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1741 msg->c->openBoxesOnPe();
1745 void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1746 for (
int i=0;i < pes.size();i++) {
1747 ComputeBondedCUDAMsg *msg =
new ComputeBondedCUDAMsg;
1749 thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
1753 void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
1754 msg->c->loadTuplesOnPe();
1759 ComputeBondedCUDAMsg *msg =
new ComputeBondedCUDAMsg;
1761 thisProxy[pe].recvLaunchWork(msg);
1765 msg->c->launchWork();
1770 for (
int i=0;i < pes.size();i++) {
1771 ComputeBondedCUDAMsg *msg =
new (
PRIORITY_SIZE) ComputeBondedCUDAMsg;
1774 thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1779 msg->c->finishPatchesOnPe();
1784 ComputeBondedCUDAMsg *msg =
new ComputeBondedCUDAMsg;
1786 thisProxy[pe].recvFinishReductions(msg);
1790 msg->c->finishReductions();
1795 for (
int i=0;i < pes.size();i++) {
1796 ComputeBondedCUDAMsg *msg =
new ComputeBondedCUDAMsg;
1798 thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1803 msg->c->unregisterBoxesOnPe();
1807 #endif // BONDED_CUDA
1813 msg->
master = computeNonbondedMICObject;
1815 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1816 cm[pe].recvCreateNonbondedMICSlave(msg);
1826 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1827 cm[pe].recvNonbondedMICSlaveReady(np,ac,seq);
1831 for (
int i=0; i<np; ++i ) {
1832 computeNonbondedMICObject->
patchReady(-1,ac,seq);
1844 thisProxy[pe].recvNonbondedMICSlaveSkip(msg);
1858 int type = c->
type();
1861 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1862 wdProxy[pe].enqueueMIC(msg);
1866 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1867 cm.recvMICPEData(pe, data);
1871 if (pe < 0 || pe >= CkNumPes() || micPEData == NULL) {
return; }
1872 int majorIndex = pe / (
sizeof(int)*8);
1873 int minorIndex = pe % (
sizeof(int)*8);
1875 micPEData[majorIndex] |= (0x01 << minorIndex);
1877 micPEData[majorIndex] &= ((~0x01) << minorIndex);
1882 return CProxy_ComputeMgr::ckLocalBranch(CkpvAccess(BOCclass_group).computeMgr)->isMICProcessor(pe);
1886 if (pe < 0 || pe >= CkNumPes() || micPEData == NULL) {
return 0; }
1887 int majorIndex = pe / (
sizeof(int)*8);
1888 int minorIndex = pe % (
sizeof(int)*8);
1889 return ((micPEData[majorIndex] >> minorIndex) & 0x01);
1892 #include "ComputeMgr.def.h"
void sendNonbondedCUDASlaveEnqueuePatch(ComputeNonbondedCUDA *c, int, int, int, int, FinishWorkMsg *)
static int offset_b(int i)
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
void recvNonbondedMICSlaveReady(int, int, int)
void finishPatchOnPe(int i)
#define COMPUTE_PROXY_PRIORITY
void recvComputeEwaldData(ComputeEwaldMsg *)
void updateLocalComputes()
void sendNonbondedCUDASlaveSkip(ComputeNonbondedCUDA *c, int)
void sendBuildCudaForceTable()
void build_cuda_exclusions()
void sendYieldDevice(int pe)
void recvData(ComputeEwaldMsg *)
virtual void initialize()
void recvCreateNonbondedCUDASlave(NonbondedCUDASlaveMsg *)
void recvComputeDPMEResults(ComputeDPMEResultsMsg *)
void recvResults(ComputeEwaldMsg *)
void setNewNumPartitions(ComputeID cid, char numPartitions)
void recvYieldDevice(int pe)
ComputeNonbondedMIC * master
void build_cuda_force_table()
void recvResults(ComputeGlobalResultsMsg *)
static int offset_c(int i)
void saveComputeMap(const char *fname)
static ProxyMgr * Object()
#define TRACE_COMPOBJ_IDOFFSET
void updateLocalComputes5()
CudaComputeNonbonded * getCudaComputeNonbonded()
void del(int index, int num=1)
int gridsize_c(void) const
static PatchMap * Object()
void recvFinishPatchOnPe(CudaComputeNonbondedMsg *msg)
void buildProxySpanningTree2()
void sendMessageEnqueueWork(int pe, CudaComputeNonbonded *c)
static __thread ComputeMgr * computeMgr
SimParameters * simParameters
void sendFinishReductions(int pe, CudaComputeNonbonded *c)
void registerCompute(ComputeID cid, Compute *c)
void basePatchIDList(int pe, PatchIDList &)
int index_a(int pid) const
void setNumPartitions(ComputeID cid, char numPartitions)
void recvComputeConsForceMsg(ComputeConsForceMsg *)
virtual void initialize()
#define PROXY_DATA_PRIORITY
void updateLocalComputes3()
void Migrate(LDObjHandle handle, int dest)
int isMICProcessor(int pe)
void recvMessageEnqueueWork(CudaComputeNonbondedMsg *msg)
void sendNonbondedCUDASlaveReady(int, int, int, int)
void messageEnqueueWork()
char computeMapFilename[NAMD_FILENAME_BUFFER_SIZE]
void recvMICPEData(int, int)
LocalWorkMsg * localWorkMsg2
void createComputes(ComputeMap *map)
void setNode(ComputeID cid, NodeID node)
void recvLaunchWork(CudaComputeNonbondedMsg *msg)
int computeGlobalResultsMsgSeq
void updateLocalComputes2(CkQdMsg *)
void recvAssignPatchesOnPe(CudaComputeNonbondedMsg *msg)
LocalWorkMsg *const localWorkMsg
void doneUpdateLocalComputes()
void register_cuda_compute_self(ComputeID c, PatchID pid)
char newNumPartitions(ComputeID cid)
void recvComputeGlobalConfig(ComputeGlobalConfigMsg *)
void sendMICPEData(int, int)
int computeGlobalResultsMsgMasterSeq
void sendComputeEwaldData(ComputeEwaldMsg *)
void recvCreateNonbondedMICSlave(NonbondedMICSlaveMsg *)
void registerUserEventsForAllComputeObjs()
void sendBuildCudaExclusions()
void sendLaunchWork(int pe, CudaComputeNonbonded *c)
bool mic_device_shared_with_pe(int pe)
int gridsize_a(void) const
void recvYieldDevice(int pe)
void sendCreateNonbondedCUDASlave(int, int)
void NAMD_bug(const char *err_msg)
static int offset_a(int i)
void sendComputeGlobalResults(ComputeGlobalResultsMsg *)
ComputeType type(ComputeID cid)
static ComputeCUDAMgr * getComputeCUDAMgr()
void removeUnusedProxies(void)
void sendUnregisterBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
ComputeNonbondedCUDA * master
int index_b(int pid) const
bool device_shared_with_pe(int pe)
int numPartitions(ComputeID cid)
void unregisterBoxesOnPe()
void sendFinishPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
void recvFinishReductions(CudaComputeNonbondedMsg *msg)
void createProxy(PatchID pid)
void setNewNode(ComputeID cid, NodeID node)
int partition(ComputeID cid)
void updateLocalComputes4(CkQdMsg *)
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
void recvNonbondedMICSlaveSkip(NonbondedMICSkipMsg *)
void recvFinishPatchesOnPe(CudaComputeNonbondedMsg *msg)
void enableComputeGlobalResults()
void registerComputeSelf(ComputeID cid, PatchID pid)
LocalWorkMsg * localWorkMsg2
void recvComputeGlobalResults(ComputeGlobalResultsMsg *)
ResizeArray< ComputeGlobalResultsMsg * > computeGlobalResultsMsgs
void recvComputeDPMEData(ComputeDPMEDataMsg *)
ComputeGlobal * computeGlobalObject
void recvData(ComputeGlobalDataMsg *)
void recvComputeEwaldResults(ComputeEwaldMsg *)
int getMasterNode() const
int index_c(int pid) const
void sendComputeDPMEData(ComputeDPMEDataMsg *)
void saveComputeMapChanges(int, CkGroupID)
ComputeNonbondedMIC * compute
int add(const Elem &elem)
void recvComputeGlobalData(ComputeGlobalDataMsg *)
void addClient(GlobalMaster *newClient)
void sendComputeDPMEResults(ComputeDPMEResultsMsg *, int)
void sendNonbondedMICSlaveEnqueue(ComputeNonbondedMIC *c, int, int, int, int)
ComputeNonbondedCUDA * compute
void sendNonbondedMICSlaveReady(int, int, int, int)
void updateComputes2(CkQdMsg *)
ResizeArray< int > localHostedPatches
void sendNonbondedCUDASlaveEnqueue(ComputeNonbondedCUDA *c, int, int, int, int)
void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
Compute * compute(ComputeID cid)
ComputeID cloneCompute(ComputeID src, int partition)
static ComputeMap * Object()
void recvOpenBoxesOnPe(CudaComputeNonbondedMsg *msg)
void recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg *msg)
void recvYieldDevice(int pe)
__thread DeviceCUDA * deviceCUDA
void sendComputeGlobalConfig(ComputeGlobalConfigMsg *)
void assignPatches(ComputeMgr *computeMgrIn)
void recvNonbondedCUDASlaveReady(int, int, int)
char SMDFile[NAMD_FILENAME_BUFFER_SIZE]
CudaComputeNonbonded * getCudaComputeNonbonded()
void recvSkipPatchesOnPe(CudaComputeNonbondedMsg *msg)
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
int numPids(ComputeID cid)
void recvNonbondedCUDASlaveSkip(NonbondedCUDASkipMsg *)
void sendComputeGlobalData(ComputeGlobalDataMsg *)
int pid(ComputeID cid, int i)
virtual void patchReady(PatchID, int doneMigration, int seq)
#define SET_PRIORITY(MSG, SEQ, PRIO)
void recvBuildMICForceTable()
int trans(ComputeID cid, int i)
void sendOpenBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
void sendFinishPatchOnPe(int pe, CudaComputeNonbonded *c, int i, PatchID patchID)
void sendNonbondedMICSlaveSkip(ComputeNonbondedMIC *c, int)
void sendBuildMICForceTable()
void updateComputes(int, CkGroupID)
int gridsize_b(void) const
void recvBuildCudaForceTable()
void sendComputeEwaldResults(ComputeEwaldMsg *)
colvarproxy_namd GlobalMasterColvars
void sendAssignPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
void sendCreateNonbondedMICSlave(int, int)
void recvBuildCudaExclusions()
void splitComputes2(CkQdMsg *)
ResizeArray< int > localHostedPatches
#define PATCH_PRIORITY(PID)
void sendSkipPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
NodeID newNode(ComputeID cid)
void register_cuda_compute_pair(ComputeID c, PatchID pid[], int t[])