10 #include "ProxyMgr.decl.h" 31 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK) 39 #define MIN_DEBUG_LEVEL 2 42 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE)) 59 msg_size +=
sizeof(msg->
node);
60 msg_size +=
sizeof(msg->
patch);
65 msg_size +=
sizeof(array_size);
66 msg_size += array_size *
sizeof(char);
69 int nonzero_count = 0;
70 for (
int i = 0; i < array_size; ++i ) {
71 if (
f[i].x != 0. ||
f[i].y != 0. ||
f[i].z != 0. ) { ++nonzero_count; }
73 msg_size += nonzero_count *
sizeof(
Vector);
76 void *msg_buf = CkAllocBuffer(msg,msg_size);
77 char *msg_cur = (
char *)msg_buf;
79 CmiMemcpy((
void*)msg_cur,(
void*)(&(msg->
node)),
sizeof(msg->
node));
80 msg_cur +=
sizeof(msg->
node);
81 CmiMemcpy((
void*)msg_cur,(
void*)(&(msg->
patch)),
sizeof(msg->
patch));
82 msg_cur +=
sizeof(msg->
patch);
85 *(
int *) msg_cur = array_size;
86 msg_cur +=
sizeof(int);
87 char *nonzero = msg_cur;
88 msg_cur += array_size *
sizeof(char);
89 msg_cur = (
char *)
ALIGN_8 (msg_cur);
93 for (
int i = 0; i < array_size; ++i ) {
94 if (
f[i].x != 0. ||
f[i].y != 0. ||
f[i].z != 0. ) {
104 msg_cur = (
char *) farr;
115 char *msg_cur = (
char*)ptr;
117 CmiMemcpy((
void*)(&(msg->
node)),(
void*)msg_cur,
sizeof(msg->
node));
118 msg_cur +=
sizeof(msg->
node);
119 CmiMemcpy((
void*)(&(msg->
patch)),(
void*)msg_cur,
sizeof(msg->
patch));
120 msg_cur +=
sizeof(msg->
patch);
123 int array_size = *(
int *) msg_cur;
124 msg_cur +=
sizeof(array_size);
125 msg->
forceList[j] = &(msg->forceListInternal[j]);
127 char *nonzero = msg_cur;
128 msg_cur += array_size *
sizeof(char);
129 msg_cur = (
char *)
ALIGN_8 (msg_cur);
132 for (
int i = 0; i < array_size; ++i ) {
139 f[i].x = 0.;
f[i].y = 0.;
f[i].z = 0.;
142 msg_cur = (
char *) farr;
155 tmpLen[i] = fls[i].
size();
156 iszeroLen += tmpLen[i];
158 char *tmpIszero =
new char[iszeroLen];
159 char *iszeroPtr = tmpIszero;
163 for(
int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
164 if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
178 iszeroPtr = tmpIszero;
182 for(
int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
183 if((*iszeroPtr)!=1) {
184 forcePtr->
x = fiPtr->x;
185 forcePtr->
y = fiPtr->y;
186 forcePtr->
z = fiPtr->z;
191 memcpy(retmsg->
isZero, tmpIszero,
sizeof(
char)*iszeroLen);
198 for(
int i=0; i<size; i++) {
199 numAllPes += tree[i].
numPes;
205 int *pAllPes = retmsg->
allPes;
206 for(
int i=0; i<size; i++) {
208 for(
int j=0; j<tree[i].
numPes; j++) {
209 *pAllPes = tree[i].
peIDs[j];
218 #ifdef PROCTRACE_DEBUG 219 DebugFileTrace *dft = DebugFileTrace::Object();
221 const char *patchname =
"ProxyPatch";
222 if(
procID == CkMyPe()) patchname =
"HomePatch";
223 dft->writeTrace(
"%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes\n",
229 dft->writeTrace(
"%s: ===%d===pes/node: ", tag,
patch);
233 dft->writeTrace(
"\n%s: ===%d===pe list: ", tag,
patch);
237 dft->writeTrace(
"%d ", *p);
241 dft->writeTrace(
"\n");
249 int nonzero_count = 0;
253 totalFLLen += array_size;
255 for (
int i = 0; i < array_size; ++i ) {
256 if (
f[i].x != 0. ||
f[i].y != 0. ||
f[i].z != 0. ) { ++nonzero_count; }
263 envelope *oenv = UsrToEnv(msg);
264 envelope *nenv = UsrToEnv(msg_buf);
265 CmiMemcpy(nenv->getPrioPtr(), oenv->getPrioPtr(), nenv->getPrioBytes());
269 for (
int i=0; i<nodeSize; i++) {
272 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 273 msg_buf->destPe = msg->destPe;
274 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK) 275 msg_buf->isFromImmMsgCall = msg->isFromImmMsgCall;
284 msg_buf->
flLen[j] = array_size;
286 for (
int i = 0; i < array_size; ++i , isNonZeroPtr++) {
287 if (
f[i].x != 0. ||
f[i].y != 0. ||
f[i].z != 0. ) {
309 for (
int i=0; i<ptr->
nodeSize; i++) {
312 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 313 msg->destPe = ptr->destPe;
314 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK) 315 msg->isFromImmMsgCall = ptr->isFromImmMsgCall;
324 int array_size = ptr->
flLen[j];
325 msg->
forceList[j] = &(msg->forceListInternal[j]);
329 for (
int i = 0; i < array_size; ++i, nonzero++ ) {
336 f[i].x = 0.;
f[i].y = 0.;
f[i].z = 0.;
349 if (CkpvAccess(ProxyMgr_instance)) {
350 NAMD_bug(
"Tried to create ProxyMgr twice.");
352 CkpvAccess(ProxyMgr_instance) =
this;
357 CkpvAccess(ProxyMgr_instance) = NULL;
362 if(CkMyRank()!=0)
return;
371 if(CkMyRank()!=0)
return;
380 if(CkMyRank()!=0)
return;
391 for ( pi = pi.
begin(); pi != pi.
end(); pi++)
393 delete pi->proxyPatch;
402 for ( pi = pi.
begin(); pi != pi.
end(); pi++)
404 if ( pi->proxyPatch->getNumComputes() == 0 ) {
405 toDelete.
add(pi->patchID);
410 for ( ; pidi != toDelete.
end(); ++pidi ) {
423 int myNode = CkMyPe();
424 enum PatchFlag { Unknown, Home, NeedProxy };
425 int *patchFlag =
new int[numPatches];
429 for ( i = 0; i < numPatches; ++i )
431 patchFlag[i] = ( patchMap->
node(i) == myNode ) ? Home : Unknown;
434 #if !(defined(NAMD_CUDA) || defined(NAMD_HIP)) 439 for ( i = 0; i < basepids.
size(); ++i )
441 if ( patchMap->
node(basepids[i]) != myNode ) {
442 patchFlag[basepids[i]] = NeedProxy;
445 for ( j = 0; j < numNeighbors; ++j )
447 if ( ! patchFlag[neighbors[j]] ) {
448 patchFlag[neighbors[j]] = NeedProxy;
458 for ( i = 0; i < nc; ++i )
460 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 464 #elif defined(NAMD_MIC) 466 if ( computeMap->directToDevice(i) != 0 ) {
continue; }
468 if ( computeMap->
node(i) != myNode )
470 int numPid = computeMap->
numPids(i);
471 for ( j = 0; j < numPid; ++j )
473 int pid = computeMap->
pid(i,j);
474 if ( ! patchFlag[pid] ) {
475 patchFlag[pid] = NeedProxy;
480 for ( i = 0; i < numPatches; ++i ) {
481 if ( patchFlag[i] == NeedProxy )
495 DebugM(4,
"createProxy("<<pid<<
")\n");
527 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
528 cp[node].recvRegisterProxy(msg);
547 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
548 cp[node].recvUnregisterProxy(msg);
564 for (
int i=0; i<pids.
size(); i++) {
566 if (home == NULL) CkPrintf(
"ERROR: homepatch NULL\n");
567 #ifdef NODEAWARE_PROXY_SPANNINGTREE 568 home->buildNodeAwareSpanningTree();
583 for (
int i=0; i<pids.
size(); i++) {
585 if (home == NULL) CkPrintf(
"ERROR: homepatch NULL\n");
591 for(iter=iter.
begin(); iter!=iter.
end(); iter++) {
601 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
602 cp[0].recvProxies(pid, list, n);
608 #define MAX_INTERNODE 1 612 FILE *ofp = fopen(
"patch_proxylist.txt",
"w");
613 std::vector<int> plist;
614 for(
int i=0; i<np; i++) {
615 fprintf(ofp,
"%d: ", i);
617 fprintf(ofp,
"#%d ", listlen);
619 for(
int j=0; j<listlen; j++) {
622 std::sort(plist.begin(), plist.end());
623 for(
int j=0; j<listlen; j++) {
624 fprintf(ofp,
"%d ", plist[j]);
639 for (
int i=0; i<n; i++)
649 #ifdef NODEAWARE_PROXY_SPANNINGTREE 650 buildNodeAwareSpanningTree0();
663 for(
int i=0; i<nPatches; i++) {
667 for(
int j=0; j<plen; j++) {
679 #ifdef NODEAWARE_PROXY_SPANNINGTREE 680 buildNodeAwareSpanningTree0();
718 for (i=0; i<CkNumPes(); i++)
procidx[i] = i;
733 else if(CkNumPes()<4097)
735 else if(CkNumPes()<8193)
737 else if(CkNumPes()<16385)
741 for (
int i=0; i<exclude; i++)
if (
procidx[i] == p)
return 1;
746 #ifdef NODEAWARE_PROXY_SPANNINGTREE 748 void ProxyMgr::buildNodeAwareSpanningTree0(){
749 CkPrintf(
"Info: build node-aware spanning tree with send: %d, recv: %d with branch factor %d\n",
753 for (
int pid=0; pid<numPatches; pid++)
754 buildSinglePatchNodeAwareSpanningTree(pid, ptree.
proxylist[pid], ptree.naTrees[pid]);
777 for(;pid<numPatches; pid++) {
780 if(pid==numPatches) {
783 int *proxyNodeMap =
new int[CkNumNodes()];
784 memset(proxyNodeMap, 0,
sizeof(
int)*CkNumNodes());
790 for(
int i=1; i<lastInterNodeIdx; i++) {
791 int nid = onePatchT.
item(i).nodeID;
800 for(; pid<numPatches; pid++) {
804 for(
int i=1; i<=lastInterNodeIdx; i++) {
805 int nid = onePatchT.
item(i).nodeID;
815 int leastAmount = ~(1<<31);
818 for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.
size(); swapPos++) {
819 int chiNId = onePatchT.
item(swapPos).nodeID;
823 if(proxyNodeMap[chiNId]<leastAmount) {
824 leastAmount = proxyNodeMap[chiNId];
828 if(swapPos==onePatchT.
size()) {
829 CmiAssert(leastIdx!=-1);
837 proxyNodeMap[swapNode->
nodeID]++;
838 int tmp = curNode->
nodeID;
844 int *tmpPes = curNode->
peIDs;
846 swapNode->
peIDs = tmpPes;
849 delete [] proxyNodeMap;
859 if(CmiMyNodeSize()==1) {
865 for(;pid<numPatches; pid++) {
868 if(pid==numPatches) {
871 int *proxyCoreMap =
new int[CkNumPes()];
872 memset(proxyCoreMap, 0,
sizeof(
int)*CkNumPes());
878 for(
int i=1; i<lastInterNodeIdx; i++) {
879 int rootProcID = onePatchT.
item(i).peIDs[0];
880 proxyCoreMap[rootProcID]++;
886 for(; pid<numPatches; pid++) {
890 for(
int i=1; i<=lastInterNodeIdx; i++) {
892 int rootProcID = curNode->
peIDs[0];
897 proxyCoreMap[rootProcID]++;
904 int leastAmount = ~(1<<31);
908 for(swapPos=1; swapPos<curNode->
numPes; swapPos++) {
909 int otherCoreID = curNode->
peIDs[swapPos];
913 if(proxyCoreMap[otherCoreID]<leastAmount) {
914 leastAmount = proxyCoreMap[otherCoreID];
918 if(swapPos==curNode->
numPes) {
919 CmiAssert(leastIdx!=-1);
924 int tmp = curNode->
peIDs[swapPos];
926 curNode->
peIDs[0] = tmp;
938 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(
PatchID pid,
NodeIDList &proxyList,
951 std::map<int, int> proxyNodeMap;
952 std::vector<int> proxyNodeIDs;
953 std::map<int, int> proxyTreeIdx;
957 int hpNodeID = CkNodeOf(hpProcID);
958 proxyNodeMap[hpNodeID]=1;
959 proxyTreeIdx[hpNodeID]=0;
960 proxyNodeIDs.push_back(hpNodeID);
965 int procId = proxyList[i];
966 int nodeId = CkNodeOf(procId);
967 std::map<int, int>::iterator it=proxyNodeMap.find(nodeId);
968 if(it==proxyNodeMap.end()) {
969 proxyNodeMap[nodeId] = 1;
970 proxyTreeIdx[nodeId] = proxyNodeIDs.size();
971 proxyNodeIDs.push_back(nodeId);
973 proxyNodeMap[nodeId]++;
977 int numNodesWithProxies = proxyNodeIDs.
size();
978 oneNATree.
resize(numNodesWithProxies);
980 for(
int i=0; i<numNodesWithProxies; i++) {
982 delete [] oneNode->
peIDs;
983 oneNode->
nodeID = proxyNodeIDs[i];
984 oneNode->
peIDs =
new int[proxyNodeMap[oneNode->
nodeID]];
990 rootnode->
peIDs[0] = hpProcID;
994 int procId = proxyList[i];
995 int nodeId = CkNodeOf(procId);
996 int idxInTree = proxyTreeIdx[nodeId];
997 CmiAssert(idxInTree>=0 && idxInTree<numNodesWithProxies);
1003 #else //branch of NODEAWARE_PROXY_SPANNINGTREE 1008 CkPrintf(
"Info: build spanning tree with send: %d, recv: %d with branch factor %d\n",
1015 int *numPatchesOnNode =
new int[CkNumPes()];
1016 int numNodesWithPatches = 0;
1017 for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
1019 for (i=0; i<numPatches; i++) {
1021 numPatchesOnNode[node]++;
1022 if (numPatchesOnNode[node] == 1)
1023 numNodesWithPatches ++;
1025 int patchNodesLast =
1026 ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
1027 int *ntrees =
new int[CkNumPes()];
1028 for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
1030 for (
int pid=0; pid<numPatches; pid++)
1037 delete [] numPatchesOnNode;
1053 int oldindex = oldtree.
find(p);
1054 if (oldindex != -1 && oldindex <=
numProxies) {
1056 if (!isIntermediate) {
1068 if (tree.
find(p) != -1)
continue;
1070 if (patchNodesLast && numPatchesOnNode[p] ) {
1071 while (tree[e] != -1) { e--;
if (e==-1) e =
numProxies; }
1074 if (isIntermediate) ntrees[p]++;
1077 while (tree[s] != -1) { s++;
if (s==
numProxies+1) s = 1; }
1081 while (tree[e] != -1) { e--;
if (e==-1) e =
numProxies; }
1084 if (isIntermediate) ntrees[p]++;
1089 if (isIntermediate) ntrees[p]++;
1095 ptree.
sizes[pid] = treesize;
1102 delete [] numPatchesOnNode;
1109 for (
int pid=0; pid<numPatches; pid++) {
1111 #ifdef NODEAWARE_PROXY_SPANNINGTREE 1129 CProxy_ProxyMgr cp(thisgroup);
1141 CProxy_ProxyMgr cp(thisgroup);
1155 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1156 cp[msg->
tree[0]].recvSpanningTree(msg);
1160 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1163 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG) 1164 DebugFileTrace *dft = DebugFileTrace::Object();
1166 dft->writeTrace(
"PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->
patch);
1170 cp[pe].recvNodeAwareSpanningTree(msg);
1181 if (size > i+1) { child[i] = msg->
tree[i+1]; nChild++; }
1196 int level = 1, index=1;
1199 for (
int n=0; n<nChild; n++) {
1201 for (
int j=0; j<level; j++) {
1202 if (index >= size) { done = 1;
break; }
1203 tree[n].
add(msg->
tree[index]);
1212 if (tree[i].size()) {
1215 cmsg->
node = CkMyPe();
1230 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG) 1231 DebugFileTrace *dft = DebugFileTrace::Object();
1242 int eNChild = treesize-1;
1244 CmiAssert(treesize>0);
1253 iNChild = (iSlots>iNChild)?iNChild:iSlots;
1256 int numChild = iNChild + eNChild;
1261 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1268 int onSameNode = (CkMyNode() == CkNodeOf(msg->
procID));
1274 CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
1279 proxy->setSTNodeChildren(0, NULL);
1294 ALLOCA(
int,children,numChild);
1297 for(
int i=0; i<eNChild; i++) {
1302 for(
int i=eNChild, j=1; i<numChild; i++, j++) {
1303 children[i] = msg->
allPes[j];
1307 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1308 int onSameNode = (CkMyNode() == CkNodeOf(msg->
procID));
1311 CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
1316 ALLOCA(
int,nodeChildren,eNChild+1);
1318 for(
int i=0; i<eNChild; i++) {
1319 nodeChildren[i] = CkNodeOf(*p);
1323 nodeChildren[eNChild] = CkNodeOf(msg->
allPes[0]);
1324 proxy->setSTNodeChildren(eNChild+1, nodeChildren);
1326 proxy->setSTNodeChildren(0, NULL);
1343 for(
int childID=0; childID<eNChild; childID++) {
1345 for(
int i=0; i<nodesToCnt; i++) {
1347 exTreeChildSize[childID].
add(cursize);
1348 exTreeChildPtr[childID].
add(pePtr);
1367 int *pePtr = msg->
allPes+1;
1370 for(
int childID=eNChild; childID<numChild; childID++) {
1372 for(
int i=0; i<nodesToCnt; i++) {
1373 exTreeChildSize[childID].
add(1);
1374 exTreeChildPtr[childID].
add(pePtr);
1388 for(
int i=0; i<numChild; i++) {
1391 int totalNodes = allSizes->
size();
1393 for(
int j=0; j<totalNodes; j++) totalPes += allSizes->
item(j);
1398 int *pAllPes = cmsg->
allPes;
1399 for(
int j=0; j<totalNodes; j++) {
1400 int numPes = allSizes->
item(j);
1402 memcpy(pAllPes, allPtrs->
item(j),
sizeof(int)*numPes);
1405 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG) 1410 delete [] exTreeChildSize;
1411 delete [] exTreeChildPtr;
1416 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG) 1417 DebugFileTrace *dft = DebugFileTrace::Object();
1419 dft->writeTrace(
"PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
1423 CmiAssert(proxy!=NULL);
1428 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1430 CmiEnableUrgentSend(1);
1431 cp[node].recvResults(msg);
1432 CmiEnableUrgentSend(0);
1441 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1443 CmiEnableUrgentSend(1);
1444 cp[node].recvResults(msg);
1445 CmiEnableUrgentSend(0);
1460 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1461 CmiAssert(destPe!=CkMyPe());
1463 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1467 cMsg->destPe = destPe;
1468 CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
1469 cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
1471 cp[destPe].recvImmediateResults(cMsg);
1485 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK) 1491 CkpvAccess(_qd)->create();
1501 NAMD_bug(
"ProxyMgr should receive result message on home processor");
1508 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1509 CmiEnableUrgentSend(1);
1510 cp[CkMyPe()].recvResults(omsg);
1511 CmiEnableUrgentSend(0);
1517 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1526 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1530 int destRank = CkRankOf(msg->destPe);
1531 PatchMap *pmap = localPatchMaps[destRank];
1534 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK) 1535 msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
1537 CProxy_ProxyMgr cp(localProxyMgr);
1538 CmiEnableUrgentSend(1);
1539 cp[msg->destPe].recvResults(msg);
1540 CmiEnableUrgentSend(0);
1552 CProxy_NodeProxyMgr cnp(thisgroup);
1555 cnp[CkNodeOf(cMsg->destPe)].recvImmediateResults(cMsg);
1563 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1565 CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
1566 for(
int i=0; i<pcnt-1; i++) {
1568 cnp[pids[i]].recvImmediateProxyData(copymsg);
1570 cnp[pids[pcnt-1]].recvImmediateProxyData(msg);
1574 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1575 cp.recvImmediateProxyData(msg,pcnt,pids);
1581 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK) 1587 CkpvAccess(_qd)->create();
1605 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1607 PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
1608 CmiAssert(treephs && ntreephs == npid);
1609 CmiUsePersistentHandle(treephs, ntreephs);
1612 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1613 CmiUsePersistentHandle(NULL, 0);
1618 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1619 cp.recvProxyData(newmsg,npid,pids);
1624 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1625 cp[CkMyPe()].recvProxyData(msg);
1629 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1630 CProxy_ProxyMgr cp(localProxyMgr);
1632 CmiAssert(ptn->
numPes!=0);
1636 int rank = CkRankOf(ptn->
peIDs[0]);
1637 PatchMap *pmap = localPatchMaps[rank];
1640 int npid = ppatch->getSTNNodeChild();
1641 int *pids = ppatch->getSTNodeChildPtr();
1646 if(pids[npid-1]==CkMyNode()) npid--;
1648 CProxy_NodeProxyMgr cnp(thisgroup);
1649 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1652 PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
1653 CmiAssert(treephs && ntreephs >= npid);
1654 CmiUsePersistentHandle(treephs, ntreephs);
1657 for(
int i=0; i<npid; i++) {
1659 cnp[pids[i]].recvImmediateProxyData(copymsg);
1661 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1662 CmiUsePersistentHandle(NULL, 0);
1666 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK) 1667 msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
1671 CkAbort(
"Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
1677 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1679 CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
1680 for(
int i=0; i<pcnt-1; i++) {
1682 cnp[pids[i]].recvImmediateProxyAll(copymsg);
1684 cnp[pids[pcnt-1]].recvImmediateProxyAll(msg);
1688 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1689 cp.recvImmediateProxyAll(msg,pcnt,pids);
1695 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK) 1701 CkpvAccess(_qd)->create();
1712 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG) 1713 DebugFileTrace *dft = DebugFileTrace::Object();
1715 dft->writeTrace(
"PMgr::recvImmPAll for patch[%d]\n", msg->
patch);
1716 CmiAssert(proxy!=NULL);
1717 dft->writeTrace(
"PMgr::recvImmPAll assertion OK for patch[%d]\n", msg->
patch);
1728 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1730 PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
1731 CmiAssert(treephs && ntreephs == npid);
1732 CmiUsePersistentHandle(treephs, ntreephs);
1735 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1736 CmiUsePersistentHandle(NULL, 0);
1741 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1742 cp[CkMyPe()].recvProxyAll(msg);
1746 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) 1747 CProxy_ProxyMgr cp(localProxyMgr);
1749 CmiAssert(ptn->
numPes!=0);
1750 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG) 1752 printf(
"NodePMgr::recvImmPAll for patch[%d] on node %d rank %d, prepare to send proc ", msg->
patch, CkMyNode(), CkMyRank());
1753 for(
int i=0; i<ptn->
numPes; i++) {
1754 printf(
"%d, ", ptn->
peIDs[i]);
1762 int rank = CkRankOf(ptn->
peIDs[0]);
1763 PatchMap *pmap = localPatchMaps[rank];
1766 int npid = ppatch->getSTNNodeChild();
1767 int *pids = ppatch->getSTNodeChildPtr();
1772 if(pids[npid-1]==CkMyNode()) npid--;
1775 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1778 PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
1779 CmiAssert(treephs && ntreephs >= npid);
1780 CmiUsePersistentHandle(treephs, ntreephs);
1783 CProxy_NodeProxyMgr cnp(thisgroup);
1784 for(
int i=0; i<npid; i++) {
1786 cnp[pids[i]].recvImmediateProxyAll(copymsg);
1788 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE 1789 CmiUsePersistentHandle(NULL, 0);
1793 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK) 1794 msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
1798 CkAbort(
"Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
1802 void ProxyMgr::printProxySpanningTree(){
1803 #ifdef NODEAWARE_PROXY_SPANNINGTREE 1805 for(
int i=0; i<numPatches; i++) {
1807 printf(
"ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.
size());
1809 printf(
"===%d=== pes/node: ", i);
1810 for(
int j=0; j<oneList.
size(); j++) {
1811 printf(
"%d ", oneList.
item(j).numPes);
1814 printf(
"===%d=== pe ids: ", i);
1815 for(
int j=0; j<oneList.
size(); j++) {
1816 for(
int k=0; k<oneList.
item(j).numPes; k++) {
1817 printf(
"%d ", oneList.
item(j).peIDs[k]);
1825 for(
int i=0; i<numPatches; i++) {
1827 printf(
"ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.
size());
1829 printf(
"===%d=== pe ids: ", i);
1830 for(
int j=0; j<oneList.
size(); j++) {
1831 printf(
"%d ", oneList.
item(j));
1840 if(proxyInfo[patchID]) {
1841 delete proxyInfo[patchID];
1844 proxyInfo[patchID] = NULL;
1846 proxyInfo[patchID] =
new proxyTreeNode(CkNodeOf(pes[0]),numPes,pes);
1851 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1853 CmiEnableUrgentSend(1);
1854 cp[node].recvResult(msg);
1855 CmiEnableUrgentSend(0);
1866 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1868 CmiEnableUrgentSend(1);
1869 cp[node].recvResult(msg);
1870 CmiEnableUrgentSend(0);
1883 int totalPatches = 0;
1884 int totalProxies = 0;
1885 for(
int i=0; i<bufSize; i++) {
1890 totalPatches += size;
1891 for(
int i=0; i<size; i++) {
1896 int msgPatchIdx = 0;
1897 int msgProxyPeIdx = 0;
1898 for(
int i=0; i<bufSize; i++) {
1905 memcpy(msg->
proxyPEs+msgProxyPeIdx, one->
proxyPEs+curPeIdx,
sizeof(
int)*curListLen);
1906 curPeIdx += curListLen;
1907 msgProxyPeIdx += curListLen;
1910 for(
int i=0; i<size; i++) {
1915 msgProxyPeIdx += curListLen;
1920 #define HOMEPATCH_TREE_BRFACTOR 2 1923 std::vector<int> nodesWithPatches;
1925 for(
int nodeId=0; nodeId<CkNumNodes(); ++nodeId) {
1927 int firstPe = CkNodeFirst(nodeId);
1928 int endPe = firstPe + CkNodeSize(nodeId);
1929 for(
int pe=firstPe; pe < endPe; ++pe) {
1932 if(hpCnt==0)
continue;
1934 nodesWithPatches.push_back(nodeId);
1935 if(CkMyNode() == nodeId) {
1937 myNodeIdx = nodesWithPatches.size()-1;
1938 numHomePatches = hpCnt;
1939 homepatchRecved = 0;
1955 if(myNodeIdx == 0) {
1959 parentNode = nodesWithPatches[parentIdx];
1964 int totalNodes = nodesWithPatches.size();
1967 if(kidId >= totalNodes)
break;
1970 if(numKidNodes!=0) {
1980 CmiLock(localDepositLock);
1981 insertIdx = homepatchRecved++;
1983 localProxyLists[insertIdx].
patchID = pid;
1984 localProxyLists[insertIdx].
numProxies = size;
1985 localProxyLists[insertIdx].
proxyList = plist;
1987 if(insertIdx == (numHomePatches-1)) {
1991 CmiUnlock(localDepositLock);
1996 CmiLock(localDepositLock);
1997 insertIdx = kidRecved++;
1999 remoteProxyLists[insertIdx] = msg;
2000 if(insertIdx == (numKidNodes-1)) {
2004 CmiUnlock(localDepositLock);
2008 if(homepatchRecved!=numHomePatches || kidRecved != numKidNodes)
return;
2010 homepatchRecved = 0;
2014 if(parentNode == -1) {
2016 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
2017 cp[0].recvPatchProxyInfo(msg);
2019 CProxy_NodeProxyMgr cnp(thisgroup);
2020 cnp[parentNode].sendProxyListInfo(msg);
2022 for(
int i=0; i<numKidNodes; i++) {
2023 delete remoteProxyLists[i];
2027 #include "ProxyMgr.def.h" Elem * find(const Elem &elem)
void copy(ResizeArray< Elem > &ra)
static void * pack(ProxyResultMsg *msg)
std::ostream & iINFO(std::ostream &s)
static ProxyResultVarsizeMsg * getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls)
void recvImmediateResults(ProxyCombinedResultRawMsg *)
void registerProxy(RegisterProxyMsg *)
void recvSpanningTree(ProxySpanningTreeMsg *)
void sendSpanningTreeToHomePatch(int pid, int *tree, int n)
void recvNodeAwareSTParent(int patch, int parent)
static ProxyMgr * Object()
void recvProxyAll(ProxyDataMsg *)
int flLen[Results::maxNumForces]
void createSTForHomePatches(PatchMap *pmap)
static PatchMap * Object()
void sendProxies(int pid, int *list, int n)
void buildProxySpanningTree2()
void recvImmediateResults(ProxyCombinedResultRawMsg *)
void sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
void basePatchIDList(int pe, PatchIDList &)
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg)
static ProxyCombinedResultMsg * fromRaw(ProxyCombinedResultRawMsg *msg)
HomePatchList * homePatchList()
static void processCpuLoad()
std::ostream & endi(std::ostream &s)
void receiveAll(ProxyDataMsg *)
int upstreamNeighbors(int pid, PatchID *neighbor_ids)
void receiveResults(ProxyResultVarsizeMsg *msg)
void recvRegisterProxy(RegisterProxyMsg *)
ResizeArrayIter< T > begin(void) const
int add(const Elem &elem)
int getSpanningTreeNChild(void)
static double averageLoad
Patch * patch(PatchID pid)
HomePatch * homePatch(PatchID pid)
int add(const Elem &elem)
static void outputProxyTree(ProxyTree &ptree, int np)
static ProxyCombinedResultRawMsg * toRaw(ProxyCombinedResultMsg *msg)
UniqueSetIter< T > begin(void) const
void setall(const Elem &elem)
void unregisterPatch(PatchID pid, HomePatch *pptr)
static ProxyNodeAwareSpanningTreeMsg * getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size)
void unregisterProxy(PatchID pid)
int numPatches(void) const
void buildProxySpanningTree()
void unregisterProxy(UnregisterProxyMsg *)
ProxyCombinedResultMsg * depositCombinedResultRawMsg(ProxyCombinedResultRawMsg *)
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
void recvImmediateProxyAll(ProxyDataMsg *)
void recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
void removeUnusedProxies(void)
void receiveData(ProxyDataMsg *)
void homePatchIDList(PatchIDList &)
void recvResult(ProxyGBISP1ResultMsg *)
void recvProxies(int pid, int *list, int n)
void recvProxyData(ProxyDataMsg *)
static PatchProxyListMsg * createPatchProxyListMsg(PatchProxyListMsg **bufs, int bufSize, ProxyListInfo *info, int size)
void recvSpanningTree(int *t, int n)
void recvData(ProxyGBISP2DataMsg *)
void recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
PatchID getPatchID() const
void buildSpanningTree0()
#define ALLOCA(TYPE, NAME, SIZE)
void createProxy(PatchID pid)
void recvImmediateProxyAll(ProxyDataMsg *msg)
UniqueSetIter< T > end(void) const
ForceList * forceList[Results::maxNumForces]
#define HOMEPATCH_TREE_BRFACTOR
void recvResults(ProxyResultVarsizeMsg *)
void setSpanningTree(int, int *, int)
void sendProxyData(ProxyDataMsg *, int, int *)
void sendSpanningTree(ProxySpanningTreeMsg *)
void recvImmediateProxyData(ProxyDataMsg *)
ProxyCombinedResultMsg * depositCombinedResultMsg(ProxyCombinedResultMsg *)
#define PACK_RESIZE(DATA)
static int noInterNode(int p)
void sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
void sendProxyList(int pid, int *plist, int size)
void sendProxyAll(ProxyDataMsg *, int, int *)
ForceList * forceList[Results::maxNumForces]
static ComputeMap * Object()
void registerProxy(PatchID pid)
void sendResults(ProxyResultVarsizeMsg *)
int find(const Elem &e) const
int flLen[Results::maxNumForces]
int getSpanningTreeParent()
void setProxyTreeBranchFactor(int dim)
const int * getSpanningTreeChildPtr()
void buildSpanningTree(void)
int numPids(ComputeID cid)
int numPatchesOnNode(int node)
static ProxyResultMsg * unpack(void *ptr)
void receiveResult(ProxyGBISP1ResultMsg *msg)
int pid(ComputeID cid, int i)
static int compLoad(const void *a, const void *b)
void sendResult(ProxyGBISP1ResultMsg *)
PACK_MSG(ProxySpanningTreeMsg, PACK(patch);PACK(node);PACK_RESIZE(tree);)
void registerPatch(PatchID pid, HomePatch *pptr)
void registerPatch(int patchID, int numPes, int *pes)
void swap(ResizeArray< Elem > &ra)
int del(const Elem &elem)
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
void recvPatchProxyInfo(PatchProxyListMsg *msg)
void removeProxy(PatchID pid)
ResizeArrayIter< T > end(void) const
void contributeToParent()
void sendProxyListInfo(PatchProxyListMsg *msg)
PatchProxyListMsg(int num)
void recvUnregisterProxy(UnregisterProxyMsg *)
void recvImmediateProxyData(ProxyDataMsg *msg)