10 #include "ProxyMgr.decl.h"
31 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
40 #define MIN_DEBUG_LEVEL 2
43 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
60 msg_size +=
sizeof(msg->
node);
61 msg_size +=
sizeof(msg->
patch);
66 msg_size +=
sizeof(array_size);
67 msg_size += array_size *
sizeof(char);
70 int nonzero_count = 0;
71 for (
int i = 0; i < array_size; ++i ) {
72 if ( f[i].
x != 0. || f[i].
y != 0. || f[i].
z != 0. ) { ++nonzero_count; }
74 msg_size += nonzero_count *
sizeof(
Vector);
77 void *msg_buf = CkAllocBuffer(msg,msg_size);
78 char *msg_cur = (
char *)msg_buf;
80 CmiMemcpy((
void*)msg_cur,(
void*)(&(msg->
node)),
sizeof(msg->
node));
81 msg_cur +=
sizeof(msg->
node);
82 CmiMemcpy((
void*)msg_cur,(
void*)(&(msg->
patch)),
sizeof(msg->
patch));
83 msg_cur +=
sizeof(msg->
patch);
86 *(
int *) msg_cur = array_size;
87 msg_cur +=
sizeof(int);
88 char *nonzero = msg_cur;
89 msg_cur += array_size *
sizeof(char);
90 msg_cur = (
char *)
ALIGN_8 (msg_cur);
94 for (
int i = 0; i < array_size; ++i ) {
95 if ( f[i].
x != 0. || f[i].
y != 0. || f[i].
z != 0. ) {
105 msg_cur = (
char *) farr;
116 char *msg_cur = (
char*)ptr;
118 CmiMemcpy((
void*)(&(msg->
node)),(
void*)msg_cur,
sizeof(msg->
node));
119 msg_cur +=
sizeof(msg->
node);
120 CmiMemcpy((
void*)(&(msg->
patch)),(
void*)msg_cur,
sizeof(msg->
patch));
121 msg_cur +=
sizeof(msg->
patch);
124 int array_size = *(
int *) msg_cur;
125 msg_cur +=
sizeof(array_size);
126 msg->
forceList[j] = &(msg->forceListInternal[j]);
128 char *nonzero = msg_cur;
129 msg_cur += array_size *
sizeof(char);
130 msg_cur = (
char *)
ALIGN_8 (msg_cur);
133 for (
int i = 0; i < array_size; ++i ) {
140 f[i].
x = 0.; f[i].
y = 0.; f[i].
z = 0.;
143 msg_cur = (
char *) farr;
156 tmpLen[i] = fls[i].
size();
157 iszeroLen += tmpLen[i];
159 char *tmpIszero =
new char[iszeroLen];
160 char *iszeroPtr = tmpIszero;
164 for(
int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
165 if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
178 memcpy(retmsg->
flLen, tmpLen,
sizeof(
int)*Results::maxNumForces);
179 iszeroPtr = tmpIszero;
183 for(
int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
184 if((*iszeroPtr)!=1) {
185 forcePtr->
x = fiPtr->x;
186 forcePtr->
y = fiPtr->y;
187 forcePtr->
z = fiPtr->z;
192 memcpy(retmsg->
isZero, tmpIszero,
sizeof(
char)*iszeroLen);
199 for(
int i=0; i<size; i++) {
200 numAllPes += tree[i].
numPes;
206 int *pAllPes = retmsg->
allPes;
207 for(
int i=0; i<size; i++) {
209 for(
int j=0; j<tree[i].
numPes; j++) {
210 *pAllPes = tree[i].
peIDs[j];
219 #ifdef PROCTRACE_DEBUG
220 DebugFileTrace *dft = DebugFileTrace::Object();
222 const char *patchname =
"ProxyPatch";
223 if(
procID == CkMyPe()) patchname =
"HomePatch";
224 dft->writeTrace(
"%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes\n",
230 dft->writeTrace(
"%s: ===%d===pes/node: ", tag,
patch);
234 dft->writeTrace(
"\n%s: ===%d===pe list: ", tag,
patch);
238 dft->writeTrace(
"%d ", *p);
242 dft->writeTrace(
"\n");
250 int nonzero_count = 0;
254 totalFLLen += array_size;
256 for (
int i = 0; i < array_size; ++i ) {
257 if ( f[i].
x != 0. || f[i].
y != 0. || f[i].
z != 0. ) { ++nonzero_count; }
264 envelope *oenv = UsrToEnv(msg);
265 envelope *nenv = UsrToEnv(msg_buf);
266 CmiMemcpy(nenv->getPrioPtr(), oenv->getPrioPtr(), nenv->getPrioBytes());
270 for (
int i=0; i<nodeSize; i++) {
273 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
274 msg_buf->destPe = msg->destPe;
275 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
276 msg_buf->isFromImmMsgCall = msg->isFromImmMsgCall;
285 msg_buf->
flLen[j] = array_size;
287 for (
int i = 0; i < array_size; ++i , isNonZeroPtr++) {
288 if ( f[i].
x != 0. || f[i].
y != 0. || f[i].
z != 0. ) {
310 for (
int i=0; i<ptr->
nodeSize; i++) {
313 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
314 msg->destPe = ptr->destPe;
315 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
316 msg->isFromImmMsgCall = ptr->isFromImmMsgCall;
325 int array_size = ptr->
flLen[j];
326 msg->
forceList[j] = &(msg->forceListInternal[j]);
330 for (
int i = 0; i < array_size; ++i, nonzero++ ) {
337 f[i].
x = 0.; f[i].
y = 0.; f[i].
z = 0.;
350 if (CkpvAccess(ProxyMgr_instance)) {
351 NAMD_bug(
"Tried to create ProxyMgr twice.");
353 CkpvAccess(ProxyMgr_instance) =
this;
358 CkpvAccess(ProxyMgr_instance) = NULL;
363 if(CkMyRank()!=0)
return;
372 if(CkMyRank()!=0)
return;
381 if(CkMyRank()!=0)
return;
392 for ( pi = pi.
begin(); pi != pi.
end(); pi++)
394 delete pi->proxyPatch;
403 for ( pi = pi.
begin(); pi != pi.
end(); pi++)
405 if ( pi->proxyPatch->getNumComputes() == 0 ) {
406 toDelete.
add(pi->patchID);
411 for ( ; pidi != toDelete.
end(); ++pidi ) {
424 int myNode = CkMyPe();
425 enum PatchFlag { Unknown, Home, NeedProxy };
432 patchFlag[i] = ( patchMap->
node(i) == myNode ) ? Home : Unknown;
435 #if !(defined(NAMD_CUDA) || defined(NAMD_HIP))
440 for ( i = 0; i < basepids.
size(); ++i )
442 if ( patchMap->
node(basepids[i]) != myNode ) {
443 patchFlag[basepids[i]] = NeedProxy;
446 for ( j = 0; j < numNeighbors; ++j )
448 if ( ! patchFlag[neighbors[j]] ) {
449 patchFlag[neighbors[j]] = NeedProxy;
459 for ( i = 0; i < nc; ++i )
461 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
465 #elif defined(NAMD_MIC)
467 if ( computeMap->directToDevice(i) != 0 ) {
continue; }
469 if ( computeMap->
node(i) != myNode )
471 int numPid = computeMap->
numPids(i);
472 for ( j = 0; j < numPid; ++j )
474 int pid = computeMap->
pid(i,j);
475 if ( ! patchFlag[pid] ) {
476 patchFlag[pid] = NeedProxy;
482 if ( patchFlag[i] == NeedProxy )
496 DebugM(4,
"createProxy("<<pid<<
")\n");
528 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
529 cp[node].recvRegisterProxy(msg);
548 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
549 cp[node].recvUnregisterProxy(msg);
565 for (
int i=0; i<pids.
size(); i++) {
567 if (home == NULL) CkPrintf(
"ERROR: homepatch NULL\n");
568 #ifdef NODEAWARE_PROXY_SPANNINGTREE
569 home->buildNodeAwareSpanningTree();
584 for (
int i=0; i<pids.
size(); i++) {
586 if (home == NULL) CkPrintf(
"ERROR: homepatch NULL\n");
592 for(iter=iter.
begin(); iter!=iter.
end(); iter++) {
602 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
603 cp[0].recvProxies(pid, list, n);
609 #define MAX_INTERNODE 1
613 FILE *ofp = fopen(
"patch_proxylist.txt",
"w");
614 std::vector<int>
plist;
615 for(
int i=0; i<np; i++) {
616 fprintf(ofp,
"%d: ", i);
618 fprintf(ofp,
"#%d ", listlen);
620 for(
int j=0; j<listlen; j++) {
624 for(
int j=0; j<listlen; j++) {
625 fprintf(ofp,
"%d ", plist[j]);
640 for (
int i=0; i<n; i++)
650 #ifdef NODEAWARE_PROXY_SPANNINGTREE
651 buildNodeAwareSpanningTree0();
664 for(
int i=0; i<nPatches; i++) {
668 for(
int j=0; j<plen; j++) {
680 #ifdef NODEAWARE_PROXY_SPANNINGTREE
681 buildNodeAwareSpanningTree0();
702 double d1 = cpuloads[i1];
703 double d2 = cpuloads[i2];
717 procidx =
new int[CkNumPes()];
719 for (i=0; i<CkNumPes(); i++) procidx[i] = i;
720 qsort(procidx, CkNumPes(),
sizeof(
int),
compLoad);
722 double averageLoad = 0.0;
723 for (i=0; i<CkNumPes(); i++) averageLoad += cpuloads[i];
724 averageLoad /= CkNumPes();
734 else if(CkNumPes()<4097)
736 else if(CkNumPes()<8193)
738 else if(CkNumPes()<16385)
742 for (
int i=0; i<exclude; i++)
if (procidx[i] == p)
return 1;
747 #ifdef NODEAWARE_PROXY_SPANNINGTREE
749 void ProxyMgr::buildNodeAwareSpanningTree0(){
750 CkPrintf(
"Info: build node-aware spanning tree with send: %d, recv: %d with branch factor %d\n",
755 buildSinglePatchNodeAwareSpanningTree(pid, ptree.
proxylist[pid], ptree.naTrees[pid]);
781 if(pid==numPatches) {
784 int *proxyNodeMap =
new int[CkNumNodes()];
785 memset(proxyNodeMap, 0,
sizeof(
int)*CkNumNodes());
791 for(
int i=1; i<lastInterNodeIdx; i++) {
792 int nid = onePatchT.
item(i).nodeID;
805 for(
int i=1; i<=lastInterNodeIdx; i++) {
806 int nid = onePatchT.
item(i).nodeID;
816 int leastAmount = ~(1<<31);
819 for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.
size(); swapPos++) {
820 int chiNId = onePatchT.
item(swapPos).nodeID;
824 if(proxyNodeMap[chiNId]<leastAmount) {
825 leastAmount = proxyNodeMap[chiNId];
829 if(swapPos==onePatchT.
size()) {
830 CmiAssert(leastIdx!=-1);
838 proxyNodeMap[swapNode->
nodeID]++;
839 int tmp = curNode->
nodeID;
845 int *tmpPes = curNode->
peIDs;
847 swapNode->
peIDs = tmpPes;
850 delete [] proxyNodeMap;
860 if(CmiMyNodeSize()==1) {
869 if(pid==numPatches) {
872 int *proxyCoreMap =
new int[CkNumPes()];
873 memset(proxyCoreMap, 0,
sizeof(
int)*CkNumPes());
879 for(
int i=1; i<lastInterNodeIdx; i++) {
880 int rootProcID = onePatchT.
item(i).peIDs[0];
881 proxyCoreMap[rootProcID]++;
891 for(
int i=1; i<=lastInterNodeIdx; i++) {
893 int rootProcID = curNode->
peIDs[0];
898 proxyCoreMap[rootProcID]++;
905 int leastAmount = ~(1<<31);
909 for(swapPos=1; swapPos<curNode->
numPes; swapPos++) {
910 int otherCoreID = curNode->
peIDs[swapPos];
914 if(proxyCoreMap[otherCoreID]<leastAmount) {
915 leastAmount = proxyCoreMap[otherCoreID];
919 if(swapPos==curNode->
numPes) {
920 CmiAssert(leastIdx!=-1);
925 int tmp = curNode->
peIDs[swapPos];
927 curNode->
peIDs[0] = tmp;
939 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(
PatchID pid,
NodeIDList &proxyList,
942 if (numProxies == 0) {
952 std::map<int, int> proxyNodeMap;
953 std::vector<int> proxyNodeIDs;
954 std::map<int, int> proxyTreeIdx;
958 int hpNodeID = CkNodeOf(hpProcID);
959 proxyNodeMap[hpNodeID]=1;
960 proxyTreeIdx[hpNodeID]=0;
961 proxyNodeIDs.push_back(hpNodeID);
966 int procId = proxyList[i];
967 int nodeId = CkNodeOf(procId);
968 std::map<int, int>::iterator it=proxyNodeMap.find(nodeId);
969 if(it==proxyNodeMap.end()) {
970 proxyNodeMap[nodeId] = 1;
971 proxyTreeIdx[nodeId] = proxyNodeIDs.size();
972 proxyNodeIDs.push_back(nodeId);
974 proxyNodeMap[nodeId]++;
978 int numNodesWithProxies = proxyNodeIDs.
size();
979 oneNATree.
resize(numNodesWithProxies);
981 for(
int i=0; i<numNodesWithProxies; i++) {
983 delete [] oneNode->
peIDs;
984 oneNode->
nodeID = proxyNodeIDs[i];
985 oneNode->
peIDs =
new int[proxyNodeMap[oneNode->
nodeID]];
991 rootnode->
peIDs[0] = hpProcID;
995 int procId = proxyList[i];
996 int nodeId = CkNodeOf(procId);
997 int idxInTree = proxyTreeIdx[nodeId];
998 CmiAssert(idxInTree>=0 && idxInTree<numNodesWithProxies);
1004 #else //branch of NODEAWARE_PROXY_SPANNINGTREE
1009 CkPrintf(
"Info: build spanning tree with send: %d, recv: %d with branch factor %d\n",
1016 int *numPatchesOnNode =
new int[CkNumPes()];
1017 int numNodesWithPatches = 0;
1018 for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
1022 numPatchesOnNode[node]++;
1023 if (numPatchesOnNode[node] == 1)
1024 numNodesWithPatches ++;
1026 int patchNodesLast =
1027 ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
1028 int *ntrees =
new int[CkNumPes()];
1029 for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
1034 if (numProxies == 0) {
1038 delete [] numPatchesOnNode;
1043 tree.
resize(numProxies+1);
1054 int oldindex = oldtree.
find(p);
1055 if (oldindex != -1 && oldindex <= numProxies) {
1057 if (!isIntermediate) {
1069 if (tree.
find(p) != -1)
continue;
1071 if (patchNodesLast && numPatchesOnNode[p] ) {
1072 while (tree[e] != -1) { e--;
if (e==-1) e =
numProxies; }
1075 if (isIntermediate) ntrees[p]++;
1078 while (tree[s] != -1) { s++;
if (s==numProxies+1) s = 1; }
1082 while (tree[e] != -1) { e--;
if (e==-1) e =
numProxies; }
1084 isIntermediate = (e*proxySpanDim+1 <=
numProxies);
1085 if (isIntermediate) ntrees[p]++;
1090 if (isIntermediate) ntrees[p]++;
1096 ptree.
sizes[pid] = treesize;
1103 delete [] numPatchesOnNode;
1112 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1113 if (numProxies == 0)
1119 if (numProxies == 0)
1130 CProxy_ProxyMgr cp(thisgroup);
1142 CProxy_ProxyMgr cp(thisgroup);
1156 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1157 cp[msg->
tree[0]].recvSpanningTree(msg);
1161 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1164 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1165 DebugFileTrace *dft = DebugFileTrace::Object();
1167 dft->writeTrace(
"PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->
patch);
1171 cp[pe].recvNodeAwareSpanningTree(msg);
1182 if (size > i+1) { child[i] = msg->
tree[i+1]; nChild++; }
1197 int level = 1, index=1;
1200 for (
int n=0; n<nChild; n++) {
1202 for (
int j=0; j<level; j++) {
1203 if (index >= size) { done = 1;
break; }
1204 tree[n].
add(msg->
tree[index]);
1213 if (tree[i].size()) {
1216 cmsg->
node = CkMyPe();
1231 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1232 DebugFileTrace *dft = DebugFileTrace::Object();
1243 int eNChild = treesize-1;
1245 CmiAssert(treesize>0);
1254 iNChild = (iSlots>iNChild)?iNChild:iSlots;
1257 int numChild = iNChild + eNChild;
1262 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1269 int onSameNode = (CkMyNode() == CkNodeOf(msg->
procID));
1275 CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
1280 proxy->setSTNodeChildren(0, NULL);
1295 ALLOCA(
int,children,numChild);
1298 for(
int i=0; i<eNChild; i++) {
1303 for(
int i=eNChild, j=1; i<numChild; i++, j++) {
1304 children[i] = msg->
allPes[j];
1308 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1309 int onSameNode = (CkMyNode() == CkNodeOf(msg->
procID));
1312 CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
1317 ALLOCA(
int,nodeChildren,eNChild+1);
1319 for(
int i=0; i<eNChild; i++) {
1320 nodeChildren[i] = CkNodeOf(*p);
1324 nodeChildren[eNChild] = CkNodeOf(msg->
allPes[0]);
1325 proxy->setSTNodeChildren(eNChild+1, nodeChildren);
1327 proxy->setSTNodeChildren(0, NULL);
1344 for(
int childID=0; childID<eNChild; childID++) {
1346 for(
int i=0; i<nodesToCnt; i++) {
1348 exTreeChildSize[childID].
add(cursize);
1349 exTreeChildPtr[childID].
add(pePtr);
1368 int *pePtr = msg->
allPes+1;
1371 for(
int childID=eNChild; childID<numChild; childID++) {
1373 for(
int i=0; i<nodesToCnt; i++) {
1374 exTreeChildSize[childID].
add(1);
1375 exTreeChildPtr[childID].
add(pePtr);
1389 for(
int i=0; i<numChild; i++) {
1392 int totalNodes = allSizes->
size();
1394 for(
int j=0; j<totalNodes; j++) totalPes += allSizes->
item(j);
1399 int *pAllPes = cmsg->
allPes;
1400 for(
int j=0; j<totalNodes; j++) {
1401 int numPes = allSizes->
item(j);
1403 memcpy(pAllPes, allPtrs->
item(j),
sizeof(int)*numPes);
1406 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1411 delete [] exTreeChildSize;
1412 delete [] exTreeChildPtr;
1417 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1418 DebugFileTrace *dft = DebugFileTrace::Object();
1420 dft->writeTrace(
"PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
1424 CmiAssert(proxy!=NULL);
1429 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1431 CmiEnableUrgentSend(1);
1432 cp[node].recvResults(msg);
1433 CmiEnableUrgentSend(0);
1442 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1444 CmiEnableUrgentSend(1);
1445 cp[node].recvResults(msg);
1446 CmiEnableUrgentSend(0);
1461 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1462 CmiAssert(destPe!=CkMyPe());
1464 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1468 cMsg->destPe = destPe;
1469 CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
1470 cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
1472 cp[destPe].recvImmediateResults(cMsg);
1486 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
1492 CkpvAccess(_qd)->create();
1502 NAMD_bug(
"ProxyMgr should receive result message on home processor");
1509 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1510 CmiEnableUrgentSend(1);
1511 cp[CkMyPe()].recvResults(omsg);
1512 CmiEnableUrgentSend(0);
1518 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1527 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1531 int destRank = CkRankOf(msg->destPe);
1532 PatchMap *pmap = localPatchMaps[destRank];
1535 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
1536 msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
1538 CProxy_ProxyMgr cp(localProxyMgr);
1539 CmiEnableUrgentSend(1);
1540 cp[msg->destPe].recvResults(msg);
1541 CmiEnableUrgentSend(0);
1553 CProxy_NodeProxyMgr cnp(thisgroup);
1556 cnp[CkNodeOf(cMsg->destPe)].recvImmediateResults(cMsg);
1564 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1566 CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
1567 for(
int i=0; i<pcnt-1; i++) {
1569 cnp[pids[i]].recvImmediateProxyData(copymsg);
1571 cnp[pids[pcnt-1]].recvImmediateProxyData(msg);
1575 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1576 cp.recvImmediateProxyData(msg,pcnt,pids);
1582 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
1588 CkpvAccess(_qd)->create();
1606 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1608 PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
1609 CmiAssert(treephs && ntreephs == npid);
1610 CmiUsePersistentHandle(treephs, ntreephs);
1613 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1614 CmiUsePersistentHandle(NULL, 0);
1619 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1620 cp.recvProxyData(newmsg,npid,pids);
1625 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1626 cp[CkMyPe()].recvProxyData(msg);
1630 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1631 CProxy_ProxyMgr cp(localProxyMgr);
1633 CmiAssert(ptn->
numPes!=0);
1637 int rank = CkRankOf(ptn->
peIDs[0]);
1638 PatchMap *pmap = localPatchMaps[rank];
1641 int npid = ppatch->getSTNNodeChild();
1642 int *pids = ppatch->getSTNodeChildPtr();
1647 if(pids[npid-1]==CkMyNode()) npid--;
1649 CProxy_NodeProxyMgr cnp(thisgroup);
1650 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1653 PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
1654 CmiAssert(treephs && ntreephs >= npid);
1655 CmiUsePersistentHandle(treephs, ntreephs);
1658 for(
int i=0; i<npid; i++) {
1660 cnp[pids[i]].recvImmediateProxyData(copymsg);
1662 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1663 CmiUsePersistentHandle(NULL, 0);
1667 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
1668 msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
1672 CkAbort(
"Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
1678 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1680 CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
1681 for(
int i=0; i<pcnt-1; i++) {
1683 cnp[pids[i]].recvImmediateProxyAll(copymsg);
1685 cnp[pids[pcnt-1]].recvImmediateProxyAll(msg);
1689 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1690 cp.recvImmediateProxyAll(msg,pcnt,pids);
1696 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
1702 CkpvAccess(_qd)->create();
1713 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1714 DebugFileTrace *dft = DebugFileTrace::Object();
1716 dft->writeTrace(
"PMgr::recvImmPAll for patch[%d]\n", msg->
patch);
1717 CmiAssert(proxy!=NULL);
1718 dft->writeTrace(
"PMgr::recvImmPAll assertion OK for patch[%d]\n", msg->
patch);
1729 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1731 PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
1732 CmiAssert(treephs && ntreephs == npid);
1733 CmiUsePersistentHandle(treephs, ntreephs);
1736 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1737 CmiUsePersistentHandle(NULL, 0);
1742 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1743 cp[CkMyPe()].recvProxyAll(msg);
1747 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1748 CProxy_ProxyMgr cp(localProxyMgr);
1750 CmiAssert(ptn->
numPes!=0);
1751 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1753 printf(
"NodePMgr::recvImmPAll for patch[%d] on node %d rank %d, prepare to send proc ", msg->
patch, CkMyNode(), CkMyRank());
1754 for(
int i=0; i<ptn->
numPes; i++) {
1755 printf(
"%d, ", ptn->
peIDs[i]);
1763 int rank = CkRankOf(ptn->
peIDs[0]);
1764 PatchMap *pmap = localPatchMaps[rank];
1767 int npid = ppatch->getSTNNodeChild();
1768 int *pids = ppatch->getSTNodeChildPtr();
1773 if(pids[npid-1]==CkMyNode()) npid--;
1776 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1779 PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
1780 CmiAssert(treephs && ntreephs >= npid);
1781 CmiUsePersistentHandle(treephs, ntreephs);
1784 CProxy_NodeProxyMgr cnp(thisgroup);
1785 for(
int i=0; i<npid; i++) {
1787 cnp[pids[i]].recvImmediateProxyAll(copymsg);
1789 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1790 CmiUsePersistentHandle(NULL, 0);
1794 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
1795 msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
1799 CkAbort(
"Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
1803 void ProxyMgr::printProxySpanningTree(){
1804 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1808 printf(
"ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.
size());
1810 printf(
"===%d=== pes/node: ", i);
1811 for(
int j=0; j<oneList.
size(); j++) {
1812 printf(
"%d ", oneList.
item(j).numPes);
1815 printf(
"===%d=== pe ids: ", i);
1816 for(
int j=0; j<oneList.
size(); j++) {
1817 for(
int k=0; k<oneList.
item(j).numPes; k++) {
1818 printf(
"%d ", oneList.
item(j).peIDs[k]);
1828 printf(
"ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.
size());
1830 printf(
"===%d=== pe ids: ", i);
1831 for(
int j=0; j<oneList.
size(); j++) {
1832 printf(
"%d ", oneList.
item(j));
1841 if(proxyInfo[patchID]) {
1842 delete proxyInfo[patchID];
1845 proxyInfo[patchID] = NULL;
1847 proxyInfo[patchID] =
new proxyTreeNode(CkNodeOf(pes[0]),numPes,pes);
1852 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1854 CmiEnableUrgentSend(1);
1855 cp[node].recvResult(msg);
1856 CmiEnableUrgentSend(0);
1867 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
1869 CmiEnableUrgentSend(1);
1870 cp[node].recvResult(msg);
1871 CmiEnableUrgentSend(0);
1884 int totalPatches = 0;
1885 int totalProxies = 0;
1886 for(
int i=0; i<bufSize; i++) {
1891 totalPatches += size;
1892 for(
int i=0; i<size; i++) {
1897 int msgPatchIdx = 0;
1898 int msgProxyPeIdx = 0;
1899 for(
int i=0; i<bufSize; i++) {
1906 memcpy(msg->
proxyPEs+msgProxyPeIdx, one->
proxyPEs+curPeIdx,
sizeof(
int)*curListLen);
1907 curPeIdx += curListLen;
1908 msgProxyPeIdx += curListLen;
1911 for(
int i=0; i<size; i++) {
1916 msgProxyPeIdx += curListLen;
1921 #define HOMEPATCH_TREE_BRFACTOR 2
1924 std::vector<int> nodesWithPatches;
1926 for(
int nodeId=0; nodeId<CkNumNodes(); ++nodeId) {
1928 int firstPe = CkNodeFirst(nodeId);
1929 int endPe = firstPe + CkNodeSize(nodeId);
1930 for(
int pe=firstPe; pe < endPe; ++pe) {
1933 if(hpCnt==0)
continue;
1935 nodesWithPatches.push_back(nodeId);
1936 if(CkMyNode() == nodeId) {
1938 myNodeIdx = nodesWithPatches.size()-1;
1939 numHomePatches = hpCnt;
1940 homepatchRecved = 0;
1956 if(myNodeIdx == 0) {
1960 parentNode = nodesWithPatches[parentIdx];
1965 int totalNodes = nodesWithPatches.size();
1968 if(kidId >= totalNodes)
break;
1971 if(numKidNodes!=0) {
1981 CmiLock(localDepositLock);
1982 insertIdx = homepatchRecved++;
1984 localProxyLists[insertIdx].
patchID = pid;
1985 localProxyLists[insertIdx].
numProxies = size;
1988 if(insertIdx == (numHomePatches-1)) {
1992 CmiUnlock(localDepositLock);
1997 CmiLock(localDepositLock);
1998 insertIdx = kidRecved++;
2000 remoteProxyLists[insertIdx] = msg;
2001 if(insertIdx == (numKidNodes-1)) {
2005 CmiUnlock(localDepositLock);
2009 if(homepatchRecved!=numHomePatches || kidRecved != numKidNodes)
return;
2011 homepatchRecved = 0;
2015 if(parentNode == -1) {
2017 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
2018 cp[0].recvPatchProxyInfo(msg);
2020 CProxy_NodeProxyMgr cnp(thisgroup);
2021 cnp[parentNode].sendProxyListInfo(msg);
2023 for(
int i=0; i<numKidNodes; i++) {
2024 delete remoteProxyLists[i];
2028 #include "ProxyMgr.def.h"
Elem * find(const Elem &elem)
static void * pack(ProxyResultMsg *msg)
std::ostream & iINFO(std::ostream &s)
static ProxyResultVarsizeMsg * getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls)
void recvImmediateResults(ProxyCombinedResultRawMsg *)
void registerProxy(RegisterProxyMsg *)
void copy(ResizeArray< Elem > &ra)
void recvSpanningTree(ProxySpanningTreeMsg *)
void sendSpanningTreeToHomePatch(int pid, int *tree, int n)
void recvNodeAwareSTParent(int patch, int parent)
static ProxyMgr * Object()
void recvProxyAll(ProxyDataMsg *)
int flLen[Results::maxNumForces]
void createSTForHomePatches(PatchMap *pmap)
static PatchMap * Object()
void sendProxies(int pid, int *list, int n)
void buildProxySpanningTree2()
void recvImmediateResults(ProxyCombinedResultRawMsg *)
int find(const Elem &e) const
void sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
void basePatchIDList(int pe, PatchIDList &)
static __thread unsigned int * plist
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg)
static ProxyCombinedResultMsg * fromRaw(ProxyCombinedResultRawMsg *msg)
HomePatchList * homePatchList()
static void processCpuLoad()
std::ostream & endi(std::ostream &s)
void receiveAll(ProxyDataMsg *)
int upstreamNeighbors(int pid, PatchID *neighbor_ids)
void receiveResults(ProxyResultVarsizeMsg *msg)
void recvRegisterProxy(RegisterProxyMsg *)
if(ComputeNonbondedUtil::goMethod==2)
int add(const Elem &elem)
int getSpanningTreeNChild(void)
static double averageLoad
Patch * patch(PatchID pid)
#define PACK_MSG(MSGTYPE, MSGDATA)
HomePatch * homePatch(PatchID pid)
static void outputProxyTree(ProxyTree &ptree, int np)
static ProxyCombinedResultRawMsg * toRaw(ProxyCombinedResultMsg *msg)
UniqueSetIter< T > begin(void) const
void unregisterPatch(PatchID pid, HomePatch *pptr)
static ProxyNodeAwareSpanningTreeMsg * getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size)
void unregisterProxy(PatchID pid)
ResizeArrayIter< T > end(void) const
void buildProxySpanningTree()
void unregisterProxy(UnregisterProxyMsg *)
ProxyCombinedResultMsg * depositCombinedResultRawMsg(ProxyCombinedResultRawMsg *)
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
void recvImmediateProxyAll(ProxyDataMsg *)
void recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
void removeUnusedProxies(void)
void receiveData(ProxyDataMsg *)
void homePatchIDList(PatchIDList &)
void recvResult(ProxyGBISP1ResultMsg *)
void recvProxies(int pid, int *list, int n)
void recvProxyData(ProxyDataMsg *)
static PatchProxyListMsg * createPatchProxyListMsg(PatchProxyListMsg **bufs, int bufSize, ProxyListInfo *info, int size)
void recvSpanningTree(int *t, int n)
void recvData(ProxyGBISP2DataMsg *)
void setall(const Elem &elem)
void recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
void buildSpanningTree0()
#define ALLOCA(TYPE, NAME, SIZE)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
void createProxy(PatchID pid)
void recvImmediateProxyAll(ProxyDataMsg *msg)
ForceList * forceList[Results::maxNumForces]
#define HOMEPATCH_TREE_BRFACTOR
void recvResults(ProxyResultVarsizeMsg *)
int add(const Elem &elem)
void setSpanningTree(int, int *, int)
void sendProxyData(ProxyDataMsg *, int, int *)
void sendSpanningTree(ProxySpanningTreeMsg *)
UniqueSetIter< T > end(void) const
BlockRadixSort::TempStorage sort
void recvImmediateProxyData(ProxyDataMsg *)
ProxyCombinedResultMsg * depositCombinedResultMsg(ProxyCombinedResultMsg *)
#define PACK_RESIZE(DATA)
static int noInterNode(int p)
void sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
int numPatches(void) const
void swap(ResizeArray< Elem > &ra)
void sendProxyList(int pid, int *plist, int size)
void sendProxyAll(ProxyDataMsg *, int, int *)
ForceList * forceList[Results::maxNumForces]
static ComputeMap * Object()
void registerProxy(PatchID pid)
void sendResults(ProxyResultVarsizeMsg *)
int flLen[Results::maxNumForces]
int getSpanningTreeParent()
void setProxyTreeBranchFactor(int dim)
const int * getSpanningTreeChildPtr()
void buildSpanningTree(void)
int numPids(ComputeID cid)
int numPatchesOnNode(int node)
static ProxyResultMsg * unpack(void *ptr)
void receiveResult(ProxyGBISP1ResultMsg *msg)
int pid(ComputeID cid, int i)
static int compLoad(const void *a, const void *b)
void sendResult(ProxyGBISP1ResultMsg *)
void registerPatch(PatchID pid, HomePatch *pptr)
void registerPatch(int patchID, int numPes, int *pes)
int del(const Elem &elem)
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
void recvPatchProxyInfo(PatchProxyListMsg *msg)
void removeProxy(PatchID pid)
void contributeToParent()
ResizeArrayIter< T > begin(void) const
void sendProxyListInfo(PatchProxyListMsg *msg)
PatchProxyListMsg(int num)
void recvUnregisterProxy(UnregisterProxyMsg *)
void recvImmediateProxyData(ProxyDataMsg *msg)