Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ProxyMgr.C

Go to the documentation of this file.
00001 
00007 #include "InfoStream.h"
00008 #include "main.h"
00009 #include "BOCgroup.h"
00010 #include "ProxyMgr.decl.h"
00011 #include "ProxyMgr.h"
00012 #include "PatchMap.inl"
00013 #include "ProxyPatch.h"
00014 #include "ComputeMap.h"
00015 #include "HomePatch.h"
00016 #include <string.h>
00017 #include "ProcessorPrivate.h"
00018 #include "packmsg.h"
00019 #include "Priorities.h"
00020 #ifndef _NO_ALLOCA_H
00021 #include <alloca.h>
00022 #endif
00023 #ifndef _NO_MALLOC_H
00024 #include <malloc.h>
00025 #endif
00026 
00027 
00028 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
00029 #include "qd.h"
00030 #endif
00031 
00032 //#define DEBUGM
00033 #define MIN_DEBUG_LEVEL 2
00034 #include "Debug.h"
00035 
// Flag set to 1 by ProxyMgr::setSendSpanning() and reported by
// ProxyMgr::getSendSpanning(); 0 until then.
00036 int proxySendSpanning   = 0;
// Flag set to 1 by ProxyMgr::setRecvSpanning() and reported by
// ProxyMgr::getRecvSpanning(); 0 until then.
00037 int proxyRecvSpanning   = 0;
// Divisor used when locating the last intermediate node of a proxy spanning
// tree: lastInterNodeIdx = (treeSize-2)/proxySpanDim.
00038 const int proxySpanDim  = 9;
// NOTE(review): not referenced in the visible part of this file; presumably
// the fan-out used inside a single node -- confirm against its users.
00039 const int inNodeProxySpanDim = 16;
00040 
// Pack/unpack description for ProxyAtomsMsg: the `patch` field followed by
// the resizable `atomIDList`.
// NOTE(review): PACK_MSG/PACK/PACK_RESIZE are presumably provided by
// packmsg.h (included above) -- confirm the generated routine names there.
00041 PACK_MSG(ProxyAtomsMsg,
00042   PACK(patch);
00043   PACK_RESIZE(atomIDList);
00044 )
00045 
// Pack/unpack description for ProxySpanningTreeMsg: the `patch` and `node`
// fields followed by the resizable `tree`.
// NOTE(review): macro semantics presumably come from packmsg.h -- confirm.
00046 PACK_MSG(ProxySpanningTreeMsg,
00047   PACK(patch);
00048   PACK(node);
00049   PACK_RESIZE(tree);
00050 )
00051 
00052 void* ProxyResultMsg::pack(ProxyResultMsg *msg) {
00053 
00054   int msg_size = 0;
00055   msg_size += sizeof(msg->node);
00056   msg_size += sizeof(msg->patch);
00057 
00058   int j;
00059   for ( j = 0; j < Results::maxNumForces; ++j ) {
00060     int array_size = msg->forceList[j].size();
00061     msg_size += sizeof(array_size);
00062     msg_size += array_size * sizeof(char);    
00063     msg_size = ALIGN_8 (msg_size);
00064     Force* f = msg->forceList[j].begin();
00065     int nonzero_count = 0;
00066     for ( int i = 0; i < array_size; ++i ) {
00067       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00068     }
00069     msg_size += nonzero_count * sizeof(Vector);
00070   }
00071 
00072   void *msg_buf = CkAllocBuffer(msg,msg_size);
00073   char *msg_cur = (char *)msg_buf;
00074 
00075   CmiMemcpy((void*)msg_cur,(void*)(&(msg->node)),sizeof(msg->node));
00076   msg_cur += sizeof(msg->node);
00077   CmiMemcpy((void*)msg_cur,(void*)(&(msg->patch)),sizeof(msg->patch));
00078   msg_cur += sizeof(msg->patch);
00079   for ( j = 0; j < Results::maxNumForces; ++j ) {
00080     int array_size = msg->forceList[j].size();
00081     *(int *) msg_cur = array_size;
00082     msg_cur += sizeof(int);
00083     char *nonzero = msg_cur;
00084     msg_cur += array_size * sizeof(char);
00085     msg_cur = (char *)ALIGN_8 (msg_cur);
00086     Vector *farr = (Vector *)msg_cur;
00087     Force* f = msg->forceList[j].begin();
00088 
00089     for ( int i = 0; i < array_size; ++i ) {
00090       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00091         nonzero[i] = 1;
00092         farr->x = f[i].x;
00093         farr->y = f[i].y;
00094         farr->z = f[i].z;
00095         farr ++;
00096       } else {
00097         nonzero[i] = 0;
00098       }
00099     }
00100     msg_cur = (char *) farr;      
00101   }
00102 
00103   delete msg;
00104   return msg_buf;
00105 }
00106 
00107 ProxyResultMsg* ProxyResultMsg::unpack(void *ptr) {
00108 
00109   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyResultMsg));
00110   ProxyResultMsg *msg = new (vmsg) ProxyResultMsg;
00111   char *msg_cur = (char*)ptr;
00112 
00113   CmiMemcpy((void*)(&(msg->node)),(void*)msg_cur,sizeof(msg->node));
00114   msg_cur += sizeof(msg->node);
00115   CmiMemcpy((void*)(&(msg->patch)),(void*)msg_cur,sizeof(msg->patch));
00116   msg_cur += sizeof(msg->patch);
00117   int j;
00118   for ( j = 0; j < Results::maxNumForces; ++j ) {
00119     int array_size = *(int *) msg_cur;
00120     msg_cur += sizeof(array_size);
00121     msg->forceList[j].resize(array_size);
00122     char *nonzero = msg_cur;
00123     msg_cur += array_size * sizeof(char);    
00124     msg_cur = (char *)ALIGN_8 (msg_cur);
00125     Vector* farr = (Vector *) msg_cur;
00126     Force* f = msg->forceList[j].begin();
00127     for ( int i = 0; i < array_size; ++i ) {
00128       if ( nonzero[i] ) {
00129         f[i].x = farr->x;
00130         f[i].y = farr->y;
00131         f[i].z = farr->z;
00132         farr++;
00133       } else {
00134         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00135       }
00136     }    
00137     msg_cur = (char *) farr;
00138   }
00139 
00140   CkFreeMsg(ptr);
00141   return msg;
00142 }
00143 
00144 ProxyResultVarsizeMsg *ProxyResultVarsizeMsg::getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls){
00145 
00146     //1. decide the length of forceArr and iszero field.
00147     int tmpLen[Results::maxNumForces];
00148     int iszeroLen = 0;
00149     for (int i=0; i<Results::maxNumForces; i++){
00150         tmpLen[i] = fls[i].size();
00151         iszeroLen += tmpLen[i];
00152     }
00153     char *tmpIszero = new char[iszeroLen];
00154     char *iszeroPtr = tmpIszero;
00155     int fArrLen = 0;
00156     for(int i=0; i<Results::maxNumForces; i++) {        
00157         Force *fiPtr = fls[i].begin();
00158         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {         
00159             if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
00160                 *iszeroPtr=0;
00161                 fArrLen++;
00162             }else{
00163                 *iszeroPtr=1;
00164             }            
00165         }
00166     }
00167 
00168     //2. Ready to create the msg, and set all fields
00169     ProxyResultVarsizeMsg *retmsg = new(fArrLen, iszeroLen, prioSize)ProxyResultVarsizeMsg;
00170     retmsg->node = nid;
00171     retmsg->patch = pid;
00172     memcpy(retmsg->flLen, tmpLen, sizeof(int)*Results::maxNumForces);
00173     iszeroPtr = tmpIszero;
00174     Force *forcePtr = retmsg->forceArr;
00175     for(int i=0; i<Results::maxNumForces; i++) {        
00176         Force *fiPtr = fls[i].begin();
00177         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
00178             if((*iszeroPtr)!=1) {
00179                 forcePtr->x = fiPtr->x;
00180                 forcePtr->y = fiPtr->y;
00181                 forcePtr->z = fiPtr->z;
00182                 forcePtr++;
00183             }            
00184         }
00185     }
00186     memcpy(retmsg->isZero, tmpIszero, sizeof(char)*iszeroLen);
00187     delete [] tmpIszero;
00188     return retmsg;
00189 }
00190 
00191 ProxyNodeAwareSpanningTreeMsg *ProxyNodeAwareSpanningTreeMsg::getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size){
00192     int numAllPes = 0;
00193     for(int i=0; i<size; i++) {
00194         numAllPes += tree[i].numPes;
00195     }
00196     ProxyNodeAwareSpanningTreeMsg *retmsg = new(size, numAllPes, 0) ProxyNodeAwareSpanningTreeMsg;
00197     retmsg->patch = pid;
00198     retmsg->procID = nid;
00199     retmsg->numNodesWithProxies = size;    
00200     int *pAllPes = retmsg->allPes;
00201     for(int i=0; i<size; i++) {
00202         retmsg->numPesOfNode[i] = tree[i].numPes;
00203         for(int j=0; j<tree[i].numPes; j++) {
00204             *pAllPes = tree[i].peIDs[j];
00205             pAllPes++;
00206         }
00207     }
00208     return retmsg;
00209 }
00210 
00211 //Only available when macro PROCTRACE_DEBUG is defined
00212 void ProxyNodeAwareSpanningTreeMsg::printOut(char *tag){
00213 #ifdef PROCTRACE_DEBUG
00214     DebugFileTrace *dft = DebugFileTrace::Object();
00215     dft->openTrace();
00216     const char *patchname = "ProxyPatch";
00217     if(procID == CkMyPe()) patchname = "HomePatch";
00218     dft->writeTrace("%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes: \n", tag, patchname, patch, CkMyPe(), CkMyNode(), procID, numNodesWithProxies);
00219     if(numNodesWithProxies==0) {
00220         dft->closeTrace();
00221         return;
00222     }
00223     dft->writeTrace("%s: ===%d===pes/node: ", tag, patch);
00224     for(int i=0; i<numNodesWithProxies; i++) {
00225         dft->writeTrace("%d ", numPesOfNode[i]);
00226     }
00227     dft->writeTrace("\n%s: ===%d===pe list: ", tag, patch);
00228     int *p = allPes;
00229     for(int i=0; i<numNodesWithProxies; i++) {
00230         for(int j=0; j<numPesOfNode[i]; j++) {
00231             dft->writeTrace("%d ", *p);
00232             p++;
00233         }
00234     }
00235     dft->writeTrace("\n");    
00236     dft->closeTrace();
00237 #endif
00238 }
00239 
00240 // for spanning tree
00241 void* ProxyCombinedResultMsg::pack(ProxyCombinedResultMsg *msg) {
00242   int msg_size = 0;
00243   msg_size += sizeof(int) + msg->nodes.size()*sizeof(NodeID);
00244   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00245   msg_size += sizeof(msg->destPe);
00246   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00247   msg_size += sizeof(msg->isFromImmMsgCall);
00248   #endif
00249   #endif  
00250   msg_size += sizeof(msg->patch);
00251   int j;
00252   for ( j = 0; j < Results::maxNumForces; ++j ) {
00253     int array_size = msg->forceList[j].size();
00254     msg_size += sizeof(array_size);
00255     msg_size += array_size * sizeof(char);
00256     msg_size = ALIGN_8 (msg_size);
00257 
00258     Force* f = msg->forceList[j].begin();
00259     int nonzero_count = 0;
00260     for ( int i = 0; i < array_size; ++i ) {
00261       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00262     }
00263     msg_size += nonzero_count * sizeof(Force);
00264   }
00265 
00266   void *msg_buf = CkAllocBuffer(msg,msg_size);
00267   char *msg_cur = (char *)msg_buf;
00268 
00269   int nodeSize = msg->nodes.size();
00270   CmiMemcpy((void*)msg_cur,(void*)(&nodeSize), sizeof(nodeSize));
00271   msg_cur += sizeof(nodeSize);
00272   for (int i=0; i<nodeSize; i++) {
00273     CmiMemcpy((void*)msg_cur,(void*)(&msg->nodes[i]), sizeof(NodeID));
00274     msg_cur += sizeof(NodeID);
00275   }
00276   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00277   CmiMemcpy((void*)msg_cur,(void*)(&(msg->destPe)),sizeof(msg->destPe));
00278   msg_cur += sizeof(msg->destPe);
00279   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00280   CmiMemcpy((void*)msg_cur, (void*)(&(msg->isFromImmMsgCall)), sizeof(msg->isFromImmMsgCall));
00281   msg_cur += sizeof(msg->isFromImmMsgCall);
00282   #endif
00283   #endif
00284   CmiMemcpy((void*)msg_cur,(void*)(&(msg->patch)),sizeof(msg->patch));
00285   msg_cur += sizeof(msg->patch);
00286   for ( j = 0; j < Results::maxNumForces; ++j ) {
00287     int array_size = msg->forceList[j].size();
00288     CmiMemcpy((void*)msg_cur,(void*)(&array_size),sizeof(array_size));
00289     msg_cur += sizeof(array_size);
00290     char *nonzero = msg_cur;
00291     msg_cur += array_size * sizeof(char);
00292     msg_cur = (char *)ALIGN_8 (msg_cur);
00293     Vector *farr = (Vector *) msg_cur; 
00294     Force* f = msg->forceList[j].begin();
00295 
00296     for ( int i = 0; i < array_size; ++i ) {
00297       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00298         nonzero[i] = 1;
00299         farr->x  =  f[i].x;
00300         farr->y  =  f[i].y;
00301         farr->z  =  f[i].z;
00302 
00303         farr ++;
00304       } else {
00305         nonzero[i] = 0;
00306       }
00307     }
00308     msg_cur = (char *) farr;
00309   }
00310 
00311   delete msg;
00312   return msg_buf;
00313 }
00314 
00315 ProxyCombinedResultMsg* ProxyCombinedResultMsg::unpack(void *ptr) {
00316   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyCombinedResultMsg));
00317   ProxyCombinedResultMsg *msg = new (vmsg) ProxyCombinedResultMsg;
00318   char *msg_cur = (char*)ptr;
00319 
00320   int nodeSize;
00321   CmiMemcpy((void*)(&nodeSize),(void*)msg_cur,sizeof(nodeSize));
00322   msg_cur += sizeof(nodeSize);
00323   for (int i=0; i<nodeSize; i++) {
00324     msg->nodes.add(*(int *)msg_cur);
00325     msg_cur += sizeof(NodeID);
00326   }
00327   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00328   CmiMemcpy((void*)(&(msg->destPe)),(void*)msg_cur,sizeof(msg->destPe));
00329   msg_cur += sizeof(msg->destPe);
00330   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00331   CmiMemcpy((void *)(&(msg->isFromImmMsgCall)), (void*)msg_cur, sizeof(msg->isFromImmMsgCall));
00332   msg_cur += sizeof(msg->isFromImmMsgCall);  
00333   #endif
00334   #endif
00335   CmiMemcpy((void*)(&(msg->patch)),(void*)msg_cur,sizeof(msg->patch));
00336   msg_cur += sizeof(msg->patch);
00337   int j;
00338   for ( j = 0; j < Results::maxNumForces; ++j ) {
00339     int array_size;
00340     CmiMemcpy((void*)(&array_size),(void*)msg_cur,sizeof(array_size));
00341     msg_cur += sizeof(array_size);
00342     msg->forceList[j].resize(array_size);
00343     char *nonzero = msg_cur;
00344     msg_cur += array_size * sizeof(char);
00345     msg_cur = (char *)ALIGN_8 (msg_cur);
00346     Vector* farr = (Vector *) msg_cur;
00347     Force* f = msg->forceList[j].begin();
00348 
00349     for ( int i = 0; i < array_size; ++i ) {
00350       if ( nonzero[i] ) {
00351         f[i].x = farr->x;
00352         f[i].y = farr->y;
00353         f[i].z = farr->z;
00354         farr++;
00355       } else {
00356         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00357       }
00358     }
00359     msg_cur = (char *) farr;
00360   }
00361 
00362   CkFreeMsg(ptr);
00363   return msg;
00364 }
00365 
00366 // class static
00367 int ProxyMgr::nodecount = 0;
00368 
00369 ProxyMgr::ProxyMgr() { 
00370   if (CkpvAccess(ProxyMgr_instance)) {
00371     NAMD_bug("Tried to create ProxyMgr twice.");
00372   }
00373   CkpvAccess(ProxyMgr_instance) = this;
00374 }
00375 
00376 ProxyMgr::~ProxyMgr() { 
00377   removeProxies();
00378   CkpvAccess(ProxyMgr_instance) = NULL;
00379 }
00380 
00381 
00382 void ProxyMgr::setSendSpanning() {
00383   proxySendSpanning = 1;
00384 }
00385 
00386 int ProxyMgr::getSendSpanning() {
00387   return proxySendSpanning;
00388 }
00389 
00390 void ProxyMgr::setRecvSpanning() {
00391   proxyRecvSpanning = 1;
00392 }
00393 
00394 int ProxyMgr::getRecvSpanning() {
00395   return proxyRecvSpanning;
00396 }
00397 
00398 ProxyTree &ProxyMgr::getPtree() {
00399   return ptree;
00400 }
00401 
00402 void ProxyMgr::removeProxies(void)
00403 {
00404   ProxySetIter pi(proxySet);
00405   for ( pi = pi.begin(); pi != pi.end(); pi++)
00406   {
00407     delete pi->proxyPatch;
00408   }
00409   proxySet.clear();
00410 }
00411 
00412 void ProxyMgr::removeUnusedProxies(void)
00413 {
00414   ResizeArray<PatchID> toDelete;
00415   ProxySetIter pi(proxySet);
00416   for ( pi = pi.begin(); pi != pi.end(); pi++)
00417   {
00418     if ( pi->proxyPatch->getNumComputes() == 0 ) {
00419       toDelete.add(pi->patchID);
00420       //fprintf(stderr, "Proxy Deleted Patch %d Proc %d", pi->patchID, CkMyPe());
00421     }
00422   }
00423   PatchID *pidi = toDelete.begin();
00424   for ( ; pidi != toDelete.end(); ++pidi ) {
00425     removeProxy(*pidi);
00426   }
00427 }
00428 
00429 // Figure out which proxies we need and create them
00430 void ProxyMgr::createProxies(void)
00431 {
00432   // Delete the old proxies.
00433   removeProxies();
00434 
00435   PatchMap *patchMap = PatchMap::Object();
00436   int numPatches = patchMap->numPatches();
00437   int myNode = CkMyPe();
00438   enum PatchFlag { Unknown, Home, NeedProxy };
00439   int *patchFlag = new int[numPatches]; 
00440   int i, j;
00441 
00442   // Note all home patches.
00443   for ( i = 0; i < numPatches; ++i )
00444   {
00445     patchFlag[i] = ( patchMap->node(i) == myNode ) ? Home : Unknown;
00446   }
00447 
00448   // Add all upstream neighbors.
00449   PatchID neighbors[PatchMap::MaxOneAway];
00450   PatchIDList basepids;
00451   patchMap->basePatchIDList(myNode,basepids);
00452   for ( i = 0; i < basepids.size(); ++i )
00453   {
00454     if ( patchMap->node(basepids[i]) != myNode ) {
00455         patchFlag[basepids[i]] = NeedProxy;
00456     }
00457     int numNeighbors = patchMap->upstreamNeighbors(basepids[i],neighbors);
00458     for ( j = 0; j < numNeighbors; ++j )
00459     {
00460       if ( ! patchFlag[neighbors[j]] ) {
00461         patchFlag[neighbors[j]] = NeedProxy;
00462       }
00463     }
00464   }
00465 
00466   // Check all patch-based compute objects.
00467   ComputeMap *computeMap = ComputeMap::Object();
00468   int nc = computeMap->numComputes();
00469   for ( i = 0; i < nc; ++i )
00470   {
00471     if ( computeMap->node(i) != myNode || !computeMap->isPatchBased(i) ) 
00472       continue;
00473     int numPid = computeMap->numPids(i);
00474     for ( j = 0; j < numPid; ++j )
00475     {
00476       int pid = computeMap->pid(i,j);
00477       if ( ! patchFlag[pid] ) {
00478         patchFlag[pid] = NeedProxy;
00479       }
00480     }
00481   }
00482   
00483   // Create proxy list
00484   for ( i = 0; i < numPatches; ++i ) {
00485     if ( patchFlag[i] == NeedProxy )
00486     { // create proxy patch
00487       ProxyPatch *proxy = new ProxyPatch(i);
00488       proxySet.add(ProxyElem(i, proxy));
00489       patchMap->registerPatch(i, proxy);
00490     }
00491   }
00492   delete[] patchFlag;
00493 }
00494 
00495 void
00496 ProxyMgr::createProxy(PatchID pid) {
00497   Patch *p = PatchMap::Object()->patch(pid);
00498   if (!p) {
00499      DebugM(4,"createProxy("<<pid<<")\n");
00500      ProxyPatch *proxy = new ProxyPatch(pid);
00501      proxySet.add(ProxyElem(pid,proxy));
00502      PatchMap::Object()->registerPatch(pid,proxy);
00503   }
00504   else {
00505      DebugM(4,"createProxy("<<pid<<") found " << p->getPatchID() << "\n");
00506   }
00507     
00508 }
00509 
00510 void
00511 ProxyMgr::removeProxy(PatchID pid) {
00512   ProxyElem *p = proxySet.find(ProxyElem(pid));
00513   if (p) { 
00514     PatchMap::Object()->unregisterPatch(pid,p->proxyPatch);
00515     delete p->proxyPatch;
00516     proxySet.del(ProxyElem(pid));
00517     // iout << iINFO << "Removing unused proxy " << pid << " on " << iPE << ".\n" << endi;
00518   }
00519 }
00520   
00521 void
00522 ProxyMgr::registerProxy(PatchID pid) {
00523   // determine which node gets message
00524   NodeID node = PatchMap::Object()->node(pid);
00525 
00526   RegisterProxyMsg *msg = new RegisterProxyMsg;
00527 
00528   msg->node=CkMyPe();
00529   msg->patch = pid;
00530 
00531   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00532   cp[node].recvRegisterProxy(msg);
00533 }
00534 
00535 void
00536 ProxyMgr::recvRegisterProxy(RegisterProxyMsg *msg) {
00537   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00538   homePatch->registerProxy(msg); // message deleted in registerProxy()
00539 }
00540 
00541 void
00542 ProxyMgr::unregisterProxy(PatchID pid) {
00543   // determine which node gets message
00544   NodeID node = PatchMap::Object()->node(pid);
00545 
00546   UnregisterProxyMsg *msg = new UnregisterProxyMsg;
00547 
00548   msg->node=CkMyPe();
00549   msg->patch = pid;
00550 
00551   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00552   cp[node].recvUnregisterProxy(msg);
00553 }
00554 
00555 void
00556 ProxyMgr::recvUnregisterProxy(UnregisterProxyMsg *msg) {
00557   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00558   homePatch->unregisterProxy(msg); // message deleted in registerProxy()
00559 }
00560 
00561 void 
00562 ProxyMgr::buildProxySpanningTree()
00563 {
00564   PatchIDList pids;
00565   if (!CkMyPe()) iout << iINFO << "Building spanning tree ... send: " << proxySendSpanning << " recv: " << proxyRecvSpanning << "\n" << endi;
00566   PatchMap::Object()->homePatchIDList(pids);
00567   for (int i=0; i<pids.size(); i++) {
00568     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00569     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00570 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00571     home->buildNodeAwareSpanningTree();
00572 #else
00573     home->buildSpanningTree();
00574 #endif
00575   }
00576 }
00577 
00578 void 
00579 ProxyMgr::buildProxySpanningTree2()
00580 {
00581   PatchIDList pids;
00582   PatchMap::Object()->homePatchIDList(pids);
00583   for (int i=0; i<pids.size(); i++) {
00584     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00585     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00586     home->sendProxies();
00587   }
00588 }
00589 
00590 void 
00591 ProxyMgr::sendProxies(int pid, int *list, int n)
00592 {
00593   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00594   cp[0].recvProxies(pid, list, n);
00595 }
00596 
00597 //The value defines the max number of intermediate proxies (acting
00598 //as the node to relay proxy msgs to children) allowed to reside 
00599 //on a physical node for proxy spanning tree
00600 #define MAX_INTERNODE 1
00601 
00602 // only on PE 0
00603 void 
00604 ProxyMgr::recvProxies(int pid, int *list, int n)
00605 {
00606   int nPatches = PatchMap::Object()->numPatches();
00607   if (ptree.proxylist == NULL)
00608     ptree.proxylist = new NodeIDList[nPatches];
00609   ptree.proxylist[pid].resize(n);
00610   for (int i=0; i<n; i++)
00611     ptree.proxylist[pid][i] = list[i];
00612   ptree.proxyMsgCount ++;
00613   if (ptree.proxyMsgCount == nPatches) {
00614     ptree.proxyMsgCount = 0;
00615     // building and sending of trees is done in two steps now
00616     // so that the building step can be shifted to the load balancer
00617 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00618     buildNodeAwareSpanningTree0();
00619 #else
00620     buildSpanningTree0();    
00621 #endif
00622     sendSpanningTrees();
00623   }
00624 }
00625 
00626 //
00627 // XXX static and global variables are unsafe for shared memory builds.
00628 // The global and static vars should be eliminated.  
00629 // Unfortunately, the routines that use these below are actually 
00630 // in use in NAMD.
00631 //
extern double *cpuloads;
static int *procidx = NULL;
static double averageLoad = 0.0;

/**
 * qsort comparator over PE indices: orders them by cpuloads[] from high to
 * low. `a` and `b` each point to an int index into cpuloads.
 */
static int compLoad(const void *a, const void *b)
{
  const double loadA = cpuloads[*(int *)a];
  const double loadB = cpuloads[*(int *)b];
  if (loadA == loadB) return 0;
  // larger load sorts first
  return (loadA < loadB) ? 1 : -1;
}
00650 
00651 static void processCpuLoad()
00652 {
00653   int i;
00654   if (!procidx) {
00655     procidx = new int[CkNumPes()];
00656   }
00657   for (i=0; i<CkNumPes(); i++) procidx[i] = i;
00658   qsort(procidx, CkNumPes(), sizeof(int), compLoad);
00659 
00660   double averageLoad = 0.0;
00661   for (i=0; i<CkNumPes(); i++) averageLoad += cpuloads[i];
00662   averageLoad /= CkNumPes();
00663 //  iout << "buildSpanningTree1: no intermediate node on " << procidx[0] << " " << procidx[1] << endi;
00664 
00665 }
00666 
00667 static int noInterNode(int p)
00668 {
00669   int exclude = 0;
00670   if(CkNumPes()<1025)
00671     exclude = 5;
00672   else if(CkNumPes()<4097)
00673     exclude = 10;
00674   else if(CkNumPes()<8193)
00675     exclude = 40;
00676   else if(CkNumPes()<16385)
00677     exclude = 40;
00678   else
00679     exclude = 80;
00680   for (int i=0; i<exclude; i++) if (procidx[i] == p) return 1;
00681 //  if (cpuloads[p] > averageLoad) return 1;
00682   return 0;
00683 }
00684 
00685 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00686 //only on PE 0
00687 void ProxyMgr::buildNodeAwareSpanningTree0(){
00688     int numPatches = PatchMap::Object()->numPatches();
00689     if (ptree.naTrees == NULL) ptree.naTrees = new proxyTreeNodeList[numPatches];
00690     //each element indiates the number of proxies residing on this node    
00691     int *proxyNodeMap = new int[CkNumNodes()];    
00692     for (int pid=0; pid<numPatches; pid++)     
00693         buildSinglePatchNodeAwareSpanningTree(pid, ptree.proxylist[pid], ptree.naTrees[pid], proxyNodeMap);
00694        
00695 
00696     //Debug
00697     //printf("#######################Naive ST#######################\n");
00698     //printProxySpanningTree();
00699 
00700     //Now the naive spanning tree has been constructed and stored in oneNATree;
00701     //Afterwards, some optimizations on this naive spanning tree could be done.
00702     //except the first element as the tree root always contains the processor
00703     //that has home patch
00704 
00705     //1st Optimization: reduce intermediate nodes as much as possible. In details,
00706     //the optimal case is that on a single physical smp node, there should be no
00707     //two proxies who act as the intermediate nodes to pass information to childrens
00708     //in the spanning tree. E.g, for patch A's proxy spanning tree, it has a node X as
00709     //its intermediate node. However, for patch B's, it also has a node X as its intermediate
00710     //node. We should avoid this situation as node X becomes the bottleneck as it has twice
00711     //amount of work to process now.
00712     //Step1: foward to the first patch that has proxies
00713     //Now proxyNodeMap records the info that how many intermediate nodes on a node
00714     memset(proxyNodeMap, 0, sizeof(int)*CkNumNodes());
00715     int pid=0;
00716     for(;pid<numPatches; pid++) {
00717         if(ptree.proxylist[pid].size()>0) break;
00718     }
00719     if(pid==numPatches) {
00720         delete [] proxyNodeMap;
00721         return;
00722     }
00723     proxyTreeNodeList onePatchT = ptree.naTrees[pid];
00724     //If a node is an intermediate node, then its idx should satisfy
00725     //idx*proxySpanDim + 1 < onePatchT.size()
00726     int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00727     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00728         int nid = onePatchT.item(i).nodeID;
00729         proxyNodeMap[nid]++;
00730     }
00731     //Step2: iterate over each patch's proxy spanning tree to adjust
00732     //the tree node positions. The bad thing here is that it may involve
00733     //many memory allocations and deallocation for small-size (~100bytes)
00734     //chunks.
00735     pid++; //advance to the next patch
00736     for(; pid<numPatches; pid++) {
00737         if(ptree.proxylist[pid].size()==0) continue;
00738         onePatchT = ptree.naTrees[pid];
00739         lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00740         for(int i=1; i<=lastInterNodeIdx; i++) {
00741             int nid = onePatchT.item(i).nodeID;
00742             if(proxyNodeMap[nid]<MAX_INTERNODE) {
00743                 proxyNodeMap[nid]++;
00744                 continue;
00745             }
00746             //the position is occupied, so search the children
00747             //nodes to see whether there's one to swap this node
00748             //if not found, find the first position that has smallest
00749             //amount of nodes.
00750             int leastIdx = -1;
00751             int leastAmount = ~(1<<31);
00752             //iterate children nodes
00753             int swapPos;
00754             for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.size(); swapPos++) {
00755                 int chiNId = onePatchT.item(swapPos).nodeID;
00756                 if(proxyNodeMap[chiNId]<MAX_INTERNODE) {
00757                     break;
00758                 }
00759                 if(proxyNodeMap[chiNId]<leastAmount) {
00760                     leastAmount = proxyNodeMap[chiNId];
00761                     leastIdx = swapPos;
00762                 }
00763             }
00764             CmiAssert(leastIdx!=-1); //because the above loop at least executes once
00765             if(swapPos==onePatchT.size()) {
00766                 //indicate we cannot find a physical node which
00767                 //still allows the intermediate proxy.
00768                 swapPos = leastIdx;
00769             }
00770             //swap the current proxy tree node "i" with node "swapPos"
00771             proxyTreeNode *curNode = &onePatchT.item(i);
00772             proxyTreeNode *swapNode = &onePatchT.item(swapPos);
00773             proxyNodeMap[swapNode->nodeID]++; //update the proxyNodeMap record
00774             int tmp = curNode->nodeID;
00775             curNode->nodeID = swapNode->nodeID;
00776             swapNode->nodeID = tmp;
00777             tmp = curNode->numPes;
00778             int tmpPes[tmp];
00779             memcpy(tmpPes, curNode->peIDs, sizeof(int)*tmp);
00780             delete [] curNode->peIDs;
00781             curNode->numPes = swapNode->numPes;
00782             curNode->peIDs = new int[swapNode->numPes];
00783             memcpy(curNode->peIDs, swapNode->peIDs, sizeof(int)*swapNode->numPes);
00784             swapNode->numPes = tmp;
00785             delete [] swapNode->peIDs;
00786             swapNode->peIDs = new int[tmp];
00787             memcpy(swapNode->peIDs, tmpPes, sizeof(int)*tmp);                      
00788         }
00789     }
00790     delete [] proxyNodeMap;    
00791 
00792     //Debug
00793     //printf("#######################After 1st optimization#######################\n");
00794     //printProxySpanningTree();
00795 
00796     //2nd optimization: similar to the 1st optimization but now thinking in
00797     //the core level. If we cannot avoid place two intermediate proxy
00798     //on the same node, we'd better to place them in different cores inside
00799     //the node
00800     if(CmiMyNodeSize()==1) {
00801         //No need to perform the second optimization as every node has only 1 core
00802         return;
00803     }
00804     int *proxyCoreMap = new int[CkNumPes()];
00805     memset(proxyCoreMap, 0, sizeof(int)*CkNumPes());
00806     //Step1: forward to the first patch that has proxies
00807     pid=0;
00808     for(;pid<numPatches; pid++) {
00809         if(ptree.proxylist[pid].size()>0) break;
00810     }
00811     if(pid==numPatches) {
00812         delete [] proxyCoreMap;
00813         return;
00814     }
00815     onePatchT = ptree.naTrees[pid];
00816     //If a node is an intermediate node, then its idx should satisfy
00817     //idx*proxySpanDim + 1 < onePatchT.size()
00818     lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00819     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00820         int rootProcID = onePatchT.item(i).peIDs[0];
00821         proxyCoreMap[rootProcID]++;
00822     }
00823     //Step2: iterate over each patch's proxy spanning tree to adjust
00824     //the root's position of intermediate proxies.
00825     pid++; //advance to the next patch
00826     for(; pid<numPatches; pid++) {
00827         if(ptree.proxylist[pid].size()==0) continue;
00828         onePatchT = ptree.naTrees[pid];
00829         lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00830         for(int i=1; i<=lastInterNodeIdx; i++) {
00831             proxyTreeNode *curNode = &onePatchT.item(i);
00832             int rootProcID = curNode->peIDs[0];
00833             if(curNode->numPes==1 || proxyCoreMap[rootProcID]<MAX_INTERNODE){
00834                 //if this node contains only 1 core, then we have to leave it as it is
00835                 //because there are no other cores in the same node that could be used to
00836                 //adjust
00837                 proxyCoreMap[rootProcID]++;
00838                 continue;
00839             }
00840             
00841             //foound more than MAX_INTERNODE intermediate proxies on the same core,
00842             //adjust the root id of the core of this proxy tree node
00843             int leastIdx = -1;
00844             int leastAmount = ~(1<<31);
00845             //iterate children nodes
00846             int swapPos;
00847             
00848             for(swapPos=1; swapPos<curNode->numPes; swapPos++) {
00849                 int otherCoreID = curNode->peIDs[swapPos];
00850                 if(proxyCoreMap[otherCoreID]<MAX_INTERNODE) {
00851                     break;
00852                 }
00853                 if(proxyCoreMap[otherCoreID]<leastAmount) {
00854                     leastAmount = proxyCoreMap[otherCoreID];
00855                     leastIdx = swapPos;
00856                 }
00857             }
00858             CmiAssert(leastIdx!=-1); //because the above loop body must execute at least once
00859             if(swapPos==curNode->numPes) {
00860                 //indicate we cannot find a physical node which
00861                 //still allows the intermediate proxy.
00862                 swapPos = leastIdx;
00863             }
00864             int tmp = curNode->peIDs[swapPos];
00865             curNode->peIDs[swapPos] = curNode->peIDs[0];
00866             curNode->peIDs[0] = tmp;
00867             proxyCoreMap[tmp]++;
00868         }      
00869     }
00870 
00871     delete proxyCoreMap;
00872 
00873     //Debug
00874     //printf("#######################After 2nd optimization#######################\n");
00875     //printProxySpanningTree();
00876 }
00877 
00878 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(PatchID pid, NodeIDList &proxyList, 
00879                                                      proxyTreeNodeList &ptnTree, int *proxyNodeMap){       
00880     int numProxies = proxyList.size();
00881     if (numProxies == 0) {
00882         CkPrintf ("This is sheer evil in building node-aware spanning tree!\n\n");            
00883         return;
00884     }        
00885     
00886     memset(proxyNodeMap, 0, sizeof(int)*CkNumNodes());
00887     int proxyNodeList[numProxies+1]; //including the root node             
00888     
00889     //the processor id of home patch
00890     int hpProcID = PatchMap::Object()->node(pid);
00891     int hpNodeID = CkNodeOf(hpProcID);
00892     proxyNodeMap[hpNodeID]++;
00893     proxyNodeList[0] = hpNodeID;
00894     int numNodesWithProxies = 1;
00895     
00896     for(int i=0; i<numProxies; i++) {
00897         int procId = proxyList[i];
00898         int nodeId = CkNodeOf(procId);
00899         proxyNodeMap[nodeId]++;
00900         if(proxyNodeMap[nodeId]==1) {
00901             proxyNodeList[numNodesWithProxies] = nodeId;
00902             numNodesWithProxies++;
00903         }
00904     }
00905     proxyTreeNodeList &oneNATree = ptnTree;   // spanning tree
00906     oneNATree.resize(numNodesWithProxies);
00907     //initialize oneNATree
00908     for(int i=0; i<numNodesWithProxies; i++) {
00909         proxyTreeNode *oneNode = &oneNATree.item(i);
00910         delete oneNode->peIDs;
00911         oneNode->nodeID = proxyNodeList[i];
00912         oneNode->peIDs = new int[proxyNodeMap[oneNode->nodeID]];                        
00913         oneNode->numPes = 0; //initially set to zero as used for incrementing later
00914     }
00915     
00916     //set up the tree root which contains the home patch processor
00917     proxyTreeNode *rootnode = &oneNATree.item(0);
00918     rootnode->peIDs[0] = hpProcID;
00919     rootnode->numPes++;
00920     
00921     for(int i=0; i<numProxies; i++) {
00922         int procId = proxyList[i];
00923         int nodeId = CkNodeOf(procId);
00924         int idxInTree = -1;
00925         for(int j=0; j<numNodesWithProxies; j++) {
00926             if(proxyNodeList[j] == nodeId) {
00927                 idxInTree = j;
00928                 break;
00929             }
00930         }
00931         CmiAssert(idxInTree!=-1);
00932         proxyTreeNode *oneNode = &oneNATree.item(idxInTree);
00933         oneNode->peIDs[oneNode->numPes] = procId;
00934         oneNode->numPes++;
00935     }
00936 }
00937 #else //branch of NODEAWARE_PROXY_SPANNINGTREE
00938 // only on PE 0
00939 void 
00940 ProxyMgr::buildSpanningTree0()
00941 {
00942   int i;
00943 
00944   processCpuLoad();
00945 
00946   int *numPatchesOnNode = new int[CkNumPes()];
00947   int numNodesWithPatches = 0;
00948   for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
00949   int numPatches = PatchMap::Object()->numPatches();
00950   for (i=0; i<numPatches; i++) {
00951     int node = PatchMap::Object()->node(i);
00952     numPatchesOnNode[node]++;
00953     if (numPatchesOnNode[node] == 1)
00954       numNodesWithPatches ++;
00955   }
00956   int patchNodesLast =
00957     ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
00958   int *ntrees = new int[CkNumPes()];
00959   for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
00960   if (ptree.trees == NULL) ptree.trees = new NodeIDList[numPatches];
00961   for (int pid=0; pid<numPatches; pid++) 
00962   {
00963     int numProxies = ptree.proxylist[pid].size();
00964     if (numProxies == 0) {
00965       CkPrintf ("This is sheer evil!\n\n");
00966       //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
00967       return;
00968     }
00969     NodeIDList &tree = ptree.trees[pid];   // spanning tree
00970     NodeIDList oldtree = tree;
00971     tree.resize(numProxies+1);
00972     tree.setall(-1);
00973     tree[0] = PatchMap::Object()->node(pid);
00974     int s=1, e=numProxies;
00975     int nNonPatch = 0;
00976     int treesize = 1;
00977     int pp;
00978 
00979     // keep tree persistent for non-intermediate nodes
00980     for (pp=0; pp<numProxies; pp++) {
00981       int p = ptree.proxylist[pid][pp];
00982       int oldindex = oldtree.find(p);
00983       if (oldindex != -1 && oldindex <= numProxies) {
00984         int isIntermediate = (oldindex*proxySpanDim+1 <= numProxies);
00985         if (!isIntermediate) {
00986           tree[oldindex] = p;
00987         }
00988         else if (ntrees[p] < MAX_INTERNODE) {
00989           tree[oldindex] = p;
00990           ntrees[p] ++;
00991         }
00992       }
00993     }
00994 
00995     for (pp=0; pp<numProxies; pp++) {
00996       int p = ptree.proxylist[pid][pp];              // processor number
00997       if (tree.find(p) != -1) continue;        // already used
00998       treesize++;
00999       if (patchNodesLast && numPatchesOnNode[p] ) {
01000         while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
01001         tree[e] = p;
01002         int isIntermediate = (e*proxySpanDim+1 <= numProxies);
01003         if (isIntermediate) ntrees[p]++;
01004       }
01005       else {
01006         while (tree[s] != -1) { s++; if (s==numProxies+1) s = 1; }
01007         int isIntermediate = (s*proxySpanDim+1 <= numProxies);
01008         if (isIntermediate && (ntrees[p] >= MAX_INTERNODE || noInterNode(p))) {   // TOO MANY INTERMEDIATE TREES
01009         //if (isIntermediate && ntrees[p] >= MAX_INTERNODE)    // TOO MANY INTERMEDIATE TREES
01010           while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
01011           tree[e] = p;
01012           isIntermediate = (e*proxySpanDim+1 <= numProxies);
01013           if (isIntermediate) ntrees[p]++;
01014         }
01015         else {
01016           tree[s] = p;
01017           nNonPatch++;
01018           if (isIntermediate) ntrees[p]++;
01019         }
01020       }
01021     }
01022     // send homepatch's proxy tree
01023     if(ptree.sizes)
01024       ptree.sizes[pid] = treesize;
01025     //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, &tree[0], treesize);
01026   }
01027   /*for (i=0; i<CkNumPes(); i++) {
01028     if (ntrees[i] > MAX_INTERNODE) iout << "Processor " << i << "has (guess) " << ntrees[i] << " intermediate nodes." << endi;
01029   }*/
01030   delete [] ntrees;
01031   delete [] numPatchesOnNode;
01032 }
01033 #endif
01034 
01035 void ProxyMgr::sendSpanningTrees()
01036 {
01037   int numPatches = PatchMap::Object()->numPatches();
01038   for (int pid=0; pid<numPatches; pid++) {
01039     int numProxies = ptree.proxylist[pid].size();
01040 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01041     if (numProxies == 0)
01042       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, NULL, 0);
01043     else {
01044       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, ptree.naTrees[pid].begin(), ptree.naTrees[pid].size());
01045     }
01046 #else
01047     if (numProxies == 0)
01048       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
01049     else {
01050       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, ptree.trees[pid].begin(), ptree.trees[pid].size());
01051     }
01052 #endif
01053   }
01054 }
01055 
01056 void ProxyMgr::sendSpanningTreeToHomePatch(int pid, int *tree, int n)
01057 {
01058   CProxy_ProxyMgr cp(thisgroup);
01059   cp[PatchMap::Object()->node(pid)].recvSpanningTreeOnHomePatch(pid, tree, n);
01060 }
01061 
01062 void ProxyMgr::recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
01063 {
01064   HomePatch *p = PatchMap::Object()->homePatch(pid);
01065   p->recvSpanningTree(tree, n);
01066 }
01067 
01068 void ProxyMgr::sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
01069 {
01070   CProxy_ProxyMgr cp(thisgroup);
01071   ProxyNodeAwareSpanningTreeMsg *msg = ProxyNodeAwareSpanningTreeMsg::getANewMsg(pid, -1, tree, n);
01072   cp[PatchMap::Object()->node(pid)].recvNodeAwareSpanningTreeOnHomePatch(msg);
01073 }
01074 
01075 void ProxyMgr::recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
01076 {
01077   HomePatch *p = PatchMap::Object()->homePatch(msg->patch);
01078   p->recvNodeAwareSpanningTree(msg);
01079   delete msg;
01080 }
01081 
01082 void 
01083 ProxyMgr::sendSpanningTree(ProxySpanningTreeMsg *msg) {
01084   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01085   cp[msg->tree[0]].recvSpanningTree(msg);
01086 }
01087 
01088 void ProxyMgr::sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01089   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01090   int pe = msg->allPes[0]; //the root procID
01091 
01092 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01093   DebugFileTrace *dft = DebugFileTrace::Object();
01094   dft->openTrace();
01095   dft->writeTrace("PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->patch);
01096   dft->closeTrace();
01097 #endif
01098 
01099   cp[pe].recvNodeAwareSpanningTree(msg);
01100 }
01101 
01102 void 
01103 ProxyMgr::recvSpanningTree(ProxySpanningTreeMsg *msg) {
01104   int size = msg->tree.size();
01105   int child[proxySpanDim];
01106   int nChild = 0;
01107   int i;
01108   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01109   for (i=0; i<proxySpanDim; i++) {
01110     if (size > i+1) { child[i] = msg->tree[i+1]; nChild++; }
01111   }
01112   if (!PatchMap::Object()->homePatch(msg->patch)) {
01113     proxy->setSpanningTree(msg->node, child, nChild);
01114   }
01115 
01116   // build subtree and pass down
01117   if (nChild == 0) return;
01118 
01119   nodecount ++;
01120   //if (nodecount > MAX_INTERNODE) 
01121   //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01122 
01123 //CkPrintf("[%d] %d:(%d) %d %d %d %d %d\n", CkMyPe(), msg->patch, size, msg->tree[0], msg->tree[1], msg->tree[2], msg->tree[3], msg->tree[4]);
01124   NodeIDList *tree = new NodeIDList[proxySpanDim];
01125   int level = 1, index=1;
01126   int done = 0;
01127   while (!done) {
01128     for (int n=0; n<nChild; n++) {
01129       if (done) break;
01130       for (int j=0; j<level; j++) {
01131        if (index >= size) { done = 1; break; }
01132        tree[n].add(msg->tree[index]);
01133        index++;
01134       }
01135     }
01136     level *=proxySpanDim;
01137   }
01138 
01139   ProxyMgr *proxyMgr = ProxyMgr::Object();
01140   for (i=0; i<proxySpanDim; i++) {
01141     if (tree[i].size()) {
01142       ProxySpanningTreeMsg *cmsg = new ProxySpanningTreeMsg;
01143       cmsg->patch = msg->patch;
01144       cmsg->node = CkMyPe();
01145       cmsg->tree = tree[i];
01146       proxyMgr->sendSpanningTree(cmsg);
01147     }
01148   }
01149 
01150   delete [] tree;
01151   delete msg;
01152 }
01153 
01154 //NOTE: have not considered how to deal with spanning tree inside a single physical node
01155 //--Chao Mei
01156 void ProxyMgr::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01157 
01158     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01159     DebugFileTrace *dft = DebugFileTrace::Object();
01160     dft->openTrace();
01161     dft->writeTrace("PMgr::recvST0 for patch[%d] with #nodes=%d\n", msg->patch, msg->numNodesWithProxies);
01162     dft->closeTrace();
01163     msg->printOut("PMgr::recvST");
01164     #endif
01165 
01166     //This function is divided into three parts. The tree root is msg->allPes[0]
01167     //1. set up its own immediate childrens
01168     int treesize = msg->numNodesWithProxies;    
01169     int iNChild = 0; //number of internal children
01170     int eNChild = 0; //number of external children
01171     if(treesize>0){
01172         iNChild = (msg->numPesOfNode[0]-1); //exclude the root itself
01173         eNChild = (proxySpanDim>(treesize-1))?(treesize-1):proxySpanDim;
01174     }
01175     int numChild = iNChild + eNChild;
01176     if(numChild==0){
01177         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01178         proxy->setSpanningTree(msg->procID, NULL, 0);
01179         #ifdef USE_NODEPATCHMGR
01180         //set up proxyInfo inside NodeProxyMgr
01181         if(!PatchMap::Object()->homePatch(msg->patch)){
01182             //only when this processor contains a proxy patch of "msg->patch"
01183             //is the patch registeration in NodeProxyMgr needed,
01184             //and itself needs to be registered
01185             CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01186             NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01187             npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);            
01188         }
01189         //set children in terms of node ids
01190         proxy->setSTNodeChildren(0, NULL);       
01191         #endif
01192         return;
01193     }
01194 
01195     nodecount++;
01196     //if (nodecount > MAX_INTERNODE) 
01197     //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01198 
01199     if(!PatchMap::Object()->homePatch(msg->patch)){
01200         //the home patch of this spanning tree has been already set up for its childrens
01201         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01202         int *children = (int*)alloca(numChild*sizeof(int));
01203         //add external children
01204         int *p = msg->allPes + msg->numPesOfNode[0];
01205         for(int i=0; i<eNChild; i++) {
01206             children[i] = *p;
01207             p += msg->numPesOfNode[i+1];
01208         }
01209         //add internal children
01210         for(int i=eNChild, j=1; i<numChild; i++, j++) {
01211             children[i] = msg->allPes[j]; 
01212         }
01213         proxy->setSpanningTree(msg->procID, children, numChild);
01214 
01215         #ifdef USE_NODEPATCHMGR
01216         //set up proxyInfo inside NodeProxyMgr
01217         CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01218         NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01219         npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);        
01220 
01221         //set children in terms of node ids
01222         int nodeChildren[eNChild+1];
01223         p = msg->allPes + msg->numPesOfNode[0];
01224         for(int i=0; i<eNChild; i++) {
01225             nodeChildren[i] = CkNodeOf(*p);
01226             p += msg->numPesOfNode[i+1];
01227         }
01228         //the last entry always stores the node id that contains this proxy
01229         nodeChildren[eNChild] = CkNodeOf(msg->allPes[0]);
01230         proxy->setSTNodeChildren(eNChild+1, nodeChildren);
01231         #endif
01232     }
01233 
01234     //2. send msgs for the tree to external children proxies
01235     if(eNChild > 0) {
01236         ResizeArray<int> *exTreeChildSize = new ResizeArray<int>[eNChild];
01237         ResizeArray<int *> *exTreeChildPtr = new ResizeArray<int *>[eNChild];    
01238     
01239         int nodesToCnt = 1; //the number of children each root (current root's 
01240                             //immedidate external nodes) has in each level
01241         int pos = 1; //track the iteration over msg->numPesOfNode and skip the current root
01242         int *pePtr = msg->allPes + msg->numPesOfNode[0];
01243         int done = 0;
01244         while(!done) {
01245             for(int childID=0; childID<eNChild;childID++) {
01246                 //iterate nodes on each level
01247                 for(int i=0; i<nodesToCnt; i++) {
01248                     int cursize = msg->numPesOfNode[pos];
01249                     exTreeChildSize[childID].add(cursize);
01250                     exTreeChildPtr[childID].add(pePtr);
01251                     pos++;
01252                     pePtr += cursize; 
01253                     if(pos==msg->numNodesWithProxies) {
01254                         done = 1;
01255                         break;
01256                     }
01257                 }
01258                 if(done) break;                         
01259             }
01260             nodesToCnt *= proxySpanDim;
01261         }
01262           
01263         for(int i=0; i<eNChild; i++) {                
01264             ResizeArray<int> *allSizes = &exTreeChildSize[i];
01265             ResizeArray<int *> *allPtrs = &exTreeChildPtr[i];
01266             int totalNodes = allSizes->size();
01267             int totalPes = 0;
01268             for(int j=0; j<totalNodes; j++) totalPes += allSizes->item(j);
01269             ProxyNodeAwareSpanningTreeMsg *cmsg = new(totalNodes, totalPes, 0) ProxyNodeAwareSpanningTreeMsg;
01270             cmsg->patch = msg->patch;
01271             cmsg->procID = CkMyPe();
01272             cmsg->numNodesWithProxies = totalNodes;
01273             int *pAllPes = cmsg->allPes;
01274             for(int j=0; j<totalNodes; j++) {
01275                 int numPes = allSizes->item(j);
01276                 cmsg->numPesOfNode[j] = numPes;
01277                 memcpy(pAllPes, allPtrs->item(j), sizeof(int)*numPes);
01278                 pAllPes += numPes;
01279             }
01280             #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01281             cmsg->printOut("sndExtChi:");
01282             #endif
01283             ProxyMgr::Object()->sendNodeAwareSpanningTree(cmsg);
01284         }    
01285         
01286         delete [] exTreeChildSize;
01287         delete [] exTreeChildPtr;  
01288     }
01289 
01290     //3. send msgs for the tree to the children proxies within the same (physical) node
01291     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01292     for(int i=0; i<iNChild; i++) {
01293         int pe = msg->allPes[i+1]; //excluding the root procID at allPes[0]
01294         #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01295         DebugFileTrace *dft = DebugFileTrace::Object();
01296         dft->openTrace();
01297         dft->writeTrace("Preparing send a msg to internal children for patch[%d] from proc %d to proc %d\n", 
01298                      msg->patch, CkMyPe(), pe);
01299         dft->closeTrace();
01300         #endif
01301         cp[pe].recvNodeAwareSTParent(msg->patch, CkMyPe());
01302     }    
01303 
01304     delete msg;
01305 }
01306 
01307 void ProxyMgr::recvNodeAwareSTParent(int patch, int parent){
01308 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01309     DebugFileTrace *dft = DebugFileTrace::Object();
01310     dft->openTrace();
01311     dft->writeTrace("PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
01312     dft->closeTrace();
01313 #endif
01314     ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(patch);
01315     CmiAssert(proxy!=NULL);
01316     proxy->setSpanningTree(parent, NULL, 0);
01317 }
01318 
01319 void ProxyMgr::sendResults(ProxyResultVarsizeMsg *msg) {
01320     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01321     NodeID node = PatchMap::Object()->node(msg->patch);
01322     cp[node].recvResults(msg);
01323 }
01324 
01325 void ProxyMgr::recvResults(ProxyResultVarsizeMsg *msg) {
01326     HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01327     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01328 }
01329 
01330 void ProxyMgr::sendResults(ProxyResultMsg *msg) {
01331   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01332   NodeID node = PatchMap::Object()->node(msg->patch);
01333   cp[node].recvResults(msg);
01334 }
01335 
01336 void ProxyMgr::recvResults(ProxyResultMsg *msg) {
01337   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01338   home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01339 }
01340 
01341 void
01342 ProxyMgr::sendResults(ProxyCombinedResultMsg *msg) {
01343   ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(msg->patch);
01344   ProxyCombinedResultMsg *cMsg = patch->depositCombinedResultMsg(msg);
01345   if (cMsg) {    
01346     int destPe = patch->getSpanningTreeParent();
01347     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01348     if(destPe != CkMyPe()) {
01349 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01350       /*CkPrintf("ready to call node::recvImmRes on pe[%d] to dest[%d]\n", CkMyPe(), destPe);
01351       fflush(stdout);*/
01352 
01353       cMsg->destPe = destPe;
01354       CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01355       cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
01356 #else    
01357       cp[destPe].recvImmediateResults(cMsg);
01358 #endif
01359     }
01360     else{
01361       cp[destPe].recvResults(cMsg);
01362     }
01363   }
01364 }
01365 
01366 void
01367 ProxyMgr::recvResults(ProxyCombinedResultMsg *msg) {
01368 //Chao Mei: hack for QD in case of SMP with immediate msg
01369 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01370     if(proxyRecvSpanning && msg->isFromImmMsgCall){
01371 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01372 //    fflush(stdout);
01373         //To compensate for the counter loss for message creation
01374         //inside the process of immediate message on comm thread
01375         CkpvAccess(_qd)->create();
01376     }
01377 #endif
01378 
01379   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01380   if (home) {
01381     //printf("Home got a message\n");
01382     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01383   }
01384   else {
01385     NAMD_bug("ProxyMgr should receive result message on home processor");
01386   }
01387 }
01388 
01389 void ProxyMgr::recvImmediateResults(ProxyCombinedResultMsg *msg) {
01390   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01391   if (home) {
01392     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01393     cp[CkMyPe()].recvResults(msg);
01394   }
01395   else {
01396     ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(msg->patch);
01397     ProxyCombinedResultMsg *cMsg = patch->depositCombinedResultMsg(msg);
01398     if (cMsg) {
01399       CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01400       cp[patch->getSpanningTreeParent()].recvImmediateResults(cMsg);
01401     }
01402   }
01403 }
01404 
01405 void NodeProxyMgr::recvImmediateResults(ProxyCombinedResultMsg *msg){
01406 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01407     //CkPrintf("recvImmRes called on comm thread%d pe[%d]\n", CkMyRank()==CmiMyNodeSize(), CkMyPe());
01408     //fflush(stdout);
01409     
01410     int destRank = CkRankOf(msg->destPe);
01411     PatchMap *pmap = localPatchMaps[destRank];
01412     HomePatch *home = pmap->homePatch(msg->patch);
01413     if (home) {
01414 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01415         msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01416 #endif
01417         CProxy_ProxyMgr cp(localProxyMgr);        
01418         cp[msg->destPe].recvResults(msg);        
01419 /*
01420         char *srcfrom = "Isfrom";
01421         if(CkMyRank()!=CmiMyNodeSize()) srcfrom="Notfrom";
01422       CkPrintf("%s comm thread from pe[%d]\n", srcfrom, CkMyPe());
01423       fflush(stdout);
01424 */  
01425     }
01426     else {
01427         ProxyPatch *patch = (ProxyPatch *)pmap->patch(msg->patch);
01428         ProxyCombinedResultMsg *cMsg = patch->depositCombinedResultMsg(msg);
01429         if (cMsg) {
01430             CProxy_NodeProxyMgr cnp(thisgroup);
01431             cMsg->destPe = patch->getSpanningTreeParent();
01432             cnp[CkNodeOf(cMsg->destPe)].recvImmediateResults(cMsg);            
01433         }
01434     }
01435 #endif    
01436 }
01437 
01438 void
01439 ProxyMgr::sendProxyData(ProxyDataMsg *msg, int pcnt, int *pids) {
01440 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01441     if(proxySendSpanning == 1) {
01442         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01443         for(int i=0; i<pcnt-1; i++) {
01444             ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01445             cnp[pids[i]].recvImmediateProxyData(copymsg);
01446         }
01447         cnp[pids[pcnt-1]].recvImmediateProxyData(msg);
01448         return;
01449     }
01450 #endif
01451   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01452   cp.recvImmediateProxyData(msg,pcnt,pids);
01453 }
01454 
01455 void 
01456 ProxyMgr::recvProxyData(ProxyDataMsg *msg) {
01457 //Chao Mei: hack for QD in case of SMP with immediate msg
01458 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01459     if(proxySendSpanning && msg->isFromImmMsgCall){
01460 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01461 //    fflush(stdout);
01462         //To compensate for the counter loss for message creation
01463         //inside the process of immediate message on comm thread
01464         CkpvAccess(_qd)->create();
01465     }
01466 #endif
01467   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01468   proxy->receiveData(msg); // deleted in ProxyPatch::receiveAtoms()
01469 }
01470 
01471 void
01472 ProxyMgr::recvImmediateProxyData(ProxyDataMsg *msg) {
01473   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);  
01474   if (proxySendSpanning == 1) {
01475     // copy the message and send to spanning children
01476     int *pids = (int*)alloca(proxy->getSpanningTreeNChild()*sizeof(int));
01477     int npid = proxy->getSpanningTreeChild(pids);
01478     if (npid) {        
01479         ProxyDataMsg *newmsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);     
01480 
01481         ProxyMgr::Object()->sendProxyData(newmsg,npid,pids);
01482       #if 0
01483       //ChaoMei: buggy code??? the spanning tree doesn't always have 2 levels
01484       //At the second level of the tree immediate messages are not needed
01485       CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01486       cp.recvProxyData(newmsg,npid,pids);
01487       #endif
01488     }
01489   }
01490   /* send to self via EP method to preserve priority */
01491   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01492   cp[CkMyPe()].recvProxyData(msg);
01493 }
01494 
01495 void NodeProxyMgr::recvImmediateProxyData(ProxyDataMsg *msg) {    
01496 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01497     CProxy_ProxyMgr cp(localProxyMgr);
01498     proxyTreeNode *ptn = proxyInfo[msg->patch];
01499     CmiAssert(ptn->numPes!=0);
01500 
01501     //re-send msg to this nodes's children nodes.
01502     //only the first pe of a node of node-aware ST should contain children nodes
01503     int rank = CkRankOf(ptn->peIDs[0]);
01504     PatchMap *pmap = localPatchMaps[rank];
01505     ProxyPatch *ppatch = (ProxyPatch *)pmap->patch(msg->patch);
01506 
01507     int npid = ppatch->getSTNNodeChild();
01508     int pids[npid];
01509     if(npid>0) {        
01510         ppatch->getSTNodeChild(pids);
01511         //only needs to send to other nodes, so check the last entry of pids.
01512         //This is because the data for proxies on the same node have been sent
01513         //by NodeProxyMgr.
01514         if(pids[npid-1]==CkMyNode()) npid--;
01515     }    
01516     CProxy_NodeProxyMgr cnp(thisgroup);
01517     for(int i=0; i<npid; i++) {
01518         ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01519         cnp[pids[i]].recvImmediateProxyData(copymsg);
01520     }    
01521 
01522     //re-send msg to it's internal cores
01523 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01524     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01525 #endif
01526     cp.recvProxyData(msg, ptn->numPes, ptn->peIDs);
01527 #else
01528     CkAbort("Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
01529 #endif
01530 }
01531 
01532 void
01533 ProxyMgr::sendProxyAll(ProxyDataMsg *msg, int pcnt, int *pids) {
01534 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01535     if(proxySendSpanning == 1) {
01536         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01537         for(int i=0; i<pcnt-1; i++) {
01538             ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01539             cnp[pids[i]].recvImmediateProxyAll(copymsg);
01540         }
01541         cnp[pids[pcnt-1]].recvImmediateProxyAll(msg);
01542         return;
01543     }
01544 #endif
01545   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01546   cp.recvImmediateProxyAll(msg,pcnt,pids);
01547 }
01548 
01549 void 
01550 ProxyMgr::recvProxyAll(ProxyDataMsg *msg) {
01551 //Chao Mei: hack for QD in case of SMP with immediate msg
01552 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01553     if(proxySendSpanning && msg->isFromImmMsgCall){
01554 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01555 //    fflush(stdout);
01556         //To compensate for the counter loss for message creation
01557         //inside the process of immediate message on comm thread
01558         CkpvAccess(_qd)->create();
01559     }
01560 #endif
01561 
01562   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01563   proxy->receiveAll(msg); // deleted in ProxyPatch::receiveAtoms()
01564 }
01565 
01566 void
01567 ProxyMgr::recvImmediateProxyAll(ProxyDataMsg *msg) {
01568   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01569   #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01570   DebugFileTrace *dft = DebugFileTrace::Object();
01571   dft->openTrace();
01572   dft->writeTrace("PMgr::recvImmPAll for patch[%d]\n", msg->patch);
01573   CmiAssert(proxy!=NULL);
01574   dft->writeTrace("PMgr::recvImmPAll assertion OK for patch[%d]\n", msg->patch);
01575   dft->closeTrace();
01576   #endif
01577   if (proxySendSpanning == 1) {
01578     // copy the message and send to spanning children
01579     int *pids = (int*)alloca(proxy->getSpanningTreeNChild()*sizeof(int));
01580     int npid = proxy->getSpanningTreeChild(pids);
01581     if (npid) {
01582         ProxyDataMsg *newmsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);      
01583       ProxyMgr::Object()->sendProxyAll(newmsg,npid,pids);
01584     }
01585   }
01586   /* send to self via EP method to preserve priority */
01587   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01588   cp[CkMyPe()].recvProxyAll(msg);
01589 }
01590 
01591 void NodeProxyMgr::recvImmediateProxyAll(ProxyDataMsg *msg) {    
01592 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01593     CProxy_ProxyMgr cp(localProxyMgr);
01594     proxyTreeNode *ptn = proxyInfo[msg->patch];
01595     CmiAssert(ptn->numPes!=0);
01596     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01597     //This could be executed on comm thd.
01598     printf("NodePMgr::recvImmPAll for patch[%d] on node %d rank %d, prepare to send proc ", msg->patch, CkMyNode(), CkMyRank());
01599     for(int i=0; i<ptn->numPes; i++) {
01600         printf("%d, ", ptn->peIDs[i]);
01601     }
01602     printf("\n");
01603     fflush(stdout);
01604     #endif
01605 
01606     //re-send msg to this nodes's children nodes.
01607     //only the first pe of a node of node-aware ST should contain children nodes
01608     int rank = CkRankOf(ptn->peIDs[0]);
01609     PatchMap *pmap = localPatchMaps[rank];
01610     ProxyPatch *ppatch = (ProxyPatch *)pmap->patch(msg->patch);
01611 
01612     int npid = ppatch->getSTNNodeChild();
01613     int pids[npid];
01614     if(npid>0) {        
01615         ppatch->getSTNodeChild(pids);
01616         //only needs to send to other nodes, so check the last entry of pids.
01617         //This is because the data for proxies on the same node have been sent
01618         //by NodeProxyMgr.
01619         if(pids[npid-1]==CkMyNode()) npid--;
01620     }
01621     
01622     CProxy_NodeProxyMgr cnp(thisgroup);
01623     for(int i=0; i<npid; i++) {
01624         ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01625         cnp[pids[i]].recvImmediateProxyAll(copymsg);
01626     }    
01627 
01628     //re-send msg to it's internal cores
01629 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01630     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01631 #endif
01632     cp.recvProxyAll(msg, ptn->numPes, ptn->peIDs);
01633 #else
01634     CkAbort("Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
01635 #endif
01636 }
01637 
01638 void ProxyMgr::printProxySpanningTree(){
01639 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01640     int numPatches = PatchMap::Object()->numPatches();
01641     for(int i=0; i<numPatches; i++) {
01642         proxyTreeNodeList oneList = ptree.naTrees[i];
01643         printf("ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.size()); 
01644         if(ptree.proxylist[i].size()==0) continue;
01645         printf("===%d=== pes/node: ", i);
01646         for(int j=0; j<oneList.size(); j++) {
01647             printf("%d ", oneList.item(j).numPes);
01648         }
01649         printf("\n");
01650         printf("===%d=== pe ids: ", i);
01651         for(int j=0; j<oneList.size(); j++) {
01652             for(int k=0; k<oneList.item(j).numPes; k++) {
01653                 printf("%d ", oneList.item(j).peIDs[k]);
01654             }            
01655         }
01656         printf("\n");
01657     }    
01658     fflush(stdout);  
01659 #else
01660     int numPatches = PatchMap::Object()->numPatches();
01661     for(int i=0; i<numPatches; i++) {
01662         NodeIDList oneList = ptree.trees[i];
01663         printf("ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.size()); 
01664         if(ptree.proxylist[i].size()==0) continue;        
01665         printf("===%d=== pe ids: ", i);
01666         for(int j=0; j<oneList.size(); j++) {            
01667             printf("%d ", oneList.item(j));            
01668         }
01669         printf("\n");
01670     }    
01671     fflush(stdout);  
01672 #endif
01673 }
01674 
01675 void NodeProxyMgr::registerPatch(int patchID, int numPes, int *pes){
01676     if(proxyInfo[patchID]) {
01677         delete proxyInfo[patchID];
01678     }
01679     if(numPes == 0) {
01680         proxyInfo[patchID] = NULL;
01681     }else{
01682         proxyInfo[patchID] = new proxyTreeNode(CkNodeOf(pes[0]),numPes,pes);
01683     }
01684 }
01685 
01686 #include "ProxyMgr.def.h"
01687 

Generated on Mon Nov 23 04:59:23 2009 for NAMD by  doxygen 1.3.9.1