Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ProxyMgr.C

Go to the documentation of this file.
00001 
00007 #include "InfoStream.h"
00008 #include "main.h"
00009 #include "BOCgroup.h"
00010 #include "ProxyMgr.decl.h"
00011 #include "ProxyMgr.h"
00012 #include "PatchMap.inl"
00013 #include "ProxyPatch.h"
00014 #include "ComputeMap.h"
00015 #include "HomePatch.h"
00016 #include <string.h>
00017 #include "ProcessorPrivate.h"
00018 #include "packmsg.h"
00019 #include "Priorities.h"
00020 
00021 //#define DEBUGM
00022 #define MIN_DEBUG_LEVEL 2
00023 #include "Debug.h"
00024 
00025 int proxySendSpanning   = 0;
00026 int proxyRecvSpanning   = 0;
00027 const int proxySpanDim  = 9;
00028 const int inNodeProxySpanDim = 16;
00029 
00030 PACK_MSG(ProxyAtomsMsg,
00031   PACK(patch);
00032   PACK_RESIZE(atomIDList);
00033 )
00034 
00035 PACK_MSG(ProxySpanningTreeMsg,
00036   PACK(patch);
00037   PACK(node);
00038   PACK_RESIZE(tree);
00039 )
00040 
00041 void* ProxyResultMsg::pack(ProxyResultMsg *msg) {
00042 
00043   int msg_size = 0;
00044   msg_size += sizeof(msg->node);
00045   msg_size += sizeof(msg->patch);
00046 
00047   int j;
00048   for ( j = 0; j < Results::maxNumForces; ++j ) {
00049     int array_size = msg->forceList[j].size();
00050     msg_size += sizeof(array_size);
00051     msg_size += array_size * sizeof(char);    
00052     msg_size = ALIGN_8 (msg_size);
00053     Force* f = msg->forceList[j].begin();
00054     int nonzero_count = 0;
00055     for ( int i = 0; i < array_size; ++i ) {
00056       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00057     }
00058     msg_size += nonzero_count * sizeof(Vector);
00059   }
00060 
00061   void *msg_buf = CkAllocBuffer(msg,msg_size);
00062   char *msg_cur = (char *)msg_buf;
00063 
00064   CmiMemcpy((void*)msg_cur,(void*)(&(msg->node)),sizeof(msg->node));
00065   msg_cur += sizeof(msg->node);
00066   CmiMemcpy((void*)msg_cur,(void*)(&(msg->patch)),sizeof(msg->patch));
00067   msg_cur += sizeof(msg->patch);
00068   for ( j = 0; j < Results::maxNumForces; ++j ) {
00069     int array_size = msg->forceList[j].size();
00070     *(int *) msg_cur = array_size;
00071     msg_cur += sizeof(int);
00072     char *nonzero = msg_cur;
00073     msg_cur += array_size * sizeof(char);
00074     msg_cur = (char *)ALIGN_8 (msg_cur);
00075     Vector *farr = (Vector *)msg_cur;
00076     Force* f = msg->forceList[j].begin();
00077 
00078     for ( int i = 0; i < array_size; ++i ) {
00079       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00080         nonzero[i] = 1;
00081         farr->x = f[i].x;
00082         farr->y = f[i].y;
00083         farr->z = f[i].z;
00084         farr ++;
00085       } else {
00086         nonzero[i] = 0;
00087       }
00088     }
00089     msg_cur = (char *) farr;      
00090   }
00091 
00092   delete msg;
00093   return msg_buf;
00094 }
00095 
00096 ProxyResultMsg* ProxyResultMsg::unpack(void *ptr) {
00097 
00098   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyResultMsg));
00099   ProxyResultMsg *msg = new (vmsg) ProxyResultMsg;
00100   char *msg_cur = (char*)ptr;
00101 
00102   CmiMemcpy((void*)(&(msg->node)),(void*)msg_cur,sizeof(msg->node));
00103   msg_cur += sizeof(msg->node);
00104   CmiMemcpy((void*)(&(msg->patch)),(void*)msg_cur,sizeof(msg->patch));
00105   msg_cur += sizeof(msg->patch);
00106   int j;
00107   for ( j = 0; j < Results::maxNumForces; ++j ) {
00108     int array_size = *(int *) msg_cur;
00109     msg_cur += sizeof(array_size);
00110     msg->forceList[j].resize(array_size);
00111     char *nonzero = msg_cur;
00112     msg_cur += array_size * sizeof(char);    
00113     msg_cur = (char *)ALIGN_8 (msg_cur);
00114     Vector* farr = (Vector *) msg_cur;
00115     Force* f = msg->forceList[j].begin();
00116     for ( int i = 0; i < array_size; ++i ) {
00117       if ( nonzero[i] ) {
00118         f[i].x = farr->x;
00119         f[i].y = farr->y;
00120         f[i].z = farr->z;
00121         farr++;
00122       } else {
00123         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00124       }
00125     }    
00126     msg_cur = (char *) farr;
00127   }
00128 
00129   CkFreeMsg(ptr);
00130   return msg;
00131 }
00132 
00133 ProxyResultVarsizeMsg *ProxyResultVarsizeMsg::getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls){
00134 
00135     //1. decide the length of forceArr and iszero field.
00136     int tmpLen[Results::maxNumForces];
00137     int iszeroLen = 0;
00138     for (int i=0; i<Results::maxNumForces; i++){
00139         tmpLen[i] = fls[i].size();
00140         iszeroLen += tmpLen[i];
00141     }
00142     char *tmpIszero = new char[iszeroLen];
00143     char *iszeroPtr = tmpIszero;
00144     int fArrLen = 0;
00145     for(int i=0; i<Results::maxNumForces; i++) {        
00146         Force *fiPtr = fls[i].begin();
00147         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {         
00148             if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
00149                 *iszeroPtr=0;
00150                 fArrLen++;
00151             }else{
00152                 *iszeroPtr=1;
00153             }            
00154         }
00155     }
00156 
00157     //2. Ready to create the msg, and set all fields
00158     ProxyResultVarsizeMsg *retmsg = new(fArrLen, iszeroLen, prioSize)ProxyResultVarsizeMsg;
00159     retmsg->node = nid;
00160     retmsg->patch = pid;
00161     memcpy(retmsg->flLen, tmpLen, sizeof(int)*Results::maxNumForces);
00162     iszeroPtr = tmpIszero;
00163     Force *forcePtr = retmsg->forceArr;
00164     for(int i=0; i<Results::maxNumForces; i++) {        
00165         Force *fiPtr = fls[i].begin();
00166         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
00167             if((*iszeroPtr)!=1) {
00168                 forcePtr->x = fiPtr->x;
00169                 forcePtr->y = fiPtr->y;
00170                 forcePtr->z = fiPtr->z;
00171                 forcePtr++;
00172             }            
00173         }
00174     }
00175     memcpy(retmsg->isZero, tmpIszero, sizeof(char)*iszeroLen);
00176     delete [] tmpIszero;
00177     return retmsg;
00178 }
00179 
00180 ProxyNodeAwareSpanningTreeMsg *ProxyNodeAwareSpanningTreeMsg::getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size){
00181     int numAllPes = 0;
00182     for(int i=0; i<size; i++) {
00183         numAllPes += tree[i].numPes;
00184     }
00185     ProxyNodeAwareSpanningTreeMsg *retmsg = new(size, numAllPes)ProxyNodeAwareSpanningTreeMsg;
00186     retmsg->patch = pid;
00187     retmsg->procID = nid;
00188     retmsg->numNodesWithProxies = size;    
00189     int *pAllPes = retmsg->allPes;
00190     for(int i=0; i<size; i++) {
00191         retmsg->numPesOfNode[i] = tree[i].numPes;
00192         for(int j=0; j<tree[i].numPes; j++) {
00193             *pAllPes = tree[i].peIDs[j];
00194             pAllPes++;
00195         }
00196     }
00197     return retmsg;
00198 }
00199 
00200 //Only available when macro PROCTRACE_DEBUG is defined
00201 void ProxyNodeAwareSpanningTreeMsg::printOut(char *tag){
00202 #ifdef PROCTRACE_DEBUG
00203     DebugFileTrace *dft = DebugFileTrace::Object();
00204     dft->openTrace();
00205     const char *patchname = "ProxyPatch";
00206     if(procID == CkMyPe()) patchname = "HomePatch";
00207     dft->writeTrace("%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes: \n", tag, patchname, patch, CkMyPe(), CkMyNode(), procID, numNodesWithProxies);
00208     if(numNodesWithProxies==0) {
00209         dft->closeTrace();
00210         return;
00211     }
00212     dft->writeTrace("%s: ===%d===pes/node: ", tag, patch);
00213     for(int i=0; i<numNodesWithProxies; i++) {
00214         dft->writeTrace("%d ", numPesOfNode[i]);
00215     }
00216     dft->writeTrace("\n%s: ===%d===pe list: ", tag, patch);
00217     int *p = allPes;
00218     for(int i=0; i<numNodesWithProxies; i++) {
00219         for(int j=0; j<numPesOfNode[i]; j++) {
00220             dft->writeTrace("%d ", *p);
00221             p++;
00222         }
00223     }
00224     dft->writeTrace("\n");    
00225     dft->closeTrace();
00226 #endif
00227 }
00228 
00229 // for spanning tree
00230 void* ProxyCombinedResultMsg::pack(ProxyCombinedResultMsg *msg) {
00231   int msg_size = 0;
00232   msg_size += sizeof(int) + msg->nodes.size()*sizeof(NodeID);
00233   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00234   msg_size += sizeof(msg->destPe);
00235   #endif  
00236   msg_size += sizeof(msg->patch);
00237   int j;
00238   for ( j = 0; j < Results::maxNumForces; ++j ) {
00239     int array_size = msg->forceList[j].size();
00240     msg_size += sizeof(array_size);
00241     msg_size += array_size * sizeof(char);
00242     msg_size = ALIGN_8 (msg_size);
00243 
00244     Force* f = msg->forceList[j].begin();
00245     int nonzero_count = 0;
00246     for ( int i = 0; i < array_size; ++i ) {
00247       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00248     }
00249     msg_size += nonzero_count * sizeof(Force);
00250   }
00251 
00252   void *msg_buf = CkAllocBuffer(msg,msg_size);
00253   char *msg_cur = (char *)msg_buf;
00254 
00255   int nodeSize = msg->nodes.size();
00256   CmiMemcpy((void*)msg_cur,(void*)(&nodeSize), sizeof(nodeSize));
00257   msg_cur += sizeof(nodeSize);
00258   for (int i=0; i<nodeSize; i++) {
00259     CmiMemcpy((void*)msg_cur,(void*)(&msg->nodes[i]), sizeof(NodeID));
00260     msg_cur += sizeof(NodeID);
00261   }
00262   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00263   CmiMemcpy((void*)msg_cur,(void*)(&(msg->destPe)),sizeof(msg->destPe));
00264   msg_cur += sizeof(msg->destPe);
00265   #endif
00266   CmiMemcpy((void*)msg_cur,(void*)(&(msg->patch)),sizeof(msg->patch));
00267   msg_cur += sizeof(msg->patch);
00268   for ( j = 0; j < Results::maxNumForces; ++j ) {
00269     int array_size = msg->forceList[j].size();
00270     CmiMemcpy((void*)msg_cur,(void*)(&array_size),sizeof(array_size));
00271     msg_cur += sizeof(array_size);
00272     char *nonzero = msg_cur;
00273     msg_cur += array_size * sizeof(char);
00274     msg_cur = (char *)ALIGN_8 (msg_cur);
00275     Vector *farr = (Vector *) msg_cur; 
00276     Force* f = msg->forceList[j].begin();
00277 
00278     for ( int i = 0; i < array_size; ++i ) {
00279       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00280         nonzero[i] = 1;
00281         farr->x  =  f[i].x;
00282         farr->y  =  f[i].y;
00283         farr->z  =  f[i].z;
00284 
00285         farr ++;
00286       } else {
00287         nonzero[i] = 0;
00288       }
00289     }
00290     msg_cur = (char *) farr;
00291   }
00292 
00293   delete msg;
00294   return msg_buf;
00295 }
00296 
00297 ProxyCombinedResultMsg* ProxyCombinedResultMsg::unpack(void *ptr) {
00298   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyCombinedResultMsg));
00299   ProxyCombinedResultMsg *msg = new (vmsg) ProxyCombinedResultMsg;
00300   char *msg_cur = (char*)ptr;
00301 
00302   int nodeSize;
00303   CmiMemcpy((void*)(&nodeSize),(void*)msg_cur,sizeof(nodeSize));
00304   msg_cur += sizeof(nodeSize);
00305   for (int i=0; i<nodeSize; i++) {
00306     msg->nodes.add(*(int *)msg_cur);
00307     msg_cur += sizeof(NodeID);
00308   }
00309   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00310   CmiMemcpy((void*)(&(msg->destPe)),(void*)msg_cur,sizeof(msg->destPe));
00311   msg_cur += sizeof(msg->destPe);
00312   #endif
00313   CmiMemcpy((void*)(&(msg->patch)),(void*)msg_cur,sizeof(msg->patch));
00314   msg_cur += sizeof(msg->patch);
00315   int j;
00316   for ( j = 0; j < Results::maxNumForces; ++j ) {
00317     int array_size;
00318     CmiMemcpy((void*)(&array_size),(void*)msg_cur,sizeof(array_size));
00319     msg_cur += sizeof(array_size);
00320     msg->forceList[j].resize(array_size);
00321     char *nonzero = msg_cur;
00322     msg_cur += array_size * sizeof(char);
00323     msg_cur = (char *)ALIGN_8 (msg_cur);
00324     Vector* farr = (Vector *) msg_cur;
00325     Force* f = msg->forceList[j].begin();
00326 
00327     for ( int i = 0; i < array_size; ++i ) {
00328       if ( nonzero[i] ) {
00329         f[i].x = farr->x;
00330         f[i].y = farr->y;
00331         f[i].z = farr->z;
00332         farr++;
00333       } else {
00334         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00335       }
00336     }
00337     msg_cur = (char *) farr;
00338   }
00339 
00340   CkFreeMsg(ptr);
00341   return msg;
00342 }
00343 
00344 // class static
00345 int ProxyMgr::nodecount = 0;
00346 
00347 ProxyMgr::ProxyMgr() { 
00348   if (CkpvAccess(ProxyMgr_instance)) {
00349     NAMD_bug("Tried to create ProxyMgr twice.");
00350   }
00351   CkpvAccess(ProxyMgr_instance) = this;
00352 }
00353 
00354 ProxyMgr::~ProxyMgr() { 
00355   removeProxies();
00356   CkpvAccess(ProxyMgr_instance) = NULL;
00357 }
00358 
00359 
00360 void ProxyMgr::setSendSpanning() {
00361   proxySendSpanning = 1;
00362 }
00363 
00364 int ProxyMgr::getSendSpanning() {
00365   return proxySendSpanning;
00366 }
00367 
00368 void ProxyMgr::setRecvSpanning() {
00369   proxyRecvSpanning = 1;
00370 }
00371 
00372 int ProxyMgr::getRecvSpanning() {
00373   return proxyRecvSpanning;
00374 }
00375 
00376 ProxyTree &ProxyMgr::getPtree() {
00377   return ptree;
00378 }
00379 
00380 void ProxyMgr::removeProxies(void)
00381 {
00382   ProxySetIter pi(proxySet);
00383   for ( pi = pi.begin(); pi != pi.end(); pi++)
00384   {
00385     delete pi->proxyPatch;
00386   }
00387   proxySet.clear();
00388 }
00389 
00390 void ProxyMgr::removeUnusedProxies(void)
00391 {
00392   ResizeArray<PatchID> toDelete;
00393   ProxySetIter pi(proxySet);
00394   for ( pi = pi.begin(); pi != pi.end(); pi++)
00395   {
00396     if ( pi->proxyPatch->getNumComputes() == 0 ) {
00397       toDelete.add(pi->patchID);
00398       //fprintf(stderr, "Proxy Deleted Patch %d Proc %d", pi->patchID, CkMyPe());
00399     }
00400   }
00401   PatchID *pidi = toDelete.begin();
00402   for ( ; pidi != toDelete.end(); ++pidi ) {
00403     removeProxy(*pidi);
00404   }
00405 }
00406 
00407 // Figure out which proxies we need and create them
00408 void ProxyMgr::createProxies(void)
00409 {
00410   // Delete the old proxies.
00411   removeProxies();
00412 
00413   PatchMap *patchMap = PatchMap::Object();
00414   int numPatches = patchMap->numPatches();
00415   int myNode = CkMyPe();
00416   enum PatchFlag { Unknown, Home, NeedProxy };
00417   int *patchFlag = new int[numPatches]; 
00418   int i, j;
00419 
00420   // Note all home patches.
00421   for ( i = 0; i < numPatches; ++i )
00422   {
00423     patchFlag[i] = ( patchMap->node(i) == myNode ) ? Home : Unknown;
00424   }
00425 
00426   // Add all upstream neighbors.
00427   PatchID neighbors[PatchMap::MaxOneAway];
00428   PatchIDList basepids;
00429   patchMap->basePatchIDList(myNode,basepids);
00430   for ( i = 0; i < basepids.size(); ++i )
00431   {
00432     if ( patchMap->node(basepids[i]) != myNode ) {
00433         patchFlag[basepids[i]] = NeedProxy;
00434     }
00435     int numNeighbors = patchMap->upstreamNeighbors(basepids[i],neighbors);
00436     for ( j = 0; j < numNeighbors; ++j )
00437     {
00438       if ( ! patchFlag[neighbors[j]] ) {
00439         patchFlag[neighbors[j]] = NeedProxy;
00440       }
00441     }
00442   }
00443 
00444   // Check all patch-based compute objects.
00445   ComputeMap *computeMap = ComputeMap::Object();
00446   int nc = computeMap->numComputes();
00447   for ( i = 0; i < nc; ++i )
00448   {
00449     if ( computeMap->node(i) != myNode || !computeMap->isPatchBased(i) ) 
00450       continue;
00451     int numPid = computeMap->numPids(i);
00452     for ( j = 0; j < numPid; ++j )
00453     {
00454       int pid = computeMap->pid(i,j);
00455       if ( ! patchFlag[pid] ) {
00456         patchFlag[pid] = NeedProxy;
00457       }
00458     }
00459   }
00460   
00461   // Create proxy list
00462   for ( i = 0; i < numPatches; ++i ) {
00463     if ( patchFlag[i] == NeedProxy )
00464     { // create proxy patch
00465       ProxyPatch *proxy = new ProxyPatch(i);
00466       proxySet.add(ProxyElem(i, proxy));
00467       patchMap->registerPatch(i, proxy);
00468     }
00469   }
00470   delete[] patchFlag;
00471 }
00472 
00473 void
00474 ProxyMgr::createProxy(PatchID pid) {
00475   Patch *p = PatchMap::Object()->patch(pid);
00476   if (!p) {
00477      DebugM(4,"createProxy("<<pid<<")\n");
00478      ProxyPatch *proxy = new ProxyPatch(pid);
00479      proxySet.add(ProxyElem(pid,proxy));
00480      PatchMap::Object()->registerPatch(pid,proxy);
00481   }
00482   else {
00483      DebugM(4,"createProxy("<<pid<<") found " << p->getPatchID() << "\n");
00484   }
00485     
00486 }
00487 
00488 void
00489 ProxyMgr::removeProxy(PatchID pid) {
00490   ProxyElem *p = proxySet.find(ProxyElem(pid));
00491   if (p) { 
00492     PatchMap::Object()->unregisterPatch(pid,p->proxyPatch);
00493     delete p->proxyPatch;
00494     proxySet.del(ProxyElem(pid));
00495     // iout << iINFO << "Removing unused proxy " << pid << " on " << iPE << ".\n" << endi;
00496   }
00497 }
00498   
00499 void
00500 ProxyMgr::registerProxy(PatchID pid) {
00501   // determine which node gets message
00502   NodeID node = PatchMap::Object()->node(pid);
00503 
00504   RegisterProxyMsg *msg = new RegisterProxyMsg;
00505 
00506   msg->node=CkMyPe();
00507   msg->patch = pid;
00508 
00509   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00510 #if CHARM_VERSION > 050402
00511   cp[node].recvRegisterProxy(msg);
00512 #else
00513   cp.recvRegisterProxy(msg,node);
00514 #endif
00515 }
00516 
00517 void
00518 ProxyMgr::recvRegisterProxy(RegisterProxyMsg *msg) {
00519   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00520   homePatch->registerProxy(msg); // message deleted in registerProxy()
00521 }
00522 
00523 void
00524 ProxyMgr::unregisterProxy(PatchID pid) {
00525   // determine which node gets message
00526   NodeID node = PatchMap::Object()->node(pid);
00527 
00528   UnregisterProxyMsg *msg = new UnregisterProxyMsg;
00529 
00530   msg->node=CkMyPe();
00531   msg->patch = pid;
00532 
00533   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00534 #if CHARM_VERSION > 050402
00535   cp[node].recvUnregisterProxy(msg);
00536 #else
00537   cp.recvUnregisterProxy(msg,node);
00538 #endif
00539 }
00540 
00541 void
00542 ProxyMgr::recvUnregisterProxy(UnregisterProxyMsg *msg) {
00543   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00544   homePatch->unregisterProxy(msg); // message deleted in registerProxy()
00545 }
00546 
00547 void 
00548 ProxyMgr::buildProxySpanningTree()
00549 {
00550   PatchIDList pids;
00551   if (!CkMyPe()) iout << iINFO << "Building spanning tree ... send: " << proxySendSpanning << " recv: " << proxyRecvSpanning << "\n" << endi;
00552   PatchMap::Object()->homePatchIDList(pids);
00553   for (int i=0; i<pids.size(); i++) {
00554     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00555     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00556 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00557     home->buildNodeAwareSpanningTree();
00558 #else
00559     home->buildSpanningTree();
00560 #endif
00561   }
00562 }
00563 
00564 void 
00565 ProxyMgr::buildProxySpanningTree2()
00566 {
00567   PatchIDList pids;
00568   PatchMap::Object()->homePatchIDList(pids);
00569   for (int i=0; i<pids.size(); i++) {
00570     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00571     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00572     home->sendProxies();
00573   }
00574 }
00575 
00576 void 
00577 ProxyMgr::sendProxies(int pid, int *list, int n)
00578 {
00579   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00580   cp[0].recvProxies(pid, list, n);
00581 }
00582 
00583 //The value defines the max number of intermediate proxies (acting
00584 //as the node to relay proxy msgs to children) allowed to reside 
00585 //on a physical node for proxy spanning tree
00586 #define MAX_INTERNODE 1
00587 
00588 // only on PE 0
00589 void 
00590 ProxyMgr::recvProxies(int pid, int *list, int n)
00591 {
00592   int nPatches = PatchMap::Object()->numPatches();
00593   if (ptree.proxylist == NULL)
00594     ptree.proxylist = new NodeIDList[nPatches];
00595   ptree.proxylist[pid].resize(n);
00596   for (int i=0; i<n; i++)
00597     ptree.proxylist[pid][i] = list[i];
00598   ptree.proxyMsgCount ++;
00599   if (ptree.proxyMsgCount == nPatches) {
00600     ptree.proxyMsgCount = 0;
00601     // building and sending of trees is done in two steps now
00602     // so that the building step can be shifted to the load balancer
00603 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00604     buildNodeAwareSpanningTree0();
00605 #else
00606     buildSpanningTree0();    
00607 #endif
00608     sendSpanningTrees();
00609   }
00610 }
00611 
00612 //
00613 // XXX static and global variables are unsafe for shared memory builds.
00614 // The global and static vars should be eliminated.  
00615 // Unfortunately, the routines that use these below are actually 
00616 // in use in NAMD.
00617 //
00618 extern double *cpuloads;
00619 static int *procidx = NULL;
00620 static double averageLoad = 0.0;
00621 
00622 static int compLoad(const void *a, const void *b)
00623 {
00624   int i1 = *(int *)a;
00625   int i2 = *(int *)b;
00626   double d1 = cpuloads[i1];
00627   double d2 = cpuloads[i2];
00628   if (d1 < d2) 
00629     return 1;
00630   else if (d1 == d2) 
00631     return 0;
00632   else 
00633     return -1;
00634   // sort from high to low
00635 }
00636 
00637 static void processCpuLoad()
00638 {
00639   int i;
00640   if (!procidx) {
00641     procidx = new int[CkNumPes()];
00642   }
00643   for (i=0; i<CkNumPes(); i++) procidx[i] = i;
00644   qsort(procidx, CkNumPes(), sizeof(int), compLoad);
00645 
00646   double averageLoad = 0.0;
00647   for (i=0; i<CkNumPes(); i++) averageLoad += cpuloads[i];
00648   averageLoad /= CkNumPes();
00649 //  iout << "buildSpanningTree1: no intermediate node on " << procidx[0] << " " << procidx[1] << endi;
00650 
00651 }
00652 
00653 static int noInterNode(int p)
00654 {
00655   int exclude = 0;
00656   if(CkNumPes()<1025)
00657     exclude = 5;
00658   else if(CkNumPes()<4097)
00659     exclude = 10;
00660   else if(CkNumPes()<8193)
00661     exclude = 40;
00662   else if(CkNumPes()<16385)
00663     exclude = 40;
00664   else
00665     exclude = 80;
00666   for (int i=0; i<exclude; i++) if (procidx[i] == p) return 1;
00667 //  if (cpuloads[p] > averageLoad) return 1;
00668   return 0;
00669 }
00670 
00671 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00672 //only on PE 0
00673 void ProxyMgr::buildNodeAwareSpanningTree0(){
00674     int numPatches = PatchMap::Object()->numPatches();
00675     if (ptree.naTrees == NULL) ptree.naTrees = new proxyTreeNodeList[numPatches];
00676     //each element indiates the number of proxies residing on this node    
00677     int *proxyNodeMap = new int[CkNumNodes()];    
00678     for (int pid=0; pid<numPatches; pid++)     
00679         buildSinglePatchNodeAwareSpanningTree(pid, ptree.proxylist[pid], ptree.naTrees[pid], proxyNodeMap);
00680        
00681 
00682     //Debug
00683     //printf("#######################Naive ST#######################\n");
00684     //printProxySpanningTree();
00685 
00686     //Now the naive spanning tree has been constructed and stored in oneNATree;
00687     //Afterwards, some optimizations on this naive spanning tree could be done.
00688     //except the first element as the tree root always contains the processor
00689     //that has home patch
00690 
00691     //1st Optimization: reduce intermediate nodes as much as possible. In details,
00692     //the optimal case is that on a single physical smp node, there should be no
00693     //two proxies who act as the intermediate nodes to pass information to childrens
00694     //in the spanning tree. E.g, for patch A's proxy spanning tree, it has a node X as
00695     //its intermediate node. However, for patch B's, it also has a node X as its intermediate
00696     //node. We should avoid this situation as node X becomes the bottleneck as it has twice
00697     //amount of work to process now.
00698     //Step1: foward to the first patch that has proxies
00699     //Now proxyNodeMap records the info that how many intermediate nodes on a node
00700     memset(proxyNodeMap, 0, sizeof(int)*CkNumNodes());
00701     int pid=0;
00702     for(;pid<numPatches; pid++) {
00703         if(ptree.proxylist[pid].size()>0) break;
00704     }
00705     if(pid==numPatches) {
00706         delete [] proxyNodeMap;
00707         return;
00708     }
00709     proxyTreeNodeList onePatchT = ptree.naTrees[pid];
00710     //If a node is an intermediate node, then its idx should satisfy
00711     //idx*proxySpanDim + 1 < onePatchT.size()
00712     int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00713     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00714         int nid = onePatchT.item(i).nodeID;
00715         proxyNodeMap[nid]++;
00716     }
00717     //Step2: iterate over each patch's proxy spanning tree to adjust
00718     //the tree node positions. The bad thing here is that it may involve
00719     //many memory allocations and deallocation for small-size (~100bytes)
00720     //chunks.
00721     pid++; //advance to the next patch
00722     for(; pid<numPatches; pid++) {
00723         if(ptree.proxylist[pid].size()==0) continue;
00724         onePatchT = ptree.naTrees[pid];
00725         lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00726         for(int i=1; i<=lastInterNodeIdx; i++) {
00727             int nid = onePatchT.item(i).nodeID;
00728             if(proxyNodeMap[nid]<MAX_INTERNODE) {
00729                 proxyNodeMap[nid]++;
00730                 continue;
00731             }
00732             //the position is occupied, so search the children
00733             //nodes to see whether there's one to swap this node
00734             //if not found, find the first position that has smallest
00735             //amount of nodes.
00736             int leastIdx = -1;
00737             int leastAmount = ~(1<<31);
00738             //iterate children nodes
00739             int swapPos;
00740             for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.size(); swapPos++) {
00741                 int chiNId = onePatchT.item(swapPos).nodeID;
00742                 if(proxyNodeMap[chiNId]<MAX_INTERNODE) {
00743                     break;
00744                 }
00745                 if(proxyNodeMap[chiNId]<leastAmount) {
00746                     leastAmount = proxyNodeMap[chiNId];
00747                     leastIdx = swapPos;
00748                 }
00749             }
00750             CmiAssert(leastIdx!=-1); //because the above loop at least executes once
00751             if(swapPos==onePatchT.size()) {
00752                 //indicate we cannot find a physical node which
00753                 //still allows the intermediate proxy.
00754                 swapPos = leastIdx;
00755             }
00756             //swap the current proxy tree node "i" with node "swapPos"
00757             proxyTreeNode *curNode = &onePatchT.item(i);
00758             proxyTreeNode *swapNode = &onePatchT.item(swapPos);
00759             proxyNodeMap[swapNode->nodeID]++; //update the proxyNodeMap record
00760             int tmp = curNode->nodeID;
00761             curNode->nodeID = swapNode->nodeID;
00762             swapNode->nodeID = tmp;
00763             tmp = curNode->numPes;
00764             int tmpPes[tmp];
00765             memcpy(tmpPes, curNode->peIDs, sizeof(int)*tmp);
00766             delete [] curNode->peIDs;
00767             curNode->numPes = swapNode->numPes;
00768             curNode->peIDs = new int[swapNode->numPes];
00769             memcpy(curNode->peIDs, swapNode->peIDs, sizeof(int)*swapNode->numPes);
00770             swapNode->numPes = tmp;
00771             delete [] swapNode->peIDs;
00772             swapNode->peIDs = new int[tmp];
00773             memcpy(swapNode->peIDs, tmpPes, sizeof(int)*tmp);                      
00774         }
00775     }
00776     delete [] proxyNodeMap;    
00777 
00778     //Debug
00779     //printf("#######################After 1st optimization#######################\n");
00780     //printProxySpanningTree();
00781 
00782     //2nd optimization: similar to the 1st optimization but now thinking in
00783     //the core level. If we cannot avoid place two intermediate proxy
00784     //on the same node, we'd better to place them in different cores inside
00785     //the node
00786     if(CmiMyNodeSize()==1) {
00787         //No need to perform the second optimization as every node has only 1 core
00788         return;
00789     }
00790     int *proxyCoreMap = new int[CkNumPes()];
00791     memset(proxyCoreMap, 0, sizeof(int)*CkNumPes());
00792     //Step1: forward to the first patch that has proxies
00793     pid=0;
00794     for(;pid<numPatches; pid++) {
00795         if(ptree.proxylist[pid].size()>0) break;
00796     }
00797     if(pid==numPatches) {
00798         delete [] proxyCoreMap;
00799         return;
00800     }
00801     onePatchT = ptree.naTrees[pid];
00802     //If a node is an intermediate node, then its idx should satisfy
00803     //idx*proxySpanDim + 1 < onePatchT.size()
00804     lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00805     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00806         int rootProcID = onePatchT.item(i).peIDs[0];
00807         proxyCoreMap[rootProcID]++;
00808     }
00809     //Step2: iterate over each patch's proxy spanning tree to adjust
00810     //the root's position of intermediate proxies.
00811     pid++; //advance to the next patch
00812     for(; pid<numPatches; pid++) {
00813         if(ptree.proxylist[pid].size()==0) continue;
00814         onePatchT = ptree.naTrees[pid];
00815         lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00816         for(int i=1; i<=lastInterNodeIdx; i++) {
00817             proxyTreeNode *curNode = &onePatchT.item(i);
00818             int rootProcID = curNode->peIDs[0];
00819             if(curNode->numPes==1 || proxyCoreMap[rootProcID]<MAX_INTERNODE){
00820                 //if this node contains only 1 core, then we have to leave it as it is
00821                 //because there are no other cores in the same node that could be used to
00822                 //adjust
00823                 proxyCoreMap[rootProcID]++;
00824                 continue;
00825             }
00826             
00827             //foound more than MAX_INTERNODE intermediate proxies on the same core,
00828             //adjust the root id of the core of this proxy tree node
00829             int leastIdx = -1;
00830             int leastAmount = ~(1<<31);
00831             //iterate children nodes
00832             int swapPos;
00833             
00834             for(swapPos=1; swapPos<curNode->numPes; swapPos++) {
00835                 int otherCoreID = curNode->peIDs[swapPos];
00836                 if(proxyCoreMap[otherCoreID]<MAX_INTERNODE) {
00837                     break;
00838                 }
00839                 if(proxyCoreMap[otherCoreID]<leastAmount) {
00840                     leastAmount = proxyCoreMap[otherCoreID];
00841                     leastIdx = swapPos;
00842                 }
00843             }
00844             CmiAssert(leastIdx!=-1); //because the above loop body must execute at least once
00845             if(swapPos==curNode->numPes) {
00846                 //indicate we cannot find a physical node which
00847                 //still allows the intermediate proxy.
00848                 swapPos = leastIdx;
00849             }
00850             int tmp = curNode->peIDs[swapPos];
00851             curNode->peIDs[swapPos] = curNode->peIDs[0];
00852             curNode->peIDs[0] = tmp;
00853             proxyCoreMap[tmp]++;
00854         }      
00855     }
00856 
00857     delete proxyCoreMap;
00858 
00859     //Debug
00860     //printf("#######################After 2nd optimization#######################\n");
00861     //printProxySpanningTree();
00862 }
00863 
00864 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(PatchID pid, NodeIDList &proxyList, 
00865                                                      proxyTreeNodeList &ptnTree, int *proxyNodeMap){       
00866     int numProxies = proxyList.size();
00867     if (numProxies == 0) {
00868         CkPrintf ("This is sheer evil in building node-aware spanning tree!\n\n");            
00869         return;
00870     }        
00871     
00872     memset(proxyNodeMap, 0, sizeof(int)*CkNumNodes());
00873     int proxyNodeList[numProxies+1]; //including the root node             
00874     
00875     //the processor id of home patch
00876     int hpProcID = PatchMap::Object()->node(pid);
00877     int hpNodeID = CkNodeOf(hpProcID);
00878     proxyNodeMap[hpNodeID]++;
00879     proxyNodeList[0] = hpNodeID;
00880     int numNodesWithProxies = 1;
00881     
00882     for(int i=0; i<numProxies; i++) {
00883         int procId = proxyList[i];
00884         int nodeId = CkNodeOf(procId);
00885         proxyNodeMap[nodeId]++;
00886         if(proxyNodeMap[nodeId]==1) {
00887             proxyNodeList[numNodesWithProxies] = nodeId;
00888             numNodesWithProxies++;
00889         }
00890     }
00891     proxyTreeNodeList &oneNATree = ptnTree;   // spanning tree
00892     oneNATree.resize(numNodesWithProxies);
00893     //initialize oneNATree
00894     for(int i=0; i<numNodesWithProxies; i++) {
00895         proxyTreeNode *oneNode = &oneNATree.item(i);
00896         delete oneNode->peIDs;
00897         oneNode->nodeID = proxyNodeList[i];
00898         oneNode->peIDs = new int[proxyNodeMap[oneNode->nodeID]];                        
00899         oneNode->numPes = 0; //initially set to zero as used for incrementing later
00900     }
00901     
00902     //set up the tree root which contains the home patch processor
00903     proxyTreeNode *rootnode = &oneNATree.item(0);
00904     rootnode->peIDs[0] = hpProcID;
00905     rootnode->numPes++;
00906     
00907     for(int i=0; i<numProxies; i++) {
00908         int procId = proxyList[i];
00909         int nodeId = CkNodeOf(procId);
00910         int idxInTree = -1;
00911         for(int j=0; j<numNodesWithProxies; j++) {
00912             if(proxyNodeList[j] == nodeId) {
00913                 idxInTree = j;
00914                 break;
00915             }
00916         }
00917         CmiAssert(idxInTree!=-1);
00918         proxyTreeNode *oneNode = &oneNATree.item(idxInTree);
00919         oneNode->peIDs[oneNode->numPes] = procId;
00920         oneNode->numPes++;
00921     }
00922 }
00923 #else //branch of NODEAWARE_PROXY_SPANNINGTREE
00924 // only on PE 0
00925 void 
00926 ProxyMgr::buildSpanningTree0()
00927 {
00928   int i;
00929 
00930   processCpuLoad();
00931 
00932   int *numPatchesOnNode = new int[CkNumPes()];
00933   int numNodesWithPatches = 0;
00934   for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
00935   int numPatches = PatchMap::Object()->numPatches();
00936   for (i=0; i<numPatches; i++) {
00937     int node = PatchMap::Object()->node(i);
00938     numPatchesOnNode[node]++;
00939     if (numPatchesOnNode[node] == 1)
00940       numNodesWithPatches ++;
00941   }
00942   int patchNodesLast =
00943     ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
00944   int *ntrees = new int[CkNumPes()];
00945   for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
00946   if (ptree.trees == NULL) ptree.trees = new NodeIDList[numPatches];
00947   for (int pid=0; pid<numPatches; pid++) 
00948   {
00949     int numProxies = ptree.proxylist[pid].size();
00950     if (numProxies == 0) {
00951       CkPrintf ("This is sheer evil!\n\n");
00952       //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
00953       return;
00954     }
00955     NodeIDList &tree = ptree.trees[pid];   // spanning tree
00956     NodeIDList oldtree = tree;
00957     tree.resize(numProxies+1);
00958     tree.setall(-1);
00959     tree[0] = PatchMap::Object()->node(pid);
00960     int s=1, e=numProxies;
00961     int nNonPatch = 0;
00962     int treesize = 1;
00963     int pp;
00964 
00965     // keep tree persistent for non-intermediate nodes
00966     for (pp=0; pp<numProxies; pp++) {
00967       int p = ptree.proxylist[pid][pp];
00968       int oldindex = oldtree.find(p);
00969       if (oldindex != -1 && oldindex <= numProxies) {
00970         int isIntermediate = (oldindex*proxySpanDim+1 <= numProxies);
00971         if (!isIntermediate) {
00972           tree[oldindex] = p;
00973         }
00974         else if (ntrees[p] < MAX_INTERNODE) {
00975           tree[oldindex] = p;
00976           ntrees[p] ++;
00977         }
00978       }
00979     }
00980 
00981     for (pp=0; pp<numProxies; pp++) {
00982       int p = ptree.proxylist[pid][pp];              // processor number
00983       if (tree.find(p) != -1) continue;        // already used
00984       treesize++;
00985       if (patchNodesLast && numPatchesOnNode[p] ) {
00986         while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
00987         tree[e] = p;
00988         int isIntermediate = (e*proxySpanDim+1 <= numProxies);
00989         if (isIntermediate) ntrees[p]++;
00990       }
00991       else {
00992         while (tree[s] != -1) { s++; if (s==numProxies+1) s = 1; }
00993         int isIntermediate = (s*proxySpanDim+1 <= numProxies);
00994         if (isIntermediate && (ntrees[p] >= MAX_INTERNODE || noInterNode(p))) {   // TOO MANY INTERMEDIATE TREES
00995         //if (isIntermediate && ntrees[p] >= MAX_INTERNODE)    // TOO MANY INTERMEDIATE TREES
00996           while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
00997           tree[e] = p;
00998           isIntermediate = (e*proxySpanDim+1 <= numProxies);
00999           if (isIntermediate) ntrees[p]++;
01000         }
01001         else {
01002           tree[s] = p;
01003           nNonPatch++;
01004           if (isIntermediate) ntrees[p]++;
01005         }
01006       }
01007     }
01008     // send homepatch's proxy tree
01009     if(ptree.sizes)
01010       ptree.sizes[pid] = treesize;
01011     //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, &tree[0], treesize);
01012   }
01013   /*for (i=0; i<CkNumPes(); i++) {
01014     if (ntrees[i] > MAX_INTERNODE) iout << "Processor " << i << "has (guess) " << ntrees[i] << " intermediate nodes." << endi;
01015   }*/
01016   delete [] ntrees;
01017   delete [] numPatchesOnNode;
01018 }
01019 #endif
01020 
01021 void ProxyMgr::sendSpanningTrees()
01022 {
01023   int numPatches = PatchMap::Object()->numPatches();
01024   for (int pid=0; pid<numPatches; pid++) {
01025     int numProxies = ptree.proxylist[pid].size();
01026 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01027     if (numProxies == 0)
01028       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, NULL, 0);
01029     else {
01030       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, ptree.naTrees[pid].begin(), ptree.naTrees[pid].size());
01031     }
01032 #else
01033     if (numProxies == 0)
01034       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
01035     else {
01036       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, ptree.trees[pid].begin(), ptree.trees[pid].size());
01037     }
01038 #endif
01039   }
01040 }
01041 
01042 void ProxyMgr::sendSpanningTreeToHomePatch(int pid, int *tree, int n)
01043 {
01044   CProxy_ProxyMgr cp(thisgroup);
01045   cp[PatchMap::Object()->node(pid)].recvSpanningTreeOnHomePatch(pid, tree, n);
01046 }
01047 
01048 void ProxyMgr::recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
01049 {
01050   HomePatch *p = PatchMap::Object()->homePatch(pid);
01051   p->recvSpanningTree(tree, n);
01052 }
01053 
01054 void ProxyMgr::sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
01055 {
01056   CProxy_ProxyMgr cp(thisgroup);
01057   ProxyNodeAwareSpanningTreeMsg *msg = ProxyNodeAwareSpanningTreeMsg::getANewMsg(pid, -1, tree, n);
01058   cp[PatchMap::Object()->node(pid)].recvNodeAwareSpanningTreeOnHomePatch(msg);
01059 }
01060 
01061 void ProxyMgr::recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
01062 {
01063   HomePatch *p = PatchMap::Object()->homePatch(msg->patch);
01064   p->recvNodeAwareSpanningTree(msg);
01065   delete msg;
01066 }
01067 
01068 void 
01069 ProxyMgr::sendSpanningTree(ProxySpanningTreeMsg *msg) {
01070   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01071 #if CHARM_VERSION > 050402
01072   cp[msg->tree[0]].recvSpanningTree(msg);
01073 #else
01074   cp.recvSpanningTree(msg, msg->tree[0]);
01075 #endif
01076 }
01077 
01078 void ProxyMgr::sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01079   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01080   int pe = msg->allPes[0]; //the root procID
01081 
01082 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01083   DebugFileTrace *dft = DebugFileTrace::Object();
01084   dft->openTrace();
01085   dft->writeTrace("PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->patch);
01086   dft->closeTrace();
01087 #endif
01088 
01089 #if CHARM_VERSION > 050402
01090   cp[pe].recvNodeAwareSpanningTree(msg);
01091 #else
01092   cp.recvNodeAwareSpanningTree(msg, pe);
01093 #endif
01094 }
01095 
01096 void 
01097 ProxyMgr::recvSpanningTree(ProxySpanningTreeMsg *msg) {
01098   int size = msg->tree.size();
01099   int child[proxySpanDim];
01100   int nChild = 0;
01101   int i;
01102   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01103   for (i=0; i<proxySpanDim; i++) {
01104     if (size > i+1) { child[i] = msg->tree[i+1]; nChild++; }
01105   }
01106   if (!PatchMap::Object()->homePatch(msg->patch)) {
01107     proxy->setSpanningTree(msg->node, child, nChild);
01108   }
01109 
01110   // build subtree and pass down
01111   if (nChild == 0) return;
01112 
01113   nodecount ++;
01114   //if (nodecount > MAX_INTERNODE) 
01115   //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01116 
01117 //CkPrintf("[%d] %d:(%d) %d %d %d %d %d\n", CkMyPe(), msg->patch, size, msg->tree[0], msg->tree[1], msg->tree[2], msg->tree[3], msg->tree[4]);
01118   NodeIDList *tree = new NodeIDList[proxySpanDim];
01119   int level = 1, index=1;
01120   int done = 0;
01121   while (!done) {
01122     for (int n=0; n<nChild; n++) {
01123       if (done) break;
01124       for (int j=0; j<level; j++) {
01125        if (index >= size) { done = 1; break; }
01126        tree[n].add(msg->tree[index]);
01127        index++;
01128       }
01129     }
01130     level *=proxySpanDim;
01131   }
01132 
01133   ProxyMgr *proxyMgr = ProxyMgr::Object();
01134   for (i=0; i<proxySpanDim; i++) {
01135     if (tree[i].size()) {
01136       ProxySpanningTreeMsg *cmsg = new ProxySpanningTreeMsg;
01137       cmsg->patch = msg->patch;
01138       cmsg->node = CkMyPe();
01139       cmsg->tree = tree[i];
01140       proxyMgr->sendSpanningTree(cmsg);
01141     }
01142   }
01143 
01144   delete [] tree;
01145   delete msg;
01146 }
01147 
01148 //NOTE: have not considered how to deal with spanning tree inside a single physical node
01149 //--Chao Mei
01150 void ProxyMgr::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01151 
01152     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01153     DebugFileTrace *dft = DebugFileTrace::Object();
01154     dft->openTrace();
01155     dft->writeTrace("PMgr::recvST0 for patch[%d] with #nodes=%d\n", msg->patch, msg->numNodesWithProxies);
01156     dft->closeTrace();
01157     msg->printOut("PMgr::recvST");
01158     #endif
01159 
01160     //This function is divided into three parts. The tree root is msg->allPes[0]
01161     //1. set up its own immediate childrens
01162     int treesize = msg->numNodesWithProxies;    
01163     int iNChild = 0; //number of internal children
01164     int eNChild = 0; //number of external children
01165     if(treesize>0){
01166         iNChild = (msg->numPesOfNode[0]-1); //exclude the root itself
01167         eNChild = (proxySpanDim>(treesize-1))?(treesize-1):proxySpanDim;
01168     }
01169     int numChild = iNChild + eNChild;
01170     if(numChild==0){
01171         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01172         proxy->setSpanningTree(msg->procID, NULL, 0);
01173         #ifdef USE_NODEPATCHMGR
01174         //set up proxyInfo inside NodeProxyMgr
01175         if(!PatchMap::Object()->homePatch(msg->patch)){
01176             //only when this processor contains a proxy patch of "msg->patch"
01177             //is the patch registeration in NodeProxyMgr needed,
01178             //and itself needs to be registered
01179             CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01180             NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01181             npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);            
01182         }
01183         //set children in terms of node ids
01184         proxy->setSTNodeChildren(0, NULL);       
01185         #endif
01186         return;
01187     }
01188 
01189     nodecount++;
01190     //if (nodecount > MAX_INTERNODE) 
01191     //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01192 
01193     if(!PatchMap::Object()->homePatch(msg->patch)){
01194         //the home patch of this spanning tree has been already set up for its childrens
01195         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01196         int children[numChild];
01197         //add external children
01198         int *p = msg->allPes + msg->numPesOfNode[0];
01199         for(int i=0; i<eNChild; i++) {
01200             children[i] = *p;
01201             p += msg->numPesOfNode[i+1];
01202         }
01203         //add internal children
01204         for(int i=eNChild, j=1; i<numChild; i++, j++) {
01205             children[i] = msg->allPes[j]; 
01206         }
01207         proxy->setSpanningTree(msg->procID, children, numChild);
01208 
01209         #ifdef USE_NODEPATCHMGR
01210         //set up proxyInfo inside NodeProxyMgr
01211         CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01212         NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01213         npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);        
01214 
01215         //set children in terms of node ids
01216         int nodeChildren[eNChild+1];
01217         p = msg->allPes + msg->numPesOfNode[0];
01218         for(int i=0; i<eNChild; i++) {
01219             nodeChildren[i] = CkNodeOf(*p);
01220             p += msg->numPesOfNode[i+1];
01221         }
01222         //the last entry always stores the node id that contains this proxy
01223         nodeChildren[eNChild] = CkNodeOf(msg->allPes[0]);
01224         proxy->setSTNodeChildren(eNChild+1, nodeChildren);
01225         #endif
01226     }
01227 
01228     //2. send msgs for the tree to external children proxies
01229     if(eNChild > 0) {
01230         ResizeArray<int> *exTreeChildSize = new ResizeArray<int>[eNChild];
01231         ResizeArray<int *> *exTreeChildPtr = new ResizeArray<int *>[eNChild];    
01232     
01233         int nodesToCnt = 1; //the number of children each root (current root's 
01234                             //immedidate external nodes) has in each level
01235         int pos = 1; //track the iteration over msg->numPesOfNode and skip the current root
01236         int *pePtr = msg->allPes + msg->numPesOfNode[0];
01237         int done = 0;
01238         while(!done) {
01239             for(int childID=0; childID<eNChild;childID++) {
01240                 //iterate nodes on each level
01241                 for(int i=0; i<nodesToCnt; i++) {
01242                     int cursize = msg->numPesOfNode[pos];
01243                     exTreeChildSize[childID].add(cursize);
01244                     exTreeChildPtr[childID].add(pePtr);
01245                     pos++;
01246                     pePtr += cursize; 
01247                     if(pos==msg->numNodesWithProxies) {
01248                         done = 1;
01249                         break;
01250                     }
01251                 }
01252                 if(done) break;                         
01253             }
01254             nodesToCnt *= proxySpanDim;
01255         }
01256           
01257         for(int i=0; i<eNChild; i++) {                
01258             ResizeArray<int> *allSizes = &exTreeChildSize[i];
01259             ResizeArray<int *> *allPtrs = &exTreeChildPtr[i];
01260             int totalNodes = allSizes->size();
01261             int totalPes = 0;
01262             for(int j=0; j<totalNodes; j++) totalPes += allSizes->item(j);
01263             ProxyNodeAwareSpanningTreeMsg *cmsg = new(totalNodes, totalPes)ProxyNodeAwareSpanningTreeMsg;
01264             cmsg->patch = msg->patch;
01265             cmsg->procID = CkMyPe();
01266             cmsg->numNodesWithProxies = totalNodes;
01267             int *pAllPes = cmsg->allPes;
01268             for(int j=0; j<totalNodes; j++) {
01269                 int numPes = allSizes->item(j);
01270                 cmsg->numPesOfNode[j] = numPes;
01271                 memcpy(pAllPes, allPtrs->item(j), sizeof(int)*numPes);
01272                 pAllPes += numPes;
01273             }
01274             #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01275             cmsg->printOut("sndExtChi:");
01276             #endif
01277             ProxyMgr::Object()->sendNodeAwareSpanningTree(cmsg);
01278         }    
01279         
01280         delete [] exTreeChildSize;
01281         delete [] exTreeChildPtr;  
01282     }
01283 
01284     //3. send msgs for the tree to the children proxies within the same (physical) node
01285     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01286     for(int i=0; i<iNChild; i++) {
01287         int pe = msg->allPes[i+1]; //excluding the root procID at allPes[0]
01288         #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01289         DebugFileTrace *dft = DebugFileTrace::Object();
01290         dft->openTrace();
01291         dft->writeTrace("Preparing send a msg to internal children for patch[%d] from proc %d to proc %d\n", 
01292                      msg->patch, CkMyPe(), pe);
01293         dft->closeTrace();
01294         #endif
01295         cp[pe].recvNodeAwareSTParent(msg->patch, CkMyPe());
01296     }    
01297 
01298     delete msg;
01299 }
01300 
01301 void ProxyMgr::recvNodeAwareSTParent(int patch, int parent){
01302 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01303     DebugFileTrace *dft = DebugFileTrace::Object();
01304     dft->openTrace();
01305     dft->writeTrace("PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
01306     dft->closeTrace();
01307 #endif
01308     ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(patch);
01309     CmiAssert(proxy!=NULL);
01310     proxy->setSpanningTree(parent, NULL, 0);
01311 }
01312 
01313 void ProxyMgr::sendResults(ProxyResultVarsizeMsg *msg) {
01314     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01315     NodeID node = PatchMap::Object()->node(msg->patch);
01316   #if CHARM_VERSION > 050402
01317     cp[node].recvResults(msg);
01318   #else
01319     cp.recvResults(msg, node);
01320   #endif
01321 }
01322 
01323 void ProxyMgr::recvResults(ProxyResultVarsizeMsg *msg) {
01324     HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01325     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01326 }
01327 
01328 void ProxyMgr::sendResults(ProxyResultMsg *msg) {
01329   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01330   NodeID node = PatchMap::Object()->node(msg->patch);
01331 #if CHARM_VERSION > 050402
01332   cp[node].recvResults(msg);
01333 #else
01334   cp.recvResults(msg, node);
01335 #endif
01336 }
01337 
01338 void ProxyMgr::recvResults(ProxyResultMsg *msg) {
01339   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01340   home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01341 }
01342 
01343 void
01344 ProxyMgr::sendResults(ProxyCombinedResultMsg *msg) {
01345   ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(msg->patch);
01346   ProxyCombinedResultMsg *cMsg = patch->depositCombinedResultMsg(msg);
01347   if (cMsg) {    
01348     int destPe = patch->getSpanningTreeParent();
01349     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01350     if(destPe != CkMyPe()) {
01351 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01352       cMsg->destPe = destPe;
01353       CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01354       cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
01355 #else    
01356       cp[destPe].recvImmediateResults(cMsg);
01357 #endif
01358     }
01359     else 
01360       cp[destPe].recvResults(cMsg);
01361   }
01362 }
01363 
01364 void
01365 ProxyMgr::recvResults(ProxyCombinedResultMsg *msg) {
01366   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01367   if (home) {
01368     //printf("Home got a message\n");
01369     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01370   }
01371   else {
01372     NAMD_bug("ProxyMgr should receive result message on home processor");
01373   }
01374 }
01375 
01376 void ProxyMgr::recvImmediateResults(ProxyCombinedResultMsg *msg) {
01377   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01378   if (home) {
01379     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01380 #if CHARM_VERSION > 050402
01381     cp[CkMyPe()].recvResults(msg);
01382 #else
01383     cp.recvResults(msg, CkMyPe());
01384 #endif
01385   }
01386   else {
01387     ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(msg->patch);
01388     ProxyCombinedResultMsg *cMsg = patch->depositCombinedResultMsg(msg);
01389     if (cMsg) {
01390       CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01391 #if CHARM_VERSION > 050402
01392       cp[patch->getSpanningTreeParent()].recvImmediateResults(cMsg);
01393 #else
01394       cp.recvImmediateResults(cMsg, patch->getSpanningTreeParent());
01395 #endif
01396     }
01397   }
01398 }
01399 
01400 void NodeProxyMgr::recvImmediateResults(ProxyCombinedResultMsg *msg){
01401 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01402     int destRank = CkRankOf(msg->destPe);
01403     PatchMap *pmap = localPatchMaps[destRank];
01404     HomePatch *home = pmap->homePatch(msg->patch);
01405     if (home) {
01406         CProxy_ProxyMgr cp(localProxyMgr);        
01407         cp[msg->destPe].recvResults(msg);        
01408     }
01409     else {
01410