Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ProxyMgr.C

Go to the documentation of this file.
00001 
00007 #include "InfoStream.h"
00008 #include "main.h"
00009 #include "BOCgroup.h"
00010 #include "ProxyMgr.decl.h"
00011 #include "ProxyMgr.h"
00012 #include "PatchMap.inl"
00013 #include "ProxyPatch.h"
00014 #include "ComputeMap.h"
00015 #include "HomePatch.h"
00016 #include <string.h>
00017 #include "ProcessorPrivate.h"
00018 #include "packmsg.h"
00019 #include "Priorities.h"
00020 #ifndef _NO_ALLOCA_H
00021 #include <alloca.h>
00022 #endif
00023 #ifndef _NO_MALLOC_H
00024 #include <malloc.h>
00025 #endif
00026 
00027 #include <map>
00028 #include <vector>
00029 #include <algorithm>
00030 
00031 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
00032 #include "qd.h"
00033 #endif
00034 
00035 //#define DEBUGM
00036 #define MIN_DEBUG_LEVEL 2
00037 #include "Debug.h"
00038 
00039 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00040 
// Spanning-tree settings shared by the ProxyMgr code in this file.
// Set to 1 by ProxyMgr::setSendSpanning(); reported as "send" when the tree is built.
int proxySendSpanning   = 0;
// Set to 1 by ProxyMgr::setRecvSpanning(); reported as "recv" when the tree is built.
int proxyRecvSpanning   = 0;
// Branch factor of the proxy spanning tree. "proxySpanDim" is exposed in the
// configuration file as "proxyTreeBranchFactor" (see setProxyTreeBranchFactor()).
int proxySpanDim        = 4;
// NOTE(review): presumably the branch factor used inside one node; not
// referenced in the visible part of this file -- confirm against its users.
int inNodeProxySpanDim = 16;
00046 
// Marshalling description for ProxySpanningTreeMsg, expanded by the PACK_MSG
// macro family from packmsg.h: the patch and node fields are listed with
// PACK, the tree field with PACK_RESIZE.
// NOTE(review): PACK_RESIZE presumably handles a variable-length array --
// confirm against packmsg.h.
PACK_MSG(ProxySpanningTreeMsg,
  PACK(patch);
  PACK(node);
  PACK_RESIZE(tree);
)
00052 
00053 void* ProxyResultMsg::pack(ProxyResultMsg *msg) {
00054 
00055   int msg_size = 0;
00056   msg_size += sizeof(msg->node);
00057   msg_size += sizeof(msg->patch);
00058 
00059   int j;
00060   for ( j = 0; j < Results::maxNumForces; ++j ) {
00061     int array_size = msg->forceList[j]->size();
00062     msg_size += sizeof(array_size);
00063     msg_size += array_size * sizeof(char);    
00064     msg_size = ALIGN_8 (msg_size);
00065     Force* f = msg->forceList[j]->begin();
00066     int nonzero_count = 0;
00067     for ( int i = 0; i < array_size; ++i ) {
00068       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00069     }
00070     msg_size += nonzero_count * sizeof(Vector);
00071   }
00072 
00073   void *msg_buf = CkAllocBuffer(msg,msg_size);
00074   char *msg_cur = (char *)msg_buf;
00075 
00076   CmiMemcpy((void*)msg_cur,(void*)(&(msg->node)),sizeof(msg->node));
00077   msg_cur += sizeof(msg->node);
00078   CmiMemcpy((void*)msg_cur,(void*)(&(msg->patch)),sizeof(msg->patch));
00079   msg_cur += sizeof(msg->patch);
00080   for ( j = 0; j < Results::maxNumForces; ++j ) {
00081     int array_size = msg->forceList[j]->size();
00082     *(int *) msg_cur = array_size;
00083     msg_cur += sizeof(int);
00084     char *nonzero = msg_cur;
00085     msg_cur += array_size * sizeof(char);
00086     msg_cur = (char *)ALIGN_8 (msg_cur);
00087     Vector *farr = (Vector *)msg_cur;
00088     Force* f = msg->forceList[j]->begin();
00089 
00090     for ( int i = 0; i < array_size; ++i ) {
00091       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00092         nonzero[i] = 1;
00093         farr->x = f[i].x;
00094         farr->y = f[i].y;
00095         farr->z = f[i].z;
00096         farr ++;
00097       } else {
00098         nonzero[i] = 0;
00099       }
00100     }
00101     msg_cur = (char *) farr;      
00102   }
00103 
00104   delete msg;
00105   return msg_buf;
00106 }
00107 
00108 ProxyResultMsg* ProxyResultMsg::unpack(void *ptr) {
00109 
00110   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyResultMsg));
00111   ProxyResultMsg *msg = new (vmsg) ProxyResultMsg;
00112   char *msg_cur = (char*)ptr;
00113 
00114   CmiMemcpy((void*)(&(msg->node)),(void*)msg_cur,sizeof(msg->node));
00115   msg_cur += sizeof(msg->node);
00116   CmiMemcpy((void*)(&(msg->patch)),(void*)msg_cur,sizeof(msg->patch));
00117   msg_cur += sizeof(msg->patch);
00118   int j;
00119   for ( j = 0; j < Results::maxNumForces; ++j ) {
00120     int array_size = *(int *) msg_cur;
00121     msg_cur += sizeof(array_size);
00122     msg->forceList[j] = &(msg->forceListInternal[j]);
00123     msg->forceList[j]->resize(array_size);
00124     char *nonzero = msg_cur;
00125     msg_cur += array_size * sizeof(char);    
00126     msg_cur = (char *)ALIGN_8 (msg_cur);
00127     Vector* farr = (Vector *) msg_cur;
00128     Force* f = msg->forceList[j]->begin();
00129     for ( int i = 0; i < array_size; ++i ) {
00130       if ( nonzero[i] ) {
00131         f[i].x = farr->x;
00132         f[i].y = farr->y;
00133         f[i].z = farr->z;
00134         farr++;
00135       } else {
00136         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00137       }
00138     }    
00139     msg_cur = (char *) farr;
00140   }
00141 
00142   CkFreeMsg(ptr);
00143   return msg;
00144 }
00145 
00146 ProxyResultVarsizeMsg *ProxyResultVarsizeMsg::getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls){
00147 
00148     //1. decide the length of forceArr and iszero field.
00149     int tmpLen[Results::maxNumForces];
00150     int iszeroLen = 0;
00151     for (int i=0; i<Results::maxNumForces; i++){
00152         tmpLen[i] = fls[i].size();
00153         iszeroLen += tmpLen[i];
00154     }
00155     char *tmpIszero = new char[iszeroLen];
00156     char *iszeroPtr = tmpIszero;
00157     int fArrLen = 0;
00158     for(int i=0; i<Results::maxNumForces; i++) {        
00159         Force *fiPtr = fls[i].begin();
00160         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {         
00161             if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
00162                 *iszeroPtr=0;
00163                 fArrLen++;
00164             }else{
00165                 *iszeroPtr=1;
00166             }            
00167         }
00168     }
00169 
00170     //2. Ready to create the msg, and set all fields
00171     ProxyResultVarsizeMsg *retmsg = new(fArrLen, iszeroLen, prioSize)ProxyResultVarsizeMsg;
00172     retmsg->node = nid;
00173     retmsg->patch = pid;
00174     memcpy(retmsg->flLen, tmpLen, sizeof(int)*Results::maxNumForces);
00175     iszeroPtr = tmpIszero;
00176     Force *forcePtr = retmsg->forceArr;
00177     for(int i=0; i<Results::maxNumForces; i++) {        
00178         Force *fiPtr = fls[i].begin();
00179         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
00180             if((*iszeroPtr)!=1) {
00181                 forcePtr->x = fiPtr->x;
00182                 forcePtr->y = fiPtr->y;
00183                 forcePtr->z = fiPtr->z;
00184                 forcePtr++;
00185             }            
00186         }
00187     }
00188     memcpy(retmsg->isZero, tmpIszero, sizeof(char)*iszeroLen);
00189     delete [] tmpIszero;
00190     return retmsg;
00191 }
00192 
00193 ProxyNodeAwareSpanningTreeMsg *ProxyNodeAwareSpanningTreeMsg::getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size){
00194     int numAllPes = 0;
00195     for(int i=0; i<size; i++) {
00196         numAllPes += tree[i].numPes;
00197     }
00198     ProxyNodeAwareSpanningTreeMsg *retmsg = new(size, numAllPes, 0) ProxyNodeAwareSpanningTreeMsg;
00199     retmsg->patch = pid;
00200     retmsg->procID = nid;
00201     retmsg->numNodesWithProxies = size;
00202     int *pAllPes = retmsg->allPes;
00203     for(int i=0; i<size; i++) {
00204         retmsg->numPesOfNode[i] = tree[i].numPes;
00205         for(int j=0; j<tree[i].numPes; j++) {
00206             *pAllPes = tree[i].peIDs[j];
00207             pAllPes++;
00208         }
00209     }
00210     return retmsg;
00211 }
00212 
00213 //Only available when macro PROCTRACE_DEBUG is defined
00214 void ProxyNodeAwareSpanningTreeMsg::printOut(char *tag){
00215 #ifdef PROCTRACE_DEBUG
00216     DebugFileTrace *dft = DebugFileTrace::Object();
00217     dft->openTrace();
00218     const char *patchname = "ProxyPatch";
00219     if(procID == CkMyPe()) patchname = "HomePatch";
00220     dft->writeTrace("%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes\n", 
00221                     tag, patchname, patch, CkMyPe(), CkMyNode(), procID, numNodesWithProxies);
00222     if(numNodesWithProxies==0) {
00223         dft->closeTrace();
00224         return;
00225     }
00226     dft->writeTrace("%s: ===%d===pes/node: ", tag, patch);
00227     for(int i=0; i<numNodesWithProxies; i++) {
00228         dft->writeTrace("%d ", numPesOfNode[i]);
00229     }
00230     dft->writeTrace("\n%s: ===%d===pe list: ", tag, patch);
00231     int *p = allPes;
00232     for(int i=0; i<numNodesWithProxies; i++) {
00233         for(int j=0; j<numPesOfNode[i]; j++) {
00234             dft->writeTrace("%d ", *p);
00235             p++;
00236         }
00237     }
00238     dft->writeTrace("\n");    
00239     dft->closeTrace();
00240 #endif
00241 }
00242 
00243 // for spanning tree
00244 ProxyCombinedResultRawMsg* ProxyCombinedResultMsg::toRaw(ProxyCombinedResultMsg *msg) {
00245   int totalFLLen=0;
00246   int nonzero_count = 0;
00247   int nodeSize = msg->nodes.size();
00248   for (int j = 0; j < Results::maxNumForces; ++j ) {
00249         int array_size = msg->forceList[j]->size();
00250     totalFLLen +=  array_size;
00251     Force* f = msg->forceList[j]->begin();
00252     for ( int i = 0; i < array_size; ++i ) {
00253       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00254     }
00255   }
00256 
00257   ProxyCombinedResultRawMsg *msg_buf = new(nodeSize, totalFLLen, nonzero_count, PRIORITY_SIZE)ProxyCombinedResultRawMsg;
00258   //Copy envelope stuff
00259   {
00260          envelope *oenv = UsrToEnv(msg);
00261          envelope *nenv = UsrToEnv(msg_buf);
00262          CmiMemcpy(nenv->getPrioPtr(), oenv->getPrioPtr(), nenv->getPrioBytes());
00263   }
00264 
00265   msg_buf->nodeSize = nodeSize;
00266   for (int i=0; i<nodeSize; i++) {
00267     msg_buf->nodes[i] = msg->nodes[i];
00268   }
00269   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00270   msg_buf->destPe = msg->destPe;
00271   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00272   msg_buf->isFromImmMsgCall = msg->isFromImmMsgCall;
00273   #endif
00274   #endif
00275   msg_buf->patch = msg->patch;
00276 
00277   Force *farr = msg_buf->forceArr;
00278   char *isNonZeroPtr = msg_buf->isForceNonZero;
00279   for ( int j = 0; j < Results::maxNumForces; ++j ) {
00280         int array_size = msg->forceList[j]->size();
00281     msg_buf->flLen[j] = array_size;
00282     Force* f = msg->forceList[j]->begin();
00283     for ( int i = 0; i < array_size; ++i , isNonZeroPtr++) {
00284       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00285         *isNonZeroPtr = 1;
00286                 farr->x  =  f[i].x;
00287         farr->y  =  f[i].y;
00288         farr->z  =  f[i].z;
00289         farr ++;
00290       } else {
00291         *isNonZeroPtr = 0;
00292       }
00293     }
00294   }
00295   return msg_buf;
00296 }
00297 
00298 ProxyCombinedResultMsg* ProxyCombinedResultMsg::fromRaw(ProxyCombinedResultRawMsg *ptr) {
00299 
00300   //CkPrintf("[%d]: unpacking: plainData=%p\n", CkMyPe(), ptr->plainData);      
00301 
00302   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyCombinedResultMsg));
00303   ProxyCombinedResultMsg *msg = new (vmsg) ProxyCombinedResultMsg;
00304 
00305   for (int i=0; i<ptr->nodeSize; i++) {
00306     msg->nodes.add(ptr->nodes[i]);
00307   }
00308   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00309   msg->destPe = ptr->destPe;
00310   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00311   msg->isFromImmMsgCall = ptr->isFromImmMsgCall;
00312   #endif
00313   #endif
00314   msg->patch = ptr->patch;
00315 
00316   char *nonzero = ptr->isForceNonZero;
00317   Force* farr = ptr->forceArr;
00318 
00319   for ( int j = 0; j < Results::maxNumForces; ++j ) {
00320     int array_size = ptr->flLen[j];
00321     msg->forceList[j] = &(msg->forceListInternal[j]);
00322     msg->forceList[j]->resize(array_size);
00323     Force* f = msg->forceList[j]->begin();
00324 
00325     for ( int i = 0; i < array_size; ++i, nonzero++ ) {
00326       if ( *nonzero ) {
00327                 f[i].x = farr->x;
00328                 f[i].y = farr->y;
00329                 f[i].z = farr->z;
00330                 farr++;
00331       } else {
00332         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00333       }
00334     }
00335   }
00336 
00337   delete ptr;
00338   return msg;
00339 }
00340 
// class static
// Definition of the static member ProxyMgr::nodecount, initialised to 0.
// NOTE(review): not referenced in the visible part of this file.
int ProxyMgr::nodecount = 0;
00343 
00344 ProxyMgr::ProxyMgr() { 
00345   if (CkpvAccess(ProxyMgr_instance)) {
00346     NAMD_bug("Tried to create ProxyMgr twice.");
00347   }
00348   CkpvAccess(ProxyMgr_instance) = this;
00349 }
00350 
00351 ProxyMgr::~ProxyMgr() { 
00352   removeProxies();
00353   CkpvAccess(ProxyMgr_instance) = NULL;
00354 }
00355 
00356 
00357 void ProxyMgr::setSendSpanning() {
00358   if(CkMyRank()!=0) return; 
00359   proxySendSpanning = 1;
00360 }
00361 
00362 int ProxyMgr::getSendSpanning() {
00363   return proxySendSpanning;
00364 }
00365 
00366 void ProxyMgr::setRecvSpanning() {
00367   if(CkMyRank()!=0) return;
00368   proxyRecvSpanning = 1;
00369 }
00370 
00371 int ProxyMgr::getRecvSpanning() {
00372   return proxyRecvSpanning;
00373 }
00374 
00375 void ProxyMgr::setProxyTreeBranchFactor(int dim){
00376     if(CkMyRank()!=0) return;
00377     proxySpanDim = dim;
00378 }
00379 
00380 ProxyTree &ProxyMgr::getPtree() {
00381   return ptree;
00382 }
00383 
00384 void ProxyMgr::removeProxies(void)
00385 {
00386   ProxySetIter pi(proxySet);
00387   for ( pi = pi.begin(); pi != pi.end(); pi++)
00388   {
00389     delete pi->proxyPatch;
00390   }
00391   proxySet.clear();
00392 }
00393 
00394 void ProxyMgr::removeUnusedProxies(void)
00395 {
00396   ResizeArray<PatchID> toDelete;
00397   ProxySetIter pi(proxySet);
00398   for ( pi = pi.begin(); pi != pi.end(); pi++)
00399   {
00400     if ( pi->proxyPatch->getNumComputes() == 0 ) {
00401       toDelete.add(pi->patchID);
00402       //fprintf(stderr, "Proxy Deleted Patch %d Proc %d", pi->patchID, CkMyPe());
00403     }
00404   }
00405   PatchID *pidi = toDelete.begin();
00406   for ( ; pidi != toDelete.end(); ++pidi ) {
00407     removeProxy(*pidi);
00408   }
00409 }
00410 
00411 // Figure out which proxies we need and create them
00412 void ProxyMgr::createProxies(void)
00413 {
00414   // Delete the old proxies.
00415   removeProxies();
00416 
00417   PatchMap *patchMap = PatchMap::Object();
00418   int numPatches = patchMap->numPatches();
00419   int myNode = CkMyPe();
00420   enum PatchFlag { Unknown, Home, NeedProxy };
00421   int *patchFlag = new int[numPatches]; 
00422   int i, j;
00423 
00424   // Note all home patches.
00425   for ( i = 0; i < numPatches; ++i )
00426   {
00427     patchFlag[i] = ( patchMap->node(i) == myNode ) ? Home : Unknown;
00428   }
00429 
00430   // Add all upstream neighbors.
00431   PatchID neighbors[PatchMap::MaxOneAway];
00432   PatchIDList basepids;
00433   patchMap->basePatchIDList(myNode,basepids);
00434   for ( i = 0; i < basepids.size(); ++i )
00435   {
00436     if ( patchMap->node(basepids[i]) != myNode ) {
00437         patchFlag[basepids[i]] = NeedProxy;
00438     }
00439     int numNeighbors = patchMap->upstreamNeighbors(basepids[i],neighbors);
00440     for ( j = 0; j < numNeighbors; ++j )
00441     {
00442       if ( ! patchFlag[neighbors[j]] ) {
00443         patchFlag[neighbors[j]] = NeedProxy;
00444       }
00445     }
00446   }
00447 
00448   // Check all patch-based compute objects.
00449   ComputeMap *computeMap = ComputeMap::Object();
00450   int nc = computeMap->numComputes();
00451   for ( i = 0; i < nc; ++i )
00452   {
00453 #ifdef NAMD_CUDA
00454     ComputeType t = computeMap->type(i);
00455     if ( t == computeNonbondedSelfType || t == computeNonbondedPairType )
00456       continue;
00457 #endif
00458     if ( computeMap->node(i) != myNode ) 
00459       continue;
00460     int numPid = computeMap->numPids(i);
00461     for ( j = 0; j < numPid; ++j )
00462     {
00463       int pid = computeMap->pid(i,j);
00464       if ( ! patchFlag[pid] ) {
00465         patchFlag[pid] = NeedProxy;
00466       }
00467     }
00468   }
00469   // Create proxy list
00470   for ( i = 0; i < numPatches; ++i ) {
00471     if ( patchFlag[i] == NeedProxy )
00472     { // create proxy patch
00473       ProxyPatch *proxy = new ProxyPatch(i);
00474       proxySet.add(ProxyElem(i, proxy));
00475       patchMap->registerPatch(i, proxy);
00476     }
00477   }
00478   delete[] patchFlag;
00479 }
00480 
00481 void
00482 ProxyMgr::createProxy(PatchID pid) {
00483   Patch *p = PatchMap::Object()->patch(pid);
00484   if (!p) {
00485      DebugM(4,"createProxy("<<pid<<")\n");
00486      ProxyPatch *proxy = new ProxyPatch(pid);
00487      proxySet.add(ProxyElem(pid,proxy));
00488      PatchMap::Object()->registerPatch(pid,proxy);
00489   }
00490   else {
00491      DebugM(4,"createProxy("<<pid<<") found " << p->getPatchID() << "\n");
00492   }
00493     
00494 }
00495 
00496 void
00497 ProxyMgr::removeProxy(PatchID pid) {
00498   ProxyElem *p = proxySet.find(ProxyElem(pid));
00499   if (p) { 
00500     PatchMap::Object()->unregisterPatch(pid,p->proxyPatch);
00501     delete p->proxyPatch;
00502     proxySet.del(ProxyElem(pid));
00503     // iout << iINFO << "Removing unused proxy " << pid << " on " << iPE << ".\n" << endi;
00504   }
00505 }
00506   
00507 void
00508 ProxyMgr::registerProxy(PatchID pid) {
00509   // determine which node gets message
00510   NodeID node = PatchMap::Object()->node(pid);
00511 
00512   RegisterProxyMsg *msg = new RegisterProxyMsg;
00513 
00514   msg->node=CkMyPe();
00515   msg->patch = pid;
00516 
00517   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00518   cp[node].recvRegisterProxy(msg);
00519 }
00520 
00521 void
00522 ProxyMgr::recvRegisterProxy(RegisterProxyMsg *msg) {
00523   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00524   homePatch->registerProxy(msg); // message deleted in registerProxy()
00525 }
00526 
00527 void
00528 ProxyMgr::unregisterProxy(PatchID pid) {
00529   // determine which node gets message
00530   NodeID node = PatchMap::Object()->node(pid);
00531 
00532   UnregisterProxyMsg *msg = new UnregisterProxyMsg;
00533 
00534   msg->node=CkMyPe();
00535   msg->patch = pid;
00536 
00537   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00538   cp[node].recvUnregisterProxy(msg);
00539 }
00540 
// Entry point for an UnregisterProxyMsg: looks up the home patch named in
// the message and hands the message to it.
void
ProxyMgr::recvUnregisterProxy(UnregisterProxyMsg *msg) {
  HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
  // The message is handed over to HomePatch::unregisterProxy(), which is
  // expected to delete it (TODO confirm in HomePatch).
  homePatch->unregisterProxy(msg);
}
00546 
00547 void 
00548 ProxyMgr::buildProxySpanningTree()
00549 {
00550   PatchIDList pids;
00551   if (!CkMyPe()) iout << iINFO << "Building spanning tree ... send: " << proxySendSpanning << " recv: " << proxyRecvSpanning 
00552       << " with branch factor " << proxySpanDim <<"\n" << endi;
00553   PatchMap::Object()->homePatchIDList(pids);
00554   for (int i=0; i<pids.size(); i++) {
00555     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00556     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00557 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00558     home->buildNodeAwareSpanningTree();
00559 #else
00560     home->buildSpanningTree();
00561 #endif
00562   }
00563 }
00564 
00565 void 
00566 ProxyMgr::buildProxySpanningTree2()
00567 {
00568 #if 0
00569   //The homePatchIDList is an expensive as it goes through
00570   //every patch ids.
00571   PatchIDList pids;
00572   PatchMap::Object()->homePatchIDList(pids);
00573   for (int i=0; i<pids.size(); i++) {
00574     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00575     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00576     home->sendProxies();
00577   }
00578 #else
00579   HomePatchList *hpl = PatchMap::Object()->homePatchList();
00580   HomePatchListIter iter(*hpl);
00581   for(iter=iter.begin(); iter!=iter.end(); iter++) {
00582           HomePatch *home = iter->patch;
00583           home->sendProxies();
00584   }
00585 #endif
00586 }
00587 
00588 void 
00589 ProxyMgr::sendProxies(int pid, int *list, int n)
00590 {
00591   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00592   cp[0].recvProxies(pid, list, n);
00593 }
00594 
00595 //The value defines the max number of intermediate proxies (acting
00596 //as the node to relay proxy msgs to children) allowed to reside 
00597 //on a physical node for proxy spanning tree
00598 #define MAX_INTERNODE 1
00599 
00600 //Only for debug
00601 static void outputProxyTree(ProxyTree &ptree, int np){
00602         FILE *ofp = fopen("patch_proxylist.txt", "w");
00603         std::vector<int> plist;
00604         for(int i=0; i<np; i++) {
00605                 fprintf(ofp, "%d: ", i);
00606                 int listlen = ptree.proxylist[i].size();
00607                 fprintf(ofp, "#%d ", listlen);
00608                 plist.clear();
00609                 for(int j=0; j<listlen; j++) {
00610                         plist.push_back(ptree.proxylist[i][j]);
00611                 }
00612                 std::sort(plist.begin(), plist.end());
00613                 for(int j=0; j<listlen; j++) {
00614                         fprintf(ofp, "%d ", plist[j]);
00615                 }
00616                 fprintf(ofp, "\n");
00617         }
00618         fclose(ofp);
00619 }
00620 
00621 // only on PE 0
00622 void 
00623 ProxyMgr::recvProxies(int pid, int *list, int n)
00624 {
00625   int nPatches = PatchMap::Object()->numPatches();
00626   if (ptree.proxylist == NULL)
00627     ptree.proxylist = new NodeIDList[nPatches];
00628   ptree.proxylist[pid].resize(n);
00629   for (int i=0; i<n; i++)
00630     ptree.proxylist[pid][i] = list[i];
00631   ptree.proxyMsgCount ++;
00632   if (ptree.proxyMsgCount == nPatches) {
00633         //debug
00634         //outputProxyTree(ptree, nPatches);
00635 
00636     ptree.proxyMsgCount = 0;
00637     // building and sending of trees is done in two steps now
00638     // so that the building step can be shifted to the load balancer
00639 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00640     buildNodeAwareSpanningTree0();
00641 #else
00642     buildSpanningTree0();    
00643 #endif
00644     sendSpanningTrees();
00645   }
00646 }
00647 
00648 void ProxyMgr::recvPatchProxyInfo(PatchProxyListMsg *msg){
00649         int nPatches = PatchMap::Object()->numPatches();
00650         if(ptree.proxylist == NULL) ptree.proxylist = new NodeIDList[nPatches];
00651         CmiAssert(msg->numPatches == nPatches);
00652         int peIdx = 0;
00653         for(int i=0; i<nPatches; i++) {
00654                 int pid = msg->patchIDs[i];
00655                 int plen = msg->proxyListLen[i];
00656                 ptree.proxylist[pid].resize(plen);
00657                 for(int j=0; j<plen; j++) {
00658                         ptree.proxylist[pid][j] = msg->proxyPEs[peIdx++];
00659                 }               
00660         }
00661         delete msg;
00662         
00663         //debug
00664         //outputProxyTree(ptree, nPatches);
00665 
00666         ptree.proxyMsgCount = 0;
00667     // building and sending of trees is done in two steps now
00668     // so that the building step can be shifted to the load balancer
00669 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00670     buildNodeAwareSpanningTree0();
00671 #else
00672     buildSpanningTree0();
00673 #endif
00674     sendSpanningTrees();
00675 }
00676 
00677 //
00678 // XXX static and global variables are unsafe for shared memory builds.
00679 // The global and static vars should be eliminated.  
00680 // Unfortunately, the routines that use these below are actually 
00681 // in use in NAMD.
00682 //
// Per-PE load values, defined elsewhere; indexed by PE number in compLoad()
// and processCpuLoad().
extern double *cpuloads;
// PE indices sorted by descending load; allocated and filled by
// processCpuLoad(), read by noInterNode().
static int *procidx = NULL;
// File-level average load. NOTE(review): its only visible reader is a
// commented-out check in noInterNode().
static double averageLoad = 0.0;
00686 
00687 static int compLoad(const void *a, const void *b)
00688 {
00689   int i1 = *(int *)a;
00690   int i2 = *(int *)b;
00691   double d1 = cpuloads[i1];
00692   double d2 = cpuloads[i2];
00693   if (d1 < d2) 
00694     return 1;
00695   else if (d1 == d2) 
00696     return 0;
00697   else 
00698     return -1;
00699   // sort from high to low
00700 }
00701 
00702 static void processCpuLoad()
00703 {
00704   int i;
00705   if (!procidx) {
00706     procidx = new int[CkNumPes()];
00707   }
00708   for (i=0; i<CkNumPes(); i++) procidx[i] = i;
00709   qsort(procidx, CkNumPes(), sizeof(int), compLoad);
00710 
00711   double averageLoad = 0.0;
00712   for (i=0; i<CkNumPes(); i++) averageLoad += cpuloads[i];
00713   averageLoad /= CkNumPes();
00714 //  iout << "buildSpanningTree1: no intermediate node on " << procidx[0] << " " << procidx[1] << endi;
00715 
00716 }
00717 
00718 static int noInterNode(int p)
00719 {
00720   int exclude = 0;
00721   if(CkNumPes()<1025)
00722     exclude = 5;
00723   else if(CkNumPes()<4097)
00724     exclude = 10;
00725   else if(CkNumPes()<8193)
00726     exclude = 40;
00727   else if(CkNumPes()<16385)
00728     exclude = 40;
00729   else
00730     exclude = 80;
00731   for (int i=0; i<exclude; i++) if (procidx[i] == p) return 1;
00732 //  if (cpuloads[p] > averageLoad) return 1;
00733   return 0;
00734 }
00735 
00736 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00737 //only on PE 0
00738 void ProxyMgr::buildNodeAwareSpanningTree0(){
00739         CkPrintf("Info: build node-aware spanning tree with send: %d, recv: %d with branch factor %d\n", 
00740                          proxySendSpanning, proxyRecvSpanning, proxySpanDim);
00741     int numPatches = PatchMap::Object()->numPatches();
00742     if (ptree.naTrees == NULL) ptree.naTrees = new proxyTreeNodeList[numPatches];
00743     for (int pid=0; pid<numPatches; pid++)     
00744         buildSinglePatchNodeAwareSpanningTree(pid, ptree.proxylist[pid], ptree.naTrees[pid]);
00745        
00746 
00747     //Debug
00748     //printf("#######################Naive ST#######################\n");
00749     //printProxySpanningTree();
00750 
00751     //Now the naive spanning tree has been constructed and stored in oneNATree;
00752     //Afterwards, some optimizations on this naive spanning tree could be done.
00753     //except the first element as the tree root always contains the processor
00754     //that has home patch
00755 
00756     //1st Optimization: reduce intermediate nodes as much as possible. In details,
00757     //the optimal case is that on a single physical smp node, there should be no
00758     //two proxies who act as the intermediate nodes to pass information to childrens
00759     //in the spanning tree. E.g, for patch A's proxy spanning tree, it has a node X as
00760     //its intermediate node. However, for patch B's, it also has a node X as its intermediate
00761     //node. We should avoid this situation as node X becomes the bottleneck as it has twice
00762     //amount of work to process now.
00763     //Step1: foward to the first patch that has proxies
00764     //Now proxyNodeMap records the info that how many intermediate nodes on a node
00765         //each element indiates the number of proxies residing on this node    
00766     int pid=0;
00767     for(;pid<numPatches; pid++) {
00768         if(ptree.proxylist[pid].size()>0) break;
00769     }
00770     if(pid==numPatches) {
00771         return;
00772     }
00773     int *proxyNodeMap = new int[CkNumNodes()];
00774     memset(proxyNodeMap, 0, sizeof(int)*CkNumNodes());
00775    {
00776     proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00777     //If a node is an intermediate node, then its idx should satisfy
00778     //idx*proxySpanDim + 1 < onePatchT.size()
00779     int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00780     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00781         int nid = onePatchT.item(i).nodeID;
00782         proxyNodeMap[nid]++;
00783     }
00784    }
00785     //Step2: iterate over each patch's proxy spanning tree to adjust
00786     //the tree node positions. The bad thing here is that it may involve
00787     //many memory allocations and deallocation for small-size (~100bytes)
00788     //chunks.
00789     pid++; //advance to the next patch
00790     for(; pid<numPatches; pid++) {
00791         if(ptree.proxylist[pid].size()==0) continue;
00792         proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00793         int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00794         for(int i=1; i<=lastInterNodeIdx; i++) {
00795             int nid = onePatchT.item(i).nodeID;
00796             if(proxyNodeMap[nid]<MAX_INTERNODE) {
00797                 proxyNodeMap[nid]++;
00798                 continue;
00799             }
00800             //the position is occupied, so search the children
00801             //nodes to see whether there's one to swap this node
00802             //if not found, find the first position that has smallest
00803             //amount of nodes.
00804             int leastIdx = -1;
00805             int leastAmount = ~(1<<31);
00806             //iterate children nodes
00807             int swapPos;
00808             for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.size(); swapPos++) {
00809                 int chiNId = onePatchT.item(swapPos).nodeID;
00810                 if(proxyNodeMap[chiNId]<MAX_INTERNODE) {
00811                     break;
00812                 }
00813                 if(proxyNodeMap[chiNId]<leastAmount) {
00814                     leastAmount = proxyNodeMap[chiNId];
00815                     leastIdx = swapPos;
00816                 }
00817             }
00818             if(swapPos==onePatchT.size()) {
00819                 CmiAssert(leastIdx!=-1); //because the above loop at least executes once
00820                 //indicate we cannot find a physical node which
00821                 //still allows the intermediate proxy.
00822                 swapPos = leastIdx;
00823             }
00824             //swap the current proxy tree node "i" with node "swapPos"
00825             proxyTreeNode *curNode = &onePatchT.item(i);
00826             proxyTreeNode *swapNode = &onePatchT.item(swapPos);
00827             proxyNodeMap[swapNode->nodeID]++; //update the proxyNodeMap record
00828             int tmp = curNode->nodeID;
00829             curNode->nodeID = swapNode->nodeID;
00830             swapNode->nodeID = tmp;
00831             tmp = curNode->numPes;
00832             ALLOCA(int,tmpPes,tmp);
00833             memcpy(tmpPes, curNode->peIDs, sizeof(int)*tmp);
00834             delete [] curNode->peIDs;
00835             curNode->numPes = swapNode->numPes;
00836             curNode->peIDs = new int[swapNode->numPes];
00837             memcpy(curNode->peIDs, swapNode->peIDs, sizeof(int)*swapNode->numPes);
00838             swapNode->numPes = tmp;
00839             delete [] swapNode->peIDs;
00840             swapNode->peIDs = new int[tmp];
00841             memcpy(swapNode->peIDs, tmpPes, sizeof(int)*tmp);                      
00842         }
00843     }
00844     delete [] proxyNodeMap;    
00845 
00846     //Debug
00847     //printf("#######################After 1st optimization#######################\n");
00848     //printProxySpanningTree();
00849 
00850     //2nd optimization: similar to the 1st optimization but now thinking in
00851     //the core level. If we cannot avoid place two intermediate proxy
00852     //on the same node, we'd better to place them in different cores inside
00853     //the node
00854     if(CmiMyNodeSize()==1) {
00855         //No need to perform the second optimization as every node has only 1 core
00856         return;
00857     }
00858     //Step1: forward to the first patch that has proxies
00859     pid=0;
00860     for(;pid<numPatches; pid++) {
00861         if(ptree.proxylist[pid].size()>0) break;
00862     }
00863     if(pid==numPatches) {
00864         return;
00865     }
00866     int *proxyCoreMap = new int[CkNumPes()];
00867     memset(proxyCoreMap, 0, sizeof(int)*CkNumPes());
00868    {
00869     proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00870     //If a node is an intermediate node, then its idx should satisfy
00871     //idx*proxySpanDim + 1 < onePatchT.size()
00872     int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00873     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00874         int rootProcID = onePatchT.item(i).peIDs[0];
00875         proxyCoreMap[rootProcID]++;
00876     }
00877    }
00878     //Step2: iterate over each patch's proxy spanning tree to adjust
00879     //the root's position of intermediate proxies.
00880     pid++; //advance to the next patch
00881     for(; pid<numPatches; pid++) {
00882         if(ptree.proxylist[pid].size()==0) continue;
00883         proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00884         int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00885         for(int i=1; i<=lastInterNodeIdx; i++) {
00886             proxyTreeNode *curNode = &onePatchT.item(i);
00887             int rootProcID = curNode->peIDs[0];
00888             if(curNode->numPes==1 || proxyCoreMap[rootProcID]<MAX_INTERNODE){
00889                 //if this node contains only 1 core, then we have to leave it as it is
00890                 //because there are no other cores in the same node that could be used to
00891                 //adjust
00892                 proxyCoreMap[rootProcID]++;
00893                 continue;
00894             }
00895             
00896             //foound more than MAX_INTERNODE intermediate proxies on the same core,
00897             //adjust the root id of the core of this proxy tree node
00898             int leastIdx = -1;
00899             int leastAmount = ~(1<<31);
00900             //iterate children nodes
00901             int swapPos;
00902             
00903             for(swapPos=1; swapPos<curNode->numPes; swapPos++) {
00904                 int otherCoreID = curNode->peIDs[swapPos];
00905                 if(proxyCoreMap[otherCoreID]<MAX_INTERNODE) {
00906                     break;
00907                 }
00908                 if(proxyCoreMap[otherCoreID]<leastAmount) {
00909                     leastAmount = proxyCoreMap[otherCoreID];
00910                     leastIdx = swapPos;
00911                 }
00912             }
00913             if(swapPos==curNode->numPes) {
00914                 CmiAssert(leastIdx!=-1); //because the above loop body must execute at least once
00915                 //indicate we cannot find a physical node which
00916                 //still allows the intermediate proxy.
00917                 swapPos = leastIdx;
00918             }
00919             int tmp = curNode->peIDs[swapPos];
00920             curNode->peIDs[swapPos] = curNode->peIDs[0];
00921             curNode->peIDs[0] = tmp;
00922             proxyCoreMap[tmp]++;
00923         }      
00924     }
00925 
00926     delete proxyCoreMap;
00927 
00928     //Debug
00929     //printf("#######################After 2nd optimization#######################\n");
00930     //printProxySpanningTree();
00931 }
00932 
00933 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(PatchID pid, NodeIDList &proxyList, 
00934                                                      proxyTreeNodeList &ptnTree){       
00935     int numProxies = proxyList.size();
00936     if (numProxies == 0) {
00937         //CkPrintf ("This is sheer evil in building node-aware spanning tree!\n\n");            
00938         return;
00939     }        
00940  
00941     //usually the #proxies is at most 62 (considering 2-away in all dimensions)
00942     //so the access in proxyNodeMap and proxyTreeIdx is at most log2(62)=8 if
00943     //all proxies are in different nodes
00944     //could be better than a CkNumNodes() array in that cache perf. is better
00945     //because of the reduced memory footprint -Chao Mei
00946     std::map<int, int> proxyNodeMap; //<node id, numProxies>    
00947     std::vector<int> proxyNodeIDs;
00948     std::map<int, int> proxyTreeIdx; //<node id, idx in proxyNodeIDs>
00949     
00950     //the processor id of home patch
00951     int hpProcID = PatchMap::Object()->node(pid);
00952     int hpNodeID = CkNodeOf(hpProcID);
00953     proxyNodeMap[hpNodeID]=1;
00954     proxyTreeIdx[hpNodeID]=0;
00955     proxyNodeIDs.push_back(hpNodeID);
00956     //proxyNodeList[0] = hpNodeID;
00957     //int numNodesWithProxies = 1;
00958     
00959     for(int i=0; i<numProxies; i++) {
00960         int procId = proxyList[i];
00961         int nodeId = CkNodeOf(procId);
00962         std::map<int, int>::iterator it=proxyNodeMap.find(nodeId);
00963         if(it==proxyNodeMap.end()) {
00964             proxyNodeMap[nodeId] = 1;
00965             proxyTreeIdx[nodeId] = proxyNodeIDs.size();
00966             proxyNodeIDs.push_back(nodeId);
00967         }else{
00968             proxyNodeMap[nodeId]++;
00969         }        
00970     }
00971     proxyTreeNodeList &oneNATree = ptnTree;   // spanning tree
00972     int numNodesWithProxies = proxyNodeIDs.size();
00973     oneNATree.resize(numNodesWithProxies);
00974     //initialize oneNATree
00975     for(int i=0; i<numNodesWithProxies; i++) {
00976         proxyTreeNode *oneNode = &oneNATree.item(i);
00977         delete [] oneNode->peIDs;
00978         oneNode->nodeID = proxyNodeIDs[i];
00979         oneNode->peIDs = new int[proxyNodeMap[oneNode->nodeID]];                        
00980         oneNode->numPes = 0; //initially set to zero as used for incrementing later
00981     }
00982     
00983     //set up the tree root which contains the home patch processor
00984     proxyTreeNode *rootnode = &oneNATree.item(0);
00985     rootnode->peIDs[0] = hpProcID;
00986     rootnode->numPes++;
00987     
00988     for(int i=0; i<numProxies; i++) {
00989         int procId = proxyList[i];
00990         int nodeId = CkNodeOf(procId);
00991         int idxInTree = proxyTreeIdx[nodeId];
00992         CmiAssert(idxInTree>=0 && idxInTree<numNodesWithProxies);
00993         proxyTreeNode *oneNode = &oneNATree.item(idxInTree);
00994         oneNode->peIDs[oneNode->numPes] = procId;
00995         oneNode->numPes++;
00996     }
00997 }
00998 #else //branch of NODEAWARE_PROXY_SPANNINGTREE
00999 // only on PE 0
01000 void 
01001 ProxyMgr::buildSpanningTree0()
01002 {
01003         CkPrintf("Info: build spanning tree with send: %d, recv: %d with branch factor %d\n", 
01004                          proxySendSpanning, proxyRecvSpanning, proxySpanDim);
01005 
01006   int i;
01007 
01008   processCpuLoad();
01009 
01010   int *numPatchesOnNode = new int[CkNumPes()];
01011   int numNodesWithPatches = 0;
01012   for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
01013   int numPatches = PatchMap::Object()->numPatches();
01014   for (i=0; i<numPatches; i++) {
01015     int node = PatchMap::Object()->node(i);
01016     numPatchesOnNode[node]++;
01017     if (numPatchesOnNode[node] == 1)
01018       numNodesWithPatches ++;
01019   }
01020   int patchNodesLast =
01021     ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
01022   int *ntrees = new int[CkNumPes()];
01023   for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
01024   if (ptree.trees == NULL) ptree.trees = new NodeIDList[numPatches];
01025   for (int pid=0; pid<numPatches; pid++) 
01026   {
01027     int numProxies = ptree.proxylist[pid].size();
01028     if (numProxies == 0) {
01029       //CkPrintf ("This is sheer evil!\n\n");
01030       //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
01031       delete [] ntrees;
01032       delete [] numPatchesOnNode;
01033       return;
01034     }
01035     NodeIDList &tree = ptree.trees[pid];   // spanning tree
01036     NodeIDList oldtree;  oldtree.swap(tree);
01037     tree.resize(numProxies+1);
01038     tree.setall(-1);
01039     tree[0] = PatchMap::Object()->node(pid);
01040     int s=1, e=numProxies;
01041     int nNonPatch = 0;
01042     int treesize = 1;
01043     int pp;
01044 
01045     // keep tree persistent for non-intermediate nodes
01046     for (pp=0; pp<numProxies; pp++) {
01047       int p = ptree.proxylist[pid][pp];
01048       int oldindex = oldtree.find(p);
01049       if (oldindex != -1 && oldindex <= numProxies) {
01050         int isIntermediate = (oldindex*proxySpanDim+1 <= numProxies);
01051         if (!isIntermediate) {
01052           tree[oldindex] = p;
01053         }
01054         else if (ntrees[p] < MAX_INTERNODE) {
01055           tree[oldindex] = p;
01056           ntrees[p] ++;
01057         }
01058       }
01059     }
01060 
01061     for (pp=0; pp<numProxies; pp++) {
01062       int p = ptree.proxylist[pid][pp];              // processor number
01063       if (tree.find(p) != -1) continue;        // already used
01064       treesize++;
01065       if (patchNodesLast && numPatchesOnNode[p] ) {
01066         while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
01067         tree[e] = p;
01068         int isIntermediate = (e*proxySpanDim+1 <= numProxies);
01069         if (isIntermediate) ntrees[p]++;
01070       }
01071       else {
01072         while (tree[s] != -1) { s++; if (s==numProxies+1) s = 1; }
01073         int isIntermediate = (s*proxySpanDim+1 <= numProxies);
01074         if (isIntermediate && (ntrees[p] >= MAX_INTERNODE || noInterNode(p))) {   // TOO MANY INTERMEDIATE TREES
01075         //if (isIntermediate && ntrees[p] >= MAX_INTERNODE)    // TOO MANY INTERMEDIATE TREES
01076           while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
01077           tree[e] = p;
01078           isIntermediate = (e*proxySpanDim+1 <= numProxies);
01079           if (isIntermediate) ntrees[p]++;
01080         }
01081         else {
01082           tree[s] = p;
01083           nNonPatch++;
01084           if (isIntermediate) ntrees[p]++;
01085         }
01086       }
01087     }
01088     // send homepatch's proxy tree
01089     if(ptree.sizes)
01090       ptree.sizes[pid] = treesize;
01091     //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, &tree[0], treesize);
01092   }
01093   /*for (i=0; i<CkNumPes(); i++) {
01094     if (ntrees[i] > MAX_INTERNODE) iout << "Processor " << i << "has (guess) " << ntrees[i] << " intermediate nodes." << endi;
01095   }*/
01096   delete [] ntrees;
01097   delete [] numPatchesOnNode;
01098 }
01099 #endif
01100 
01101 void ProxyMgr::sendSpanningTrees()
01102 {
01103   int numPatches = PatchMap::Object()->numPatches();
01104   for (int pid=0; pid<numPatches; pid++) {
01105     int numProxies = ptree.proxylist[pid].size();
01106 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01107     if (numProxies == 0)
01108       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, NULL, 0);
01109     else {
01110       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, ptree.naTrees[pid].begin(), ptree.naTrees[pid].size());
01111     }
01112 #else
01113     if (numProxies == 0)
01114       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
01115     else {
01116       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, ptree.trees[pid].begin(), ptree.trees[pid].size());
01117     }
01118 #endif
01119   }
01120 }
01121 
01122 void ProxyMgr::sendSpanningTreeToHomePatch(int pid, int *tree, int n)
01123 {
01124   CProxy_ProxyMgr cp(thisgroup);
01125   cp[PatchMap::Object()->node(pid)].recvSpanningTreeOnHomePatch(pid, tree, n);
01126 }
01127 
01128 void ProxyMgr::recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
01129 {
01130   HomePatch *p = PatchMap::Object()->homePatch(pid);
01131   p->recvSpanningTree(tree, n);
01132 }
01133 
01134 void ProxyMgr::sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
01135 {
01136   CProxy_ProxyMgr cp(thisgroup);
01137   ProxyNodeAwareSpanningTreeMsg *msg = ProxyNodeAwareSpanningTreeMsg::getANewMsg(pid, -1, tree, n);
01138   cp[PatchMap::Object()->node(pid)].recvNodeAwareSpanningTreeOnHomePatch(msg);
01139 }
01140 
01141 void ProxyMgr::recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
01142 {
01143   HomePatch *p = PatchMap::Object()->homePatch(msg->patch);
01144   p->recvNodeAwareSpanningTree(msg);
01145   delete msg;
01146 }
01147 
01148 void 
01149 ProxyMgr::sendSpanningTree(ProxySpanningTreeMsg *msg) {
01150   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01151   cp[msg->tree[0]].recvSpanningTree(msg);
01152 }
01153 
01154 void ProxyMgr::sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01155   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01156   int pe = msg->allPes[0]; //the root procID
01157 
01158 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01159   DebugFileTrace *dft = DebugFileTrace::Object();
01160   dft->openTrace();
01161   dft->writeTrace("PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->patch);
01162   dft->closeTrace();
01163 #endif
01164 
01165   cp[pe].recvNodeAwareSpanningTree(msg);
01166 }
01167 
01168 void 
01169 ProxyMgr::recvSpanningTree(ProxySpanningTreeMsg *msg) {
01170   int size = msg->tree.size();
01171   int *child = new int[proxySpanDim];
01172   int nChild = 0;
01173   int i;
01174   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01175   for (i=0; i<proxySpanDim; i++) {
01176     if (size > i+1) { child[i] = msg->tree[i+1]; nChild++; }
01177   }
01178   if (!PatchMap::Object()->homePatch(msg->patch)) {
01179     proxy->setSpanningTree(msg->node, child, nChild);
01180   }
01181 
01182   // build subtree and pass down
01183  if (nChild > 0) {
01184 
01185   nodecount ++;
01186   //if (nodecount > MAX_INTERNODE) 
01187   //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01188 
01189 //CkPrintf("[%d] %d:(%d) %d %d %d %d %d\n", CkMyPe(), msg->patch, size, msg->tree[0], msg->tree[1], msg->tree[2], msg->tree[3], msg->tree[4]);
01190   NodeIDList *tree = new NodeIDList[proxySpanDim];
01191   int level = 1, index=1;
01192   int done = 0;
01193   while (!done) {
01194     for (int n=0; n<nChild; n++) {
01195       if (done) break;
01196       for (int j=0; j<level; j++) {
01197        if (index >= size) { done = 1; break; }
01198        tree[n].add(msg->tree[index]);
01199        index++;
01200       }
01201     }
01202     level *=proxySpanDim;
01203   }
01204 
01205   ProxyMgr *proxyMgr = ProxyMgr::Object();
01206   for (i=0; i<proxySpanDim; i++) {
01207     if (tree[i].size()) {
01208       ProxySpanningTreeMsg *cmsg = new ProxySpanningTreeMsg;
01209       cmsg->patch = msg->patch;
01210       cmsg->node = CkMyPe();
01211       cmsg->tree.copy(tree[i]);  // copy data for thread safety
01212       proxyMgr->sendSpanningTree(cmsg);
01213     }
01214   }
01215 
01216   delete [] tree;
01217  }
01218 
01219   delete [] child;
01220   delete msg;
01221 }
01222 
01223 //The "msg" represents the subtree rooted at this proc
01224 void ProxyMgr::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01225 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01226     DebugFileTrace *dft = DebugFileTrace::Object();
01227     dft->openTrace();
01228     dft->writeTrace("PMgr::recvST0 for patch[%d] with #nodes=%d\n", msg->patch, msg->numNodesWithProxies);
01229     dft->closeTrace();
01230     msg->printOut("PMgr::recvST");
01231 #endif
01232 
01233     //This function is divided into three parts. The tree root is msg->allPes[0]
01234     //1. set up its own immediate childrens
01235     int treesize = msg->numNodesWithProxies; //at least include one as its internal procs    
01236     int iNChild = msg->numPesOfNode[0]-1; //number of internal children
01237     int eNChild = treesize-1; //number of external children
01238 
01239     CmiAssert(treesize>0);
01240     //use the same way of computing children in HomePatch::setupChildrenFromProxySpanningTree    
01241     eNChild = (proxySpanDim>eNChild)?eNChild:proxySpanDim;
01242     int iSlots = proxySpanDim-eNChild;
01243     if(iNChild>0) {
01244         if(iSlots==0){
01245             //at least having one internal child
01246             iNChild = 1;    
01247         }else{
01248             iNChild = (iSlots>iNChild)?iNChild:iSlots;
01249         }
01250     }    
01251     int numChild = iNChild + eNChild;
01252     if(numChild==0){
01253         //Indicating this proxy is a leaf in the spanning tree
01254         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01255         proxy->setSpanningTree(msg->procID, NULL, 0);
01256 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01257                 //When using NODEPATCHMGR, the proc-level is a flat list attached to the node
01258                 //while the node-level spanning tree obeys the branch factor.
01259                 //As a result, when passing down spanning trees, if this proc is on the same node
01260                 //of its parent, then the NodeProxyMgr has already been set by its parent. There's
01261                 //no need resetting here. However, the nodeChildren attached to this proxy has
01262                 //to be set to NULL. -Chao Mei
01263                 int onSameNode = (CkMyNode() == CkNodeOf(msg->procID));
01264         //set up proxyInfo inside NodeProxyMgr
01265         if(!onSameNode && !PatchMap::Object()->homePatch(msg->patch)){
01266             //only when this processor contains a proxy patch of "msg->patch"
01267             //is the patch registeration in NodeProxyMgr needed,
01268             //and itself needs to be registered
01269             CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01270             NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01271             npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);            
01272         }
01273         //set children in terms of node ids
01274         proxy->setSTNodeChildren(0, NULL);       
01275 #endif
01276         delete msg;
01277         return;
01278     }
01279 
01280     nodecount++;
01281     //if (nodecount > MAX_INTERNODE) 
01282     //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01283 
01284     if(!PatchMap::Object()->homePatch(msg->patch)){
01285         //the home patch of this spanning tree has been already set
01286         //in HomePatch::setupChildrenFromProxySpanningTree, so we
01287         //only care about the children setup for proxy patches here
01288         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01289         ALLOCA(int,children,numChild);
01290         //add external children
01291         int *p = msg->allPes + msg->numPesOfNode[0];
01292         for(int i=0; i<eNChild; i++) {
01293             children[i] = *p;
01294             p += msg->numPesOfNode[i+1];
01295         }
01296         //add internal children
01297         for(int i=eNChild, j=1; i<numChild; i++, j++) {
01298             children[i] = msg->allPes[j]; 
01299         }
01300         proxy->setSpanningTree(msg->procID, children, numChild);
01301 
01302 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01303                 int onSameNode = (CkMyNode() == CkNodeOf(msg->procID));
01304                 if(!onSameNode) {
01305                         //set up proxyInfo inside NodeProxyMgr
01306                         CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01307                         NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01308                         npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);        
01309         
01310                         //set children in terms of node ids
01311                         ALLOCA(int,nodeChildren,eNChild+1);
01312                         p = msg->allPes + msg->numPesOfNode[0];
01313                         for(int i=0; i<eNChild; i++) {
01314                                 nodeChildren[i] = CkNodeOf(*p);
01315                                 p += msg->numPesOfNode[i+1];
01316                         }
01317                         //the last entry always stores the node id that contains this proxy
01318                         nodeChildren[eNChild] = CkNodeOf(msg->allPes[0]);
01319                         proxy->setSTNodeChildren(eNChild+1, nodeChildren);
01320                 } else {
01321                         proxy->setSTNodeChildren(0, NULL);
01322                 }
01323 #endif
01324     }
01325 
01326     //2. send msgs for the tree to children proxies
01327     ResizeArray<int> *exTreeChildSize = new ResizeArray<int>[numChild];
01328     ResizeArray<int *> *exTreeChildPtr = new ResizeArray<int *>[numChild];    
01329 
01330     //2a. first processing children of external nodes
01331     if(eNChild > 0) {    
01332         int nodesToCnt = 1; //the number of children each root (current root's 
01333                             //immedidate external nodes) has in each level
01334         int pos = 1; //track the iteration over msg->numPesOfNode and skip the current root
01335         int *pePtr = msg->allPes + msg->numPesOfNode[0];
01336         int done = 0;
01337         while(!done) {
01338             for(int childID=0; childID<eNChild; childID++) {
01339                 //iterate nodes on each level
01340                 for(int i=0; i<nodesToCnt; i++) {
01341                     int cursize = msg->numPesOfNode[pos];
01342                     exTreeChildSize[childID].add(cursize);
01343                     exTreeChildPtr[childID].add(pePtr);
01344                     pos++;
01345                     pePtr += cursize; 
01346                     if(pos==msg->numNodesWithProxies) {
01347                         done = 1;
01348                         break;
01349                     }
01350                 }
01351                 if(done) break;                         
01352             }
01353             nodesToCnt *= proxySpanDim;
01354         }
01355     }
01356 
01357     //2b. secondly processing children on the same node
01358     if(iNChild>0) {
01359         int nodesToCnt = 1; //the number of children each root (current root's 
01360                             //immedidate internal child proxies) has in each level
01361         int pos = 1; //track the iteration over proxies on the same node and skip the current root
01362         int *pePtr = msg->allPes+1; //skip the root
01363         int done = 0;
01364         while(!done) {
01365             for(int childID=eNChild; childID<numChild; childID++) {
01366                 //iterate nodes on each level
01367                 for(int i=0; i<nodesToCnt; i++) {                    
01368                     exTreeChildSize[childID].add(1);
01369                     exTreeChildPtr[childID].add(pePtr);
01370                     pos++;
01371                     pePtr++; 
01372                     if(pos==msg->numPesOfNode[0]) {
01373                         done = 1;
01374                         break;
01375                     }
01376                 }
01377                 if(done) break;                         
01378             }
01379             nodesToCnt *= proxySpanDim;
01380         }
01381     }
01382           
01383     for(int i=0; i<numChild; i++) {                
01384         ResizeArray<int> *allSizes = &exTreeChildSize[i];
01385         ResizeArray<int *> *allPtrs = &exTreeChildPtr[i];
01386         int totalNodes = allSizes->size();
01387         int totalPes = 0;
01388         for(int j=0; j<totalNodes; j++) totalPes += allSizes->item(j);
01389         ProxyNodeAwareSpanningTreeMsg *cmsg = new(totalNodes, totalPes, 0) ProxyNodeAwareSpanningTreeMsg;
01390         cmsg->patch = msg->patch;
01391         cmsg->procID = CkMyPe();
01392         cmsg->numNodesWithProxies = totalNodes;
01393         int *pAllPes = cmsg->allPes;
01394         for(int j=0; j<totalNodes; j++) {
01395             int numPes = allSizes->item(j);
01396             cmsg->numPesOfNode[j] = numPes;
01397             memcpy(pAllPes, allPtrs->item(j), sizeof(int)*numPes);
01398             pAllPes += numPes;
01399         }
01400         #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01401         cmsg->printOut("sndChi:");
01402         #endif
01403         ProxyMgr::Object()->sendNodeAwareSpanningTree(cmsg);
01404     }
01405     delete [] exTreeChildSize;
01406     delete [] exTreeChildPtr;  
01407     delete msg;
01408 }
01409 
01410 void ProxyMgr::recvNodeAwareSTParent(int patch, int parent){
01411 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01412     DebugFileTrace *dft = DebugFileTrace::Object();
01413     dft->openTrace();
01414     dft->writeTrace("PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
01415     dft->closeTrace();
01416 #endif
01417     ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(patch);
01418     CmiAssert(proxy!=NULL);
01419     proxy->setSpanningTree(parent, NULL, 0);
01420 }
01421 
01422 void ProxyMgr::sendResults(ProxyResultVarsizeMsg *msg) {
01423     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01424     NodeID node = PatchMap::Object()->node(msg->patch);
01425     CmiEnableUrgentSend(1);
01426     cp[node].recvResults(msg);
01427     CmiEnableUrgentSend(0);
01428 }
01429 
01430 void ProxyMgr::recvResults(ProxyResultVarsizeMsg *msg) {
01431     HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01432     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01433 }
01434 
01435 void ProxyMgr::sendResults(ProxyResultMsg *msg) {
01436   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01437   NodeID node = PatchMap::Object()->node(msg->patch);
01438   CmiEnableUrgentSend(1);
01439   cp[node].recvResults(msg);
01440   CmiEnableUrgentSend(0);
01441 }
01442 
01443 void ProxyMgr::recvResults(ProxyResultMsg *msg) {
01444   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01445   home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01446 }
01447 
01448 //sendResults is a direct function call, not an entry method
01449 void ProxyMgr::sendResults(ProxyCombinedResultMsg *msg) {
01450   ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(msg->patch);
01451   ProxyCombinedResultMsg *ocMsg = patch->depositCombinedResultMsg(msg);
01452   if (ocMsg) {
01453         ProxyCombinedResultRawMsg *cMsg = ProxyCombinedResultMsg::toRaw(ocMsg);        
01454     int destPe = patch->getSpanningTreeParent();
01455     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01456     CmiAssert(destPe!=CkMyPe());
01457     //if(destPe != CkMyPe()) {
01458 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01459       /*CkPrintf("ready to call node::recvImmRes on pe[%d] to dest[%d]\n", CkMyPe(), destPe);
01460       fflush(stdout);*/
01461 
01462       cMsg->destPe = destPe;
01463       CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01464       cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
01465 #else
01466       cp[destPe].recvImmediateResults(cMsg);
01467 #endif
01468     //}
01469     //else{
01471     //  cp[destPe].recvResults(cMsg);
01472     //}
01473   }
01474 }
01475 
01476 void ProxyMgr::recvResults(ProxyCombinedResultRawMsg *omsg) {
01477         ProxyCombinedResultRawMsg *msg = omsg;
01478 
01479 //Chao Mei: hack for QD in case of SMP with immediate msg
01480 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01481     if(proxyRecvSpanning && msg->isFromImmMsgCall){
01482 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01483 //    fflush(stdout);
01484         //To compensate for the counter loss for message creation
01485         //inside the process of immediate message on comm thread
01486         CkpvAccess(_qd)->create();
01487     }
01488 #endif
01489 
01490   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01491   if (home) {
01492     //printf("Home got a message\n");
01493     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01494   }
01495   else {
01496     NAMD_bug("ProxyMgr should receive result message on home processor");
01497   }
01498 }
01499 
01500 void ProxyMgr::recvImmediateResults(ProxyCombinedResultRawMsg *omsg) {
01501   HomePatch *home = PatchMap::Object()->homePatch(omsg->patch);
01502   if (home) {
01503     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);        
01504     CmiEnableUrgentSend(1);
01505     cp[CkMyPe()].recvResults(omsg);
01506     CmiEnableUrgentSend(0);
01507   }
01508   else {
01509     ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(omsg->patch);
01510         ProxyCombinedResultMsg *ocMsg = patch->depositCombinedResultRawMsg(omsg);
01511     if (ocMsg) {
01512                 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01513                 ProxyCombinedResultRawMsg *cMsg = ProxyCombinedResultMsg::toRaw(ocMsg);         
01514                 cp[patch->getSpanningTreeParent()].recvImmediateResults(cMsg);
01515     }
01516   }
01517 }
01518 
01519 void NodeProxyMgr::recvImmediateResults(ProxyCombinedResultRawMsg *omsg){
01520     ProxyCombinedResultRawMsg *msg = omsg;
01521 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01522     //CkPrintf("recvImmRes called on comm thread%d pe[%d]\n", CkMyRank()==CmiMyNodeSize(), CkMyPe());
01523     //fflush(stdout);
01524 
01525     int destRank = CkRankOf(msg->destPe);
01526     PatchMap *pmap = localPatchMaps[destRank];
01527     HomePatch *home = pmap->homePatch(msg->patch);
01528     if (home) {
01529 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01530         msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01531 #endif
01532         CProxy_ProxyMgr cp(localProxyMgr);
01533         CmiEnableUrgentSend(1);
01534         cp[msg->destPe].recvResults(msg);
01535         CmiEnableUrgentSend(0);
01536 /*
01537         char *srcfrom = "Isfrom";
01538         if(CkMyRank()!=CmiMyNodeSize()) srcfrom="Notfrom";
01539       CkPrintf("%s comm thread from pe[%d]\n", srcfrom, CkMyPe());
01540       fflush(stdout);
01541 */
01542     }
01543     else {
01544         ProxyPatch *patch = (ProxyPatch *)pmap->patch(msg->patch);
01545         ProxyCombinedResultMsg *ocMsg = patch->depositCombinedResultRawMsg(msg); 
01546         if (ocMsg) {
01547             CProxy_NodeProxyMgr cnp(thisgroup);
01548             ocMsg->destPe = patch->getSpanningTreeParent();
01549                         ProxyCombinedResultRawMsg *cMsg = ProxyCombinedResultMsg::toRaw(ocMsg);
01550             cnp[CkNodeOf(cMsg->destPe)].recvImmediateResults(cMsg);
01551         }
01552     }
01553 #endif
01554 }
01555 
01556 void
01557 ProxyMgr::sendProxyData(ProxyDataMsg *msg, int pcnt, int *pids) {
01558 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01559     if(proxySendSpanning == 1) {
01560         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01561         for(int i=0; i<pcnt-1; i++) {
01562             ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01563             cnp[pids[i]].recvImmediateProxyData(copymsg);
01564         }
01565         cnp[pids[pcnt-1]].recvImmediateProxyData(msg);
01566         return;
01567     }
01568 #endif
01569   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01570   cp.recvImmediateProxyData(msg,pcnt,pids);
01571 }
01572 
01573 void 
01574 ProxyMgr::recvProxyData(ProxyDataMsg *msg) {
01575 //Chao Mei: hack for QD in case of SMP with immediate msg
01576 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01577     if(proxySendSpanning && msg->isFromImmMsgCall){
01578 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01579 //    fflush(stdout);
01580         //To compensate for the counter loss for message creation
01581         //inside the process of immediate message on comm thread
01582         CkpvAccess(_qd)->create();
01583     }
01584 #endif
01585   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01586   proxy->receiveData(msg); // deleted in ProxyPatch::receiveAtoms()
01587 }
01588 
01589 void
01590 ProxyMgr::recvImmediateProxyData(ProxyDataMsg *msg) {
01591   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);  
01592   if (proxySendSpanning == 1) {
01593     // copy the message and send to spanning children
01594     //int *pids = (int*)alloca(proxy->getSpanningTreeNChild()*sizeof(int));
01595     //int npid = proxy->getSpanningTreeChild(pids);
01596     int npid = proxy->getSpanningTreeNChild();
01597     int *pids = (int *)proxy->getSpanningTreeChildPtr();
01598     if (npid) {        
01599         ProxyDataMsg *newmsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);     
01600 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01601         int ntreephs;
01602         PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
01603         CmiAssert(treephs && ntreephs == npid);
01604         CmiUsePersistentHandle(treephs, ntreephs);
01605 #endif
01606         ProxyMgr::Object()->sendProxyData(newmsg,npid,pids);
01607 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01608         CmiUsePersistentHandle(NULL, 0);
01609 #endif
01610       #if 0
01611       //ChaoMei: buggy code??? the spanning tree doesn't always have 2 levels
01612       //At the second level of the tree immediate messages are not needed
01613       CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01614       cp.recvProxyData(newmsg,npid,pids);
01615       #endif
01616     }
01617   }
01618   /* send to self via EP method to preserve priority */
01619   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01620   cp[CkMyPe()].recvProxyData(msg);
01621 }
01622 
01623 void NodeProxyMgr::recvImmediateProxyData(ProxyDataMsg *msg) {    
01624 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01625     CProxy_ProxyMgr cp(localProxyMgr);
01626     proxyTreeNode *ptn = proxyInfo[msg->patch];
01627     CmiAssert(ptn->numPes!=0);
01628 
01629     //re-send msg to this nodes's children nodes.
01630     //only the first pe of a node of node-aware ST should contain children nodes
01631     int rank = CkRankOf(ptn->peIDs[0]);
01632     PatchMap *pmap = localPatchMaps[rank];
01633     ProxyPatch *ppatch = (ProxyPatch *)pmap->patch(msg->patch);
01634 
01635     int npid = ppatch->getSTNNodeChild();
01636     int *pids = ppatch->getSTNodeChildPtr();
01637     if(npid>0) {        
01638         //only needs to send to other nodes, so check the last entry of pids.
01639         //This is because the data for proxies on the same node have been sent
01640         //later in this function by NodeProxyMgr.
01641         if(pids[npid-1]==CkMyNode()) npid--;
01642     }    
01643     CProxy_NodeProxyMgr cnp(thisgroup);
01644 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01645     if (npid) {
01646         int ntreephs;
01647         PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
01648         CmiAssert(treephs && ntreephs >= npid);
01649         CmiUsePersistentHandle(treephs, ntreephs);
01650     }
01651 #endif
01652     for(int i=0; i<npid; i++) {
01653         ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01654         cnp[pids[i]].recvImmediateProxyData(copymsg);
01655     }    
01656 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01657     CmiUsePersistentHandle(NULL, 0);
01658 #endif
01659 
01660     //re-send msg to it's internal cores
01661 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01662     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01663 #endif
01664     cp.recvProxyData(msg, ptn->numPes, ptn->peIDs);
01665 #else
01666     CkAbort("Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
01667 #endif
01668 }
01669 
01670 void
01671 ProxyMgr::sendProxyAll(ProxyDataMsg *msg, int pcnt, int *pids) {
01672 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01673     if(proxySendSpanning == 1) {
01674         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01675         for(int i=0; i<pcnt-1; i++) {
01676             ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01677             cnp[pids[i]].recvImmediateProxyAll(copymsg);
01678         }
01679         cnp[pids[pcnt-1]].recvImmediateProxyAll(msg);
01680         return;
01681     }
01682 #endif
01683   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01684   cp.recvImmediateProxyAll(msg,pcnt,pids);
01685 }
01686 
01687 void 
01688 ProxyMgr::recvProxyAll(ProxyDataMsg *msg) {
01689 //Chao Mei: hack for QD in case of SMP with immediate msg
01690 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01691     if(proxySendSpanning && msg->isFromImmMsgCall){
01692 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01693 //    fflush(stdout);
01694         //To compensate for the counter loss for message creation
01695         //inside the process of immediate message on comm thread
01696         CkpvAccess(_qd)->create();
01697     }
01698 #endif
01699 
01700   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01701   proxy->receiveAll(msg); // deleted in ProxyPatch::receiveAtoms()
01702 }
01703 
01704 void
01705 ProxyMgr::recvImmediateProxyAll(ProxyDataMsg *msg) {
01706   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01707   #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01708   DebugFileTrace *dft = DebugFileTrace::Object();
01709   dft->openTrace();
01710   dft->writeTrace("PMgr::recvImmPAll for patch[%d]\n", msg->patch);
01711   CmiAssert(proxy!=NULL);
01712   dft->writeTrace("PMgr::recvImmPAll assertion OK for patch[%d]\n", msg->patch);
01713   dft->closeTrace();
01714   #endif
01715   if (proxySendSpanning == 1) {
01716     // copy the message and send to spanning children
01717     //int *pids = (int*)alloca(proxy->getSpanningTreeNChild()*sizeof(int));
01718     //int npid = proxy->getSpanningTreeChild(pids);
01719     int npid = proxy->getSpanningTreeNChild();
01720     int *pids = (int *)proxy->getSpanningTreeChildPtr();
01721     if (npid) {
01722         ProxyDataMsg *newmsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);      
01723 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01724         int ntreephs;
01725         PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
01726         CmiAssert(treephs && ntreephs == npid);
01727         CmiUsePersistentHandle(treephs, ntreephs);
01728 #endif
01729         ProxyMgr::Object()->sendProxyAll(newmsg,npid,pids);
01730 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01731         CmiUsePersistentHandle(NULL, 0);
01732 #endif
01733     }
01734   }
01735   /* send to self via EP method to preserve priority */
01736   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01737   cp[CkMyPe()].recvProxyAll(msg);
01738 }
01739 
01740 void NodeProxyMgr::recvImmediateProxyAll(ProxyDataMsg *msg) {    
01741 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01742     CProxy_ProxyMgr cp(localProxyMgr);
01743     proxyTreeNode *ptn = proxyInfo[msg->patch];
01744     CmiAssert(ptn->numPes!=0);
01745     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01746     //This could be executed on comm thd.
01747     printf("NodePMgr::recvImmPAll for patch[%d] on node %d rank %d, prepare to send proc ", msg->patch, CkMyNode(), CkMyRank());
01748     for(int i=0; i<ptn->numPes; i++) {
01749         printf("%d, ", ptn->peIDs[i]);
01750     }
01751     printf("\n");
01752     fflush(stdout);
01753     #endif
01754 
01755     //re-send msg to this nodes's children nodes.
01756     //only the first pe of a node of node-aware ST should contain children nodes
01757     int rank = CkRankOf(ptn->peIDs[0]);
01758     PatchMap *pmap = localPatchMaps[rank];
01759     ProxyPatch *ppatch = (ProxyPatch *)pmap->patch(msg->patch);
01760 
01761     int npid = ppatch->getSTNNodeChild();
01762     int *pids = ppatch->getSTNodeChildPtr();
01763     if(npid>0) {        
01764         //only needs to send to other nodes, so check the last entry of pids.
01765         //This is because the data for proxies on the same node have been sent
01766         //later in this function by NodeProxyMgr.
01767         if(pids[npid-1]==CkMyNode()) npid--;
01768     }
01769     
01770 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01771     if (npid) {
01772         int ntreephs;
01773         PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
01774         CmiAssert(treephs && ntreephs >= npid);
01775         CmiUsePersistentHandle(treephs, ntreephs);
01776     }
01777 #endif
01778     CProxy_NodeProxyMgr cnp(thisgroup);
01779     for(int i=0; i<npid; i++) {
01780         ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01781         cnp[pids[i]].recvImmediateProxyAll(copymsg);
01782     }    
01783 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01784     CmiUsePersistentHandle(NULL, 0);
01785 #endif
01786 
01787     //re-send msg to it's internal cores
01788 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01789     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01790 #endif
01791     cp.recvProxyAll(msg, ptn->numPes, ptn->peIDs);
01792 #else
01793     CkAbort("Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
01794 #endif
01795 }
01796 
01797 void ProxyMgr::printProxySpanningTree(){
01798 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01799     int numPatches = PatchMap::Object()->numPatches();
01800     for(int i=0; i<numPatches; i++) {
01801         proxyTreeNodeList &oneList = ptree.naTrees[i];
01802         printf("ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.size()); 
01803         if(ptree.proxylist[i].size()==0) continue;
01804         printf("===%d=== pes/node: ", i);
01805         for(int j=0; j<oneList.size(); j++) {
01806             printf("%d ", oneList.item(j).numPes);
01807         }
01808         printf("\n");
01809         printf("===%d=== pe ids: ", i);
01810         for(int j=0; j<oneList.size(); j++) {
01811             for(int k=0; k<oneList.item(j).numPes; k++) {
01812                 printf("%d ", oneList.item(j).peIDs[k]);
01813             }            
01814         }
01815         printf("\n");
01816     }    
01817     fflush(stdout);  
01818 #else
01819     int numPatches = PatchMap::Object()->numPatches();
01820     for(int i=0; i<numPatches; i++) {
01821         NodeIDList &oneList = ptree.trees[i];
01822         printf("ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.size()); 
01823         if(ptree.proxylist[i].size()==0) continue;        
01824         printf("===%d=== pe ids: ", i);
01825         for(int j=0; j<oneList.size(); j++) {            
01826             printf("%d ", oneList.item(j));            
01827         }
01828         printf("\n");
01829     }    
01830     fflush(stdout);  
01831 #endif
01832 }
01833 
01834 void NodeProxyMgr::registerPatch(int patchID, int numPes, int *pes){
01835     if(proxyInfo[patchID]) {
01836         delete proxyInfo[patchID];
01837     }
01838     if(numPes == 0) {
01839         proxyInfo[patchID] = NULL;
01840     }else{
01841         proxyInfo[patchID] = new proxyTreeNode(CkNodeOf(pes[0]),numPes,pes);
01842     }
01843 }
01844 
01845 void ProxyMgr::sendResult(ProxyGBISP1ResultMsg *msg) { //pp -r> hp
01846   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01847   NodeID node = PatchMap::Object()->node(msg->patch);
01848   CmiEnableUrgentSend(1);
01849   cp[node].recvResult(msg);
01850   CmiEnableUrgentSend(0);
01851 }
01852 void ProxyMgr::recvResult(ProxyGBISP1ResultMsg *msg) { //pp -r> hp
01853   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
01854   homePatch->receiveResult(msg); // message deleted in registerProxy()
01855 }
01856 void ProxyMgr::recvData(  ProxyGBISP2DataMsg *msg) {  //hp -d> pp
01857   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01858   proxy->receiveData(msg); // deleted in ProxyPatch::receiveAtoms() ?
01859 }
01860 void ProxyMgr::sendResult(ProxyGBISP2ResultMsg *msg) { //pp -r> hp
01861   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01862   NodeID node = PatchMap::Object()->node(msg->patch);
01863   CmiEnableUrgentSend(1);
01864   cp[node].recvResult(msg);
01865   CmiEnableUrgentSend(0);
01866 }
01867 void ProxyMgr::recvResult(ProxyGBISP2ResultMsg *msg) { //pp -r> hp
01868   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
01869   homePatch->receiveResult(msg); // message deleted in registerProxy()
01870 }
01871 void ProxyMgr::recvData(  ProxyGBISP3DataMsg *msg) {   //hp -d> pp
01872   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01873   proxy->receiveData(msg); // deleted in ProxyPatch::receiveAtoms() ?
01874 }
01875 
01876 PatchProxyListMsg *PatchProxyListMsg::createPatchProxyListMsg(PatchProxyListMsg **bufs, int bufSize, ProxyListInfo *info, int size){
01877         //1. compute the total patches this node manages, and the total length of all proxy lists
01878         int totalPatches = 0;
01879         int totalProxies = 0;
01880         for(int i=0; i<bufSize; i++) {
01881                 PatchProxyListMsg *one = bufs[i];
01882                 totalPatches += one->numPatches;
01883                 for(int j=0; j<one->numPatches; j++) totalProxies += one->proxyListLen[j];
01884         }
01885         totalPatches += size;
01886         for(int i=0; i<size; i++) {
01887                 totalProxies += info[i].numProxies;
01888         }
01889 
01890         PatchProxyListMsg *msg = new(totalPatches, totalPatches, totalProxies, 0)PatchProxyListMsg(totalPatches);
01891         int msgPatchIdx = 0;
01892         int msgProxyPeIdx = 0;
01893         for(int i=0; i<bufSize; i++) {
01894                 PatchProxyListMsg *one = bufs[i];
01895                 int curPeIdx = 0;
01896                 for(int j=0; j<one->numPatches; j++) {
01897                         msg->patchIDs[msgPatchIdx] = one->patchIDs[j];
01898                         int curListLen = one->proxyListLen[j];
01899                         msg->proxyListLen[msgPatchIdx++] = curListLen;
01900                         memcpy(msg->proxyPEs+msgProxyPeIdx, one->proxyPEs+curPeIdx, sizeof(int)*curListLen);
01901                         curPeIdx += curListLen;
01902                         msgProxyPeIdx += curListLen;
01903                 }
01904         }
01905         for(int i=0; i<size; i++) {
01906                 msg->patchIDs[msgPatchIdx] = info[i].patchID;
01907                 int curListLen = info[i].numProxies;
01908                 msg->proxyListLen[msgPatchIdx++] = curListLen;
01909                 memcpy(msg->proxyPEs+msgProxyPeIdx, info[i].proxyList, sizeof(int)*curListLen);
01910                 msgProxyPeIdx += curListLen;
01911         }
01912         return msg;
01913 }
01914 
01915 #define HOMEPATCH_TREE_BRFACTOR 2
01916 void NodeProxyMgr::createSTForHomePatches(PatchMap *pmap){
01917         //We use implicit tree construction for all home patches
01918         std::vector<int> nodesWithPatches; //record the id of node that has home patches
01919         int myNodeIdx = -1; //the index into the above vector of this node
01920         for(int nodeId=0; nodeId<CkNumNodes(); ++nodeId) {
01921                 int hpCnt = 0;
01922                 int firstPe = CkNodeFirst(nodeId);
01923                 int endPe = firstPe + CkNodeSize(nodeId);
01924                 for(int pe=firstPe; pe < endPe; ++pe) {
01925                         hpCnt += pmap->numPatchesOnNode(pe);
01926                 }
01927                 if(hpCnt==0) continue;
01928 
01929                 nodesWithPatches.push_back(nodeId);
01930                 if(CkMyNode() == nodeId) {
01931                         //on my node
01932                         myNodeIdx = nodesWithPatches.size()-1;
01933                         numHomePatches = hpCnt;
01934                         homepatchRecved = 0;
01935                         localProxyLists = new ProxyListInfo[hpCnt];
01936                         memset(localProxyLists, 0, sizeof(ProxyListInfo)*hpCnt);                        
01937                 }
01938         }
01939 
01940         if(myNodeIdx==-1){
01941                 //there's no home patches on this node
01942                 //just set to a value that doesn't make sense in spanning tree.
01943                 parentNode = -2; 
01944                 numKidNodes = 0;
01945                 kidRecved = 0;
01946                 return;
01947         }
01948         
01949         //calculate parent
01950         if(myNodeIdx == 0) {
01951                 parentNode = -1;
01952         }else{
01953                 int parentIdx = (myNodeIdx-1)/HOMEPATCH_TREE_BRFACTOR;
01954                 parentNode = nodesWithPatches[parentIdx];
01955         }
01956 
01957         //calculate kids
01958         numKidNodes = 0;
01959         int totalNodes = nodesWithPatches.size();
01960         for(int i=1; i<=HOMEPATCH_TREE_BRFACTOR; i++) {
01961                 int kidId = myNodeIdx*HOMEPATCH_TREE_BRFACTOR+i;
01962                 if(kidId >= totalNodes) break;
01963                 numKidNodes++;
01964         }
01965         if(numKidNodes!=0) {
01966                 remoteProxyLists = new PatchProxyListMsg *[numKidNodes];
01967         }
01968         kidRecved = 0;
01969 
01970         //CkPrintf("Node[%d] has %d homepatches with parent=%d and %d kids \n", CkMyNode(), numHomePatches, parentNode, numKidNodes);
01971 }
01972 
01973 void NodeProxyMgr::sendProxyList(int pid, int *plist, int size){
01974         int insertIdx; //indexed from 0
01975         CmiLock(localDepositLock);
01976         insertIdx = homepatchRecved++; //ensure the atomic increment
01977 
01978         localProxyLists[insertIdx].patchID = pid;
01979         localProxyLists[insertIdx].numProxies = size;
01980         localProxyLists[insertIdx].proxyList = plist;
01981 
01982         if(insertIdx == (numHomePatches-1)) {
01983                 //all local home patches have contributed
01984                 contributeToParent();
01985         }
01986         CmiUnlock(localDepositLock);
01987 }
01988 
01989 void NodeProxyMgr::sendProxyListInfo(PatchProxyListMsg *msg){
01990         int insertIdx; //indexed from 0
01991         CmiLock(localDepositLock);
01992         insertIdx = kidRecved++;
01993         
01994         remoteProxyLists[insertIdx] = msg;
01995         if(insertIdx == (numKidNodes-1)) {
01996                 //all kids have contributed;
01997                 contributeToParent();
01998         }
01999         CmiUnlock(localDepositLock);
02000 }
02001 
02002 void NodeProxyMgr::contributeToParent(){
02003         if(homepatchRecved!=numHomePatches || kidRecved != numKidNodes) return;
02004 
02005         homepatchRecved = 0;
02006         kidRecved = 0;
02007         //construct the msg
02008         PatchProxyListMsg *msg = PatchProxyListMsg::createPatchProxyListMsg(remoteProxyLists, numKidNodes, localProxyLists, numHomePatches);
02009         if(parentNode == -1) {
02010                 //send to proxy mgr on PE[0] as this is the root node
02011                 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
02012                 cp[0].recvPatchProxyInfo(msg);
02013         }else{
02014                 CProxy_NodeProxyMgr cnp(thisgroup);
02015                 cnp[parentNode].sendProxyListInfo(msg);
02016         }
02017         for(int i=0; i<numKidNodes; i++) {
02018                 delete remoteProxyLists[i];
02019         }
02020 }
02021 
02022 #include "ProxyMgr.def.h"
02023 

Generated on Tue Jun 18 04:07:49 2013 for NAMD by  doxygen 1.3.9.1