ProxyMgr.C

Go to the documentation of this file.
00001 
00007 #include "InfoStream.h"
00008 #include "main.h"
00009 #include "BOCgroup.h"
00010 #include "ProxyMgr.decl.h"
00011 #include "ProxyMgr.h"
00012 #include "PatchMap.inl"
00013 #include "ProxyPatch.h"
00014 #include "ComputeMap.h"
00015 #include "HomePatch.h"
00016 #include <string.h>
00017 #include "ProcessorPrivate.h"
00018 #include "packmsg.h"
00019 #include "Priorities.h"
00020 #ifndef _NO_ALLOCA_H
00021 #include <alloca.h>
00022 #endif
00023 #ifndef _NO_MALLOC_H
00024 #include <malloc.h>
00025 #endif
00026 
00027 #include <map>
00028 #include <vector>
00029 #include <algorithm>
00030 
00031 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
00032 #include "qd.h"
00033 #endif
00034 
00035 #include "ComputeNonbondedMICKernel.h"
00036 #include "SimParameters.h"
00037 #include "Node.h"
00038 
00039 //#define DEBUGM
00040 #define MIN_DEBUG_LEVEL 2
00041 #include "Debug.h"
00042 
00043 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00044 
// Spanning-tree settings kept as plain file-level globals.
// proxySendSpanning: set to 1 by ProxyMgr::setSendSpanning(), returned by getSendSpanning().
int proxySendSpanning   = 0;
// proxyRecvSpanning: set to 1 by ProxyMgr::setRecvSpanning(), returned by getRecvSpanning().
int proxyRecvSpanning   = 0;
// proxySpanDim is the branching factor of the proxy spanning tree. It is exposed in the
// configuration file as "proxyTreeBranchFactor" and set via ProxyMgr::setProxyTreeBranchFactor().
int proxySpanDim        = 4;
// NOTE(review): presumably the branching factor used among PEs inside one node;
// it is not referenced in the visible part of this file -- confirm against its users.
int inNodeProxySpanDim = 16;
00050 
// Declares how ProxySpanningTreeMsg is packed, using the helper macros that
// presumably come from packmsg.h (TODO confirm): the fields patch and node are
// handled by PACK, the tree field by PACK_RESIZE.
PACK_MSG(ProxySpanningTreeMsg,
  PACK(patch);
  PACK(node);
  PACK_RESIZE(tree);
)
00056 
00057 void* ProxyResultMsg::pack(ProxyResultMsg *msg) {
00058 
00059   int msg_size = 0;
00060   msg_size += sizeof(msg->node);
00061   msg_size += sizeof(msg->patch);
00062 
00063   int j;
00064   for ( j = 0; j < Results::maxNumForces; ++j ) {
00065     int array_size = msg->forceList[j]->size();
00066     msg_size += sizeof(array_size);
00067     msg_size += array_size * sizeof(char);    
00068     msg_size = ALIGN_8 (msg_size);
00069     Force* f = msg->forceList[j]->begin();
00070     int nonzero_count = 0;
00071     for ( int i = 0; i < array_size; ++i ) {
00072       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00073     }
00074     msg_size += nonzero_count * sizeof(Vector);
00075   }
00076 
00077   void *msg_buf = CkAllocBuffer(msg,msg_size);
00078   char *msg_cur = (char *)msg_buf;
00079 
00080   CmiMemcpy((void*)msg_cur,(void*)(&(msg->node)),sizeof(msg->node));
00081   msg_cur += sizeof(msg->node);
00082   CmiMemcpy((void*)msg_cur,(void*)(&(msg->patch)),sizeof(msg->patch));
00083   msg_cur += sizeof(msg->patch);
00084   for ( j = 0; j < Results::maxNumForces; ++j ) {
00085     int array_size = msg->forceList[j]->size();
00086     *(int *) msg_cur = array_size;
00087     msg_cur += sizeof(int);
00088     char *nonzero = msg_cur;
00089     msg_cur += array_size * sizeof(char);
00090     msg_cur = (char *)ALIGN_8 (msg_cur);
00091     Vector *farr = (Vector *)msg_cur;
00092     Force* f = msg->forceList[j]->begin();
00093 
00094     for ( int i = 0; i < array_size; ++i ) {
00095       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00096         nonzero[i] = 1;
00097         farr->x = f[i].x;
00098         farr->y = f[i].y;
00099         farr->z = f[i].z;
00100         farr ++;
00101       } else {
00102         nonzero[i] = 0;
00103       }
00104     }
00105     msg_cur = (char *) farr;      
00106   }
00107 
00108   delete msg;
00109   return msg_buf;
00110 }
00111 
00112 ProxyResultMsg* ProxyResultMsg::unpack(void *ptr) {
00113 
00114   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyResultMsg));
00115   ProxyResultMsg *msg = new (vmsg) ProxyResultMsg;
00116   char *msg_cur = (char*)ptr;
00117 
00118   CmiMemcpy((void*)(&(msg->node)),(void*)msg_cur,sizeof(msg->node));
00119   msg_cur += sizeof(msg->node);
00120   CmiMemcpy((void*)(&(msg->patch)),(void*)msg_cur,sizeof(msg->patch));
00121   msg_cur += sizeof(msg->patch);
00122   int j;
00123   for ( j = 0; j < Results::maxNumForces; ++j ) {
00124     int array_size = *(int *) msg_cur;
00125     msg_cur += sizeof(array_size);
00126     msg->forceList[j] = &(msg->forceListInternal[j]);
00127     msg->forceList[j]->resize(array_size);
00128     char *nonzero = msg_cur;
00129     msg_cur += array_size * sizeof(char);    
00130     msg_cur = (char *)ALIGN_8 (msg_cur);
00131     Vector* farr = (Vector *) msg_cur;
00132     Force* f = msg->forceList[j]->begin();
00133     for ( int i = 0; i < array_size; ++i ) {
00134       if ( nonzero[i] ) {
00135         f[i].x = farr->x;
00136         f[i].y = farr->y;
00137         f[i].z = farr->z;
00138         farr++;
00139       } else {
00140         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00141       }
00142     }    
00143     msg_cur = (char *) farr;
00144   }
00145 
00146   CkFreeMsg(ptr);
00147   return msg;
00148 }
00149 
00150 ProxyResultVarsizeMsg *ProxyResultVarsizeMsg::getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls){
00151 
00152     //1. decide the length of forceArr and iszero field.
00153     int tmpLen[Results::maxNumForces];
00154     int iszeroLen = 0;
00155     for (int i=0; i<Results::maxNumForces; i++){
00156         tmpLen[i] = fls[i].size();
00157         iszeroLen += tmpLen[i];
00158     }
00159     char *tmpIszero = new char[iszeroLen];
00160     char *iszeroPtr = tmpIszero;
00161     int fArrLen = 0;
00162     for(int i=0; i<Results::maxNumForces; i++) {        
00163         Force *fiPtr = fls[i].begin();
00164         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {         
00165             if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
00166                 *iszeroPtr=0;
00167                 fArrLen++;
00168             }else{
00169                 *iszeroPtr=1;
00170             }            
00171         }
00172     }
00173 
00174     //2. Ready to create the msg, and set all fields
00175     ProxyResultVarsizeMsg *retmsg = new(fArrLen, iszeroLen, prioSize)ProxyResultVarsizeMsg;
00176     retmsg->node = nid;
00177     retmsg->patch = pid;
00178     memcpy(retmsg->flLen, tmpLen, sizeof(int)*Results::maxNumForces);
00179     iszeroPtr = tmpIszero;
00180     Force *forcePtr = retmsg->forceArr;
00181     for(int i=0; i<Results::maxNumForces; i++) {        
00182         Force *fiPtr = fls[i].begin();
00183         for(int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
00184             if((*iszeroPtr)!=1) {
00185                 forcePtr->x = fiPtr->x;
00186                 forcePtr->y = fiPtr->y;
00187                 forcePtr->z = fiPtr->z;
00188                 forcePtr++;
00189             }            
00190         }
00191     }
00192     memcpy(retmsg->isZero, tmpIszero, sizeof(char)*iszeroLen);
00193     delete [] tmpIszero;
00194     return retmsg;
00195 }
00196 
00197 ProxyNodeAwareSpanningTreeMsg *ProxyNodeAwareSpanningTreeMsg::getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size){
00198     int numAllPes = 0;
00199     for(int i=0; i<size; i++) {
00200         numAllPes += tree[i].numPes;
00201     }
00202     ProxyNodeAwareSpanningTreeMsg *retmsg = new(size, numAllPes, 0) ProxyNodeAwareSpanningTreeMsg;
00203     retmsg->patch = pid;
00204     retmsg->procID = nid;
00205     retmsg->numNodesWithProxies = size;
00206     int *pAllPes = retmsg->allPes;
00207     for(int i=0; i<size; i++) {
00208         retmsg->numPesOfNode[i] = tree[i].numPes;
00209         for(int j=0; j<tree[i].numPes; j++) {
00210             *pAllPes = tree[i].peIDs[j];
00211             pAllPes++;
00212         }
00213     }
00214     return retmsg;
00215 }
00216 
00217 //Only available when macro PROCTRACE_DEBUG is defined
00218 void ProxyNodeAwareSpanningTreeMsg::printOut(char *tag){
00219 #ifdef PROCTRACE_DEBUG
00220     DebugFileTrace *dft = DebugFileTrace::Object();
00221     dft->openTrace();
00222     const char *patchname = "ProxyPatch";
00223     if(procID == CkMyPe()) patchname = "HomePatch";
00224     dft->writeTrace("%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes\n", 
00225                     tag, patchname, patch, CkMyPe(), CkMyNode(), procID, numNodesWithProxies);
00226     if(numNodesWithProxies==0) {
00227         dft->closeTrace();
00228         return;
00229     }
00230     dft->writeTrace("%s: ===%d===pes/node: ", tag, patch);
00231     for(int i=0; i<numNodesWithProxies; i++) {
00232         dft->writeTrace("%d ", numPesOfNode[i]);
00233     }
00234     dft->writeTrace("\n%s: ===%d===pe list: ", tag, patch);
00235     int *p = allPes;
00236     for(int i=0; i<numNodesWithProxies; i++) {
00237         for(int j=0; j<numPesOfNode[i]; j++) {
00238             dft->writeTrace("%d ", *p);
00239             p++;
00240         }
00241     }
00242     dft->writeTrace("\n");    
00243     dft->closeTrace();
00244 #endif
00245 }
00246 
00247 // for spanning tree
00248 ProxyCombinedResultRawMsg* ProxyCombinedResultMsg::toRaw(ProxyCombinedResultMsg *msg) {
00249   int totalFLLen=0;
00250   int nonzero_count = 0;
00251   int nodeSize = msg->nodes.size();
00252   for (int j = 0; j < Results::maxNumForces; ++j ) {
00253         int array_size = msg->forceList[j]->size();
00254     totalFLLen +=  array_size;
00255     Force* f = msg->forceList[j]->begin();
00256     for ( int i = 0; i < array_size; ++i ) {
00257       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) { ++nonzero_count; }
00258     }
00259   }
00260 
00261   ProxyCombinedResultRawMsg *msg_buf = new(nodeSize, totalFLLen, nonzero_count, PRIORITY_SIZE)ProxyCombinedResultRawMsg;
00262   //Copy envelope stuff
00263   {
00264          envelope *oenv = UsrToEnv(msg);
00265          envelope *nenv = UsrToEnv(msg_buf);
00266          CmiMemcpy(nenv->getPrioPtr(), oenv->getPrioPtr(), nenv->getPrioBytes());
00267   }
00268 
00269   msg_buf->nodeSize = nodeSize;
00270   for (int i=0; i<nodeSize; i++) {
00271     msg_buf->nodes[i] = msg->nodes[i];
00272   }
00273   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00274   msg_buf->destPe = msg->destPe;
00275   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00276   msg_buf->isFromImmMsgCall = msg->isFromImmMsgCall;
00277   #endif
00278   #endif
00279   msg_buf->patch = msg->patch;
00280 
00281   Force *farr = msg_buf->forceArr;
00282   char *isNonZeroPtr = msg_buf->isForceNonZero;
00283   for ( int j = 0; j < Results::maxNumForces; ++j ) {
00284         int array_size = msg->forceList[j]->size();
00285     msg_buf->flLen[j] = array_size;
00286     Force* f = msg->forceList[j]->begin();
00287     for ( int i = 0; i < array_size; ++i , isNonZeroPtr++) {
00288       if ( f[i].x != 0. || f[i].y != 0. || f[i].z != 0. ) {
00289         *isNonZeroPtr = 1;
00290                 farr->x  =  f[i].x;
00291         farr->y  =  f[i].y;
00292         farr->z  =  f[i].z;
00293         farr ++;
00294       } else {
00295         *isNonZeroPtr = 0;
00296       }
00297     }
00298   }
00299   delete msg;
00300   return msg_buf;
00301 }
00302 
00303 ProxyCombinedResultMsg* ProxyCombinedResultMsg::fromRaw(ProxyCombinedResultRawMsg *ptr) {
00304 
00305   //CkPrintf("[%d]: unpacking: plainData=%p\n", CkMyPe(), ptr->plainData);      
00306 
00307   void *vmsg = CkAllocBuffer(ptr,sizeof(ProxyCombinedResultMsg));
00308   ProxyCombinedResultMsg *msg = new (vmsg) ProxyCombinedResultMsg;
00309 
00310   for (int i=0; i<ptr->nodeSize; i++) {
00311     msg->nodes.add(ptr->nodes[i]);
00312   }
00313   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
00314   msg->destPe = ptr->destPe;
00315   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
00316   msg->isFromImmMsgCall = ptr->isFromImmMsgCall;
00317   #endif
00318   #endif
00319   msg->patch = ptr->patch;
00320 
00321   char *nonzero = ptr->isForceNonZero;
00322   Force* farr = ptr->forceArr;
00323 
00324   for ( int j = 0; j < Results::maxNumForces; ++j ) {
00325     int array_size = ptr->flLen[j];
00326     msg->forceList[j] = &(msg->forceListInternal[j]);
00327     msg->forceList[j]->resize(array_size);
00328     Force* f = msg->forceList[j]->begin();
00329 
00330     for ( int i = 0; i < array_size; ++i, nonzero++ ) {
00331       if ( *nonzero ) {
00332                 f[i].x = farr->x;
00333                 f[i].y = farr->y;
00334                 f[i].z = farr->z;
00335                 farr++;
00336       } else {
00337         f[i].x = 0.;  f[i].y = 0.;  f[i].z = 0.;
00338       }
00339     }
00340   }
00341 
00342   delete ptr;
00343   return msg;
00344 }
00345 
// Definition of the class-static counter ProxyMgr::nodecount, starting at 0.
// NOTE(review): it is not used in the visible part of this file -- check its users before relying on it.
int ProxyMgr::nodecount = 0;
00348 
00349 ProxyMgr::ProxyMgr() { 
00350   if (CkpvAccess(ProxyMgr_instance)) {
00351     NAMD_bug("Tried to create ProxyMgr twice.");
00352   }
00353   CkpvAccess(ProxyMgr_instance) = this;
00354 }
00355 
// Deletes all proxies held in proxySet and clears the registered
// ProxyMgr_instance pointer so a new ProxyMgr may be created afterwards.
ProxyMgr::~ProxyMgr() { 
  removeProxies();
  CkpvAccess(ProxyMgr_instance) = NULL;
}
00360 
00361 
00362 void ProxyMgr::setSendSpanning() {
00363   if(CkMyRank()!=0) return; 
00364   proxySendSpanning = 1;
00365 }
00366 
// Returns the current value of the global proxySendSpanning flag
// (0 initially, 1 after setSendSpanning() has taken effect).
int ProxyMgr::getSendSpanning() {
  return proxySendSpanning;
}
00370 
00371 void ProxyMgr::setRecvSpanning() {
00372   if(CkMyRank()!=0) return;
00373   proxyRecvSpanning = 1;
00374 }
00375 
// Returns the current value of the global proxyRecvSpanning flag
// (0 initially, 1 after setRecvSpanning() has taken effect).
int ProxyMgr::getRecvSpanning() {
  return proxyRecvSpanning;
}
00379 
00380 void ProxyMgr::setProxyTreeBranchFactor(int dim){
00381     if(CkMyRank()!=0) return;
00382     proxySpanDim = dim;
00383 }
00384 
// Returns a reference to this manager's ProxyTree member (ptree), which
// recvProxies()/recvPatchProxyInfo() fill with per-patch proxy lists.
ProxyTree &ProxyMgr::getPtree() {
  return ptree;
}
00388 
00389 void ProxyMgr::removeProxies(void)
00390 {
00391   ProxySetIter pi(proxySet);
00392   for ( pi = pi.begin(); pi != pi.end(); pi++)
00393   {
00394     delete pi->proxyPatch;
00395   }
00396   proxySet.clear();
00397 }
00398 
00399 void ProxyMgr::removeUnusedProxies(void)
00400 {
00401   ResizeArray<PatchID> toDelete;
00402   ProxySetIter pi(proxySet);
00403   for ( pi = pi.begin(); pi != pi.end(); pi++)
00404   {
00405     if ( pi->proxyPatch->getNumComputes() == 0 ) {
00406       toDelete.add(pi->patchID);
00407       //fprintf(stderr, "Proxy Deleted Patch %d Proc %d", pi->patchID, CkMyPe());
00408     }
00409   }
00410   PatchID *pidi = toDelete.begin();
00411   for ( ; pidi != toDelete.end(); ++pidi ) {
00412     removeProxy(*pidi);
00413   }
00414 }
00415 
// Figure out which proxies we need and create them.
//
// All existing proxies are deleted first. Every patch is then classified as
// Home (PatchMap places it on this PE), NeedProxy, or Unknown. A ProxyPatch is
// created, added to proxySet and registered with the PatchMap for each patch
// flagged NeedProxy.
//
// A patch is flagged NeedProxy when
//   - (non-CUDA builds only) it is one of this PE's base patches but lives
//     elsewhere, or it is an upstream neighbor of a base patch and is not
//     already flagged, or
//   - a compute assigned to this PE references it and it is not already
//     flagged. CUDA builds skip the nonbonded self/pair computes here; MIC
//     builds skip computes that go directly to the device.
void ProxyMgr::createProxies(void)
{
  // Delete the old proxies.
  removeProxies();

  PatchMap *patchMap = PatchMap::Object();
  int numPatches = patchMap->numPatches();
  int myNode = CkMyPe();
  // Unknown must stay the zero value: the "! patchFlag[...]" tests below rely on it.
  enum PatchFlag { Unknown, Home, NeedProxy };
  int *patchFlag = new int[numPatches]; 
  int i, j;

  // Note all home patches.
  for ( i = 0; i < numPatches; ++i )
  {
    patchFlag[i] = ( patchMap->node(i) == myNode ) ? Home : Unknown;
  }

#ifndef NAMD_CUDA
  // Add all upstream neighbors.
  PatchID neighbors[PatchMap::MaxOneAway];
  PatchIDList basepids;
  patchMap->basePatchIDList(myNode,basepids);
  for ( i = 0; i < basepids.size(); ++i )
  {
    // A base patch that is not homed on this PE always needs a proxy.
    if ( patchMap->node(basepids[i]) != myNode ) {
        patchFlag[basepids[i]] = NeedProxy;
    }
    int numNeighbors = patchMap->upstreamNeighbors(basepids[i],neighbors);
    for ( j = 0; j < numNeighbors; ++j )
    {
      // Only patches still Unknown are upgraded; Home patches are left alone.
      if ( ! patchFlag[neighbors[j]] ) {
        patchFlag[neighbors[j]] = NeedProxy;
      }
    }
  }
#endif

  ComputeMap *computeMap = ComputeMap::Object();

  // Check all patch-based compute objects.
  int nc = computeMap->numComputes();
  for ( i = 0; i < nc; ++i )
  {
#if defined(NAMD_CUDA)
    ComputeType t = computeMap->type(i);
    if ( t == computeNonbondedSelfType || t == computeNonbondedPairType )
      continue;
#elif defined(NAMD_MIC)
    // NOTE(review): t is assigned but not used in this branch.
    ComputeType t = computeMap->type(i);
    if ( computeMap->directToDevice(i) != 0 ) { continue; } // NOTE: Compute for device will take care of requiring the patch
#endif
    // Only computes assigned to this PE matter.
    if ( computeMap->node(i) != myNode ) 
      continue;
    int numPid = computeMap->numPids(i);
    for ( j = 0; j < numPid; ++j )
    {
      int pid = computeMap->pid(i,j);
      if ( ! patchFlag[pid] ) {
        patchFlag[pid] = NeedProxy;
      }
    }
  }
  // Create proxy list
  for ( i = 0; i < numPatches; ++i ) {
    if ( patchFlag[i] == NeedProxy )
    { // create proxy patch
      ProxyPatch *proxy = new ProxyPatch(i);
      proxySet.add(ProxyElem(i, proxy));
      patchMap->registerPatch(i, proxy);
    }
  }
  delete[] patchFlag;
}
00491 
00492 void
00493 ProxyMgr::createProxy(PatchID pid) {
00494   Patch *p = PatchMap::Object()->patch(pid);
00495   if (!p) {
00496      DebugM(4,"createProxy("<<pid<<")\n");
00497      ProxyPatch *proxy = new ProxyPatch(pid);
00498      proxySet.add(ProxyElem(pid,proxy));
00499      PatchMap::Object()->registerPatch(pid,proxy);
00500   }
00501   else {
00502      DebugM(4,"createProxy("<<pid<<") found " << p->getPatchID() << "\n");
00503   }
00504     
00505 }
00506 
00507 void
00508 ProxyMgr::removeProxy(PatchID pid) {
00509   ProxyElem *p = proxySet.find(ProxyElem(pid));
00510   if (p) { 
00511     PatchMap::Object()->unregisterPatch(pid,p->proxyPatch);
00512     delete p->proxyPatch;
00513     proxySet.del(ProxyElem(pid));
00514     // iout << iINFO << "Removing unused proxy " << pid << " on " << iPE << ".\n" << endi;
00515   }
00516 }
00517   
00518 void
00519 ProxyMgr::registerProxy(PatchID pid) {
00520   // determine which node gets message
00521   NodeID node = PatchMap::Object()->node(pid);
00522 
00523   RegisterProxyMsg *msg = new RegisterProxyMsg;
00524 
00525   msg->node=CkMyPe();
00526   msg->patch = pid;
00527 
00528   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00529   cp[node].recvRegisterProxy(msg);
00530 }
00531 
00532 void
00533 ProxyMgr::recvRegisterProxy(RegisterProxyMsg *msg) {
00534   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00535   homePatch->registerProxy(msg); // message deleted in registerProxy()
00536 }
00537 
00538 void
00539 ProxyMgr::unregisterProxy(PatchID pid) {
00540   // determine which node gets message
00541   NodeID node = PatchMap::Object()->node(pid);
00542 
00543   UnregisterProxyMsg *msg = new UnregisterProxyMsg;
00544 
00545   msg->node=CkMyPe();
00546   msg->patch = pid;
00547 
00548   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00549   cp[node].recvUnregisterProxy(msg);
00550 }
00551 
00552 void
00553 ProxyMgr::recvUnregisterProxy(UnregisterProxyMsg *msg) {
00554   HomePatch *homePatch = PatchMap::Object()->homePatch(msg->patch);
00555   homePatch->unregisterProxy(msg); // message deleted in registerProxy()
00556 }
00557 
00558 void 
00559 ProxyMgr::buildProxySpanningTree()
00560 {
00561   PatchIDList pids;
00562   if (!CkMyPe()) iout << iINFO << "Building spanning tree ... send: " << proxySendSpanning << " recv: " << proxyRecvSpanning 
00563       << " with branch factor " << proxySpanDim <<"\n" << endi;
00564   PatchMap::Object()->homePatchIDList(pids);
00565   for (int i=0; i<pids.size(); i++) {
00566     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00567     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00568 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00569     home->buildNodeAwareSpanningTree();
00570 #else
00571     home->buildSpanningTree();
00572 #endif
00573   }
00574 }
00575 
00576 void 
00577 ProxyMgr::buildProxySpanningTree2()
00578 {
00579 #if 0
00580   //The homePatchIDList is an expensive as it goes through
00581   //every patch ids.
00582   PatchIDList pids;
00583   PatchMap::Object()->homePatchIDList(pids);
00584   for (int i=0; i<pids.size(); i++) {
00585     HomePatch *home = PatchMap::Object()->homePatch(pids[i]);
00586     if (home == NULL) CkPrintf("ERROR: homepatch NULL\n");
00587     home->sendProxies();
00588   }
00589 #else
00590   HomePatchList *hpl = PatchMap::Object()->homePatchList();
00591   HomePatchListIter iter(*hpl);
00592   for(iter=iter.begin(); iter!=iter.end(); iter++) {
00593           HomePatch *home = iter->patch;
00594           home->sendProxies();
00595   }
00596 #endif
00597 }
00598 
00599 void 
00600 ProxyMgr::sendProxies(int pid, int *list, int n)
00601 {
00602   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
00603   cp[0].recvProxies(pid, list, n);
00604 }
00605 
//Maximum number of intermediate proxies allowed to reside on one physical
//node in the proxy spanning trees. An intermediate proxy is a tree node that
//relays proxy messages to its children.
#define MAX_INTERNODE 1
00610 
00611 //Only for debug
00612 static void outputProxyTree(ProxyTree &ptree, int np){
00613         FILE *ofp = fopen("patch_proxylist.txt", "w");
00614         std::vector<int> plist;
00615         for(int i=0; i<np; i++) {
00616                 fprintf(ofp, "%d: ", i);
00617                 int listlen = ptree.proxylist[i].size();
00618                 fprintf(ofp, "#%d ", listlen);
00619                 plist.clear();
00620                 for(int j=0; j<listlen; j++) {
00621                         plist.push_back(ptree.proxylist[i][j]);
00622                 }
00623                 std::sort(plist.begin(), plist.end());
00624                 for(int j=0; j<listlen; j++) {
00625                         fprintf(ofp, "%d ", plist[j]);
00626                 }
00627                 fprintf(ofp, "\n");
00628         }
00629         fclose(ofp);
00630 }
00631 
00632 // only on PE 0
00633 void 
00634 ProxyMgr::recvProxies(int pid, int *list, int n)
00635 {
00636   int nPatches = PatchMap::Object()->numPatches();
00637   if (ptree.proxylist == NULL)
00638     ptree.proxylist = new NodeIDList[nPatches];
00639   ptree.proxylist[pid].resize(n);
00640   for (int i=0; i<n; i++)
00641     ptree.proxylist[pid][i] = list[i];
00642   ptree.proxyMsgCount ++;
00643   if (ptree.proxyMsgCount == nPatches) {
00644         //debug
00645         //outputProxyTree(ptree, nPatches);
00646 
00647     ptree.proxyMsgCount = 0;
00648     // building and sending of trees is done in two steps now
00649     // so that the building step can be shifted to the load balancer
00650 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00651     buildNodeAwareSpanningTree0();
00652 #else
00653     buildSpanningTree0();    
00654 #endif
00655     sendSpanningTrees();
00656   }
00657 }
00658 
00659 void ProxyMgr::recvPatchProxyInfo(PatchProxyListMsg *msg){
00660         int nPatches = PatchMap::Object()->numPatches();
00661         if(ptree.proxylist == NULL) ptree.proxylist = new NodeIDList[nPatches];
00662         CmiAssert(msg->numPatches == nPatches);
00663         int peIdx = 0;
00664         for(int i=0; i<nPatches; i++) {
00665                 int pid = msg->patchIDs[i];
00666                 int plen = msg->proxyListLen[i];
00667                 ptree.proxylist[pid].resize(plen);
00668                 for(int j=0; j<plen; j++) {
00669                         ptree.proxylist[pid][j] = msg->proxyPEs[peIdx++];
00670                 }               
00671         }
00672         delete msg;
00673         
00674         //debug
00675         //outputProxyTree(ptree, nPatches);
00676 
00677         ptree.proxyMsgCount = 0;
00678     // building and sending of trees is done in two steps now
00679     // so that the building step can be shifted to the load balancer
00680 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00681     buildNodeAwareSpanningTree0();
00682 #else
00683     buildSpanningTree0();
00684 #endif
00685     sendSpanningTrees();
00686 }
00687 
00688 //
00689 // XXX static and global variables are unsafe for shared memory builds.
00690 // The global and static vars should be eliminated.  
00691 // Unfortunately, the routines that use these below are actually 
00692 // in use in NAMD.
00693 //
00694 extern double *cpuloads;
00695 static int *procidx = NULL;
00696 static double averageLoad = 0.0;
00697 
00698 static int compLoad(const void *a, const void *b)
00699 {
00700   int i1 = *(int *)a;
00701   int i2 = *(int *)b;
00702   double d1 = cpuloads[i1];
00703   double d2 = cpuloads[i2];
00704   if (d1 < d2) 
00705     return 1;
00706   else if (d1 == d2) 
00707     return 0;
00708   else 
00709     return -1;
00710   // sort from high to low
00711 }
00712 
00713 static void processCpuLoad()
00714 {
00715   int i;
00716   if (!procidx) {
00717     procidx = new int[CkNumPes()];
00718   }
00719   for (i=0; i<CkNumPes(); i++) procidx[i] = i;
00720   qsort(procidx, CkNumPes(), sizeof(int), compLoad);
00721 
00722   double averageLoad = 0.0;
00723   for (i=0; i<CkNumPes(); i++) averageLoad += cpuloads[i];
00724   averageLoad /= CkNumPes();
00725 //  iout << "buildSpanningTree1: no intermediate node on " << procidx[0] << " " << procidx[1] << endi;
00726 
00727 }
00728 
00729 static int noInterNode(int p)
00730 {
00731   int exclude = 0;
00732   if(CkNumPes()<1025)
00733     exclude = 5;
00734   else if(CkNumPes()<4097)
00735     exclude = 10;
00736   else if(CkNumPes()<8193)
00737     exclude = 40;
00738   else if(CkNumPes()<16385)
00739     exclude = 40;
00740   else
00741     exclude = 80;
00742   for (int i=0; i<exclude; i++) if (procidx[i] == p) return 1;
00743 //  if (cpuloads[p] > averageLoad) return 1;
00744   return 0;
00745 }
00746 
00747 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00748 //only on PE 0
00749 void ProxyMgr::buildNodeAwareSpanningTree0(){
00750         CkPrintf("Info: build node-aware spanning tree with send: %d, recv: %d with branch factor %d\n", 
00751                          proxySendSpanning, proxyRecvSpanning, proxySpanDim);
00752     int numPatches = PatchMap::Object()->numPatches();
00753     if (ptree.naTrees == NULL) ptree.naTrees = new proxyTreeNodeList[numPatches];
00754     for (int pid=0; pid<numPatches; pid++)     
00755         buildSinglePatchNodeAwareSpanningTree(pid, ptree.proxylist[pid], ptree.naTrees[pid]);
00756        
00757 
00758     //Debug
00759     //printf("#######################Naive ST#######################\n");
00760     //printProxySpanningTree();
00761 
00762     //Now the naive spanning tree has been constructed and stored in oneNATree;
00763     //Afterwards, some optimizations on this naive spanning tree could be done.
00764     //except the first element as the tree root always contains the processor
00765     //that has home patch
00766 
00767     //1st Optimization: reduce intermediate nodes as much as possible. In details,
00768     //the optimal case is that on a single physical smp node, there should be no
00769     //two proxies who act as the intermediate nodes to pass information to childrens
00770     //in the spanning tree. E.g, for patch A's proxy spanning tree, it has a node X as
00771     //its intermediate node. However, for patch B's, it also has a node X as its intermediate
00772     //node. We should avoid this situation as node X becomes the bottleneck as it has twice
00773     //amount of work to process now.
00774     //Step1: foward to the first patch that has proxies
00775     //Now proxyNodeMap records the info that how many intermediate nodes on a node
00776         //each element indiates the number of proxies residing on this node    
00777     int pid=0;
00778     for(;pid<numPatches; pid++) {
00779         if(ptree.proxylist[pid].size()>0) break;
00780     }
00781     if(pid==numPatches) {
00782         return;
00783     }
00784     int *proxyNodeMap = new int[CkNumNodes()];
00785     memset(proxyNodeMap, 0, sizeof(int)*CkNumNodes());
00786    {
00787     proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00788     //If a node is an intermediate node, then its idx should satisfy
00789     //idx*proxySpanDim + 1 < onePatchT.size()
00790     int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00791     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00792         int nid = onePatchT.item(i).nodeID;
00793         proxyNodeMap[nid]++;
00794     }
00795    }
00796     //Step2: iterate over each patch's proxy spanning tree to adjust
00797     //the tree node positions. The bad thing here is that it may involve
00798     //many memory allocations and deallocation for small-size (~100bytes)
00799     //chunks.
00800     pid++; //advance to the next patch
00801     for(; pid<numPatches; pid++) {
00802         if(ptree.proxylist[pid].size()==0) continue;
00803         proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00804         int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00805         for(int i=1; i<=lastInterNodeIdx; i++) {
00806             int nid = onePatchT.item(i).nodeID;
00807             if(proxyNodeMap[nid]<MAX_INTERNODE) {
00808                 proxyNodeMap[nid]++;
00809                 continue;
00810             }
00811             //the position is occupied, so search the children
00812             //nodes to see whether there's one to swap this node
00813             //if not found, find the first position that has smallest
00814             //amount of nodes.
00815             int leastIdx = -1;
00816             int leastAmount = ~(1<<31);
00817             //iterate children nodes
00818             int swapPos;
00819             for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.size(); swapPos++) {
00820                 int chiNId = onePatchT.item(swapPos).nodeID;
00821                 if(proxyNodeMap[chiNId]<MAX_INTERNODE) {
00822                     break;
00823                 }
00824                 if(proxyNodeMap[chiNId]<leastAmount) {
00825                     leastAmount = proxyNodeMap[chiNId];
00826                     leastIdx = swapPos;
00827                 }
00828             }
00829             if(swapPos==onePatchT.size()) {
00830                 CmiAssert(leastIdx!=-1); //because the above loop at least executes once
00831                 //indicate we cannot find a physical node which
00832                 //still allows the intermediate proxy.
00833                 swapPos = leastIdx;
00834             }
00835             //swap the current proxy tree node "i" with node "swapPos"
00836             proxyTreeNode *curNode = &onePatchT.item(i);
00837             proxyTreeNode *swapNode = &onePatchT.item(swapPos);
00838             proxyNodeMap[swapNode->nodeID]++; //update the proxyNodeMap record
00839             int tmp = curNode->nodeID;
00840             curNode->nodeID = swapNode->nodeID;
00841             swapNode->nodeID = tmp;
00842             tmp = curNode->numPes;
00843             curNode->numPes = swapNode->numPes;
00844             swapNode->numPes = tmp;
00845             int *tmpPes = curNode->peIDs;
00846             curNode->peIDs = swapNode->peIDs;
00847             swapNode->peIDs = tmpPes;
00848         }
00849     }
00850     delete [] proxyNodeMap;    
00851 
00852     //Debug
00853     //printf("#######################After 1st optimization#######################\n");
00854     //printProxySpanningTree();
00855 
00856     //2nd optimization: similar to the 1st optimization but now thinking in
00857     //the core level. If we cannot avoid place two intermediate proxy
00858     //on the same node, we'd better to place them in different cores inside
00859     //the node
00860     if(CmiMyNodeSize()==1) {
00861         //No need to perform the second optimization as every node has only 1 core
00862         return;
00863     }
00864     //Step1: forward to the first patch that has proxies
00865     pid=0;
00866     for(;pid<numPatches; pid++) {
00867         if(ptree.proxylist[pid].size()>0) break;
00868     }
00869     if(pid==numPatches) {
00870         return;
00871     }
00872     int *proxyCoreMap = new int[CkNumPes()];
00873     memset(proxyCoreMap, 0, sizeof(int)*CkNumPes());
00874    {
00875     proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00876     //If a node is an intermediate node, then its idx should satisfy
00877     //idx*proxySpanDim + 1 < onePatchT.size()
00878     int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00879     for(int i=1; i<lastInterNodeIdx; i++) { //excluding the root node
00880         int rootProcID = onePatchT.item(i).peIDs[0];
00881         proxyCoreMap[rootProcID]++;
00882     }
00883    }
00884     //Step2: iterate over each patch's proxy spanning tree to adjust
00885     //the root's position of intermediate proxies.
00886     pid++; //advance to the next patch
00887     for(; pid<numPatches; pid++) {
00888         if(ptree.proxylist[pid].size()==0) continue;
00889         proxyTreeNodeList &onePatchT = ptree.naTrees[pid];
00890         int lastInterNodeIdx = (onePatchT.size()-2)/proxySpanDim;
00891         for(int i=1; i<=lastInterNodeIdx; i++) {
00892             proxyTreeNode *curNode = &onePatchT.item(i);
00893             int rootProcID = curNode->peIDs[0];
00894             if(curNode->numPes==1 || proxyCoreMap[rootProcID]<MAX_INTERNODE){
00895                 //if this node contains only 1 core, then we have to leave it as it is
00896                 //because there are no other cores in the same node that could be used to
00897                 //adjust
00898                 proxyCoreMap[rootProcID]++;
00899                 continue;
00900             }
00901             
00902             //foound more than MAX_INTERNODE intermediate proxies on the same core,
00903             //adjust the root id of the core of this proxy tree node
00904             int leastIdx = -1;
00905             int leastAmount = ~(1<<31);
00906             //iterate children nodes
00907             int swapPos;
00908             
00909             for(swapPos=1; swapPos<curNode->numPes; swapPos++) {
00910                 int otherCoreID = curNode->peIDs[swapPos];
00911                 if(proxyCoreMap[otherCoreID]<MAX_INTERNODE) {
00912                     break;
00913                 }
00914                 if(proxyCoreMap[otherCoreID]<leastAmount) {
00915                     leastAmount = proxyCoreMap[otherCoreID];
00916                     leastIdx = swapPos;
00917                 }
00918             }
00919             if(swapPos==curNode->numPes) {
00920                 CmiAssert(leastIdx!=-1); //because the above loop body must execute at least once
00921                 //indicate we cannot find a physical node which
00922                 //still allows the intermediate proxy.
00923                 swapPos = leastIdx;
00924             }
00925             int tmp = curNode->peIDs[swapPos];
00926             curNode->peIDs[swapPos] = curNode->peIDs[0];
00927             curNode->peIDs[0] = tmp;
00928             proxyCoreMap[tmp]++;
00929         }      
00930     }
00931 
00932     delete proxyCoreMap;
00933 
00934     //Debug
00935     //printf("#######################After 2nd optimization#######################\n");
00936     //printProxySpanningTree();
00937 }
00938 
00939 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(PatchID pid, NodeIDList &proxyList, 
00940                                                      proxyTreeNodeList &ptnTree){       
00941     int numProxies = proxyList.size();
00942     if (numProxies == 0) {
00943         //CkPrintf ("This is sheer evil in building node-aware spanning tree!\n\n");            
00944         return;
00945     }        
00946  
00947     //usually the #proxies is at most 62 (considering 2-away in all dimensions)
00948     //so the access in proxyNodeMap and proxyTreeIdx is at most log2(62)=8 if
00949     //all proxies are in different nodes
00950     //could be better than a CkNumNodes() array in that cache perf. is better
00951     //because of the reduced memory footprint -Chao Mei
00952     std::map<int, int> proxyNodeMap; //<node id, numProxies>    
00953     std::vector<int> proxyNodeIDs;
00954     std::map<int, int> proxyTreeIdx; //<node id, idx in proxyNodeIDs>
00955     
00956     //the processor id of home patch
00957     int hpProcID = PatchMap::Object()->node(pid);
00958     int hpNodeID = CkNodeOf(hpProcID);
00959     proxyNodeMap[hpNodeID]=1;
00960     proxyTreeIdx[hpNodeID]=0;
00961     proxyNodeIDs.push_back(hpNodeID);
00962     //proxyNodeList[0] = hpNodeID;
00963     //int numNodesWithProxies = 1;
00964     
00965     for(int i=0; i<numProxies; i++) {
00966         int procId = proxyList[i];
00967         int nodeId = CkNodeOf(procId);
00968         std::map<int, int>::iterator it=proxyNodeMap.find(nodeId);
00969         if(it==proxyNodeMap.end()) {
00970             proxyNodeMap[nodeId] = 1;
00971             proxyTreeIdx[nodeId] = proxyNodeIDs.size();
00972             proxyNodeIDs.push_back(nodeId);
00973         }else{
00974             proxyNodeMap[nodeId]++;
00975         }        
00976     }
00977     proxyTreeNodeList &oneNATree = ptnTree;   // spanning tree
00978     int numNodesWithProxies = proxyNodeIDs.size();
00979     oneNATree.resize(numNodesWithProxies);
00980     //initialize oneNATree
00981     for(int i=0; i<numNodesWithProxies; i++) {
00982         proxyTreeNode *oneNode = &oneNATree.item(i);
00983         delete [] oneNode->peIDs;
00984         oneNode->nodeID = proxyNodeIDs[i];
00985         oneNode->peIDs = new int[proxyNodeMap[oneNode->nodeID]];                        
00986         oneNode->numPes = 0; //initially set to zero as used for incrementing later
00987     }
00988     
00989     //set up the tree root which contains the home patch processor
00990     proxyTreeNode *rootnode = &oneNATree.item(0);
00991     rootnode->peIDs[0] = hpProcID;
00992     rootnode->numPes++;
00993     
00994     for(int i=0; i<numProxies; i++) {
00995         int procId = proxyList[i];
00996         int nodeId = CkNodeOf(procId);
00997         int idxInTree = proxyTreeIdx[nodeId];
00998         CmiAssert(idxInTree>=0 && idxInTree<numNodesWithProxies);
00999         proxyTreeNode *oneNode = &oneNATree.item(idxInTree);
01000         oneNode->peIDs[oneNode->numPes] = procId;
01001         oneNode->numPes++;
01002     }
01003 }
01004 #else //branch of NODEAWARE_PROXY_SPANNINGTREE
01005 // only on PE 0
01006 void 
01007 ProxyMgr::buildSpanningTree0()
01008 {
01009         CkPrintf("Info: build spanning tree with send: %d, recv: %d with branch factor %d\n", 
01010                          proxySendSpanning, proxyRecvSpanning, proxySpanDim);
01011 
01012   int i;
01013 
01014   processCpuLoad();
01015 
01016   int *numPatchesOnNode = new int[CkNumPes()];
01017   int numNodesWithPatches = 0;
01018   for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
01019   int numPatches = PatchMap::Object()->numPatches();
01020   for (i=0; i<numPatches; i++) {
01021     int node = PatchMap::Object()->node(i);
01022     numPatchesOnNode[node]++;
01023     if (numPatchesOnNode[node] == 1)
01024       numNodesWithPatches ++;
01025   }
01026   int patchNodesLast =
01027     ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
01028   int *ntrees = new int[CkNumPes()];
01029   for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
01030   if (ptree.trees == NULL) ptree.trees = new NodeIDList[numPatches];
01031   for (int pid=0; pid<numPatches; pid++) 
01032   {
01033     int numProxies = ptree.proxylist[pid].size();
01034     if (numProxies == 0) {
01035       //CkPrintf ("This is sheer evil!\n\n");
01036       //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
01037       delete [] ntrees;
01038       delete [] numPatchesOnNode;
01039       return;
01040     }
01041     NodeIDList &tree = ptree.trees[pid];   // spanning tree
01042     NodeIDList oldtree;  oldtree.swap(tree);
01043     tree.resize(numProxies+1);
01044     tree.setall(-1);
01045     tree[0] = PatchMap::Object()->node(pid);
01046     int s=1, e=numProxies;
01047     int nNonPatch = 0;
01048     int treesize = 1;
01049     int pp;
01050 
01051     // keep tree persistent for non-intermediate nodes
01052     for (pp=0; pp<numProxies; pp++) {
01053       int p = ptree.proxylist[pid][pp];
01054       int oldindex = oldtree.find(p);
01055       if (oldindex != -1 && oldindex <= numProxies) {
01056         int isIntermediate = (oldindex*proxySpanDim+1 <= numProxies);
01057         if (!isIntermediate) {
01058           tree[oldindex] = p;
01059         }
01060         else if (ntrees[p] < MAX_INTERNODE) {
01061           tree[oldindex] = p;
01062           ntrees[p] ++;
01063         }
01064       }
01065     }
01066 
01067     for (pp=0; pp<numProxies; pp++) {
01068       int p = ptree.proxylist[pid][pp];              // processor number
01069       if (tree.find(p) != -1) continue;        // already used
01070       treesize++;
01071       if (patchNodesLast && numPatchesOnNode[p] ) {
01072         while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
01073         tree[e] = p;
01074         int isIntermediate = (e*proxySpanDim+1 <= numProxies);
01075         if (isIntermediate) ntrees[p]++;
01076       }
01077       else {
01078         while (tree[s] != -1) { s++; if (s==numProxies+1) s = 1; }
01079         int isIntermediate = (s*proxySpanDim+1 <= numProxies);
01080         if (isIntermediate && (ntrees[p] >= MAX_INTERNODE || noInterNode(p))) {   // TOO MANY INTERMEDIATE TREES
01081         //if (isIntermediate && ntrees[p] >= MAX_INTERNODE)    // TOO MANY INTERMEDIATE TREES
01082           while (tree[e] != -1) { e--; if (e==-1) e = numProxies; }
01083           tree[e] = p;
01084           isIntermediate = (e*proxySpanDim+1 <= numProxies);
01085           if (isIntermediate) ntrees[p]++;
01086         }
01087         else {
01088           tree[s] = p;
01089           nNonPatch++;
01090           if (isIntermediate) ntrees[p]++;
01091         }
01092       }
01093     }
01094     // send homepatch's proxy tree
01095     if(ptree.sizes)
01096       ptree.sizes[pid] = treesize;
01097     //ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, &tree[0], treesize);
01098   }
01099   /*for (i=0; i<CkNumPes(); i++) {
01100     if (ntrees[i] > MAX_INTERNODE) iout << "Processor " << i << "has (guess) " << ntrees[i] << " intermediate nodes." << endi;
01101   }*/
01102   delete [] ntrees;
01103   delete [] numPatchesOnNode;
01104 }
01105 #endif
01106 
01107 void ProxyMgr::sendSpanningTrees()
01108 {
01109   int numPatches = PatchMap::Object()->numPatches();
01110   for (int pid=0; pid<numPatches; pid++) {
01111     int numProxies = ptree.proxylist[pid].size();
01112 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01113     if (numProxies == 0)
01114       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, NULL, 0);
01115     else {
01116       ProxyMgr::Object()->sendNodeAwareSpanningTreeToHomePatch(pid, ptree.naTrees[pid].begin(), ptree.naTrees[pid].size());
01117     }
01118 #else
01119     if (numProxies == 0)
01120       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, NULL, 0);
01121     else {
01122       ProxyMgr::Object()->sendSpanningTreeToHomePatch(pid, ptree.trees[pid].begin(), ptree.trees[pid].size());
01123     }
01124 #endif
01125   }
01126 }
01127 
01128 void ProxyMgr::sendSpanningTreeToHomePatch(int pid, int *tree, int n)
01129 {
01130   CProxy_ProxyMgr cp(thisgroup);
01131   cp[PatchMap::Object()->node(pid)].recvSpanningTreeOnHomePatch(pid, tree, n);
01132 }
01133 
01134 void ProxyMgr::recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
01135 {
01136   HomePatch *p = PatchMap::Object()->homePatch(pid);
01137   p->recvSpanningTree(tree, n);
01138 }
01139 
01140 void ProxyMgr::sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
01141 {
01142   CProxy_ProxyMgr cp(thisgroup);
01143   ProxyNodeAwareSpanningTreeMsg *msg = ProxyNodeAwareSpanningTreeMsg::getANewMsg(pid, -1, tree, n);
01144   cp[PatchMap::Object()->node(pid)].recvNodeAwareSpanningTreeOnHomePatch(msg);
01145 }
01146 
01147 void ProxyMgr::recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
01148 {
01149   HomePatch *p = PatchMap::Object()->homePatch(msg->patch);
01150   p->recvNodeAwareSpanningTree(msg);
01151   delete msg;
01152 }
01153 
01154 void 
01155 ProxyMgr::sendSpanningTree(ProxySpanningTreeMsg *msg) {
01156   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01157   cp[msg->tree[0]].recvSpanningTree(msg);
01158 }
01159 
01160 void ProxyMgr::sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01161   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01162   int pe = msg->allPes[0]; //the root procID
01163 
01164 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01165   DebugFileTrace *dft = DebugFileTrace::Object();
01166   dft->openTrace();
01167   dft->writeTrace("PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->patch);
01168   dft->closeTrace();
01169 #endif
01170 
01171   cp[pe].recvNodeAwareSpanningTree(msg);
01172 }
01173 
01174 void 
01175 ProxyMgr::recvSpanningTree(ProxySpanningTreeMsg *msg) {
01176   int size = msg->tree.size();
01177   int *child = new int[proxySpanDim];
01178   int nChild = 0;
01179   int i;
01180   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01181   for (i=0; i<proxySpanDim; i++) {
01182     if (size > i+1) { child[i] = msg->tree[i+1]; nChild++; }
01183   }
01184   if (!PatchMap::Object()->homePatch(msg->patch)) {
01185     proxy->setSpanningTree(msg->node, child, nChild);
01186   }
01187 
01188   // build subtree and pass down
01189  if (nChild > 0) {
01190 
01191   nodecount ++;
01192   //if (nodecount > MAX_INTERNODE) 
01193   //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01194 
01195 //CkPrintf("[%d] %d:(%d) %d %d %d %d %d\n", CkMyPe(), msg->patch, size, msg->tree[0], msg->tree[1], msg->tree[2], msg->tree[3], msg->tree[4]);
01196   NodeIDList *tree = new NodeIDList[proxySpanDim];
01197   int level = 1, index=1;
01198   int done = 0;
01199   while (!done) {
01200     for (int n=0; n<nChild; n++) {
01201       if (done) break;
01202       for (int j=0; j<level; j++) {
01203        if (index >= size) { done = 1; break; }
01204        tree[n].add(msg->tree[index]);
01205        index++;
01206       }
01207     }
01208     level *=proxySpanDim;
01209   }
01210 
01211   ProxyMgr *proxyMgr = ProxyMgr::Object();
01212   for (i=0; i<proxySpanDim; i++) {
01213     if (tree[i].size()) {
01214       ProxySpanningTreeMsg *cmsg = new ProxySpanningTreeMsg;
01215       cmsg->patch = msg->patch;
01216       cmsg->node = CkMyPe();
01217       cmsg->tree.copy(tree[i]);  // copy data for thread safety
01218       proxyMgr->sendSpanningTree(cmsg);
01219     }
01220   }
01221 
01222   delete [] tree;
01223  }
01224 
01225   delete [] child;
01226   delete msg;
01227 }
01228 
01229 //The "msg" represents the subtree rooted at this proc
      //
      //Message layout as used below: msg->numPesOfNode[k] is the number of
      //PEs of the k-th node entry, and msg->allPes holds the PE ids of all
      //entries back to back, so entry k starts numPesOfNode[0..k-1] ints into
      //allPes. Entry 0 is the node of this proc and allPes[0] is this proc.
      //msg->procID is handed to the proxy patch as its spanning tree parent.
      //
      //The function picks this proc's direct children (first PE of up to
      //proxySpanDim following node entries, plus PEs of its own node entry),
      //configures the local proxy patch unless this proc holds the home
      //patch, and sends every child a new message describing its subtree.
      //The incoming msg is deleted on every return path.
01230 void ProxyMgr::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
01231 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01232     DebugFileTrace *dft = DebugFileTrace::Object();
01233     dft->openTrace();
01234     dft->writeTrace("PMgr::recvST0 for patch[%d] with #nodes=%d\n", msg->patch, msg->numNodesWithProxies);
01235     dft->closeTrace();
01236     msg->printOut("PMgr::recvST");
01237 #endif
01238 
01239     //This function is divided into three parts. The tree root is msg->allPes[0]
01240     //1. set up its own immediate childrens
01241     int treesize = msg->numNodesWithProxies; //at least include one as its internal procs    
01242     int iNChild = msg->numPesOfNode[0]-1; //number of internal children
01243     int eNChild = treesize-1; //number of external children
01244 
01245     CmiAssert(treesize>0);
01246     //use the same way of computing children in HomePatch::setupChildrenFromProxySpanningTree    
          //external children are capped at proxySpanDim; internal children get
          //the slots left over, but one internal child is always kept if any exist
01247     eNChild = (proxySpanDim>eNChild)?eNChild:proxySpanDim;
01248     int iSlots = proxySpanDim-eNChild;
01249     if(iNChild>0) {
01250         if(iSlots==0){
01251             //at least having one internal child
01252             iNChild = 1;    
01253         }else{
01254             iNChild = (iSlots>iNChild)?iNChild:iSlots;
01255         }
01256     }    
01257     int numChild = iNChild + eNChild;
01258     if(numChild==0){
01259         //Indicating this proxy is a leaf in the spanning tree
01260         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01261         proxy->setSpanningTree(msg->procID, NULL, 0);
01262 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01263                 //When using NODEPATCHMGR, the proc-level is a flat list attached to the node
01264                 //while the node-level spanning tree obeys the branch factor.
01265                 //As a result, when passing down spanning trees, if this proc is on the same node
01266                 //of its parent, then the NodeProxyMgr has already been set by its parent. There's
01267                 //no need resetting here. However, the nodeChildren attached to this proxy has
01268                 //to be set to NULL. -Chao Mei
01269                 int onSameNode = (CkMyNode() == CkNodeOf(msg->procID));
01270         //set up proxyInfo inside NodeProxyMgr
01271         if(!onSameNode && !PatchMap::Object()->homePatch(msg->patch)){
01272             //only when this processor contains a proxy patch of "msg->patch"
01273             //is the patch registeration in NodeProxyMgr needed,
01274             //and itself needs to be registered
01275             CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01276             NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01277             npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);            
01278         }
01279         //set children in terms of node ids
01280         proxy->setSTNodeChildren(0, NULL);       
01281 #endif
01282         delete msg;
01283         return;
01284     }
01285 
          //this proc forwards to at least one child: count it as an intermediate node
01286     nodecount++;
01287     //if (nodecount > MAX_INTERNODE) 
01288     //  iout << "Processor " << CkMyPe() << "has (actual) " << nodecount << " intermediate nodes." << endi;
01289 
01290     if(!PatchMap::Object()->homePatch(msg->patch)){
01291         //the home patch of this spanning tree has been already set
01292         //in HomePatch::setupChildrenFromProxySpanningTree, so we
01293         //only care about the children setup for proxy patches here
01294         ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
01295         ALLOCA(int,children,numChild);
01296         //add external children
              //(the first PE of each of the next eNChild node entries)
01297         int *p = msg->allPes + msg->numPesOfNode[0];
01298         for(int i=0; i<eNChild; i++) {
01299             children[i] = *p;
01300             p += msg->numPesOfNode[i+1];
01301         }
01302         //add internal children
              //(the PEs following this proc inside node entry 0)
01303         for(int i=eNChild, j=1; i<numChild; i++, j++) {
01304             children[i] = msg->allPes[j]; 
01305         }
01306         proxy->setSpanningTree(msg->procID, children, numChild);
01307 
01308 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01309                 int onSameNode = (CkMyNode() == CkNodeOf(msg->procID));
01310                 if(!onSameNode) {
01311                         //set up proxyInfo inside NodeProxyMgr
01312                         CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
01313                         NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();        
01314                         npm->registerPatch(msg->patch, msg->numPesOfNode[0], msg->allPes);        
01315         
01316                         //set children in terms of node ids
01317                         ALLOCA(int,nodeChildren,eNChild+1);
01318                         p = msg->allPes + msg->numPesOfNode[0];
01319                         for(int i=0; i<eNChild; i++) {
01320                                 nodeChildren[i] = CkNodeOf(*p);
01321                                 p += msg->numPesOfNode[i+1];
01322                         }
01323                         //the last entry always stores the node id that contains this proxy
01324                         nodeChildren[eNChild] = CkNodeOf(msg->allPes[0]);
01325                         proxy->setSTNodeChildren(eNChild+1, nodeChildren);
01326                 } else {
01327                         proxy->setSTNodeChildren(0, NULL);
01328                 }
01329 #endif
01330     }
01331 
01332     //2. send msgs for the tree to children proxies
          //For child i, exTreeChildSize[i] lists the PE count of every node entry
          //of its subtree and exTreeChildPtr[i] the matching start inside
          //msg->allPes; the data itself is only copied when the child message
          //is filled in at the end of the function.
01333     ResizeArray<int> *exTreeChildSize = new ResizeArray<int>[numChild];
01334     ResizeArray<int *> *exTreeChildPtr = new ResizeArray<int *>[numChild];    
01335 
01336     //2a. first processing children of external nodes
          //node entries 1..treesize-1 are dealt out to the external children in
          //turn, nodesToCnt entries per child per round; nodesToCnt grows by a
          //factor of proxySpanDim after every round
01337     if(eNChild > 0) {    
01338         int nodesToCnt = 1; //the number of children each root (current root's 
01339                             //immediate external nodes) has in each level
01340         int pos = 1; //track the iteration over msg->numPesOfNode and skip the current root
01341         int *pePtr = msg->allPes + msg->numPesOfNode[0];
01342         int done = 0;
01343         while(!done) {
01344             for(int childID=0; childID<eNChild; childID++) {
01345                 //iterate nodes on each level
01346                 for(int i=0; i<nodesToCnt; i++) {
01347                     int cursize = msg->numPesOfNode[pos];
01348                     exTreeChildSize[childID].add(cursize);
01349                     exTreeChildPtr[childID].add(pePtr);
01350                     pos++;
01351                     pePtr += cursize; 
01352                     if(pos==msg->numNodesWithProxies) {
01353                         done = 1;
01354                         break;
01355                     }
01356                 }
01357                 if(done) break;                         
01358             }
01359             nodesToCnt *= proxySpanDim;
01360         }
01361     }
01362 
01363     //2b. secondly processing children on the same node
          //same dealing scheme as 2a, but over the single PEs of node entry 0
          //(each recorded as an entry of size 1), skipping this proc itself
01364     if(iNChild>0) {
01365         int nodesToCnt = 1; //the number of children each root (current root's 
01366                             //immediate internal child proxies) has in each level
01367         int pos = 1; //track the iteration over proxies on the same node and skip the current root
01368         int *pePtr = msg->allPes+1; //skip the root
01369         int done = 0;
01370         while(!done) {
01371             for(int childID=eNChild; childID<numChild; childID++) {
01372                 //iterate nodes on each level
01373                 for(int i=0; i<nodesToCnt; i++) {                    
01374                     exTreeChildSize[childID].add(1);
01375                     exTreeChildPtr[childID].add(pePtr);
01376                     pos++;
01377                     pePtr++; 
01378                     if(pos==msg->numPesOfNode[0]) {
01379                         done = 1;
01380                         break;
01381                     }
01382                 }
01383                 if(done) break;                         
01384             }
01385             nodesToCnt *= proxySpanDim;
01386         }
01387     }
01388           
          //3. build one message per child from the recorded (size, pointer)
          //pairs, with this proc as the sender (procID), and send it to the child
01389     for(int i=0; i<numChild; i++) {                
01390         ResizeArray<int> *allSizes = &exTreeChildSize[i];
01391         ResizeArray<int *> *allPtrs = &exTreeChildPtr[i];
01392         int totalNodes = allSizes->size();
01393         int totalPes = 0;
01394         for(int j=0; j<totalNodes; j++) totalPes += allSizes->item(j);
01395         ProxyNodeAwareSpanningTreeMsg *cmsg = new(totalNodes, totalPes, 0) ProxyNodeAwareSpanningTreeMsg;
01396         cmsg->patch = msg->patch;
01397         cmsg->procID = CkMyPe();
01398         cmsg->numNodesWithProxies = totalNodes;
01399         int *pAllPes = cmsg->allPes;
01400         for(int j=0; j<totalNodes; j++) {
01401             int numPes = allSizes->item(j);
01402             cmsg->numPesOfNode[j] = numPes;
01403             memcpy(pAllPes, allPtrs->item(j), sizeof(int)*numPes);
01404             pAllPes += numPes;
01405         }
01406         #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01407         cmsg->printOut("sndChi:");
01408         #endif
01409         ProxyMgr::Object()->sendNodeAwareSpanningTree(cmsg);
01410     }
01411     delete [] exTreeChildSize;
01412     delete [] exTreeChildPtr;  
01413     delete msg;
01414 }
01415 
01416 void ProxyMgr::recvNodeAwareSTParent(int patch, int parent){
01417 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01418     DebugFileTrace *dft = DebugFileTrace::Object();
01419     dft->openTrace();
01420     dft->writeTrace("PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
01421     dft->closeTrace();
01422 #endif
01423     ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(patch);
01424     CmiAssert(proxy!=NULL);
01425     proxy->setSpanningTree(parent, NULL, 0);
01426 }
01427 
01428 void ProxyMgr::sendResults(ProxyResultVarsizeMsg *msg) {
01429     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01430     NodeID node = PatchMap::Object()->node(msg->patch);
01431     CmiEnableUrgentSend(1);
01432     cp[node].recvResults(msg);
01433     CmiEnableUrgentSend(0);
01434 }
01435 
01436 void ProxyMgr::recvResults(ProxyResultVarsizeMsg *msg) {
01437     HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01438     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01439 }
01440 
01441 void ProxyMgr::sendResults(ProxyResultMsg *msg) {
01442   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01443   NodeID node = PatchMap::Object()->node(msg->patch);
01444   CmiEnableUrgentSend(1);
01445   cp[node].recvResults(msg);
01446   CmiEnableUrgentSend(0);
01447 }
01448 
01449 void ProxyMgr::recvResults(ProxyResultMsg *msg) {
01450   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01451   home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01452 }
01453 
01454 //sendResults is a direct function call, not an entry method
01455 void ProxyMgr::sendResults(ProxyCombinedResultMsg *msg) {
01456   ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(msg->patch);
01457   ProxyCombinedResultMsg *ocMsg = patch->depositCombinedResultMsg(msg);
01458   if (ocMsg) {
01459         ProxyCombinedResultRawMsg *cMsg = ProxyCombinedResultMsg::toRaw(ocMsg);        
01460     int destPe = patch->getSpanningTreeParent();
01461     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01462     CmiAssert(destPe!=CkMyPe());
01463     //if(destPe != CkMyPe()) {
01464 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01465       /*CkPrintf("ready to call node::recvImmRes on pe[%d] to dest[%d]\n", CkMyPe(), destPe);
01466       fflush(stdout);*/
01467 
01468       cMsg->destPe = destPe;
01469       CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01470       cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
01471 #else
01472       cp[destPe].recvImmediateResults(cMsg);
01473 #endif
01474     //}
01475     //else{
01477     //  cp[destPe].recvResults(cMsg);
01478     //}
01479   }
01480 }
01481 
01482 void ProxyMgr::recvResults(ProxyCombinedResultRawMsg *omsg) {
01483         ProxyCombinedResultRawMsg *msg = omsg;
01484 
01485 //Chao Mei: hack for QD in case of SMP with immediate msg
01486 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01487     if(proxyRecvSpanning && msg->isFromImmMsgCall){
01488 //    CkPrintf("qdcreate called on pe[%d]\n", CkMyPe());
01489 //    fflush(stdout);
01490         //To compensate for the counter loss for message creation
01491         //inside the process of immediate message on comm thread
01492         CkpvAccess(_qd)->create();
01493     }
01494 #endif
01495 
01496   HomePatch *home = PatchMap::Object()->homePatch(msg->patch);
01497   if (home) {
01498     //printf("Home got a message\n");
01499     home->receiveResults(msg); // delete done in HomePatch::receiveResults()
01500   }
01501   else {
01502     NAMD_bug("ProxyMgr should receive result message on home processor");
01503   }
01504 }
01505 
01506 void ProxyMgr::recvImmediateResults(ProxyCombinedResultRawMsg *omsg) {
01507   HomePatch *home = PatchMap::Object()->homePatch(omsg->patch);
01508   if (home) {
01509     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);        
01510     CmiEnableUrgentSend(1);
01511     cp[CkMyPe()].recvResults(omsg);
01512     CmiEnableUrgentSend(0);
01513   }
01514   else {
01515     ProxyPatch *patch = (ProxyPatch *)PatchMap::Object()->patch(omsg->patch);
01516         ProxyCombinedResultMsg *ocMsg = patch->depositCombinedResultRawMsg(omsg);
01517     if (ocMsg) {
01518                 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01519                 ProxyCombinedResultRawMsg *cMsg = ProxyCombinedResultMsg::toRaw(ocMsg);         
01520                 cp[patch->getSpanningTreeParent()].recvImmediateResults(cMsg);
01521     }
01522   }
01523 }
01524 
01525 void NodeProxyMgr::recvImmediateResults(ProxyCombinedResultRawMsg *omsg){
01526     ProxyCombinedResultRawMsg *msg = omsg;
01527 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01528     //CkPrintf("recvImmRes called on comm thread%d pe[%d]\n", CkMyRank()==CmiMyNodeSize(), CkMyPe());
01529     //fflush(stdout);
01530 
01531     int destRank = CkRankOf(msg->destPe);
01532     PatchMap *pmap = localPatchMaps[destRank];
01533     HomePatch *home = pmap->homePatch(msg->patch);
01534     if (home) {
01535 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
01536         msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01537 #endif
01538         CProxy_ProxyMgr cp(localProxyMgr);
01539         CmiEnableUrgentSend(1);
01540         cp[msg->destPe].recvResults(msg);
01541         CmiEnableUrgentSend(0);
01542 /*
01543         char *srcfrom = "Isfrom";
01544         if(CkMyRank()!=CmiMyNodeSize()) srcfrom="Notfrom";
01545       CkPrintf("%s comm thread from pe[%d]\n", srcfrom, CkMyPe());
01546       fflush(stdout);
01547 */
01548     }
01549     else {
01550         ProxyPatch *patch = (ProxyPatch *)pmap->patch(msg->patch);
01551         ProxyCombinedResultMsg *ocMsg = patch->depositCombinedResultRawMsg(msg); 
01552         if (ocMsg) {
01553             CProxy_NodeProxyMgr cnp(thisgroup);
01554             ocMsg->destPe = patch->getSpanningTreeParent();
01555                         ProxyCombinedResultRawMsg *cMsg = ProxyCombinedResultMsg::toRaw(ocMsg);
01556             cnp[CkNodeOf(cMsg->destPe)].recvImmediateResults(cMsg);
01557         }
01558     }
01559 #endif
01560 }
01561 
01562 void
01563 ProxyMgr::sendProxyData(ProxyDataMsg *msg, int pcnt, int *pids) {
01564 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01565     if(proxySendSpanning == 1) {
01566         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01567         for(int i=0; i<pcnt-1; i++) {
01568             ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01569             cnp[pids[i]].recvImmediateProxyData(copymsg);
01570         }
01571         cnp[pids[pcnt-1]].recvImmediateProxyData(msg);
01572         return;
01573     }
01574 #endif
01575   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01576   cp.recvImmediateProxyData(msg,pcnt,pids);
01577 }
01578 
01579 void 
01580 ProxyMgr::recvProxyData(ProxyDataMsg *msg) {
01581   // Hands the incoming message to the ProxyPatch that PatchMap holds for msg->patch.
01582 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01583     // QD workaround (Chao Mei) for SMP builds that use immediate messages:
01584     // a message flagged isFromImmMsgCall gets one extra create() on the _qd
01585     // counter. Per the original note this compensates for the message
01586     // creation that was not counted while the immediate message ran on the comm thread.
01587     const bool needsQdCredit = proxySendSpanning && msg->isFromImmMsgCall;
01588     if(needsQdCredit) CkpvAccess(_qd)->create();
01589     // (disabled debug aid: CkPrintf("qdcreate called on pe[%d]\n", CkMyPe()); fflush(stdout);)
01590 #endif
01591   PatchMap *patchMap = PatchMap::Object();
01592   ((ProxyPatch *) patchMap->patch(msg->patch))->receiveData(msg); // original note: deleted in ProxyPatch::receiveAtoms()
01593 }
01594 
01595 void
01596 ProxyMgr::recvImmediateProxyData(ProxyDataMsg *msg) {
        // Handles a data message for the proxy patch msg->patch on this PE:
        //  1. if proxySendSpanning == 1 and the proxy reports spanning-tree
        //     children, a CkCopyMsg() copy of msg is forwarded to them through
        //     sendProxyData();
        //  2. msg itself is then sent to recvProxyData on this same PE (CkMyPe()).
01597   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);  
01598   if (proxySendSpanning == 1) {
01599     // copy the message and send to spanning children
01600     //int *pids = (int*)alloca(proxy->getSpanningTreeNChild()*sizeof(int));
01601     //int npid = proxy->getSpanningTreeChild(pids);
01602     int npid = proxy->getSpanningTreeNChild();
01603     int *pids = (int *)proxy->getSpanningTreeChildPtr();
01604     if (npid) {        
              // The copy is taken before anything is sent; the original msg is
              // kept for the local delivery at the end of this function.
01605         ProxyDataMsg *newmsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);     
01606 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
              // Persistent-tree builds only: hand the proxy's persistent handles
              // to Converse for the send below. The assert demands exactly one
              // handle per child (ntreephs == npid).
01607         int ntreephs;
01608         PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
01609         CmiAssert(treephs && ntreephs == npid);
01610         CmiUsePersistentHandle(treephs, ntreephs);
01611 #endif
01612         ProxyMgr::Object()->sendProxyData(newmsg,npid,pids);
01613 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
              // NULL/0 after the send -- presumably switches persistent handles
              // back off for later sends; TODO confirm against the Converse docs.
01614         CmiUsePersistentHandle(NULL, 0);
01615 #endif
            // The #if 0 block below is disabled code kept for reference only.
01616       #if 0
01617       //ChaoMei: buggy code??? the spanning tree doesn't always have 2 levels
01618       //At the second level of the tree immediate messages are not needed
01619       CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01620       cp.recvProxyData(newmsg,npid,pids);
01621       #endif
01622     }
01623   }
01624   /* send to self via EP method to preserve priority */
01625   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01626   cp[CkMyPe()].recvProxyData(msg);
01627 }
01628 
01629 void NodeProxyMgr::recvImmediateProxyData(ProxyDataMsg *msg) {    
          // Node-level relay of a proxy data message. Only meaningful when built
          // with NODEAWARE_PROXY_SPANNINGTREE and USE_NODEPATCHMGR; any other
          // build aborts. Steps:
          //  1. copies of msg go to the NodeProxyMgr of each child node recorded
          //     for this patch (this node itself is skipped if it is listed last);
          //  2. msg itself is passed to ProxyMgr::recvProxyData together with the
          //     PE list ptn->peIDs (ptn->numPes entries).
01630 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01631     CProxy_ProxyMgr cp(localProxyMgr);
          // NOTE(review): ptn is dereferenced without a NULL check, while
          // registerPatch() stores NULL for a patch registered with numPes == 0.
01632     proxyTreeNode *ptn = proxyInfo[msg->patch];
01633     CmiAssert(ptn->numPes!=0);
01634 
01635     //re-send msg to this node's children nodes.
01636     //only the first pe of a node of node-aware ST should contain children nodes
01637     int rank = CkRankOf(ptn->peIDs[0]);
01638     PatchMap *pmap = localPatchMaps[rank];
01639     ProxyPatch *ppatch = (ProxyPatch *)pmap->patch(msg->patch);
01640 
01641     int npid = ppatch->getSTNNodeChild();
01642     int *pids = ppatch->getSTNodeChildPtr();
01643     if(npid>0) {        
01644         //only needs to send to other nodes, so check the last entry of pids.
01645         //This is because the data for proxies on the same node is sent
01646         //later in this function by NodeProxyMgr.
01647         if(pids[npid-1]==CkMyNode()) npid--;
01648     }    
01649     CProxy_NodeProxyMgr cnp(thisgroup);
01650 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
          // Persistent-tree builds only. Unlike the per-PE variant, the assert
          // here allows more handles than destinations (ntreephs >= npid).
01651     if (npid) {
01652         int ntreephs;
01653         PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
01654         CmiAssert(treephs && ntreephs >= npid);
01655         CmiUsePersistentHandle(treephs, ntreephs);
01656     }
01657 #endif
          // Every child node gets its own copy; the original msg is kept for
          // the local delivery below.
01658     for(int i=0; i<npid; i++) {
01659         ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01660         cnp[pids[i]].recvImmediateProxyData(copymsg);
01661     }    
01662 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01663     CmiUsePersistentHandle(NULL, 0);
01664 #endif
01665 
01666     //re-send msg to its internal cores
01667 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
          // The flag is true only when CkMyRank() equals CkMyNodeSize() --
          // presumably the comm thread; TODO confirm. ProxyMgr::recvProxyData
          // reads this flag for its QD compensation.
01668     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01669 #endif
01670     cp.recvProxyData(msg, ptn->numPes, ptn->peIDs);
01671 #else
01672     CkAbort("Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
01673 #endif
01674 }
01675 
01676 void
01677 ProxyMgr::sendProxyAll(ProxyDataMsg *msg, int pcnt, int *pids) {
      // Send msg to the pcnt destinations listed in pids.
      //  * Built with NODEAWARE_PROXY_SPANNINGTREE and USE_NODEPATCHMGR, and
      //    proxySendSpanning == 1: every pids[i] is used as an index into the
      //    NodeProxyMgr proxy; all destinations but the last receive a
      //    CkCopyMsg() copy and the last one receives msg itself.
      //  * Otherwise: msg goes out once through ProxyMgr::recvImmediateProxyAll
      //    together with the (pcnt, pids) destination list.
      // An empty list (pcnt <= 0) on the spanning path sends nothing and frees
      // msg rather than reading pids[pcnt-1] out of bounds.
01678 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01679     if(proxySendSpanning == 1) {
01680         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
01681         for(int i=0; i<pcnt; i++) {
01682             ProxyDataMsg *outmsg = (i==pcnt-1) ? msg : (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01683             cnp[pids[i]].recvImmediateProxyAll(outmsg);
01684         }
01685         if(pcnt <= 0) delete msg; // no destination took ownership of msg
01686         return;
01687     }
01688 #endif
01689   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01690   cp.recvImmediateProxyAll(msg,pcnt,pids);
01691 }
01692 
01693 void 
01694 ProxyMgr::recvProxyAll(ProxyDataMsg *msg) {
01695   // Hands the incoming message to receiveAll() of the ProxyPatch that PatchMap holds for msg->patch.
01696 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
01697     // QD workaround (Chao Mei) for SMP builds that use immediate messages:
01698     // a message flagged isFromImmMsgCall gets one extra create() on the _qd
01699     // counter. Per the original note this compensates for the message
01700     // creation that was not counted while the immediate message ran on the comm thread.
01701     const bool needsQdCredit = proxySendSpanning && msg->isFromImmMsgCall;
01702     if(needsQdCredit) CkpvAccess(_qd)->create();
01703     // (disabled debug aid: CkPrintf("qdcreate called on pe[%d]\n", CkMyPe()); fflush(stdout);)
01704 #endif
01705 
01706   PatchMap *patchMap = PatchMap::Object();
01707   ((ProxyPatch *) patchMap->patch(msg->patch))->receiveAll(msg); // original note: deleted in ProxyPatch::receiveAtoms()
01708 }
01709 
01710 void
01711 ProxyMgr::recvImmediateProxyAll(ProxyDataMsg *msg) {
        // Handles a message for the proxy patch msg->patch on this PE:
        //  1. if proxySendSpanning == 1 and the proxy reports spanning-tree
        //     children, a CkCopyMsg() copy of msg is forwarded to them through
        //     sendProxyAll();
        //  2. msg itself is then sent to recvProxyAll on this same PE (CkMyPe()).
01712   ProxyPatch *proxy = (ProxyPatch *) PatchMap::Object()->patch(msg->patch);
        // Optional file trace; note that the NULL check on proxy exists only in
        // builds that define both PROCTRACE_DEBUG and NAST_DEBUG.
01713   #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01714   DebugFileTrace *dft = DebugFileTrace::Object();
01715   dft->openTrace();
01716   dft->writeTrace("PMgr::recvImmPAll for patch[%d]\n", msg->patch);
01717   CmiAssert(proxy!=NULL);
01718   dft->writeTrace("PMgr::recvImmPAll assertion OK for patch[%d]\n", msg->patch);
01719   dft->closeTrace();
01720   #endif
01721   if (proxySendSpanning == 1) {
01722     // copy the message and send to spanning children
01723     //int *pids = (int*)alloca(proxy->getSpanningTreeNChild()*sizeof(int));
01724     //int npid = proxy->getSpanningTreeChild(pids);
01725     int npid = proxy->getSpanningTreeNChild();
01726     int *pids = (int *)proxy->getSpanningTreeChildPtr();
01727     if (npid) {
              // The copy is taken before anything is sent; the original msg is
              // kept for the local delivery at the end of this function.
01728         ProxyDataMsg *newmsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);      
01729 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
              // Persistent-tree builds only: hand the proxy's persistent handles
              // to Converse for the send below. The assert demands exactly one
              // handle per child (ntreephs == npid).
01730         int ntreephs;
01731         PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
01732         CmiAssert(treephs && ntreephs == npid);
01733         CmiUsePersistentHandle(treephs, ntreephs);
01734 #endif
01735         ProxyMgr::Object()->sendProxyAll(newmsg,npid,pids);
01736 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
              // NULL/0 after the send -- presumably switches persistent handles
              // back off for later sends; TODO confirm against the Converse docs.
01737         CmiUsePersistentHandle(NULL, 0);
01738 #endif
01739     }
01740   }
01741   /* send to self via EP method to preserve priority */
01742   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
01743   cp[CkMyPe()].recvProxyAll(msg);
01744 }
01745 
01746 void NodeProxyMgr::recvImmediateProxyAll(ProxyDataMsg *msg) {    
          // Node-level relay of a proxy "all" message. Only meaningful when built
          // with NODEAWARE_PROXY_SPANNINGTREE and USE_NODEPATCHMGR; any other
          // build aborts. Steps:
          //  1. copies of msg go to the NodeProxyMgr of each child node recorded
          //     for this patch (this node itself is skipped if it is listed last);
          //  2. msg itself is passed to ProxyMgr::recvProxyAll together with the
          //     PE list ptn->peIDs (ptn->numPes entries).
01747 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
01748     CProxy_ProxyMgr cp(localProxyMgr);
          // NOTE(review): ptn is dereferenced without a NULL check, while
          // registerPatch() stores NULL for a patch registered with numPes == 0.
01749     proxyTreeNode *ptn = proxyInfo[msg->patch];
01750     CmiAssert(ptn->numPes!=0);
01751     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01752     //This could be executed on comm thd.
01753     printf("NodePMgr::recvImmPAll for patch[%d] on node %d rank %d, prepare to send proc ", msg->patch, CkMyNode(), CkMyRank());
01754     for(int i=0; i<ptn->numPes; i++) {
01755         printf("%d, ", ptn->peIDs[i]);
01756     }
01757     printf("\n");
01758     fflush(stdout);
01759     #endif
01760 
01761     //re-send msg to this node's children nodes.
01762     //only the first pe of a node of node-aware ST should contain children nodes
01763     int rank = CkRankOf(ptn->peIDs[0]);
01764     PatchMap *pmap = localPatchMaps[rank];
01765     ProxyPatch *ppatch = (ProxyPatch *)pmap->patch(msg->patch);
01766 
01767     int npid = ppatch->getSTNNodeChild();
01768     int *pids = ppatch->getSTNodeChildPtr();
01769     if(npid>0) {        
01770         //only needs to send to other nodes, so check the last entry of pids.
01771         //This is because the data for proxies on the same node is sent
01772         //later in this function by NodeProxyMgr.
01773         if(pids[npid-1]==CkMyNode()) npid--;
01774     }
01775     
01776 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
          // Persistent-tree builds only. The assert allows more handles than
          // destinations (ntreephs >= npid).
01777     if (npid) {
01778         int ntreephs;
01779         PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
01780         CmiAssert(treephs && ntreephs >= npid);
01781         CmiUsePersistentHandle(treephs, ntreephs);
01782     }
01783 #endif
01784     CProxy_NodeProxyMgr cnp(thisgroup);
          // Every child node gets its own copy; the original msg is kept for
          // the local delivery below.
01785     for(int i=0; i<npid; i++) {
01786         ProxyDataMsg *copymsg = (ProxyDataMsg *)CkCopyMsg((void **)&msg);
01787         cnp[pids[i]].recvImmediateProxyAll(copymsg);
01788     }    
01789 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
01790     CmiUsePersistentHandle(NULL, 0);
01791 #endif
01792 
01793     //re-send msg to its internal cores
01794 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)
          // The flag is true only when CkMyRank() equals CkMyNodeSize() --
          // presumably the comm thread; TODO confirm. ProxyMgr::recvProxyAll
          // reads this flag for its QD compensation.
01795     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
01796 #endif
01797     cp.recvProxyAll(msg, ptn->numPes, ptn->peIDs);
01798 #else
          // The abort text names this function, so a failure is reported against
          // the right entry point.
01799     CkAbort("Bad execution path to NodeProxyMgr::recvImmediateProxyAll\n");
01800 #endif
01801 }
01802 
01803 void ProxyMgr::printProxySpanningTree(){
          // Prints, for every patch index below PatchMap's numPatches(), the
          // spanning-tree data held in ptree to stdout. A patch whose proxylist
          // is empty gets only its one-line summary. Node-aware builds print the
          // PE count per tree node followed by all PE ids; other builds print
          // the tree's entries directly.
01804 #ifdef NODEAWARE_PROXY_SPANNINGTREE
01805     const int patchCount = PatchMap::Object()->numPatches();
01806     for(int pid=0; pid<patchCount; ++pid) {
01807         proxyTreeNodeList &nodes = ptree.naTrees[pid];
01808         printf("ST tree for HomePatch[%d]: #nodes = %d\n", pid, nodes.size()); 
01809         // Detail lines are printed only for patches that have proxies.
01810         if(ptree.proxylist[pid].size()!=0) {
01811             printf("===%d=== pes/node: ", pid);
01812             for(int n=0; n<nodes.size(); ++n) printf("%d ", nodes.item(n).numPes);
01813             printf("\n");
01814             printf("===%d=== pe ids: ", pid);
01815             for(int n=0; n<nodes.size(); ++n) {
01816                 for(int p=0; p<nodes.item(n).numPes; ++p) {
01817                     printf("%d ", nodes.item(n).peIDs[p]);
01818                 }
01819             }
01820             printf("\n");
01821         }
01822     }
01823     fflush(stdout);
01824 #else
01825     const int patchCount = PatchMap::Object()->numPatches();
01826     for(int pid=0; pid<patchCount; ++pid) {
01827         NodeIDList &peList = ptree.trees[pid];
01828         printf("ST tree for HomePatch[%d]: #nodes = %d\n", pid, peList.size()); 
01829         if(ptree.proxylist[pid].size()!=0) {
01830             printf("===%d=== pe ids: ", pid);
01831             for(int n=0; n<peList.size(); ++n) printf("%d ", peList.item(n));
01832             printf("\n");
01833         }
01834     }
01835     // Single flush once every patch has been listed.
01836     fflush(stdout);
01837 #endif
01838 }
01839 
01840 void NodeProxyMgr::registerPatch(int patchID, int numPes, int *pes){
01841     // Replaces the proxyInfo entry of patchID: an existing entry is freed
01842     // first, then the slot becomes NULL for numPes == 0 or a new
01843     // proxyTreeNode built from CkNodeOf(pes[0]), numPes and pes.
01844     proxyTreeNode *previous = proxyInfo[patchID];
01845     if(previous != NULL) delete previous;
01846     proxyTreeNode *replacement = NULL;
01847     if(numPes != 0) replacement = new proxyTreeNode(CkNodeOf(pes[0]),numPes,pes);
01848     proxyInfo[patchID] = replacement;
01849 }
01850 
01851 void ProxyMgr::sendResult(ProxyGBISP1ResultMsg *msg) { // proxy patch -> home patch (result)
        // Sends msg to recvResult on the ProxyMgr element that PatchMap's
        // node() reports for msg->patch, with urgent-send enabled for just
        // this one send.
01852   const NodeID destNode = PatchMap::Object()->node(msg->patch);
01853   CProxy_ProxyMgr mgrProxy(CkpvAccess(BOCclass_group).proxyMgr);
01854   CmiEnableUrgentSend(1);
01855   mgrProxy[destNode].recvResult(msg);
01856   CmiEnableUrgentSend(0);
01857 }
01858 void ProxyMgr::recvResult(ProxyGBISP1ResultMsg *msg) { // proxy patch -> home patch (result)
        // Passes msg to receiveResult() of the HomePatch registered for msg->patch.
01859   HomePatch * const target = PatchMap::Object()->homePatch(msg->patch);
01860   target->receiveResult(msg); // original note: "message deleted in registerProxy()" -- TODO confirm
01861 }
01862 void ProxyMgr::recvData(  ProxyGBISP2DataMsg *msg) {  // home patch -> proxy patch (data)
        // Passes msg to receiveData() of the ProxyPatch registered for msg->patch.
01863   PatchMap *patchMap = PatchMap::Object();
01864   ((ProxyPatch *) patchMap->patch(msg->patch))->receiveData(msg); // original note (itself unsure): deleted in ProxyPatch::receiveAtoms() ?
01865 }
01866 void ProxyMgr::sendResult(ProxyGBISP2ResultMsg *msg) { // proxy patch -> home patch (result)
        // Sends msg to recvResult on the ProxyMgr element that PatchMap's
        // node() reports for msg->patch, with urgent-send enabled for just
        // this one send.
01867   const NodeID destNode = PatchMap::Object()->node(msg->patch);
01868   CProxy_ProxyMgr mgrProxy(CkpvAccess(BOCclass_group).proxyMgr);
01869   CmiEnableUrgentSend(1);
01870   mgrProxy[destNode].recvResult(msg);
01871   CmiEnableUrgentSend(0);
01872 }
01873 void ProxyMgr::recvResult(ProxyGBISP2ResultMsg *msg) { // proxy patch -> home patch (result)
        // Passes msg to receiveResult() of the HomePatch registered for msg->patch.
01874   HomePatch * const target = PatchMap::Object()->homePatch(msg->patch);
01875   target->receiveResult(msg); // original note: "message deleted in registerProxy()" -- TODO confirm
01876 }
01877 void ProxyMgr::recvData(  ProxyGBISP3DataMsg *msg) {   // home patch -> proxy patch (data)
        // Passes msg to receiveData() of the ProxyPatch registered for msg->patch.
01878   PatchMap *patchMap = PatchMap::Object();
01879   ((ProxyPatch *) patchMap->patch(msg->patch))->receiveData(msg); // original note (itself unsure): deleted in ProxyPatch::receiveAtoms() ?
01880 }
01881 
01882 PatchProxyListMsg *PatchProxyListMsg::createPatchProxyListMsg(PatchProxyListMsg **bufs, int bufSize, ProxyListInfo *info, int size){
              // Builds one message that concatenates the patch entries of the
              // bufSize messages in bufs followed by the size entries in info.
              // Per patch it carries the patch id and the proxy-list length;
              // the proxy PE lists are laid out back to back in proxyPEs.
              // The inputs are only read here; the caller keeps ownership.
01883         // Pass 1: size the result -- one slot per patch plus the summed
01884         // length of every proxy list.
01885         int patchTotal = size;
01886         int proxyTotal = 0;
01887         for(int b=0; b<bufSize; ++b) {
01888                 PatchProxyListMsg *src = bufs[b];
01889                 patchTotal += src->numPatches;
01890                 for(int p=0; p<src->numPatches; ++p) proxyTotal += src->proxyListLen[p];
01891         }
01892         for(int k=0; k<size; ++k) proxyTotal += info[k].numProxies;
01893 
01894         PatchProxyListMsg *merged = new(patchTotal, patchTotal, proxyTotal, 0)PatchProxyListMsg(patchTotal);
01895 
01896         // Pass 2: fill it in; nextPatch and nextPe are the write cursors.
01897         int nextPatch = 0;
01898         int nextPe = 0;
01899         for(int b=0; b<bufSize; ++b) {
01900                 PatchProxyListMsg *src = bufs[b];
01901                 int srcOffset = 0; // read cursor into src->proxyPEs
01902                 for(int p=0; p<src->numPatches; ++p, ++nextPatch) {
01903                         const int len = src->proxyListLen[p];
01904                         merged->patchIDs[nextPatch] = src->patchIDs[p];
01905                         merged->proxyListLen[nextPatch] = len;
01906                         memcpy(merged->proxyPEs+nextPe, src->proxyPEs+srcOffset, sizeof(int)*len);
01907                         srcOffset += len;
01908                         nextPe += len;
01909                 }
01910         }
01911         for(int k=0; k<size; ++k, ++nextPatch) { // local entries follow everything taken from bufs
01912                 const int len = info[k].numProxies;
01913                 merged->patchIDs[nextPatch] = info[k].patchID;
01914                 merged->proxyListLen[nextPatch] = len;
01915                 memcpy(merged->proxyPEs+nextPe, info[k].proxyList, sizeof(int)*len);
01916                 nextPe += len;
01917         }
01918         return merged;
01919 }
01920 
01921 #define HOMEPATCH_TREE_BRFACTOR 2
01922 void NodeProxyMgr::createSTForHomePatches(PatchMap *pmap){
01923         // Works out this node's place in an implicit tree over all nodes that
01924         // own home patches. Those nodes are listed in increasing node id; list
01925         // position p has its parent at (p-1)/HOMEPATCH_TREE_BRFACTOR and its kids
01926         // at p*HOMEPATCH_TREE_BRFACTOR+1 .. p*HOMEPATCH_TREE_BRFACTOR+HOMEPATCH_TREE_BRFACTOR.
01927         std::vector<int> populatedNodes; // ids of nodes with at least one home patch
01928         int myPos = -1;                  // position of this node in populatedNodes
01929         for(int node=0; node<CkNumNodes(); ++node) {
01930                 const int peBegin = CkNodeFirst(node);
01931                 const int peEnd = peBegin + CkNodeSize(node);
01932                 int patchCount = 0;
01933                 for(int pe=peBegin; pe<peEnd; ++pe) {
01934                         patchCount += pmap->numPatchesOnNode(pe);
01935                 }
01936                 if(patchCount!=0) {
01937                         populatedNodes.push_back(node);
01938                         if(node == CkMyNode()) {
01939                                 // local node: remember its position and set up the deposit area
01940                                 myPos = populatedNodes.size()-1;
01941                                 numHomePatches = patchCount;
01942                                 homepatchRecved = 0;
01943                                 localProxyLists = new ProxyListInfo[patchCount];
01944                                 memset(localProxyLists, 0, sizeof(ProxyListInfo)*patchCount);
01945                         }
01946                 }
01947         }
01948 
01949         if(myPos == -1){
01950                 // No home patches on this node: -2 is a value that makes no
01951                 // sense as a tree parent, and no kids are expected.
01952                 kidRecved = 0;
01953                 numKidNodes = 0;
01954                 parentNode = -2;
01955                 return;
01956         }
01957 
01958         // Parent: the first populated node is the root and is marked with -1.
01959         parentNode = (myPos == 0) ? -1 : populatedNodes[(myPos-1)/HOMEPATCH_TREE_BRFACTOR];
01960 
01961         // Kids: count how many of the HOMEPATCH_TREE_BRFACTOR candidate
01962         // positions still fall inside the list.
01963         const int listLen = populatedNodes.size();
01964         int kidCount = 0;
01965         while(kidCount < HOMEPATCH_TREE_BRFACTOR && myPos*HOMEPATCH_TREE_BRFACTOR+kidCount+1 < listLen) {
01966                 ++kidCount;
01967         }
01968         numKidNodes = kidCount;
01969         if(numKidNodes!=0) {
01970                 remoteProxyLists = new PatchProxyListMsg *[numKidNodes];
01971         }
01972         kidRecved = 0;
01973 
01974         // Disabled debug aid:
01975         //CkPrintf("Node[%d] has %d homepatches with parent=%d and %d kids \n", CkMyNode(), numHomePatches, parentNode, numKidNodes);
01976 
01977 }
01978 
01979 void NodeProxyMgr::sendProxyList(int pid, int *plist, int size){
01980         // Records one patch's proxy list (id, length, pointer) in the next free
01981         // localProxyLists slot. Everything happens under localDepositLock.
01982         CmiLock(localDepositLock);
01983         const int slot = homepatchRecved++; // slot claimed while the lock is held
01984 
01985         ProxyListInfo *entry = &localProxyLists[slot];
01986         entry->patchID = pid;
01987         entry->numProxies = size;
01988         entry->proxyList = plist; // the pointer is stored as-is; nothing is copied here
01989         // The deposit that fills the last slot triggers contributeToParent().
01990         const bool lastLocalDeposit = (slot == (numHomePatches-1));
01991         if(lastLocalDeposit) contributeToParent();
01992         CmiUnlock(localDepositLock);
01993 }
01994 
01995 void NodeProxyMgr::sendProxyListInfo(PatchProxyListMsg *msg){
01996         // Stores msg in the next free remoteProxyLists slot under
01997         // localDepositLock (contributeToParent() invokes this on parentNode).
01998         CmiLock(localDepositLock);
01999         const int slot = kidRecved++; // slots are indexed from 0
02000         remoteProxyLists[slot] = msg;
02001 
02002         // The deposit that fills the last kid slot triggers contributeToParent().
02003         const bool lastKidDeposit = (slot == (numKidNodes-1));
02004         if(lastKidDeposit) contributeToParent();
02005         CmiUnlock(localDepositLock);
02006 }
02007 
02008 void NodeProxyMgr::contributeToParent(){
02009         // Runs only once every local home patch and every kid node has deposited.
02010         const bool localsIn = (homepatchRecved == numHomePatches);
02011         const bool kidsIn = (kidRecved == numKidNodes);
02012         if(!(localsIn && kidsIn)) return;
02013         // Both counters are reset before the combined message is built.
02014         kidRecved = 0;
02015         homepatchRecved = 0;
02016         PatchProxyListMsg *combined = PatchProxyListMsg::createPatchProxyListMsg(remoteProxyLists, numKidNodes, localProxyLists, numHomePatches);
02017         if(parentNode != -1) {
02018                 CProxy_NodeProxyMgr nodeMgrs(thisgroup);
02019                 nodeMgrs[parentNode].sendProxyListInfo(combined);
02020         }else{
02021                 // parentNode == -1 marks the root: the result goes to ProxyMgr on PE 0
02022                 CProxy_ProxyMgr proxyMgrs(CkpvAccess(BOCclass_group).proxyMgr);
02023                 proxyMgrs[0].recvPatchProxyInfo(combined);
02024         }
02025         for(int k=0; k<numKidNodes; ++k) delete remoteProxyLists[k]; // the kids' messages are freed after the send
02026 }
02027 
02028 #include "ProxyMgr.def.h"
02029 

Generated on Tue Nov 21 01:17:14 2017 for NAMD by  doxygen 1.4.7