NamdHybridLB.C

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * $Source: /home/cvs/namd/cvsroot/namd2/src/NamdHybridLB.C,v $
00003  * $Author: jim $
00004  * $Date: 2017/03/30 20:06:17 $
00005  * $Revision: 1.40 $
00006  *****************************************************************************/
00007 
00008 #if !defined(WIN32) || defined(__CYGWIN__)
00009 #include <unistd.h>
00010 #endif
00011 #include <fcntl.h>
00012 
00013 #include "InfoStream.h"
00014 #include "NamdHybridLB.h"
00015 #include "Node.h"
00016 #include "PatchMap.h"
00017 #include "ComputeMap.h"
00018 #include "LdbCoordinator.h"
00019 
// Per-group statistics message used for compute splitting.
// GrpLevelStrategy() allocates it with new(n, n), fills every field and sends
// it to splitComputes() on element 0 of the group proxy, which merges the
// messages from all groups and decides how many partitions each compute gets.
// NOTE(review): cid/load appear to be the variable-size arrays sized by the
// two new() arguments -- confirm against the .ci declaration.
00020 class SplitComputesMsg : public CMessage_SplitComputesMsg {
00021 public:
00022   double maxUnsplit;        // largest load among computes whose numPartitions() is 0
00023   double averageLoad;       // (compute load + background load) / available PEs in the group
00024   double avgCompute;        // mean load of the group's moveable computes
00025   double maxCompute;        // largest load of any moveable compute in the group
00026   int maxComputeId;         // compute id (handle.id.id[0]) of that largest compute
00027   int nMoveableComputes;    // number of moveable computes counted by buildData()
00028   int numPesAvailable;      // number of PEs in the group marked available
00029   int n;                    // number of entries filled in cid[] and load[]
00030   int *cid;                 // compute ids of the splittable computes (filled only at step 1)
00031   float *load;              // measured load of each compute listed in cid[]
00032 };
00033 
00034 #include "NamdHybridLB.def.h"
00035 
00036 // #define DUMP_LDBDATA 1
00037 // #define LOAD_LDBDATA 1
00038 
00039 extern int isPmeProcessor(int); 
00040 #ifdef MEM_OPT_VERSION
00041 extern int isOutputProcessor(int); 
00042 #endif
00043 // Load array defined in NamdCentLB.C
00044 extern double *cpuloads;
00045 
00049 void CreateNamdHybridLB() {
00050         CProxy_NamdHybridLB::ckNew();
00051 
00052         // creating an array to store the loads of all processors
00053         // to be used with proxy spanning tree
00054         if (CkMyPe() == 0 && cpuloads == NULL) {
00055                 cpuloads = new double[CkNumPes()];
00056                 CmiAssert(cpuloads != NULL);
00057                 for (int i=0; i<CkNumPes(); i++) cpuloads[i] = 0.0;
00058         }
00059 }
00060 
00064 NamdHybridLB::NamdHybridLB(): HybridBaseLB(CkLBOptions(-1))
00065 {
00066   // setting the name
00067   lbname = (char *)"NamdHybridLB";
00068 
00069   delete tree;        // delete the tree built from the base class
00070   const SimParameters* simParams = Node::Object()->simParameters;
00071   if (CkNumPes() <= simParams->hybridGroupSize)  {
00072     tree = new TwoLevelTree;   // similar to centralized load balancing
00073   }
00074   else {
00075     tree = new ThreeLevelTree(simParams->hybridGroupSize);
00076     initTree();
00077     // can only do shrink strategy on levels > 1
00078     statsStrategy = SHRINK_NULL;
00079   }
00080 
00081   // initializing thisProxy
00082   thisProxy = CProxy_NamdHybridLB(thisgroup);
00083   
00084   // initializing the central LB
00085   centralLB = AllocateNamdCentLB();
00086 
00087   // initializing the dummy LB
00088   dummyLB = AllocateNamdDummyLB();
00089 
00090   // assigning initial values to variables
00091   from_procs = NULL;
00092   computeArray = NULL;
00093   patchArray = NULL;
00094   processorArray = NULL;
00095   updateCount = 0;
00096   splitCount = 0;
00097   splitComputesMsgs = 0;
00098   updateFlag = false;
00099   collectFlag = false;
00100 
00101 }
00102 
00108 bool NamdHybridLB::QueryBalanceNow(int _step){ 
00109   if ( LdbCoordinator::Object()->takingLdbData ) {
00110           return true;
00111   } else {
00112           return false;
00113   } 
00114 }
00115 
00116 bool NamdHybridLB::QueryDumpData() {
00117 #if 0                                                                                             
00118   if (LdbCoordinator::Object()->ldbCycleNum == 1)  return true;                                
00119   if (LdbCoordinator::Object()->ldbCycleNum == 2)  return true;                                
00120 #endif                                                                                            
00121   return false;                                                                                
00122 }
00123 
#if 0

// Disabled placeholder: announces the vector strategy and returns an empty
// migration message tagged with the current tree level.
LBVectorMigrateMsg* NamdHybridLB::VectorStrategy(LDStats* stats){
  CkPrintf("[%d] Using Vector Strategy to balance the load\n",CkMyPe());

  LBVectorMigrateMsg* emptyMsg = new(0,0) LBVectorMigrateMsg;
  emptyMsg->level = currentLevel;
  emptyMsg->n_moves = 0;
  return emptyMsg;
}
#endif
00138 
00139 /*
00140  * Runs the load balancing strategy
00141  */
00142 CLBMigrateMsg* NamdHybridLB::Strategy(LDStats* stats)
00143 {
00144         int i;
00145         // CkPrintf("[%d] NamdHybridLB at Strategy\n",CkMyPe());
00146         
00147         // calling the centralLB for level 1            
00148         if(currentLevel == 1){
00149                 LevelData *lData = levelData[currentLevel];
00150                 CLBMigrateMsg *msg;
00151                 LocalLBInfoMsg *newMsg;
00152                 msg = GrpLevelStrategy(stats);
00153 
00154                 // creating a new message to send to its parent
00155                 newMsg = new(msg->n_moves,endPE-startPE+1) LocalLBInfoMsg;
00156                 newMsg->n_moves = msg->n_moves;
00157                 newMsg->startPE = startPE;
00158                 newMsg->endPE = endPE;
00159                 for(i=0; i<msg->n_moves; i++){
00160                         newMsg->moves[i] = msg->moves[i];
00161                 }
00162                 for(i=0; i<endPE-startPE+1; i++){
00163                         newMsg->cpuloads[i] = peLoads[i];
00164                 }
00165                 delete [] peLoads;
00166                 thisProxy[0].UpdateLocalLBInfo(newMsg);
00167                 return msg;
00168         }else{
00169                 dummyLB->work(stats);
00170                 return createMigrateMsg(stats);
00171         }
00172 }
00173 
00177 void NamdHybridLB::UpdateLocalLBInfo(LocalLBInfoMsg *msg){
00178         int children;
00179         int i;
00180 
00181         // getting the number of children
00182         children = tree->numNodes(currentLevel);
00183         // CkPrintf("[%d] Updating compute map, total %d\n",CkMyPe(),siblings);
00184 
00185         // getting the compute map to insert the changes coming from the children
00186         ComputeMap *computeMap = ComputeMap::Object();
00187 
00188         // traversing the set of moves in msg
00189         for(i=0; i<msg->n_moves; i++){
00190             if (msg->moves[i].to_pe != -1)
00191                 computeMap->setNewNode(msg->moves[i].obj.id.id[0],msg->moves[i].to_pe); 
00192         }
00193 
00194         // CODING
00195         // updating cpuloads array
00196         for(i=msg->startPE; i<=msg->endPE; i++){
00197                 cpuloads[i] = msg->cpuloads[i-msg->startPE];
00198         }
00199 
00200         // checking if all children have sent the update
00201         updateCount++;
00202         if(updateCount == children){
00203                 updateCount = 0;
00204                 updateFlag = true;
00205                  // CkPrintf("[%d] UPDATE READY\n",CkMyPe());           
00206         }
00207 
00208         // checking if the collect info is ready
00209         if(updateFlag && collectFlag){
00210                 updateFlag = false;
00211                 collectFlag = false;    
00212                 thisProxy[parent_backup].CollectInfo(loc_backup, n_backup, fromlevel_backup);
00213         }
00214 
00215         delete msg;
00216 }
00217 
00218 void NamdHybridLB::splitComputes(SplitComputesMsg *msg) {
00219   const int children = tree->numNodes(1);
00220 
00221   if ( ! splitComputesMsgs ) {
00222     splitComputesMsgs = new SplitComputesMsg*[children];
00223   }
00224   
00225   splitComputesMsgs[splitCount] = msg;
00226 
00227   if ( ++splitCount == children ) {
00228     splitCount = 0;
00229 
00230     const SimParameters* simParams = Node::Object()->simParameters;
00231     ComputeMap *computeMap = ComputeMap::Object();
00232 
00233     double maxUnsplit = 0.;
00234     double averageLoad = 0.;
00235     double avgCompute = 0.;
00236     double maxCompute = 0.;
00237     int maxComputeId = -1;
00238     int nMoveableComputes = 0;
00239     int numPesAvailable = 0;
00240     int nToSplit = 0;
00241 
00242     for ( int j=0; j < children; ++j ) {
00243       SplitComputesMsg *msg = splitComputesMsgs[j];
00244       if ( msg->maxUnsplit > maxUnsplit ) { maxUnsplit = msg->maxUnsplit; }
00245       if ( msg->maxCompute > maxCompute ) { maxCompute = msg->maxCompute; maxComputeId = msg->maxComputeId; }
00246       averageLoad += msg->averageLoad * msg->numPesAvailable;
00247       numPesAvailable += msg->numPesAvailable;
00248       avgCompute += msg->avgCompute * msg->nMoveableComputes;
00249       nMoveableComputes += msg->nMoveableComputes;
00250       nToSplit += msg->n;
00251     }
00252     
00253     averageLoad /= numPesAvailable;
00254     if ( nMoveableComputes ) avgCompute /= nMoveableComputes; else avgCompute = 0.;
00255 
00256     CkPrintf("LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
00257             maxComputeId, maxCompute, 100. * maxCompute / averageLoad, averageLoad);
00258     CkPrintf("LDB: Average compute %f is %.1f%% of average load %f\n",
00259             avgCompute, 100. * avgCompute / averageLoad, averageLoad);
00260 
00261     if ( ! nToSplit ) {
00262       for ( int j=0; j < children; ++j ) {
00263         delete splitComputesMsgs[j];
00264       }
00265     } else {
00266       // partitions are stored as char but mostly limited by
00267       // high load noise at low outer-loop iteration counts
00268       int maxParts = 10;
00269 #ifdef NAMD_CUDA
00270 //split LCPO compute very small, else CUDA compute is delayed
00271       if (simParams->LCPOOn) {
00272         maxParts = 20;
00273       }
00274 #endif
00275       int totalAddedParts = 0;
00276       maxCompute = averageLoad / 10.;
00277       if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
00278       if ( simParams->ldbRelativeGrainsize > 0. ) {
00279         maxCompute = averageLoad * simParams->ldbRelativeGrainsize;
00280       }
00281       CkPrintf("LDB: Partitioning computes with target load %f\n", maxCompute);
00282 
00283       for ( int j=0; j < children; ++j ) {
00284         SplitComputesMsg *msg = splitComputesMsgs[j];
00285         for (int i=0; i < msg->n; ++i) {
00286           int nparts = (int) ceil(msg->load[i] / maxCompute);
00287           if ( nparts > maxParts ) nparts = maxParts;
00288           if ( nparts < 1 ) nparts = 1;
00289           computeMap->setNewNumPartitions(msg->cid[i],nparts);
00290           totalAddedParts += nparts - 1;
00291         }
00292         delete msg;
00293       }
00294 
00295       CkPrintf("LDB: Increased migratable compute count from %d to %d\n",
00296               nMoveableComputes,nMoveableComputes+totalAddedParts);
00297       CkPrintf("LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
00298     }
00299   }
00300 }
00301 
00302 
00307 CLBMigrateMsg* NamdHybridLB::GrpLevelStrategy(LDStats* stats) {
00308   int numProcessors = stats->nprocs();  // number of processors at group level
00309   int numPatches = PatchMap::Object()->numPatches();
00310   ComputeMap *computeMap = ComputeMap::Object();
00311   const int numComputes = computeMap->numComputes();
00312   const int numGroupComputes = stats->n_migrateobjs;
00313   const SimParameters* simParams = Node::Object()->simParameters;
00314 
00315   if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00316   // these data structures are global and need to be distributed
00317   if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00318   if ( ! computeArray ) computeArray = new computeInfo[numGroupComputes];
00319   if ( ! from_procs ) from_procs = new int[numGroupComputes];
00320 
00321   int nMoveableComputes = buildData(stats);
00322   CmiAssert(nMoveableComputes <= numGroupComputes);
00323 
00324 
00325 #if LDB_DEBUG
00326 #define DUMP_LDBDATA 1
00327 #define LOAD_LDBDATA 1
00328 #endif
00329 
00330 #if DUMP_LDBDATA 
00331   dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00332 #elif LOAD_LDBDATA
00333   loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00334   // CkExit();
00335 #endif
00336 
00337   double averageLoad = 0.;
00338   double avgCompute;
00339   double maxCompute;
00340   int maxComputeId;
00341   int numPesAvailable;
00342   {
00343    int i;
00344    double total = 0.;
00345    maxCompute = 0.;
00346    int maxi = 0;
00347    for (i=0; i<nMoveableComputes; i++) {
00348       double load = computeArray[i].load;
00349       total += load;
00350       if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
00351    }
00352    avgCompute = total / nMoveableComputes;
00353    maxComputeId = computeArray[maxi].handle.id.id[0];
00354 
00355     int P = stats->nprocs();
00356    numPesAvailable = 0;
00357    for (i=0; i<P; i++) {
00358       if (processorArray[i].available) {
00359         ++numPesAvailable;
00360         total += processorArray[i].backgroundLoad;
00361       }
00362    }
00363    if (numPesAvailable == 0)
00364      NAMD_die("No processors available for load balancing!\n");
00365 
00366    averageLoad = total/numPesAvailable;
00367   }
00368 
00369   int i_split = 0;
00370   double maxUnsplit = 0.;
00371 
00372   if ( step() == 1 ) {
00373     for (int i=0; i<nMoveableComputes; i++) {
00374       const int cid = computeArray[i].handle.id.id[0];
00375       if ( computeMap->numPartitions(cid) == 0 ) {
00376         const double load = computeArray[i].load;
00377         if ( load > maxUnsplit ) maxUnsplit = load;
00378         continue;
00379       }
00380       ++i_split;
00381     }
00382   }
00383 
00384   {
00385     SplitComputesMsg *msg = new(i_split,i_split) SplitComputesMsg;
00386     msg->maxUnsplit = maxUnsplit;
00387     msg->averageLoad = averageLoad;
00388     msg->avgCompute = avgCompute;
00389     msg->maxCompute = maxCompute;
00390     msg->maxComputeId = maxComputeId;
00391     msg->nMoveableComputes = nMoveableComputes;
00392     msg->numPesAvailable = numPesAvailable;
00393     msg->n = i_split;
00394 
00395     if ( step() == 1 ) {
00396       i_split = 0;
00397       for (int i=0; i<nMoveableComputes; i++) {
00398         computeArray[i].processor = computeArray[i].oldProcessor;
00399         const int cid = computeArray[i].handle.id.id[0];
00400         if ( computeMap->numPartitions(cid) == 0 ) {
00401           continue;
00402         }
00403         msg->cid[i_split] = cid;
00404         msg->load[i_split] = computeArray[i].load;
00405         ++i_split;
00406       }
00407     }
00408 
00409     thisProxy[0].splitComputes(msg);
00410   }
00411 
00412   if ( step() == 1 ) {
00413     // compute splitting only
00414   } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
00415     if (step() < 4)
00416       TorusLB(computeArray, patchArray, processorArray,
00417                   nMoveableComputes, numPatches, numProcessors);
00418     else
00419       RefineTorusLB(computeArray, patchArray, processorArray,
00420                   nMoveableComputes, numPatches, numProcessors, 1);
00421   } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
00422     TorusLB(computeArray, patchArray, processorArray,
00423                   nMoveableComputes, numPatches, numProcessors);
00424   } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00425     RefineTorusLB(computeArray, patchArray, processorArray,
00426                   nMoveableComputes, numPatches, numProcessors, 1);
00427   } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
00428     NAMD_die("Old load balancer strategy is not compatible with hybrid balancer.");
00429     if (step() < 4)
00430       Alg7(computeArray, patchArray, processorArray,
00431                   nMoveableComputes, numPatches, numProcessors);
00432     else
00433       RefineOnly(computeArray, patchArray, processorArray,
00434                   nMoveableComputes, numPatches, numProcessors);
00435   }
00436 
00437 #if LDB_DEBUG && USE_TOPOMAP
00438   TopoManager tmgr;
00439   int pe1, pe2, pe3, hops=0;
00440   /* This is double counting the hops
00441   for(int i=0; i<nMoveableComputes; i++)
00442   {
00443     pe1 = computeArray[i].processor;
00444     pe2 = patchArray[computeArray[i].patch1].processor;
00445     pe3 = patchArray[computeArray[i].patch2].processor;
00446     hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00447     if(computeArray[i].patch1 != computeArray[i].patch2)
00448       hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
00449   }*/
00450   for (int i=0; i<numPatches; i++)  {
00451     //int num = patchArray[i].proxiesOn.numElements();
00452     pe1 = patchArray[i].processor;
00453     Iterator nextProc;
00454     processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00455     while (p) {
00456       pe2 = p->Id;
00457       hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00458       p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00459     }
00460   }
00461   CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00462 #endif
00463 
00464 #if DUMP_LDBDATA
00465   dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00466 #elif LOAD_LDBDATA
00467   dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00468   // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00469   // CkExit();
00470 #endif
00471 
00472   // For error checking:
00473   // Count up computes, to see if somebody doesn't have any computes
00474   int i;
00475 #if 0
00476   int* computeCount = new int[numProcessors];
00477   for(i=0; i<numProcessors; i++)
00478     computeCount[i]=0;
00479   for(i=0; i<nMoveableComputes; i++)
00480     computeCount[computeArray[i].processor]++;
00481   for(i=0; i<numProcessors; i++) {
00482     if (computeCount[i]==0)
00483       iout << iINFO <<"Warning: Processor " << i 
00484            << " has NO moveable computes.\n" << endi;
00485   }
00486   delete [] computeCount;
00487 #endif
00488   
00489   CkVec<MigrateInfo *> migrateInfo;
00490   for(i=0;i<nMoveableComputes;i++) {
00491     if (computeArray[i].processor != from_procs[i]+stats->procs[0].pe) {
00492       /* CkPrintf("[%d] Obj %d migrating from %d (%d) to %d\n",
00493                      CkMyPe(),computeArray[i].handle.id.id[0],
00494                          from_procs[i], computeArray[i].oldProcessor, computeArray[i].processor); */
00495       MigrateInfo *migrateMe = new MigrateInfo;
00496       migrateMe->obj = computeArray[i].handle;
00497       //migrateMe->from_pe = computeArray[i].oldProcessor;
00498       int frompe = from_procs[i];
00499       if (frompe == numProcessors)
00500         frompe = -1;
00501       else
00502         frompe = frompe + stats->procs[0].pe;
00503       migrateMe->from_pe = frompe;
00504       migrateMe->to_pe = computeArray[i].processor;
00505       if (frompe == -1) {
00506           // don't know yet which processor this compute belongs to, but
00507           // inform receiver
00508         LDObjData obj;
00509         obj.handle = computeArray[i].handle;
00510         thisProxy[computeArray[i].processor].ObjMigrated(obj, NULL, 0, currentLevel-1);
00511       } 
00512       migrateInfo.insertAtEnd(migrateMe);
00513 
00514       // sneak in updates to ComputeMap
00515       //ERASE CkPrintf("%d setting %d to processor %d\n",CkMyPe(),computeArray[i].handle.id.id[0],computeArray[i].processor);
00516       computeMap->setNewNode(computeArray[i].handle.id.id[0],
00517                                 computeArray[i].processor);
00518     }
00519   }
00520   // CkPrintf("LOAD BALANCING READY %d\n",CkMyPe()); 
00521 
00522   LBMigrateMsg* msg;
00523   msg = createMigrateMsg(migrateInfo, numProcessors);
00524 
00525   peLoads = new double [numProcessors]; 
00526   startPE = processorArray[0].Id;
00527   endPE = processorArray[numProcessors-1].Id;
00528   // CkPrintf("[%d] numProcessors=%d, %d to %d\n",CkMyPe(),numProcessors,processorArray[0].Id,processorArray[numProcessors-1].Id);
00529   for (i=0; i<numProcessors; i++) {
00530         peLoads[i] = processorArray[i].load;
00531   }
00532 
00533 
00534   delete [] from_procs;
00535   delete [] processorArray;
00536   delete [] patchArray;
00537   delete [] computeArray;
00538 
00539   from_procs = NULL;
00540   processorArray = NULL;
00541   patchArray = NULL;
00542   computeArray = NULL;
00543   
00544   return msg;
00545 
00546 }
00547 
00548 void NamdHybridLB::dumpDataASCII(char *file, int numProcessors,
00549                                int numPatches, int numComputes)
00550 {
00551   char filename[128];
00552   sprintf(filename, "%s_%d.%d", file, CkMyPe(), step());
00553   FILE* fp = fopen(filename,"w");
00554   if (fp == NULL){
00555      perror("dumpLDStatsASCII");
00556      return;
00557   }
00558   // CkPrintf("***** DUMP data to file: %s ***** \n", filename);
00559   fprintf(fp,"%d %d %d\n",numProcessors,numPatches,numComputes);
00560 
00561   int i;
00562   for(i=0;i<numProcessors;i++) {
00563     processorInfo* p = processorArray + i;
00564     fprintf(fp,"%d %e %e %e %e\n",p->Id,p->load,p->backgroundLoad,p->computeLoad,p->idleTime);
00565   }
00566 
00567   for(i=0;i < numPatches; i++) {
00568     patchInfo* p = patchArray + i;
00569     fprintf(fp,"%d %e %d %d\n",p->Id,p->load,p->processor,p->numAtoms);
00570   }
00571 
00572   for(i=0; i < numComputes; i++) {
00573     computeInfo* c = computeArray + i;
00574     fprintf(fp,"%d %e %d %d %d %d",c->Id,c->load,c->patch1,c->patch2,
00575             c->processor,c->oldProcessor);
00576     fprintf(fp, "\n");
00577   }
00578 
00579   // dump patchSet
00580   for (i=0; i< numProcessors; i++) {
00581       int num = processorArray[i].proxies.numElements();
00582       fprintf(fp, "%d %d: ", i, num);
00583       Iterator nextProxy;
00584       patchInfo *p = (patchInfo *)processorArray[i].proxies.
00585         iterator((Iterator *)&nextProxy);
00586       while (p) {
00587           fprintf(fp, "%d ", p->Id);
00588           p = (patchInfo *)processorArray[i].proxies.
00589             next((Iterator*)&nextProxy);
00590       }
00591       fprintf(fp, "\n");
00592   }
00593   // dump proxiesOn
00594   for (i=0; i<numPatches; i++)  {
00595     int num = patchArray[i].proxiesOn.numElements();
00596     fprintf(fp, "%d %d: ", i, num);
00597       Iterator nextProc;
00598       processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.
00599         iterator((Iterator *)&nextProc);
00600       while (p) {
00601         fprintf(fp, "%d ", p->Id);
00602         p = (processorInfo *)patchArray[i].proxiesOn.
00603           next((Iterator*)&nextProc);
00604       }
00605       fprintf(fp, "\n");
00606   }
00607 
00608   fclose(fp);
00609   //CkExit();
00610 }
00611 
00612 
00616 int NamdHybridLB::buildData(LDStats* stats) {
00617   int n_pes = stats->nprocs();
00618 
00619   PatchMap* patchMap = PatchMap::Object();
00620   ComputeMap* computeMap = ComputeMap::Object();
00621   const SimParameters* simParams = Node::Object()->simParameters;
00622 
00623   BigReal bgfactor = simParams->ldbBackgroundScaling;
00624   BigReal pmebgfactor = simParams->ldbPMEBackgroundScaling;
00625   BigReal homebgfactor = simParams->ldbHomeBackgroundScaling;
00626   int pmeOn = simParams->PMEOn;
00627   int unLoadPme = simParams->ldbUnloadPME;
00628   int pmeBarrier = simParams->PMEBarrier;
00629   int unLoadZero = simParams->ldbUnloadZero;
00630   int unLoadOne = simParams->ldbUnloadOne;
00631   int unLoadIO= simParams->ldbUnloadOutputPEs;
00632   // traversing the list of processors and getting their load information
00633   int i, pe_no;
00634   for (i=0; i<n_pes; ++i) {
00635     pe_no = stats->procs[i].pe;
00636 
00637     // BACKUP processorArray[i].Id = i; 
00638     processorArray[i].Id = pe_no;               // absolute pe number
00639     processorArray[i].available = true;
00640     // BACKUP if ( pmeOn && isPmeProcessor(i) )
00641     if ( pmeOn && isPmeProcessor(pe_no) ) {
00642       processorArray[i].backgroundLoad = pmebgfactor * stats->procs[i].bg_walltime;
00643     // BACKUP } else if (patchMap->numPatchesOnNode(i) > 0) {
00644     } else if (patchMap->numPatchesOnNode(pe_no) > 0) {
00645       processorArray[i].backgroundLoad = homebgfactor * stats->procs[i].bg_walltime;
00646     } else {
00647       processorArray[i].backgroundLoad = bgfactor * stats->procs[i].bg_walltime;
00648     }
00649     processorArray[i].idleTime = stats->procs[i].idletime;
00650     processorArray[i].load = processorArray[i].computeLoad = 0.0;
00651   }
00652 
00653   // If I am group zero, then offload processor 0 and 1 in my group
00654   if(stats->procs[0].pe == 0) {
00655     if(unLoadZero) processorArray[0].available = false;
00656     if(unLoadOne) processorArray[1].available = false;
00657   }
00658 
00659   // if all pes are Pme, disable this flag
00660   if (pmeOn && unLoadPme) {
00661     for (i=0; i<n_pes; i++) {
00662       if(!isPmeProcessor(stats->procs[i].pe))  break;
00663     }
00664     if (i == n_pes) {
00665       iout << iINFO << "Turned off unLoadPme flag!\n"  << endi;
00666       unLoadPme = 0;
00667     }
00668   }
00669 
00670   if (pmeOn && unLoadPme) {
00671     for (i=0; i<n_pes; i++) {
00672       if ((pmeBarrier && i==0) || isPmeProcessor(stats->procs[i].pe)) 
00673         processorArray[i].available = false;
00674     }
00675   }
00676 
00677   // if all pes are output, disable this flag
00678 #ifdef MEM_OPT_VERSION
00679   if (unLoadIO) {
00680       if (simParams->numoutputprocs == n_pes) {
00681           iout << iINFO << "Turned off unLoadIO flag!\n"  << endi;
00682           unLoadIO = 0;
00683       }
00684   }
00685   if (unLoadIO){
00686       for (i=0; i<n_pes; i++) {
00687           if (isOutputProcessor(stats->procs[i].pe)) 
00688               processorArray[i].available = false;
00689       }
00690   }
00691 #endif
00692 
00693   // need to go over all patches to get all required proxies
00694   int numPatches = patchMap->numPatches();
00695   int totalLocalProxies = 0;
00696   int totalProxies = 0;
00697   for ( int pid=0; pid<numPatches; ++pid ) {
00698         int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00699 
00700         patchArray[pid].Id = pid;
00701         patchArray[pid].numAtoms = 0;
00702         patchArray[pid].processor = patchMap->node(pid);
00703 
00704         const int numProxies = 
00705 #if 0 // USE_TOPOMAP - this function needs to be there for the hybrid case
00706         requiredProxiesOnProcGrid(pid,neighborNodes);
00707 #else
00708         requiredProxies(pid, neighborNodes);
00709 #endif
00710 
00711         int numLocalProxies = 0;
00712         for (int k=0; k<numProxies; k++) {
00713                 if( (neighborNodes[k] >= stats->procs[0].pe) && (neighborNodes[k] <= stats->procs[n_pes-1].pe) ){
00714                         ++numLocalProxies;
00715                         int index = neighborNodes[k] - stats->procs[0].pe;
00716                         processorArray[index].proxies.unchecked_insert(&patchArray[pid]);
00717                         patchArray[pid].proxiesOn.unchecked_insert(&processorArray[index]);
00718                 }
00719         }
00720 #if 0
00721         if ( numLocalProxies ) {
00722             CkPrintf("LDB Pe %d patch %d has %d local of %d total proxies\n",
00723                 CkMyPe(), pid, numLocalProxies, numProxies);
00724         }
00725 #endif
00726         totalLocalProxies += numLocalProxies;
00727         totalProxies += numProxies;
00728   }
00729 #if 0
00730   CkPrintf("LDB Pe %d has %d local of %d total proxies\n",
00731                 CkMyPe(), totalLocalProxies, totalProxies);
00732 #endif
00733   
00734   int nMoveableComputes=0;
00735   int index;
00736 
00737   int j;
00738 
00739   // this loop goes over only the objects in this group
00740   for(j=0; j < stats->n_objs; j++) {
00741         const LDObjData &this_obj = stats->objData[j];
00742         int frompe = stats->from_proc[j];
00743 
00744         // filter out non-NAMD managed objects (like PME array)
00745         if (this_obj.omID().id.idx != 1) {
00746                 // CmiAssert(frompe>=0 && frompe<n_pes);
00747                 // CkPrintf("non-NAMD object %d on pe %d with walltime %lf\n",
00748                 // this_obj.id().id[0], frompe + stats->procs[0].pe, this_obj.wallTime);
00749                 processorArray[frompe].backgroundLoad += this_obj.wallTime;
00750                 continue;
00751         }
00752 
00753         if (this_obj.id().id[1] == -2) { // Its a patch
00754                 // handled above to get required proxies from all patches
00755                 processorArray[frompe].backgroundLoad += this_obj.wallTime;
00756         } else if (this_obj.id().id[1] == -3) { // Its a bonded compute
00757                 processorArray[frompe].backgroundLoad += this_obj.wallTime;
00758         } else if (this_obj.migratable && this_obj.wallTime != 0.) { // Its a compute
00759 
00760                 const int cid = this_obj.id().id[0];
00761                 const int p0 = computeMap->pid(cid,0);
00762 
00763                 // For self-interactions, just return the same pid twice
00764                 int p1;
00765                 if (computeMap->numPids(cid) > 1)
00766                         p1 = computeMap->pid(cid,1);
00767                         else p1 = p0;
00768                         computeArray[nMoveableComputes].Id = cid;
00769                         //BACKUP computeArray[nMoveableComputes].oldProcessor = stats->from_proc[j];
00770                         if (frompe >= n_pes) {  // from outside
00771 CkPrintf("assigning random old processor...this looks broken\n");
00772                           computeArray[nMoveableComputes].oldProcessor = CrnRand()%n_pes + stats->procs[0].pe;     // random
00773                         }
00774                         else {
00775                           computeArray[nMoveableComputes].oldProcessor = frompe + stats->procs[0].pe;
00776                         }
00777                         from_procs[nMoveableComputes] = frompe;
00778 
00779                         //BACKUP2 index = stats->from_proc[j] - stats->procs[0].pe;
00780                         //BACKUP processorArray[stats->from_proc[j]].computeLoad += this_obj.wallTime;
00781                         int index = computeArray[nMoveableComputes].oldProcessor - stats->procs[0].pe; 
00782                         processorArray[index].computeLoad += this_obj.wallTime;
00783                         computeArray[nMoveableComputes].processor = -1;
00784                         computeArray[nMoveableComputes].patch1 = p0;
00785                         computeArray[nMoveableComputes].patch2 = p1;
00786                         computeArray[nMoveableComputes].handle = this_obj.handle;
00787                         computeArray[nMoveableComputes].load = this_obj.wallTime;
00788                         nMoveableComputes++;
00789         }
00790   }
00791 
00792         for (i=0; i<n_pes; i++) {
00793           processorArray[i].load = processorArray[i].backgroundLoad + processorArray[i].computeLoad;
00794         }
00795         stats->clear();
00796         return nMoveableComputes;
00797 }
00798 
00799 
00800 int NamdHybridLB::requiredProxies(PatchID id, int neighborNodes[])
00801 {
00802   PatchMap* patchMap = PatchMap::Object();
00803   int myNode = patchMap->node(id);
00804   int nProxyNodes = 0;
00805 
00806 #define IF_NEW_NODE \
00807     int j; \
00808     for ( j=0; j<nProxyNodes && neighborNodes[j] != proxyNode; ++j ); \
00809     if ( j == nProxyNodes )
00810 
00811   PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00812   neighbors[0] = id;
00813   int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00814   for ( int i = 0; i < numNeighbors; ++i ) {
00815     const int proxyNode = patchMap->basenode(neighbors[i]);
00816     if ( proxyNode != myNode ) {
00817       IF_NEW_NODE {
00818         neighborNodes[nProxyNodes] = proxyNode;
00819         nProxyNodes++;
00820       }
00821     }
00822   }
00823 
00824   // Distribute initial default proxies across empty processors.
00825   // This shouldn't be necessary, but may constrain the load balancer
00826   // and avoid placing too many proxies on a single processor.  -JCP
00827   
00828   // This code needs to be turned off when the creation of ST is
00829   // shifted to the load balancers -ASB
00830 
00831 #if 1
00832   int numNodes = CkNumPes();
00833   int numPatches = patchMap->numPatches();
00834   int emptyNodes = numNodes - numPatches;
00835   if ( emptyNodes > numPatches ) {
00836     int nodesPerPatch = nProxyNodes + 1 + (emptyNodes-1) / numPatches;
00837     int maxNodesPerPatch = PatchMap::MaxOneAway + PatchMap::MaxTwoAway;
00838     if ( nodesPerPatch > maxNodesPerPatch ) nodesPerPatch = maxNodesPerPatch;
00839     int proxyNode = (myNode + 1) % numNodes;
00840     while ( nProxyNodes < nodesPerPatch &&
00841                         ! patchMap->numPatchesOnNode(proxyNode) ) {
00842       if ( proxyNode != myNode ) {
00843         IF_NEW_NODE {
00844           neighborNodes[nProxyNodes] = proxyNode;
00845           nProxyNodes++;
00846         }
00847       }
00848       proxyNode = (proxyNode + 1) % numNodes;
00849     }
00850     proxyNode = (myNode - 1 + numNodes) % numNodes;
00851     while ( nProxyNodes < nodesPerPatch &&
00852                         ! patchMap->numPatchesOnNode(proxyNode) ) {
00853       if ( proxyNode != myNode ) {
00854         IF_NEW_NODE {
00855           neighborNodes[nProxyNodes] = proxyNode;
00856           nProxyNodes++;
00857         }
00858       }
00859       proxyNode = (proxyNode - 1 + numNodes) % numNodes;
00860     }
00861     proxyNode = (myNode + 1) % numNodes;
00862     int count = 0;
00863     while ( nProxyNodes < nodesPerPatch ) {
00864       if ( ! patchMap->numPatchesOnNode(proxyNode) && proxyNode != myNode ) {
00865         IF_NEW_NODE {
00866           neighborNodes[nProxyNodes] = proxyNode;
00867           nProxyNodes++;
00868         }
00869       }
00870       proxyNode = (proxyNode + 1) % numNodes;
00871       count ++; if (count == numNodes) break;   // we looped all
00872     }
00873   } else {
00874     int proxyNode = myNode - 1;
00875     if ( proxyNode >= 0 && ! patchMap->numPatchesOnNode(proxyNode) ) {
00876       if ( proxyNode != myNode ) {
00877         IF_NEW_NODE {
00878           neighborNodes[nProxyNodes] = proxyNode;
00879           nProxyNodes++;
00880         }
00881       }
00882     }
00883     proxyNode = myNode + 1;
00884     if ( proxyNode < numNodes && ! patchMap->numPatchesOnNode(proxyNode) ) {
00885       if ( proxyNode != myNode ) {
00886         IF_NEW_NODE {
00887           neighborNodes[nProxyNodes] = proxyNode;
00888           nProxyNodes++;
00889         }
00890       }
00891     }
00892   }
00893 #endif
00894 
00895   return nProxyNodes;
00896 }
00897 
00898 #if 0
00899 void NamdHybridLB::CollectInfo(Location *loc, int n, int fromlevel)
00900 {
00901    int atlevel = fromlevel + 1;
00902    LevelData *lData = levelData[atlevel];
00903    lData->info_recved++;
00904 
00905    CkVec<Location> &matchedObjs = lData->matchedObjs;
00906 CmiAssert(0);
00907 
00908    // sort into mactched and unmatched list
00909    std::map<LDObjKey, int> &unmatchedObjs = lData->unmatchedObjs;
00910    for (int i=0; i<n; i++) {
00911      std::map<LDObjKey, int>::iterator iter = unmatchedObjs.find(loc[i].key);
00912      if (iter != unmatchedObjs.end()) {
00913        CmiAssert(iter->second != -1 || loc[i].loc != -1);
00914        if (loc[i].loc == -1) loc[i].loc = iter->second;
00915        matchedObjs.push_back(loc[i]);
00916        unmatchedObjs.erase(iter);
00917      }
00918      else
00919        unmatchedObjs[loc[i].key] = loc[i].loc;
00920    }
00921 
00922 //  DEBUGF(("[%d] level %d has %d unmatched and %d matched. \n", CkMyPe(), atlevel, unmatchedObjs.size(), matchedObjs.size()));
00923 
00924    if (lData->info_recved == lData->nChildren) {
00925      lData->info_recved = 0;
00926      if (_lb_args.debug() > 1)
00927          CkPrintf("[%d] CollectInfo at level %d started at %f\n",
00928                 CkMyPe(), atlevel, CkWallTimer());
00929      if (lData->parent != -1) {
00930 
00931                 // NAMD specific
00932                 CkVec<Location> unmatchedbuf;
00933                 for(std::map<LDObjKey, int>::const_iterator it = unmatchedObjs.begin(); it != unmatchedObjs.end(); ++it){
00934                 unmatchedbuf.push_back(Location(it->first, it->second));
00935                 }
00936                 // checking if update of ComputeMap is ready before calling parent
00937                 if(CkMyPe() == 0){
00938                         if(updateFlag){
00939                                 updateFlag = false;
00940                                 collectFlag = false;
00941                                 thisProxy[lData->parent].CollectInfo(unmatchedbuf.getVec(), unmatchedbuf.size(), atlevel);
00942                         }else{
00943                                 CkPrintf("[%d] COMPUTEMAP UPDATE NOT READY\n",CkMyPe());
00944                                 collectFlag = true;
00945                                 parent_backup = lData->parent;
00946                                 loc_backup = unmatchedbuf.getVec();
00947                                 n_backup = unmatchedbuf.size();
00948                                 fromlevel_backup = atlevel;
00949                         }
00950                 }else{
00951                         // send only unmatched ones up the tree
00952                         thisProxy[lData->parent].CollectInfo(unmatchedbuf.getVec(), unmatchedbuf.size(), atlevel);
00953                 }
00954 
00955      }
00956      else { // root
00957        // we should have all answers now
00958        CmiAssert(unmatchedObjs.size() == 0);
00959        // start send match list down
00960        thisProxy.PropagateInfo(matchedObjs.getVec(), matchedObjs.size(), atlevel, lData->nChildren, lData->children);
00961        lData->statsData->clear();
00962      }
00963    }
00964 }
00965 #endif

Generated on Wed Nov 22 01:17:16 2017 for NAMD by  doxygen 1.4.7