NamdCentLB.C

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * $Source: /home/cvs/namd/cvsroot/namd2/src/NamdCentLB.C,v $
00003  * $Author: jim $
00004  * $Date: 2017/03/30 20:06:17 $
00005  * $Revision: 1.125 $
00006  *****************************************************************************/
00007 
00008 #if !defined(WIN32) || defined(__CYGWIN__)
00009 #include <unistd.h>
00010 #endif
00011 #include <fcntl.h>
00012 
00013 #include "InfoStream.h"
00014 #include "NamdCentLB.h"
00015 #include "NamdCentLB.def.h"
00016 #include "Node.h"
00017 #include "PatchMap.h"
00018 #include "ComputeMap.h"
00019 #include "LdbCoordinator.h"
00020 
00021 // #define DUMP_LDBDATA 1
00022 // #define LOAD_LDBDATA 1
00023 
00024 double *cpuloads = NULL;
00025 
00026 void CreateNamdCentLB() {
00027   // CkPrintf("[%d] creating NamdCentLB %d\n",CkMyPe(),loadbalancer);
00028   loadbalancer = CProxy_NamdCentLB::ckNew();
00029   // CkPrintf("[%d] created NamdCentLB %d\n",CkMyPe(),loadbalancer);
00030   if (CkMyRank() == 0 && cpuloads == NULL) {    
00031     cpuloads = new double[CkNumPes()];
00032     CmiAssert(cpuloads != NULL);
00033     for (int i=0; i<CkNumPes(); i++) cpuloads[i] = 0.0;
00034   }
00035 }
00036 
00037 NamdCentLB *AllocateNamdCentLB() {
00038   return new NamdCentLB((CkMigrateMessage*)NULL);
00039 }
00040 
00044 NamdCentLB::NamdCentLB(CkMigrateMessage *msg): CentralLB(msg) {
00045   processorArray = 0;
00046   patchArray = 0;
00047   computeArray = 0;
00048 } 
00049 
00050 NamdCentLB::NamdCentLB(): CentralLB(CkLBOptions(-1))
00051 {
00052   //  if (CkMyPe()==0)
00053   //   CkPrintf("[%d] NamdCentLB created\n",CkMyPe());
00054   processorArray = 0;
00055   patchArray = 0;
00056   computeArray = 0;
00057 }
00058 
00059 /*
00060 NamdCentLB::~NamdCentLB()
00061 {
00062   delete [] processorArray;
00063   delete [] patchArray;
00064   delete [] computeArray;
00065 }
00066 */
00067 
00068 bool NamdCentLB::QueryBalanceNow(int _step)
00069 {
00070   //  CkPrintf("[%d] Balancing on step %d\n",CkMyPe(),_step);
00071   if ( LdbCoordinator::Object()->takingLdbData ) {
00072     return true;
00073   } else {
00074     return false;
00075   }
00076 }
00077 
00078 bool NamdCentLB::QueryDumpData()
00079 {
00080 #if 0
00081   if (LdbCoordinator::Object()->ldbCycleNum == 1)  return true;
00082   if (LdbCoordinator::Object()->ldbCycleNum == 2)  return true;
00083 #endif
00084   return false;
00085 }
00086 
00087 CLBMigrateMsg* NamdCentLB::Strategy(LDStats* stats)
00088 {
00089   //  CkPrintf("LDB: All statistics received at %f, %f\n",
00090   //  CmiTimer(),CmiWallTimer());
00091 
00092   int numProcessors = stats->nprocs();
00093   int numPatches = PatchMap::Object()->numPatches();
00094   ComputeMap *computeMap = ComputeMap::Object();
00095   const int numComputes = computeMap->numComputes();
00096   const SimParameters* simParams = Node::Object()->simParameters;
00097 
00098   // these sizes should never change
00099   if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00100   if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00101   if ( ! computeArray ) computeArray = new computeInfo[numComputes];
00102 
00103   int nMoveableComputes = buildData(stats);
00104 
00105 #if LDB_DEBUG
00106 #define DUMP_LDBDATA 1
00107 #define LOAD_LDBDATA 1
00108 #endif
00109 
00110 #if DUMP_LDBDATA 
00111   dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00112 #elif LOAD_LDBDATA
00113   loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00114   // CkExit();
00115 #endif
00116 
00117   double averageLoad = 0.;
00118   double avgCompute = 0.;
00119   if ( nMoveableComputes ) {
00120    int i;
00121    double total = 0.;
00122    double maxCompute = 0.;
00123    int maxi = 0;
00124    for (i=0; i<nMoveableComputes; i++) {
00125       double load = computeArray[i].load;
00126       total += load;
00127       if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
00128    }
00129    avgCompute = total / nMoveableComputes;
00130 
00131     int P = stats->nprocs();
00132    int numPesAvailable = 0;
00133    for (i=0; i<P; i++) {
00134       if (processorArray[i].available) {
00135         ++numPesAvailable;
00136         total += processorArray[i].backgroundLoad;
00137       }
00138    }
00139    if (numPesAvailable == 0)
00140      NAMD_die("No processors available for load balancing!\n");
00141 
00142    averageLoad = total/numPesAvailable;
00143    CkPrintf("LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
00144             computeArray[maxi].handle.id.id[0],
00145             maxCompute, 100. * maxCompute / averageLoad, averageLoad);
00146    CkPrintf("LDB: Average compute %f is %.1f%% of average load %f\n",
00147             avgCompute, 100. * avgCompute / averageLoad, averageLoad);
00148   }
00149 
00150   if ( step() == 1 ) {
00151     // compute splitting only
00152     // partitions are stored as char but mostly limited by
00153     // high load noise at low outer-loop iteration counts
00154     int maxParts = 10;
00155 #ifdef NAMD_CUDA
00156 //split LCPO compute very small, else CUDA compute is delayed
00157     if (simParams->LCPOOn) {
00158       maxParts = 20;
00159     }
00160 #endif
00161     int totalAddedParts = 0;
00162     double maxCompute = averageLoad / 10.;
00163     if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
00164     if ( simParams->ldbRelativeGrainsize > 0. ) {
00165       maxCompute = averageLoad * simParams->ldbRelativeGrainsize;
00166     }
00167     CkPrintf("LDB: Partitioning computes with target load %f\n", maxCompute);
00168     double maxUnsplit = 0.;
00169     for (int i=0; i<nMoveableComputes; i++) {
00170       computeArray[i].processor = computeArray[i].oldProcessor;
00171       const int cid = computeArray[i].handle.id.id[0];
00172       const double load = computeArray[i].load;
00173       if ( computeMap->numPartitions(cid) == 0 ) {
00174         if ( load > maxUnsplit ) maxUnsplit = load;
00175         continue;
00176       }
00177       int nparts = (int) ceil(load / maxCompute);
00178       if ( nparts > maxParts ) nparts = maxParts;
00179       if ( nparts < 1 ) nparts = 1;
00180       if ( 0 && nparts > 1 ) {
00181         CkPrintf("LDB: Partitioning compute %d with load %f by %d\n",
00182                   cid, load, nparts);
00183       }
00184       computeMap->setNewNumPartitions(cid,nparts);
00185       totalAddedParts += nparts - 1;
00186     }
00187     CkPrintf("LDB: Increased migratable compute count from %d to %d\n",
00188               nMoveableComputes,nMoveableComputes+totalAddedParts);
00189     CkPrintf("LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
00190   } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
00191     if (step() < 4)
00192       TorusLB(computeArray, patchArray, processorArray,
00193                   nMoveableComputes, numPatches, numProcessors);
00194     else
00195       RefineTorusLB(computeArray, patchArray, processorArray,
00196                   nMoveableComputes, numPatches, numProcessors, 1);
00197   } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
00198     TorusLB(computeArray, patchArray, processorArray,
00199                   nMoveableComputes, numPatches, numProcessors);
00200   } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00201     RefineTorusLB(computeArray, patchArray, processorArray,
00202                   nMoveableComputes, numPatches, numProcessors, 1);
00203   } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
00204     if (step() < 4)
00205       Alg7(computeArray, patchArray, processorArray,
00206                   nMoveableComputes, numPatches, numProcessors);
00207     else
00208       RefineOnly(computeArray, patchArray, processorArray, 
00209                   nMoveableComputes, numPatches, numProcessors);
00210   }
00211 
00212 #if LDB_DEBUG && USE_TOPOMAP
00213   TopoManager tmgr;
00214   int pe1, pe2, pe3, hops=0;
00215   /* This is double counting the hops
00216   for(int i=0; i<nMoveableComputes; i++)
00217   {
00218     pe1 = computeArray[i].processor;
00219     pe2 = patchArray[computeArray[i].patch1].processor;
00220     pe3 = patchArray[computeArray[i].patch2].processor;
00221     hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00222     if(computeArray[i].patch1 != computeArray[i].patch2)
00223       hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
00224   }*/
00225   for (int i=0; i<numPatches; i++)  {
00226     //int num = patchArray[i].proxiesOn.numElements();
00227     pe1 = patchArray[i].processor;
00228     Iterator nextProc;
00229     processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00230     while (p) {
00231       pe2 = p->Id;
00232       hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00233       p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00234     }
00235   }
00236   CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00237 #endif
00238 
00239 #if DUMP_LDBDATA
00240   dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00241 #elif LOAD_LDBDATA
00242   dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00243   // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00244   // CkExit();
00245 #endif
00246 
00247   // For error checking:
00248   // Count up computes, to see if somebody doesn't have any computes
00249   int i;
00250 #if 0
00251   int* computeCount = new int[numProcessors];
00252   for(i=0; i<numProcessors; i++)
00253     computeCount[i]=0;
00254   for(i=0; i<nMoveableComputes; i++)
00255     computeCount[computeArray[i].processor]++;
00256   for(i=0; i<numProcessors; i++) {
00257     if (computeCount[i]==0)
00258       iout << iINFO <<"Warning: Processor " << i 
00259            << " has NO moveable computes.\n" << endi;
00260   }
00261   delete [] computeCount;
00262 #endif
00263   
00264   CkVec<MigrateInfo *> migrateInfo;
00265   for(i=0;i<nMoveableComputes;i++) {
00266     if (computeArray[i].processor != computeArray[i].oldProcessor) {
00267       //      CkPrintf("[%d] Obj %d migrating from %d to %d\n",
00268       //               CkMyPe(),computeArray[i].handle.id.id[0],
00269       //               computeArray[i].processor,computeArray[i].oldProcessor);
00270       MigrateInfo *migrateMe = new MigrateInfo;
00271       migrateMe->obj = computeArray[i].handle;
00272       migrateMe->from_pe = computeArray[i].oldProcessor;
00273       migrateMe->to_pe = computeArray[i].processor;
00274       migrateInfo.insertAtEnd(migrateMe);
00275 
00276       // sneak in updates to ComputeMap
00277       computeMap->setNewNode(computeArray[i].handle.id.id[0],
00278                                 computeArray[i].processor);
00279     }
00280   }
00281   
00282   int migrate_count=migrateInfo.length();
00283   // CkPrintf("NamdCentLB migrating %d elements\n",migrate_count);
00284   CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
00285 
00286   msg->n_moves = migrate_count;
00287   for(i=0; i < migrate_count; i++) {
00288     MigrateInfo* item = migrateInfo[i];
00289     msg->moves[i] = *item;
00290     delete item;
00291     migrateInfo[i] = 0;
00292   }
00293 
00294   for (i=0; i<numProcessors; i++) {
00295     cpuloads[i] = processorArray[i].load;
00296   }
00297 
00298   delete [] processorArray;
00299   delete [] patchArray;
00300   delete [] computeArray;
00301 
00302   processorArray = NULL;
00303   patchArray = NULL;
00304   computeArray = NULL;
00305   
00306   return msg;
00307 };
00308 
00309 #ifndef WIN32
00310 
00311 void NamdCentLB::dumpDataASCII(char *file, int numProcessors,
00312                                int numPatches, int numComputes)
00313 {
00314   char filename[128];
00315   sprintf(filename, "%s.%d", file, step());
00316   FILE* fp = fopen(filename,"w");
00317   if (fp == NULL){
00318      perror("dumpLDStatsASCII");
00319      return;
00320   }
00321   CkPrintf("***** DUMP data to file: %s ***** \n", filename);
00322   fprintf(fp,"%d %d %d\n",numProcessors,numPatches,numComputes);
00323 
00324   int i;
00325   for(i=0;i<numProcessors;i++) {
00326     processorInfo* p = processorArray + i;
00327     fprintf(fp,"%d %e %e %e %e\n",p->Id,p->load,p->backgroundLoad,p->computeLoad,p->idleTime);
00328   }
00329 
00330   for(i=0;i < numPatches; i++) {
00331     patchInfo* p = patchArray + i;
00332     fprintf(fp,"%d %e %d %d\n",p->Id,p->load,p->processor,p->numAtoms);
00333   }
00334     
00335   for(i=0; i < numComputes; i++) {
00336     computeInfo* c = computeArray + i;
00337     fprintf(fp,"%d %e %d %d %d %d",c->Id,c->load,c->patch1,c->patch2,
00338             c->processor,c->oldProcessor);
00339     fprintf(fp, "\n");
00340   }
00341 
00342   // dump patchSet
00343   for (i=0; i< numProcessors; i++) {
00344       int num = processorArray[i].proxies.numElements();
00345       fprintf(fp, "%d %d: ", i, num);
00346       Iterator nextProxy;
00347       patchInfo *p = (patchInfo *)processorArray[i].proxies.
00348         iterator((Iterator *)&nextProxy);
00349       while (p) {
00350           fprintf(fp, "%d ", p->Id);
00351           p = (patchInfo *)processorArray[i].proxies.
00352             next((Iterator*)&nextProxy);
00353       }
00354       fprintf(fp, "\n");
00355   }
00356   // dump proxiesOn
00357   for (i=0; i<numPatches; i++)  {
00358     int num = patchArray[i].proxiesOn.numElements();
00359     fprintf(fp, "%d %d: ", i, num);
00360       Iterator nextProc;
00361       processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.
00362         iterator((Iterator *)&nextProc);
00363       while (p) {
00364         fprintf(fp, "%d ", p->Id);
00365         p = (processorInfo *)patchArray[i].proxiesOn.
00366           next((Iterator*)&nextProc);
00367       }
00368       fprintf(fp, "\n");
00369   }
00370 
00371   fclose(fp);
00372   //CkExit();
00373 }
00374 
00375 void NamdCentLB::loadDataASCII(char *file, int &numProcessors,
00376                                int &numPatches, int &numComputes)
00377 {
00378   char filename[128];
00379   //sprintf(filename, "%s.%d", file, step());
00380   sprintf(filename, "%s", file);
00381 
00382   CkPrintf("***** Load ascii data from file: %s ***** \n", filename);
00383 
00384   FILE* fp = fopen(filename, "r");
00385   if (fp == NULL){
00386      perror("loadDataASCII");
00387      return;
00388   }
00389 
00390   fscanf(fp,"%d %d %d",&numProcessors,&numPatches,&numComputes);
00391 
00392   printf("numProcs: %d numPatches: %d numComputes: %d\n", numProcessors,numPatches, numComputes);
00393 
00394   delete [] processorArray;
00395   delete [] patchArray;
00396   delete [] computeArray;
00397   processorArray = new processorInfo[numProcessors];
00398   patchArray = new patchInfo[numPatches];
00399   computeArray = new computeInfo[numComputes];
00400 
00401   int i;
00402   for(i=0;i<numProcessors;i++) {
00403     processorInfo* p = processorArray + i;
00404     fscanf(fp,"%d %le %le %le", &p->Id, &p->load, &p->backgroundLoad, &p->computeLoad);
00405     fscanf(fp,"%le\n", &p->idleTime);
00406     if (p->Id != i) CmiAbort("Reading processorArray error!");
00407 //    p->backgroundLoad = 0.0;
00408   }
00409 
00410   for(i=0;i < numPatches; i++) {
00411     patchInfo* p = patchArray + i;
00412     fscanf(fp,"%d %le %d %d\n",&p->Id,&p->load,&p->processor,&p->numAtoms);
00413     if (p->Id != i || p->processor > numProcessors || p->processor < 0) 
00414       CmiAbort("Reading patchArray error!");
00415   }
00416     
00417   for(i=0; i < numComputes; i++) {
00418     computeInfo* c = computeArray + i;
00419     fscanf(fp,"%d %le %d %d %d %d",&c->Id,&c->load,&c->patch1,&c->patch2,
00420             &c->processor,&c->oldProcessor);
00421 
00422     if (c->patch1 < 0 || c->patch1 > numPatches || c->patch2 < 0 || c->patch2 > numPatches)
00423       CmiAbort("Reading computeArray error!");
00424   // printf("%d %e %d %d %d %d\n", c->Id,c->load,c->patch1,c->patch2,c->processor,c->oldProcessor);
00425   }
00426 
00427   // dump patchSet
00428   for (i=0; i< numProcessors; i++) {
00429       int num, curp;
00430       fscanf(fp,"%d %d: ",&curp, &num);
00431       if(curp != i)
00432         CmiAbort("Reading patchsSet error!");
00433       for (int j=0; j<num; j++) {
00434           int id;
00435           fscanf(fp,"%d",&id);
00436           processorArray[i].proxies.unchecked_insert(&patchArray[id]);
00437       }
00438   }
00439   // dump proxiesOn
00440   for (i=0; i<numPatches; i++)  {
00441       int num, curp;
00442       fscanf(fp,"%d %d: ",&curp, &num);
00443       if(curp != i)
00444         CmiAbort("Reading proxiesOn error!");
00445       for (int j=0; j<num; j++) {
00446           int id;
00447           fscanf(fp,"%d",&id);
00448           patchArray[i].proxiesOn.insert(&processorArray[id]);
00449       }
00450   }
00451 
00452   fclose(fp);
00453 }
00454 #endif
00455 
00456 extern int isPmeProcessor(int); 
00457 #ifdef MEM_OPT_VERSION
00458 extern int isOutputProcessor(int); 
00459 #endif
00460 #if defined(NAMD_MIC)
00461 extern int isMICProcessor(int);
00462 #endif
00463 
00464 int NamdCentLB::buildData(LDStats* stats)
00465 {
00466   int n_pes = stats->nprocs();
00467 
00468   PatchMap* patchMap = PatchMap::Object();
00469   ComputeMap* computeMap = ComputeMap::Object();
00470   const SimParameters* simParams = Node::Object()->simParameters;
00471 
00472   BigReal bgfactor = simParams->ldbBackgroundScaling;
00473   BigReal pmebgfactor = simParams->ldbPMEBackgroundScaling;
00474   BigReal homebgfactor = simParams->ldbHomeBackgroundScaling;
00475   int pmeOn = simParams->PMEOn;
00476   int unLoadPme = simParams->ldbUnloadPME;
00477   int pmeBarrier = simParams->PMEBarrier;
00478   int unLoadZero = simParams->ldbUnloadZero;
00479   int unLoadOne = simParams->ldbUnloadOne;
00480   int unLoadIO= simParams->ldbUnloadOutputPEs;
00481   int i;
00482   for (i=0; i<n_pes; ++i) {
00483     processorArray[i].Id = i;
00484     processorArray[i].available = true;
00485     if ( pmeOn && isPmeProcessor(i) ) {
00486       processorArray[i].backgroundLoad = pmebgfactor * stats->procs[i].bg_walltime;
00487     } else if (patchMap->numPatchesOnNode(i) > 0) {
00488       processorArray[i].backgroundLoad = homebgfactor * stats->procs[i].bg_walltime;
00489     } else {
00490       processorArray[i].backgroundLoad = bgfactor * stats->procs[i].bg_walltime;
00491     }
00492     processorArray[i].idleTime = stats->procs[i].idletime;
00493     processorArray[i].load = processorArray[i].computeLoad = 0.0;
00494   }
00495 
00496 /* *********** this code is defunct *****************
00497 #if 0
00498   double bgfactor = 1.0 + 1.0 * CkNumPes()/1000.0;
00499   if ( bgfactor > 2.0 ) bgfactor = 2.0;
00500   iout << iINFO << "Scaling background load by " << bgfactor << ".\n" << endi;
00501   int i;
00502   for (i=0; i<n_pes; i++) {
00503     processorArray[i].Id = i;
00504     processorArray[i].backgroundLoad = bgfactor * stats[i].bg_walltime;
00505   }
00506 
00507   double bg_weight = 0.7;
00508 
00509   int i;
00510   for (i=0; i<n_pes; i++) {
00511     processorArray[i].Id = i;
00512     if (patchMap->numPatchesOnNode(i) > 0)
00513       processorArray[i].backgroundLoad = bg_weight * stats->procs[i].bg_walltime;
00514     else 
00515       processorArray[i].backgroundLoad = stats[i].bg_walltime;
00516   }
00517   
00518   //Modification to reduce the coputeload on PME processors
00519   const SimParameters* simParams = Node::Object()->simParameters;  
00520   
00521   // CkPrintf("BACKGROUND LOAD\n");
00522   if(simParams->PMEOn) {
00523     double bgfactor = 1.0 + 1.0 * CkNumPes()/1000.0;
00524     if ( bgfactor > 2.0 ) bgfactor = 2.0;
00525     for (i=0; i<n_pes; i++) {
00526       // CkPrintf("BG[%d] =  %5.5lf,", i, processorArray[i].backgroundLoad);
00527       if(isPmeProcessor(i)) {
00528         processorArray[i].backgroundLoad *= bgfactor;
00529       }
00530       // CkPrintf("%5.5lf;  ", processorArray[i].backgroundLoad);
00531     }
00532   }
00533   // CkPrintf("\n");
00534 #endif  
00535 *********** end of defunct code *********** */
00536 
00537   if (unLoadZero) processorArray[0].available = false;
00538   if (unLoadOne) processorArray[1].available = false;
00539 
00540   // if all pes are Pme, disable this flag
00541   if (pmeOn && unLoadPme) {
00542     for (i=0; i<n_pes; i++) {
00543       if (!isPmeProcessor(i))  break;
00544     }
00545     if (i == n_pes) {
00546       iout << iINFO << "Turned off unLoadPme flag!\n"  << endi;
00547       unLoadPme = 0;
00548     }
00549   }
00550   
00551   if (pmeOn && unLoadPme) {
00552     for (i=0; i<n_pes; i++) {
00553       if ((pmeBarrier && i==0) || isPmeProcessor(i)) 
00554         processorArray[i].available = false;
00555     }
00556   }
00557   // if all pes are output, disable this flag
00558 #ifdef MEM_OPT_VERSION
00559 
00560   if (unLoadIO) {
00561       if (simParams->numoutputprocs == n_pes) {
00562           iout << iINFO << "Turned off unLoadIO flag!\n"  << endi;
00563           unLoadIO = 0;
00564       }
00565   }
00566   if (unLoadIO){
00567     iout << iINFO << "Testing for output processors!\n"  << endi;
00568       for (i=0; i<n_pes; i++) {
00569           if (isOutputProcessor(stats->procs[i].pe)) 
00570             {
00571               //              iout << iINFO << "Removed output PE "<< stats->procs[i].pe <<" from available list!\n"  << endi;
00572               processorArray[i].available = false;
00573             }
00574           else
00575             {
00576               //              iout << iINFO << "Nonoutput PE "<< stats->procs[i].pe <<" is in available list!\n"  << endi;
00577             }
00578       }
00579   }
00580 #endif
00581 
00582   // Unload PEs driving MIC devices, if need be
00583   #if defined(NAMD_MIC)
00584     if (simParams->mic_unloadMICPEs != 0) {
00585       for (i = 0; i < n_pes; i++) {
00586         if (isMICProcessor(i) != 0) { processorArray[i].available = false; }
00587       }
00588     }
00589   #endif
00590 
00591   int nMoveableComputes=0;
00592   int nProxies = 0;             // total number of estimated proxies
00593   int nIdleComputes = 0;
00594 
00595   int j;
00596   for (j=0; j < stats->n_objs; j++) {
00597       const LDObjData &this_obj = stats->objData[j];
00598       int frompe = stats->from_proc[j];
00599 
00600       // filter out non-NAMD managed objects (like PME array)
00601       if (this_obj.omID().id.idx != 1) {
00602         // CkPrintf("non-NAMD object %d on pe %d with walltime %lf\n",
00603         // this_obj.id().id[0], stats->from_proc[j], this_obj.wallTime);
00604         processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00605         continue;
00606       }
00607 
00608       if (this_obj.id().id[1] == -2) { // Its a patch
00609         const int pid = this_obj.id().id[0];
00610         int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00611 
00612         patchArray[pid].Id = pid;
00613         patchArray[pid].numAtoms = 0;
00614         patchArray[pid].processor = stats->from_proc[j];
00615         const int numProxies = 
00616 #if USE_TOPOMAP
00617         requiredProxiesOnProcGrid(pid,neighborNodes);
00618 #else
00619         requiredProxies(pid, neighborNodes);
00620 #endif
00621 
00622         nProxies += numProxies;
00623 
00624         for (int k=0; k<numProxies; k++) {
00625           processorArray[neighborNodes[k]].proxies.unchecked_insert(&patchArray[pid]);
00626           patchArray[pid].proxiesOn.unchecked_insert(&processorArray[neighborNodes[k]]);
00627         }
00628         processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00629       } else if (this_obj.id().id[1] == -3) { // Its a bonded compute
00630         processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00631       } else if (this_obj.migratable) { // Its a compute
00632        if ( this_obj.wallTime == 0. ) { // don't migrate idle computes
00633          ++nIdleComputes;
00634        } else {
00635         const int cid = this_obj.id().id[0];
00636         const int p0 = computeMap->pid(cid,0);
00637 
00638         // For self-interactions, just return the same pid twice
00639         int p1;
00640         if (computeMap->numPids(cid) > 1)
00641           p1 = computeMap->pid(cid,1);
00642         else p1 = p0;
00643         computeArray[nMoveableComputes].Id = cid;
00644         computeArray[nMoveableComputes].oldProcessor = stats->from_proc[j];
00645         processorArray[stats->from_proc[j]].computeLoad += this_obj.wallTime;
00646         computeArray[nMoveableComputes].processor = -1;
00647         computeArray[nMoveableComputes].patch1 = p0;
00648         computeArray[nMoveableComputes].patch2 = p1;
00649         computeArray[nMoveableComputes].handle = this_obj.handle;
00650         computeArray[nMoveableComputes].load = this_obj.wallTime;
00651         nMoveableComputes++;
00652        }
00653       } else {
00654         processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00655       }
00656     }
00657 
00658    if ( nIdleComputes )
00659      CkPrintf("LDB: %d computes have load of zero\n", nIdleComputes);
00660 
00661 /* *********** this code is defunct *****************
00662 #if 0
00663   int averageProxy = nProxies / n_pes;
00664   CkPrintf("total proxies: %d, avervage: %d\n", nProxies, averageProxy);
00665   for (i=0; i<n_pes; i++) {
00666     // too many proxies on this node, weight the background load
00667     int proxies = processorArray[i].proxies.numElements();
00668     if (proxies > averageProxy) {
00669       double factor = 1.0*(proxies-averageProxy)/nProxies;
00670       processorArray[i].backgroundLoad *= (1.0 + factor);
00671       CkPrintf("On [%d]: too many proxies: %d, increased bg load by %f\n", i, nProxies, factor);
00672     }
00673   }
00674 #endif
00675 *********** end of defunct code *********** */
00676 
00677   for (i=0; i<n_pes; i++) {
00678     processorArray[i].load = processorArray[i].backgroundLoad + processorArray[i].computeLoad;
00679   }
00680   stats->clear();
00681   return nMoveableComputes;
00682 }
00683 
00684 // Figure out which proxies we will definitely create on other
00685 // nodes, without regard for non-bonded computes.  This code is swiped
00686 // from ProxyMgr, and changes there probably need to be propagated here.
00687 
00688 int NamdCentLB::requiredProxies(PatchID id, int neighborNodes[])
00689 {
00690   PatchMap* patchMap = PatchMap::Object();
00691   int myNode = patchMap->node(id);
00692   int nProxyNodes = 0;
00693 
00694 #define IF_NEW_NODE \
00695     int j; \
00696     for ( j=0; j<nProxyNodes && neighborNodes[j] != proxyNode; ++j ); \
00697     if ( j == nProxyNodes )
00698 
00699   PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00700   neighbors[0] = id;
00701   int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00702   for ( int i = 0; i < numNeighbors; ++i ) {
00703     const int proxyNode = patchMap->basenode(neighbors[i]);
00704     if ( proxyNode != myNode ) {
00705       IF_NEW_NODE {
00706         neighborNodes[nProxyNodes] = proxyNode;
00707         nProxyNodes++;
00708       }
00709     }
00710   }
00711 
00712   // Distribute initial default proxies across empty processors.
00713   // This shouldn't be necessary, but may constrain the load balancer
00714   // and avoid placing too many proxies on a single processor.  -JCP
00715 
00716   // This code needs to be turned off when the creation of ST is
00717   // shifted to the load balancers -ASB
00718 
00719 #if 1
00720   int numPes = CkNumPes();
00721   int numPatches = patchMap->numPatches();
00722   int emptyNodes = numPes - numPatches;
00723   if ( emptyNodes > numPatches ) {
00724     int nodesPerPatch = nProxyNodes + 1 + (emptyNodes-1) / numPatches;
00725     int maxNodesPerPatch = PatchMap::MaxOneAway + PatchMap::MaxTwoAway;
00726     if ( nodesPerPatch > maxNodesPerPatch ) nodesPerPatch = maxNodesPerPatch;
00727     int proxyNode = (myNode + 1) % numPes;
00728     while ( nProxyNodes < nodesPerPatch &&
00729                         ! patchMap->numPatchesOnNode(proxyNode) ) {
00730       if ( proxyNode != myNode ) {
00731         IF_NEW_NODE {
00732           neighborNodes[nProxyNodes] = proxyNode;
00733           nProxyNodes++;
00734         }
00735       }
00736       proxyNode = (proxyNode + 1) % numPes;
00737     }
00738     proxyNode = (myNode - 1 + numPes) % numPes;
00739     while ( nProxyNodes < nodesPerPatch &&
00740                         ! patchMap->numPatchesOnNode(proxyNode) ) {
00741       if ( proxyNode != myNode ) {
00742         IF_NEW_NODE {
00743           neighborNodes[nProxyNodes] = proxyNode;
00744           nProxyNodes++;
00745         }
00746       }
00747       proxyNode = (proxyNode - 1 + numPes) % numPes;
00748     }
00749     proxyNode = (myNode + 1) % numPes;
00750     int count = 0;
00751     while ( nProxyNodes < nodesPerPatch ) {
00752       if ( ! patchMap->numPatchesOnNode(proxyNode) && proxyNode != myNode ) {
00753         IF_NEW_NODE {
00754           neighborNodes[nProxyNodes] = proxyNode;
00755           nProxyNodes++;
00756         }
00757       }
00758       proxyNode = (proxyNode + 1) % numPes;
00759       count ++; if (count == numPes) break;   // we looped all
00760     }
00761   } else {
00762     int proxyNode = myNode - 1;
00763     if ( proxyNode >= 0 && ! patchMap->numPatchesOnNode(proxyNode) ) {
00764       if ( proxyNode != myNode ) {
00765         IF_NEW_NODE {
00766           neighborNodes[nProxyNodes] = proxyNode;
00767           nProxyNodes++;
00768         }
00769       }
00770     }
00771     proxyNode = myNode + 1;
00772     if ( proxyNode < numPes && ! patchMap->numPatchesOnNode(proxyNode) ) {
00773       if ( proxyNode != myNode ) {
00774         IF_NEW_NODE {
00775           neighborNodes[nProxyNodes] = proxyNode;
00776           nProxyNodes++;
00777         }
00778       }
00779     }
00780   }
00781 #endif
00782 
00783   return nProxyNodes;
00784 }
00785 
00786 #if USE_TOPOMAP 
00787 // Figure out which proxies we will definitely create on other nodes,
00788 // without regard for non-bonded computes.  This code is swiped from
00789 // ProxyMgr, and changes there probably need to be propagated here.
00790 // The proxies are placed on nearby processors on the 3d-grid along
00791 // the X, Y, Z and T dimensions
00792 
00793 int NamdCentLB::requiredProxiesOnProcGrid(PatchID id, int neighborNodes[])
00794 {
00795   enum proxyHere { No, Yes };
00796   int numPes = CkNumPes();
00797   proxyHere *proxyNodes = new proxyHere[numPes];
00798   int nProxyNodes;
00799   int i, j, k, l;
00800 
00801   int xsize = 0, ysize = 0, zsize = 0, tsize = 0;
00802   int my_x = 0, my_y = 0, my_z = 0, my_t = 0;
00803 
00804   PatchMap* patchMap = PatchMap::Object();
00805   int myNode = patchMap->node(id);
00806     
00807   TopoManager tmgr;
00808   xsize = tmgr.getDimNX();
00809   ysize = tmgr.getDimNY();
00810   zsize = tmgr.getDimNZ();
00811   tsize = tmgr.getDimNT();
00812   
00813   tmgr.rankToCoordinates(myNode, my_x, my_y, my_z, my_t);
00814   
00815   if(xsize * ysize * zsize * tsize != CkNumPes()) {
00816     delete [] proxyNodes;
00817     return requiredProxies(id, neighborNodes);
00818   }  
00819 
00820   // Note all home patches.
00821   for ( i = 0; i < numPes; ++i )
00822   {
00823     proxyNodes[i] = No;
00824   }
00825   nProxyNodes = 0;
00826 
00827   // Check all two-away neighbors.
00828   // This is really just one-away neighbors, since 
00829   // two-away always returns zero: RKB
00830   PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00831 
00832   // Assign a proxy to all your neighbors. But dont increment counter
00833   // because these have to be there anyway.
00834   neighbors[0] = id;  
00835   int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00836   
00837   // Small Flag chooses between different loadbalancing schemes.
00838   // Small Flag == true, patches are close to each other
00839   // false, patches are far from each other
00840   bool smallFlag = false;
00841   double pnodes = CkNumPes();
00842   pnodes *= 0.25;    
00843   smallFlag = (patchMap->numPatches() > pnodes )?1:0;
00844 
00845   //If there are lot of patches its likely they will all be neighbors, 
00846   //so all we need to do is to place proxies on downstream patches.
00847   //if (smallFlag) {
00848   for ( i = 1; i < numNeighbors; ++i )
00849     {
00850       int proxyNode = patchMap->basenode(neighbors[i]);
00851       
00852       if (proxyNode != myNode)
00853         if (proxyNodes[proxyNode] == No)
00854           {
00855             proxyNodes[proxyNode] = Yes;
00856             neighborNodes[nProxyNodes] = proxyNode;
00857             nProxyNodes++;
00858           }
00859     }
00860   //}
00861  
00862   if (step() > 2) {
00863     delete [] proxyNodes;
00864     return nProxyNodes;
00865   }
00866  
00867   // Place numPesPerPatch proxies on the 3d torus neighbors of a processor
00868 
00869   int numPatches = patchMap->numPatches();
00870   int emptyNodes = numPes - numPatches;
00871   //if ( emptyNodes > numPatches ) {
00872   
00873   int nodesPerPatch = nProxyNodes + 4 * (emptyNodes-1) / numPatches + 1;
00874   int proxyNode = 0 ;
00875   int proxy_x=0, proxy_y=0, proxy_z=0;
00876   
00877   //Choose from the 26 neighbors of mynode.
00878   //CkAssert(nodesPerPatch - nProxyNodes <= 26);  
00879   //Too few patches otherwise, try twoaway?
00880   
00881   for(k=-1; k<= 1; k++) {
00882     proxy_z = (my_z + k + zsize) % zsize;
00883     for(j=-1; j <= 1; j++) {
00884       proxy_y = (my_y + j + ysize) % ysize;
00885       for(i = -1; i <= 1; i++) {
00886         proxy_x = (my_x + i + xsize) % xsize;
00887         for(l = 0; l < tsize; l++) {
00888           if(i == 0 && j == 0 && k == 0 && l == 0)
00889             continue;
00890 
00891           proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z, l);
00892 
00893           if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00894              proxyNodes[proxyNode] == No) {
00895             proxyNodes[proxyNode] = Yes;
00896             neighborNodes[nProxyNodes] = proxyNode;
00897             nProxyNodes++;
00898           }
00899           
00900           if(nProxyNodes >= nodesPerPatch || 
00901              nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00902             break;
00903         } // end for
00904 
00905         if(nProxyNodes >= nodesPerPatch || 
00906            nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00907           break;
00908       } // end for
00909       
00910       if(nProxyNodes >= nodesPerPatch || 
00911          nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00912         break;    
00913     } // end for
00914 
00915     if(nProxyNodes >= nodesPerPatch || 
00916        nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00917       break;      
00918   } // end for
00919 
00920 #if 1
00921   if(!smallFlag) {
00922     for(k=-2; k<= 2; k+=2) {
00923       proxy_z = (my_z + k + zsize) % zsize;
00924       for(j=-2; j <= 2; j+=2) {
00925         proxy_y = (my_y + j + ysize) % ysize;
00926         for(i = -2; i <= 2; i+=2) {
00927           proxy_x = (my_x + i + xsize) % xsize;
00928           for(l = 0; l < tsize; l++) {
00929             if(i == 0 && j == 0 && k == 0 && l == 0)
00930               continue;
00931           
00932             proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z, l);
00933           
00934             if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00935                proxyNodes[proxyNode] == No) {
00936               proxyNodes[proxyNode] = Yes;
00937               neighborNodes[nProxyNodes] = proxyNode;
00938               nProxyNodes++;
00939             }
00940             
00941             if(nProxyNodes >= nodesPerPatch || 
00942                nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00943               break;
00944           } // end for
00945 
00946           if(nProxyNodes >= nodesPerPatch || 
00947              nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00948             break;
00949         } // end for
00950         
00951         if(nProxyNodes >= nodesPerPatch || 
00952            nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00953           break;          
00954       } // end for
00955 
00956       if(nProxyNodes >= nodesPerPatch || 
00957          nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00958         break;    
00959     } // end for
00960   }
00961 
00962 #else
00963   #if 0
00964   const SimParameters* params = Node::Object()->simParameters;
00965 
00966   if(!smallFlag) {
00967     //Add two-away proxies
00968     if(patchMap->numaway_a() == 2) {
00969       proxy_y = (my_y + 2) % ysize;
00970       proxy_x = my_x  % xsize;
00971       proxy_z = my_z  % zsize;
00972       
00973       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00974       if(proxyNodes[proxyNode] == No) {
00975         proxyNodes[proxyNode] = Yes;
00976         neighborNodes[nProxyNodes] = proxyNode;
00977       nProxyNodes++;
00978       }
00979       
00980       proxy_y = (my_y - 2 + ysize) % ysize;
00981       proxy_x = my_x  % xsize;
00982       proxy_z = my_z % zsize;
00983       
00984       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00985       if(proxyNodes[proxyNode] == No) {
00986         proxyNodes[proxyNode] = Yes;
00987         neighborNodes[nProxyNodes] = proxyNode;
00988         nProxyNodes++;
00989       }
00990     }
00991     
00992     //Add two away proxies
00993     if(patchMap->numaway_b() == 2) {
00994       proxy_y = my_y  % ysize;
00995       proxy_x = my_x  % xsize;
00996       proxy_z = (my_z + 2) % zsize;
00997       
00998       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00999       if(proxyNodes[proxyNode] == No) {
01000         proxyNodes[proxyNode] = Yes;
01001         neighborNodes[nProxyNodes] = proxyNode;
01002         nProxyNodes++;
01003       }
01004       
01005       proxy_y = my_y  % ysize;
01006       proxy_x = my_x  % xsize;
01007       proxy_z = (my_z - 2 + zsize) % zsize;
01008       
01009       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01010       if(proxyNodes[proxyNode] == No) {
01011         proxyNodes[proxyNode] = Yes;
01012         neighborNodes[nProxyNodes] = proxyNode;
01013         nProxyNodes++;
01014       }
01015     }
01016     
01017     //Add two away proxies
01018     if(patchMap->numaway_c() == 2) {
01019       proxy_y = my_y  % ysize;
01020       proxy_x = (my_x + 2) % xsize;
01021       proxy_z = my_z  % zsize;
01022       
01023       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01024       if(proxyNodes[proxyNode] == No) {
01025         proxyNodes[proxyNode] = Yes;
01026         neighborNodes[nProxyNodes] = proxyNode;
01027       nProxyNodes++;
01028       }
01029       
01030       proxy_y = my_y  % ysize;
01031       proxy_x = (my_x  - 2 + xsize) % xsize;
01032       proxy_z = my_z % zsize;
01033       
01034       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01035       if(proxyNodes[proxyNode] == No) {
01036         proxyNodes[proxyNode] = Yes;
01037         neighborNodes[nProxyNodes] = proxyNode;
01038         nProxyNodes++;
01039       }
01040     }
01041   }
01042   #endif
01043 #endif
01044   
01045   // CkPrintf("Returning %d proxies\n", nProxyNodes);
01046 
01047   delete [] proxyNodes;
01048   return nProxyNodes;
01049 }
01050 
01051 #endif

Generated on Sat Nov 18 01:17:14 2017 for NAMD by  doxygen 1.4.7