Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

NamdCentLB.C

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * $Source: /home/cvs/namd/cvsroot/namd2/src/NamdCentLB.C,v $
00003  * $Author: bhatele $
00004  * $Date: 2008/08/28 03:36:23 $
00005  * $Revision: 1.87 $
00006  *****************************************************************************/
00007 
00008 #if !defined(WIN32) || defined(__CYGWIN__)
00009 #include <unistd.h>
00010 #endif
00011 #include <fcntl.h>
00012 
00013 #include "InfoStream.h"
00014 #include "NamdCentLB.h"
00015 #include "NamdCentLB.def.h"
00016 #include "Node.h"
00017 #include "PatchMap.h"
00018 #include "ComputeMap.h"
00019 #include "LdbCoordinator.h"
00020 
00021 // #define DUMP_LDBDATA 1
00022 // #define LOAD_LDBDATA 1
00023 
00024 double *cpuloads = NULL;
00025 
00026 void CreateNamdCentLB()
00027 {
00028   // CkPrintf("[%d] creating NamdCentLB %d\n",CkMyPe(),loadbalancer);
00029   loadbalancer = CProxy_NamdCentLB::ckNew();
00030   // CkPrintf("[%d] created NamdCentLB %d\n",CkMyPe(),loadbalancer);
00031   cpuloads = new double[CkNumPes()];
00032   for (int i=0; i<CkNumPes(); i++) cpuloads[i] = 0.0;
00033 }
00034 
00035 #if CHARM_VERSION > 050610
00036 NamdCentLB::NamdCentLB(): CentralLB(CkLBOptions(-1))
00037 #else
00038 NamdCentLB::NamdCentLB()
00039 #endif
00040 {
00041   //  if (CkMyPe()==0)
00042   //   CkPrintf("[%d] NamdCentLB created\n",CkMyPe());
00043   processorArray = 0;
00044   patchArray = 0;
00045   computeArray = 0;
00046 }
00047 
00048 /*
00049 NamdCentLB::~NamdCentLB()
00050 {
00051   delete [] processorArray;
00052   delete [] patchArray;
00053   delete [] computeArray;
00054 }
00055 */
00056 
00057 CmiBool NamdCentLB::QueryBalanceNow(int _step)
00058 {
00059   //  CkPrintf("[%d] Balancing on step %d\n",CkMyPe(),_step);
00060   if ( LdbCoordinator::Object()->takingLdbData ) {
00061     return CmiTrue;
00062   } else {
00063     return CmiFalse;
00064   }
00065 }
00066 
00067 CmiBool NamdCentLB::QueryDumpData()
00068 {
00069 #if 0
00070   if (LdbCoordinator::Object()->ldbCycleNum == 1)  return CmiTrue;
00071   if (LdbCoordinator::Object()->ldbCycleNum == 2)  return CmiTrue;
00072 #endif
00073   return CmiFalse;
00074 }
00075             
00076 CLBMigrateMsg* NamdCentLB::Strategy(CentralLB::LDStats* stats, int count)
00077 {
00078   //  CkPrintf("LDB: All statistics received at %f, %f\n",
00079   //  CmiTimer(),CmiWallTimer());
00080 
00081   int numProcessors = count;
00082   int numPatches = PatchMap::Object()->numPatches();
00083   ComputeMap *computeMap = ComputeMap::Object();
00084   const int numComputes = computeMap->numComputes();
00085   const SimParameters* simParams = Node::Object()->simParameters;
00086 
00087   // these sizes should never change
00088   if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00089   if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00090   if ( ! computeArray ) computeArray = new computeInfo[numComputes];
00091 
00092   int nMoveableComputes = buildData(stats,count);
00093 
00094 #if LDB_DEBUG
00095 #define DUMP_LDBDATA 1
00096 #define LOAD_LDBDATA 1
00097 #endif
00098 
00099 #if DUMP_LDBDATA 
00100   dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00101 #elif LOAD_LDBDATA
00102   loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00103   // CkExit();
00104 #endif
00105 
00106   if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00107     RefineOnly(computeArray, patchArray, processorArray,
00108                   nMoveableComputes, numPatches, numProcessors);
00109   } else if (simParams->ldbStrategy == LDBSTRAT_ALG7) {
00110     Alg7(computeArray, patchArray, processorArray,
00111                   nMoveableComputes, numPatches, numProcessors);
00112   } else if (simParams->ldbStrategy == LDBSTRAT_ASB8) {
00113     if (step() < 2)
00114       TorusLB(computeArray, patchArray, processorArray,
00115                   nMoveableComputes, numPatches, numProcessors);
00116     else
00117       RefineTorusLB(computeArray, patchArray, processorArray,
00118                   nMoveableComputes, numPatches, numProcessors, 1);
00119   } else if (simParams->ldbStrategy == LDBSTRAT_OTHER) {
00120     if (step() < 2)
00121       Alg7(computeArray, patchArray, processorArray,
00122                   nMoveableComputes, numPatches, numProcessors);
00123     else
00124       RefineOnly(computeArray, patchArray, processorArray, 
00125                   nMoveableComputes, numPatches, numProcessors);
00126   }
00127 
00128 #if LDB_DEBUG && USE_TOPOMAP
00129   TopoManager tmgr;
00130   int pe1, pe2, pe3, hops=0;
00131   /* This is double counting the hops
00132   for(int i=0; i<nMoveableComputes; i++)
00133   {
00134     pe1 = computeArray[i].processor;
00135     pe2 = patchArray[computeArray[i].patch1].processor;
00136     pe3 = patchArray[computeArray[i].patch2].processor;
00137     hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00138     if(computeArray[i].patch1 != computeArray[i].patch2)
00139       hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
00140   }*/
00141   for (int i=0; i<numPatches; i++)  {
00142     //int num = patchArray[i].proxiesOn.numElements();
00143     pe1 = patchArray[i].processor;
00144     Iterator nextProc;
00145     processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00146     while (p) {
00147       pe2 = p->Id;
00148       hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00149       p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00150     }
00151   }
00152   CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00153 #endif
00154 
00155 #if DUMP_LDBDATA
00156   dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00157 #elif LOAD_LDBDATA
00158   dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00159   // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00160   // CkExit();
00161 #endif
00162 
00163   // For error checking:
00164   // Count up computes, to see if somebody doesn't have any computes
00165   int i;
00166 #if 0
00167   int* computeCount = new int[numProcessors];
00168   for(i=0; i<numProcessors; i++)
00169     computeCount[i]=0;
00170   for(i=0; i<nMoveableComputes; i++)
00171     computeCount[computeArray[i].processor]++;
00172   for(i=0; i<numProcessors; i++) {
00173     if (computeCount[i]==0)
00174       iout << iINFO <<"Warning: Processor " << i 
00175            << " has NO moveable computes.\n" << endi;
00176   }
00177   delete [] computeCount;
00178 #endif
00179   
00180   CkVec<MigrateInfo *> migrateInfo;
00181   for(i=0;i<nMoveableComputes;i++) {
00182     if (computeArray[i].processor != computeArray[i].oldProcessor) {
00183       //      CkPrintf("[%d] Obj %d migrating from %d to %d\n",
00184       //               CkMyPe(),computeArray[i].handle.id.id[0],
00185       //               computeArray[i].processor,computeArray[i].oldProcessor);
00186       MigrateInfo *migrateMe = new MigrateInfo;
00187       migrateMe->obj = computeArray[i].handle;
00188       migrateMe->from_pe = computeArray[i].oldProcessor;
00189       migrateMe->to_pe = computeArray[i].processor;
00190       migrateInfo.insertAtEnd(migrateMe);
00191 
00192       // sneak in updates to ComputeMap
00193       computeMap->setNewNode(computeArray[i].handle.id.id[0],
00194                                 computeArray[i].processor);
00195     }
00196   }
00197   
00198   int migrate_count=migrateInfo.length();
00199   // CkPrintf("NamdCentLB migrating %d elements\n",migrate_count);
00200 #if CHARM_VERSION > 050611
00201   CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
00202 #else
00203   CLBMigrateMsg* msg = new(&migrate_count,1) CLBMigrateMsg;
00204 #endif
00205   msg->n_moves = migrate_count;
00206   for(i=0; i < migrate_count; i++) {
00207     MigrateInfo* item = migrateInfo[i];
00208     msg->moves[i] = *item;
00209     delete item;
00210     migrateInfo[i] = 0;
00211   }
00212 
00213   for (i=0; i<numProcessors; i++) {
00214     cpuloads[i] = processorArray[i].load;
00215   }
00216 
00217   delete [] processorArray;
00218   delete [] patchArray;
00219   delete [] computeArray;
00220 
00221   processorArray = NULL;
00222   patchArray = NULL;
00223   computeArray = NULL;
00224   
00225   return msg;
00226 };
00227 
00228 #ifndef WIN32
00229 
00230 void NamdCentLB::dumpDataASCII(char *file, int numProcessors,
00231                                int numPatches, int numComputes)
00232 {
00233   char filename[128];
00234   sprintf(filename, "%s.%d", file, step());
00235   FILE* fp = fopen(filename,"w");
00236   if (fp == NULL){
00237      perror("dumpLDStatsASCII");
00238      return;
00239   }
00240   CkPrintf("***** DUMP data to file: %s ***** \n", filename);
00241   fprintf(fp,"%d %d %d\n",numProcessors,numPatches,numComputes);
00242 
00243   int i;
00244   for(i=0;i<numProcessors;i++) {
00245     processorInfo* p = processorArray + i;
00246     fprintf(fp,"%d %e %e %e %e\n",p->Id,p->load,p->backgroundLoad,p->computeLoad,p->idleTime);
00247   }
00248 
00249   for(i=0;i < numPatches; i++) {
00250     patchInfo* p = patchArray + i;
00251     fprintf(fp,"%d %e %d %d\n",p->Id,p->load,p->processor,p->numAtoms);
00252   }
00253     
00254   for(i=0; i < numComputes; i++) {
00255     computeInfo* c = computeArray + i;
00256     fprintf(fp,"%d %e %d %d %d %d",c->Id,c->load,c->patch1,c->patch2,
00257             c->processor,c->oldProcessor);
00258 #if CHARM_VERSION > 50910
00259     fprintf(fp, " %e %e", c->minTime, c->maxTime);
00260 #endif
00261     fprintf(fp, "\n");
00262   }
00263 
00264   // dump patchSet
00265   for (i=0; i< numProcessors; i++) {
00266       int num = processorArray[i].proxies.numElements();
00267       fprintf(fp, "%d %d: ", i, num);
00268       Iterator nextProxy;
00269       patchInfo *p = (patchInfo *)processorArray[i].proxies.
00270         iterator((Iterator *)&nextProxy);
00271       while (p) {
00272           fprintf(fp, "%d ", p->Id);
00273           p = (patchInfo *)processorArray[i].proxies.
00274             next((Iterator*)&nextProxy);
00275       }
00276       fprintf(fp, "\n");
00277   }
00278   // dump proxiesOn
00279   for (i=0; i<numPatches; i++)  {
00280     int num = patchArray[i].proxiesOn.numElements();
00281     fprintf(fp, "%d %d: ", i, num);
00282       Iterator nextProc;
00283       processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.
00284         iterator((Iterator *)&nextProc);
00285       while (p) {
00286         fprintf(fp, "%d ", p->Id);
00287         p = (processorInfo *)patchArray[i].proxiesOn.
00288           next((Iterator*)&nextProc);
00289       }
00290       fprintf(fp, "\n");
00291   }
00292 
00293   fclose(fp);
00294   //CkExit();
00295 }
00296 
00297 void NamdCentLB::loadDataASCII(char *file, int &numProcessors,
00298                                int &numPatches, int &numComputes)
00299 {
00300   char filename[128];
00301   //sprintf(filename, "%s.%d", file, step());
00302   sprintf(filename, "%s", file);
00303 
00304   CkPrintf("***** Load ascii data from file: %s ***** \n", filename);
00305 
00306   FILE* fp = fopen(filename, "r");
00307   if (fp == NULL){
00308      perror("loadDataASCII");
00309      return;
00310   }
00311 
00312   fscanf(fp,"%d %d %d",&numProcessors,&numPatches,&numComputes);
00313 
00314   printf("numProcs: %d numPatches: %d numComputes: %d\n", numProcessors,numPatches, numComputes);
00315 
00316   delete [] processorArray;
00317   delete [] patchArray;
00318   delete [] computeArray;
00319   processorArray = new processorInfo[numProcessors];
00320   patchArray = new patchInfo[numPatches];
00321   computeArray = new computeInfo[numComputes];
00322 
00323   int i;
00324   for(i=0;i<numProcessors;i++) {
00325     processorInfo* p = processorArray + i;
00326     fscanf(fp,"%d %le %le %le", &p->Id, &p->load, &p->backgroundLoad, &p->computeLoad);
00327     fscanf(fp,"%le\n", &p->idleTime);
00328     if (p->Id != i) CmiAbort("Reading processorArray error!");
00329 //    p->backgroundLoad = 0.0;
00330   }
00331 
00332   for(i=0;i < numPatches; i++) {
00333     patchInfo* p = patchArray + i;
00334     fscanf(fp,"%d %le %d %d\n",&p->Id,&p->load,&p->processor,&p->numAtoms);
00335     if (p->Id != i || p->processor > numProcessors || p->processor < 0) 
00336       CmiAbort("Reading patchArray error!");
00337   }
00338     
00339   for(i=0; i < numComputes; i++) {
00340     computeInfo* c = computeArray + i;
00341     fscanf(fp,"%d %le %d %d %d %d",&c->Id,&c->load,&c->patch1,&c->patch2,
00342             &c->processor,&c->oldProcessor);
00343 #if CHARM_VERSION > 50910
00344     fscanf(fp, " %le %le", &c->minTime, &c->maxTime);
00345 #endif
00346     if (c->patch1 < 0 || c->patch1 > numPatches || c->patch2 < 0 || c->patch2 > numPatches)
00347       CmiAbort("Reading computeArray error!");
00348 //  printf("%d %e %d %d %d %d %e %e\n", c->Id,c->load,c->patch1,c->patch2,c->processor,c->oldProcessor,c->minTime,c->maxTime);
00349   }
00350 
00351   // dump patchSet
00352   for (i=0; i< numProcessors; i++) {
00353       int num = processorArray[i].proxies.numElements();
00354       fscanf(fp,"%d",&num);
00355       for (int j=0; j<num; j++) {
00356           int id;
00357           fscanf(fp,"%d",&id);
00358           processorArray[i].proxies.insert(&patchArray[id]);
00359       }
00360   }
00361   // dump proxiesOn
00362   for (i=0; i<numPatches; i++)  {
00363       int num;
00364       fscanf(fp,"%d",&num);
00365       for (int j=0; j<num; j++) {
00366           int id;
00367           fscanf(fp,"%d",&id);
00368           patchArray[i].proxiesOn.insert(&processorArray[id]);
00369       }
00370   }
00371 
00372   fclose(fp);
00373 }
00374 #endif
00375 
00376 extern int isPmeProcessor(int); 
00377 
00378 int NamdCentLB::buildData(CentralLB::LDStats* stats, int count)
00379 {
00380   PatchMap* patchMap = PatchMap::Object();
00381   ComputeMap* computeMap = ComputeMap::Object();
00382   const SimParameters* simParams = Node::Object()->simParameters;
00383 
00384   BigReal bgfactor = simParams->ldbBackgroundScaling;
00385   BigReal pmebgfactor = simParams->ldbPMEBackgroundScaling;
00386   BigReal homebgfactor = simParams->ldbHomeBackgroundScaling;
00387   int pmeOn = simParams->PMEOn;
00388   int unLoadPme = simParams->ldbUnloadPME;
00389   int pmeBarrier = simParams->PMEBarrier;
00390   int unLoadZero = simParams->ldbUnloadZero;
00391   int unLoadOne = simParams->ldbUnloadOne;
00392   int unLoadRankZero = simParams->ldbUnloadRankZero;
00393   int unLoadSMP = simParams->ldbUnloadSMP;
00394 
00395   int i;
00396   for (i=0; i<count; ++i) {
00397     processorArray[i].Id = i;
00398     processorArray[i].available = CmiTrue;
00399     if ( pmeOn && isPmeProcessor(i) ) {
00400 #if CHARM_VERSION > 050607
00401       processorArray[i].backgroundLoad = pmebgfactor * stats->procs[i].bg_walltime;
00402 #else
00403       processorArray[i].backgroundLoad = pmebgfactor * stats[i].bg_walltime;
00404 #endif
00405     } else if (patchMap->numPatchesOnNode(i) > 0) {
00406 #if CHARM_VERSION > 050607
00407       processorArray[i].backgroundLoad = homebgfactor * stats->procs[i].bg_walltime;
00408 #else
00409       processorArray[i].backgroundLoad = homebgfactor * stats[i].bg_walltime;
00410 #endif
00411     } else {
00412 #if CHARM_VERSION > 050607
00413       processorArray[i].backgroundLoad = bgfactor * stats->procs[i].bg_walltime;
00414 #else
00415       processorArray[i].backgroundLoad = bgfactor * stats[i].bg_walltime;
00416 #endif
00417     }
00418     processorArray[i].idleTime = stats->procs[i].idletime;
00419     processorArray[i].load = processorArray[i].computeLoad = 0.0;
00420   }
00421 
00422 /* *********** this code is defunct *****************
00423 #if 0
00424   double bgfactor = 1.0 + 1.0 * CkNumPes()/1000.0;
00425   if ( bgfactor > 2.0 ) bgfactor = 2.0;
00426   iout << iINFO << "Scaling background load by " << bgfactor << ".\n" << endi;
00427   int i;
00428   for (i=0; i<count; i++) {
00429     processorArray[i].Id = i;
00430     processorArray[i].backgroundLoad = bgfactor * stats[i].bg_walltime;
00431   }
00432 
00433   double bg_weight = 0.7;
00434 
00435   int i;
00436   for (i=0; i<count; i++) {
00437     processorArray[i].Id = i;
00438     if (patchMap->numPatchesOnNode(i) > 0)
00439 #if CHARM_VERSION > 050607
00440       processorArray[i].backgroundLoad = bg_weight * stats->procs[i].bg_walltime;
00441 #else
00442       processorArray[i].backgroundLoad = bg_weight * stats[i].bg_walltime;
00443 #endif
00444     else 
00445 #if CHARM_VERSION > 050607
00446       processorArray[i].backgroundLoad = stats[i].bg_walltime;
00447 #else
00448       processorArray[i].backgroundLoad = stats->procs[i].bg_walltime;
00449 #endif
00450   }
00451   
00452   //Modification to reduce the coputeload on PME processors
00453   const SimParameters* simParams = Node::Object()->simParameters;  
00454   
00455   // CkPrintf("BACKGROUND LOAD\n");
00456   if(simParams->PMEOn) {
00457     double bgfactor = 1.0 + 1.0 * CkNumPes()/1000.0;
00458     if ( bgfactor > 2.0 ) bgfactor = 2.0;
00459     for (i=0; i<count; i++) {
00460       // CkPrintf("BG[%d] =  %5.5lf,", i, processorArray[i].backgroundLoad);
00461       if(isPmeProcessor(i)) {
00462         processorArray[i].backgroundLoad *= bgfactor;
00463       }
00464       // CkPrintf("%5.5lf;  ", processorArray[i].backgroundLoad);
00465     }
00466   }
00467   // CkPrintf("\n");
00468 #endif  
00469 *********** end of defunct code *********** */
00470 
00471   if (unLoadZero) processorArray[0].available = CmiFalse;
00472   if (unLoadOne) processorArray[1].available = CmiFalse;
00473   if (unLoadRankZero) {
00474     for (int i=0; i<count; i+=4) 
00475       processorArray[i].available = CmiFalse;
00476   }
00477 
00478   // if all pes are Pme, disable this flag
00479   if (pmeOn && unLoadPme) {
00480     for (i=0; i<count; i++) {
00481       if (!isPmeProcessor(i))  break;
00482     }
00483     if (i==count) {
00484       iout << iINFO << "Turned off unLoadPme flag!\n"  << endi;
00485       unLoadPme = 0;
00486     }
00487   }
00488   
00489   if (pmeOn && unLoadPme) {
00490     for (i=0; i<count; i++) {
00491       if ((pmeBarrier && i==0) || isPmeProcessor(i)) 
00492         processorArray[i].available = CmiFalse;
00493     }
00494   }
00495 
00496   if (unLoadSMP) {
00497     int ppn = simParams->procsPerNode;
00498     int unloadrank = simParams->ldbUnloadRank;
00499     for (int i=0; i<count; i+=ppn) {
00500       processorArray[i+unloadrank].available = CmiFalse;
00501     }
00502   }
00503 
00504   int nMoveableComputes=0;
00505   int nProxies = 0;             // total number of estimated proxies
00506 #if CHARM_VERSION > 050607
00507   int j;
00508   for (j=0; j < stats->n_objs; j++) {
00509       const LDObjData &this_obj = stats->objData[j];
00510       int frompe = stats->from_proc[j];
00511 #else
00512   for (i=0; i < count; i++) {
00513     int j;
00514     for (j=0; j < stats[i].n_objs; j++) {
00515       const LDObjData &this_obj = stats[i].objData[j];
00516       int frompe = i;
00517 #endif
00518       // filter out non-NAMD managed objects (like PME array)
00519 #if CHARM_VERSION > 050405
00520       if (this_obj.omID().id.idx != 1) continue;
00521 #elif CHARM_VERSION > 050403
00522       if (this_obj.omID.id.idx != 1) continue;
00523 #else
00524       if (this_obj.omID.id != 1) continue;
00525 #endif
00526 #if CHARM_VERSION > 050405
00527       if (this_obj.id().id[1] == -2) { // Its a patch
00528         const int pid = this_obj.id().id[0];
00529 #else
00530       if (this_obj.id.id[1] == -2) { // Its a patch
00531         const int pid = this_obj.id.id[0];
00532 #endif
00533         int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00534 
00535         patchArray[pid].Id = pid;
00536         patchArray[pid].numAtoms = 0;
00537 #if CHARM_VERSION > 050607
00538         patchArray[pid].processor = stats->from_proc[j];
00539 #else
00540         patchArray[pid].processor = i;
00541 #endif
00542         const int numProxies = 
00543 #if USE_TOPOMAP
00544         requiredProxiesOnProcGrid(pid,neighborNodes);
00545 #else
00546         requiredProxies(pid, neighborNodes);
00547 #endif
00548 
00549         nProxies += numProxies;
00550 
00551         for (int k=0; k<numProxies; k++) {
00552           processorArray[neighborNodes[k]].proxies.insert(&patchArray[pid]);
00553           patchArray[pid].proxiesOn.insert(&processorArray[neighborNodes[k]]);
00554         }
00555       } else if (this_obj.migratable) { // Its a compute
00556 #if CHARM_VERSION > 050405
00557         const int cid = this_obj.id().id[0];
00558 #else
00559         const int cid = this_obj.id.id[0];
00560 #endif
00561         const int p0 = computeMap->pid(cid,0);
00562 
00563         // For self-interactions, just return the same pid twice
00564         int p1;
00565         if (computeMap->numPids(cid) > 1)
00566           p1 = computeMap->pid(cid,1);
00567         else p1 = p0;
00568         computeArray[nMoveableComputes].Id = cid;
00569 #if CHARM_VERSION > 050607
00570         computeArray[nMoveableComputes].oldProcessor = stats->from_proc[j];
00571         processorArray[stats->from_proc[j]].computeLoad += this_obj.wallTime;
00572 #else
00573         computeArray[nMoveableComputes].oldProcessor = i;
00574         processorArray[i].computeLoad += this_obj.wallTime;
00575 #endif
00576         computeArray[nMoveableComputes].processor = -1;
00577         computeArray[nMoveableComputes].patch1 = p0;
00578         computeArray[nMoveableComputes].patch2 = p1;
00579         computeArray[nMoveableComputes].handle = this_obj.handle;
00580         computeArray[nMoveableComputes].load = this_obj.wallTime;
00581 #if CHARM_VERSION > 50910
00582         computeArray[nMoveableComputes].minTime = this_obj.minWall;
00583         computeArray[nMoveableComputes].maxTime = this_obj.maxWall;
00584 #endif
00585         nMoveableComputes++;
00586       }
00587     }
00588 #if ! ( CHARM_VERSION > 050607 )
00589   }
00590 #endif
00591 
00592 /* *********** this code is defunct *****************
00593 #if 0
00594   int averageProxy = nProxies / count;
00595   CkPrintf("total proxies: %d, avervage: %d\n", nProxies, averageProxy);
00596   for (i=0; i<count; i++) {
00597     // too many proxies on this node, weight the background load
00598     int proxies = processorArray[i].proxies.numElements();
00599     if (proxies > averageProxy) {
00600       double factor = 1.0*(proxies-averageProxy)/nProxies;
00601       processorArray[i].backgroundLoad *= (1.0 + factor);
00602       CkPrintf("On [%d]: too many proxies: %d, increased bg load by %f\n", i, nProxies, factor);
00603     }
00604   }
00605 #endif
00606         *********** end of defunct code *********** */
00607   for (i=0; i<count; i++) {
00608     processorArray[i].load = processorArray[i].backgroundLoad + processorArray[i].computeLoad;
00609   }
00610   stats->clear();
00611   return nMoveableComputes;
00612 }
00613 
00614 // Figure out which proxies we will definitely create on other
00615 // nodes, without regard for non-bonded computes.  This code is swiped
00616 // from ProxyMgr, and changes there probable need to be propagated here.
00617 
00618 int NamdCentLB::requiredProxies(PatchID id, int neighborNodes[])
00619 {
00620   enum proxyHere { No, Yes };
00621   int numNodes = CkNumPes();
00622   proxyHere *proxyNodes = new proxyHere[numNodes];
00623   int nProxyNodes;
00624   int i;
00625 
00626   // Note all home patches.
00627   for ( i = 0; i < numNodes; ++i )
00628   {
00629     proxyNodes[i] = No;
00630   }
00631   nProxyNodes=0;
00632 
00633   // Check all two-away neighbors.
00634   // This is really just one-away neighbors, since 
00635   // two-away always returns zero: RKB
00636   PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00637 
00638   PatchMap* patchMap = PatchMap::Object();
00639 
00640   int myNode = patchMap->node(id);
00641   neighbors[0] = id;
00642   int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00643   for ( i = 0; i < numNeighbors; ++i )
00644   {
00645     const int proxyNode = patchMap->basenode(neighbors[i]);
00646     if (proxyNode != myNode)
00647       if (proxyNodes[proxyNode] == No)
00648       {
00649         proxyNodes[proxyNode] = Yes;
00650         neighborNodes[nProxyNodes] = proxyNode;
00651         nProxyNodes++;
00652       }
00653   }
00654 
00655   // Distribute initial default proxies across empty processors.
00656   // This shouldn't be necessary, but may constrain the load balancer
00657   // and avoid placing too many proxies on a single processor.  -JCP
00658   
00659   // This code needs to be turned off when the creation of ST is
00660   // shifted to the load balancers -ASB
00661 
00662 #if 1
00663   int numPatches = patchMap->numPatches();
00664   int emptyNodes = numNodes - numPatches;
00665   if ( emptyNodes > numPatches ) {
00666     int nodesPerPatch = nProxyNodes + 1 + (emptyNodes-1) / numPatches;
00667     int proxyNode = (myNode + 1) % numNodes;
00668     while ( nProxyNodes < nodesPerPatch &&
00669                         ! patchMap->numPatchesOnNode(proxyNode) ) {
00670       if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00671         proxyNodes[proxyNode] = Yes;
00672         neighborNodes[nProxyNodes] = proxyNode;
00673         nProxyNodes++;
00674       }
00675       proxyNode = (proxyNode + 1) % numNodes;
00676     }
00677     proxyNode = (myNode - 1 + numNodes) % numNodes;
00678     while ( nProxyNodes < nodesPerPatch &&
00679                         ! patchMap->numPatchesOnNode(proxyNode) ) {
00680       if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00681         proxyNodes[proxyNode] = Yes;
00682         neighborNodes[nProxyNodes] = proxyNode;
00683         nProxyNodes++;
00684       }
00685       proxyNode = (proxyNode - 1 + numNodes) % numNodes;
00686     }
00687     proxyNode = (myNode + 1) % numNodes;
00688     int count = 0;
00689     while ( nProxyNodes < nodesPerPatch ) {
00690       if ( ! patchMap->numPatchesOnNode(proxyNode) &&
00691            proxyNode != myNode && proxyNodes[proxyNode] == No) {
00692         proxyNodes[proxyNode] = Yes;
00693         neighborNodes[nProxyNodes] = proxyNode;
00694         nProxyNodes++;
00695       }
00696       proxyNode = (proxyNode + 1) % numNodes;
00697       count ++; if (count == numNodes) break;   // we looped all
00698     }
00699   } else {
00700     int proxyNode = myNode - 1;
00701     if ( proxyNode >= 0 && ! patchMap->numPatchesOnNode(proxyNode) ) {
00702       if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00703         proxyNodes[proxyNode] = Yes;
00704         neighborNodes[nProxyNodes] = proxyNode;
00705         nProxyNodes++;
00706       }
00707     }
00708     proxyNode = myNode + 1;
00709     if ( proxyNode < numNodes && ! patchMap->numPatchesOnNode(proxyNode) ) {
00710       if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00711         proxyNodes[proxyNode] = Yes;
00712         neighborNodes[nProxyNodes] = proxyNode;
00713         nProxyNodes++;
00714       }
00715     }
00716   }
00717 #endif
00718 
00719   delete [] proxyNodes;
00720   return nProxyNodes;
00721 }
00722 
00723 #if USE_TOPOMAP 
00724 // Figure out which proxies we will definitely create on other nodes,
00725 // without regard for non-bonded computes.  This code is swiped from
00726 // ProxyMgr, and changes there probable need to be propagated here.
00727 // The proxies are placed on nearby processors on the 3d-grid along
00728 // the X,Y,Z dimensions
00729 
00730 int NamdCentLB::requiredProxiesOnProcGrid(PatchID id, int neighborNodes[])
00731 {
00732   enum proxyHere { No, Yes };
00733   int numNodes = CkNumPes();
00734   proxyHere *proxyNodes = new proxyHere[numNodes];
00735   int nProxyNodes;
00736   int i,j,k;
00737 
00738   int xsize = 0, ysize = 0, zsize = 0;
00739   int my_x =0, my_y = 0, my_z = 0;
00740 
00741   PatchMap* patchMap = PatchMap::Object();
00742   int myNode = patchMap->node(id);
00743     
00744   TopoManager tmgr;
00745   xsize = tmgr.getDimX();
00746   ysize = tmgr.getDimY();
00747   zsize = tmgr.getDimZ();
00748   
00749   tmgr.rankToCoordinates(myNode, my_x, my_y, my_z);
00750   
00751   if(xsize * ysize * zsize != CkNumPes()) {
00752     delete [] proxyNodes;
00753     return requiredProxies(id, neighborNodes);
00754   }  
00755 
00756 
00757   // Note all home patches.
00758   for ( i = 0; i < numNodes; ++i )
00759   {
00760     proxyNodes[i] = No;
00761   }
00762   nProxyNodes=0;
00763 
00764   // Check all two-away neighbors.
00765   // This is really just one-away neighbors, since 
00766   // two-away always returns zero: RKB
00767   PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00768 
00769   //Assign a proxy to all your neighbors. But dont increment counter
00770   //because these have to be there anyway.
00771   
00772   neighbors[0] = id;  
00773   int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00774   
00775   //Small Flag chooses between different loadbalancing schemes.
00776   //Small Flag == true, patches are close to each other
00777   //false, patches are far from each other
00778   CmiBool smallFlag = CmiFalse;
00779   double pnodes = CkNumPes();
00780   pnodes *= 0.25;    
00781   smallFlag = (patchMap->numPatches() > pnodes )?1:0;
00782 
00783   //If there are lot of patches its likely they will all be neighbors, 
00784   //so all we need to do is to place proxies on downstream patches.
00785   //if (smallFlag) {
00786   for ( i = 1; i < numNeighbors; ++i )
00787     {
00788       int proxyNode = patchMap->basenode(neighbors[i]);
00789       
00790       if (proxyNode != myNode)
00791         if (proxyNodes[proxyNode] == No)
00792           {
00793             proxyNodes[proxyNode] = Yes;
00794             neighborNodes[nProxyNodes] = proxyNode;
00795             nProxyNodes++;
00796           }
00797     }
00798   //}
00799  
00800   if (step() > 2) {
00801     delete [] proxyNodes;
00802     return nProxyNodes;
00803   }
00804  
00805   //Place numNodesPerPatch proxies on the 3d torus neighbors of a processor
00806 
00807   int numPatches = patchMap->numPatches();
00808   int emptyNodes = numNodes - numPatches;
00809   //if ( emptyNodes > numPatches ) {
00810   
00811   int nodesPerPatch = nProxyNodes + 4 * (emptyNodes-1) / numPatches + 1;
00812   int proxyNode = 0 ;
00813   int proxy_x=0, proxy_y=0, proxy_z=0;
00814   
00815   //Choose from the 26 neighbors of mynode.
00816   //CkAssert(nodesPerPatch - nProxyNodes <= 26);  
00817   //Too few patches otherwise, try twoaway?
00818   
00819   for(k=-1; k<= 1; k++) {
00820     proxy_z = (my_z + k + zsize) % zsize;
00821     for(j=-1; j <= 1; j++) {
00822       proxy_y = (my_y + j + ysize) % ysize;
00823       for(i = -1; i <= 1; i++) {
00824         if(i == 0 && j == 0 && k == 0)
00825           continue;
00826 
00827         proxy_x = (my_x + i + xsize) % xsize;
00828         proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00829 
00830         if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00831            proxyNodes[proxyNode] == No) {
00832           proxyNodes[proxyNode] = Yes;
00833           neighborNodes[nProxyNodes] = proxyNode;
00834           nProxyNodes++;
00835         }
00836         
00837         if(nProxyNodes >= nodesPerPatch || 
00838            nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00839           break;          
00840       }
00841       
00842       if(nProxyNodes >= nodesPerPatch || 
00843          nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00844         break;    
00845     }
00846     if(nProxyNodes >= nodesPerPatch || 
00847        nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00848       break;      
00849   }        
00850 
00851 #if 1
00852   if(!smallFlag) {
00853     for(k=-2; k<= 2; k+=2) {
00854       proxy_z = (my_z + k + zsize) % zsize;
00855       for(j=-2; j <= 2; j+=2) {
00856         proxy_y = (my_y + j + ysize) % ysize;
00857         for(i = -2; i <= 2; i+=2) {
00858           if(i == 0 && j == 0 && k == 0)
00859             continue;
00860           
00861           proxy_x = (my_x + i + xsize) % xsize;
00862           proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00863           
00864           if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00865              proxyNodes[proxyNode] == No) {
00866             proxyNodes[proxyNode] = Yes;
00867             neighborNodes[nProxyNodes] = proxyNode;
00868             nProxyNodes++;
00869           }
00870           
00871           if(nProxyNodes >= nodesPerPatch || 
00872              nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00873             break;        
00874         }
00875         
00876         if(nProxyNodes >= nodesPerPatch || 
00877            nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00878           break;          
00879       }
00880       if(nProxyNodes >= nodesPerPatch || 
00881          nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00882         break;    
00883     }        
00884   }
00885 
00886 #else
00887   const SimParameters* params = Node::Object()->simParameters;
00888 
00889   if(!smallFlag) {
00890     //Add two-away proxies
00891     if(patchMap->numaway_a() == 2) {
00892       proxy_y = (my_y + 2) % ysize;
00893       proxy_x = my_x  % xsize;
00894       proxy_z = my_z  % zsize;
00895       
00896       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00897       if(proxyNodes[proxyNode] == No) {
00898         proxyNodes[proxyNode] = Yes;
00899         neighborNodes[nProxyNodes] = proxyNode;
00900       nProxyNodes++;
00901       }
00902       
00903       proxy_y = (my_y - 2 + ysize) % ysize;
00904       proxy_x = my_x  % xsize;
00905       proxy_z = my_z % zsize;
00906       
00907       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00908       if(proxyNodes[proxyNode] == No) {
00909         proxyNodes[proxyNode] = Yes;
00910         neighborNodes[nProxyNodes] = proxyNode;
00911         nProxyNodes++;
00912       }
00913     }
00914     
00915     //Add two away proxies
00916     if(patchMap->numaway_b() == 2) {
00917       proxy_y = my_y  % ysize;
00918       proxy_x = my_x  % xsize;
00919       proxy_z = (my_z + 2) % zsize;
00920       
00921       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00922       if(proxyNodes[proxyNode] == No) {
00923         proxyNodes[proxyNode] = Yes;
00924         neighborNodes[nProxyNodes] = proxyNode;
00925         nProxyNodes++;
00926       }
00927       
00928       proxy_y = my_y  % ysize;
00929       proxy_x = my_x  % xsize;
00930       proxy_z = (my_z - 2 + zsize) % zsize;
00931       
00932       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00933       if(proxyNodes[proxyNode] == No) {
00934         proxyNodes[proxyNode] = Yes;
00935         neighborNodes[nProxyNodes] = proxyNode;
00936         nProxyNodes++;
00937       }
00938     }
00939     
00940     //Add two away proxies
00941     if(patchMap->numaway_c() == 2) {
00942       proxy_y = my_y  % ysize;
00943       proxy_x = (my_x + 2) % xsize;
00944       proxy_z = my_z  % zsize;
00945       
00946       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00947       if(proxyNodes[proxyNode] == No) {
00948         proxyNodes[proxyNode] = Yes;
00949         neighborNodes[nProxyNodes] = proxyNode;
00950       nProxyNodes++;
00951       }
00952       
00953       proxy_y = my_y  % ysize;
00954       proxy_x = (my_x  - 2 + xsize) % xsize;
00955       proxy_z = my_z % zsize;
00956       
00957       proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00958       if(proxyNodes[proxyNode] == No) {
00959         proxyNodes[proxyNode] = Yes;
00960         neighborNodes[nProxyNodes] = proxyNode;
00961         nProxyNodes++;
00962       }
00963     }
00964   }
00965 #endif
00966   
00967   // CkPrintf("Returning %d proxies\n", nProxyNodes);
00968 
00969   delete [] proxyNodes;
00970   return nProxyNodes;
00971 }
00972 
00973 #endif

Generated on Mon Oct 13 04:07:42 2008 for NAMD by  doxygen 1.3.9.1