NamdCentLB Class Reference

#include <NamdCentLB.h>

List of all members.

Public Member Functions

 NamdCentLB ()
 NamdCentLB (CkMigrateMessage *)
CLBMigrateMsg * Strategy (LDStats *stats)


Detailed Description

Definition at line 52 of file NamdCentLB.h.


Constructor & Destructor Documentation

NamdCentLB::NamdCentLB (  ) 

Definition at line 50 of file NamdCentLB.C.

00050                       : CentralLB(CkLBOptions(-1))
00051 {
        // Default constructor: forwards CkLBOptions(-1) to the CentralLB base
        // and leaves the three work arrays unallocated (null). Strategy()
        // allocates each of them on demand when it finds the pointer null.
00052   //  if (CkMyPe()==0)
00053   //   CkPrintf("[%d] NamdCentLB created\n",CkMyPe());
00054   processorArray = 0;
00055   patchArray = 0;
00056   computeArray = 0;
00057 }

NamdCentLB::NamdCentLB ( CkMigrateMessage *  msg  ) 

Migratable Object Constructor.

Definition at line 44 of file NamdCentLB.C.

00044                                            : CentralLB(msg) {
        // Migration constructor: hands the migrate message to the CentralLB
        // base and, like the default constructor, starts with all three work
        // arrays null so that Strategy() allocates them on first use.
00045   processorArray = 0;
00046   patchArray = 0;
00047   computeArray = 0;
00048 } 


Member Function Documentation

CLBMigrateMsg * NamdCentLB::Strategy ( LDStats *  stats  ) 

Definition at line 87 of file NamdCentLB.C.

References averageLoad, processorInfo::backgroundLoad, cpuloads, endi(), computeInfo::handle, InfoRecord::Id, iINFO(), iout, LDBSTRAT_COMPREHENSIVE, LDBSTRAT_DEFAULT, LDBSTRAT_OLD, LDBSTRAT_REFINEONLY, InfoRecord::load, NAMD_die(), ComputeMap::numComputes(), ComputeMap::numPartitions(), PatchMap::numPatches(), numPatches, Node::Object(), ComputeMap::Object(), PatchMap::Object(), computeInfo::oldProcessor, computeInfo::processor, ComputeMap::setNewNode(), ComputeMap::setNewNumPartitions(), Node::simParameters, and simParams.

00088 {
        // Central load-balancing entry point.
        //
        // Outline of what this body does:
        //   1. Allocates the processor/patch/compute work arrays if they are
        //      null and fills them through buildData(stats).
        //   2. Computes the average compute load and the average per-processor
        //      load (compute load plus background load of available PEs).
        //   3. On step() == 1 only chooses partition counts for computes; on
        //      later steps runs the balancer selected by simParams->ldbStrategy.
        //   4. Builds a CLBMigrateMsg listing every compute whose processor
        //      differs from its oldProcessor, and records the new node in
        //      the ComputeMap.
        //   5. Copies the per-processor loads into cpuloads, frees the work
        //      arrays and returns the message.
        //
        // stats : load database statistics; only stats->nprocs() is read here
        //         directly, the rest is consumed by buildData().
        // Returns the newly allocated CLBMigrateMsg.
00089   //  CkPrintf("LDB: All statistics received at %f, %f\n",
00090   //  CmiTimer(),CmiWallTimer());
00091 
00092   int numProcessors = stats->nprocs();
00093   int numPatches = PatchMap::Object()->numPatches();
00094   ComputeMap *computeMap = ComputeMap::Object();
00095   const int numComputes = computeMap->numComputes();
00096   const SimParameters* simParams = Node::Object()->simParameters;
00097 
00098   // these sizes should never change
        // The arrays are deleted and reset to NULL at the end of this function,
        // so these allocations run again on every call.
00099   if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00100   if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00101   if ( ! computeArray ) computeArray = new computeInfo[numComputes];
00102 
        // buildData() is defined outside this listing; its return value is used
        // below as the number of valid entries at the front of computeArray.
00103   int nMoveableComputes = buildData(stats);
00104 
        // With LDB_DEBUG set, both DUMP_LDBDATA and LOAD_LDBDATA become 1; the
        // #if/#elif chains below then always take the DUMP_LDBDATA branch.
00105 #if LDB_DEBUG
00106 #define DUMP_LDBDATA 1
00107 #define LOAD_LDBDATA 1
00108 #endif
00109 
00110 #if DUMP_LDBDATA 
00111   dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00112 #elif LOAD_LDBDATA
00113   loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00114   // CkExit();
00115 #endif
00116 
        // averageLoad: (sum of moveable compute loads + background load of the
        //              available processors) / number of available processors.
        // avgCompute : mean load of a moveable compute.
        // Both stay 0 when there are no moveable computes.
00117   double averageLoad = 0.;
00118   double avgCompute = 0.;
00119   if ( nMoveableComputes ) {
00120    int i;
00121    double total = 0.;
00122    double maxCompute = 0.;
00123    int maxi = 0;
         // Sum compute loads and remember the index of the heaviest compute.
00124    for (i=0; i<nMoveableComputes; i++) {
00125       double load = computeArray[i].load;
00126       total += load;
00127       if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
00128    }
00129    avgCompute = total / nMoveableComputes;
00130 
         // P holds the same value as numProcessors (both are stats->nprocs()).
00131     int P = stats->nprocs();
00132    int numPesAvailable = 0;
         // Only processors flagged available contribute background load and
         // count toward the divisor.
00133    for (i=0; i<P; i++) {
00134       if (processorArray[i].available) {
00135         ++numPesAvailable;
00136         total += processorArray[i].backgroundLoad;
00137       }
00138    }
00139    if (numPesAvailable == 0)
00140      NAMD_die("No processors available for load balancing!\n");
00141 
00142    averageLoad = total/numPesAvailable;
         // NOTE(review): the percentages below divide by averageLoad, which
         // would be 0 if every summed load were 0 -- confirm whether that can
         // occur in practice.
00143    CkPrintf("LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
00144             computeArray[maxi].handle.id.id[0],
00145             maxCompute, 100. * maxCompute / averageLoad, averageLoad);
00146    CkPrintf("LDB: Average compute %f is %.1f%% of average load %f\n",
00147             avgCompute, 100. * avgCompute / averageLoad, averageLoad);
00148   }
00149 
00150   if ( step() == 1 ) {
00151     // compute splitting only
00152     // partitions are stored as char but mostly limited by
00153     // high load noise at low outer-loop iteration counts
00154     int maxParts = 10;
00155 #ifdef NAMD_CUDA
00156 //split LCPO compute very small, else CUDA compute is delayed
00157     if (simParams->LCPOOn) {
00158       maxParts = 20;
00159     }
00160 #endif
00161     int totalAddedParts = 0;
          // Target load per partition: the larger of averageLoad/10 and
          // 2*avgCompute, unless ldbRelativeGrainsize is positive, in which
          // case averageLoad * ldbRelativeGrainsize replaces it outright.
00162     double maxCompute = averageLoad / 10.;
00163     if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
00164     if ( simParams->ldbRelativeGrainsize > 0. ) {
00165       maxCompute = averageLoad * simParams->ldbRelativeGrainsize;
00166     }
00167     CkPrintf("LDB: Partitioning computes with target load %f\n", maxCompute);
00168     double maxUnsplit = 0.;
00169     for (int i=0; i<nMoveableComputes; i++) {
            // No migration happens on this step: every compute keeps its
            // current processor.
00170       computeArray[i].processor = computeArray[i].oldProcessor;
00171       const int cid = computeArray[i].handle.id.id[0];
00172       const double load = computeArray[i].load;
            // Computes reporting 0 partitions are left alone; only the largest
            // such load is tracked for the summary message.
00173       if ( computeMap->numPartitions(cid) == 0 ) {
00174         if ( load > maxUnsplit ) maxUnsplit = load;
00175         continue;
00176       }
            // Partition count = ceil(load / target), clamped to [1, maxParts].
00177       int nparts = (int) ceil(load / maxCompute);
00178       if ( nparts > maxParts ) nparts = maxParts;
00179       if ( nparts < 1 ) nparts = 1;
            // The leading "0 &&" keeps this diagnostic permanently disabled.
00180       if ( 0 && nparts > 1 ) {
00181         CkPrintf("LDB: Partitioning compute %d with load %f by %d\n",
00182                   cid, load, nparts);
00183       }
00184       computeMap->setNewNumPartitions(cid,nparts);
00185       totalAddedParts += nparts - 1;
00186     }
00187     CkPrintf("LDB: Increased migratable compute count from %d to %d\n",
00188               nMoveableComputes,nMoveableComputes+totalAddedParts);
00189     CkPrintf("LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
        // Later steps: dispatch on simParams->ldbStrategy. If the value matches
        // none of the four cases below, no balancing routine is called.
00190   } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
00191     if (step() < 4)
00192       TorusLB(computeArray, patchArray, processorArray,
00193                   nMoveableComputes, numPatches, numProcessors);
00194     else
00195       RefineTorusLB(computeArray, patchArray, processorArray,
00196                   nMoveableComputes, numPatches, numProcessors, 1);
00197   } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
00198     TorusLB(computeArray, patchArray, processorArray,
00199                   nMoveableComputes, numPatches, numProcessors);
00200   } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00201     RefineTorusLB(computeArray, patchArray, processorArray,
00202                   nMoveableComputes, numPatches, numProcessors, 1);
00203   } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
00204     if (step() < 4)
00205       Alg7(computeArray, patchArray, processorArray,
00206                   nMoveableComputes, numPatches, numProcessors);
00207     else
00208       RefineOnly(computeArray, patchArray, processorArray, 
00209                   nMoveableComputes, numPatches, numProcessors);
00210   }
00211 
        // Debug-only metric: for each patch, adds the value returned by
        // tmgr.getHopsBetweenRanks() between the patch's processor and every
        // processor in its proxiesOn set, then prints the total.
00212 #if LDB_DEBUG && USE_TOPOMAP
00213   TopoManager tmgr;
00214   int pe1, pe2, pe3, hops=0;
00215   /* This is double counting the hops
00216   for(int i=0; i<nMoveableComputes; i++)
00217   {
00218     pe1 = computeArray[i].processor;
00219     pe2 = patchArray[computeArray[i].patch1].processor;
00220     pe3 = patchArray[computeArray[i].patch2].processor;
00221     hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00222     if(computeArray[i].patch1 != computeArray[i].patch2)
00223       hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
00224   }*/
00225   for (int i=0; i<numPatches; i++)  {
00226     //int num = patchArray[i].proxiesOn.numElements();
00227     pe1 = patchArray[i].processor;
00228     Iterator nextProc;
00229     processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00230     while (p) {
00231       pe2 = p->Id;
00232       hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00233       p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00234     }
00235   }
00236   CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00237 #endif
00238 
        // Both branches here dump (the load call in the #elif branch is
        // commented out); only the output file name differs.
00239 #if DUMP_LDBDATA
00240   dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00241 #elif LOAD_LDBDATA
00242   dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00243   // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00244   // CkExit();
00245 #endif
00246 
00247   // For error checking:
00248   // Count up computes, to see if somebody doesn't have any computes
00249   int i;
00250 #if 0
00251   int* computeCount = new int[numProcessors];
00252   for(i=0; i<numProcessors; i++)
00253     computeCount[i]=0;
00254   for(i=0; i<nMoveableComputes; i++)
00255     computeCount[computeArray[i].processor]++;
00256   for(i=0; i<numProcessors; i++) {
00257     if (computeCount[i]==0)
00258       iout << iINFO <<"Warning: Processor " << i 
00259            << " has NO moveable computes.\n" << endi;
00260   }
00261   delete [] computeCount;
00262 #endif
00263   
        // Collect one heap-allocated MigrateInfo per compute whose assigned
        // processor changed; they are copied into the message and deleted below.
00264   CkVec<MigrateInfo *> migrateInfo;
00265   for(i=0;i<nMoveableComputes;i++) {
00266     if (computeArray[i].processor != computeArray[i].oldProcessor) {
            // NOTE(review): the commented-out print below passes the new
            // processor before the old one, so "from"/"to" would be swapped
            // if it were re-enabled.
00267       //      CkPrintf("[%d] Obj %d migrating from %d to %d\n",
00268       //               CkMyPe(),computeArray[i].handle.id.id[0],
00269       //               computeArray[i].processor,computeArray[i].oldProcessor);
00270       MigrateInfo *migrateMe = new MigrateInfo;
00271       migrateMe->obj = computeArray[i].handle;
00272       migrateMe->from_pe = computeArray[i].oldProcessor;
00273       migrateMe->to_pe = computeArray[i].processor;
00274       migrateInfo.insertAtEnd(migrateMe);
00275 
00276       // sneak in updates to ComputeMap
00277       computeMap->setNewNode(computeArray[i].handle.id.id[0],
00278                                 computeArray[i].processor);
00279     }
00280   }
00281   
00282   int migrate_count=migrateInfo.length();
00283   // CkPrintf("NamdCentLB migrating %d elements\n",migrate_count);
        // NOTE(review): the placement arguments presumably size the message's
        // variable-length arrays (moves first) -- confirm against the
        // CLBMigrateMsg declaration.
00284   CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
00285 
00286   msg->n_moves = migrate_count;
        // Copy each record by value into the message, then release the
        // temporary and clear its slot in the vector.
00287   for(i=0; i < migrate_count; i++) {
00288     MigrateInfo* item = migrateInfo[i];
00289     msg->moves[i] = *item;
00290     delete item;
00291     migrateInfo[i] = 0;
00292   }
00293 
        // Publish the per-processor loads into cpuloads (declared outside this
        // listing) before the work arrays are freed.
00294   for (i=0; i<numProcessors; i++) {
00295     cpuloads[i] = processorArray[i].load;
00296   }
00297 
        // Release the work arrays and null the pointers so that the next call
        // allocates them afresh.
00298   delete [] processorArray;
00299   delete [] patchArray;
00300   delete [] computeArray;
00301 
00302   processorArray = NULL;
00303   patchArray = NULL;
00304   computeArray = NULL;
00305   
00306   return msg;
00307 };


The documentation for this class was generated from the following files: NamdCentLB.h and NamdCentLB.C.
Generated on Sat Sep 23 01:17:20 2017 for NAMD by  doxygen 1.4.7