Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

LdbCoordinator Class Reference

#include <LdbCoordinator.h>

Inheritance diagram for LdbCoordinator:

BOCclass List of all members.

Public Member Functions

 LdbCoordinator ()
 ~LdbCoordinator (void)
void initialize (PatchMap *pmap, ComputeMap *cmap, int reinit=0)
void createLoadBalancer ()
void patchLoad (PatchID id, int nAtoms, int timestep)
void startWork (const LDObjHandle &handle)
void pauseWork (const LDObjHandle &handle)
void skipWork (const LDObjHandle &handle)
void endWork (const LDObjHandle &handle)
void rebalance (Sequencer *seq, PatchID id)
void rebalance (Controller *seq)
void nodeDone (CkReductionMsg *)
void updateComputesReady ()
void barrier (void)
void resume (void)
void resumeReady (CkQdMsg *msg)
void resume2 (void)
int getNumStepsToRun (void)
void ReceiveAtSync (void)
void Migrate (LDObjHandle handle, int dest)
void RecvMigrate (LdbMigrateMsg *)
void ExpectMigrate (LdbMigrateMsg *)
void ResumeFromSync (void)
void ExecuteMigrations (void)
void awakenSequencers (void)
int requiredProxies (PatchID id, int[])
void printRequiredProxies (PatchID id, FILE *fp)
void printLocalLdbReport (void)

Static Public Member Functions

LdbCoordinator * Object ()
void staticMigrateFn (LDObjHandle handle, int dest)
void staticStatsFn (LDOMHandle h, int state)
void staticQueryEstLoadFn (LDOMHandle h)
void staticReceiveAtSync (void *data)
void staticResumeFromSync (void *data)

Public Attributes

int stepsPerLdbCycle
int nLocalComputes
int nLocalPatches
int nPatchesReported
int nPatchesExpected
int nComputesReported
int nComputesExpected
int controllerReported
int controllerExpected
int nStatsMessagesReceived
int nStatsMessagesExpected
ComputeMap * computeMap
PatchMap * patchMap
int * patchNAtoms
int nPatches
Controller * controllerThread
Sequencer ** sequencerThreads
int ldbCycleNum
int numStepsToRun
int firstLdbStep
int totalStepsDone
int takingLdbData
FILE * ldbStatsFP
computeInfo * computeArray
patchInfo * patchArray
processorInfo * processorArray
LBDatabase * theLbdb
LDOMid myOMid
LDOMHandle myHandle
LdbMigrateMsg * migrateMsgs
int numComputes
int nRegisteredObjs
LDBarrierClient ldBarrierHandle
int reg_all_objs
LDObjHandle * patchHandles

Constructor & Destructor Documentation

LdbCoordinator::LdbCoordinator ( )
 

Definition at line 117 of file LdbCoordinator.C.

References computeArray, CreateNamdCentLB(), iERROR(), iFILE, iout, iPE(), ldBarrierHandle, ldbCycleNum, ldbStatsFP, migrateMsgs, myHandle, myOMid, nLocalComputes, nLocalPatches, numComputes, patchArray, patchNAtoms, processorArray, reg_all_objs, sequencerThreads, staticQueryEstLoadFn(), staticReceiveAtSync(), staticResumeFromSync(), takingLdbData, theLbdb, and totalStepsDone.

00118 {  // Records this object as the instance returned by Object() and registers it with the load-balancing database
00119   if (CkpvAccess(LdbCoordinator_instance) == NULL) {
00120     CkpvAccess(LdbCoordinator_instance) = this;
00121   } else {
00122     iout << iFILE << iERROR << iPE 
00123          << "LdbCoordinator instanced twice on same node!" << endi;
00124     CkExit();
00125   }
00126   
00127 #if 0
00128   // Create a load balancer
00129   if (CkMyPe() == 0) {
00130     //   CreateCentralLB();
00131     CreateNamdCentLB();
00132     //   CreateNamdNborLB();
00133   }
00134 #endif
00135 
00136   ldbCycleNum = 1;  // incremented by resume() before each reinitialization
00137   takingLdbData = 1;
00138   totalStepsDone = 0;
00139   nLocalComputes = nLocalPatches = 0;
00140   patchNAtoms = (int *) NULL;  // allocated in initialize()
00141   sequencerThreads = (Sequencer **) NULL;  // allocated in initialize()
00142   ldbStatsFP = NULL;
00143   computeArray = NULL;  // these three arrays are allocated in initialize() when CkMyPe() == 0
00144   patchArray = NULL;
00145   processorArray = NULL;
00146 
00147   // Register self as an object manager for new charm++ balancer framework
00148   theLbdb = LBDatabase::Object(); 
00149 
00150   // Set the load balancing period (in seconds).  Without this the
00151   // load balancing framework will hang until 1 second has passed
00152   // since the last load balancing, causing hiccups in very fast runs.
00153   // Unfortunately, the clock is already set for the first load
00154   // balancing, but only +LBPeriod 1.0e-5 can fix that in older charm.
00155   // For newer versions this is handled in initproc above.
00156 
00157   theLbdb->SetLBPeriod(1.0e-5);
00158 
00159   myOMid.id.idx = 1;
00160   LDCallbacks cb = { (LDMigrateFn)staticMigrateFn,
00161                      (LDStatsFn)staticStatsFn,
00162                      (LDQueryEstLoadFn)staticQueryEstLoadFn
00163                    };
00164   myHandle = theLbdb->RegisterOM(myOMid,(void*)this,cb);  // myHandle is passed to every later RegisterObj call
00165 
00166   // Add myself as a local barrier receiver, so I know when I might
00167   // be registering objects.
00168   theLbdb->AddLocalBarrierReceiver((LDBarrierFn)staticReceiveAtSync,
00169                                    (void*)this);;
00170 
00171   // Also, add a local barrier client, to trigger load balancing
00172   ldBarrierHandle = theLbdb->
00173     AddLocalBarrierClient((LDResumeFn)staticResumeFromSync,
00174                           (void*)this);
00175   migrateMsgs = 0; // linked list
00176   numComputes = 0;
00177   reg_all_objs = 1;  // initialize() registers objects when ldbCycleNum equals this value
00178 }

LdbCoordinator::~LdbCoordinator ( void )
 

Definition at line 180 of file LdbCoordinator.C.

References ldbStatsFP.

00181 {  // Frees the arrays allocated by initialize() and closes the stats file if one is open
00182   delete [] patchNAtoms;
00183   delete [] sequencerThreads;
00184   if (CkMyPe() == 0)  // initialize() allocates the three arrays below only when CkMyPe() == 0
00185   {
00186     delete [] computeArray;
00187     delete [] patchArray;
00188     delete [] processorArray;
00189   }
00190   if (ldbStatsFP)
00191     fclose(ldbStatsFP);
00192 
00193 }  // NOTE(review): patchHandles (new[] in initialize()) is not released here - confirm whether that is intentional


Member Function Documentation

void LdbCoordinator::awakenSequencers ( void )
 

Definition at line 695 of file LdbCoordinator.C.

References Sequencer::awaken(), Controller::awaken(), controllerThread, PatchMap::numPatches(), patchMap, and sequencerThreads.

Referenced by resume2().

00696 {  // Awakens the recorded controller (if any) and every recorded sequencer, clearing each slot afterwards
00697   if (controllerThread)
00698   {
00699     controllerThread->awaken();
00700     controllerThread = NULL;
00701   }
00702   for(int i=0; i < patchMap->numPatches(); i++)  // sequencerThreads has one slot per patch (see initialize())
00703   {
00704     if (sequencerThreads[i])  // slot is filled by rebalance(Sequencer*, PatchID)
00705     {
00706       sequencerThreads[i]->awaken();
00707     }
00708     sequencerThreads[i]= NULL;
00709   }
00710 }

void LdbCoordinator::barrier ( void )
 

Definition at line 595 of file LdbCoordinator.C.

References controllerReported, ldBarrierHandle, NAMD_bug(), nComputesReported, nPatchesReported, and theLbdb.

00596 {  // Checks that every expected report arrived, then calls AtLocalBarrier with this object's barrier client
00597   if ( (nPatchesReported != nPatchesExpected) 
00598        || (nComputesReported != nComputesExpected)
00599        || (controllerReported != controllerExpected) )
00600   {
00601     NAMD_bug("Load balancer received wrong number of events.\n");
00602   }
00603 
00604   theLbdb->AtLocalBarrier(ldBarrierHandle);  // ldBarrierHandle was obtained in the constructor
00605 }

void LdbCoordinator::createLoadBalancer ( )
 

Definition at line 195 of file LdbCoordinator.C.

References CreateNamdCentLB(), CreateNamdHybridLB(), SimParameters::ldBalancer, Node::Object(), Node::simParameters, and simParams.

Referenced by Node::startup().

00196 {  // Creates the load balancer selected by simParams->ldBalancer; any other value creates nothing
00197   const SimParameters *simParams = Node::Object()->simParameters;
00198 
00199   // Create hierarchical or centralized load balancers
00200   // Currently centralized is the default
00201   if (simParams->ldBalancer == LDBAL_CENTRALIZED) {
00202     CkPrintf("LDB: Central LB being created...\n");
00203     CreateNamdCentLB();
00204   } else if (simParams->ldBalancer == LDBAL_HYBRID) {
00205     CkPrintf("LDB: Hybrid LB being created...\n");
00206     CreateNamdHybridLB();
00207   }
00208 }

void LdbCoordinator::endWork ( const LDObjHandle & handle ) [inline]
 

Definition at line 64 of file LdbCoordinator.h.

References nComputesReported, and theLbdb.

Referenced by ComputePatchPair::doWork(), ComputePatch::doWork(), ComputeLCPO::doWork(), and ComputeHomeTuples< TholeElem, Thole, TholeValue >::doWork().

00064                                           {  // both: stop the object's timer and count the compute as reported
00065     theLbdb->ObjectStop(handle);
00066     nComputesReported++;  // compared against nComputesExpected in barrier()
00067   }

void LdbCoordinator::ExecuteMigrations ( void )
 

Definition at line 619 of file LdbCoordinator.C.

References ComputeMgr::updateComputes(), and updateComputesReady().

Referenced by nodeDone().

00620 {  // Asks the local ComputeMgr branch to update computes, handing it the updateComputesReady() entry and this group
00621  // computeMgr->updateComputes() call only on Node(0) i.e. right here
00622   // This will barrier for all Nodes - (i.e. Computes must be
00623   // here and with proxies before anyone can start up
00624 
00625   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
00626   ComputeMgr *computeMgr = cm.ckLocalBranch();
00627   computeMgr->updateComputes(CkIndex_LdbCoordinator::
00628                              updateComputesReady(),thisgroup);  // presumably invoked once the update completes - confirm in ComputeMgr
00629 }

void LdbCoordinator::ExpectMigrate ( LdbMigrateMsg * )
 

Definition at line 646 of file LdbCoordinator.C.

References LdbMigrateMsg::from, LdbMigrateMsg::handle, migrateMsgs, myHandle, LdbMigrateMsg::next, and theLbdb.

Referenced by Migrate(), and RecvMigrate().

00647 {  // Queues migration message m; an object arriving from another PE is first registered with the local database
00648   if ( m->from != CkMyPe() ) {
00649     m->handle = theLbdb->RegisterObj(myHandle,m->handle.id,0,1);  // replace the sender's handle with a locally registered one
00650     theLbdb->Migrated(m->handle);
00651   }
00652 
00653   m->next = migrateMsgs;  // push onto the head of the list that initialize() drains
00654   migrateMsgs = m;
00655 }

int LdbCoordinator::getNumStepsToRun ( void ) [inline]
 

Definition at line 77 of file LdbCoordinator.h.

Referenced by Sequencer::rebalanceLoad(), and Controller::rebalanceLoad().

00077 { return numStepsToRun; }  // value is computed by initialize(); it may be -1 once lastLdbStep has been passed

void LdbCoordinator::initialize ( PatchMap * pmap,
ComputeMap * cmap,
int reinit = 0 )
 

Definition at line 210 of file LdbCoordinator.C.

References ComputeMap::compute(), computeArray, computeMap, controllerExpected, controllerReported, controllerThread, CreateNamdCentLB(), CreateNamdNborLB(), SimParameters::firstLdbStep, firstLdbStep, LdbMigrateMsg::handle, PatchMap::homePatch(), iERROR(), iFILE, iout, iPE(), SimParameters::lastLdbStep, SimParameters::ldBalancer, ldbCycleNum, SimParameters::ldbPeriod, SimParameters::ldbStrategy, Compute::ldObjHandle, HomePatch::ldObjHandle, migrateMsgs, myHandle, Node::myid(), NAMD_bug(), NAMD_die(), nComputesExpected, nComputesReported, LdbMigrateMsg::next, nLocalComputes, nLocalPatches, ComputeMap::node(), PatchMap::node(), nPatches, nPatchesExpected, nPatchesReported, nStatsMessagesExpected, nStatsMessagesReceived, ComputeMap::numComputes(), numComputes, PatchMap::numHomePatches(), Node::numNodes(), PatchMap::numPatches(), ComputeMap::numPids(), numStepsToRun, Node::Object(), patchArray, patchHandles, patchMap, patchNAtoms, ComputeMap::pid(), processorArray, reg_all_objs, sequencerThreads, Node::simParameters, simParams, SimParameters::stepsPerCycle, stepsPerLdbCycle, takingLdbData, theLbdb, totalStepsDone, and ComputeMap::type().

Referenced by resume(), and Node::startup().

00211 {  // Recounts local patches/computes, registers objects with the LB database, and sets up the next run segment
00212   const SimParameters *simParams = Node::Object()->simParameters;
00213 
00214 #if 0
00215   static int lbcreated = 0; // XXX static variables are unsafe for SMP
00216   // PE0 first time Create a load balancer
00217   if (CkMyPe() == 0 && !lbcreated) {
00218     if (simParams->ldbStrategy == LDBSTRAT_ALGNBOR) 
00219       CreateNamdNborLB();
00220     else {
00221       //   CreateCentralLB();
00222       CreateNamdCentLB();
00223     }
00224     lbcreated = 1;
00225   }
00226 #endif
00227 
00228   //  DebugM(10,"stepsPerLdbCycle initialized\n");
00229   stepsPerLdbCycle = simParams->ldbPeriod;
00230   firstLdbStep = simParams->firstLdbStep;
00231   int lastLdbStep = simParams->lastLdbStep;  // -1 disables the "past last step" checks below
00232   int stepsPerCycle = simParams->stepsPerCycle;
00233 
00234   computeMap = cMap;
00235   patchMap = pMap;
00236 
00237   // Set the number of received messages correctly for node 0
00238 
00239   nStatsMessagesExpected = Node::Object()->numNodes();
00240   nStatsMessagesReceived = 0;
00241 
00242   if (patchNAtoms) 
00243     delete [] patchNAtoms;  // Depends on delete NULL to do nothing
00244   nPatches = patchMap->numPatches();
00245   patchNAtoms = new int[nPatches];
00246 
00247   typedef Sequencer *seqPtr;
00248 
00249   if ( ! reinit ) {  // on reinitialization the existing sequencer slots are kept
00250     delete [] sequencerThreads;  // Depends on delete NULL to do nothing
00251     sequencerThreads = new seqPtr[nPatches];
00252   }
00253 
00254   nLocalPatches=0;
00255 
00256   int i;
00257   for(i=0;i<nPatches;i++)
00258   {
00259     if (patchMap->node(i) == Node::Object()->myid())
00260     {
00261       nLocalPatches++;
00262       patchNAtoms[i]=0;
00263     } else {
00264       patchNAtoms[i]=-1;  // -1 marks a patch not on this node; patchLoad() ignores reports for it
00265     }
00266     if ( ! reinit ) sequencerThreads[i]=NULL;
00267   }
00268   if ( ! reinit ) controllerThread = NULL;
00269   if (nLocalPatches != patchMap->numHomePatches())
00270     NAMD_die("Disaggreement in patchMap data.\n");
00271  
00272   const int oldNumComputes = numComputes;  // only computes with index >= this are registered below
00273   nLocalComputes = 0;
00274   numComputes = computeMap->numComputes();
00275 
00276   for(i=0;i<numComputes;i++)  {  // count the local computes whose type is one of those listed
00277     if ( (computeMap->node(i) == Node::Object()->myid())
00278          && ( 0
00279 #ifndef NAMD_CUDA
00280               || (computeMap->type(i) == computeNonbondedSelfType)
00281               || (computeMap->type(i) == computeNonbondedPairType)
00282 #endif
00283               || (computeMap->type(i) == computeLCPOType)
00284               || (computeMap->type(i) == computeSelfExclsType)
00285               || (computeMap->type(i) == computeSelfBondsType)
00286               || (computeMap->type(i) == computeSelfAnglesType)
00287               || (computeMap->type(i) == computeSelfDihedralsType)
00288               || (computeMap->type(i) == computeSelfImpropersType)
00289               || (computeMap->type(i) == computeSelfTholeType)
00290               || (computeMap->type(i) == computeSelfAnisoType)
00291               || (computeMap->type(i) == computeSelfCrosstermsType)
00292 
00293                  || (computeMap->type(i) == computeBondsType)
00294                  || (computeMap->type(i) == computeExclsType)
00295                  || (computeMap->type(i) == computeAnglesType)
00296                  || (computeMap->type(i) == computeDihedralsType)
00297                  || (computeMap->type(i) == computeImpropersType)
00298                  || (computeMap->type(i) == computeTholeType)
00299                  || (computeMap->type(i) == computeAnisoType)
00300                  || (computeMap->type(i) == computeCrosstermsType)
00301         ) ) {
00302       nLocalComputes++;
00303     }
00304   }
00305   
00306   // New LB frameworks registration
00307 
00308   // Allocate data structure to save incoming migrations.  Processor
00309   // zero will get all migrations
00310 
00311   // If this is the first time through, we need to register patches
00312   if (ldbCycleNum == reg_all_objs) {
00313     if ( Node::Object()->simParameters->ldBalancer == LDBAL_CENTRALIZED ) {
00314       reg_all_objs = 3;  // centralized balancer: this branch runs again when ldbCycleNum reaches 3
00315     }
00316     // Tell the lbdb that I'm registering objects, until I'm done
00317     // registering them.
00318     theLbdb->RegisteringObjects(myHandle);
00319     
00320    if ( ldbCycleNum == 1 ) {  // patches are registered once, on the first cycle only
00321     patchHandles = new LDObjHandle[nLocalPatches];
00322     int patch_count=0;
00323     int i;
00324     for(i=0;i<nPatches;i++)
00325       if (patchMap->node(i) == Node::Object()->myid()) {
00326         LDObjid elemID;
00327         elemID.id[0] = i;
00328         elemID.id[1] = elemID.id[2] = elemID.id[3] = -2;  // patches use -2 in slots 1..3 (bonded computes below use -3)
00329 
00330         if (patch_count >= nLocalPatches) {
00331           iout << iFILE << iERROR << iPE 
00332                << "LdbCoordinator found too many local patches!" << endi;
00333           CkExit();
00334         }
00335         HomePatch *p = patchMap->homePatch(i);
00336         p->ldObjHandle = 
00337         patchHandles[patch_count] 
00338           = theLbdb->RegisterObj(myHandle,elemID,0,0);  // last argument is 0 here and 1 for the first compute group below - presumably the "migratable" flag; confirm against RegisterObj
00339         patch_count++;
00340 
00341       }
00342    }
00343   
00344     if ( numComputes > oldNumComputes ) {
00345       // Register computes
00346       for(i=oldNumComputes; i<numComputes; i++)  {
00347         if ( computeMap->node(i) == Node::Object()->myid())
00348         {
00349           if ( 0
00350 #ifndef NAMD_CUDA
00351                   || (computeMap->type(i) == computeNonbondedSelfType)
00352                   || (computeMap->type(i) == computeNonbondedPairType)
00353 #endif
00354                   || (computeMap->type(i) == computeLCPOType)
00355                   || (computeMap->type(i) == computeSelfExclsType)
00356                   || (computeMap->type(i) == computeSelfBondsType)
00357                   || (computeMap->type(i) == computeSelfAnglesType)
00358                   || (computeMap->type(i) == computeSelfDihedralsType)
00359                   || (computeMap->type(i) == computeSelfImpropersType)
00360                   || (computeMap->type(i) == computeSelfTholeType)
00361                   || (computeMap->type(i) == computeSelfAnisoType)
00362                   || (computeMap->type(i) == computeSelfCrosstermsType)
00363                 )  {
00364           // Register the object with the load balancer
00365           // Store the depended patch IDs in the rest of the element ID
00366           LDObjid elemID;
00367           elemID.id[0] = i;
00368         
00369           if (computeMap->numPids(i) > 2)
00370             elemID.id[3] = computeMap->pid(i,2);
00371           else elemID.id[3] = -1;
00372 
00373           if (computeMap->numPids(i) > 1)
00374             elemID.id[2] =  computeMap->pid(i,1);
00375           else elemID.id[2] = -1;
00376 
00377           if (computeMap->numPids(i) > 0)
00378             elemID.id[1] =  computeMap->pid(i,0);
00379           else elemID.id[1] = -1;
00380 
00381           Compute *c = computeMap->compute(i);
00382           if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");
00383 
00384           c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,1);
00385           }
00386           else if ( (computeMap->type(i) == computeBondsType)
00387                  || (computeMap->type(i) == computeExclsType)
00388                  || (computeMap->type(i) == computeAnglesType)
00389                  || (computeMap->type(i) == computeDihedralsType)
00390                  || (computeMap->type(i) == computeImpropersType)
00391                  || (computeMap->type(i) == computeTholeType)
00392                  || (computeMap->type(i) == computeAnisoType)
00393                  || (computeMap->type(i) == computeCrosstermsType)
00394                ) {
00395           // Register the object with the load balancer
00396           // Store the depended patch IDs in the rest of the element ID
00397           LDObjid elemID;
00398           elemID.id[0] = i;
00399         
00400           elemID.id[1] = elemID.id[2] = elemID.id[3] = -3;  // unlike the branch above, no patch IDs are stored for these types
00401 
00402           Compute *c = computeMap->compute(i);
00403           if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer");
00404 
00405           c->ldObjHandle = theLbdb->RegisterObj(myHandle,elemID,0,0);
00406           }
00407         }
00408       }
00409     }
00410     theLbdb->DoneRegisteringObjects(myHandle);
00411   }
00412 
00413   // process saved migration messages, if any
00414   while ( migrateMsgs ) {  // list is built by ExpectMigrate()
00415     LdbMigrateMsg *m = migrateMsgs;
00416     migrateMsgs = m->next;
00417     Compute *c = computeMap->compute(m->handle.id.id[0]);  // id[0] holds the compute index (set at registration above)
00418     if ( ! c ) NAMD_bug("LdbCoordinator::initialize() null compute pointer 2");
00419     c->ldObjHandle = m->handle;
00420     delete m;
00421   }
00422 
00423   // Fixup to take care of the extra timestep at startup
00424   // This is pretty ugly here, but it makes the count correct
00425   
00426   // iout << "LDB Cycle Num: " << ldbCycleNum << "\n";
00427 
00428  if ( simParams->ldBalancer == LDBAL_CENTRALIZED ) {
00429   if (ldbCycleNum == 1 || ldbCycleNum == 3) {
00430     numStepsToRun = stepsPerCycle;
00431     totalStepsDone += numStepsToRun;
00432     takingLdbData = 0;
00433     theLbdb->CollectStatsOff();
00434   } else if (ldbCycleNum == 2 || ldbCycleNum == 4) {
00435     numStepsToRun = firstLdbStep - stepsPerCycle;
00436     while ( numStepsToRun <= 0 ) numStepsToRun += stepsPerCycle;  // force a positive segment length; NOTE(review): does not terminate if stepsPerCycle <= 0
00437     totalStepsDone += numStepsToRun;
00438     takingLdbData = 1;
00439     theLbdb->CollectStatsOn();
00440   } else if ( (ldbCycleNum <= 6) || !takingLdbData )
00441   {
00442     totalStepsDone += firstLdbStep;
00443     if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
00444       numStepsToRun = -1;  // past lastLdbStep; presumably tells callers of getNumStepsToRun() to stop balancing - confirm
00445       takingLdbData = 0;
00446       theLbdb->CollectStatsOff();
00447     } else {
00448       numStepsToRun = firstLdbStep;
00449       takingLdbData = 1;
00450       theLbdb->CollectStatsOn();
00451     }
00452   }
00453   else 
00454   {
00455     totalStepsDone += stepsPerLdbCycle - firstLdbStep;
00456     if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
00457       numStepsToRun = -1;
00458       takingLdbData = 0;
00459       theLbdb->CollectStatsOff();
00460     } else {
00461       numStepsToRun = stepsPerLdbCycle - firstLdbStep;
00462       takingLdbData = 0;
00463       theLbdb->CollectStatsOff();
00464     }
00465   }
00466  } else {
00467   if (ldbCycleNum==1)
00468   {
00469     totalStepsDone += firstLdbStep;
00470     numStepsToRun = firstLdbStep;
00471     takingLdbData = 0;
00472     theLbdb->CollectStatsOff();
00473   }
00474   else if ( (ldbCycleNum <= 4) || !takingLdbData )
00475   {
00476     totalStepsDone += firstLdbStep;
00477     if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
00478       numStepsToRun = -1;
00479       takingLdbData = 0;
00480       theLbdb->CollectStatsOff();
00481     } else {
00482       numStepsToRun = firstLdbStep;
00483       takingLdbData = 1;
00484       theLbdb->CollectStatsOn();
00485     }
00486   }
00487   else 
00488   {
00489     totalStepsDone += stepsPerLdbCycle - firstLdbStep;
00490     if(lastLdbStep != -1 && totalStepsDone > lastLdbStep) {
00491       numStepsToRun = -1;
00492       takingLdbData = 0;
00493       theLbdb->CollectStatsOff();
00494     } else {
00495       numStepsToRun = stepsPerLdbCycle - firstLdbStep;
00496       takingLdbData = 0;
00497       theLbdb->CollectStatsOff();
00498     }
00499   }
00500  }
00501 
00502 /*-----------------------------------------------------------------------------*
00503  * --------------------------------------------------------------------------- *
00504  * Comments inserted by Abhinav to clarify relation between ldbCycleNum,       *
00505  * load balancing step numbers (printed by the step() function) and            *
00506  * tracing of the steps                                                        *
00507  * --------------------------------------------------------------------------- *
00508  * If trace is turned off in the beginning, then tracing is turned on          *
00509  * at ldbCycleNum = 4 and turned off at ldbCycleNum = 8. ldbCycleNum can       *
00510  * be adjusted by specifying firstLdbStep and ldbPeriod which are set by       *
00511  * default to 5*stepspercycle and 200*stepspercycle if not specified.          *
00512  *                                                                             *
00513  * If we choose firstLdbStep = 20 and ldbPeriod = 100, we have the             *
00514  * following timeline (for these particular numbers):                          *
00515  *                                                                             *
00516  * Tracing         :  <------ off ------><------------- on -----------><-- off *
00517  * Ldb Step() No   :              1     2     3        4      5       6      7 *
00518  * Iteration Steps : 00====20====40====60====80======160====180=====260====280 *
00519  * ldbCycleNum     :  1     2     3     4     5        6      7       8      9 *
00520  * Instrumention   :          Inst  Inst  Inst           Inst            Inst  *
00521  * LDB Strategy    :              TLB  RLB   RLB            RLB            RLB *
00522  *                                                                             *
00523  * TLB = TorusLB                                                               *
00524  * RLB = RefineTorusLB                                                         *
00525  * Inst = Instrumentation Phase (no real load balancing)                       *
00526  * --------------------------------------------------------------------------- *
00527  *-----------------------------------------------------------------------------*
00528  */
00529 #if 0 //replaced by traceBarrier at Controller and Sequencer
00530   if (traceAvailable()) {
00531     static int specialTracing = 0; // XXX static variables are unsafe for SMP
00532     if (ldbCycleNum == 1 && traceIsOn() == 0)  specialTracing = 1;
00533     if (specialTracing) {
00534       if (ldbCycleNum == 4) traceBegin();
00535       if (ldbCycleNum == 8) traceEnd();
00536     }
00537   }
00538 #endif
00539   
00540   nPatchesReported = 0;  // the *Reported / *Expected pairs are compared in barrier()
00541   nPatchesExpected = nLocalPatches;
00542   nComputesReported = 0;
00543   nComputesExpected = nLocalComputes * numStepsToRun;  // NOTE(review): negative when numStepsToRun was set to -1 above
00544   controllerReported = 0;
00545   controllerExpected = ! CkMyPe();  // 1 when CkMyPe() == 0, otherwise 0
00546 
00547   if (CkMyPe() == 0)
00548   {
00549     if (computeArray == NULL)  // NOTE(review): sized only on first allocation; not resized if numComputes grows on a later call
00550       computeArray = new computeInfo[numComputes];
00551     if (patchArray == NULL)
00552       patchArray = new patchInfo[nPatches];
00553     if (processorArray == NULL)
00554       processorArray = new processorInfo[CkNumPes()];
00555   }
00556     
00557   theLbdb->ClearLoads();
00558 }

void LdbCoordinator::Migrate ( LDObjHandle handle,
int dest )
 

Definition at line 66 of file LdbCoordinator.C.

References ExpectMigrate(), LdbMigrateMsg::from, LdbMigrateMsg::handle, and LdbMigrateMsg::to.

Referenced by staticMigrateFn(), and ComputeMgr::updateLocalComputes().

00067 {  // Wraps handle in a new LdbMigrateMsg (from = this PE, to = dest) and routes it
00068   LdbMigrateMsg* msg = new LdbMigrateMsg;
00069   msg->handle = handle;
00070   msg->from = CkMyPe();
00071   msg->to = dest;
00072   if ( msg->to != CkMyPe() ) {
00073     CProxy_LdbCoordinator ldbProxy(thisgroup);
00074     ldbProxy[CkMyPe()].RecvMigrate(msg);  // addressed to this PE's own element; RecvMigrate() unregisters the object and forwards to msg->to
00075   } else {
00076     ExpectMigrate(msg);  // destination is this PE: queue the message directly
00077   }
00078 }

void LdbCoordinator::nodeDone ( CkReductionMsg * )
 

Definition at line 607 of file LdbCoordinator.C.

References ExecuteMigrations(), iout, and updateComputesReady().

00608 {  // Callback of the reduction contributed in ResumeFromSync(); logs the end of balancing and continues
00609   delete msg;
00610 
00611   iout << "LDB: ============== END OF LOAD BALANCING =============== " << CmiWallTimer() << "\n" << endi;
00612   if ( takingLdbData ) {  // takingLdbData is set by initialize() alongside CollectStatsOn/Off
00613       ExecuteMigrations();
00614   } else {
00615       updateComputesReady();  // skip ExecuteMigrations() and go straight to the ready step
00616   }
00617 }

LdbCoordinator* LdbCoordinator::Object ( ) [inline, static]
 

Definition at line 47 of file LdbCoordinator.h.

Referenced by ComputePatchPair::doWork(), ComputePatch::doWork(), ComputeLCPO::doWork(), ComputeHomeTuples< TholeElem, Thole, TholeValue >::doWork(), SimpleBroadcastObject< Tensor >::get(), ComputeNonbondedSelf::noWork(), ComputeNonbondedPair::noWork(), ComputeLCPO::noWork(), Sequencer::rebalanceLoad(), Controller::rebalanceLoad(), Sequencer::Sequencer(), Node::startup(), HomePatch::submitLoadStats(), Sequencer::suspend(), Sequencer::terminate(), and ComputeMgr::updateLocalComputes().

00047                                    {  // returns the instance stored by the constructor (NULL if none has been constructed)
00048     return CkpvAccess(LdbCoordinator_instance); 
00049   }

void LdbCoordinator::patchLoad ( PatchID id,
int nAtoms,
int timestep )
 

Definition at line 560 of file LdbCoordinator.C.

References DebugM, nPatches, nPatchesReported, and patchNAtoms.

Referenced by HomePatch::submitLoadStats().

00561 {  // Stores nAtoms for patch id and counts the report; the timestep argument is not used in this body
00562   CmiAssert( id >=0 && id < nPatches);
00563   if (patchNAtoms[id] != -1) {  // initialize() sets -1 for patches that are not on this node
00564     patchNAtoms[id] = nAtoms;
00565     nPatchesReported++;  // compared against nPatchesExpected in barrier()
00566   } else {
00567     DebugM(10, "::patchLoad() Unexpected patch reporting in\n");
00568   }
00569 }

void LdbCoordinator::pauseWork ( const LDObjHandle & handle ) [inline]
 

Definition at line 58 of file LdbCoordinator.h.

References theLbdb.

Referenced by ComputePatchPair::doWork(), ComputePatch::doWork(), SimpleBroadcastObject< Tensor >::get(), Sequencer::suspend(), and Sequencer::terminate().

00058                                             {  // stop timer only (unlike endWork(), nComputesReported is not incremented)
00059     theLbdb->ObjectStop(handle);
00060   }

void LdbCoordinator::printLocalLdbReport ( void )
 

Definition at line 740 of file LdbCoordinator.C.

References j, PatchMap::numPatches(), patchMap, and patchNAtoms.

00741 {  // Prints "patch index: atom count" pairs for patches whose count is not -1, four pairs per output line
00742   char outputBuf[255];
00743   char *curLoc;
00744 
00745   CkPrintf("%d:Patch report:\n",CkMyPe());
00746   
00747   curLoc = outputBuf;
00748   int i,j=0;
00749   for(i=0; i<patchMap->numPatches(); i++)
00750   {
00751     if (patchNAtoms[i] != -1)
00752     {
00753       curLoc += sprintf(curLoc,"%5d: %5d ",i,patchNAtoms[i]);  // append one entry to the current row
00754       j++;
00755     } 
00756     if (((j % 4) == 0) && j)  // row holds four entries: print it and start a new one
00757     {
00758       curLoc = outputBuf;
00759       CkPrintf("[%d]%s\n",CkMyPe(),outputBuf);
00760       j=0;
00761     }
00762   }  // NOTE(review): a final partial row (j = 1..3) is never printed
00763 
00764   CkPrintf("%d:Compute report:\n",CkMyPe());  // only this header is printed; no compute data follows
00765   
00766   curLoc = outputBuf;  // NOTE(review): this store and the next are never read
00767   j=0;
00768 }

void LdbCoordinator::printRequiredProxies ( PatchID id,
FILE * fp )
 

Definition at line 770 of file LdbCoordinator.C.

References requiredProxies().

00771 {  // Writes the count from requiredProxies(id, ...) and then each node number to fp, without a trailing newline
00772   // Check all two-away neighbors.
00773   // This is really just one-away neighbors, since 
00774   // two-away always returns zero: RKB
00775   int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00776   const int nProxyNodes = requiredProxies(id,neighborNodes);
00777 
00778   fprintf(fp,"%4d ",nProxyNodes);
00779 
00780   for(int i=0;i<nProxyNodes;i++)
00781     fprintf(fp,"%4d ",neighborNodes[i]);
00782 }

void LdbCoordinator::rebalance ( Controller * seq )
 

Definition at line 580 of file LdbCoordinator.C.

References controllerReported, controllerThread, DebugM, iout, SimParameters::ldBalancer, Node::Object(), and Node::simParameters.

00581 {  // Controller entry: records the controller, invokes barrier() via the group proxy, then suspends the calling thread
00582   if (Node::Object()->simParameters->ldBalancer == LDBAL_NONE)
00583     return;
00584 
00585   iout << "LDB: ============= START OF LOAD BALANCING ============== " << CmiWallTimer() << "\n" << endi;
00586   DebugM(3, "Controller reached load balance barrier.\n");
00587   controllerReported = 1;  // compared against controllerExpected in barrier()
00588   controllerThread = c;  // awakened and cleared by awakenSequencers()
00589 
00590   CProxy_LdbCoordinator(thisgroup).barrier();
00591 
00592   CthSuspend();
00593 }

void LdbCoordinator::rebalance ( Sequencer * seq,
PatchID id )
 

Definition at line 571 of file LdbCoordinator.C.

References SimParameters::ldBalancer, Node::Object(), sequencerThreads, Node::simParameters, and Sequencer::suspend().

Referenced by Sequencer::rebalanceLoad(), and Controller::rebalanceLoad().

00572 {  // Sequencer entry: records seq in the slot for patch pid and suspends it; returns at once when ldBalancer is LDBAL_NONE
00573   if (Node::Object()->simParameters->ldBalancer == LDBAL_NONE)
00574     return;
00575 
00576   sequencerThreads[pid] = seq;  // awakened and cleared by awakenSequencers()
00577   seq->suspend();
00578 }

void LdbCoordinator::ReceiveAtSync ( void )
 

Definition at line 100 of file LdbCoordinator.C.

References myHandle, and theLbdb.

00101 {  // Marks this object manager as registering objects; the matching DoneRegisteringObjects is in ResumeFromSync()
00102   theLbdb->RegisteringObjects(myHandle);
00103 }

void LdbCoordinator::RecvMigrate ( LdbMigrateMsg * )
 

Definition at line 631 of file LdbCoordinator.C.

References ExpectMigrate(), LdbMigrateMsg::handle, theLbdb, and LdbMigrateMsg::to.

00632 {  // Forwards migration message m to its destination PE, or queues it locally if that PE is this one
00633   // This method receives the migration from the framework,
00634   // unregisters it, and sends it to the destination PE
00635 
00636   if ( m->to != CkMyPe() ) {
00637     theLbdb->UnregisterObj(m->handle);
00638 
00639     CProxy_LdbCoordinator  ldbProxy(thisgroup);
00640     ldbProxy[m->to].ExpectMigrate(m);  // the destination re-registers the object in ExpectMigrate()
00641   } else {
00642     ExpectMigrate(m);
00643   }
00644 }

int LdbCoordinator::requiredProxies ( PatchID id,
int [] )
 

Definition at line 716 of file LdbCoordinator.C.

References PatchMap::basenode(), PatchMap::downstreamNeighbors(), j, PatchMap::node(), PatchID, and patchMap.

Referenced by printRequiredProxies().

00717 {  // Fills neighborNodes with the distinct basenode() values of id and its downstream neighbors that differ from node(id); returns the count
00718   PatchID neighbors[1 + PatchMap::MaxOneAway];
00719   neighbors[0] = id;  // the patch itself is examined along with its neighbors
00720   int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00721 
00722   int nProxyNodes = 0;
00723   int myNode = patchMap->node(id);
00724   for ( int i = 0; i < numNeighbors; ++i ) {
00725     const int proxyNode = patchMap->basenode(neighbors[i]);
00726     if ( proxyNode != myNode ) {
00727       int j;
00728       for ( j = 0; j < nProxyNodes; ++j ) {  // linear scan so each node is stored only once
00729         if ( neighborNodes[j] == proxyNode ) break;
00730       }
00731       if ( j == nProxyNodes ) {  // not found among the nodes stored so far
00732         neighborNodes[nProxyNodes] = proxyNode;
00733         nProxyNodes++;
00734       }
00735     }
00736   }
00737   return nProxyNodes;
00738 }

void LdbCoordinator::resume (void)
 

Definition at line 664 of file LdbCoordinator.C.

References DebugM, initialize(), ldbCycleNum, Sync::Object(), and Sync::openSync().

00665 {
00666   DebugM(3,"resume()\n");
00667   //  printLocalLdbReport();
00668 
00669   ldbCycleNum++;
00670   initialize(PatchMap::Object(),ComputeMap::Object(),1);
00671 
00672   Sync::Object()->openSync();
00673 }

void LdbCoordinator::resume2 (void)
 

Definition at line 684 of file LdbCoordinator.C.

References awakenSequencers(), and DebugM.

00685 {
00686   DebugM(3,"resume2()\n");
00687 
00688 #if CONVERSE_VERSION_ELAN
00689   //  enableBlockingReceives();
00690 #endif
00691 
00692   awakenSequencers();
00693 }

void LdbCoordinator::ResumeFromSync (void)
 

Definition at line 110 of file LdbCoordinator.C.

References myHandle, and theLbdb.

00111 {
00112   theLbdb->DoneRegisteringObjects(myHandle);
00113   CkCallback cb(CkIndex_LdbCoordinator::nodeDone(NULL), 0, thisgroup);
00114   contribute(0, NULL, CkReduction::random, cb);
00115 }

void LdbCoordinator::resumeReady (CkQdMsg *msg)
 

Definition at line 675 of file LdbCoordinator.C.

References DebugM, and iout.

00675                                              {
00676 
00677   iout << "LDB: =============== DONE WITH MIGRATION ================ " << CmiWallTimer() << "\n" << endi;
00678   DebugM(3,"resumeReady()\n");
00679   delete msg;
00680 
00681   CProxy_LdbCoordinator(thisgroup).resume2();
00682 }

void LdbCoordinator::skipWork (const LDObjHandle &handle)  [inline]
 

Definition at line 61 of file LdbCoordinator.h.

References nComputesReported.

Referenced by ComputeNonbondedSelf::noWork(), ComputeNonbondedPair::noWork(), and ComputeLCPO::noWork().

00061                                            {  // increment counter only
00062     nComputesReported++;
00063   }

void LdbCoordinator::startWork (const LDObjHandle &handle)  [inline]
 

Definition at line 55 of file LdbCoordinator.h.

References theLbdb.

Referenced by ComputePatchPair::doWork(), ComputePatch::doWork(), ComputeLCPO::doWork(), ComputeHomeTuples< TholeElem, Thole, TholeValue >::doWork(), SimpleBroadcastObject< Tensor >::get(), and Sequencer::suspend().

00055                                             {  // start timer
00056     theLbdb->ObjectStart(handle);
00057   }

void LdbCoordinator::staticMigrateFn (LDObjHandle handle, int dest)  [static]
 

Definition at line 60 of file LdbCoordinator.C.

References Migrate().

00061 {
00062    LdbCoordinator *ldbCoordinator = (LdbCoordinator *)LDOMUserData(handle.omhandle);
00063    ldbCoordinator->Migrate(handle,dest);
00064 }

void LdbCoordinator::staticQueryEstLoadFn (LDOMHandle h)  [static]
 

Definition at line 85 of file LdbCoordinator.C.

Referenced by LdbCoordinator().

00086 {
00087   CkPrintf("I'm supposed to query load\n");
00088 }

void LdbCoordinator::staticReceiveAtSync (void *data)  [static]
 

Definition at line 90 of file LdbCoordinator.C.

Referenced by LdbCoordinator().

00091 {
00092 
00093 #if CONVERSE_VERSION_ELAN
00094     //disableBlockingReceives();
00095 #endif
00096 
00097   ((LdbCoordinator*)data)->ReceiveAtSync();
00098 }

void LdbCoordinator::staticResumeFromSync (void *data)  [static]
 

Definition at line 105 of file LdbCoordinator.C.

Referenced by LdbCoordinator().

00106 {
00107   ((LdbCoordinator*)data)->ResumeFromSync();
00108 }

void LdbCoordinator::staticStatsFn (LDOMHandle h, int state)  [static]
 

Definition at line 80 of file LdbCoordinator.C.

00081 {
00082   CkPrintf("I'm supposed to set stats\n");
00083 }

void LdbCoordinator::updateComputesReady ()
 

Definition at line 657 of file LdbCoordinator.C.

References DebugM.

Referenced by ExecuteMigrations(), and nodeDone().

00657                                          {
00658   DebugM(3,"updateComputesReady()\n");
00659 
00660   CProxy_LdbCoordinator(thisgroup).resume();
00661   CkStartQD(CkIndex_LdbCoordinator::resumeReady((CkQdMsg*)0),&thishandle);
00662 }


Member Data Documentation

computeInfo* LdbCoordinator::computeArray
 

Definition at line 123 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

ComputeMap* LdbCoordinator::computeMap
 

Definition at line 107 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::controllerExpected
 

Definition at line 104 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::controllerReported
 

Definition at line 103 of file LdbCoordinator.h.

Referenced by barrier(), initialize(), and rebalance().

Controller* LdbCoordinator::controllerThread
 

Definition at line 111 of file LdbCoordinator.h.

Referenced by awakenSequencers(), initialize(), and rebalance().

int LdbCoordinator::firstLdbStep
 

Definition at line 117 of file LdbCoordinator.h.

Referenced by initialize().

LDBarrierClient LdbCoordinator::ldBarrierHandle
 

Definition at line 132 of file LdbCoordinator.h.

Referenced by barrier(), and LdbCoordinator().

int LdbCoordinator::ldbCycleNum
 

Definition at line 114 of file LdbCoordinator.h.

Referenced by initialize(), LdbCoordinator(), and resume().

FILE* LdbCoordinator::ldbStatsFP
 

Definition at line 122 of file LdbCoordinator.h.

Referenced by LdbCoordinator(), and ~LdbCoordinator().

LdbMigrateMsg* LdbCoordinator::migrateMsgs
 

Definition at line 129 of file LdbCoordinator.h.

Referenced by ExpectMigrate(), initialize(), and LdbCoordinator().

LDOMHandle LdbCoordinator::myHandle
 

Definition at line 128 of file LdbCoordinator.h.

Referenced by ExpectMigrate(), initialize(), LdbCoordinator(), ReceiveAtSync(), and ResumeFromSync().

LDOMid LdbCoordinator::myOMid
 

Definition at line 127 of file LdbCoordinator.h.

Referenced by LdbCoordinator().

int LdbCoordinator::nComputesExpected
 

Definition at line 102 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::nComputesReported
 

Definition at line 101 of file LdbCoordinator.h.

Referenced by barrier(), endWork(), initialize(), and skipWork().

int LdbCoordinator::nLocalComputes
 

Definition at line 97 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

int LdbCoordinator::nLocalPatches
 

Definition at line 98 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

int LdbCoordinator::nPatches
 

Definition at line 110 of file LdbCoordinator.h.

Referenced by initialize(), and patchLoad().

int LdbCoordinator::nPatchesExpected
 

Definition at line 100 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::nPatchesReported
 

Definition at line 99 of file LdbCoordinator.h.

Referenced by barrier(), initialize(), and patchLoad().

int LdbCoordinator::nRegisteredObjs
 

Definition at line 131 of file LdbCoordinator.h.

int LdbCoordinator::nStatsMessagesExpected
 

Definition at line 106 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::nStatsMessagesReceived
 

Definition at line 105 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::numComputes
 

Definition at line 130 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

int LdbCoordinator::numStepsToRun
 

Definition at line 115 of file LdbCoordinator.h.

Referenced by initialize().

patchInfo* LdbCoordinator::patchArray
 

Definition at line 124 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

LDObjHandle* LdbCoordinator::patchHandles
 

Definition at line 134 of file LdbCoordinator.h.

Referenced by initialize().

PatchMap* LdbCoordinator::patchMap
 

Definition at line 108 of file LdbCoordinator.h.

Referenced by awakenSequencers(), initialize(), printLocalLdbReport(), and requiredProxies().

int* LdbCoordinator::patchNAtoms
 

Definition at line 109 of file LdbCoordinator.h.

Referenced by initialize(), LdbCoordinator(), patchLoad(), and printLocalLdbReport().

processorInfo* LdbCoordinator::processorArray
 

Definition at line 125 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

int LdbCoordinator::reg_all_objs
 

Definition at line 133 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

Sequencer** LdbCoordinator::sequencerThreads
 

Definition at line 112 of file LdbCoordinator.h.

Referenced by awakenSequencers(), initialize(), LdbCoordinator(), and rebalance().

int LdbCoordinator::stepsPerLdbCycle
 

Definition at line 96 of file LdbCoordinator.h.

Referenced by initialize().

int LdbCoordinator::takingLdbData
 

Definition at line 120 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().

LBDatabase* LdbCoordinator::theLbdb
 

Definition at line 126 of file LdbCoordinator.h.

Referenced by barrier(), endWork(), ExpectMigrate(), initialize(), LdbCoordinator(), pauseWork(), ReceiveAtSync(), RecvMigrate(), ResumeFromSync(), and startWork().

int LdbCoordinator::totalStepsDone
 

Definition at line 118 of file LdbCoordinator.h.

Referenced by initialize(), and LdbCoordinator().


The documentation for this class was generated from the following files:
- LdbCoordinator.h
- LdbCoordinator.C
Generated on Tue May 21 04:07:29 2013 for NAMD by  doxygen 1.3.9.1