LdbCoordinator.h

Go to the documentation of this file.
00001 
00007 /*****************************************************************************
00008  * $Source: /home/cvs/namd/cvsroot/namd2/src/LdbCoordinator.h,v $
00009  * $Author: jim $
00010  * $Date: 2013/09/06 19:11:37 $
00011  * $Revision: 1.47 $
00012  *****************************************************************************/
00013 
00014 #ifndef LDBCOORDINATOR_H
00015 #define LDBCOORDINATOR_H
00016 
00017 #include <stdio.h>
00018 
00019 #include <charm++.h>
00020 #include <LBDatabase.h>
00021 
00022 #include "NamdTypes.h"
00023 #include "BOCgroup.h"
00024 #include "LdbCoordinator.decl.h"
00025 
00026 class PatchMap;       // forward declaration: this header only uses PatchMap*
00027 class ComputeMap;     // forward declaration: this header only uses ComputeMap*
00028 class Controller;     // forward declaration: this header only uses Controller*
00029 class Sequencer;      // forward declaration: this header only uses Sequencer* and Sequencer**
00030 class computeInfo;    // forward declaration: this header only uses computeInfo*
00031 class patchInfo;      // forward declaration: this header only uses patchInfo*
00032 class processorInfo;  // forward declaration: this header only uses processorInfo*
00033 
00034 enum {LDB_PATCHES = 4096};   // integer constant; same value as PATCHMAX; not referenced in this header
00035 enum {LDB_COMPUTES = 16384}; // integer constant; same value as COMPUTEMAX; not referenced in this header
00036 enum {COMPUTEMAX = 16384};   // NOTE(review): presumably an upper bound on compute objects -- confirm at the use sites
00037 enum {PATCHMAX = 4096};      // NOTE(review): presumably an upper bound on patches -- confirm at the use sites
00038 enum {PROCESSORMAX = 512};   // NOTE(review): presumably an upper bound on processors -- confirm at the use sites
00039 
00040 void LdbCoordinator_initproc();  // free function taking no arguments; declared here, defined elsewhere (name suggests per-processor init -- confirm)
00041 
00042 class LdbCoordinator : public CBase_LdbCoordinator  // load-balancing coordinator; base class presumably generated into LdbCoordinator.decl.h -- confirm
00043 {
00044 public:
00045   LdbCoordinator();
00046   ~LdbCoordinator(void);
00047   static LdbCoordinator *Object()  { // returns whatever CkpvAccess(LdbCoordinator_instance) yields
00048     return CkpvAccess(LdbCoordinator_instance); 
00049   }
00050   // --- Setup and per-patch reporting (bodies are defined outside this header) ---
00051   void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0);  // reinit defaults to 0; its exact effect is not visible here
00052   void createLoadBalancer();
00053   void patchLoad(PatchID id, int nAtoms, int timestep);
00054   // --- Inline work-timing hooks: forward to theLbdb and/or bump nComputesReported ---
00055   void startWork(const LDObjHandle &handle) {  // start timer: calls theLbdb->ObjectStart(handle)
00056     theLbdb->ObjectStart(handle);
00057   }
00058   void pauseWork(const LDObjHandle &handle) {  // stop timer only: calls theLbdb->ObjectStop(handle), counter untouched
00059     theLbdb->ObjectStop(handle);
00060   }
00061   void skipWork(const LDObjHandle &handle) {  // increment counter only; handle is accepted but unused
00062     nComputesReported++;
00063   }
00064   void endWork(const LDObjHandle &handle) {  // both: stop the timer and increment the counter
00065     theLbdb->ObjectStop(handle);
00066     nComputesReported++;
00067   }
00068   // --- Rebalancing / synchronization entry points (bodies are defined outside this header) ---
00069   void rebalance(Sequencer *seq, PatchID id);  // overload taking a Sequencer and a patch id
00070   void rebalance(Controller *seq);             // overload taking a Controller (the parameter is still named seq)
00071   void nodeDone(CkReductionMsg *);
00072   void updateComputesReady();
00073   void barrier(void);
00074   void resume(void);
00075   void resumeReady(CkQdMsg *msg);
00076   void resume2(void);
00077   int getNumStepsToRun(void) { return numStepsToRun; }  // plain getter for numStepsToRun
00078   static void staticMigrateFn(LDObjHandle handle, int dest);  // NOTE(review): the static* functions look like callbacks handed to the LB framework -- confirm
00079   static void staticStatsFn(LDOMHandle h, int state);
00080   static void staticQueryEstLoadFn(LDOMHandle h);
00081   static void staticReceiveAtSync(void* data);   // takes an untyped pointer; what it points to is not visible here
00082   static void staticResumeFromSync(void* data);  // takes an untyped pointer; what it points to is not visible here
00083   void ReceiveAtSync(void);
00084   void Migrate(LDObjHandle handle, int dest);
00085   void RecvMigrate(LdbMigrateMsg*);
00086   void ExpectMigrate(LdbMigrateMsg*);
00087   void ResumeFromSync(void);
00088 
00089 public:  // redundant specifier: access is already public at this point
00090   void ExecuteMigrations(void);
00091   void awakenSequencers(void);
00092   int requiredProxies(PatchID id, int []);  // second parameter is an unnamed int array; its required length is not stated here
00093   void printRequiredProxies(PatchID id, FILE *fp);
00094   void printLocalLdbReport(void);
00095   // --- Public data members; several counters come in Reported/Expected pairs ---
00096   int stepsPerLdbCycle;
00097   int nLocalComputes;
00098   int nLocalPatches;
00099   int nPatchesReported;
00100   int nPatchesExpected;
00101   int nComputesReported;  // incremented by skipWork() and endWork()
00102   int nComputesExpected;
00103   int controllerReported;
00104   int controllerExpected;
00105   int nStatsMessagesReceived;
00106   int nStatsMessagesExpected;
00107   ComputeMap *computeMap;
00108   PatchMap *patchMap;
00109   int *patchNAtoms;  // NOTE(review): presumably per-patch atom counts with nPatches entries -- confirm; ownership not visible here
00110   int  nPatches;
00111   Controller *controllerThread;
00112   Sequencer **sequencerThreads;  // array of Sequencer pointers; length and ownership not visible here
00113 
00114   int ldbCycleNum;
00115   int numStepsToRun;    // tells Controller how many time steps to run 
00116                         // before another load balancing
00117   int firstLdbStep;
00118   int totalStepsDone;   // keeps a count of the total number of
00119                         // time steps to stop load balancing
00120   int takingLdbData;
00121 
00122   FILE *ldbStatsFP;
00123   computeInfo *computeArray;
00124   patchInfo *patchArray;
00125   processorInfo *processorArray;
00126   LBDatabase *theLbdb;  // dereferenced by startWork/pauseWork/endWork without a null check
00127   LDOMid myOMid;
00128   LDOMHandle myHandle;
00129   LdbMigrateMsg *migrateMsgs;  // LdbMigrateMsg carries a next pointer, so presumably the head of a linked list -- confirm
00130   int numComputes;
00131   int nRegisteredObjs;
00132   LDBarrierClient ldBarrierHandle;
00133   int reg_all_objs;
00134   LDObjHandle* patchHandles;
00135 
00136   void sendCollectLoads(CollectLoadsMsg*);
00137   void collectLoads(CollectLoadsMsg*);
00138 private:  // from reverted onward, these fields share names and types with fields of CollectLoadsMsg
00139   int collPes;  // no like-named field in CollectLoadsMsg
00140   int reverted;
00141   int initTotalProxies;
00142   int finalTotalProxies;
00143   int initMaxPeProxies;
00144   int finalMaxPeProxies;
00145   int initMaxPatchProxies;
00146   int finalMaxPatchProxies;
00147   double initTime;
00148   double finalTime;
00149   double initMemory;
00150   double finalMemory;
00151   double initAvgPeLoad;
00152   double finalAvgPeLoad;
00153   double initMaxPeLoad;
00154   double finalMaxPeLoad;
00155 };
00156 
00157 class CollectLoadsMsg : public CMessage_CollectLoadsMsg {  // message type accepted by LdbCoordinator::sendCollectLoads() and collectLoads()
00158 public:
00159   int firstPe;  // NOTE(review): firstPe/lastPe presumably bound the PE range this message summarizes -- confirm
00160   int lastPe;
00161   int reverted;
00162   int initTotalProxies;  // the init*/final* fields come in pairs; presumably values before and after a load-balancing step -- confirm
00163   int finalTotalProxies;
00164   int initMaxPeProxies;
00165   int finalMaxPeProxies;
00166   int initMaxPatchProxies;
00167   int finalMaxPatchProxies;
00168   double initTime;
00169   double finalTime;
00170   double initMemory;
00171   double finalMemory;
00172   double initAvgPeLoad;
00173   double finalAvgPeLoad;
00174   double initMaxPeLoad;
00175   double finalMaxPeLoad;
00176   char strategyName[16];  // fixed 16-byte buffer; if used as a C string it holds at most 15 characters plus the terminator
00177 };
00178 
00179 class LdbMigrateMsg : public CMessage_LdbMigrateMsg  // message type accepted by LdbCoordinator::RecvMigrate() and ExpectMigrate()
00180 {
00181 public:
00182   LDObjHandle handle;   // handle of the object this message refers to
00183   int from;             // NOTE(review): presumably the source processor of the migration -- confirm
00184   int to;               // NOTE(review): presumably the destination processor of the migration -- confirm
00185   LdbMigrateMsg *next;  // self-referential pointer, so messages can be chained into a singly linked list
00186 };
00187 
00188 
00189 #endif // LDBCOORDINATOR_H
00190 

Generated on Mon Nov 20 01:17:12 2017 for NAMD by  doxygen 1.4.7