22 #include "LdbCoordinator.decl.h" 31 #define MIN_DEBUG_LEVEL 3 42 #include "ComputeMgr.decl.h" 46 #if CONVERSE_VERSION_ELAN 47 extern "C" void enableBlockingReceives();
48 extern "C" void disableBlockingReceives();
57 #ifndef LB_MANAGER_VERSION 73 if ( msg->
to != CkMyPe() ) {
74 CProxy_LdbCoordinator ldbProxy(thisgroup);
75 ldbProxy[CkMyPe()].RecvMigrate(msg);
83 CkPrintf(
"I'm supposed to set stats\n");
88 CkPrintf(
"I'm supposed to query load\n");
94 #if CONVERSE_VERSION_ELAN 114 CkCallback cb(CkIndex_LdbCoordinator::nodeDone(NULL), 0, thisgroup);
115 contribute(0, NULL, CkReduction::random, cb);
120 if (CkpvAccess(LdbCoordinator_instance) == NULL) {
121 CkpvAccess(LdbCoordinator_instance) =
this;
123 NAMD_bug(
"LdbCoordinator instanced twice on same node!");
148 #ifndef LB_MANAGER_VERSION 159 #ifdef LB_MANAGER_VERSION 205 CkPrintf(
"LDB: Central LB being created...\n");
208 CkPrintf(
"LDB: Hybrid LB being created...\n");
220 int lastLdbStep =
simParams->lastLdbStep;
221 int stepsPerCycle =
simParams->stepsPerCycle;
259 NAMD_die(
"Disaggreement in patchMap data.\n");
268 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC))
269 #if defined(NAMD_MIC) 277 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(BONDED_CUDA)
349 NAMD_bug(
"LdbCoordinator found too many local patches!");
366 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC))
367 #if defined(NAMD_MIC) 375 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(BONDED_CUDA)
410 if ( ! c )
NAMD_bug(
"LdbCoordinator::initialize() null compute pointer");
415 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(BONDED_CUDA)
447 if ( ! c )
NAMD_bug(
"LdbCoordinator::initialize() null compute pointer");
462 if ( ! c )
NAMD_bug(
"LdbCoordinator::initialize() null compute pointer 2");
573 #if 0 //replaced by traceBarrier at Controller and Sequencer 574 if (traceAvailable()) {
575 static int specialTracing = 0;
576 if (
ldbCycleNum == 1 && traceIsOn() == 0) specialTracing = 1;
577 if (specialTracing) {
596 int freq =
simParams->multigratorPressureFreq;
599 if ((step % freq) != 0) dstep = freq - (step % freq);
603 if (step==0) numPressureCycles--;
625 CmiAssert(
id >=0 &&
id <
nPatches);
630 DebugM(10,
"::patchLoad() Unexpected patch reporting in\n");
648 iout <<
"LDB: ============= START OF LOAD BALANCING ============== " << CmiWallTimer() <<
"\n" <<
endi;
649 DebugM(3,
"Controller reached load balance barrier.\n");
653 CProxy_LdbCoordinator(thisgroup).barrier();
674 NAMD_bug(
"Load balancer received wrong number of events.\n");
684 iout <<
"LDB: ============== END OF LOAD BALANCING =============== " << CmiWallTimer() <<
"\n" <<
endi;
698 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
709 if ( m->
to != CkMyPe() ) {
712 CProxy_LdbCoordinator ldbProxy(thisgroup);
713 ldbProxy[m->
to].ExpectMigrate(m);
721 if ( m->
from != CkMyPe() ) {
731 DebugM(3,
"updateComputesReady()\n");
733 CProxy_LdbCoordinator(thisgroup).resume();
734 CkStartQD(CkIndex_LdbCoordinator::resumeReady((CkQdMsg*)0),&thishandle);
750 iout <<
"LDB: =============== DONE WITH MIGRATION ================ " << CmiWallTimer() <<
"\n" <<
endi;
751 DebugM(3,
"resumeReady()\n");
754 CProxy_LdbCoordinator(thisgroup).resume2();
761 #if CONVERSE_VERSION_ELAN 797 for (
int i = 0; i < numNeighbors; ++i ) {
799 if ( proxyNode != myNode ) {
801 for ( j = 0; j < nProxyNodes; ++j ) {
802 if ( neighborNodes[j] == proxyNode )
break;
804 if ( j == nProxyNodes ) {
805 neighborNodes[nProxyNodes] = proxyNode;
818 CkPrintf(
"%d:Patch report:\n",CkMyPe());
826 curLoc += sprintf(curLoc,
"%5d: %5d ",i,
patchNAtoms[i]);
829 if (((j % 4) == 0) && j)
832 CkPrintf(
"[%d]%s\n",CkMyPe(),outputBuf);
837 CkPrintf(
"%d:Compute report:\n",CkMyPe());
851 fprintf(fp,
"%4d ",nProxyNodes);
853 for(
int i=0;i<nProxyNodes;i++)
854 fprintf(fp,
"%4d ",neighborNodes[i]);
858 CProxy_LdbCoordinator(thisgroup)[0].collectLoads(msg);
863 if ( collPes == 0 ) {
865 initTotalProxies = 0;
866 finalTotalProxies = 0;
867 initMaxPeProxies = 0;
868 finalMaxPeProxies = 0;
869 initMaxPatchProxies = 0;
870 finalMaxPatchProxies = 0;
882 #define COLL_MAX(F) if ( msg->F > F ) F = msg->F; 883 #define COLL_AVG(F) F += msg->F * (double) numPes / (double) CkNumPes(); 884 #define COLL_SUM(F) F += msg->F; 903 if ( collPes == CkNumPes() ) {
905 iout <<
"LDB: TIME " << initTime <<
" LOAD: AVG " << initAvgPeLoad
906 <<
" MAX " << initMaxPeLoad <<
" PROXIES: TOTAL " << initTotalProxies <<
" MAXPE " <<
907 initMaxPeProxies <<
" MAXPATCH " << initMaxPatchProxies <<
" " <<
"None" 908 <<
" MEM: " << initMemory <<
" MB\n";
909 if ( reverted )
iout <<
"LDB: Reverting to original mapping on " << reverted <<
" balancers\n";
910 iout <<
"LDB: TIME " << finalTime <<
" LOAD: AVG " << finalAvgPeLoad
911 <<
" MAX " << finalMaxPeLoad <<
" PROXIES: TOTAL " << finalTotalProxies <<
" MAXPE " <<
912 finalMaxPeProxies <<
" MAXPATCH " << finalMaxPatchProxies <<
" " << msg->
strategyName 913 <<
" MEM: " << finalMemory <<
" MB\n";
921 #include "LdbCoordinator.def.h"
int requiredProxies(PatchID id, int [])
void sendCollectLoads(CollectLoadsMsg *)
#define NAMD_BONDEDGPU_IMPROPERS
void LdbCoordinator_initproc()
#define NAMD_BONDEDGPU_CROSSTERMS
Controller * controllerThread
void collectLoads(CollectLoadsMsg *)
#define NAMD_BONDEDGPU_ANISOS
void resumeReady(CkQdMsg *msg)
static PatchMap * Object()
#define NAMD_BONDEDGPU_ANGLES
Sequencer ** sequencerThreads
#define NAMD_BONDEDGPU_THOLES
SimParameters * simParameters
int nStatsMessagesExpected
LDObjHandle * patchHandles
void updateComputesReady()
void AtSyncBarrierReached(void)
void createLoadBalancer()
std::ostream & endi(std::ostream &s)
represents nonbonded or self compute
void Migrate(LDObjHandle handle, int dest)
#define NAMD_BONDEDGPU_DIHEDRALS
void printRequiredProxies(PatchID id, FILE *fp)
HomePatch * homePatch(PatchID pid)
void awakenSequencers(void)
void patchLoad(PatchID id, int nAtoms, int timestep)
void ResumeFromSync(void)
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
int numPatches(void) const
LdbMigrateMsg * migrateMsgs
void CreateNamdHybridLB()
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
void rebalance(Sequencer *seq, PatchID id)
#define NAMD_BONDEDGPU_EXCLS
const int & LdbIdField(const LdbId &id, const int index)
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
static void staticQueryEstLoadFn(LDOMHandle h)
void ExpectMigrate(LdbMigrateMsg *)
void nodeDone(CkReductionMsg *)
#define LDBAL_CENTRALIZED
static void staticReceiveAtSync(void *data)
int basenode(int pid) const
int downstreamNeighbors(int pid, PatchID *neighbor_ids)
Compute * compute(ComputeID cid)
static ComputeMap * Object()
void printLocalLdbReport(void)
computeInfo * computeArray
int nStatsMessagesReceived
int numPids(ComputeID cid)
represents bonded compute
static void staticMigrateFn(LDObjHandle handle, int dest)
int pid(ComputeID cid, int i)
LDBarrierClient ldBarrierHandle
static void staticResumeFromSync(void *data)
static void staticStatsFn(LDOMHandle h, int state)
void updateComputes(int, CkGroupID)
processorInfo * processorArray
void ExecuteMigrations(void)
#define NAMD_BONDEDGPU_BONDS
void RecvMigrate(LdbMigrateMsg *)