22 #include "LdbCoordinator.decl.h" 31 #define MIN_DEBUG_LEVEL 3 42 #include "ComputeMgr.decl.h" 46 #if CONVERSE_VERSION_ELAN 47 extern "C" void enableBlockingReceives();
48 extern "C" void disableBlockingReceives();
57 #ifndef LB_MANAGER_VERSION 73 if ( msg->
to != CkMyPe() ) {
74 CProxy_LdbCoordinator ldbProxy(thisgroup);
75 ldbProxy[CkMyPe()].RecvMigrate(msg);
83 CkPrintf(
"I'm supposed to set stats\n");
88 CkPrintf(
"I'm supposed to query load\n");
94 #if CONVERSE_VERSION_ELAN 114 CkCallback cb(CkIndex_LdbCoordinator::nodeDone(NULL), 0, thisgroup);
115 contribute(0, NULL, CkReduction::random, cb);
120 if (CkpvAccess(LdbCoordinator_instance) == NULL) {
121 CkpvAccess(LdbCoordinator_instance) =
this;
123 NAMD_bug(
"LdbCoordinator instanced twice on same node!");
148 #ifndef LB_MANAGER_VERSION 159 #ifdef LB_MANAGER_VERSION 205 CkPrintf(
"LDB: Central LB being created...\n");
208 CkPrintf(
"LDB: Hybrid LB being created...\n");
220 int lastLdbStep =
simParams->lastLdbStep;
221 int stepsPerCycle =
simParams->stepsPerCycle;
259 NAMD_die(
"Disaggreement in patchMap data.\n");
268 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC))
269 #if defined(NAMD_MIC) 277 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(BONDED_CUDA)
351 NAMD_bug(
"LdbCoordinator found too many local patches!");
368 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC))
369 #if defined(NAMD_MIC) 377 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(BONDED_CUDA)
413 if ( ! c )
NAMD_bug(
"LdbCoordinator::initialize() null compute pointer");
418 #
if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(BONDED_CUDA)
451 if ( ! c )
NAMD_bug(
"LdbCoordinator::initialize() null compute pointer");
466 if ( ! c )
NAMD_bug(
"LdbCoordinator::initialize() null compute pointer 2");
577 #if 0 //replaced by traceBarrier at Controller and Sequencer 578 if (traceAvailable()) {
579 static int specialTracing = 0;
580 if (
ldbCycleNum == 1 && traceIsOn() == 0) specialTracing = 1;
581 if (specialTracing) {
600 int freq =
simParams->multigratorPressureFreq;
603 if ((step % freq) != 0) dstep = freq - (step % freq);
607 if (step==0) numPressureCycles--;
629 CmiAssert(
id >=0 &&
id <
nPatches);
634 DebugM(10,
"::patchLoad() Unexpected patch reporting in\n");
652 iout <<
"LDB: ============= START OF LOAD BALANCING ============== " << CmiWallTimer() <<
"\n" <<
endi;
653 DebugM(3,
"Controller reached load balance barrier.\n");
657 CProxy_LdbCoordinator(thisgroup).barrier();
678 NAMD_bug(
"Load balancer received wrong number of events.\n");
688 iout <<
"LDB: ============== END OF LOAD BALANCING =============== " << CmiWallTimer() <<
"\n" <<
endi;
702 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
713 if ( m->
to != CkMyPe() ) {
716 CProxy_LdbCoordinator ldbProxy(thisgroup);
717 ldbProxy[m->
to].ExpectMigrate(m);
725 if ( m->
from != CkMyPe() ) {
735 DebugM(3,
"updateComputesReady()\n");
737 CProxy_LdbCoordinator(thisgroup).resume();
738 CkStartQD(CkIndex_LdbCoordinator::resumeReady((CkQdMsg*)0),&thishandle);
754 iout <<
"LDB: =============== DONE WITH MIGRATION ================ " << CmiWallTimer() <<
"\n" <<
endi;
755 DebugM(3,
"resumeReady()\n");
758 CProxy_LdbCoordinator(thisgroup).resume2();
765 #if CONVERSE_VERSION_ELAN 801 for (
int i = 0; i < numNeighbors; ++i ) {
803 if ( proxyNode != myNode ) {
805 for ( j = 0; j < nProxyNodes; ++j ) {
806 if ( neighborNodes[j] == proxyNode )
break;
808 if ( j == nProxyNodes ) {
809 neighborNodes[nProxyNodes] = proxyNode;
822 CkPrintf(
"%d:Patch report:\n",CkMyPe());
830 curLoc += sprintf(curLoc,
"%5d: %5d ",i,
patchNAtoms[i]);
833 if (((j % 4) == 0) && j)
836 CkPrintf(
"[%d]%s\n",CkMyPe(),outputBuf);
841 CkPrintf(
"%d:Compute report:\n",CkMyPe());
855 fprintf(fp,
"%4d ",nProxyNodes);
857 for(
int i=0;i<nProxyNodes;i++)
858 fprintf(fp,
"%4d ",neighborNodes[i]);
862 CProxy_LdbCoordinator(thisgroup)[0].collectLoads(msg);
867 if ( collPes == 0 ) {
869 initTotalProxies = 0;
870 finalTotalProxies = 0;
871 initMaxPeProxies = 0;
872 finalMaxPeProxies = 0;
873 initMaxPatchProxies = 0;
874 finalMaxPatchProxies = 0;
886 #define COLL_MAX(F) if ( msg->F > F ) F = msg->F; 887 #define COLL_AVG(F) F += msg->F * (double) numPes / (double) CkNumPes(); 888 #define COLL_SUM(F) F += msg->F; 907 if ( collPes == CkNumPes() ) {
909 iout <<
"LDB: TIME " << initTime <<
" LOAD: AVG " << initAvgPeLoad
910 <<
" MAX " << initMaxPeLoad <<
" PROXIES: TOTAL " << initTotalProxies <<
" MAXPE " <<
911 initMaxPeProxies <<
" MAXPATCH " << initMaxPatchProxies <<
" " <<
"None" 912 <<
" MEM: " << initMemory <<
" MB\n";
913 if ( reverted )
iout <<
"LDB: Reverting to original mapping on " << reverted <<
" balancers\n";
914 iout <<
"LDB: TIME " << finalTime <<
" LOAD: AVG " << finalAvgPeLoad
915 <<
" MAX " << finalMaxPeLoad <<
" PROXIES: TOTAL " << finalTotalProxies <<
" MAXPE " <<
916 finalMaxPeProxies <<
" MAXPATCH " << finalMaxPatchProxies <<
" " << msg->
strategyName 917 <<
" MEM: " << finalMemory <<
" MB\n";
925 #include "LdbCoordinator.def.h"
int requiredProxies(PatchID id, int [])
void sendCollectLoads(CollectLoadsMsg *)
#define NAMD_BONDEDGPU_IMPROPERS
void LdbCoordinator_initproc()
#define NAMD_BONDEDGPU_CROSSTERMS
Controller * controllerThread
void collectLoads(CollectLoadsMsg *)
#define NAMD_BONDEDGPU_ANISOS
void resumeReady(CkQdMsg *msg)
static PatchMap * Object()
#define NAMD_BONDEDGPU_ONEFOURENBTHOLES
#define NAMD_BONDEDGPU_ANGLES
Sequencer ** sequencerThreads
#define NAMD_BONDEDGPU_THOLES
SimParameters * simParameters
int nStatsMessagesExpected
LDObjHandle * patchHandles
void updateComputesReady()
void AtSyncBarrierReached(void)
void createLoadBalancer()
std::ostream & endi(std::ostream &s)
represents nonbonded or self compute
void Migrate(LDObjHandle handle, int dest)
#define NAMD_BONDEDGPU_DIHEDRALS
void printRequiredProxies(PatchID id, FILE *fp)
HomePatch * homePatch(PatchID pid)
void awakenSequencers(void)
void patchLoad(PatchID id, int nAtoms, int timestep)
void ResumeFromSync(void)
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
int numPatches(void) const
LdbMigrateMsg * migrateMsgs
void CreateNamdHybridLB()
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
void rebalance(Sequencer *seq, PatchID id)
#define NAMD_BONDEDGPU_EXCLS
const int & LdbIdField(const LdbId &id, const int index)
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
static void staticQueryEstLoadFn(LDOMHandle h)
void ExpectMigrate(LdbMigrateMsg *)
void nodeDone(CkReductionMsg *)
#define LDBAL_CENTRALIZED
static void staticReceiveAtSync(void *data)
int basenode(int pid) const
int downstreamNeighbors(int pid, PatchID *neighbor_ids)
Compute * compute(ComputeID cid)
static ComputeMap * Object()
void printLocalLdbReport(void)
computeInfo * computeArray
int nStatsMessagesReceived
int numPids(ComputeID cid)
represents bonded compute
static void staticMigrateFn(LDObjHandle handle, int dest)
int pid(ComputeID cid, int i)
LDBarrierClient ldBarrierHandle
static void staticResumeFromSync(void *data)
static void staticStatsFn(LDOMHandle h, int state)
void updateComputes(int, CkGroupID)
processorInfo * processorArray
void ExecuteMigrations(void)
#define NAMD_BONDEDGPU_BONDS
void RecvMigrate(LdbMigrateMsg *)