23 #define ST_NODE_LOAD 0.005
24 #define PROXY_LOAD 0.001
25 #define COMPUTE_LOAD 0.00005
28 processorInfo *processorArray,
int nComps,
int nPatches,
int nPes)
49 const int endGroup = beginGroup +
P;
50 #define INGROUP(PROC) ((PROC) >= beginGroup && (PROC) < endGroup)
66 for (i=0; i<nPatches; i++) {
81 computeArray[i].processor = -1;
93 float *temploads =
new float[
P];
142 if (
P == CkNumPes() ) {
144 if (
P != CkNumPes() ) {
147 while ( maxinw < CkNumPes() ) {
152 std::setw(w) << std::right <<
processors[0].
Id <<
"-" <<
156 iout <<
" Reverting to original mapping\n" <<
endi;
168 const int endGroup = beginGroup +
P;
177 if (
P != CkNumPes() ) {
249 iout <<
iINFO <<
"Strategy not implemented for the base class.\n" <<
"\n";
276 int numSelfComputes, numPairComputes, numBgSelfComputes, numBgPairComputes;
281 numBgSelfComputes = 0;
282 numBgPairComputes = 0;
302 int numBgComputes = numBgPairComputes + numBgSelfComputes;
309 if ( numBgComputes < 0.3 * numComputes )
break;
370 int numSelfComputes, numPairComputes;
406 #if COMPUTE_CORRECTION
457 <<
" load " << c->
load <<
" to " << p->
Id <<
" new load "
459 <<
" nPatches " << nPatches <<
" nProxies " << nProxies;
460 if ( nPatches + nProxies < 2 )
iout <<
" addProxy";
461 if ( badForComm )
iout <<
" badForComm";
469 iout <<
iINFO <<
"ERROR: Rebalancer tried to deAssign an object that is not on the processor.\n" <<
endi;
473 double temp_load = 0.0;
480 CmiAssert( fabs(temp_load - p->
load) < 0.001 );
549 if ( c->
load + p->
load < thresholdLoad) {
550 int nPatches, nProxies, badForComm;
555 pcpair *pair = &grid[nPatches][nProxies][badForComm];
565 double oldval = pair->
p->
load - pair->
c->
load;
569 if (newval < oldval) {
580 int no_new_proxies = 0;
597 iout <<
"\nBefore Refinement Summary" <<
"\n";
626 if ( ! no_new_proxies ) {
640 #define REASSIGN(GRID) if (GRID.c) { \
641 deAssign(GRID.c, donor); \
642 assign(GRID.c, GRID.p); \
667 proxiesOn.iterator((
Iterator *)&nextProc);
671 proxiesOn.next((
Iterator*)&nextProc);
677 proxiesOn.iterator((
Iterator *)&nextProc);
681 proxiesOn.next((
Iterator*)&nextProc);
694 else if ( no_new_proxies ) { finish = 0;
break; }
702 if (bestP->
load > averageLoad) lightProcessors->
remove(bestP);
703 if (donor->
load > thresholdLoad)
711 <<
"ERROR: Rebalancer::refine() algorithm is broken.\n" <<
endi;
757 else { finish = 0;
break; }
758 if (bestP->
load > averageLoad) lightProcessors->
remove(bestP);
759 if (donor->
load > thresholdLoad)
767 iout <<
"After Refinement Summary" <<
"\n";
771 iout <<
iINFO <<
"Refine: No solution found for overLoad = "
776 delete heavyProcessors;
777 delete lightProcessors;
794 iout <<
"******** Processors with background load > average load ********" <<
"\n";
797 int numOverloaded = 0;
798 for (
int ip=0; ip<
P; ip++) {
806 if ( numOverloaded ) {
808 <<
" processors are overloaded due to high background load.\n" <<
endi;
811 iout <<
"******** Processor List Ends ********" <<
"\n\n";
814 const double overloadStep = 0.01;
815 const double overloadStart = overload_start;
816 double dCurOverload = max / avg;
819 int maxOverload = (int)((dCurOverload - overloadStart)/overloadStep + 1);
820 double dMinOverload = minOverload * overloadStep + overloadStart;
821 double dMaxOverload = maxOverload * overloadStep + overloadStart;
825 <<
"Balancing from " << minOverload <<
" = " << dMinOverload
826 <<
" to " << maxOverload <<
"=" << dMaxOverload
827 <<
" dCurOverload=" << dCurOverload <<
" max=" << max <<
" avg=" << avg
840 iout <<
iINFO <<
"ERROR: Could not refine at max overload\n" <<
endi;
846 while (!refineDone) {
847 if (maxOverload - minOverload <= 1)
850 curOverload = (maxOverload + minOverload ) / 2;
852 overLoad = curOverload * overloadStep + overloadStart;
854 iout <<
iINFO <<
"Testing curOverload " << curOverload
856 << minOverload <<
", " << maxOverload
860 maxOverload = curOverload;
862 minOverload = curOverload;
870 iout <<
iINFO <<
"ready to print result \n" <<
"\n";
877 int i, total = 0, numBytes = 0;
880 int maxpatchproxies = 0;
881 double avgBgLoad =0.0;
883 for (i=0; i<
P; i++) {
887 if ( nproxies > maxproxies ) maxproxies = nproxies;
897 if ( myProxies > maxpatchproxies ) maxpatchproxies = myProxies;
908 if ( P == CkNumPes() ) {
910 if ( P != CkNumPes() ) {
913 while ( maxinw < CkNumPes() ) {
918 std::setw(w) << std::right <<
processors[0].
Id <<
"-" <<
923 <<
" MAX " << max <<
" PROXIES: TOTAL " << total <<
" MAXPE " <<
924 maxproxies <<
" MAXPATCH " << maxpatchproxies <<
" " <<
strategyName
929 if ( P != CkNumPes() ) {
932 NAMD_bug(
"Rebalancer::printLoads(0) called with hybrid balancer.");
935 if (
collMsg )
NAMD_bug(
"Rebalancer::printLoads(1) collMsg not null.");
969 NAMD_bug(
"Rebalancer::printLoads() called with unknown phase.");
984 for (i=1; i<
P; i++) {
995 iout <<
iINFO <<
" min = " << min <<
" processor " << min_proc <<
"\n";
996 iout <<
iINFO <<
" max = " << max <<
" processor " << max_proc <<
"\n";
997 iout <<
iINFO <<
" total = " << total <<
" average = " << total/P <<
"\n";
1008 for (i=0; i<
P; i++) {
1015 CmiPrintf(
"Warning: no processors available for load balancing!\n");
1033 double bgtotal = 0.;
1034 for (i=0; i<
P; i++) {
1042 for (i=0; i<
P; i++) {
1045 if ( bgload < bgavg ) {
1075 int *nPatches,
int *nProxies,
int *isBadForCommunication)
1079 int patch_count = 0;
1080 int proxy_count = 0;
1083 const int endGroup = beginGroup +
P;
1107 *nPatches = patch_count;
1108 *nProxies = proxy_count;
1110 if ( isBadForCommunication ) {
1113 if ( patch_count + proxy_count < 2 ) {
1118 if ( proxiesPerPeLimit < 6 ) proxiesPerPeLimit = 6;
1123 if ( proxiesPerPatchLimit < 6 ) proxiesPerPatchLimit = 6;
1125 if ( ! bad && ! pa1_avail ) {
1129 index = realPe - beginGroup;
1131 if (
processors[index].backgroundLoad > bgLoadLimit) bad = 1;
1136 if ( ! bad && ! pa2_avail ) {
1140 index = realPe - beginGroup;
1142 if (
processors[index].backgroundLoad > bgLoadLimit) bad = 1;
1150 *isBadForCommunication = bad;
1158 #ifndef NODEAWARE_PROXY_SPANNINGTREE
1187 CkPrintf(
"Done intialising\n");
1188 #ifdef NODEAWARE_PROXY_SPANNINGTREE
BlockLoad::TempStorage load
void sendCollectLoads(CollectLoadsMsg *)
std::ostream & iINFO(std::ostream &s)
static ProxyMgr * Object()
static PatchMap * Object()
CollectLoadsMsg * collMsg
void createSpanningTree()
void assign(computeInfo *c, processorInfo *pRec)
std::ostream & endi(std::ostream &s)
std::ostream & iWARN(std::ostream &s)
void insert(InfoRecord *)
void refine_togrid(pcgrid &grid, double thresholdLoad, processorInfo *p, computeInfo *c)
processorInfo * processors
void printLoads(int phase=0)
void numAvailable(computeInfo *c, processorInfo *p, int *nPatches, int *nProxies, int *isBadForCommunication)
static Units next(Units u)
void deAssign(computeInfo *c, processorInfo *pRec)
InfoRecord * next(Iterator *)
void multirefine(double overload_start=1.02)
void NAMD_bug(const char *err_msg)
maxHeap * computeBgSelfHeap
maxHeap * computeSelfHeap
maxHeap * computePairHeap
void increment(int pe, int patch)
void buildSpanningTree0()
void adjustBackgroundLoadAndComputeAverage()
static LdbCoordinator * Object()
void decrement(int pe, int patch)
maxHeap * computeBgPairHeap
int isAvailableOn(patchInfo *patch, processorInfo *p)
void unchecked_insert(InfoRecord *)
int getVal(int pe, int patch)
InfoRecord * iterator(Iterator *)
Rebalancer(computeInfo *computeArray, patchInfo *patchArray, processorInfo *processorArray, int nComps, int nPatches, int nPes)
const char * strategyName