// Compile-time constant with value 2. Its use site is not visible in this
// excerpt. NOTE(review): presumably the margin applied when growing the
// inner brick during the torus search -- confirm against the code that
// reads it.
16 #define EXPAND_INNER_BRICK 2
19 int npas,
int npes,
int flag) :
Rebalancer(cs, pas, pes, ncs, npas, npes)
27 for(
int i=0; i<4; i++) {
46 void RefineTorusLB::strategy() {
50 const int endGroup = beginGroup +
P;
// INGROUP(PROC): true when PROC lies in the half-open range
// [beginGroup, endGroup). Both bounds are taken from the scope in which the
// macro is expanded; the visible declaration sets endGroup = beginGroup + P.
// PROC is evaluated twice, so pass an expression without side effects.
51 #define INGROUP(PROC) ((PROC) >= beginGroup && (PROC) < endGroup)
74 double step = 0.01, start = 1.01 + ((double)P)/((double)numComputes);
78 int maxLoad = (int)((dCurLoad - start)/step + 1);
79 double dMinLoad = minLoad * step + start;
80 double dMaxLoad = maxLoad * step + start;
90 CkPrintf(
"Error: Could not refine at max overload\n");
97 if(maxLoad - minLoad <= 1)
100 curLoad = (maxLoad + minLoad)/2;
122 const int endGroup = beginGroup +
P;
125 for(
int i=0; i<
P; i++) {
133 iout <<
"\n Before Refinement Summary\n" <<
endi;
139 for(
int j=0; j<6; j++) {
140 bestPe[j] = &pcpairarray[j];
141 goodPe[j] = &pcpairarray[j+6];
151 for(
int j=0; j<6; j++) {
163 #define SELECT_REALPE(X) if INGROUP((X)) { \
164 selectPes(&processors[(X) - beginGroup], c); \
171 if ( realPe2 != realPe1 ) {
// REASSIGN(GRID): when GRID->c is non-null, calls deAssign(GRID->c, donor),
// then assign(GRID->c, GRID->p), and finally stores GRID->p in bestP.
// `donor` and `bestP` are taken from the scope in which the macro is expanded.
// The expansion is a bare `if (...) { ... }` (no do/while(0) wrapper) and GRID
// is not parenthesized, so pass a simple pointer name and avoid placing an
// `else` directly after a use of this macro.
192 #define REASSIGN(GRID) if (GRID->c) { deAssign(GRID->c, donor); \
193 assign(GRID->c, GRID->p); bestP = GRID->p; }
223 if ( ! bestP && CmiNumNodes() > 1 ) {
225 good.
c = 0; good.
p = 0;
230 int realNode1 = CmiNodeOf(realPe1);
231 int nodeSize = CmiNodeSize(realNode1);
232 if ( nodeSize > 1 ) {
233 int firstpe = CmiNodeFirst(realNode1);
234 for (
int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
246 if ( realPe2 != realPe1 ) {
247 int realNode2 = CmiNodeOf(realPe2);
248 if ( realNode2 != realNode1 ) {
249 nodeSize = CmiNodeSize(realNode2);
250 if ( nodeSize > 1 ) {
251 int firstpe = CmiNodeFirst(realNode2);
252 for (
int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
273 if ( ! bestP && ( CmiNumPhysicalNodes() > 1 ) &&
274 ( CmiNumPhysicalNodes() < CmiNumNodes() ) ) {
276 good.
c = 0; good.
p = 0;
281 int realNode1 = CmiPhysicalNodeID(realPe1);
284 CmiGetPesOnPhysicalNode(realNode1, &rpelist, &nodeSize);
285 if ( nodeSize > 1 ) {
286 for (
int ipe = 0; ipe < nodeSize; ++ipe ) {
287 int rpe = rpelist[ipe];
299 if ( realPe2 != realPe1 ) {
300 int realNode2 = CmiPhysicalNodeID(realPe2);
301 if ( realNode2 != realNode1 ) {
302 CmiGetPesOnPhysicalNode(realNode2, &rpelist, &nodeSize);
303 if ( nodeSize > 1 ) {
304 for (
int ipe = 0; ipe < nodeSize; ++ipe ) {
305 int rpe = rpelist[ipe];
326 if(bestP->
load > averageLoad) {
349 int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
350 int dimNX, dimNY, dimNZ, dimNT;
353 good.
c = 0; good.
p = 0;
362 tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
363 tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
364 dimNX = tmgr.getDimNX();
365 dimNY = tmgr.getDimNY();
366 dimNZ = tmgr.getDimNZ();
367 dimNT = tmgr.getDimNT();
385 for(
int i=xm; i<=xM; i++)
386 for(
int j=ym; j<=yM; j++)
387 for(
int k=zm; k<=zM; k++)
388 for(
int l=0; l<dimNT; l++)
390 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
415 good.
c = 0; good.
p = 0;
424 tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
425 tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
426 dimNX = tmgr.getDimNX();
427 dimNY = tmgr.getDimNY();
428 dimNZ = tmgr.getDimNZ();
429 dimNT = tmgr.getDimNT();
435 for(
int i=xM+1; i<xm+dimNX; i++)
436 for(
int j=0; j<dimNY; j++)
437 for(
int k=0; k<dimNZ; k++)
438 for(
int l=0; l<dimNT; l++)
440 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
453 for(
int j=yM+1; j<ym+dimNY; j++)
454 for(
int i=xm; i<=xM; i++)
455 for(
int k=0; k<dimNZ; k++)
456 for(
int l=0; l<dimNT; l++)
458 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
472 for(
int k=zM+1; k<zm+dimNZ; k++)
473 for(
int i=xm; i<=xM; i++)
474 for(
int j=ym; j<=yM; j++)
475 for(
int l=0; l<dimNT; l++)
477 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
498 if (good.
p->
load > averageLoad) lightPes->
remove(good.
p);
552 if(bestP->
load > averageLoad) lightPes->
remove(bestP);
567 iout <<
"After Refinement Summary\n" <<
endi;
588 index = (numEither*(numEither+1))/2 +
numProxies;
592 int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
593 int dimNX, dimNY, dimNZ, dimNT;
598 tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
599 tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
600 dimNX = tmgr.getDimNX();
601 dimNY = tmgr.getDimNY();
602 dimNZ = tmgr.getDimNZ();
603 dimNT = tmgr.getDimNT();
612 tmgr.rankToCoordinates(p->
Id, x, y, z, t);
613 int wB =
withinBrick(x, y, z, xm, xM, dimNX, ym, yM, dimNY, zm, zM, dimNZ);
616 pcpair* &oldp = bestPe[index];
618 if (!(oldp->p) || ((p->
load + c->
load) < (oldp->p->load + oldp->c->load))) {
624 pcpair* &oldp = goodPe[index];
625 double loadDiff = 0.0;
631 loadDiff = oldp->p->
load + oldp->c->load - p->
load - c->
load;
632 if ( (loadDiff > 0.4) || (loadDiff > 0.0 && (tmgr.getHopsBetweenRanks(p->
Id, p1) + tmgr.getHopsBetweenRanks(p->
Id, p2) < tmgr.getHopsBetweenRanks((oldp->p)->Id, p1) + tmgr.getHopsBetweenRanks((oldp->p)->Id, p2))) ) {
BlockLoad::TempStorage load
#define EXPAND_INNER_BRICK
void createSpanningTree()
void assign(computeInfo *c, processorInfo *pRec)
std::ostream & endi(std::ostream &s)
void insert(InfoRecord *)
processorInfo * processors
void printLoads(int phase=0)
void numAvailable(computeInfo *c, processorInfo *p, int *nPatches, int *nProxies, int *isBadForCommunication)
void deAssign(computeInfo *c, processorInfo *pRec)
InfoRecord * next(Iterator *)
int withinBrick(int x, int y, int z, int xm, int xM, int dimX, int ym, int yM, int dimY, int zm, int zM, int dimZ)
void brickDim(int a, int b, int dim, int &min, int &max)
RefineTorusLB(computeInfo *cs, patchInfo *pas, processorInfo *pes, int ncs, int npas, int npes, int flag)
InfoRecord * iterator(Iterator *)
const char * strategyName