00001
00002
00003
00004
00005
00006
00007
00015 #include "RefineTorusLB.h"
00016 #define EXPAND_INNER_BRICK 2
00017
00018 RefineTorusLB::RefineTorusLB(computeInfo *cs, patchInfo *pas, processorInfo *pes, int ncs,
00019 int npas, int npes, int flag) : Rebalancer(cs, pas, pes, ncs, npas, npes)
00020 {
00021 if(flag==1) {
00022 strategyName = "RefineTorusLB";
00023 strategy();
00024
00025 #if 0
00026 if(proxySendSpanning || proxyRecvSpanning) {
00027 for(int i=0; i<4; i++) {
00028 decrSTLoad();
00029 computeAverage();
00030 createSpanningTree();
00031 incrSTLoad();
00032
00033
00034 InitProxyUsage();
00035 binaryRefine();
00036 printLoads();
00037 createSpanningTree();
00038 }
00039 }
00040 #endif
00041 }
00042 }
00043
00044 RefineTorusLB::~RefineTorusLB() { }
00045
00046 void RefineTorusLB::strategy() {
00047 int index, realPe;
00048
00049 const int beginGroup = processors[0].Id;
00050 const int endGroup = beginGroup + P;
00051 #define INGROUP(PROC) ((PROC) >= beginGroup && (PROC) < endGroup)
00052
00053 firstAssignInRefine = 0;
00054 for(int i=0; i<numComputes; i++){
00055
00056 realPe = computes[i].oldProcessor;
00057 if INGROUP(realPe) {
00058 index = realPe - processors[0].Id;
00059 assign((computeInfo *) &(computes[i]), (processorInfo *) &(processors[index]));
00060 }
00061 }
00062 firstAssignInRefine = 1;
00063
00064 printLoads();
00065 binaryRefine();
00066 printLoads();
00067 }
00068
00069 void RefineTorusLB::binaryRefine() {
00070
00071 computeAverage();
00072 double max = computeMax();
00073
00074 double step = 0.01, start = 1.01 + ((double)P)/((double)numComputes);
00075 double dCurLoad = max/averageLoad;
00076 int curLoad;
00077 int minLoad = 0;
00078 int maxLoad = (int)((dCurLoad - start)/step + 1);
00079 double dMinLoad = minLoad * step + start;
00080 double dMaxLoad = maxLoad * step + start;
00081
00082
00083 int done=0;
00084 overLoad = dMinLoad;
00085 if(newRefine())
00086 done = 1;
00087 else {
00088 overLoad = dMaxLoad;
00089 if(!newRefine()) {
00090 CkPrintf("Error: Could not refine at max overload\n");
00091 done = 1;
00092 }
00093 }
00094
00095
00096 while(!done) {
00097 if(maxLoad - minLoad <= 1)
00098 done = 1;
00099 else {
00100 curLoad = (maxLoad + minLoad)/2;
00101 overLoad = curLoad * step + start;
00102 if(newRefine())
00103 maxLoad = curLoad;
00104 else
00105 minLoad = curLoad;
00106 }
00107 }
00108 }
00109
00110 int RefineTorusLB::newRefine() {
00111 int done = 1;
00112 maxHeap *heavyPes = new maxHeap(P);
00113 IRSet *lightPes = new IRSet();
00114 processorInfo *donor, *p, *bestP;
00115 computeInfo *c;
00116 Iterator nextC, nextP;
00117 pcpair good;
00118 double thresholdLoad = overLoad * averageLoad;
00119 int index, realPe;
00120
00121 const int beginGroup = processors[0].Id;
00122 const int endGroup = beginGroup + P;
00123
00124
00125 for(int i=0; i<P; i++) {
00126 if (processors[i].load > thresholdLoad)
00127 heavyPes->insert((InfoRecord *) &(processors[i]));
00128 else
00129 lightPes->insert((InfoRecord *) &(processors[i]));
00130 }
00131
00132 #if LDB_DEBUG
00133 iout << "\n Before Refinement Summary\n" << endi;
00134 printSummary();
00135 #endif
00136
00137 pcpair pcpairarray[12];
00138
00139 for(int j=0; j<6; j++) {
00140 bestPe[j] = &pcpairarray[j];
00141 goodPe[j] = &pcpairarray[j+6];
00142 }
00143
00144 while(1) {
00145 while(donor = (processorInfo*)heavyPes->deleteMax())
00146 if(donor->computeSet.hasElements())
00147 break;
00148
00149 if(!donor) break;
00150
00151 for(int j=0; j<6; j++) {
00152 bestPe[j]->reset();
00153 goodPe[j]->reset();
00154 }
00155
00156 nextC.id = 0;
00157 c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00158
00159 while(c) {
00160
00161
00162
00163 #define SELECT_REALPE(X) if INGROUP((X)) { \
00164 selectPes(&processors[(X) - beginGroup], c); \
00165 }
00166
00167 int realPe1 = patches[c->patch1].processor;
00168 SELECT_REALPE(realPe1)
00169
00170 int realPe2 = patches[c->patch2].processor;
00171 if ( realPe2 != realPe1 ) {
00172 SELECT_REALPE(realPe2)
00173 }
00174
00175
00176 p = (processorInfo *)(patches[c->patch1].proxiesOn.iterator((Iterator *)&nextP));
00177 while(p) {
00178 if INGROUP(p->Id) selectPes(p, c);
00179 p = (processorInfo *)(patches[c->patch1].proxiesOn.next((Iterator *)&nextP));
00180 }
00181
00182 p = (processorInfo *)(patches[c->patch2].proxiesOn.iterator((Iterator *)&nextP));
00183 while(p) {
00184 if INGROUP(p->Id) selectPes(p, c);
00185 p = (processorInfo *)(patches[c->patch2].proxiesOn.next((Iterator *)&nextP));
00186 }
00187
00188 nextC.id++;
00189 c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00190 }
00191
00192 #define REASSIGN(GRID) if (GRID->c) { deAssign(GRID->c, donor); \
00193 assign(GRID->c, GRID->p); bestP = GRID->p; }
00194
00195 bestP = 0;
00196
00197 REASSIGN(bestPe[5])
00198 #if USE_TOPOMAP
00199 else REASSIGN(goodPe[5])
00200 #endif
00201 else REASSIGN(bestPe[4])
00202 #if USE_TOPOMAP
00203 else REASSIGN(goodPe[4])
00204 #endif
00205 else REASSIGN(bestPe[3])
00206 #if USE_TOPOMAP
00207 else REASSIGN(goodPe[3])
00208 #endif
00209 else REASSIGN(bestPe[1])
00210 #if USE_TOPOMAP
00211 else REASSIGN(goodPe[1])
00212 #endif
00213 else REASSIGN(bestPe[2])
00214 #if USE_TOPOMAP
00215 else REASSIGN(goodPe[2])
00216 #endif
00217 else REASSIGN(bestPe[0])
00218 #if USE_TOPOMAP
00219 else REASSIGN(goodPe[0])
00220 #endif
00221
00222
00223 if ( ! bestP && CmiNumNodes() > 1 ) {
00224 double minLoad = overLoad * averageLoad;
00225 good.c = 0; good.p = 0;
00226 nextC.id = 0;
00227 c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00228 while(c) {
00229 int realPe1 = patches[c->patch1].processor;
00230 int realNode1 = CmiNodeOf(realPe1);
00231 int nodeSize = CmiNodeSize(realNode1);
00232 if ( nodeSize > 1 ) {
00233 int firstpe = CmiNodeFirst(realNode1);
00234 for ( int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
00235 if INGROUP(rpe) {
00236 p = &processors[rpe - beginGroup];
00237 if ( p->available && ( p->load + c->load < minLoad ) ) {
00238 minLoad = p->load + c->load;
00239 good.c = c;
00240 good.p = p;
00241 }
00242 }
00243 }
00244 }
00245 int realPe2 = patches[c->patch2].processor;
00246 if ( realPe2 != realPe1 ) {
00247 int realNode2 = CmiNodeOf(realPe2);
00248 if ( realNode2 != realNode1 ) {
00249 nodeSize = CmiNodeSize(realNode2);
00250 if ( nodeSize > 1 ) {
00251 int firstpe = CmiNodeFirst(realNode2);
00252 for ( int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
00253 if INGROUP(rpe) {
00254 p = &processors[rpe - beginGroup];
00255 if ( p->available && ( p->load + c->load < minLoad ) ) {
00256 minLoad = p->load + c->load;
00257 good.c = c;
00258 good.p = p;
00259 }
00260 }
00261 }
00262 }
00263 }
00264 }
00265 nextC.id++;
00266 c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00267 }
00268
00269 REASSIGN((&good))
00270 }
00271
00272
00273 if ( ! bestP && ( CmiNumPhysicalNodes() > 1 ) &&
00274 ( CmiNumPhysicalNodes() < CmiNumNodes() ) ) {
00275 double minLoad = overLoad * averageLoad;
00276 good.c = 0; good.p = 0;
00277 nextC.id = 0;
00278 c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00279 while(c) {
00280 int realPe1 = patches[c->patch1].processor;
00281 int realNode1 = CmiPhysicalNodeID(realPe1);
00282 int *rpelist;
00283 int nodeSize;
00284 CmiGetPesOnPhysicalNode(realNode1, &rpelist, &nodeSize);
00285 if ( nodeSize > 1 ) {
00286 for ( int ipe = 0; ipe < nodeSize; ++ipe ) {
00287 int rpe = rpelist[ipe];
00288 if INGROUP(rpe) {
00289 p = &processors[rpe - beginGroup];
00290 if ( p->available && ( p->load + c->load < minLoad ) ) {
00291 minLoad = p->load + c->load;
00292 good.c = c;
00293 good.p = p;
00294 }
00295 }
00296 }
00297 }
00298 int realPe2 = patches[c->patch2].processor;
00299 if ( realPe2 != realPe1 ) {
00300 int realNode2 = CmiPhysicalNodeID(realPe2);
00301 if ( realNode2 != realNode1 ) {
00302 CmiGetPesOnPhysicalNode(realNode2, &rpelist, &nodeSize);
00303 if ( nodeSize > 1 ) {
00304 for ( int ipe = 0; ipe < nodeSize; ++ipe ) {
00305 int rpe = rpelist[ipe];
00306 if INGROUP(rpe) {
00307 p = &processors[rpe - beginGroup];
00308 if ( p->available && ( p->load + c->load < minLoad ) ) {
00309 minLoad = p->load + c->load;
00310 good.c = c;
00311 good.p = p;
00312 }
00313 }
00314 }
00315 }
00316 }
00317 }
00318 nextC.id++;
00319 c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00320 }
00321
00322 REASSIGN((&good))
00323 }
00324
00325 if(bestP) {
00326 if(bestP->load > averageLoad) {
00327
00328 lightPes->remove(bestP);
00329 } else {
00330
00331 }
00332 if(donor->load > overLoad*averageLoad) {
00333
00334 heavyPes->insert((InfoRecord *) donor);
00335 }
00336 else {
00337
00338 lightPes->insert((InfoRecord *) donor);
00339 }
00340
00341 continue;
00342 }
00343
00344
00345
00346 int found = 0;
00347 #if USE_TOPOMAP
00348
00349 int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00350 int dimNX, dimNY, dimNZ, dimNT;
00351 double minLoad;
00352
00353 good.c = 0; good.p = 0;
00354 minLoad = overLoad*averageLoad;
00355 nextC.id = 0;
00356 c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00357
00358 while(c) {
00359 p1 = patches[c->patch1].processor;
00360 p2 = patches[c->patch2].processor;
00361
00362 tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00363 tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00364 dimNX = tmgr.getDimNX();
00365 dimNY = tmgr.getDimNY();
00366 dimNZ = tmgr.getDimNZ();
00367 dimNT = tmgr.getDimNT();
00368
00369 brickDim(x1, x2, dimNX, xm, xM);
00370 brickDim(y1, y2, dimNY, ym, yM);
00371 brickDim(z1, z2, dimNZ, zm, zM);
00372
00373
00374 #if 0
00375 if(xm>=EXPAND_INNER_BRICK) xm=xm-EXPAND_INNER_BRICK; else xm=0;
00376 if(ym>=EXPAND_INNER_BRICK) ym=ym-EXPAND_INNER_BRICK; else ym=0;
00377 if(zm>=EXPAND_INNER_BRICK) zm=zm-EXPAND_INNER_BRICK; else zm=0;
00378
00379 xM=xM+EXPAND_INNER_BRICK;
00380 yM=yM+EXPAND_INNER_BRICK;
00381 zM=zM+EXPAND_INNER_BRICK;
00382 #endif
00383
00384
00385 for(int i=xm; i<=xM; i++)
00386 for(int j=ym; j<=yM; j++)
00387 for(int k=zm; k<=zM; k++)
00388 for(int l=0; l<dimNT; l++)
00389 {
00390 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00391 if ( ! INGROUP(pe) ) continue;
00392 p = &processors[pe - beginGroup];
00393 if(c->load + p->load < minLoad) {
00394 minLoad = c->load + p->load;
00395 good.c = c;
00396 good.p = p;
00397 }
00398 }
00399 nextC.id++;
00400 c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00401 }
00402
00403 if(good.c) {
00404 found = 1;
00405
00406 }
00407 else {
00408 found = 0;
00409
00410 }
00411
00412
00413 minLoad = overLoad * averageLoad;
00414 if(found==0) {
00415 good.c = 0; good.p = 0;
00416 p = 0;
00417
00418 nextC.id = 0;
00419 c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00420 while(c) {
00421 p1 = patches[c->patch1].processor;
00422 p2 = patches[c->patch2].processor;
00423
00424 tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00425 tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00426 dimNX = tmgr.getDimNX();
00427 dimNY = tmgr.getDimNY();
00428 dimNZ = tmgr.getDimNZ();
00429 dimNT = tmgr.getDimNT();
00430
00431 brickDim(x1, x2, dimNX, xm, xM);
00432 brickDim(y1, y2, dimNY, ym, yM);
00433 brickDim(z1, z2, dimNZ, zm, zM);
00434
00435 for(int i=xM+1; i<xm+dimNX; i++)
00436 for(int j=0; j<dimNY; j++)
00437 for(int k=0; k<dimNZ; k++)
00438 for(int l=0; l<dimNT; l++)
00439 {
00440 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00441 if ( ! INGROUP(pe) ) continue;
00442 p = &processors[pe - beginGroup];
00443 if(c->load + p->load < minLoad) {
00444 good.c = c;
00445 good.p = p;
00446 found = 1; break;
00447 }
00448 }
00449
00450 if(found==1)
00451 break;
00452 else {
00453 for(int j=yM+1; j<ym+dimNY; j++)
00454 for(int i=xm; i<=xM; i++)
00455 for(int k=0; k<dimNZ; k++)
00456 for(int l=0; l<dimNT; l++)
00457 {
00458 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00459 if ( ! INGROUP(pe) ) continue;
00460 p = &processors[pe - beginGroup];
00461 if(c->load + p->load < minLoad) {
00462 good.c = c;
00463 good.p = p;
00464 found = 1; break;
00465 }
00466 }
00467 }
00468
00469 if(found==1)
00470 break;
00471 else {
00472 for(int k=zM+1; k<zm+dimNZ; k++)
00473 for(int i=xm; i<=xM; i++)
00474 for(int j=ym; j<=yM; j++)
00475 for(int l=0; l<dimNT; l++)
00476 {
00477 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00478 if ( ! INGROUP(pe) ) continue;
00479 p = &processors[pe - beginGroup];
00480 if(c->load + p->load < minLoad) {
00481 good.c = c;
00482 good.p = p;
00483 found = 1; break;
00484 }
00485 }
00486 }
00487
00488 if(found==1) break;
00489
00490 nextC.id++;
00491 c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00492 }
00493 }
00494
00495 if(found == 1) {
00496 deAssign(good.c, donor);
00497 assign(good.c, good.p);
00498 if (good.p->load > averageLoad) lightPes->remove(good.p);
00499 if (donor->load > overLoad*averageLoad)
00500 heavyPes->insert((InfoRecord *) donor);
00501 else
00502 lightPes->insert((InfoRecord *) donor);
00503 continue;
00504 }
00505
00506 #endif
00507
00508
00509 p = (processorInfo *)lightPes->iterator((Iterator *) &nextP);
00510 if(found == 0) {
00511 while (p)
00512 {
00513 nextC.id = 0;
00514 c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00515 while (c)
00516 {
00517 selectPes(p, c);
00518 nextC.id++;
00519 c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00520 }
00521 p = (processorInfo *)lightPes->next((Iterator *) &nextP);
00522 }
00523
00524 bestP = 0;
00525 REASSIGN(bestPe[5])
00526 #if USE_TOPOMAP
00527 else REASSIGN(goodPe[5])
00528 #endif
00529 else REASSIGN(bestPe[4])
00530 #if USE_TOPOMAP
00531 else REASSIGN(goodPe[4])
00532 #endif
00533 else REASSIGN(bestPe[3])
00534 #if USE_TOPOMAP
00535 else REASSIGN(goodPe[3])
00536 #endif
00537 else REASSIGN(bestPe[1])
00538 #if USE_TOPOMAP
00539 else REASSIGN(goodPe[1])
00540 #endif
00541 else REASSIGN(bestPe[2])
00542 #if USE_TOPOMAP
00543 else REASSIGN(goodPe[2])
00544 #endif
00545 else REASSIGN(bestPe[0])
00546 #if USE_TOPOMAP
00547 else REASSIGN(goodPe[0])
00548 #endif
00549 }
00550
00551 if(bestP) {
00552 if(bestP->load > averageLoad) lightPes->remove(bestP);
00553 if(donor->load > overLoad*averageLoad)
00554 heavyPes->insert((InfoRecord *) donor);
00555 else
00556 lightPes->insert((InfoRecord *) donor);
00557 continue;
00558 }
00559 else {
00560 done = 0;
00561 break;
00562 }
00563
00564 }
00565
00566 #if LDB_DEBUG
00567 iout << "After Refinement Summary\n" << endi;
00568 printSummary();
00569 #endif
00570
00571 delete heavyPes;
00572 delete lightPes;
00573
00574 return done;
00575 }
00576
00577 void RefineTorusLB::selectPes(processorInfo *p, computeInfo *c) {
00578 if (p->available == CmiFalse)
00579 return;
00580
00581
00582
00583
00584
00585 int numPatches, numProxies, index;
00586 numAvailable(c, p, &numPatches, &numProxies, 0 );
00587 int numEither = numPatches + numProxies;
00588 index = (numEither*(numEither+1))/2 + numProxies;
00589
00590 #if USE_TOPOMAP
00591 int x, y, z, t;
00592 int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00593 int dimNX, dimNY, dimNZ, dimNT;
00594 double minLoad;
00595 p1 = patches[c->patch1].processor;
00596 p2 = patches[c->patch2].processor;
00597
00598 tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00599 tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00600 dimNX = tmgr.getDimNX();
00601 dimNY = tmgr.getDimNY();
00602 dimNZ = tmgr.getDimNZ();
00603 dimNT = tmgr.getDimNT();
00604
00605 brickDim(x1, x2, dimNX, xm, xM);
00606 brickDim(y1, y2, dimNY, ym, yM);
00607 brickDim(z1, z2, dimNZ, zm, zM);
00608 #endif
00609
00610 if (p->load + c->load < overLoad * averageLoad) {
00611 #if USE_TOPOMAP
00612 tmgr.rankToCoordinates(p->Id, x, y, z, t);
00613 int wB = withinBrick(x, y, z, xm, xM, dimNX, ym, yM, dimNY, zm, zM, dimNZ);
00614 if (wB) {
00615 #endif
00616 pcpair* &oldp = bestPe[index];
00617
00618 if (!(oldp->p) || ((p->load + c->load) < (oldp->p->load + oldp->c->load))) {
00619 oldp->p = p;
00620 oldp->c = c;
00621 }
00622 #if USE_TOPOMAP
00623 } else {
00624 pcpair* &oldp = goodPe[index];
00625 double loadDiff = 0.0;
00626
00627 if (!(oldp->p)) {
00628 oldp->p = p;
00629 oldp->c = c;
00630 } else {
00631 loadDiff = oldp->p->load + oldp->c->load - p->load - c->load;
00632 if ( (loadDiff > 0.4) || (loadDiff > 0.0 && (tmgr.getHopsBetweenRanks(p->Id, p1) + tmgr.getHopsBetweenRanks(p->Id, p2) < tmgr.getHopsBetweenRanks((oldp->p)->Id, p1) + tmgr.getHopsBetweenRanks((oldp->p)->Id, p2))) ) {
00633 oldp->p = p;
00634 oldp->c = c;
00635 }
00636 }
00637 }
00638 #endif
00639 }
00640 }
00641