RefineTorusLB Class Reference

#include <RefineTorusLB.h>

Inheritance diagram for RefineTorusLB:

Rebalancer TorusLB List of all members.

Public Member Functions

 RefineTorusLB (computeInfo *cs, patchInfo *pas, processorInfo *pes, int ncs, int npas, int npes, int flag)
 ~RefineTorusLB ()
void binaryRefine ()
int newRefine ()

Detailed Description

Definition at line 13 of file RefineTorusLB.h.


Constructor & Destructor Documentation

RefineTorusLB::RefineTorusLB ( computeInfo cs,
patchInfo pas,
processorInfo pes,
int  ncs,
int  npas,
int  npes,
int  flag 
)

Definition at line 18 of file RefineTorusLB.C.

References binaryRefine(), Rebalancer::computeAverage(), Rebalancer::createSpanningTree(), Rebalancer::decrSTLoad(), Rebalancer::incrSTLoad(), Rebalancer::InitProxyUsage(), Rebalancer::printLoads(), proxyRecvSpanning, proxySendSpanning, and Rebalancer::strategyName.

00019                               : Rebalancer(cs, pas, pes, ncs, npas, npes)
00020 {
00021   if(flag==1) {
00022     strategyName = "RefineTorusLB";
00023     strategy();
00024     // CREATE THE SPANNING TREE IN THE LOAD BALANCER
00025 #if 0
00026     if(proxySendSpanning || proxyRecvSpanning) {
00027     for(int i=0; i<4; i++) {
00028       decrSTLoad();
00029       computeAverage();
00030       createSpanningTree();
00031       incrSTLoad();
00032       // for(int i=0; i<P; i++)
00033       //   delete [] processors[i].proxyUsage;
00034       InitProxyUsage();
00035       binaryRefine();
00036       printLoads();
00037       createSpanningTree();
00038     }
00039     }
00040 #endif
00041   }
00042 }

RefineTorusLB::~RefineTorusLB (  ) 

Definition at line 44 of file RefineTorusLB.C.

00044 { }


Member Function Documentation

void RefineTorusLB::binaryRefine (  ) 

Definition at line 69 of file RefineTorusLB.C.

References Rebalancer::averageLoad, Rebalancer::computeAverage(), Rebalancer::computeMax(), newRefine(), and Rebalancer::overLoad.

Referenced by RefineTorusLB().

00069                                  {
00070   // compute the max and average load
00071   computeAverage();
00072   double max = computeMax();
00073 
00074   double step = 0.01, start = 1.01 + ((double)P)/((double)numComputes);
00075   double dCurLoad = max/averageLoad;
00076   int curLoad;
00077   int minLoad = 0;
00078   int maxLoad = (int)((dCurLoad - start)/step + 1);
00079   double dMinLoad = minLoad * step + start;
00080   double dMaxLoad = maxLoad * step + start;
00081 
00082   // check the two limits of the search: start and dMaxLoad
00083   int done=0;
00084   overLoad = dMinLoad;
00085   if(newRefine())
00086     done = 1;
00087   else {
00088     overLoad = dMaxLoad;
00089     if(!newRefine()) {
00090       CkPrintf("Error: Could not refine at max overload\n");
00091       done = 1;
00092     }
00093   } 
00094 
00095   // do a binary search between start and dMaxLoad until we succeed
00096   while(!done) {
00097     if(maxLoad - minLoad <= 1)
00098       done = 1;
00099     else {
00100       curLoad = (maxLoad + minLoad)/2;
00101       overLoad = curLoad * step + start;
00102       if(newRefine())
00103         maxLoad = curLoad;
00104       else
00105         minLoad = curLoad;
00106     }
00107   }
00108 }

int RefineTorusLB::newRefine (  ) 

Definition at line 110 of file RefineTorusLB.C.

References processorInfo::available, Rebalancer::averageLoad, processorInfo::computeSet, maxHeap::deleteMax(), endi(), IRSet::hasElements(), Iterator::id, InfoRecord::Id, INGROUP, IRSet::insert(), maxHeap::insert(), iout, IRSet::iterator(), j, InfoRecord::load, IRSet::next(), Rebalancer::overLoad, computeInfo::patch1, computeInfo::patch2, Rebalancer::patches, Rebalancer::printSummary(), patchInfo::processor, Rebalancer::processors, REASSIGN, and SELECT_REALPE.

Referenced by binaryRefine().

00110                              {
00111   int done = 1;
00112   maxHeap *heavyPes = new maxHeap(P);
00113   IRSet *lightPes = new IRSet();
00114   processorInfo *donor, *p, *bestP;
00115   computeInfo *c;
00116   Iterator nextC, nextP;
00117   pcpair good;
00118   double thresholdLoad = overLoad * averageLoad;
00119   int index, realPe;
00120 
00121   const int beginGroup = processors[0].Id;
00122   const int endGroup = beginGroup + P;
00123 
00124   // create a heap and set of heavy and light pes respectively
00125   for(int i=0; i<P; i++) {
00126     if (processors[i].load > thresholdLoad)
00127       heavyPes->insert((InfoRecord *) &(processors[i]));
00128     else
00129       lightPes->insert((InfoRecord *) &(processors[i]));
00130   }
00131 
00132 #if LDB_DEBUG
00133   iout << "\n Before Refinement Summary\n" << endi;
00134   printSummary();
00135 #endif
00136 
00137   pcpair pcpairarray[12];
00138      
00139   for(int j=0; j<6; j++) {
00140     bestPe[j] = &pcpairarray[j];    // new pcpair();
00141     goodPe[j] = &pcpairarray[j+6];  // new pcpair();
00142   }
00143 
00144   while(1) {
00145     while(donor = (processorInfo*)heavyPes->deleteMax())
00146       if(donor->computeSet.hasElements())
00147         break;
00148     
00149     if(!donor) break;
00150 
00151     for(int j=0; j<6; j++) {
00152       bestPe[j]->reset();
00153       goodPe[j]->reset();
00154     }
00155 
00156     nextC.id = 0;
00157     c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00158 
00159     while(c) {
00160       // Look at pes which have the compute's patches
00161 
00162       // HYBRID check if processor is in local group
00163 #define SELECT_REALPE(X) if INGROUP((X)) { \
00164         selectPes(&processors[(X) - beginGroup], c); \
00165       }
00166 
00167       int realPe1 = patches[c->patch1].processor;
00168       SELECT_REALPE(realPe1)
00169 
00170       int realPe2 = patches[c->patch2].processor;
00171       if ( realPe2 != realPe1 ) {
00172         SELECT_REALPE(realPe2)
00173       }
00174 
00175       // Try the processors which have the patches' proxies
00176       p = (processorInfo *)(patches[c->patch1].proxiesOn.iterator((Iterator *)&nextP));
00177       while(p) {                                            // patch 1
00178         if INGROUP(p->Id) selectPes(p, c);
00179         p = (processorInfo *)(patches[c->patch1].proxiesOn.next((Iterator *)&nextP));
00180       }
00181   
00182       p = (processorInfo *)(patches[c->patch2].proxiesOn.iterator((Iterator *)&nextP));
00183       while(p) {                                            //patch 2
00184         if INGROUP(p->Id) selectPes(p, c);
00185         p = (processorInfo *)(patches[c->patch2].proxiesOn.next((Iterator *)&nextP));
00186       }
00187       
00188       nextC.id++;
00189       c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00190     } // end of compute loop
00191 
00192 #define REASSIGN(GRID) if (GRID->c) { deAssign(GRID->c, donor); \
00193         assign(GRID->c, GRID->p); bestP = GRID->p; }
00194 
00195     bestP = 0;
00196     // see if we have found a compute processor pair
00197     REASSIGN(bestPe[5])
00198 #if USE_TOPOMAP
00199     else REASSIGN(goodPe[5])
00200 #endif
00201     else REASSIGN(bestPe[4])
00202 #if USE_TOPOMAP
00203     else REASSIGN(goodPe[4])
00204 #endif
00205     else REASSIGN(bestPe[3])
00206 #if USE_TOPOMAP
00207     else REASSIGN(goodPe[3])
00208 #endif
00209     else REASSIGN(bestPe[1])
00210 #if USE_TOPOMAP
00211     else REASSIGN(goodPe[1])
00212 #endif
00213     else REASSIGN(bestPe[2])
00214 #if USE_TOPOMAP
00215     else REASSIGN(goodPe[2])
00216 #endif
00217     else REASSIGN(bestPe[0])
00218 #if USE_TOPOMAP
00219     else REASSIGN(goodPe[0])
00220 #endif
00221 
00222   // Try all pes on the nodes of the home patches
00223     if ( ! bestP && CmiNumNodes() > 1 ) {  // else not useful
00224       double minLoad = overLoad * averageLoad;
00225       good.c = 0; good.p = 0;
00226       nextC.id = 0;
00227       c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00228       while(c) {
00229         int realPe1 = patches[c->patch1].processor;
00230         int realNode1 = CmiNodeOf(realPe1);
00231         int nodeSize = CmiNodeSize(realNode1);
00232         if ( nodeSize > 1 ) {  // else did it already
00233           int firstpe = CmiNodeFirst(realNode1);
00234           for ( int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
00235             if INGROUP(rpe) {
00236               p = &processors[rpe - beginGroup];
00237               if ( p->available && ( p->load + c->load < minLoad ) ) {
00238                 minLoad = p->load + c->load;
00239                 good.c = c;
00240                 good.p = p;
00241               }
00242             }
00243           }
00244         }
00245         int realPe2 = patches[c->patch2].processor;
00246         if ( realPe2 != realPe1 ) {
00247           int realNode2 = CmiNodeOf(realPe2);
00248           if ( realNode2 != realNode1 ) {  // else did it already
00249             nodeSize = CmiNodeSize(realNode2);
00250             if ( nodeSize > 1 ) {
00251               int firstpe = CmiNodeFirst(realNode2);
00252               for ( int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
00253                 if INGROUP(rpe) {
00254                   p = &processors[rpe - beginGroup];
00255                   if ( p->available && ( p->load + c->load < minLoad ) ) {
00256                     minLoad = p->load + c->load;
00257                     good.c = c;
00258                     good.p = p;
00259                   }
00260                 }
00261               }
00262             }
00263           }
00264         }
00265         nextC.id++;
00266         c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00267       } // end of compute loop
00268 
00269       REASSIGN((&good))
00270     }
00271 
00272   // Try all pes on the physical nodes of the home patches
00273     if ( ! bestP && ( CmiNumPhysicalNodes() > 1 ) &&
00274          ( CmiNumPhysicalNodes() < CmiNumNodes() ) ) {  // else not useful
00275       double minLoad = overLoad * averageLoad;
00276       good.c = 0; good.p = 0;
00277       nextC.id = 0;
00278       c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00279       while(c) {
00280         int realPe1 = patches[c->patch1].processor;
00281         int realNode1 = CmiPhysicalNodeID(realPe1);
00282         int *rpelist;
00283         int nodeSize;
00284         CmiGetPesOnPhysicalNode(realNode1, &rpelist, &nodeSize);
00285         if ( nodeSize > 1 ) {  // else did it already
00286           for ( int ipe = 0; ipe < nodeSize; ++ipe ) {
00287             int rpe = rpelist[ipe];
00288             if INGROUP(rpe) {
00289               p = &processors[rpe - beginGroup];
00290               if ( p->available && ( p->load + c->load < minLoad ) ) {
00291                 minLoad = p->load + c->load;
00292                 good.c = c;
00293                 good.p = p;
00294               }
00295             }
00296           }
00297         }
00298         int realPe2 = patches[c->patch2].processor;
00299         if ( realPe2 != realPe1 ) {
00300           int realNode2 = CmiPhysicalNodeID(realPe2);
00301           if ( realNode2 != realNode1 ) {  // else did it already
00302             CmiGetPesOnPhysicalNode(realNode2, &rpelist, &nodeSize);
00303             if ( nodeSize > 1 ) {  // else did it already
00304               for ( int ipe = 0; ipe < nodeSize; ++ipe ) {
00305                 int rpe = rpelist[ipe];
00306                 if INGROUP(rpe) {
00307                   p = &processors[rpe - beginGroup];
00308                   if ( p->available && ( p->load + c->load < minLoad ) ) {
00309                     minLoad = p->load + c->load;
00310                     good.c = c;
00311                     good.p = p;
00312                   }
00313                 }
00314               }
00315             }
00316           }
00317         }
00318         nextC.id++;
00319         c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00320       } // end of compute loop
00321 
00322       REASSIGN((&good))
00323     }
00324 
00325     if(bestP) {
00326       if(bestP->load > averageLoad) {
00327         // CkPrintf("Acceptor %d became heavy%f %f\n", bestP->Id, bestP->load, overLoad*averageLoad);
00328         lightPes->remove(bestP);
00329       } else {
00330         // CkPrintf("Acceptor %d still light %f %f\n", bestP->Id, bestP->load, overLoad*averageLoad);
00331       }
00332       if(donor->load > overLoad*averageLoad) {
00333         // CkPrintf("Donor %d still heavy %f %f\n", donor->Id, donor->load, overLoad*averageLoad);
00334         heavyPes->insert((InfoRecord *) donor);
00335       }
00336       else {
00337         // CkPrintf("Donor %d became light %f %f\n", donor->Id, donor->load, overLoad*averageLoad);
00338         lightPes->insert((InfoRecord *) donor);
00339       }
00340       
00341       continue;
00342     }
00343     //else
00344       //CkPrintf("1st try failed\n");
00345 
00346     int found = 0;
00347 #if USE_TOPOMAP
00348     // if this fails, look at the inner brick
00349     int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00350     int dimNX, dimNY, dimNZ, dimNT;
00351     double minLoad;
00352 
00353     good.c = 0; good.p = 0;
00354     minLoad = overLoad*averageLoad;
00355     nextC.id = 0;
00356     c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00357  
00358     while(c) {    
00359       p1 = patches[c->patch1].processor;
00360       p2 = patches[c->patch2].processor;
00361 
00362       tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00363       tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00364       dimNX = tmgr.getDimNX();
00365       dimNY = tmgr.getDimNY();
00366       dimNZ = tmgr.getDimNZ();
00367       dimNT = tmgr.getDimNT();
00368 
00369       brickDim(x1, x2, dimNX, xm, xM);
00370       brickDim(y1, y2, dimNY, ym, yM);
00371       brickDim(z1, z2, dimNZ, zm, zM);
00372 
00373       // to expand the inner brick by some hops
00374 #if 0
00375       if(xm>=EXPAND_INNER_BRICK) xm=xm-EXPAND_INNER_BRICK; else xm=0;
00376       if(ym>=EXPAND_INNER_BRICK) ym=ym-EXPAND_INNER_BRICK; else ym=0;
00377       if(zm>=EXPAND_INNER_BRICK) zm=zm-EXPAND_INNER_BRICK; else zm=0;
00378 
00379       xM=xM+EXPAND_INNER_BRICK;
00380       yM=yM+EXPAND_INNER_BRICK;
00381       zM=zM+EXPAND_INNER_BRICK;
00382 #endif
00383 
00384       // first go over the processors inside the brick and choose the least 
00385       for(int i=xm; i<=xM; i++)
00386         for(int j=ym; j<=yM; j++)
00387           for(int k=zm; k<=zM; k++)
00388             for(int l=0; l<dimNT; l++)
00389             {
00390               pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00391               if ( ! INGROUP(pe) ) continue;
00392               p = &processors[pe - beginGroup];
00393               if(c->load + p->load < minLoad) {
00394                 minLoad = c->load + p->load;
00395                 good.c = c;
00396                 good.p = p;
00397               }
00398             }
00399       nextC.id++;
00400       c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00401     }
00402 
00403     if(good.c) {
00404       found = 1;
00405       //CkPrintf("2nd try succeeded\n");
00406     }
00407     else {
00408       found = 0;
00409       //CkPrintf("2nd try failed\n");
00410     }
00411 
00412     // if that also fails, look at the outer brick
00413     minLoad = overLoad * averageLoad;
00414     if(found==0) {
00415       good.c = 0; good.p = 0;
00416       p = 0;
00417 
00418       nextC.id = 0;
00419       c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00420       while(c) {
00421         p1 = patches[c->patch1].processor;
00422         p2 = patches[c->patch2].processor;
00423 
00424         tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00425         tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00426         dimNX = tmgr.getDimNX();
00427         dimNY = tmgr.getDimNY();
00428         dimNZ = tmgr.getDimNZ();
00429         dimNT = tmgr.getDimNT();
00430 
00431         brickDim(x1, x2, dimNX, xm, xM);
00432         brickDim(y1, y2, dimNY, ym, yM);
00433         brickDim(z1, z2, dimNZ, zm, zM);
00434 
00435         for(int i=xM+1; i<xm+dimNX; i++)
00436           for(int j=0; j<dimNY; j++)
00437             for(int k=0; k<dimNZ; k++)
00438               for(int l=0; l<dimNT; l++)
00439               {
00440                 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00441                 if ( ! INGROUP(pe) ) continue;
00442                 p = &processors[pe - beginGroup];
00443                 if(c->load + p->load < minLoad) {
00444                   good.c = c;
00445                   good.p = p;
00446                   found = 1; break;
00447                 }
00448               }
00449 
00450         if(found==1)
00451           break;
00452         else {
00453           for(int j=yM+1; j<ym+dimNY; j++)
00454             for(int i=xm; i<=xM; i++)
00455               for(int k=0; k<dimNZ; k++)
00456                 for(int l=0; l<dimNT; l++)
00457                 {
00458                   pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00459                   if ( ! INGROUP(pe) ) continue;
00460                   p = &processors[pe - beginGroup];
00461                   if(c->load + p->load < minLoad) {
00462                     good.c = c;
00463                     good.p = p;
00464                     found = 1; break;
00465                   }
00466                 }
00467         }
00468  
00469         if(found==1)
00470           break;
00471         else {
00472           for(int k=zM+1; k<zm+dimNZ; k++)
00473             for(int i=xm; i<=xM; i++)
00474               for(int j=ym; j<=yM; j++)
00475                 for(int l=0; l<dimNT; l++)
00476                 {
00477                   pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00478                   if ( ! INGROUP(pe) ) continue;
00479                   p = &processors[pe - beginGroup];
00480                   if(c->load + p->load < minLoad) {
00481                     good.c = c;
00482                     good.p = p;
00483                     found = 1; break;
00484                   }
00485                 }
00486         }
00487 
00488         if(found==1) break;
00489 
00490         nextC.id++;
00491         c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00492       } 
00493     }
00494 
00495     if(found == 1) {
00496       deAssign(good.c, donor);
00497       assign(good.c, good.p);
00498       if (good.p->load > averageLoad) lightPes->remove(good.p);
00499       if (donor->load > overLoad*averageLoad)
00500         heavyPes->insert((InfoRecord *) donor);
00501       else
00502         lightPes->insert((InfoRecord *) donor);
00503       continue;
00504     }
00505 
00506 #endif /* USE_TOPOMAP */
00507 
00508     // find the first processor to place the compute on
00509     p = (processorInfo *)lightPes->iterator((Iterator *) &nextP);
00510     if(found == 0) {
00511      while (p)
00512       {
00513         nextC.id = 0;
00514         c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00515         while (c)
00516         {
00517           selectPes(p, c);
00518           nextC.id++;
00519           c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00520         }
00521         p = (processorInfo *)lightPes->next((Iterator *) &nextP);
00522       }
00523 
00524       bestP = 0;
00525       REASSIGN(bestPe[5])
00526 #if USE_TOPOMAP
00527       else REASSIGN(goodPe[5])
00528 #endif
00529       else REASSIGN(bestPe[4])
00530 #if USE_TOPOMAP
00531       else REASSIGN(goodPe[4])
00532 #endif
00533       else REASSIGN(bestPe[3])
00534 #if USE_TOPOMAP
00535       else REASSIGN(goodPe[3])
00536 #endif
00537       else REASSIGN(bestPe[1])
00538 #if USE_TOPOMAP
00539       else REASSIGN(goodPe[1])
00540 #endif
00541       else REASSIGN(bestPe[2])
00542 #if USE_TOPOMAP
00543       else REASSIGN(goodPe[2])
00544 #endif
00545       else REASSIGN(bestPe[0])
00546 #if USE_TOPOMAP
00547       else REASSIGN(goodPe[0])
00548 #endif
00549     }
00550 
00551     if(bestP) {
00552       if(bestP->load > averageLoad) lightPes->remove(bestP);
00553       if(donor->load > overLoad*averageLoad)
00554         heavyPes->insert((InfoRecord *) donor);
00555       else
00556         lightPes->insert((InfoRecord *) donor);
00557       continue;
00558     }
00559     else {
00560       done = 0;
00561       break;
00562     }
00563  
00564   } // end of while loop
00565 
00566 #if LDB_DEBUG
00567   iout << "After Refinement Summary\n" << endi;
00568   printSummary();
00569 #endif
00570 
00571   delete heavyPes;
00572   delete lightPes;
00573 
00574   return done;
00575 }


The documentation for this class was generated from the following files:
Generated on Tue Nov 21 01:17:20 2017 for NAMD by  doxygen 1.4.7