Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

RefineTorusLB.C

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * $Source: /home/cvs/namd/cvsroot/namd2/src/RefineTorusLB.C,v $
00003  * $Author: bhatele $
00004  * $Date: 2008/08/28 03:37:35 $
00005  * $Revision: 1.16 $
00006  *****************************************************************************/
00007 
00015 #include "RefineTorusLB.h"
00016 #define EXPAND_INNER_BRICK 2
00017 
00018 RefineTorusLB::RefineTorusLB(computeInfo *cs, patchInfo *pas, processorInfo *pes, int ncs, 
00019 int npas, int npes, int flag) : Rebalancer(cs, pas, pes, ncs, npas, npes)
00020 {
00021   if(flag==1) {
00022     strategyName = "RefineTorusLB";
00023     strategy();
00024     // CREATE THE SPANNING TREE IN THE LOAD BALANCER
00025 #if 0
00026     if(proxySendSpanning || proxyRecvSpanning) {
00027     for(int i=0; i<4; i++) {
00028       decrSTLoad();
00029       computeAverage();
00030       createSpanningTree();
00031       incrSTLoad();
00032       // for(int i=0; i<P; i++)
00033       //   delete [] processors[i].proxyUsage;
00034       InitProxyUsage();
00035       binaryRefine();
00036       printLoads();
00037       createSpanningTree();
00038     }
00039     }
00040 #endif
00041   }
00042 }
00043 
00044 RefineTorusLB::~RefineTorusLB() { }
00045 
00046 void RefineTorusLB::strategy() {
00047   firstAssignInRefine = 0;
00048   for(int i=0; i<numComputes; i++)
00049     assign((computeInfo *) &(computes[i]), (processorInfo *) &(processors[computes[i].oldProcessor]));
00050   firstAssignInRefine = 1;
00051 
00052   binaryRefine();
00053   printLoads();
00054 }
00055 
00056 void RefineTorusLB::binaryRefine() {
00057   // compute the max and average load
00058   computeAverage();
00059   double max = computeMax();
00060 
00061   double step = 0.01, start = 1.05;
00062   double dCurLoad = max/averageLoad;
00063   int curLoad;
00064   int minLoad = 0;
00065   int maxLoad = (int)((dCurLoad - start)/step + 1);
00066   double dMinLoad = minLoad * step + start;
00067   double dMaxLoad = maxLoad * step + start;
00068 
00069   // check the two limits of the search: start and dMaxLoad
00070   int done=0;
00071   overLoad = dMinLoad;
00072   if(newRefine())
00073     done = 1;
00074   else {
00075     overLoad = dMaxLoad;
00076     if(!newRefine()) {
00077       CkPrintf("Error: Could not refine at max overload\n");
00078       done = 1;
00079     }
00080   } 
00081 
00082   // do a binary search between start and dMaxLoad until we succeed
00083   while(!done) {
00084     if(maxLoad - minLoad <= 1)
00085       done = 1;
00086     else {
00087       curLoad = (maxLoad + minLoad)/2;
00088       overLoad = curLoad * step + start;
00089       if(newRefine())
00090         maxLoad = curLoad;
00091       else
00092         minLoad = curLoad;
00093     }
00094   }
00095 }
00096 
00097 int RefineTorusLB::newRefine() {
00098   int done = 1;
00099   maxHeap *heavyPes = new maxHeap(P);
00100   Set *lightPes = new Set();
00101   processorInfo *donor, *p, *bestP;
00102   computeInfo *c;
00103   Iterator nextC, nextP;
00104   pcpair good;
00105   double thresholdLoad = overLoad * averageLoad;
00106 
00107   // create a heap and set of heavy and light pes respectively
00108   for(int i=0; i<P; i++) {
00109     if (processors[i].load > thresholdLoad)
00110       heavyPes->insert((InfoRecord *) &(processors[i]));
00111     else
00112       lightPes->insert((InfoRecord *) &(processors[i]));
00113   }
00114 
00115   pcpair pcpairarray[12];
00116      
00117   for(int j=0; j<6; j++) {
00118     bestPe[j] = &pcpairarray[j];    // new pcpair();
00119     goodPe[j] = &pcpairarray[j+6];  // new pcpair();
00120   }
00121 
00122   while(1) {
00123     while(donor = (processorInfo*)heavyPes->deleteMax())
00124       if(donor->computeSet.numElements())
00125         break;
00126     
00127     if(!donor) break;
00128 
00129     for(int j=0; j<6; j++) {
00130       bestPe[j]->reset();
00131       goodPe[j]->reset();
00132     }
00133 
00134     nextC.id = 0;
00135     c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00136 
00137     while(c) {
00138       // Look at the processors which have the compute's patches first
00139       p = &processors[patches[c->patch1].processor];        // patch 1
00140       selectPes(p, c);
00141       p = &processors[patches[c->patch2].processor];        // patch 2
00142       selectPes(p, c);
00143 
00144       // Try the processors which have the patches' proxies
00145       p = (processorInfo *)(patches[c->patch1].proxiesOn.iterator((Iterator *)&nextP));
00146       while(p) {                                            // patch 1
00147         selectPes(p, c);
00148         p = (processorInfo *)(patches[c->patch1].proxiesOn.next((Iterator *)&nextP));
00149       }
00150   
00151       p = (processorInfo *)(patches[c->patch2].proxiesOn.iterator((Iterator *)&nextP));
00152       while(p) {                                            //patch 2
00153         selectPes(p, c);
00154         p = (processorInfo *)(patches[c->patch2].proxiesOn.next((Iterator *)&nextP));
00155       }
00156       
00157       nextC.id++;
00158       c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00159     } // end of compute loop
00160 
00161 #define REASSIGN(GRID) if (GRID->c) { deAssign(GRID->c, donor); \
00162         assign(GRID->c, GRID->p); bestP = GRID->p; }
00163 
00164     bestP = 0;
00165     // see if we have found a compute processor pair
00166     REASSIGN(bestPe[5])
00167 #if USE_TOPOMAP
00168     else REASSIGN(goodPe[5])
00169 #endif
00170     else REASSIGN(bestPe[4])
00171 #if USE_TOPOMAP
00172     else REASSIGN(goodPe[4])
00173 #endif
00174     else REASSIGN(bestPe[3])
00175 #if USE_TOPOMAP
00176     else REASSIGN(goodPe[3])
00177 #endif
00178     else REASSIGN(bestPe[1])
00179 #if USE_TOPOMAP
00180     else REASSIGN(goodPe[1])
00181 #endif
00182     else REASSIGN(bestPe[2])
00183 #if USE_TOPOMAP
00184     else REASSIGN(goodPe[2])
00185 #endif
00186     else REASSIGN(bestPe[0])
00187 #if USE_TOPOMAP
00188     else REASSIGN(goodPe[0])
00189 #endif
00190 
00191     if(bestP) {
00192       if(bestP->load > averageLoad) {
00193         // CkPrintf("Acceptor %d became heavy%f %f\n", bestP->Id, bestP->load, overLoad*averageLoad);
00194         lightPes->remove(bestP);
00195       } else {
00196         // CkPrintf("Acceptor %d still light %f %f\n", bestP->Id, bestP->load, overLoad*averageLoad);
00197       }
00198       if(donor->load > overLoad*averageLoad) {
00199         // CkPrintf("Donor %d still heavy %f %f\n", donor->Id, donor->load, overLoad*averageLoad);
00200         heavyPes->insert((InfoRecord *) donor);
00201       }
00202       else {
00203         // CkPrintf("Donor %d became light %f %f\n", donor->Id, donor->load, overLoad*averageLoad);
00204         lightPes->insert((InfoRecord *) donor);
00205       }
00206       
00207       continue;
00208     }
00209     //else
00210       //CkPrintf("1st try failed\n");
00211 
00212     int found = 0;
00213 #if USE_TOPOMAP
00214     // if this fails, look at the inner brick
00215     int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00216     int dimNX, dimNY, dimNZ, dimNT;
00217     double minLoad;
00218 
00219     good.c = 0; good.p = 0;
00220     minLoad = overLoad*averageLoad;
00221     nextC.id = 0;
00222     c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00223  
00224     while(c) {    
00225       p1 = patches[c->patch1].processor;
00226       p2 = patches[c->patch2].processor;
00227 
00228       tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00229       tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00230       dimNX = tmgr.getDimNX();
00231       dimNY = tmgr.getDimNY();
00232       dimNZ = tmgr.getDimNZ();
00233       dimNT = tmgr.getDimNT();
00234 
00235       brickDim(x1, x2, dimNX, xm, xM);
00236       brickDim(y1, y2, dimNY, ym, yM);
00237       brickDim(z1, z2, dimNZ, zm, zM);
00238 
00239       // to expand the inner brick by some hops
00240 #if 0
00241       if(xm>=EXPAND_INNER_BRICK) xm=xm-EXPAND_INNER_BRICK; else xm=0;
00242       if(ym>=EXPAND_INNER_BRICK) ym=ym-EXPAND_INNER_BRICK; else ym=0;
00243       if(zm>=EXPAND_INNER_BRICK) zm=zm-EXPAND_INNER_BRICK; else zm=0;
00244 
00245       xM=xM+EXPAND_INNER_BRICK;
00246       yM=yM+EXPAND_INNER_BRICK;
00247       zM=zM+EXPAND_INNER_BRICK;
00248 #endif
00249 
00250       // first go over the processors inside the brick and choose the least 
00251       for(int i=xm; i<=xM; i++)
00252         for(int j=ym; j<=yM; j++)
00253           for(int k=zm; k<=zM; k++)
00254             for(int l=0; l<dimNT; l++)
00255             {
00256               pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00257               p = &processors[pe];
00258               if(c->load + p->load < minLoad) {
00259                 minLoad = c->load + p->load;
00260                 good.c = c;
00261                 good.p = p;
00262               }
00263             }
00264       nextC.id++;
00265       c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00266     }
00267 
00268     if(good.c) {
00269       found = 1;
00270       //CkPrintf("2nd try succeeded\n");
00271     }
00272     else {
00273       found = 0;
00274       //CkPrintf("2nd try failed\n");
00275     }
00276 
00277     // if that also fails, look at the outer brick
00278     minLoad = overLoad * averageLoad;
00279     if(found==0) {
00280       good.c = 0; good.p = 0;
00281       p = 0;
00282 
00283       nextC.id = 0;
00284       c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00285       while(c) {
00286         p1 = patches[c->patch1].processor;
00287         p2 = patches[c->patch2].processor;
00288 
00289         tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00290         tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00291         dimNX = tmgr.getDimNX();
00292         dimNY = tmgr.getDimNY();
00293         dimNZ = tmgr.getDimNZ();
00294         dimNT = tmgr.getDimNT();
00295 
00296         brickDim(x1, x2, dimNX, xm, xM);
00297         brickDim(y1, y2, dimNY, ym, yM);
00298         brickDim(z1, z2, dimNZ, zm, zM);
00299 
00300         for(int i=xM+1; i<xm+dimNX; i++)
00301           for(int j=0; j<dimNY; j++)
00302             for(int k=0; k<dimNZ; k++)
00303               for(int l=0; l<dimNT; l++)
00304               {
00305                 pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00306                 p = &processors[pe];
00307                 if(c->load + p->load < minLoad) {
00308                   good.c = c;
00309                   good.p = p;
00310                   found = 1; break;
00311                 }
00312               }
00313 
00314         if(found==1)
00315           break;
00316         else {
00317           for(int j=yM+1; j<ym+dimNY; j++)
00318             for(int i=xm; i<=xM; i++)
00319               for(int k=0; k<dimNZ; k++)
00320                 for(int l=0; l<dimNT; l++)
00321                 {
00322                   pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00323                   p = &processors[pe];
00324                   if(c->load + p->load < minLoad) {
00325                     good.c = c;
00326                     good.p = p;
00327                     found = 1; break;
00328                   }
00329                 }
00330         }
00331  
00332         if(found==1)
00333           break;
00334         else {
00335           for(int k=zM+1; k<zm+dimNZ; k++)
00336             for(int i=xm; i<=xM; i++)
00337               for(int j=ym; j<=yM; j++)
00338                 for(int l=0; l<dimNT; l++)
00339                 {
00340                   pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00341                   p = &processors[pe];
00342                   if(c->load + p->load < minLoad) {
00343                     good.c = c;
00344                     good.p = p;
00345                     found = 1; break;
00346                   }
00347                 }
00348         }
00349 
00350         if(found==1) break;
00351 
00352         nextC.id++;
00353         c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00354       } 
00355     }
00356 
00357     if(found == 1) {
00358       deAssign(good.c, donor);
00359       assign(good.c, good.p);
00360       if (good.p->load > averageLoad) lightPes->remove(good.p);
00361       if (donor->load > overLoad*averageLoad)
00362         heavyPes->insert((InfoRecord *) donor);
00363       else
00364         lightPes->insert((InfoRecord *) donor);
00365       continue;
00366     }
00367 
00368 #endif /* USE_TOPOMAP */
00369 
00370     // find the first processor to place the compute on
00371     p = (processorInfo *)lightPes->iterator((Iterator *) &nextP);
00372     if(found == 0) {
00373      while (p)
00374       {
00375         nextC.id = 0;
00376         c = (computeInfo *)donor->computeSet.iterator((Iterator *)&nextC);
00377         while (c)
00378         {
00379           selectPes(p, c);
00380           nextC.id++;
00381           c = (computeInfo *) donor->computeSet.next((Iterator *)&nextC);
00382         }
00383         p = (processorInfo *)lightPes->next((Iterator *) &nextP);
00384       }
00385 
00386       bestP = 0;
00387       REASSIGN(bestPe[5])
00388 #if USE_TOPOMAP
00389       else REASSIGN(goodPe[5])
00390 #endif
00391       else REASSIGN(bestPe[4])
00392 #if USE_TOPOMAP
00393       else REASSIGN(goodPe[4])
00394 #endif
00395       else REASSIGN(bestPe[3])
00396 #if USE_TOPOMAP
00397       else REASSIGN(goodPe[3])
00398 #endif
00399       else REASSIGN(bestPe[1])
00400 #if USE_TOPOMAP
00401       else REASSIGN(goodPe[1])
00402 #endif
00403       else REASSIGN(bestPe[2])
00404 #if USE_TOPOMAP
00405       else REASSIGN(goodPe[2])
00406 #endif
00407       else REASSIGN(bestPe[0])
00408 #if USE_TOPOMAP
00409       else REASSIGN(goodPe[0])
00410 #endif
00411     }
00412 
00413     if(bestP) {
00414       if(bestP->load > averageLoad) lightPes->remove(bestP);
00415       if(donor->load > overLoad*averageLoad)
00416         heavyPes->insert((InfoRecord *) donor);
00417       else
00418         lightPes->insert((InfoRecord *) donor);
00419       continue;
00420     }
00421     else {
00422       done = 0;
00423       break;
00424     }
00425  
00426   } // end of while loop
00427 
00428   delete heavyPes;
00429   delete lightPes;
00430 
00431   return done;
00432 }
00433 
00434 void RefineTorusLB::selectPes(processorInfo *p, computeInfo *c) {
00435   if(p->available == CmiFalse)
00436     return;
00437 
00438   // find the position in bestPe/goodPe to place this pair
00439   // HP HP HP HP HP HP
00440   // 02 11 20 01 10 00
00441   //  5  4  3  2  1  0
00442   int numPatches, numProxies, badForComm, index;
00443   numAvailable(c, p, &numPatches, &numProxies, &badForComm); 
00444   index = ((numPatches==2) ? (numPatches+1) : numPatches) + (numProxies * 2 + 1);
00445 
00446   if(numPatches==0 && numProxies==1)
00447     index--;
00448   if(numProxies==0)
00449     index--; 
00450   /*if(numPatches == 2 || numProxies == 2) index = 5;
00451   if(numPatches == 1 && numProxies == 1) index = 5;
00452   else if(numPatches == 1 || numProxies == 1) index = 4;
00453   else index = 3;*/
00454 
00455 #if USE_TOPOMAP
00456   int x, y, z, t;
00457   int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00458   int dimNX, dimNY, dimNZ, dimNT;
00459   double minLoad;
00460   p1 = patches[c->patch1].processor;
00461   p2 = patches[c->patch2].processor;
00462 
00463   tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00464   tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00465   dimNX = tmgr.getDimNX();
00466   dimNY = tmgr.getDimNY();
00467   dimNZ = tmgr.getDimNZ();
00468   dimNT = tmgr.getDimNT();
00469 
00470   brickDim(x1, x2, dimNX, xm, xM);
00471   brickDim(y1, y2, dimNY, ym, yM);
00472   brickDim(z1, z2, dimNZ, zm, zM);
00473 #endif
00474 
00475   if(p->load + c->load < overLoad * averageLoad) {
00476 #if USE_TOPOMAP
00477     tmgr.rankToCoordinates(p->Id, x, y, z, t);
00478     int wB = withinBrick(x, y, z, xm, xM, dimNX, ym, yM, dimNY, zm, zM, dimNZ);
00479     if(wB) {
00480 #endif
00481       pcpair* &newp = bestPe[index];
00482 
00483       if (!(newp->c) || ((p->load + c->load) < (newp->p->load + newp->c->load))) {
00484         newp->p = p;
00485         newp->c = c;
00486       } 
00487 #if USE_TOPOMAP
00488     } else {
00489       pcpair* &newp = goodPe[index];
00490       double loadDiff = newp->p->load + newp->c->load - p->load - c->load;
00491       if (loadDiff<0) loadDiff *= (-1);
00492 
00493       if (!(newp->c) || ( loadDiff > 0.4 * averageLoad && (p->load + c->load) < (newp->p->load + newp->c->load) ) ) {
00494         newp->p = p;
00495         newp->c = c;
00496       } else {
00497         if (tmgr.getHopsBetweenRanks(p->Id, p1) + tmgr.getHopsBetweenRanks(p->Id, p2) < tmgr.getHopsBetweenRanks((newp->p)->Id, p1) + tmgr.getHopsBetweenRanks((newp->p)->Id, p2)) {
00498           newp->p = p;
00499           newp->c = c;
00500         }
00501       }
00502     }
00503 #endif
00504   }
00505 }
00506 

Generated on Sat Sep 6 04:07:42 2008 for NAMD by  doxygen 1.3.9.1