TorusLB.C

Go to the documentation of this file.
00001 /*****************************************************************************
00002  * $Source: /home/cvs/namd/cvsroot/namd2/src/TorusLB.C,v $
00003  * $Author: jim $
00004  * $Date: 2013/08/30 18:18:20 $
00005  * $Revision: 1.35 $
00006  *****************************************************************************/
00007  
00015 #include "TorusLB.h"
00016 #include "ProxyMgr.h"
00017 #define SHRINK_INNER_BRICK 1
00018 
00019 TorusLB::TorusLB(computeInfo *cs, patchInfo *pas, processorInfo *pes, int ncs, 
00020 int npas, int npes) : RefineTorusLB(cs, pas, pes, ncs, npas, npes, 0)
00021 {
00022   strategyName = "TorusLB";
00023   strategy();
00024   //if ( computeMax() <= origMaxLoad ) {
00025   //  binaryRefine();
00026   //  printLoads();
00027   //}
00028   // CREATE THE SPANNING TREE IN THE LOAD BALANCER
00029   //if(proxySendSpanning || proxyRecvSpanning)
00030   //  createSpanningTree();
00031 }
00032 
00033 TorusLB::~TorusLB() { }
00034 
00035 void TorusLB::strategy() {
00036   int index;
00037   // compute the average load by (compute load + background load) / numPesAvailable
00038   computeAverage();
00039   // two heaps of self and pair computes
00040   makeTwoHeaps();
00041 
00042   const int beginGroup = processors[0].Id;
00043   const int endGroup = beginGroup + P;
00044 #define INGROUP(PROC) ((PROC) >= beginGroup && (PROC) < endGroup)
00045 
00046   computeInfo *c;
00047   processorInfo *p, *minp;
00048   Iterator nextP;
00049   overLoad = 1.2;
00050 
00051   for(int I=0; I<numComputes; I++) {
00052 
00053   c = (computeInfo *) computePairHeap->deleteMax();
00054   if ( ! c ) c = (computeInfo *) computeSelfHeap->deleteMax(); 
00055 
00056   if(c->processor != -1) continue; // go to the next compute
00057   if(!c) CkAbort("TorusLB: Compute Heap empty!\n");
00058 
00059   for(int j=0; j<6; j++) {
00060     bestPe[j] = 0;
00061     goodPe[j] = 0;
00062     badPe[j] = 0;
00063   }
00064 
00065   // Look at pes which have the compute's patches
00066 
00067   // HYBRID check if processor is in local group
00068 #define SELECT_REALPE(X) if INGROUP((X)) { \
00069   selectPes(&processors[(X) - beginGroup], c); \
00070   }
00071 
00072   const int realPe1 = patches[c->patch1].processor;
00073   SELECT_REALPE(realPe1)
00074 
00075   const int realPe2 = patches[c->patch2].processor;
00076   if ( realPe2 != realPe1 ) {
00077     SELECT_REALPE(realPe2)
00078   }
00079 
00080   // Try the processors which have the patches' proxies
00081   p = (processorInfo *)(patches[c->patch1].proxiesOn.iterator((Iterator *)&nextP));
00082   while(p) {                                            // patch 1
00083     if INGROUP(p->Id) selectPes(p, c);
00084     p = (processorInfo *)(patches[c->patch1].proxiesOn.next((Iterator *)&nextP));
00085   } 
00086 
00087   p = (processorInfo *)(patches[c->patch2].proxiesOn.iterator((Iterator *)&nextP));
00088   while(p) {                                            // patch 2
00089     if INGROUP(p->Id) selectPes(p, c);
00090     p = (processorInfo *)(patches[c->patch2].proxiesOn.next((Iterator *)&nextP));
00091   }
00092 
00093   // see if we have found a processor to place the compute on
00094   p = 0;
00095   if((p = bestPe[5])
00096 #if USE_TOPOMAP
00097   || (p = goodPe[5])
00098 #endif
00099   || (p = bestPe[4])
00100 #if USE_TOPOMAP
00101   || (p = goodPe[4])
00102 #endif
00103   || (p = bestPe[3])
00104 #if USE_TOPOMAP
00105   || (p = goodPe[3])
00106 #endif
00107   || (p = bestPe[1])
00108 #if USE_TOPOMAP
00109   || (p = goodPe[1])
00110 #endif
00111   || (p = bestPe[2])
00112 #if USE_TOPOMAP
00113   || (p = goodPe[2])
00114 #endif
00115   || (p = bestPe[0])
00116 #if USE_TOPOMAP
00117   || (p = goodPe[0])
00118 #endif
00119   ) {
00120     assign(c, p);
00121     continue;
00122   }
00123 
00124     // Try all pes on the nodes of the home patches
00125     if ( CmiNumNodes() > 1 ) {  // else not useful
00126       double minLoad = overLoad * averageLoad;
00127       minp = 0;
00128       int realNode1 = CmiNodeOf(realPe1);
00129       int nodeSize = CmiNodeSize(realNode1);
00130       if ( nodeSize > 1 ) {  // else did it already
00131         int firstpe = CmiNodeFirst(realNode1);
00132         for ( int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
00133           if INGROUP(rpe) {
00134             p = &processors[rpe - beginGroup];
00135             if ( p->available && ( p->load + c->load < minLoad ) ) {
00136               minLoad = p->load + c->load;
00137               minp = p;
00138             }
00139           }
00140         }
00141       }
00142       if ( realPe2 != realPe1 ) {
00143         int realNode2 = CmiNodeOf(realPe2);
00144         if ( realNode2 != realNode1 ) {  // else did it already
00145           nodeSize = CmiNodeSize(realNode2);
00146           if ( nodeSize > 1 ) {
00147             int firstpe = CmiNodeFirst(realNode2);
00148             for ( int rpe = firstpe; rpe < firstpe+nodeSize; ++rpe ) {
00149               if INGROUP(rpe) {
00150                 p = &processors[rpe - beginGroup];
00151                 if ( p->available && ( p->load + c->load < minLoad ) ) {
00152                   minLoad = p->load + c->load;
00153                   minp = p;
00154                 }
00155               }
00156             }
00157           }
00158         }
00159       }
00160       if(minp) {
00161         assign(c, minp);
00162         continue;
00163       }
00164     }
00165 
00166     // Try all pes on the physical nodes of the home patches
00167     if ( ( CmiNumPhysicalNodes() > 1 ) &&
00168          ( CmiNumPhysicalNodes() < CmiNumNodes() ) ) {  // else not useful
00169       double minLoad = overLoad * averageLoad;
00170       minp = 0;
00171       int realNode1 = CmiPhysicalNodeID(realPe1);
00172       int *rpelist;
00173       int nodeSize;
00174       CmiGetPesOnPhysicalNode(realNode1, &rpelist, &nodeSize);
00175       if ( nodeSize > 1 ) {  // else did it already
00176         for ( int ipe = 0; ipe < nodeSize; ++ipe ) {
00177           int rpe = rpelist[ipe];
00178           if INGROUP(rpe) {
00179             p = &processors[rpe - beginGroup];
00180             if ( p->available && ( p->load + c->load < minLoad ) ) {
00181               minLoad = p->load + c->load;
00182               minp = p;
00183             }
00184           }
00185         }
00186       }
00187       if ( realPe2 != realPe1 ) {
00188         int realNode2 = CmiPhysicalNodeID(realPe2);
00189         if ( realNode2 != realNode1 ) {  // else did it already
00190           CmiGetPesOnPhysicalNode(realNode2, &rpelist, &nodeSize);
00191           if ( nodeSize > 1 ) {  // else did it already
00192             for ( int ipe = 0; ipe < nodeSize; ++ipe ) {
00193               int rpe = rpelist[ipe];
00194               if INGROUP(rpe) {
00195                 p = &processors[rpe - beginGroup];
00196                 if ( p->available && ( p->load + c->load < minLoad ) ) {
00197                   minLoad = p->load + c->load;
00198                   minp = p;
00199                 }
00200               }
00201             }
00202           }
00203         }
00204       }
00205       if(minp) {
00206         assign(c, minp);
00207         continue;
00208       }
00209     }
00210 
00211  
00212   int found = 0;
00213 #if USE_TOPOMAP
00214   // If no processor found, go through the whole list in a topological fashion
00215   // first try the inner brick
00216   int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00217   int dimNX, dimNY, dimNZ, dimNT;
00218   double minLoad;
00219   p1 = patches[c->patch1].processor;
00220   p2 = patches[c->patch2].processor;
00221 
00222   tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00223   tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00224   dimNX = tmgr.getDimNX();
00225   dimNY = tmgr.getDimNY();
00226   dimNZ = tmgr.getDimNZ();
00227   dimNT = tmgr.getDimNT();
00228 
00229   brickDim(x1, x2, dimNX, xm, xM);
00230   brickDim(y1, y2, dimNY, ym, yM);
00231   brickDim(z1, z2, dimNZ, zm, zM);
00232 
00233   // to shrink the inner brick by some hops
00234 #if 0
00235   xm=xm+SHRINK_INNER_BRICK;
00236   ym=ym+SHRINK_INNER_BRICK;
00237   zm=zm+SHRINK_INNER_BRICK;
00238 
00239   xM=xM-SHRINK_INNER_BRICK;
00240   yM=yM-SHRINK_INNER_BRICK;
00241   zM=zM-SHRINK_INNER_BRICK;
00242 #endif
00243 
00244   // first go over the processors inside the brick and choose the least 
00245   // overloaded one
00246   p = 0; minp = 0;
00247   minLoad = overLoad * averageLoad;
00248   for(int i=xm; i<=xM; i++)
00249     for(int j=ym; j<=yM; j++)
00250       for(int k=zm; k<=zM; k++)
00251         for(int l=0; l<dimNT; l++)
00252         {
00253           pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00254           if ( ! INGROUP(pe) ) continue;
00255           p = &processors[pe - beginGroup];
00256           if(c->load + p->load < minLoad) { 
00257             minLoad = c->load + p->load;
00258             minp = p;
00259             found = 1;
00260           }
00261         }
00262 
00263   // if no success, then go through the remaining torus (outer brick)
00264   // and pick the first underloaded one
00265   minLoad = overLoad * averageLoad;
00266   if(found == 0) {
00267     p = 0; minp = 0;
00268     for(int i=xM+1; i<xm+dimNX; i++)
00269       for(int j=0; j<dimNY; j++)
00270         for(int k=0; k<dimNZ; k++)
00271           for(int l=0; l<dimNT; l++)
00272           {
00273             pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00274             if ( ! INGROUP(pe) ) continue;
00275             p = &processors[pe - beginGroup];
00276             if(c->load + p->load < minLoad) { 
00277               minp = p;
00278               found = 1; break;
00279             }
00280           }
00281   }
00282 
00283   if(found == 0) {
00284     for(int j=yM+1; j<ym+dimNY; j++)
00285       for(int i=xm; i<=xM; i++)
00286         for(int k=0; k<dimNZ; k++)
00287           for(int l=0; l<dimNT; l++)
00288           {
00289             pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00290             if ( ! INGROUP(pe) ) continue;
00291             p = &processors[pe - beginGroup];
00292             if(c->load + p->load < minLoad) { 
00293               minp = p;
00294               found = 1; break;
00295             }
00296           }
00297   }
00298 
00299   if(found == 0) {
00300     for(int k=zM+1; k<zm+dimNZ; k++)
00301       for(int i=xm; i<=xM; i++)
00302         for(int j=ym; j<=yM; j++)
00303           for(int l=0; l<dimNT; l++)
00304           {
00305             pe = tmgr.coordinatesToRank(i%dimNX, j%dimNY, k%dimNZ, l);
00306             if ( ! INGROUP(pe) ) continue;
00307             p = &processors[pe - beginGroup];
00308             if(c->load + p->load < minLoad) { 
00309               minp = p;
00310               found = 1; break;
00311             }
00312           }
00313   }
00314 
00315   
00316   if(found == 1) {
00317     assign(c, minp);
00318     continue;
00319   }
00320 
00321 #endif /* USE_TOPOMAP */
00322 
00323   if(found == 0) {
00324     heapIterator nextp;
00325     processorInfo *p = (processorInfo *)(pes->iterator((heapIterator *) &nextp));
00326     while (p) {
00327       selectPes(p, c);
00328       p = (processorInfo *)(pes->next(&nextp));
00329     }
00330     p = 0;
00331     if((p = bestPe[5])
00332 #if USE_TOPOMAP
00333     || (p = goodPe[5])
00334 #endif
00335     || (p = bestPe[4])
00336 #if USE_TOPOMAP
00337     || (p = goodPe[4])
00338 #endif
00339     || (p = bestPe[3])
00340 #if USE_TOPOMAP
00341     || (p = goodPe[3])
00342 #endif
00343     || (p = bestPe[1])
00344 #if USE_TOPOMAP
00345     || (p = goodPe[1])
00346 #endif
00347     || (p = bestPe[2])
00348 #if USE_TOPOMAP
00349     || (p = goodPe[2])
00350 #endif
00351     || (p = bestPe[0])
00352 #if USE_TOPOMAP
00353     || (p = goodPe[0])
00354 #endif
00355     ) {
00356       assign(c, p);
00357       found = 1;
00358       continue;
00359     }
00360   }
00361 
00362   if(found == 0) {
00363     p = 0;
00364     if((p = badPe[5])
00365     || (p = badPe[4])
00366     || (p = badPe[3])
00367     || (p = badPe[1])
00368     || (p = badPe[2])
00369     || (p = badPe[0])) {
00370       assign(c, p);
00371       found = 1;
00372       continue;
00373     }
00374   }
00375 
00376   if(found == 0) {
00377      CkPrintf("TorusLB: No receiver found average %f overload %f\n", averageLoad, overLoad);
00378      CkAbort("TorusLB: No receiver found\n");
00379   }
00380  
00381   } // end of computes for-loop
00382 
00383   printLoads(3);
00384 }
00385 
00386 void TorusLB::selectPes(processorInfo *p, computeInfo *c) {
00387   if (p->available == false)
00388     return;
00389 
00390   // find the position in bestPe/goodPe to place this pair
00391   // HP HP HP HP HP HP
00392   // 02 11 20 01 10 00
00393   //  5  4  3  2  1  0
00394   int numPatches, numProxies, /* badForComm, */ index;
00395   numAvailable(c, p, &numPatches, &numProxies, 0 /* &badForComm */); 
00396   int numEither = numPatches + numProxies;
00397   index = (numEither*(numEither+1))/2 + numProxies;
00398 
00399 #if USE_TOPOMAP
00400   int x, y, z, t;
00401   int p1, p2, pe, x1, x2, xm, xM, y1, y2, ym, yM, z1, z2, zm, zM, t1, t2;
00402   int dimNX, dimNY, dimNZ, dimNT;
00403   double minLoad;
00404   p1 = patches[c->patch1].processor;
00405   p2 = patches[c->patch2].processor;
00406 
00407   tmgr.rankToCoordinates(p1, x1, y1, z1, t1);
00408   tmgr.rankToCoordinates(p2, x2, y2, z2, t2);
00409   dimNX = tmgr.getDimNX();
00410   dimNY = tmgr.getDimNY();
00411   dimNZ = tmgr.getDimNZ();
00412   dimNT = tmgr.getDimNT();
00413 
00414   brickDim(x1, x2, dimNX, xm, xM);
00415   brickDim(y1, y2, dimNY, ym, yM);
00416   brickDim(z1, z2, dimNZ, zm, zM);
00417 #endif
00418 
00419   if (p->load + c->load < overLoad * averageLoad) {
00420   // replace only if the new processor is underloaded
00421 #if USE_TOPOMAP
00422     tmgr.rankToCoordinates(p->Id, x, y, z, t);
00423     int wB = withinBrick(x, y, z, xm, xM, dimNX, ym, yM, dimNY, zm, zM, dimNZ);
00424     if (wB) {
00425       // if within the brick formed by the patch processors
00426 #endif
00427       // or the non-topology case
00428       processorInfo* &oldp = bestPe[index];
00429       if (!(oldp) || p->load < oldp->load )
00430         oldp = p;
00431 #if USE_TOPOMAP
00432     } else {
00433       // if outside the brick formed by the patch processors
00434       processorInfo* &oldp = goodPe[index];
00435       double loadDiff = 0.0;
00436       
00437       if (!(oldp)) {
00438         // replace if there is no processor at that position
00439         oldp = p;
00440       }
00441       else {
00442         // get the load difference if the processor exixts
00443         loadDiff = oldp->load - p->load;
00444         if ((loadDiff > 0.4) || (loadDiff > 0.0 && (tmgr.getHopsBetweenRanks(p->Id, p1) + 
00445             tmgr.getHopsBetweenRanks(p->Id, p2) < tmgr.getHopsBetweenRanks(oldp->Id, p1) + 
00446             tmgr.getHopsBetweenRanks(oldp->Id, p2)))) {
00447           // if weights are similar, look at the hops
00448           oldp = p;
00449         }
00450       }
00451     }
00452 #endif
00453   } else {
00454     // for the first placement, we must find a processor to place
00455     // the compute on, so choose a bad processor if necessary
00456     processorInfo* &oldp = badPe[index];
00457     if (!(oldp) || p->load < oldp->load )
00458       oldp = p;
00459   }
00460 }
00461 

Generated on Sat Sep 23 01:17:16 2017 for NAMD by  doxygen 1.4.7