RecBisection.C

Go to the documentation of this file.
00001 
00007 #include <math.h>
00008 #include <stdlib.h>
00009 
00010 #include "RecBisection.h"
00011 #include "PatchMap.inl"
00012 #include "Patch.h"
00013 #include "PatchMgr.h"
00014 
00015 /* ********************************************************************* */
00016 /* Constructor for the RecBisection Class                                */
00017 /* ********************************************************************* */
00018 RecBisection::RecBisection(int numpartitions, PatchMap *thePatchMap)
00019 {
00020     patchMap    = thePatchMap;
00021     npartition  = numpartitions;
00022     numPatches  = patchMap->numPatches();
00023     partitions  = new Partition[npartition];
00024     patchload   = new PatchLoad[numPatches];
00025     currentp    = 0;
00026 
00027     if ( partitions == NULL ||
00028          patchload == NULL )
00029     {
00030       NAMD_die("memory allocation failed in RecBisection::RecBisection");
00031     }
00032 
00033 
00034     // the cost coeffiencients that is used to compute the load introduced
00035     // to the processor by a patch
00036 
00037     c_local0    = 1;
00038     c_local1    = 0.015;
00039     c_edge0     = 0.001;
00040     c_edge1     = 0.001;
00041     c_icompute0 = 0.001;
00042     c_icompute1 = 0.000035;
00043 
00044     //c_local0    = 1.0;
00045     //c_local1    = 0.;
00046     //c_edge0     = 0.;
00047     //c_edge1     = 0.;
00048     //c_icompute0 = 0.;
00049     //c_icompute1 = 0.;
00050 }
00051 
00052 
00053 /* ********************************************************************* */
00054 /* Destructor for the RecBisection Class                                */
00055 /* ********************************************************************* */
00056 RecBisection::~RecBisection()
00057 {
00058     delete [] partitions; 
00059     delete [] patchload;
00060 }
00061 
00062 
00063 
00064 /* *********************************************************************** */
00065 /* This is a recursive function to partition a 3-D mesh into n cubical     */
00066 /* subpartitions with approximately equal loads.                           */
00067 /*                                                                         */
00068 /* Input  n : total number of subpartitions that the partition "p" has to  */
00069 /*            be divided.  Its value is any integer > 0                    */
00070 /*        p : the current partition                                        */
00071 /*                                                                         */
00072 /* The partition p is divided into two partitions (say p1 and p2) such     */
00073 /* that a) p1 and p2 equally loaded b) p1 and p2 further to be partitioned */
00074 /* ino n1 and n2 subpartitions.                                            */
00075 /* If n is even then n1=n2=n/2. Otherwise n1 is one more than n2.          */
00076 /*                                                                         */
00077 /* Since the subpartitions are rectangualr prisms (not an artibrary shape),*/
00078 /* it is not always possible to find a bisection point where the load      */
00079 /* is equally divided.                                                     */
00080 /* The following strategy is used to get a good partitioning:              */
00081 /* We divide the initial partition along x, y and z directions tentatively */
00082 /* and choose the one that gives the best division                          */
00083 /* *********************************************************************** */
00084 
00085 void RecBisection::rec_divide(int n, const Partition &p)
00086 {
00087     int       i=0,j=0,k=0;              // general purpose index vars
00088     int       posi[3],posj[3],posk[3];  // division points along x,y,z direction
00089     int       mindir;                   // the best direction
00090     int       n1, n2;                   // number of subpartitions in p1 and p2
00091     int       p1_empty[3], p2_empty[3]; // true if a subpartition is empty
00092     float     load1;                   // desired load of the first subpartition
00093     float     prevload,currentload;    
00094     float     loadarray[3];            // actual loads of p1 (for each x,y,z
00095                                        // division)
00096     float     diff;                    // load diffenrence (actual - desired)
00097     float     mindiff;                 // minimum difference (among directions)
00098     Partition p1;                      // first subpartition
00099     Partition p2;                      // second subpartition
00100 
00101 
00102     if (n==1)
00103     {
00104        // no further subdivision
00105        // record teh partition p as a final partition
00106        partitions[currentp++] = p;
00107        return;
00108     }
00109 
00110     // calculate division ratio: 1/2 iff n is even, otherwise
00111     // first partition has more load
00112 
00113     n2 = n/2;
00114     n1 = n-n2;
00115 
00116     load1  = ( (float) n1/(float) n ) * p.load;
00117 
00118     for(i=XDIR; i<=ZDIR; i++) {p1_empty[i] = p2_empty[i] = 0;}
00119 
00120     p1 = p;
00121     p2 = p;
00122 
00123     // now try dividing along the x,y,z directions 
00124     
00125     // try x-axis 
00126     currentload = 0.0;
00127     i = p.origin.x;
00128     while(currentload < load1 && i<= p.corner.x) 
00129       {
00130         prevload = currentload;
00131         for(j=p.origin.y; j<=p.corner.y; j++)
00132           for(k=p.origin.z; k<=p.corner.z; k++)
00133             currentload += patchload[patchMap->pid(i,j,k)].total;
00134         if (currentload > load1)
00135            if ( prev_better(prevload,currentload,load1) ) 
00136                {currentload=prevload; break;}
00137         i++; 
00138       }
00139     posi[XDIR] = i; posj[XDIR] = j; posk[XDIR] = k;
00140     loadarray[XDIR] = currentload; 
00141     if (i == p.origin.x) p1_empty[XDIR] = 1;
00142     if (i > p.corner.x)  p2_empty[XDIR] = 1;
00143 
00144 
00145     // z axis
00146     currentload = 0.0;
00147     k = p.origin.z;
00148     while(currentload < load1 && k <= p.corner.z)
00149       {
00150         prevload = currentload;
00151         for(i=p.origin.x; i<=p.corner.x; i++)
00152           for(j=p.origin.y; j<=p.corner.y; j++)
00153             currentload += patchload[patchMap->pid(i,j,k)].total;
00154         if (currentload > load1)
00155            if ( prev_better(prevload,currentload,load1) )
00156                {currentload=prevload; break;}
00157         k++;
00158       }
00159 
00160     posi[ZDIR] = i; posj[ZDIR] = j; posk[ZDIR] = k;
00161     loadarray[ZDIR] = currentload; 
00162     if (k == p.origin.z) p1_empty[ZDIR] = 1;
00163     if (k > p.corner.z)  p2_empty[ZDIR] = 1;
00164     
00165 
00166     // y axis
00167     currentload = 0.0;
00168     j = p.origin.y;
00169     while(currentload < load1 && j <= p.corner.y) 
00170       {
00171         prevload = currentload;
00172         for(i=p.origin.x; i<=p.corner.x; i++)
00173           for(k=p.origin.z; k<=p.corner.z; k++)
00174             currentload += patchload[patchMap->pid(i,j,k)].total;
00175         if (currentload > load1)
00176            if ( prev_better(prevload,currentload,load1) )
00177                {currentload=prevload; break;}
00178         j++; 
00179       }
00180     posi[YDIR] = i; posj[YDIR] = j; posk[YDIR] = k;
00181     loadarray[YDIR] = currentload; 
00182     if (j == p.origin.y) p1_empty[YDIR] = 1;
00183     if (j > p.corner.y)  p2_empty[YDIR] = 1;
00184 
00185     // determine the best division direction
00186     mindiff = load1;
00187     mindir   = -1;
00188     for(i=XDIR; i<=ZDIR; i++) { 
00189        diff =  load1 - loadarray[i];
00190        if (diff < 0.0) diff = -diff;
00191        if (mindiff >= diff) {mindiff = diff; mindir = i;}
00192     }
00193 
00194     // always divide along x or y if possible
00195     int lx = p.corner.x - p.origin.x + 1;
00196     if ( n >= 2*lx || lx >= 2*n || n > 10 || lx > 10 ) {
00197       int n2x = n * (p.load - loadarray[XDIR]) / p.load + 0.5;
00198       if ( lx > 1 && n2x > 0 && n2x < n ) mindir = XDIR;
00199     }
00200 
00201     // revise n1 and n2 based on selected splitting dimension
00202     n2 = n * (p.load - loadarray[mindir]) / p.load + 0.5;
00203     if ( n2 < 1 ) n2 = 1;
00204     if ( n2 >= n ) n2 = n-1;
00205     n1 = n-n2;
00206 
00207     // divide along mindir
00208     switch (mindir) {
00209       case XDIR: p1.corner.x = posi[XDIR] - 1;
00210                  p2.origin.x = posi[XDIR];
00211                  break;
00212       case YDIR: p1.corner.y = posj[YDIR] - 1;
00213                  p2.origin.y = posj[YDIR];
00214                  break;
00215       case ZDIR: p1.corner.z = posk[ZDIR] - 1;
00216                  p2.origin.z = posk[ZDIR];
00217                  break;
00218       default:   NAMD_bug("RecBisection failing horribly!");
00219     }
00220     p1.load = loadarray[mindir];
00221     p2.load = p.load - p1.load;
00222     if (!p1_empty[mindir]) rec_divide(n1,p1); 
00223     if (!p2_empty[mindir]) rec_divide(n2,p2);
00224 }
00225 
00226 
00227 /* ************************************************************************ */
00228 /* Compute the initial overhead/load of each patch to the processor. The    */
00229 /* load of patches are computed as follows: Each patch has a fixed cost and */
00230 /* a variable cost depending of number atoms it has; c_local0 and c_local1  */
00231 /* respectively. Secondly, due to interaction with each patch, the patch    */
00232 /* incurs some load determined by c_edge0 and c_edge1 cost parameters.      */
00233 /* Finally, each patch has load due to computing forces between its certain */
00234 /* neighbours, with cost coefficients c_icompute0 and c_icompute1           */
00235 /* ************************************************************************ */
00236 
00237 void RecBisection::compute_patch_load() 
00238 {  
00239    int   i,nix,neighbour;
00240    int   numAtoms, numFixed, numNeighAtoms, numNeighFixed;
00241    float total_icompute;
00242 
00243    for(i=0; i<numPatches; i++) { 
00244 #ifdef MEM_OPT_VERSION
00245      numAtoms = patchMap->numAtoms(i);
00246      numFixed = patchMap->numFixedAtoms(i);
00247 #else
00248      numAtoms      = patchMap->patch(i)->getNumAtoms();
00249      numFixed      = patchMap->patch(i)->getNumFixedAtoms();
00250 #endif
00251      patchload[i].total = 0.0;
00252      patchload[i].edge  = 0.0;
00253 
00254      patchload[i].local = c_local0 + c_local1 * numAtoms;
00255      patchload[i].local += c_icompute0 +
00256         c_icompute1*(numAtoms*numAtoms-numFixed*numFixed);
00257 
00258 
00259      total_icompute = 0.0;
00260 
00261      PatchID neighbors[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00262      int nNeighbors = patchMap->oneAwayNeighbors(i,neighbors);
00263      
00264      for(nix=0; nix<nNeighbors; nix++) {
00265        neighbour = neighbors[nix];
00266 #ifdef MEM_OPT_VERSION      
00267        numNeighAtoms = patchMap->numAtoms(neighbour);
00268        numNeighFixed = patchMap->numFixedAtoms(neighbour);
00269 #else
00270        numNeighAtoms = patchMap->patch(neighbour)->getNumAtoms();
00271        numNeighFixed = patchMap->patch(neighbour)->getNumFixedAtoms();
00272 #endif
00273        patchload[i].icompute[nix] = 
00274          c_icompute0 +
00275          c_icompute1*(numNeighAtoms*numAtoms-numNeighFixed*numFixed);
00276        total_icompute += patchload[i].icompute[nix]; 
00277        patchload[i].edge += c_edge0 + c_edge1 * numNeighAtoms; 
00278      }
00279      patchload[i].total+=patchload[i].local+total_icompute+patchload[i].edge; 
00280    }
00281 }
00282 
00283 
00284 
00285 
00286 /* *********************************************************************  */
00287 /* Partitions a 3D space. First a recursive algorithm divides the initial */
00288 /* space into rectangular prisms with approximately equal loads.          */
00289 /* Then these rectangular prisms ar emodified to firther increase the     */
00290 /* load balance and reduce communication cost                             */
00291 /* *********************************************************************  */
00292 
00293 int RecBisection::partition(int *dest_arr)
00294 {
00295     int i;
00296   
00297     top_partition.origin.x = 0; 
00298     top_partition.origin.y = 0; 
00299     top_partition.origin.z = 0; 
00300     top_partition.corner.x  = patchMap->gridsize_a()-1;
00301     top_partition.corner.y  = patchMap->gridsize_b()-1;
00302     top_partition.corner.z  = patchMap->gridsize_c()-1;
00303     top_partition.load      = 0.0;
00304 
00305     // calculate estimated computational load due to each patch
00306     compute_patch_load();
00307 
00308     for(i=0; i<numPatches; i++) top_partition.load += patchload[i].total;
00309 
00310     // divide into rectangular prisms with load as equal as possible
00311     rec_divide(npartition,top_partition);
00312 
00313     if (currentp != npartition) 
00314           return 0;
00315     else  {
00316       if (dest_arr==NULL)
00317           assignNodes();
00318       else
00319           assign_nodes_arr(dest_arr);
00320     }
00321 
00322     return 1;
00323 }
00324 
00325 
00326 /* ********************************************************************* */
00327 /* partitioning done, update PatchDistib data structure to assign the    */
00328 /* patches to nodes. (patches in partition i is assigned to node i)      */
00329 /* ********************************************************************* */
00330 
00331 void RecBisection::assignNodes()
00332 {
00333     int i,j,k,pix;
00334     Partition p;
00335 
00336     for(pix=0; pix<npartition; pix++)
00337     {
00338        p = partitions[pix];
00339        for(i=p.origin.x; i<=p.corner.x; i++)
00340           for(j=p.origin.y; j<=p.corner.y; j++)
00341              for(k=p.origin.z; k<=p.corner.z; k++)
00342                 patchMap->assignNode(patchMap->pid(i,j,k),pix);  
00343     }
00344 
00345     int npatches = patchMap->numPatches();
00346     for (int pid=0; pid<npatches; ++pid ) 
00347       patchMap->assignBaseNode(pid);
00348 }
00349 
00350 /* ********************************************************************* */
00351 /* partitioning done, save results to an array, rather than updating     */
00352 /* the PatchDistib data structure.  Otherwise, thist is identical to     */
00353 /* assignNodes()                                                         */
00354 /* ********************************************************************* */
00355 
00356 void RecBisection::assign_nodes_arr(int *dest_arr)
00357 {
00358     int i,j,k,pix;
00359     Partition p;
00360 
00361     for(pix=0; pix<npartition; pix++)
00362     {
00363        p = partitions[pix];
00364        for(i=p.origin.x; i<=p.corner.x; i++)
00365           for(j=p.origin.y; j<=p.corner.y; j++)
00366              for(k=p.origin.z; k<=p.corner.z; k++)  {
00367                 dest_arr[patchMap->pid(i,j,k)] = pix;  
00368               }
00369     }
00370 }
00371 
00372 
00373 /* ************************************************************************ */
00374 /* to be implemented:                                                       */
00375 /* this functions revisits edge directions to refine the load distribution  */
00376 /* ************************************************************************ */
00377 void RecBisection::refine_edges()
00378 {
00379 }
00380 
00381 
00382 /* ************************************************************************ */
00383 /* to be implemented:                                                       */
00384 /* this function refines the boundaries of subpartitions to redice the      */
00385 /* communication across processors                                          */
00386 /* ************************************************************************ */
00387 void RecBisection::refine_boundaries()
00388 {
00389 }
00390 
00391 
00392 /* ************************************************************************ */
00393 /* to be implemented:                                                       */
00394 /* refine boundries invokes this function for eeach surface                 */
00395 /* ************************************************************************ */
00396 void RecBisection::refine_surface()
00397 {
00398 }
00399 
00400 /* ************************************************************************ */
00401 /* return true if the difference between previous load (prev1) and desired  */
00402 /* load (load1) is less than  teh difference between current an desired     */
00403 /* ************************************************************************ */
00404 
00405 int RecBisection::prev_better(float prev, float current, float load1)
00406 {
00407    float diff1,diff2;
00408 
00409    diff1 = load1 - prev;
00410    diff2 = current - load1;
00411 
00412    if (diff1 < 0.0) diff1 = -diff1;
00413    if (diff2 < 0.0) diff2 = -diff2;
00414 
00415    return (diff1 <= diff2);
00416 }
00417 
00418 #if USE_TOPOMAP 
00419 
00420 /* *********************************************************************  */
00421 /* Partitions a 3D processor into rectangular prisms of different
00422    sizes corresponding to the patches. Then a processor in the prism
00423    is assigned to hold the patch.
00424 *************************************/
00425 
00426 int RecBisection::partitionProcGrid(int X, int Y, int Z, int *dest_arr) {
00427     
00428     int i = 0;
00429     top_partition.origin.x = 0; 
00430     top_partition.origin.y = 0; 
00431     top_partition.origin.z = 0; 
00432     top_partition.corner.x  = patchMap->gridsize_a()-1;
00433     top_partition.corner.y  = patchMap->gridsize_b()-1;
00434     top_partition.corner.z  = patchMap->gridsize_c()-1;
00435     top_partition.load      = 0.0;
00436 
00437     // calculate estimated computational load due to each patch
00438     compute_patch_load();
00439    
00440     for(i=0; i<numPatches; i++) top_partition.load += patchload[i].total;
00441     
00442     int new_X = X, new_Y = Y, new_Z = Z;
00443     DimensionMap dm;
00444 
00445     dm.x = 0; dm.y = 1; dm.z = 2;
00446     
00447     findOptimalDimensions(X, Y, Z, new_X, new_Y, new_Z, dm);
00448     
00449     Partition proc_p;
00450     
00451     proc_p.origin.x = 0;
00452     proc_p.corner.x = new_X - 1;
00453     
00454     proc_p.origin.y = 0;
00455     proc_p.corner.y = new_Y - 1;
00456     
00457     proc_p.origin.z = 0;
00458     proc_p.corner.z = new_Z - 1;
00459     
00460     iout << "\nLDB: Partitioning Proc Grid of size " << "[" << X << "] [" << Y << "] [" << Z << "]\n" << endi;
00461 
00462     // divide processor grid into rectangular prisms whose sizes
00463     // corrspond to the computation load of the patches. Moreover
00464     // neoghboring should also be on nearby processors on the grid
00465     int rc = topogrid_rec_divide(proc_p, top_partition);
00466     
00467     if (rc < 0)
00468       return rc;
00469 
00470     assignPatchesToProcGrid(dest_arr, X, Y, Z, dm);
00471     
00472     return 1;
00473 }
00474 
00475 //Partition both a processor grid and a patch grid. Only useful when
00476 //number of processors is 2 times greater than number of patches. It
00477 //returns a processor partition for each patch.
00478 
00479 int RecBisection::topogrid_rec_divide(Partition &proc_p, Partition &patch_p) {
00480   
00481     Partition proc_p1, proc_p2, patch_p1, patch_p2;
00482     int i=0, j=0, k=0;
00483     int posi[3],posj[3],posk[3];  // division points along x,y,z direction
00484     int mindir;                   // the best direction
00485     int p1_empty[3], p2_empty[3]; // true if a subpartition is empty
00486     double loadarray[3];          // actual loads of p1 (for each x,y,z division)
00487     double diff;                  // load diffenrence (actual - desired)
00488     double mindiff;               // minimum difference (among directions)
00489     
00490     proc_p1 = proc_p2 = proc_p;
00491     patch_p1 = patch_p2 = patch_p;
00492 
00493     p1_empty[0] = p1_empty[1] = p1_empty[2] = 0;
00494     p2_empty[0] = p2_empty[1] = p2_empty[2] = 0;    
00495     /*
00496     CkPrintf("topo rec divide pe grid = (%d,%d,%d) to (%d,%d,%d) and patch grid = (%d,%d,%d) to (%d,%d,%d)\n", proc_p.origin.x, proc_p.origin.y, 
00497              proc_p.origin.z, proc_p.corner.x, proc_p.corner.y, proc_p.corner.z, 
00498              patch_p.origin.x, patch_p.origin.y, patch_p.origin.z, patch_p.corner.x, 
00499              patch_p.corner.y, patch_p.corner.z);
00500     */
00501 
00502     //We have just one patch left in the partition
00503     if(patch_p.origin.x == patch_p.corner.x && 
00504        patch_p.origin.y == patch_p.corner.y && 
00505        patch_p.origin.z == patch_p.corner.z) {
00506         
00507         int pid = patchMap->pid(patch_p.origin.x,
00508                                 patch_p.origin.y, patch_p.origin.z);
00509       
00510         //This patch owns all processors in the partition proc_p
00511         partitions[pid] = proc_p;
00512         
00513         //CkPrintf("Assigning patch %d to partition with origin at %d,%d,%d\n", pid, 
00514         // proc_p.origin.x, 
00515         // proc_p.origin.y, proc_p.origin.z);
00516 
00517         return 1;
00518     }  
00519     
00520     
00521     if(proc_p.origin.x == proc_p.corner.x && 
00522        proc_p.origin.y == proc_p.corner.y && 
00523        proc_p.origin.z == proc_p.corner.z) {
00524       
00525       return -1;
00526       /*
00527         for(k = patch_p.origin.z; k < patch_p.corner.z; k++) 
00528         for(j = patch_p.origin.y; j < patch_p.corner.y; j++) 
00529         for(i = patch_p.origin.x; i < patch_p.corner.x; i++) {
00530         
00531         partitions[patchMap->pid(i,j,k)] = proc_p;
00532         }       
00533         return;
00534       */
00535     }
00536     
00537     double load_x, load_y, load_z;
00538     load_x = (int) (proc_p.corner.x - proc_p.origin.x + 1)/2;
00539     load_x /= proc_p.corner.x - proc_p.origin.x + 1;
00540     load_x *= patch_p.load;
00541     
00542     
00543     load_y = (int) (proc_p.corner.y - proc_p.origin.y + 1)/2;
00544     load_y /= proc_p.corner.y - proc_p.origin.y + 1;
00545     load_y *= patch_p.load;
00546     
00547     load_z = (int) (proc_p.corner.z - proc_p.origin.z + 1)/2;
00548     load_z /= proc_p.corner.z - proc_p.origin.z + 1;
00549     load_z *= patch_p.load;
00550     
00551     loadarray[XDIR] = loadarray[YDIR] = loadarray[ZDIR] = 10.0 * patch_p.load;
00552     
00553     double currentload = 0;
00554     double prevload = 0;
00555     if(load_x > 0 && patch_p.origin.x != patch_p.corner.x) { 
00556         //Split the processors in X dimension
00557         //Also split patches in X dimension
00558         
00559         currentload = 0.0;
00560         i = patch_p.origin.x;
00561         
00562         while(currentload < load_x && i <= patch_p.corner.x) {
00563             prevload = currentload;
00564             for(j=patch_p.origin.y; j<=patch_p.corner.y; j++)
00565                 for(k=patch_p.origin.z; k<=patch_p.corner.z; k++)
00566                     currentload += patchload[patchMap->pid(i,j,k)].total;
00567             
00568             if (currentload > load_x)
00569                 if ( prev_better(prevload,currentload,load_x) ) { 
00570                     currentload=prevload; 
00571                     break;
00572                 }
00573             i++; 
00574         }
00575         
00576         posi[XDIR] = i; posj[XDIR] = j; posk[XDIR] = k;
00577         loadarray[XDIR] = currentload; 
00578         if (i == patch_p.origin.x) p1_empty[XDIR] = 1;
00579         if (i > patch_p.corner.x)  p2_empty[XDIR] = 1;
00580     }
00581 
00582     if(load_z > 0 && patch_p.origin.z != patch_p.corner.z) { //Split patches in Z dimension
00583         //Also split patches in Z dimension
00584         
00585         // z axis
00586         currentload = 0.0;
00587         k = patch_p.origin.z;
00588         while(currentload < load_z && k <= patch_p.corner.z){
00589             prevload = currentload;
00590             for(i=patch_p.origin.x; i<=patch_p.corner.x; i++)
00591                 for(j=patch_p.origin.y; j<=patch_p.corner.y; j++)
00592                     currentload += patchload[patchMap->pid(i,j,k)].total;
00593             
00594             if (currentload > load_z)
00595                 if ( prev_better(prevload,currentload,load_z) )
00596                     {currentload=prevload; break;}
00597             k++;
00598         }
00599         
00600         posi[ZDIR] = i; posj[ZDIR] = j; posk[ZDIR] = k;
00601         loadarray[ZDIR] = currentload; 
00602         if (k == patch_p.origin.z) p1_empty[ZDIR] = 1;
00603         if (k > patch_p.corner.z)  p2_empty[ZDIR] = 1;
00604     }
00605     
00606     if(load_y > 0 && patch_p.origin.y != patch_p.corner.y) { //Y dimension
00607         
00608         // y axis
00609         currentload = 0.0;
00610         j = patch_p.origin.y;
00611         while(currentload < load_y && j <= patch_p.corner.y) {
00612             prevload = currentload;
00613             for(i=patch_p.origin.x; i<=patch_p.corner.x; i++)
00614                 for(k=patch_p.origin.z; k<=patch_p.corner.z; k++)
00615                     currentload += patchload[patchMap->pid(i,j,k)].total;
00616             
00617             if (currentload > load_y)
00618                 if ( prev_better(prevload,currentload,load_y) )
00619                     {currentload=prevload; break;}
00620             j++; 
00621         }
00622         posi[YDIR] = i; posj[YDIR] = j; posk[YDIR] = k;
00623         loadarray[YDIR] = currentload; 
00624         if (j == patch_p.origin.y) p1_empty[YDIR] = 1;
00625         if (j > patch_p.corner.y)  p2_empty[YDIR] = 1;
00626     }
00627     
00628     //Need to choose the best division. Need to be careful because the
00629     //processor grid may not be partitionable in all dimensions
00630     
00631     mindiff  = 10.0 * patch_p.load;
00632     mindir   = -1;
00633     for(i=ZDIR; i >= XDIR; i--) { 
00634         
00635         double load1 = 0;
00636         if(i == XDIR)
00637             load1 = load_x;
00638         else if(i == YDIR)
00639             load1 = load_y;
00640         else if(i == ZDIR)
00641             load1= load_z;
00642         
00643         diff =  load1 - loadarray[i];
00644         if (diff < 0.0) diff = -diff;
00645         if (mindiff > diff) {mindiff = diff; mindir = i;}
00646     }
00647 
00648     int pdir = 0;
00649 
00650     //Give up on loads and divide it some way.  Later change to the
00651     //max of the differences along processors and patches
00652     if(mindiff >= patch_p.load) {
00653         if(patch_p.origin.x != patch_p.corner.x) {
00654             mindir = XDIR;
00655             posi[XDIR] = (patch_p.origin.x + patch_p.corner.x + 1)/2;
00656         }
00657         else if(patch_p.origin.y != patch_p.corner.y) {
00658             mindir = YDIR;
00659             posj[YDIR] = (patch_p.origin.y + patch_p.corner.y + 1)/2;
00660         }
00661         else {
00662             mindir = ZDIR;
00663             posk[ZDIR] = (patch_p.origin.z + patch_p.corner.z + 1)/2;
00664         }
00665         
00666         if(load_x > 0) 
00667           pdir = XDIR;
00668         else if(load_y > 0) 
00669           pdir = YDIR;
00670         else    
00671           pdir = ZDIR;
00672                 
00673         loadarray[mindir] = 0.5 * patch_p.load;
00674         loadarray[pdir] = 0.5 * patch_p.load;
00675     }
00676     else {
00677         pdir = mindir;
00678     }
00679 
00680     int n1 = 0, n2 = 0;
00681 
00682     // divide processor along pdir
00683     switch (pdir) {
00684     case XDIR: 
00685         n1 = loadarray[XDIR] * (proc_p.corner.x - proc_p.origin.x) / patch_p.load;
00686         n2 = proc_p.corner.x - proc_p.origin.x - n1;
00687         
00688         proc_p1.corner.x = proc_p.origin.x + n1;
00689         proc_p2.origin.x = proc_p.origin.x + n1 + 1;
00690         
00691         break;
00692     case YDIR: 
00693         
00694         n1 = loadarray[YDIR] * (proc_p.corner.y - proc_p.origin.y) / patch_p.load;
00695         n2 = proc_p.corner.y - proc_p.origin.y - n1;
00696         
00697         proc_p1.corner.y = proc_p.origin.y + n1;
00698         proc_p2.origin.y = proc_p.origin.y + n1 + 1;    
00699         break;
00700         
00701     case ZDIR: 
00702         n1 = loadarray[ZDIR] * (proc_p.corner.z - proc_p.origin.z) / patch_p.load;
00703         n2 = proc_p.corner.z - proc_p.origin.z - n1;
00704         
00705         proc_p1.corner.z = proc_p.origin.z + n1;
00706         proc_p2.origin.z = proc_p.origin.z + n1 + 1;
00707         
00708         break;
00709     default:   NAMD_bug("RecBisection failing horribly!");
00710     }    
00711 
00712     // divide PATCH along mindir
00713     switch (mindir) {
00714     case XDIR: 
00715         patch_p1.corner.x = posi[XDIR] - 1;
00716         patch_p2.origin.x = posi[XDIR];
00717     
00718         break;
00719     case YDIR: 
00720         patch_p1.corner.y = posj[YDIR] - 1;
00721         patch_p2.origin.y = posj[YDIR];
00722         break;
00723         
00724     case ZDIR: 
00725         patch_p1.corner.z = posk[ZDIR] - 1;
00726         patch_p2.origin.z = posk[ZDIR];
00727         break;
00728 
00729     default:   NAMD_bug("RecBisection failing horribly!");
00730     }
00731   
00732     patch_p1.load = loadarray[mindir];
00733     patch_p2.load = patch_p.load - patch_p1.load;
00734 
00735     /*
00736     CkPrintf("Calling Topo Rec Divide along %d direction with pe grid = (%d,%d,%d) to (%d,%d,%d) and patch grid = (%d,%d,%d) to (%d,%d,%d)\n", mindir, proc_p1.origin.x, proc_p1.origin.y, 
00737              proc_p1.origin.z, proc_p1.corner.x, proc_p1.corner.y, proc_p1.corner.z, 
00738              patch_p1.origin.x, patch_p1.origin.y, patch_p1.origin.z, patch_p1.corner.x, 
00739              patch_p1.corner.y, patch_p1.corner.z);
00740     
00741     CkPrintf("Calling Topo Rec Divide along %d direction with pe grid = (%d,%d,%d) to (%d,%d,%d) and patch grid = (%d,%d,%d) to (%d,%d,%d)\n\n\n", mindir, proc_p2.origin.x, proc_p2.origin.y, 
00742              proc_p2.origin.z, proc_p2.corner.x, proc_p2.corner.y, proc_p2.corner.z, 
00743              patch_p2.origin.x, patch_p2.origin.y, patch_p2.origin.z, patch_p2.corner.x, 
00744              patch_p2.corner.y, patch_p2.corner.z);
00745     */
00746 
00747     int rc = 0;
00748 
00749     if (!p1_empty[mindir]) 
00750       rc = topogrid_rec_divide(proc_p1, patch_p1); 
00751     else
00752       return -1;
00753 
00754     if (rc < 0)
00755       return rc;
00756 
00757     if (!p2_empty[mindir]) 
00758       rc = topogrid_rec_divide(proc_p2, patch_p2);
00759     else
00760       return -1;
00761 
00762     if (rc < 0)
00763       return rc;    
00764 
00765     return 1;
00766 }
00767 
00768 
00769 //Assign all patches to their corresponding processors
00770 void RecBisection::assignPatchesToProcGrid(int *dest_arr, int X, int Y, int Z,
00771                                            DimensionMap dm)
00772 {
00773     int pix;
00774     Partition p;
00775     TopoManager tmgr;
00776  
00777     srand(CkNumPes() * 1000);
00778     
00779     int coord[3];
00780 
00781     //Naive scheme where I just choose the origin of the proc grie
00782     for(pix=0; pix<npartition; pix++) {
00783         p = partitions[pix];
00784         //Get the actual origin of that processor prism and assign it
00785         //to this patch
00786         
00787         int xdiff, ydiff, zdiff;
00788 
00789         xdiff = p.corner.x - p.origin.x + 1;
00790         ydiff = p.corner.y - p.origin.y + 1;
00791         zdiff = p.corner.z - p.origin.z + 1;
00792 
00793         coord[0] = p.origin.x + rand() % xdiff;
00794         coord[1] = p.origin.y + rand() % ydiff;
00795         coord[2] = p.origin.z + rand() % zdiff;
00796         
00797         int pe = tmgr.coordinatesToRank(coord[dm.x], coord[dm.y], coord[dm.z], 0);
00798         patchMap->assignNode(pix, pe);  
00799         dest_arr[pix] = pe;  
00800     }
00801 
00802     int npatches = patchMap->numPatches();
00803     for (int pid=0; pid<npatches; ++pid ) 
00804       patchMap->assignBaseNode(pid);
00805 }
00806 
00807 #endif //USE_TOPOMAP

Generated on Thu Sep 21 01:17:14 2017 for NAMD by  doxygen 1.4.7