WorkDistrib.C File Reference

#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
struct  pe_sortop_bit_reversed
struct  pe_sortop_coord_x
struct  pe_sortop_coord_y
class  PatchMapMsg
class  ComputeMapMsg
struct  nodesort
struct  TopoManagerWrapper
struct  TopoManagerWrapper::pe_sortop_topo
struct  patch_sortop_curve_a
struct  patch_sortop_curve_b
struct  patch_sortop_curve_c

Defines

#define MIN_DEBUG_LEVEL   2
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Functions

static void build_ordering (void *)
void topo_getargs (char **argv)
static int compare_bit_reversed (int a, int b)
static bool less_than_bit_reversed (int a, int b)
void cuda_initialize ()
void mic_initialize ()
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)

Variables

__thread DeviceCUDA * deviceCUDA
static int randtopo
static int eventMachineProgress


Detailed Description

Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.


Define Documentation

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Referenced by WorkDistrib::enqueueAngles(), WorkDistrib::enqueueAniso(), WorkDistrib::enqueueBonds(), WorkDistrib::enqueueCrossterms(), WorkDistrib::enqueueCUDA(), WorkDistrib::enqueueCUDAP2(), WorkDistrib::enqueueCUDAP3(), WorkDistrib::enqueueDihedrals(), WorkDistrib::enqueueExcls(), WorkDistrib::enqueueGromacsPair(), WorkDistrib::enqueueImpropers(), WorkDistrib::enqueueMIC(), WorkDistrib::enqueuePme(), WorkDistrib::enqueueSelfA1(), WorkDistrib::enqueueSelfA2(), WorkDistrib::enqueueSelfA3(), WorkDistrib::enqueueSelfB1(), WorkDistrib::enqueueSelfB2(), WorkDistrib::enqueueSelfB3(), WorkDistrib::enqueueThole(), WorkDistrib::enqueueWork(), WorkDistrib::enqueueWorkA1(), WorkDistrib::enqueueWorkA2(), WorkDistrib::enqueueWorkA3(), WorkDistrib::enqueueWorkB1(), WorkDistrib::enqueueWorkB2(), WorkDistrib::enqueueWorkB3(), WorkDistrib::enqueueWorkC(), WorkDistrib::finishCUDA(), WorkDistrib::finishCUDAP2(), WorkDistrib::finishCUDAP3(), WorkDistrib::finishMIC(), WorkDistrib::messageEnqueueWork(), WorkDistrib::messageFinishCUDA(), and WorkDistrib::messageFinishMIC().

#define MIN_DEBUG_LEVEL   2

Definition at line 61 of file WorkDistrib.C.


Function Documentation

static void build_ordering ( void *   )  [static]

Definition at line 86 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

00086                                    {
00087   WorkDistrib::buildNodeAwarePeOrdering();
00088 }

static int compare_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 120 of file WorkDistrib.C.

00120                                               {
00121   int d = a ^ b;
00122   int c = 1;
00123   if ( d ) while ( ! (d & c) ) {
00124     c = c << 1;
00125   }
00126   return (a & c) - (b & c);
00127 }

void cuda_initialize (  ) 

Definition at line 20 of file DeviceCUDA.C.

References deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

00020                        {
00021         deviceCUDA = new DeviceCUDA();
00022         deviceCUDA->initialize();
00023 }

static bool less_than_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 129 of file WorkDistrib.C.

00129                                                  {
00130   int d = a ^ b;
00131   int c = 1;
00132   if ( d ) while ( ! (d & c) ) {
00133     c = c << 1;
00134   }
00135   return d && (b & c);
00136 }

void mic_initialize (  ) 

Referenced by WorkDistrib::peOrderingReady().

static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition *  coord,
int *  result,
int  ydim 
) [static]

Definition at line 268 of file WorkDistrib.C.

References x, and y.

Referenced by WorkDistrib::sortPmePes().

00272     {
00273   int x_len = x_end - x_begin;
00274   int y_len = y_end - y_begin;
00275   if ( x_len == 1 && y_len == 1 ) {
00276     // done, now put this pe in the right place
00277     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
00278       coord[*pe_begin].x, coord[*pe_begin].y);
00279     result[x_begin*ydim + y_begin] = *pe_begin;
00280     return;
00281   }
00282   int *pe_end = pe_begin + x_len * y_len;
00283   if ( x_len >= y_len ) {
00284     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
00285     int x_split = x_begin + x_len / 2;
00286     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
00287     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
00288     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
00289     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
00290   } else {
00291     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
00292     int y_split = y_begin + y_len / 2;
00293     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
00294     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
00295     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
00296     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
00297   }
00298 }

static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper &  tmgr 
) [static]

Definition at line 2015 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, simParams, and TopoManagerWrapper::sortAndSplit().

02022     {
02023 
02024   SimParameters *simParams = Node::Object()->simParameters;
02025   PatchMap *patchMap = PatchMap::Object();
02026   int *patches = patch_begin;
02027   int npatches = patch_end - patch_begin;
02028   int *nodes = node_begin;
02029   int nnodes = node_end - node_begin;
02030 
02031   // assign patch loads
02032   double totalRawLoad = 0;
02033   for ( int i=0; i<npatches; ++i ) {
02034     int pid=patches[i];
02035 #ifdef MEM_OPT_VERSION
02036     double load = patchMap->numAtoms(pid) + 10;      
02037 #else
02038     double load = patchMap->patch(pid)->getNumAtoms() + 10;
02039 #endif
02040     patchLoads[pid] = load;
02041     sortedLoads[i] = load;
02042     totalRawLoad += load;
02043   }
02044   std::sort(sortedLoads,sortedLoads+npatches);
02045 
02046   // limit maxPatchLoad to adjusted average load per node
02047   double sumLoad = 0;
02048   double maxPatchLoad = 1;
02049   for ( int i=0; i<npatches; ++i ) {
02050     double load = sortedLoads[i];
02051     double total = sumLoad + (npatches-i) * load;
02052     if ( nnodes * load > total ) break;
02053     sumLoad += load;
02054     maxPatchLoad = load;
02055   }
02056   double totalLoad = 0;
02057   for ( int i=0; i<npatches; ++i ) {
02058     int pid=patches[i];
02059     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
02060     totalLoad += patchLoads[pid];
02061   }
02062   if ( nnodes * maxPatchLoad > totalLoad )
02063     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
02064 
02065   int a_len, b_len, c_len;
02066   int a_min, b_min, c_min;
02067   { // find dimensions
02068     a_min = patchMap->index_a(patches[0]);
02069     b_min = patchMap->index_b(patches[0]);
02070     c_min = patchMap->index_c(patches[0]);
02071     int a_max = a_min;
02072     int b_max = b_min;
02073     int c_max = c_min;
02074     for ( int i=1; i<npatches; ++i ) {
02075       int a = patchMap->index_a(patches[i]);
02076       int b = patchMap->index_b(patches[i]);
02077       int c = patchMap->index_c(patches[i]);
02078       if ( a < a_min ) a_min = a;
02079       if ( b < b_min ) b_min = b;
02080       if ( c < c_min ) c_min = c;
02081       if ( a > a_max ) a_max = a;
02082       if ( b > b_max ) b_max = b;
02083       if ( c > c_max ) c_max = c;
02084     }
02085     a_len = a_max - a_min;
02086     b_len = b_max - b_min;
02087     c_len = c_max - c_min;
02088   }
02089 
02090   int *node_split = node_begin;
02091 
02092   if ( simParams->disableTopology ) ; else
02093   if ( a_len >= b_len && a_len >= c_len ) {
02094     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
02095   } else if ( b_len >= a_len && b_len >= c_len ) {
02096     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
02097   } else if ( c_len >= a_len && c_len >= b_len ) {
02098     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
02099   }
02100 
02101   if ( node_split == node_begin ) {  // unable to split torus
02102     // make sure physical nodes are together
02103     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
02104     // find physical node boundary to split on
02105     int i_split = 0;
02106     for ( int i=0; i<nnodes; ++i ) {
02107       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
02108         int mid = (nnodes+1)/2;
02109         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02110         else break;
02111       }
02112     }
02113     node_split = node_begin + i_split;
02114   }
02115 
02116   if ( node_split == node_begin ) {
02117     if ( simParams->verboseTopology ) {
02118       int crds[3];
02119       tmgr.coords(*node_begin, crds);
02120       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02121                CmiPhysicalNodeID(*node_begin), *node_begin,
02122                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
02123                a_min, b_min, c_min, npatches,
02124                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02125     }
02126 
02127     // final sort along a to minimize pme message count
02128     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02129 
02130     // walk through patches in sorted order
02131     int *node = node_begin;
02132     sumLoad = 0;
02133     for ( int i=0; i < npatches; ++i ) {
02134       int pid = patches[i];
02135       assignedNode[pid] = *node;
02136       sumLoad += patchLoads[pid];
02137       double targetLoad = totalLoad *
02138         ((double)(node-node_begin+1) / (double)nnodes);
02139       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02140                 i, *node,
02141                 patchMap->index_a(pid),
02142                 patchMap->index_b(pid),
02143                 patchMap->index_c(pid),
02144                 sumLoad, targetLoad);
02145       double extra = ( i+1 < npatches ? 0.5 * patchLoads[patches[i+1]] : 0 );
02146       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02147     }
02148 
02149     return;
02150   }
02151 
02152   if ( a_len >= b_len && a_len >= c_len ) {
02153     if ( 0 ) CkPrintf("sort a\n");
02154     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02155   } else if ( b_len >= a_len && b_len >= c_len ) {
02156     if ( 0 ) CkPrintf("sort b\n");
02157     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
02158   } else if ( c_len >= a_len && c_len >= b_len ) {
02159     if ( 0 ) CkPrintf("sort c\n");
02160     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
02161   }
02162 
02163   int *patch_split;
02164   { // walk through patches in sorted order
02165     int *node = node_begin;
02166     sumLoad = 0;
02167     for ( patch_split = patch_begin;
02168           patch_split != patch_end && node != node_split;
02169           ++patch_split ) {
02170       sumLoad += patchLoads[*patch_split];
02171       double targetLoad = totalLoad *
02172         ((double)(node-node_begin+1) / (double)nnodes);
02173       if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02174                 patch_split - patch_begin, *node,
02175                 patchMap->index_a(*patch_split),
02176                 patchMap->index_b(*patch_split),
02177                 patchMap->index_c(*patch_split),
02178                 sumLoad, targetLoad);
02179       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
02180       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02181     }
02182     double targetLoad = totalLoad *
02183       ((double)(node_split-node_begin) / (double)nnodes);
02184     if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
02185               node_split-node_begin, nnodes,
02186               patch_split-patch_begin, npatches,
02187               sumLoad, targetLoad);
02188   }
02189 
02190   // recurse
02191   recursive_bisect_with_curve(
02192     patch_begin, patch_split, node_begin, node_split,
02193     patchLoads, sortedLoads, assignedNode, tmgr);
02194   recursive_bisect_with_curve(
02195     patch_split, patch_end, node_split, node_end,
02196     patchLoads, sortedLoads, assignedNode, tmgr);
02197 }

void topo_getargs ( char **  argv  ) 

Definition at line 90 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

00090                                {
00091   randtopo = CmiGetArgFlag(argv, "+randtopo");
00092   if ( CkMyPe() >= CkNumPes() ) return;
00093   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
00094 }


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

int eventMachineProgress [static]

Definition at line 96 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

int randtopo [static]

Definition at line 84 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().


Generated on Fri Sep 22 01:17:16 2017 for NAMD by  doxygen 1.4.7