WorkDistrib.C File Reference

#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
struct  pe_sortop_bit_reversed
struct  pe_sortop_coord_x
struct  pe_sortop_coord_y
class  PatchMapMsg
class  ComputeMapMsg
struct  nodesort
struct  TopoManagerWrapper
struct  TopoManagerWrapper::pe_sortop_topo
struct  patch_sortop_curve_a
struct  patch_sortop_curve_b
struct  patch_sortop_curve_c

Defines

#define MIN_DEBUG_LEVEL   2
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Functions

static void build_ordering (void *)
void topo_getargs (char **argv)
static int compare_bit_reversed (int a, int b)
static bool less_than_bit_reversed (int a, int b)
void cuda_initialize ()
void mic_initialize ()
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)

Variables

__thread DeviceCUDA *  deviceCUDA
static int randtopo
static int eventMachineProgress


Detailed Description

Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.


Define Documentation

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Referenced by WorkDistrib::enqueueAngles(), WorkDistrib::enqueueAniso(), WorkDistrib::enqueueBonds(), WorkDistrib::enqueueCrossterms(), WorkDistrib::enqueueCUDA(), WorkDistrib::enqueueCUDAP2(), WorkDistrib::enqueueCUDAP3(), WorkDistrib::enqueueDihedrals(), WorkDistrib::enqueueExcls(), WorkDistrib::enqueueGromacsPair(), WorkDistrib::enqueueImpropers(), WorkDistrib::enqueueMIC(), WorkDistrib::enqueuePme(), WorkDistrib::enqueueSelfA1(), WorkDistrib::enqueueSelfA2(), WorkDistrib::enqueueSelfA3(), WorkDistrib::enqueueSelfB1(), WorkDistrib::enqueueSelfB2(), WorkDistrib::enqueueSelfB3(), WorkDistrib::enqueueThole(), WorkDistrib::enqueueWork(), WorkDistrib::enqueueWorkA1(), WorkDistrib::enqueueWorkA2(), WorkDistrib::enqueueWorkA3(), WorkDistrib::enqueueWorkB1(), WorkDistrib::enqueueWorkB2(), WorkDistrib::enqueueWorkB3(), WorkDistrib::enqueueWorkC(), WorkDistrib::finishCUDA(), WorkDistrib::finishCUDAP2(), WorkDistrib::finishCUDAP3(), WorkDistrib::finishMIC(), WorkDistrib::messageEnqueueWork(), WorkDistrib::messageFinishCUDA(), and WorkDistrib::messageFinishMIC().

#define MIN_DEBUG_LEVEL   2

Definition at line 61 of file WorkDistrib.C.


Function Documentation

static void build_ordering ( void *   )  [static]

Definition at line 86 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

00086                                    {
00087   WorkDistrib::buildNodeAwarePeOrdering();
00088 }

static int compare_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 120 of file WorkDistrib.C.

00120                                               {
00121   int d = a ^ b;
00122   int c = 1;
00123   if ( d ) while ( ! (d & c) ) {
00124     c = c << 1;
00125   }
00126   return (a & c) - (b & c);
00127 }

void cuda_initialize (  ) 

Definition at line 20 of file DeviceCUDA.C.

References deviceCUDA and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

00020                        {
00021         deviceCUDA = new DeviceCUDA();
00022         deviceCUDA->initialize();
00023 }

static bool less_than_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 129 of file WorkDistrib.C.

00129                                                  {
00130   int d = a ^ b;
00131   int c = 1;
00132   if ( d ) while ( ! (d & c) ) {
00133     c = c << 1;
00134   }
00135   return d && (b & c);
00136 }

void mic_initialize (  ) 

Referenced by WorkDistrib::peOrderingReady().

static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition *  coord,
int *  result,
int  ydim 
) [static]

Definition at line 268 of file WorkDistrib.C.

References x and y.

Referenced by WorkDistrib::sortPmePes().

00272     {
00273   int x_len = x_end - x_begin;
00274   int y_len = y_end - y_begin;
00275   if ( x_len == 1 && y_len == 1 ) {
00276     // done, now put this pe in the right place
00277     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
00278       coord[*pe_begin].x, coord[*pe_begin].y);
00279     result[x_begin*ydim + y_begin] = *pe_begin;
00280     return;
00281   }
00282   int *pe_end = pe_begin + x_len * y_len;
00283   if ( x_len >= y_len ) {
00284     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
00285     int x_split = x_begin + x_len / 2;
00286     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
00287     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
00288     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
00289     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
00290   } else {
00291     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
00292     int y_split = y_begin + y_len / 2;
00293     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
00294     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
00295     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
00296     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
00297   }
00298 }

static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper &  tmgr 
) [static]

Definition at line 2017 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, simParams, and TopoManagerWrapper::sortAndSplit().

02024     {
02025 
02026   SimParameters *simParams = Node::Object()->simParameters;
02027   PatchMap *patchMap = PatchMap::Object();
02028   int *patches = patch_begin;
02029   int npatches = patch_end - patch_begin;
02030   int *nodes = node_begin;
02031   int nnodes = node_end - node_begin;
02032 
02033   // assign patch loads
02034   double totalRawLoad = 0;
02035   for ( int i=0; i<npatches; ++i ) {
02036     int pid=patches[i];
02037 #ifdef MEM_OPT_VERSION
02038     double load = patchMap->numAtoms(pid) + 10;      
02039 #else
02040     double load = patchMap->patch(pid)->getNumAtoms() + 10;
02041 #endif
02042     patchLoads[pid] = load;
02043     sortedLoads[i] = load;
02044     totalRawLoad += load;
02045   }
02046   std::sort(sortedLoads,sortedLoads+npatches);
02047 
02048   // limit maxPatchLoad to adjusted average load per node
02049   double sumLoad = 0;
02050   double maxPatchLoad = 1;
02051   for ( int i=0; i<npatches; ++i ) {
02052     double load = sortedLoads[i];
02053     double total = sumLoad + (npatches-i) * load;
02054     if ( nnodes * load > total ) break;
02055     sumLoad += load;
02056     maxPatchLoad = load;
02057   }
02058   double totalLoad = 0;
02059   for ( int i=0; i<npatches; ++i ) {
02060     int pid=patches[i];
02061     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
02062     totalLoad += patchLoads[pid];
02063   }
02064   if ( nnodes * maxPatchLoad > totalLoad )
02065     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
02066 
02067   int a_len, b_len, c_len;
02068   int a_min, b_min, c_min;
02069   { // find dimensions
02070     a_min = patchMap->index_a(patches[0]);
02071     b_min = patchMap->index_b(patches[0]);
02072     c_min = patchMap->index_c(patches[0]);
02073     int a_max = a_min;
02074     int b_max = b_min;
02075     int c_max = c_min;
02076     for ( int i=1; i<npatches; ++i ) {
02077       int a = patchMap->index_a(patches[i]);
02078       int b = patchMap->index_b(patches[i]);
02079       int c = patchMap->index_c(patches[i]);
02080       if ( a < a_min ) a_min = a;
02081       if ( b < b_min ) b_min = b;
02082       if ( c < c_min ) c_min = c;
02083       if ( a > a_max ) a_max = a;
02084       if ( b > b_max ) b_max = b;
02085       if ( c > c_max ) c_max = c;
02086     }
02087     a_len = a_max - a_min;
02088     b_len = b_max - b_min;
02089     c_len = c_max - c_min;
02090   }
02091 
02092   int *node_split = node_begin;
02093 
02094   if ( simParams->disableTopology ) ; else
02095   if ( a_len >= b_len && a_len >= c_len ) {
02096     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
02097   } else if ( b_len >= a_len && b_len >= c_len ) {
02098     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
02099   } else if ( c_len >= a_len && c_len >= b_len ) {
02100     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
02101   }
02102 
02103   if ( node_split == node_begin ) {  // unable to split torus
02104     // make sure physical nodes are together
02105     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
02106     // find physical node boundary to split on
02107     int i_split = 0;
02108     for ( int i=0; i<nnodes; ++i ) {
02109       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
02110         int mid = (nnodes+1)/2;
02111         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02112         else break;
02113       }
02114     }
02115     node_split = node_begin + i_split;
02116   }
02117 
02118   if ( node_split == node_begin ) {
02119     if ( simParams->verboseTopology ) {
02120       int crds[3];
02121       tmgr.coords(*node_begin, crds);
02122       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02123                CmiPhysicalNodeID(*node_begin), *node_begin,
02124                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
02125                a_min, b_min, c_min, npatches,
02126                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02127     }
02128 
02129     // final sort along a to minimize pme message count
02130     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02131 
02132     // walk through patches in sorted order
02133     int *node = node_begin;
02134     sumLoad = 0;
02135     for ( int i=0; i < npatches; ++i ) {
02136       int pid = patches[i];
02137       assignedNode[pid] = *node;
02138       sumLoad += patchLoads[pid];
02139       double targetLoad = totalLoad *
02140         ((double)(node-node_begin+1) / (double)nnodes);
02141       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02142                 i, *node,
02143                 patchMap->index_a(pid),
02144                 patchMap->index_b(pid),
02145                 patchMap->index_c(pid),
02146                 sumLoad, targetLoad);
02147       double extra = ( i+1 < npatches ? 0.5 * patchLoads[patches[i+1]] : 0 );
02148       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02149     }
02150 
02151     return;
02152   }
02153 
02154   if ( a_len >= b_len && a_len >= c_len ) {
02155     if ( 0 ) CkPrintf("sort a\n");
02156     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02157   } else if ( b_len >= a_len && b_len >= c_len ) {
02158     if ( 0 ) CkPrintf("sort b\n");
02159     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
02160   } else if ( c_len >= a_len && c_len >= b_len ) {
02161     if ( 0 ) CkPrintf("sort c\n");
02162     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
02163   }
02164 
02165   int *patch_split;
02166   { // walk through patches in sorted order
02167     int *node = node_begin;
02168     sumLoad = 0;
02169     for ( patch_split = patch_begin;
02170           patch_split != patch_end && node != node_split;
02171           ++patch_split ) {
02172       sumLoad += patchLoads[*patch_split];
02173       double targetLoad = totalLoad *
02174         ((double)(node-node_begin+1) / (double)nnodes);
02175       if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02176                 patch_split - patch_begin, *node,
02177                 patchMap->index_a(*patch_split),
02178                 patchMap->index_b(*patch_split),
02179                 patchMap->index_c(*patch_split),
02180                 sumLoad, targetLoad);
02181       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
02182       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02183     }
02184     double targetLoad = totalLoad *
02185       ((double)(node_split-node_begin) / (double)nnodes);
02186     if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
02187               node_split-node_begin, nnodes,
02188               patch_split-patch_begin, npatches,
02189               sumLoad, targetLoad);
02190   }
02191 
02192   // recurse
02193   recursive_bisect_with_curve(
02194     patch_begin, patch_split, node_begin, node_split,
02195     patchLoads, sortedLoads, assignedNode, tmgr);
02196   recursive_bisect_with_curve(
02197     patch_split, patch_end, node_split, node_end,
02198     patchLoads, sortedLoads, assignedNode, tmgr);
02199 }

void topo_getargs ( char **  argv  ) 

Definition at line 90 of file WorkDistrib.C.

References build_ordering() and randtopo.

Referenced by all_init().

00090                                {
00091   randtopo = CmiGetArgFlag(argv, "+randtopo");
00092   if ( CkMyPe() >= CkNumPes() ) return;
00093   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
00094 }


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

int eventMachineProgress [static]

Definition at line 96 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

int randtopo [static]

Definition at line 84 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering() and topo_getargs().


Generated on Sat Nov 18 01:17:17 2017 for NAMD by  doxygen 1.4.7