WorkDistrib.C File Reference

#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
struct  pe_sortop_bit_reversed
struct  pe_sortop_coord_x
struct  pe_sortop_coord_y
class  PatchMapMsg
struct  nodesort
struct  TopoManagerWrapper
struct  TopoManagerWrapper::pe_sortop_topo
struct  patch_sortop_curve_a
struct  patch_sortop_curve_b
struct  patch_sortop_curve_c

Defines

#define MIN_DEBUG_LEVEL   2
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Functions

static void build_ordering (void *)
void topo_getargs (char **argv)
static int compare_bit_reversed (int a, int b)
static bool less_than_bit_reversed (int a, int b)
void cuda_initialize ()
void mic_initialize ()
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)

Variables

__thread DeviceCUDAdeviceCUDA
static int randtopo
static int eventMachineProgress


Detailed Description

Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.


Define Documentation

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Referenced by WorkDistrib::enqueueAngles(), WorkDistrib::enqueueAniso(), WorkDistrib::enqueueBonds(), WorkDistrib::enqueueCrossterms(), WorkDistrib::enqueueCUDA(), WorkDistrib::enqueueCUDAP2(), WorkDistrib::enqueueCUDAP3(), WorkDistrib::enqueueDihedrals(), WorkDistrib::enqueueExcls(), WorkDistrib::enqueueGromacsPair(), WorkDistrib::enqueueImpropers(), WorkDistrib::enqueueMIC(), WorkDistrib::enqueuePme(), WorkDistrib::enqueueSelfA1(), WorkDistrib::enqueueSelfA2(), WorkDistrib::enqueueSelfA3(), WorkDistrib::enqueueSelfB1(), WorkDistrib::enqueueSelfB2(), WorkDistrib::enqueueSelfB3(), WorkDistrib::enqueueThole(), WorkDistrib::enqueueWork(), WorkDistrib::enqueueWorkA1(), WorkDistrib::enqueueWorkA2(), WorkDistrib::enqueueWorkA3(), WorkDistrib::enqueueWorkB1(), WorkDistrib::enqueueWorkB2(), WorkDistrib::enqueueWorkB3(), WorkDistrib::enqueueWorkC(), WorkDistrib::finishCUDA(), WorkDistrib::finishCUDAP2(), WorkDistrib::finishCUDAP3(), WorkDistrib::finishMIC(), WorkDistrib::messageEnqueueWork(), WorkDistrib::messageFinishCUDA(), and WorkDistrib::messageFinishMIC().

#define MIN_DEBUG_LEVEL   2

Definition at line 61 of file WorkDistrib.C.


Function Documentation

static void build_ordering ( void *   )  [static]

Definition at line 86 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

00086                                    {
00087   WorkDistrib::buildNodeAwarePeOrdering();
00088 }

static int compare_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 120 of file WorkDistrib.C.

00120                                               {
00121   int d = a ^ b;
00122   int c = 1;
00123   if ( d ) while ( ! (d & c) ) {
00124     c = c << 1;
00125   }
00126   return (a & c) - (b & c);
00127 }

void cuda_initialize (  ) 

Definition at line 20 of file DeviceCUDA.C.

References deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

00020                        {
00021         deviceCUDA = new DeviceCUDA();
00022         deviceCUDA->initialize();
00023 }

static bool less_than_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 129 of file WorkDistrib.C.

00129                                                  {
00130   int d = a ^ b;
00131   int c = 1;
00132   if ( d ) while ( ! (d & c) ) {
00133     c = c << 1;
00134   }
00135   return d && (b & c);
00136 }

void mic_initialize (  ) 

Referenced by WorkDistrib::peOrderingReady().

static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition coord,
int *  result,
int  ydim 
) [static]

Definition at line 268 of file WorkDistrib.C.

References x, and y.

Referenced by WorkDistrib::sortPmePes().

00272     {
00273   int x_len = x_end - x_begin;
00274   int y_len = y_end - y_begin;
00275   if ( x_len == 1 && y_len == 1 ) {
00276     // done, now put this pe in the right place
00277     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
00278       coord[*pe_begin].x, coord[*pe_begin].y);
00279     result[x_begin*ydim + y_begin] = *pe_begin;
00280     return;
00281   }
00282   int *pe_end = pe_begin + x_len * y_len;
00283   if ( x_len >= y_len ) {
00284     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
00285     int x_split = x_begin + x_len / 2;
00286     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
00287     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
00288     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
00289     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
00290   } else {
00291     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
00292     int y_split = y_begin + y_len / 2;
00293     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
00294     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
00295     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
00296     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
00297   }
00298 }

static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper tmgr 
) [static]

Definition at line 1972 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, simParams, and TopoManagerWrapper::sortAndSplit().

01979     {
01980 
01981   SimParameters *simParams = Node::Object()->simParameters;
01982   PatchMap *patchMap = PatchMap::Object();
01983   int *patches = patch_begin;
01984   int npatches = patch_end - patch_begin;
01985   int *nodes = node_begin;
01986   int nnodes = node_end - node_begin;
01987 
01988   // assign patch loads
01989   double totalRawLoad = 0;
01990   for ( int i=0; i<npatches; ++i ) {
01991     int pid=patches[i];
01992 #ifdef MEM_OPT_VERSION
01993     double load = patchMap->numAtoms(pid) + 10;      
01994 #else
01995     double load = patchMap->patch(pid)->getNumAtoms() + 10;
01996 #endif
01997     patchLoads[pid] = load;
01998     sortedLoads[i] = load;
01999     totalRawLoad += load;
02000   }
02001   std::sort(sortedLoads,sortedLoads+npatches);
02002 
02003   // limit maxPatchLoad to adjusted average load per node
02004   double sumLoad = 0;
02005   double maxPatchLoad = 1;
02006   for ( int i=0; i<npatches; ++i ) {
02007     double load = sortedLoads[i];
02008     double total = sumLoad + (npatches-i) * load;
02009     if ( nnodes * load > total ) break;
02010     sumLoad += load;
02011     maxPatchLoad = load;
02012   }
02013   double totalLoad = 0;
02014   for ( int i=0; i<npatches; ++i ) {
02015     int pid=patches[i];
02016     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
02017     totalLoad += patchLoads[pid];
02018   }
02019   if ( nnodes * maxPatchLoad > totalLoad )
02020     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
02021 
02022   int a_len, b_len, c_len;
02023   int a_min, b_min, c_min;
02024   { // find dimensions
02025     a_min = patchMap->index_a(patches[0]);
02026     b_min = patchMap->index_b(patches[0]);
02027     c_min = patchMap->index_c(patches[0]);
02028     int a_max = a_min;
02029     int b_max = b_min;
02030     int c_max = c_min;
02031     for ( int i=1; i<npatches; ++i ) {
02032       int a = patchMap->index_a(patches[i]);
02033       int b = patchMap->index_b(patches[i]);
02034       int c = patchMap->index_c(patches[i]);
02035       if ( a < a_min ) a_min = a;
02036       if ( b < b_min ) b_min = b;
02037       if ( c < c_min ) c_min = c;
02038       if ( a > a_max ) a_max = a;
02039       if ( b > b_max ) b_max = b;
02040       if ( c > c_max ) c_max = c;
02041     }
02042     a_len = a_max - a_min;
02043     b_len = b_max - b_min;
02044     c_len = c_max - c_min;
02045   }
02046 
02047   int *node_split = node_begin;
02048 
02049   if ( simParams->disableTopology ) ; else
02050   if ( a_len >= b_len && a_len >= c_len ) {
02051     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
02052   } else if ( b_len >= a_len && b_len >= c_len ) {
02053     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
02054   } else if ( c_len >= a_len && c_len >= b_len ) {
02055     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
02056   }
02057 
02058   if ( node_split == node_begin ) {  // unable to split torus
02059     // make sure physical nodes are together
02060     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
02061     // find physical node boundary to split on
02062     int i_split = 0;
02063     for ( int i=0; i<nnodes; ++i ) {
02064       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
02065         int mid = (nnodes+1)/2;
02066         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02067         else break;
02068       }
02069     }
02070     node_split = node_begin + i_split;
02071   }
02072 
02073   if ( node_split == node_begin ) {
02074     if ( simParams->verboseTopology ) {
02075       int crds[3];
02076       tmgr.coords(*node_begin, crds);
02077       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02078                CmiPhysicalNodeID(*node_begin), *node_begin,
02079                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
02080                a_min, b_min, c_min, npatches,
02081                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02082     }
02083 
02084     // final sort along a to minimize pme message count
02085     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02086 
02087     // walk through patches in sorted order
02088     int *node = node_begin;
02089     sumLoad = 0;
02090     for ( int i=0; i < npatches; ++i ) {
02091       int pid = patches[i];
02092       assignedNode[pid] = *node;
02093       sumLoad += patchLoads[pid];
02094       double targetLoad = totalLoad *
02095         ((double)(node-node_begin+1) / (double)nnodes);
02096       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02097                 i, *node,
02098                 patchMap->index_a(pid),
02099                 patchMap->index_b(pid),
02100                 patchMap->index_c(pid),
02101                 sumLoad, targetLoad);
02102       double extra = ( i+1 < npatches ? 0.5 * patchLoads[patches[i+1]] : 0 );
02103       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02104     }
02105 
02106     return;
02107   }
02108 
02109   if ( a_len >= b_len && a_len >= c_len ) {
02110     if ( 0 ) CkPrintf("sort a\n");
02111     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02112   } else if ( b_len >= a_len && b_len >= c_len ) {
02113     if ( 0 ) CkPrintf("sort b\n");
02114     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
02115   } else if ( c_len >= a_len && c_len >= b_len ) {
02116     if ( 0 ) CkPrintf("sort c\n");
02117     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
02118   }
02119 
02120   int *patch_split;
02121   { // walk through patches in sorted order
02122     int *node = node_begin;
02123     sumLoad = 0;
02124     for ( patch_split = patch_begin;
02125           patch_split != patch_end && node != node_split;
02126           ++patch_split ) {
02127       sumLoad += patchLoads[*patch_split];
02128       double targetLoad = totalLoad *
02129         ((double)(node-node_begin+1) / (double)nnodes);
02130       if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02131                 patch_split - patch_begin, *node,
02132                 patchMap->index_a(*patch_split),
02133                 patchMap->index_b(*patch_split),
02134                 patchMap->index_c(*patch_split),
02135                 sumLoad, targetLoad);
02136       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
02137       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02138     }
02139     double targetLoad = totalLoad *
02140       ((double)(node_split-node_begin) / (double)nnodes);
02141     if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
02142               node_split-node_begin, nnodes,
02143               patch_split-patch_begin, npatches,
02144               sumLoad, targetLoad);
02145   }
02146 
02147   // recurse
02148   recursive_bisect_with_curve(
02149     patch_begin, patch_split, node_begin, node_split,
02150     patchLoads, sortedLoads, assignedNode, tmgr);
02151   recursive_bisect_with_curve(
02152     patch_split, patch_end, node_split, node_end,
02153     patchLoads, sortedLoads, assignedNode, tmgr);
02154 }

void topo_getargs ( char **  argv  ) 

Definition at line 90 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

00090                                {
00091   randtopo = CmiGetArgFlag(argv, "+randtopo");
00092   if ( CkMyPe() >= CkNumPes() ) return;
00093   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
00094 }


Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

int eventMachineProgress [static]

Definition at line 96 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

int randtopo [static]

Definition at line 84 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().


Generated on Mon Sep 24 01:17:17 2018 for NAMD by  doxygen 1.4.7