NAMD
Classes | Macros | Functions | Variables
WorkDistrib.C File Reference
#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "ConfigList.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
 
struct  pe_sortop_bit_reversed
 
struct  pe_sortop_coord_x
 
struct  pe_sortop_coord_y
 
class  PatchMapMsg
 
struct  nodesort
 
struct  TopoManagerWrapper
 
struct  TopoManagerWrapper::pe_sortop_topo
 
struct  patch_sortop_curve_a
 
struct  patch_sortop_curve_b
 
struct  patch_sortop_curve_c
 

Macros

#define MIN_DEBUG_LEVEL   2
 
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
 

Functions

static void build_ordering (void *)
 
void topo_getargs (char **argv)
 
static int compare_bit_reversed (int a, int b)
 
static bool less_than_bit_reversed (int a, int b)
 
void cuda_initialize ()
 
void mic_initialize ()
 
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
 
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
 

Variables

__thread DeviceCUDAdeviceCUDA
 
static int randtopo
 
static int eventMachineProgress
 

Detailed Description

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.

Macro Definition Documentation

◆ MACHINE_PROGRESS

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

◆ MIN_DEBUG_LEVEL

#define MIN_DEBUG_LEVEL   2

Definition at line 62 of file WorkDistrib.C.

Function Documentation

◆ build_ordering()

static void build_ordering ( void *  )
static

Definition at line 87 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

87  {
89 }
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:181

◆ compare_bit_reversed()

static int compare_bit_reversed ( int  a,
int  b 
)
static

Definition at line 125 of file WorkDistrib.C.

Referenced by pe_sortop_bit_reversed::operator()().

125  {
126  int d = a ^ b;
127  int c = 1;
128  if ( d ) while ( ! (d & c) ) {
129  c = c << 1;
130  }
131  return (a & c) - (b & c);
132 }

◆ cuda_initialize()

void cuda_initialize ( )

Definition at line 27 of file DeviceCUDA.C.

References cuda_finalize(), deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

27  {
28  deviceCUDA = new DeviceCUDA();
30  std::atexit(cuda_finalize);
31 }
void initialize()
Definition: DeviceCUDA.C:107
void cuda_finalize()
Definition: DeviceCUDA.C:34
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

◆ less_than_bit_reversed()

static bool less_than_bit_reversed ( int  a,
int  b 
)
static

Definition at line 134 of file WorkDistrib.C.

134  {
135  int d = a ^ b;
136  int c = 1;
137  if ( d ) while ( ! (d & c) ) {
138  c = c << 1;
139  }
140  return d && (b & c);
141 }

◆ mic_initialize()

void mic_initialize ( )

◆ recursive_bisect_coord()

static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition coord,
int *  result,
int  ydim 
)
static

Definition at line 273 of file WorkDistrib.C.

Referenced by WorkDistrib::sortPmePes().

277  {
278  int x_len = x_end - x_begin;
279  int y_len = y_end - y_begin;
280  if ( x_len == 1 && y_len == 1 ) {
281  // done, now put this pe in the right place
282  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
283  coord[*pe_begin].x, coord[*pe_begin].y);
284  result[x_begin*ydim + y_begin] = *pe_begin;
285  return;
286  }
287  int *pe_end = pe_begin + x_len * y_len;
288  if ( x_len >= y_len ) {
289  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
290  int x_split = x_begin + x_len / 2;
291  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
292  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
293  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
294  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
295  } else {
296  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
297  int y_split = y_begin + y_len / 2;
298  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
299  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
300  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
301  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
302  }
303 }
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:273

◆ recursive_bisect_with_curve()

static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper tmgr 
)
static

Definition at line 2097 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, simParams, and TopoManagerWrapper::sortAndSplit().

2104  {
2105 
2107  PatchMap *patchMap = PatchMap::Object();
2108  int *patches = patch_begin;
2109  int npatches = patch_end - patch_begin;
2110  int *nodes = node_begin;
2111  int nnodes = node_end - node_begin;
2112 
2113  // assign patch loads
2114  const int emptyPatchLoad = simParams->emptyPatchLoad;
2115  double totalRawLoad = 0;
2116  for ( int i=0; i<npatches; ++i ) {
2117  int pid=patches[i];
2118 #ifdef MEM_OPT_VERSION
2119  double load = patchMap->numAtoms(pid) + emptyPatchLoad;
2120 #else
2121  double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
2122 #endif
2123  patchLoads[pid] = load;
2124  sortedLoads[i] = load;
2125  totalRawLoad += load;
2126  }
2127  std::sort(sortedLoads,sortedLoads+npatches);
2128 
2129  // limit maxPatchLoad to adjusted average load per node
2130  double sumLoad = 0;
2131  double maxPatchLoad = 1;
2132  for ( int i=0; i<npatches; ++i ) {
2133  double load = sortedLoads[i];
2134  double total = sumLoad + (npatches-i) * load;
2135  if ( nnodes * load > total ) break;
2136  sumLoad += load;
2137  maxPatchLoad = load;
2138  }
2139  double totalLoad = 0;
2140  for ( int i=0; i<npatches; ++i ) {
2141  int pid=patches[i];
2142  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2143  totalLoad += patchLoads[pid];
2144  }
2145  if ( nnodes * maxPatchLoad > totalLoad )
2146  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2147 
2148  int a_len, b_len, c_len;
2149  int a_min, b_min, c_min;
2150  { // find dimensions
2151  a_min = patchMap->index_a(patches[0]);
2152  b_min = patchMap->index_b(patches[0]);
2153  c_min = patchMap->index_c(patches[0]);
2154  int a_max = a_min;
2155  int b_max = b_min;
2156  int c_max = c_min;
2157  for ( int i=1; i<npatches; ++i ) {
2158  int a = patchMap->index_a(patches[i]);
2159  int b = patchMap->index_b(patches[i]);
2160  int c = patchMap->index_c(patches[i]);
2161  if ( a < a_min ) a_min = a;
2162  if ( b < b_min ) b_min = b;
2163  if ( c < c_min ) c_min = c;
2164  if ( a > a_max ) a_max = a;
2165  if ( b > b_max ) b_max = b;
2166  if ( c > c_max ) c_max = c;
2167  }
2168  a_len = a_max - a_min;
2169  b_len = b_max - b_min;
2170  c_len = c_max - c_min;
2171  }
2172 
2173  int *node_split = node_begin;
2174 
2175  if ( simParams->disableTopology ) ; else
2176  if ( a_len >= b_len && a_len >= c_len ) {
2177  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2178  } else if ( b_len >= a_len && b_len >= c_len ) {
2179  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2180  } else if ( c_len >= a_len && c_len >= b_len ) {
2181  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2182  }
2183 
2184  if ( node_split == node_begin ) { // unable to split torus
2185  // make sure physical nodes are together
2186  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2187  // find physical node boundary to split on
2188  int i_split = 0;
2189  for ( int i=0; i<nnodes; ++i ) {
2190  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2191  int mid = (nnodes+1)/2;
2192  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2193  else break;
2194  }
2195  }
2196  node_split = node_begin + i_split;
2197  }
2198 
2199  bool final_patch_sort = false;
2200 
2201  if ( node_split == node_begin ) { // all on same physical node
2202  if ( ( simParams->verboseTopology ) &&
2203  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2204  int crds[3];
2205  tmgr.coords(*node_begin, crds);
2206  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2207  CmiPhysicalNodeID(*node_begin), *node_begin,
2208  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2209  a_min, b_min, c_min, npatches,
2210  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2211  }
2212 
2213  // final sort along a to minimize pme message count
2214  final_patch_sort = true;
2215 
2216  // find node (process) boundary to split on
2217  int i_split = 0;
2218  for ( int i=0; i<nnodes; ++i ) {
2219  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2220  int mid = (nnodes+1)/2;
2221  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2222  else break;
2223  }
2224  }
2225  node_split = node_begin + i_split;
2226  }
2227 
2228  if ( node_split == node_begin ) { // all on same node (process)
2229  if ( ( simParams->verboseTopology ) &&
2230  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2231  int crds[3];
2232  tmgr.coords(*node_begin, crds);
2233  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2234  CmiNodeOf(*node_begin), *node_begin, npatches,
2235  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2236  }
2237 
2238  // no natural divisions so just split at midpoint
2239  node_split = node_begin + nnodes/2;
2240  }
2241 
2242  if ( nnodes == 1 ) { // down to a single pe
2243  // assign all patches
2244  int *node = node_begin;
2245  sumLoad = 0;
2246  for ( int i=0; i < npatches; ++i ) {
2247  int pid = patches[i];
2248  assignedNode[pid] = *node;
2249  sumLoad += patchLoads[pid];
2250  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2251  i, *node,
2252  patchMap->index_a(pid),
2253  patchMap->index_b(pid),
2254  patchMap->index_c(pid),
2255  patchLoads[pid], sumLoad);
2256  }
2257 
2258  return;
2259  }
2260 
2261  if ( final_patch_sort ) {
2262  // final sort along a to minimize pme message count
2263  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2264  } else if ( a_len >= b_len && a_len >= c_len ) {
2265  if ( 0 ) CkPrintf("sort a\n");
2266  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2267  } else if ( b_len >= a_len && b_len >= c_len ) {
2268  if ( 0 ) CkPrintf("sort b\n");
2269  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2270  } else if ( c_len >= a_len && c_len >= b_len ) {
2271  if ( 0 ) CkPrintf("sort c\n");
2272  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2273  }
2274 
2275  int *patch_split;
2276  { // walk through patches in sorted order
2277  int *node = node_begin;
2278  sumLoad = 0;
2279  for ( patch_split = patch_begin;
2280  patch_split != patch_end && node != node_split;
2281  ++patch_split ) {
2282  sumLoad += patchLoads[*patch_split];
2283  double targetLoad = totalLoad *
2284  ((double)(node-node_begin+1) / (double)nnodes);
2285  if ( 0 ) CkPrintf("test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
2286  patch_split - patch_begin, *node,
2287  patchMap->index_a(*patch_split),
2288  patchMap->index_b(*patch_split),
2289  patchMap->index_c(*patch_split),
2290  sumLoad, targetLoad);
2291  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2292  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2293  }
2294  double targetLoad = totalLoad *
2295  ((double)(node_split-node_begin) / (double)nnodes);
2296  if ( 0 ) CkPrintf("split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
2297  node_split-node_begin, nnodes,
2298  patch_split-patch_begin, npatches,
2299  sumLoad, targetLoad);
2300  }
2301 
2302  // recurse
2304  patch_begin, patch_split, node_begin, node_split,
2305  patchLoads, sortedLoads, assignedNode, tmgr);
2307  patch_split, patch_end, node_split, node_end,
2308  patchLoads, sortedLoads, assignedNode, tmgr);
2309 }
static Node * Object()
Definition: Node.h:86
int getNumAtoms() const
Definition: Patch.h:105
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:2097
static PatchMap * Object()
Definition: PatchMap.h:27
SimParameters * simParameters
Definition: Node.h:181
int index_a(int pid) const
Definition: PatchMap.h:86
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int index_b(int pid) const
Definition: PatchMap.h:87
int index_c(int pid) const
Definition: PatchMap.h:88
#define simParams
Definition: Output.C:129
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1946
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1981

◆ topo_getargs()

void topo_getargs ( char **  argv)

Definition at line 91 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

91  {
92  randtopo = CmiGetArgFlag(argv, "+randtopo");
93  if ( CkMyPe() >= CkNumPes() ) return;
94 #if CCD_COND_FN_EXISTS
95  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
96 #else
97  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
98 #endif
99 }
static int randtopo
Definition: WorkDistrib.C:85
static void build_ordering(void *)
Definition: WorkDistrib.C:87

Variable Documentation

◆ deviceCUDA

__thread DeviceCUDA* deviceCUDA

Definition at line 23 of file DeviceCUDA.C.

Referenced by cuda_initialize().

◆ eventMachineProgress

int eventMachineProgress
static

Definition at line 101 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

◆ randtopo

int randtopo
static

Definition at line 85 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().