NAMD
Classes | Macros | Functions | Variables
WorkDistrib.C File Reference
#include <stdio.h>
#include "ComputeOneFourNbTholes.h"
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Parameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "ConfigList.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
 
struct  pe_sortop_bit_reversed
 
struct  pe_sortop_coord_x
 
struct  pe_sortop_coord_y
 
class  PatchMapMsg
 
struct  nodesort
 
struct  TopoManagerWrapper
 
struct  TopoManagerWrapper::pe_sortop_topo
 
struct  patch_sortop_curve_a
 
struct  patch_sortop_curve_b
 
struct  patch_sortop_curve_c
 

Macros

#define MIN_DEBUG_LEVEL   2
 
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
 

Functions

static void build_ordering (void *)
 
void topo_getargs (char **argv)
 
static int compare_bit_reversed (int a, int b)
 
static bool less_than_bit_reversed (int a, int b)
 
void cuda_initialize ()
 
void mic_initialize ()
 
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
 
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
 

Variables

__thread DeviceCUDAdeviceCUDA
 
static int randtopo
 
static int eventMachineProgress
 

Detailed Description

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.

Macro Definition Documentation

◆ MACHINE_PROGRESS

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

◆ MIN_DEBUG_LEVEL

#define MIN_DEBUG_LEVEL   2

Definition at line 64 of file WorkDistrib.C.

Function Documentation

◆ build_ordering()

static void build_ordering ( void *  )
static

Definition at line 89 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

89  {
91 }
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:183

◆ compare_bit_reversed()

static int compare_bit_reversed ( int  a,
int  b 
)
static

Definition at line 127 of file WorkDistrib.C.

Referenced by pe_sortop_bit_reversed::operator()().

127  {
128  int d = a ^ b;
129  int c = 1;
130  if ( d ) while ( ! (d & c) ) {
131  c = c << 1;
132  }
133  return (a & c) - (b & c);
134 }

◆ cuda_initialize()

void cuda_initialize ( )

Definition at line 27 of file DeviceCUDA.C.

References cuda_finalize(), deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

27  {
28  deviceCUDA = new DeviceCUDA();
30  if (CkMyRank() == 0) {
31  std::atexit(cuda_finalize);
32  }
33 }
void initialize()
Definition: DeviceCUDA.C:109
void cuda_finalize()
Definition: DeviceCUDA.C:36
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

◆ less_than_bit_reversed()

static bool less_than_bit_reversed ( int  a,
int  b 
)
static

Definition at line 136 of file WorkDistrib.C.

136  {
137  int d = a ^ b;
138  int c = 1;
139  if ( d ) while ( ! (d & c) ) {
140  c = c << 1;
141  }
142  return d && (b & c);
143 }

◆ mic_initialize()

void mic_initialize ( )

◆ recursive_bisect_coord()

static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition coord,
int *  result,
int  ydim 
)
static

Definition at line 275 of file WorkDistrib.C.

Referenced by WorkDistrib::sortPmePes().

279  {
280  int x_len = x_end - x_begin;
281  int y_len = y_end - y_begin;
282  if ( x_len == 1 && y_len == 1 ) {
283  // done, now put this pe in the right place
284  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
285  coord[*pe_begin].x, coord[*pe_begin].y);
286  result[x_begin*ydim + y_begin] = *pe_begin;
287  return;
288  }
289  int *pe_end = pe_begin + x_len * y_len;
290  if ( x_len >= y_len ) {
291  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
292  int x_split = x_begin + x_len / 2;
293  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
294  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
295  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
296  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
297  } else {
298  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
299  int y_split = y_begin + y_len / 2;
300  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
301  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
302  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
303  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
304  }
305 }
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:275

◆ recursive_bisect_with_curve()

static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper tmgr 
)
static

Definition at line 2109 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, simParams, and TopoManagerWrapper::sortAndSplit().

2116  {
2117 
2119  PatchMap *patchMap = PatchMap::Object();
2120  int *patches = patch_begin;
2121  int npatches = patch_end - patch_begin;
2122  int *nodes = node_begin;
2123  int nnodes = node_end - node_begin;
2124 
2125  // assign patch loads
2126  const int emptyPatchLoad = simParams->emptyPatchLoad;
2127  double totalRawLoad = 0;
2128  for ( int i=0; i<npatches; ++i ) {
2129  int pid=patches[i];
2130 #ifdef MEM_OPT_VERSION
2131  double load = patchMap->numAtoms(pid) + emptyPatchLoad;
2132 #else
2133  double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
2134 #endif
2135  patchLoads[pid] = load;
2136  sortedLoads[i] = load;
2137  totalRawLoad += load;
2138  }
2139  std::sort(sortedLoads,sortedLoads+npatches);
2140 
2141  // limit maxPatchLoad to adjusted average load per node
2142  double sumLoad = 0;
2143  double maxPatchLoad = 1;
2144  for ( int i=0; i<npatches; ++i ) {
2145  double load = sortedLoads[i];
2146  double total = sumLoad + (npatches-i) * load;
2147  if ( nnodes * load > total ) break;
2148  sumLoad += load;
2149  maxPatchLoad = load;
2150  }
2151  double totalLoad = 0;
2152  for ( int i=0; i<npatches; ++i ) {
2153  int pid=patches[i];
2154  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2155  totalLoad += patchLoads[pid];
2156  }
2157  if ( nnodes * maxPatchLoad > totalLoad )
2158  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2159 
2160  int a_len, b_len, c_len;
2161  int a_min, b_min, c_min;
2162  { // find dimensions
2163  a_min = patchMap->index_a(patches[0]);
2164  b_min = patchMap->index_b(patches[0]);
2165  c_min = patchMap->index_c(patches[0]);
2166  int a_max = a_min;
2167  int b_max = b_min;
2168  int c_max = c_min;
2169  for ( int i=1; i<npatches; ++i ) {
2170  int a = patchMap->index_a(patches[i]);
2171  int b = patchMap->index_b(patches[i]);
2172  int c = patchMap->index_c(patches[i]);
2173  if ( a < a_min ) a_min = a;
2174  if ( b < b_min ) b_min = b;
2175  if ( c < c_min ) c_min = c;
2176  if ( a > a_max ) a_max = a;
2177  if ( b > b_max ) b_max = b;
2178  if ( c > c_max ) c_max = c;
2179  }
2180  a_len = a_max - a_min;
2181  b_len = b_max - b_min;
2182  c_len = c_max - c_min;
2183  }
2184 
2185  int *node_split = node_begin;
2186 
2187  if ( simParams->disableTopology ) ; else
2188  if ( a_len >= b_len && a_len >= c_len ) {
2189  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2190  } else if ( b_len >= a_len && b_len >= c_len ) {
2191  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2192  } else if ( c_len >= a_len && c_len >= b_len ) {
2193  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2194  }
2195 
2196  if ( node_split == node_begin ) { // unable to split torus
2197  // make sure physical nodes are together
2198  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2199  // find physical node boundary to split on
2200  int i_split = 0;
2201  for ( int i=0; i<nnodes; ++i ) {
2202  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2203  int mid = (nnodes+1)/2;
2204  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2205  else break;
2206  }
2207  }
2208  node_split = node_begin + i_split;
2209  }
2210 
2211  bool final_patch_sort = false;
2212 
2213  if ( node_split == node_begin ) { // all on same physical node
2214  if ( ( simParams->verboseTopology ) &&
2215  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2216  int crds[3];
2217  tmgr.coords(*node_begin, crds);
2218  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2219  CmiPhysicalNodeID(*node_begin), *node_begin,
2220  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2221  a_min, b_min, c_min, npatches,
2222  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2223  }
2224 
2225  // final sort along a to minimize pme message count
2226  final_patch_sort = true;
2227 
2228  // find node (process) boundary to split on
2229  int i_split = 0;
2230  for ( int i=0; i<nnodes; ++i ) {
2231  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2232  int mid = (nnodes+1)/2;
2233  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2234  else break;
2235  }
2236  }
2237  node_split = node_begin + i_split;
2238  }
2239 
2240  if ( node_split == node_begin ) { // all on same node (process)
2241  if ( ( simParams->verboseTopology ) &&
2242  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2243  int crds[3];
2244  tmgr.coords(*node_begin, crds);
2245  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2246  CmiNodeOf(*node_begin), *node_begin, npatches,
2247  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2248  }
2249 
2250  // no natural divisions so just split at midpoint
2251  node_split = node_begin + nnodes/2;
2252  }
2253 
2254  if ( nnodes == 1 ) { // down to a single pe
2255  // assign all patches
2256  int *node = node_begin;
2257  sumLoad = 0;
2258  for ( int i=0; i < npatches; ++i ) {
2259  int pid = patches[i];
2260  assignedNode[pid] = *node;
2261  sumLoad += patchLoads[pid];
2262  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2263  i, *node,
2264  patchMap->index_a(pid),
2265  patchMap->index_b(pid),
2266  patchMap->index_c(pid),
2267  patchLoads[pid], sumLoad);
2268  }
2269 
2270  return;
2271  }
2272 
2273  if ( final_patch_sort ) {
2274  // final sort along a to minimize pme message count
2275  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2276  } else if ( a_len >= b_len && a_len >= c_len ) {
2277  if ( 0 ) CkPrintf("sort a\n");
2278  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2279  } else if ( b_len >= a_len && b_len >= c_len ) {
2280  if ( 0 ) CkPrintf("sort b\n");
2281  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2282  } else if ( c_len >= a_len && c_len >= b_len ) {
2283  if ( 0 ) CkPrintf("sort c\n");
2284  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2285  }
2286 
2287  int *patch_split;
2288  { // walk through patches in sorted order
2289  int *node = node_begin;
2290  sumLoad = 0;
2291  for ( patch_split = patch_begin;
2292  patch_split != patch_end && node != node_split;
2293  ++patch_split ) {
2294  sumLoad += patchLoads[*patch_split];
2295  double targetLoad = totalLoad *
2296  ((double)(node-node_begin+1) / (double)nnodes);
2297  if ( 0 ) CkPrintf("test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
2298  patch_split - patch_begin, *node,
2299  patchMap->index_a(*patch_split),
2300  patchMap->index_b(*patch_split),
2301  patchMap->index_c(*patch_split),
2302  sumLoad, targetLoad);
2303  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2304  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2305  }
2306  double targetLoad = totalLoad *
2307  ((double)(node_split-node_begin) / (double)nnodes);
2308  if ( 0 ) CkPrintf("split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
2309  node_split-node_begin, nnodes,
2310  patch_split-patch_begin, npatches,
2311  sumLoad, targetLoad);
2312  }
2313 
2314  // recurse
2316  patch_begin, patch_split, node_begin, node_split,
2317  patchLoads, sortedLoads, assignedNode, tmgr);
2319  patch_split, patch_end, node_split, node_end,
2320  patchLoads, sortedLoads, assignedNode, tmgr);
2321 }
static Node * Object()
Definition: Node.h:86
int getNumAtoms() const
Definition: Patch.h:105
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:2109
static PatchMap * Object()
Definition: PatchMap.h:27
SimParameters * simParameters
Definition: Node.h:181
int index_a(int pid) const
Definition: PatchMap.h:86
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int index_b(int pid) const
Definition: PatchMap.h:87
int index_c(int pid) const
Definition: PatchMap.h:88
#define simParams
Definition: Output.C:131
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1958
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1993

◆ topo_getargs()

void topo_getargs ( char **  argv)

Definition at line 93 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

93  {
94  randtopo = CmiGetArgFlag(argv, "+randtopo");
95  if ( CkMyPe() >= CkNumPes() ) return;
96 #if CCD_COND_FN_EXISTS
97  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
98 #else
99  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
100 #endif
101 }
static int randtopo
Definition: WorkDistrib.C:87
static void build_ordering(void *)
Definition: WorkDistrib.C:89

Variable Documentation

◆ deviceCUDA

__thread DeviceCUDA* deviceCUDA

Definition at line 23 of file DeviceCUDA.C.

Referenced by cuda_initialize().

◆ eventMachineProgress

int eventMachineProgress
static

Definition at line 103 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

◆ randtopo

int randtopo
static

Definition at line 87 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().