NAMD
Classes | Macros | Functions | Variables
WorkDistrib.C File Reference
#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
 
struct  pe_sortop_bit_reversed
 
struct  pe_sortop_coord_x
 
struct  pe_sortop_coord_y
 
class  PatchMapMsg
 
struct  nodesort
 
struct  TopoManagerWrapper
 
struct  TopoManagerWrapper::pe_sortop_topo
 
struct  patch_sortop_curve_a
 
struct  patch_sortop_curve_b
 
struct  patch_sortop_curve_c
 

Macros

#define MIN_DEBUG_LEVEL   2
 
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
 

Functions

static void build_ordering (void *)
 
void topo_getargs (char **argv)
 
static int compare_bit_reversed (int a, int b)
 
static bool less_than_bit_reversed (int a, int b)
 
void cuda_initialize ()
 
void mic_initialize ()
 
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
 
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
 

Variables

__thread DeviceCUDAdeviceCUDA
 
static int randtopo
 
static int eventMachineProgress
 

Detailed Description

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.

Macro Definition Documentation

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
#define MIN_DEBUG_LEVEL   2

Definition at line 61 of file WorkDistrib.C.

Function Documentation

static void build_ordering ( void )
static

Definition at line 86 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

86  {
88 }
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:180
static int compare_bit_reversed ( int  a,
int  b 
)
static

Definition at line 124 of file WorkDistrib.C.

124  {
125  int d = a ^ b;
126  int c = 1;
127  if ( d ) while ( ! (d & c) ) {
128  c = c << 1;
129  }
130  return (a & c) - (b & c);
131 }
void cuda_initialize ( )

Definition at line 24 of file DeviceCUDA.C.

References deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

24  {
25  deviceCUDA = new DeviceCUDA();
27 }
void initialize()
Definition: DeviceCUDA.C:89
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
static bool less_than_bit_reversed ( int  a,
int  b 
)
static

Definition at line 133 of file WorkDistrib.C.

133  {
134  int d = a ^ b;
135  int c = 1;
136  if ( d ) while ( ! (d & c) ) {
137  c = c << 1;
138  }
139  return d && (b & c);
140 }
void mic_initialize ( )
static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition coord,
int *  result,
int  ydim 
)
static

Definition at line 272 of file WorkDistrib.C.

References sort, x, and y.

Referenced by WorkDistrib::sortPmePes().

276  {
277  int x_len = x_end - x_begin;
278  int y_len = y_end - y_begin;
279  if ( x_len == 1 && y_len == 1 ) {
280  // done, now put this pe in the right place
281  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
282  coord[*pe_begin].x, coord[*pe_begin].y);
283  result[x_begin*ydim + y_begin] = *pe_begin;
284  return;
285  }
286  int *pe_end = pe_begin + x_len * y_len;
287  if ( x_len >= y_len ) {
288  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
289  int x_split = x_begin + x_len / 2;
290  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
291  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
292  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
293  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
294  } else {
295  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
296  int y_split = y_begin + y_len / 2;
297  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
298  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
299  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
300  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
301  }
302 }
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:272
BlockRadixSort::TempStorage sort
gridSize y
gridSize x
static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper tmgr 
)
static

Definition at line 1976 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), SimParameters::disableTopology, SimParameters::emptyPatchLoad, Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), load, NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, sort, TopoManagerWrapper::sortAndSplit(), and SimParameters::verboseTopology.

1983  {
1984 
1986  PatchMap *patchMap = PatchMap::Object();
1987  int *patches = patch_begin;
1988  int npatches = patch_end - patch_begin;
1989  int *nodes = node_begin;
1990  int nnodes = node_end - node_begin;
1991 
1992  // assign patch loads
1993  const int emptyPatchLoad = simParams->emptyPatchLoad;
1994  double totalRawLoad = 0;
1995  for ( int i=0; i<npatches; ++i ) {
1996  int pid=patches[i];
1997 #ifdef MEM_OPT_VERSION
1998  double load = patchMap->numAtoms(pid) + emptyPatchLoad;
1999 #else
2000  double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
2001 #endif
2002  patchLoads[pid] = load;
2003  sortedLoads[i] = load;
2004  totalRawLoad += load;
2005  }
2006  std::sort(sortedLoads,sortedLoads+npatches);
2007 
2008  // limit maxPatchLoad to adjusted average load per node
2009  double sumLoad = 0;
2010  double maxPatchLoad = 1;
2011  for ( int i=0; i<npatches; ++i ) {
2012  double load = sortedLoads[i];
2013  double total = sumLoad + (npatches-i) * load;
2014  if ( nnodes * load > total ) break;
2015  sumLoad += load;
2016  maxPatchLoad = load;
2017  }
2018  double totalLoad = 0;
2019  for ( int i=0; i<npatches; ++i ) {
2020  int pid=patches[i];
2021  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2022  totalLoad += patchLoads[pid];
2023  }
2024  if ( nnodes * maxPatchLoad > totalLoad )
2025  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2026 
2027  int a_len, b_len, c_len;
2028  int a_min, b_min, c_min;
2029  { // find dimensions
2030  a_min = patchMap->index_a(patches[0]);
2031  b_min = patchMap->index_b(patches[0]);
2032  c_min = patchMap->index_c(patches[0]);
2033  int a_max = a_min;
2034  int b_max = b_min;
2035  int c_max = c_min;
2036  for ( int i=1; i<npatches; ++i ) {
2037  int a = patchMap->index_a(patches[i]);
2038  int b = patchMap->index_b(patches[i]);
2039  int c = patchMap->index_c(patches[i]);
2040  if ( a < a_min ) a_min = a;
2041  if ( b < b_min ) b_min = b;
2042  if ( c < c_min ) c_min = c;
2043  if ( a > a_max ) a_max = a;
2044  if ( b > b_max ) b_max = b;
2045  if ( c > c_max ) c_max = c;
2046  }
2047  a_len = a_max - a_min;
2048  b_len = b_max - b_min;
2049  c_len = c_max - c_min;
2050  }
2051 
2052  int *node_split = node_begin;
2053 
2054  if ( simParams->disableTopology ) ; else
2055  if ( a_len >= b_len && a_len >= c_len ) {
2056  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2057  } else if ( b_len >= a_len && b_len >= c_len ) {
2058  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2059  } else if ( c_len >= a_len && c_len >= b_len ) {
2060  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2061  }
2062 
2063  if ( node_split == node_begin ) { // unable to split torus
2064  // make sure physical nodes are together
2065  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2066  // find physical node boundary to split on
2067  int i_split = 0;
2068  for ( int i=0; i<nnodes; ++i ) {
2069  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2070  int mid = (nnodes+1)/2;
2071  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2072  else break;
2073  }
2074  }
2075  node_split = node_begin + i_split;
2076  }
2077 
2078  bool final_patch_sort = false;
2079 
2080  if ( node_split == node_begin ) { // all on same physical node
2081  if ( ( simParams->verboseTopology ) &&
2082  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2083  int crds[3];
2084  tmgr.coords(*node_begin, crds);
2085  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2086  CmiPhysicalNodeID(*node_begin), *node_begin,
2087  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2088  a_min, b_min, c_min, npatches,
2089  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2090  }
2091 
2092  // final sort along a to minimize pme message count
2093  final_patch_sort = true;
2094 
2095  // find node (process) boundary to split on
2096  int i_split = 0;
2097  for ( int i=0; i<nnodes; ++i ) {
2098  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2099  int mid = (nnodes+1)/2;
2100  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2101  else break;
2102  }
2103  }
2104  node_split = node_begin + i_split;
2105  }
2106 
2107  if ( node_split == node_begin ) { // all on same node (process)
2108  if ( ( simParams->verboseTopology ) &&
2109  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2110  int crds[3];
2111  tmgr.coords(*node_begin, crds);
2112  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2113  CmiNodeOf(*node_begin), *node_begin, npatches,
2114  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2115  }
2116 
2117  // no natural divisions so just split at midpoint
2118  node_split = node_begin + nnodes/2;
2119  }
2120 
2121  if ( nnodes == 1 ) { // down to a single pe
2122  // assign all patches
2123  int *node = node_begin;
2124  sumLoad = 0;
2125  for ( int i=0; i < npatches; ++i ) {
2126  int pid = patches[i];
2127  assignedNode[pid] = *node;
2128  sumLoad += patchLoads[pid];
2129  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2130  i, *node,
2131  patchMap->index_a(pid),
2132  patchMap->index_b(pid),
2133  patchMap->index_c(pid),
2134  patchLoads[pid], sumLoad);
2135  }
2136 
2137  return;
2138  }
2139 
2140  if ( final_patch_sort ) {
2141  // final sort along a to minimize pme message count
2142  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2143  } else if ( a_len >= b_len && a_len >= c_len ) {
2144  if ( 0 ) CkPrintf("sort a\n");
2145  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2146  } else if ( b_len >= a_len && b_len >= c_len ) {
2147  if ( 0 ) CkPrintf("sort b\n");
2148  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2149  } else if ( c_len >= a_len && c_len >= b_len ) {
2150  if ( 0 ) CkPrintf("sort c\n");
2151  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2152  }
2153 
2154  int *patch_split;
2155  { // walk through patches in sorted order
2156  int *node = node_begin;
2157  sumLoad = 0;
2158  for ( patch_split = patch_begin;
2159  patch_split != patch_end && node != node_split;
2160  ++patch_split ) {
2161  sumLoad += patchLoads[*patch_split];
2162  double targetLoad = totalLoad *
2163  ((double)(node-node_begin+1) / (double)nnodes);
2164  if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
2165  patch_split - patch_begin, *node,
2166  patchMap->index_a(*patch_split),
2167  patchMap->index_b(*patch_split),
2168  patchMap->index_c(*patch_split),
2169  sumLoad, targetLoad);
2170  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2171  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2172  }
2173  double targetLoad = totalLoad *
2174  ((double)(node_split-node_begin) / (double)nnodes);
2175  if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
2176  node_split-node_begin, nnodes,
2177  patch_split-patch_begin, npatches,
2178  sumLoad, targetLoad);
2179  }
2180 
2181  // recurse
2183  patch_begin, patch_split, node_begin, node_split,
2184  patchLoads, sortedLoads, assignedNode, tmgr);
2186  patch_split, patch_end, node_split, node_end,
2187  patchLoads, sortedLoads, assignedNode, tmgr);
2188 }
static Node * Object()
Definition: Node.h:86
BlockLoad::TempStorage load
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:1976
static PatchMap * Object()
Definition: PatchMap.h:27
SimParameters * simParameters
Definition: Node.h:178
int index_a(int pid) const
Definition: PatchMap.h:86
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int index_b(int pid) const
Definition: PatchMap.h:87
int index_c(int pid) const
Definition: PatchMap.h:88
BlockRadixSort::TempStorage sort
#define simParams
Definition: Output.C:127
int getNumAtoms()
Definition: Patch.h:105
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1825
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1860
void topo_getargs ( char **  argv)

Definition at line 90 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

90  {
91  randtopo = CmiGetArgFlag(argv, "+randtopo");
92  if ( CkMyPe() >= CkNumPes() ) return;
93 #if CCD_COND_FN_EXISTS
94  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
95 #else
96  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
97 #endif
98 }
static int randtopo
Definition: WorkDistrib.C:84
static void build_ordering(void *)
Definition: WorkDistrib.C:86

Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 22 of file DeviceCUDA.C.

int eventMachineProgress
static

Definition at line 100 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

int randtopo
static

Definition at line 84 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().