#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "ConfigList.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Classes
class	ComputeMapChangeMsg

struct	pe_sortop_bit_reversed

struct	pe_sortop_coord_x

struct	pe_sortop_coord_y

class	PatchMapMsg

struct	nodesort

struct	TopoManagerWrapper

struct	TopoManagerWrapper::pe_sortop_topo

struct	patch_sortop_curve_a

struct	patch_sortop_curve_b

struct	patch_sortop_curve_c

Macros
#define	MIN_DEBUG_LEVEL 2

#define	MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Functions
static void	build_ordering (void *)

void	topo_getargs (char **argv)

static int	compare_bit_reversed (int a, int b)

static bool	less_than_bit_reversed (int a, int b)

void	cuda_initialize ()

void	mic_initialize ()

static void	recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)

static void	recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)

Variables
__thread DeviceCUDA *	deviceCUDA

static int	randtopo

static int	eventMachineProgress

Detailed Description

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.

Macro Definition Documentation

◆ MACHINE_PROGRESS

#define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

◆ MIN_DEBUG_LEVEL

#define MIN_DEBUG_LEVEL 2

Definition at line 62 of file WorkDistrib.C.

Function Documentation

◆ build_ordering()

static void build_ordering ( void * )

static

Definition at line 87 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

                                    {
   // Callback registered by topo_getargs() on the CcdTOPOLOGY_AVAIL
   // condition.  The void* argument is unused; the only action is to build
   // the node-aware PE ordering.
   WorkDistrib::buildNodeAwarePeOrdering();
 }

◆ compare_bit_reversed()

static int compare_bit_reversed	(	int	a,
		int	b
	)

static

Definition at line 125 of file WorkDistrib.C.

Referenced by pe_sortop_bit_reversed::operator()().

                                               {
   int d = a ^ b;
   int c = 1;
   if ( d ) while ( ! (d & c) ) {
     c = c << 1;
   }
   return (a & c) - (b & c);
 }

◆ cuda_initialize()

void cuda_initialize ( )

Definition at line 27 of file DeviceCUDA.C.

References cuda_finalize(), deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

                        {
     // Create the DeviceCUDA object and store it in the deviceCUDA pointer
     // (declared __thread), then run its initialize() step.
     deviceCUDA = new DeviceCUDA();
     deviceCUDA->initialize();
     // Register cuda_finalize() to be invoked at normal program termination.
     std::atexit(cuda_finalize);
 }

◆ less_than_bit_reversed()

static bool less_than_bit_reversed	(	int	a,
		int	b
	)

static

Definition at line 134 of file WorkDistrib.C.

                                                  {
   int d = a ^ b;
   int c = 1;
   if ( d ) while ( ! (d & c) ) {
     c = c << 1;
   }
   return d && (b & c);
 }

◆ mic_initialize()

void mic_initialize ( )

Referenced by WorkDistrib::peOrderingReady().

◆ recursive_bisect_coord()

static void recursive_bisect_coord	(	int	x_begin,
		int	x_end,
		int	y_begin,
		int	y_end,
		int *	pe_begin,
		ScaledPosition *	coord,
		int *	result,
		int	ydim
	)

static

Definition at line 273 of file WorkDistrib.C.

Referenced by WorkDistrib::sortPmePes().

     {
   int x_len = x_end - x_begin;
   int y_len = y_end - y_begin;
   if ( x_len == 1 && y_len == 1 ) {
     // done, now put this pe in the right place
     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
       coord[*pe_begin].x, coord[*pe_begin].y);
     result[x_begin*ydim + y_begin] = *pe_begin;
     return;
   }
   int *pe_end = pe_begin + x_len * y_len;
   if ( x_len >= y_len ) {
     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
     int x_split = x_begin + x_len / 2;
     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
   } else {
     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
     int y_split = y_begin + y_len / 2;
     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
   }
 }

◆ recursive_bisect_with_curve()

static void recursive_bisect_with_curve	(	int *	patch_begin,
		int *	patch_end,
		int *	node_begin,
		int *	node_end,
		double *	patchLoads,
		double *	sortedLoads,
		int *	assignedNode,
		TopoManagerWrapper &	tmgr
	)

static

Definition at line 2097 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, simParams, and TopoManagerWrapper::sortAndSplit().

     {
 
   // Recursively divide the patches in [patch_begin, patch_end) among the
   // PEs in [node_begin, node_end) and record the chosen PE for every patch
   // id in assignedNode[pid].
   //
   //   patchLoads   - indexed by patch id; overwritten here with capped loads
   //   sortedLoads  - scratch; its first npatches entries are overwritten
   //   assignedNode - indexed by patch id; written once a single PE remains
   //   tmgr         - supplies sortAndSplit() and coords()
   //
   // Both the patch range and the node range are reordered in place by the
   // std::sort calls below (and presumably by tmgr.sortAndSplit() as well;
   // its implementation is not visible here).
   SimParameters *simParams = Node::Object()->simParameters;
   PatchMap *patchMap = PatchMap::Object();
   int *patches = patch_begin;
   int npatches = patch_end - patch_begin;
   int *nodes = node_begin;
   int nnodes = node_end - node_begin;
 
   // assign patch loads
   // load = atom count of the patch + simParams->emptyPatchLoad
   const int emptyPatchLoad = simParams->emptyPatchLoad;
   double totalRawLoad = 0;
   for ( int i=0; i<npatches; ++i ) {
     int pid=patches[i];
 #ifdef MEM_OPT_VERSION
     double load = patchMap->numAtoms(pid) + emptyPatchLoad;
 #else
     double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
 #endif
     patchLoads[pid] = load;
     sortedLoads[i] = load;
     totalRawLoad += load;
   }
   std::sort(sortedLoads,sortedLoads+npatches);
 
   // limit maxPatchLoad to adjusted average load per node
   // Scan loads in ascending order.  "total" is what the overall load would
   // be if this patch and every larger one were capped at "load"; stop at the
   // first load that exceeds total/nnodes and keep the last load that did not.
   double sumLoad = 0;
   double maxPatchLoad = 1;
   for ( int i=0; i<npatches; ++i ) {
     double load = sortedLoads[i];
     double total = sumLoad + (npatches-i) * load;
     if ( nnodes * load > total ) break;
     sumLoad += load;
     maxPatchLoad = load;
   }
   // Apply the cap and accumulate the capped total.
   double totalLoad = 0;
   for ( int i=0; i<npatches; ++i ) {
     int pid=patches[i];
     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
     totalLoad += patchLoads[pid];
   }
   // Sanity check: the cap must not exceed the capped average per node.
   // An empty patch range (totalLoad 0, maxPatchLoad 1) also lands here.
   if ( nnodes * maxPatchLoad > totalLoad )
     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
 
   // Bounding box of the patches in patch-grid index space.  The *_len values
   // are max - min, i.e. one less than the number of grid cells spanned
   // (the verbose output below prints them as *_len+1).
   int a_len, b_len, c_len;
   int a_min, b_min, c_min;
   { // find dimensions
     a_min = patchMap->index_a(patches[0]);
     b_min = patchMap->index_b(patches[0]);
     c_min = patchMap->index_c(patches[0]);
     int a_max = a_min;
     int b_max = b_min;
     int c_max = c_min;
     for ( int i=1; i<npatches; ++i ) {
       int a = patchMap->index_a(patches[i]);
       int b = patchMap->index_b(patches[i]);
       int c = patchMap->index_c(patches[i]);
       if ( a < a_min ) a_min = a;
       if ( b < b_min ) b_min = b;
       if ( c < c_min ) c_min = c;
       if ( a > a_max ) a_max = a;
       if ( b > b_max ) b_max = b;
       if ( c > c_max ) c_max = c;
     }
     a_len = a_max - a_min;
     b_len = b_max - b_min;
     c_len = c_max - c_min;
   }
 
   // node_split marks where the node range is cut in two.  It starts equal
   // to node_begin, which the stages below treat as "no split found yet".
   int *node_split = node_begin;
 
   // Unless topology use is disabled, ask tmgr to split the nodes along the
   // longest patch dimension (ties resolve in the order a, b, c).
   if ( simParams->disableTopology ) ; else
   if ( a_len >= b_len && a_len >= c_len ) {
     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
   } else if ( b_len >= a_len && b_len >= c_len ) {
     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
   } else if ( c_len >= a_len && c_len >= b_len ) {
     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
   }
 
   if ( node_split == node_begin ) {  // unable to split torus
     // make sure physical nodes are together
     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
     // find physical node boundary to split on
     // i_split advances to each later physical-node boundary as long as that
     // boundary is strictly closer to the midpoint than the current choice.
     int i_split = 0;
     for ( int i=0; i<nnodes; ++i ) {
       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
         int mid = (nnodes+1)/2;
         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
         else break;
       }
     }
     node_split = node_begin + i_split;
   }
 
   bool final_patch_sort = false;
 
   if ( node_split == node_begin ) {  // all on same physical node
     // Optional diagnostic, printed only when this range holds exactly the
     // PEs of one physical node.
     if ( ( simParams->verboseTopology ) &&
         nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
       int crds[3];
       tmgr.coords(*node_begin, crds);
       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
                CmiPhysicalNodeID(*node_begin), *node_begin,
                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
                a_min, b_min, c_min, npatches,
                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
     }
 
     // final sort along a to minimize pme message count
     final_patch_sort = true;
 
     // find node (process) boundary to split on
     // Same nearest-to-midpoint search as above, keyed on CmiNodeOf().
     int i_split = 0;
     for ( int i=0; i<nnodes; ++i ) {
       if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
         int mid = (nnodes+1)/2;
         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
         else break;
       }
     }
     node_split = node_begin + i_split;
   }
 
   if ( node_split == node_begin ) {  // all on same node (process)
     // Optional diagnostic, printed only when this range holds exactly the
     // PEs of one process.  crds is filled in but not included in the output.
     if ( ( simParams->verboseTopology ) &&
         nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
       int crds[3];
       tmgr.coords(*node_begin, crds);
       CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
                CmiNodeOf(*node_begin), *node_begin, npatches,
                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
     }
 
     // no natural divisions so just split at midpoint
     node_split = node_begin + nnodes/2;
   }
 
   // Recursion leaf.  This check comes after the split logic, so the node
   // sort and the optional diagnostics above also run for a single PE.
   if ( nnodes == 1 ) {  // down to a single pe
     // assign all patches
     int *node = node_begin;
     sumLoad = 0;
     for ( int i=0; i < npatches; ++i ) {
       int pid = patches[i];
       assignedNode[pid] = *node;
       sumLoad += patchLoads[pid];
       // disabled debug output
       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
                 i, *node,
                 patchMap->index_a(pid),
                 patchMap->index_b(pid),
                 patchMap->index_c(pid),
                 patchLoads[pid], sumLoad);
     }
 
     return;
   }
 
   // Order the patches before cutting the patch range: along a when all PEs
   // share a physical node, otherwise along the longest patch dimension.
   if ( final_patch_sort ) {
     // final sort along a to minimize pme message count
     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
   } else if ( a_len >= b_len && a_len >= c_len ) {
     if ( 0 ) CkPrintf("sort a\n");
     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
   } else if ( b_len >= a_len && b_len >= c_len ) {
     if ( 0 ) CkPrintf("sort b\n");
     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
   } else if ( c_len >= a_len && c_len >= b_len ) {
     if ( 0 ) CkPrintf("sort c\n");
     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
   }
 
   // Find patch_split, the first patch that belongs to the second half.
   // "node" is a cursor over the node range: it advances once the running
   // load (plus half of the next patch's load) reaches the proportional
   // target for the nodes up to and including the current one.  The walk
   // ends when the cursor reaches node_split or the patches run out.
   int *patch_split;
   { // walk through patches in sorted order
     int *node = node_begin;
     sumLoad = 0;
     for ( patch_split = patch_begin;
           patch_split != patch_end && node != node_split;
           ++patch_split ) {
       sumLoad += patchLoads[*patch_split];
       double targetLoad = totalLoad *
         ((double)(node-node_begin+1) / (double)nnodes);
       if ( 0 ) CkPrintf("test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
                 patch_split - patch_begin, *node,
                 patchMap->index_a(*patch_split),
                 patchMap->index_b(*patch_split),
                 patchMap->index_c(*patch_split),
                 sumLoad, targetLoad);
       // look ahead half a patch so the cut lands nearest the target
       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
     }
     // targetLoad here feeds only the disabled debug output below
     double targetLoad = totalLoad *
       ((double)(node_split-node_begin) / (double)nnodes);
     if ( 0 ) CkPrintf("split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
               node_split-node_begin, nnodes,
               patch_split-patch_begin, npatches,
               sumLoad, targetLoad);
   }
 
   // recurse
   // first half of the patches onto the first half of the nodes, then the rest
   recursive_bisect_with_curve(
     patch_begin, patch_split, node_begin, node_split,
     patchLoads, sortedLoads, assignedNode, tmgr);
   recursive_bisect_with_curve(
     patch_split, patch_end, node_split, node_end,
     patchLoads, sortedLoads, assignedNode, tmgr);
 }

◆ topo_getargs()

void topo_getargs ( char ** argv )

Definition at line 91 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

                                {
   // Store the result of looking up the "+randtopo" flag in argv.
   randtopo = CmiGetArgFlag(argv, "+randtopo");
   // Skip the registration below when this PE's index is not below CkNumPes().
   if ( CkMyPe() >= CkNumPes() ) return;
   // Register build_ordering() for the CcdTOPOLOGY_AVAIL condition.  The two
   // branches differ only in the callback pointer type used for the cast,
   // selected by CCD_COND_FN_EXISTS.
 #if CCD_COND_FN_EXISTS
   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
 #else
   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
 #endif
 }

Variable Documentation

◆ deviceCUDA

__thread DeviceCUDA* deviceCUDA

Definition at line 23 of file DeviceCUDA.C.

Referenced by cuda_initialize().

◆ eventMachineProgress

int eventMachineProgress

static

Definition at line 101 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

◆ randtopo

int randtopo

static

Definition at line 85 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().

Classes

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ MACHINE_PROGRESS

◆ MIN_DEBUG_LEVEL

Function Documentation

◆ build_ordering()

◆ compare_bit_reversed()

◆ cuda_initialize()

◆ less_than_bit_reversed()

◆ mic_initialize()

◆ recursive_bisect_coord()

◆ recursive_bisect_with_curve()

◆ topo_getargs()

Variable Documentation

◆ deviceCUDA

◆ eventMachineProgress

◆ randtopo