NAMD
NamdCentLB Class Reference

#include <NamdCentLB.h>

Inheritance diagram for NamdCentLB:
(diagram not reproduced; NamdCentLB derives from CentralLB)

Public Member Functions

 NamdCentLB (const CkLBOptions &opt)
 
 NamdCentLB (CkMigrateMessage *)
 
CLBMigrateMsg * Strategy (LDStats *stats)
 

Detailed Description

Definition at line 52 of file NamdCentLB.h.
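
The header provides no detailed description; the declaration below is a minimal sketch reconstructed from the member listings and the Strategy() body on this page. The access specifiers and the exact signature of buildData() are assumptions, not the authoritative contents of NamdCentLB.h.

class NamdCentLB : public CentralLB {
public:
  NamdCentLB(const CkLBOptions &opt);
  NamdCentLB(CkMigrateMessage *msg);        // migratable-object constructor
  CLBMigrateMsg *Strategy(LDStats *stats);  // centralized load-balancing step

private:
  int buildData(LDStats *stats);            // assumed signature; fills the arrays below
  processorInfo *processorArray;            // per-processor load data
  patchInfo     *patchArray;                // per-patch placement data
  computeInfo   *computeArray;              // per-compute load and placement data
};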

Constructor & Destructor Documentation

NamdCentLB::NamdCentLB ( const CkLBOptions &  opt)

Definition at line 51 of file NamdCentLB.C.

51  : CentralLB(opt)
52 {
53  // if (CkMyPe()==0)
54  // CkPrintf("[%d] NamdCentLB created\n",CkMyPe());
55  processorArray = 0;
56  patchArray = 0;
57  computeArray = 0;
58 }
NamdCentLB::NamdCentLB ( CkMigrateMessage *  msg)

Migratable Object Constructor.

Definition at line 45 of file NamdCentLB.C.

45  : CentralLB(msg) {
46  processorArray = 0;
47  patchArray = 0;
48  computeArray = 0;
49 }
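
The CkMigrateMessage* overload is the standard Charm++ constructor used when a chare is recreated on another processor after migration; like the normal constructor it only nulls the three work arrays, which Strategy() allocates on first use. A minimal sketch of that constructor pair, using a hypothetical class name rather than NAMD's:

// ExampleLB is an illustrative name; only the constructor pattern is the point.
class ExampleLB : public CentralLB {
public:
  ExampleLB(const CkLBOptions &opt) : CentralLB(opt) {}  // normal creation path
  ExampleLB(CkMigrateMessage *m)    : CentralLB(m)   {}  // rebuild after migration
};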

Member Function Documentation

CLBMigrateMsg * NamdCentLB::Strategy ( LDStats *  stats)

Definition at line 88 of file NamdCentLB.C.

References averageLoad, processorInfo::backgroundLoad, cpuloads, endi(), computeInfo::handle, InfoRecord::Id, iINFO(), iout, SimParameters::LCPOOn, LdbIdField(), SimParameters::ldbRelativeGrainsize, LDBSTRAT_COMPREHENSIVE, LDBSTRAT_DEFAULT, LDBSTRAT_OLD, LDBSTRAT_REFINEONLY, SimParameters::ldbStrategy, InfoRecord::load, load, NAMD_die(), ComputeMap::numComputes(), ComputeMap::numPartitions(), PatchMap::numPatches(), numPatches, PatchMap::Object(), Node::Object(), ComputeMap::Object(), computeInfo::oldProcessor, computeInfo::processor, patchInfo::processor, ComputeMap::setNewNode(), ComputeMap::setNewNumPartitions(), Node::simParameters, and simParams.

89 {
90  // CkPrintf("LDB: All statistics received at %f, %f\n",
91  // CmiTimer(),CmiWallTimer());
92 
93  int numProcessors = stats->nprocs();
94  int numPatches = PatchMap::Object()->numPatches();
95  ComputeMap *computeMap = ComputeMap::Object();
96  const int numComputes = computeMap->numComputes();
97  const SimParameters *simParams = Node::Object()->simParameters;
98 
99  // these sizes should never change
100  if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
101  if ( ! patchArray ) patchArray = new patchInfo[numPatches];
102  if ( ! computeArray ) computeArray = new computeInfo[numComputes];
103 
104  int nMoveableComputes = buildData(stats);
105 
106 #if LDB_DEBUG
107 #define DUMP_LDBDATA 1
108 #define LOAD_LDBDATA 1
109 #endif
110 
111 #if DUMP_LDBDATA
112  dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
113 #elif LOAD_LDBDATA
114  loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
115  // CkExit();
116 #endif
117 
118  double averageLoad = 0.;
119  double avgCompute = 0.;
120  if ( nMoveableComputes ) {
121  int i;
122  double total = 0.;
123  double maxCompute = 0.;
124  int maxi = 0;
125  for (i=0; i<nMoveableComputes; i++) {
126  double load = computeArray[i].load;
127  total += load;
128  if ( load > maxCompute ) { maxCompute = load; maxi = i; }
129  }
130  avgCompute = total / nMoveableComputes;
131 
132  int P = stats->nprocs();
133  int numPesAvailable = 0;
134  for (i=0; i<P; i++) {
135  if (processorArray[i].available) {
136  ++numPesAvailable;
137  total += processorArray[i].backgroundLoad;
138  }
139  }
140  if (numPesAvailable == 0)
141  NAMD_die("No processors available for load balancing!\n");
142 
143  averageLoad = total/numPesAvailable;
144  CkPrintf("LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
145  LdbIdField(computeArray[maxi].handle.id, 0),
146  maxCompute, 100. * maxCompute / averageLoad, averageLoad);
147  CkPrintf("LDB: Average compute %f is %.1f%% of average load %f\n",
148  avgCompute, 100. * avgCompute / averageLoad, averageLoad);
149  }
150 
151  if ( step() == 1 ) {
152  // compute splitting only
153  // partitions are stored as char but mostly limited by
154  // high load noise at low outer-loop iteration counts
155  int maxParts = 10;
156 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
157 //split LCPO compute very small, else CUDA compute is delayed
158  if (simParams->LCPOOn) {
159  maxParts = 20;
160  }
161 #endif
162  int totalAddedParts = 0;
163  double maxCompute = averageLoad / 10.;
164  if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
165  if ( simParams->ldbRelativeGrainsize > 0. ) {
166  maxCompute = averageLoad * simParams->ldbRelativeGrainsize;
167  }
168  CkPrintf("LDB: Partitioning computes with target load %f\n", maxCompute);
169  double maxUnsplit = 0.;
170  for (int i=0; i<nMoveableComputes; i++) {
171  computeArray[i].processor = computeArray[i].oldProcessor;
172  const int cid = LdbIdField(computeArray[i].handle.id, 0);
173  const double load = computeArray[i].load;
174  if ( computeMap->numPartitions(cid) == 0 ) {
175  if ( load > maxUnsplit ) maxUnsplit = load;
176  continue;
177  }
178  int nparts = (int) ceil(load / maxCompute);
179  if ( nparts > maxParts ) nparts = maxParts;
180  if ( nparts < 1 ) nparts = 1;
181  if ( 0 && nparts > 1 ) {
182  CkPrintf("LDB: Partitioning compute %d with load %f by %d\n",
183  cid, load, nparts);
184  }
185  computeMap->setNewNumPartitions(cid,nparts);
186  totalAddedParts += nparts - 1;
187  }
188  CkPrintf("LDB: Increased migratable compute count from %d to %d\n",
189  nMoveableComputes,nMoveableComputes+totalAddedParts);
190  CkPrintf("LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
191  } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
192  if (step() < 4)
193  TorusLB(computeArray, patchArray, processorArray,
194  nMoveableComputes, numPatches, numProcessors);
195  else
196  RefineTorusLB(computeArray, patchArray, processorArray,
197  nMoveableComputes, numPatches, numProcessors, 1);
198  } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
199  TorusLB(computeArray, patchArray, processorArray,
200  nMoveableComputes, numPatches, numProcessors);
201  } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
202  RefineTorusLB(computeArray, patchArray, processorArray,
203  nMoveableComputes, numPatches, numProcessors, 1);
204  } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
205  if (step() < 4)
206  Alg7(computeArray, patchArray, processorArray,
207  nMoveableComputes, numPatches, numProcessors);
208  else
209  RefineOnly(computeArray, patchArray, processorArray,
210  nMoveableComputes, numPatches, numProcessors);
211  }
212 
213 #if LDB_DEBUG && USE_TOPOMAP
214  TopoManager tmgr;
215  int pe1, pe2, pe3, hops=0;
216  /* This is double counting the hops
217  for(int i=0; i<nMoveableComputes; i++)
218  {
219  pe1 = computeArray[i].processor;
220  pe2 = patchArray[computeArray[i].patch1].processor;
221  pe3 = patchArray[computeArray[i].patch2].processor;
222  hops += tmgr.getHopsBetweenRanks(pe1, pe2);
223  if(computeArray[i].patch1 != computeArray[i].patch2)
224  hops += tmgr.getHopsBetweenRanks(pe1, pe3);
225  }*/
226  for (int i=0; i<numPatches; i++) {
227  //int num = patchArray[i].proxiesOn.numElements();
228  pe1 = patchArray[i].processor;
229  Iterator nextProc;
230  processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
231  while (p) {
232  pe2 = p->Id;
233  hops += tmgr.getHopsBetweenRanks(pe1, pe2);
234  p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
235  }
236  }
237  CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
238 #endif
239 
240 #if DUMP_LDBDATA
241  dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
242 #elif LOAD_LDBDATA
243  dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
244  // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
245  // CkExit();
246 #endif
247 
248  // For error checking:
249  // Count up computes, to see if somebody doesn't have any computes
250  int i;
251 #if 0
252  int* computeCount = new int[numProcessors];
253  for(i=0; i<numProcessors; i++)
254  computeCount[i]=0;
255  for(i=0; i<nMoveableComputes; i++)
256  computeCount[computeArray[i].processor]++;
257  for(i=0; i<numProcessors; i++) {
258  if (computeCount[i]==0)
259  iout << iINFO <<"Warning: Processor " << i
260  << " has NO moveable computes.\n" << endi;
261  }
262  delete [] computeCount;
263 #endif
264 
265  std::vector<MigrateInfo *> migrateInfo;
266  for(i=0;i<nMoveableComputes;i++) {
267  if (computeArray[i].processor != computeArray[i].oldProcessor) {
268  // CkPrintf("[%d] Obj %d migrating from %d to %d\n",
269  // CkMyPe(),computeArray[i].handle.id.id[0],
270  // computeArray[i].processor,computeArray[i].oldProcessor);
271  MigrateInfo *migrateMe = new MigrateInfo;
272  migrateMe->obj = computeArray[i].handle;
273  migrateMe->from_pe = computeArray[i].oldProcessor;
274  migrateMe->to_pe = computeArray[i].processor;
275  migrateInfo.push_back(migrateMe);
276 
277  // sneak in updates to ComputeMap
278  computeMap->setNewNode(LdbIdField(computeArray[i].handle.id, 0),
279  computeArray[i].processor);
280  }
281  }
282 
283  const int migrate_count=migrateInfo.size();
284  // CkPrintf("NamdCentLB migrating %d elements\n",migrate_count);
285  CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
286 
287  msg->n_moves = migrate_count;
288  for(i=0; i < migrate_count; i++) {
289  MigrateInfo* item = migrateInfo[i];
290  msg->moves[i] = *item;
291  delete item;
292  migrateInfo[i] = nullptr;
293  }
294 
295  for (i=0; i<numProcessors; i++) {
296  cpuloads[i] = processorArray[i].load;
297  }
298 
299  delete [] processorArray;
300  delete [] patchArray;
301  delete [] computeArray;
302 
303  processorArray = NULL;
304  patchArray = NULL;
305  computeArray = NULL;
306 
307  return msg;
308 };
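
The step-1 branch above splits large migratable computes so that no single compute dominates a processor's load: the target grain maxCompute defaults to one tenth of the average processor load (but at least twice the average compute load), can be overridden with ldbRelativeGrainsize, and each splittable compute receives ceil(load / maxCompute) partitions, clamped to at most maxParts. A minimal standalone sketch of that arithmetic with example numbers follows; targetPartitions() is an illustrative helper, not part of NAMD.

#include <cmath>
#include <algorithm>
#include <cstdio>

// Illustrative re-statement of the step-1 partitioning arithmetic in Strategy().
static int targetPartitions(double load, double averageLoad, double avgCompute,
                            double relativeGrainsize, int maxParts) {
  double maxCompute = averageLoad / 10.;                 // default grain: 10% of average PE load
  if (maxCompute < 2. * avgCompute) maxCompute = 2. * avgCompute;
  if (relativeGrainsize > 0.)                            // ldbRelativeGrainsize override
    maxCompute = averageLoad * relativeGrainsize;
  int nparts = (int) std::ceil(load / maxCompute);
  return std::max(1, std::min(nparts, maxParts));        // clamp to [1, maxParts]
}

int main() {
  // Example: average PE load 8.0, average compute load 0.2, one heavy compute of 3.0.
  // Grain = max(8.0/10, 2*0.2) = 0.8, so the compute is split into ceil(3.0/0.8) = 4 parts.
  std::printf("parts = %d\n", targetPartitions(3.0, 8.0, 0.2, 0., 10));
  return 0;
}

On later steps the computes are not re-split; depending on ldbStrategy and the step number, Strategy() runs TorusLB, RefineTorusLB, Alg7, or RefineOnly over the same arrays and returns the resulting moves in a CLBMigrateMsg.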

The documentation for this class was generated from the following files:
NamdCentLB.h
NamdCentLB.C