93 int numProcessors = stats->nprocs();
100 if ( ! processorArray ) processorArray =
new processorInfo[numProcessors];
101 if ( ! patchArray ) patchArray =
new patchInfo[numPatches];
102 if ( ! computeArray ) computeArray =
new computeInfo[numComputes];
104 int nMoveableComputes = buildData(stats);
107 #define DUMP_LDBDATA 1 108 #define LOAD_LDBDATA 1 112 dumpDataASCII(
"ldbd_before", numProcessors, numPatches, nMoveableComputes);
114 loadDataASCII(
"ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
119 double avgCompute = 0.;
120 if ( nMoveableComputes ) {
123 double maxCompute = 0.;
125 for (i=0; i<nMoveableComputes; i++) {
126 double load = computeArray[i].
load;
128 if ( load > maxCompute ) { maxCompute = load; maxi = i; }
130 avgCompute = total / nMoveableComputes;
132 int P = stats->nprocs();
133 int numPesAvailable = 0;
134 for (i=0; i<P; i++) {
135 if (processorArray[i].available) {
140 if (numPesAvailable == 0)
141 NAMD_die(
"No processors available for load balancing!\n");
144 CkPrintf(
"LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
147 CkPrintf(
"LDB: Average compute %f is %.1f%% of average load %f\n",
156 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 162 int totalAddedParts = 0;
164 if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
165 if (
simParams->ldbRelativeGrainsize > 0. ) {
168 CkPrintf(
"LDB: Partitioning computes with target load %f\n", maxCompute);
169 double maxUnsplit = 0.;
170 for (
int i=0; i<nMoveableComputes; i++) {
172 const int cid =
LdbIdField(computeArray[i].handle.id, 0);
173 const double load = computeArray[i].
load;
175 if ( load > maxUnsplit ) maxUnsplit = load;
178 int nparts = (int) ceil(load / maxCompute);
179 if ( nparts > maxParts ) nparts = maxParts;
180 if ( nparts < 1 ) nparts = 1;
181 if ( 0 && nparts > 1 ) {
182 CkPrintf(
"LDB: Partitioning compute %d with load %f by %d\n",
186 totalAddedParts += nparts - 1;
188 CkPrintf(
"LDB: Increased migratable compute count from %d to %d\n",
189 nMoveableComputes,nMoveableComputes+totalAddedParts);
190 CkPrintf(
"LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
193 TorusLB(computeArray, patchArray, processorArray,
194 nMoveableComputes, numPatches, numProcessors);
197 nMoveableComputes, numPatches, numProcessors, 1);
199 TorusLB(computeArray, patchArray, processorArray,
200 nMoveableComputes, numPatches, numProcessors);
203 nMoveableComputes, numPatches, numProcessors, 1);
206 Alg7(computeArray, patchArray, processorArray,
207 nMoveableComputes, numPatches, numProcessors);
209 RefineOnly(computeArray, patchArray, processorArray,
210 nMoveableComputes, numPatches, numProcessors);
213 #if LDB_DEBUG && USE_TOPOMAP 215 int pe1, pe2, pe3, hops=0;
226 for (
int i=0; i<numPatches; i++) {
233 hops += tmgr.getHopsBetweenRanks(pe1, pe2);
237 CkPrintf(
"Load Balancing: Number of Hops: %d\n", hops);
241 dumpDataASCII(
"ldbd_after", numProcessors, numPatches, nMoveableComputes);
243 dumpDataASCII(
"ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
252 int* computeCount =
new int[numProcessors];
253 for(i=0; i<numProcessors; i++)
255 for(i=0; i<nMoveableComputes; i++)
256 computeCount[computeArray[i].processor]++;
257 for(i=0; i<numProcessors; i++) {
258 if (computeCount[i]==0)
259 iout <<
iINFO <<
"Warning: Processor " << i
260 <<
" has NO moveable computes.\n" <<
endi;
262 delete [] computeCount;
265 std::vector<MigrateInfo *> migrateInfo;
266 for(i=0;i<nMoveableComputes;i++) {
267 if (computeArray[i].processor != computeArray[i].oldProcessor) {
271 MigrateInfo *migrateMe =
new MigrateInfo;
272 migrateMe->obj = computeArray[i].
handle;
274 migrateMe->to_pe = computeArray[i].
processor;
275 migrateInfo.push_back(migrateMe);
279 computeArray[i].processor);
283 const int migrate_count=migrateInfo.size();
285 CLBMigrateMsg* msg =
new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
287 msg->n_moves = migrate_count;
288 for(i=0; i < migrate_count; i++) {
289 MigrateInfo* item = migrateInfo[i];
290 msg->moves[i] = *item;
292 migrateInfo[i] =
nullptr;
295 for (i=0; i<numProcessors; i++) {
299 delete [] processorArray;
300 delete [] patchArray;
301 delete [] computeArray;
303 processorArray = NULL;
std::ostream & iINFO(std::ostream &s)
void setNewNumPartitions(ComputeID cid, char numPartitions)
static PatchMap * Object()
SimParameters * simParameters
#define LDBSTRAT_REFINEONLY
std::ostream & endi(std::ostream &s)
static double averageLoad
int numPatches(void) const
int numPartitions(ComputeID cid)
void setNewNode(ComputeID cid, NodeID node)
const int & LdbIdField(const LdbId &id, const int index)
void NAMD_die(const char *err_msg)
#define LDBSTRAT_COMPREHENSIVE
static ComputeMap * Object()