00001
00002
00003
00004
00005
00006
00007
00008 #if !defined(WIN32) || defined(__CYGWIN__)
00009 #include <unistd.h>
00010 #endif
00011 #include <fcntl.h>
00012
00013 #include "InfoStream.h"
00014 #include "NamdCentLB.h"
00015 #include "NamdCentLB.def.h"
00016 #include "Node.h"
00017 #include "PatchMap.h"
00018 #include "ComputeMap.h"
00019 #include "LdbCoordinator.h"
00020
00021
00022
00023
00024 double *cpuloads = NULL;
00025
00026 void CreateNamdCentLB() {
00027
00028 loadbalancer = CProxy_NamdCentLB::ckNew();
00029
00030 if (CkMyRank() == 0 && cpuloads == NULL) {
00031 cpuloads = new double[CkNumPes()];
00032 CmiAssert(cpuloads != NULL);
00033 for (int i=0; i<CkNumPes(); i++) cpuloads[i] = 0.0;
00034 }
00035 }
00036
00037 NamdCentLB *AllocateNamdCentLB() {
00038 return new NamdCentLB((CkMigrateMessage*)NULL);
00039 }
00040
00044 NamdCentLB::NamdCentLB(CkMigrateMessage *msg): CentralLB(msg) {
00045 processorArray = 0;
00046 patchArray = 0;
00047 computeArray = 0;
00048 }
00049
00050 NamdCentLB::NamdCentLB(): CentralLB(CkLBOptions(-1))
00051 {
00052
00053
00054 processorArray = 0;
00055 patchArray = 0;
00056 computeArray = 0;
00057 }
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068 CmiBool NamdCentLB::QueryBalanceNow(int _step)
00069 {
00070
00071 if ( LdbCoordinator::Object()->takingLdbData ) {
00072 return CmiTrue;
00073 } else {
00074 return CmiFalse;
00075 }
00076 }
00077
00078 CmiBool NamdCentLB::QueryDumpData()
00079 {
00080 #if 0
00081 if (LdbCoordinator::Object()->ldbCycleNum == 1) return CmiTrue;
00082 if (LdbCoordinator::Object()->ldbCycleNum == 2) return CmiTrue;
00083 #endif
00084 return CmiFalse;
00085 }
00086
00087 #if CHARM_VERSION > 60301
00088 CLBMigrateMsg* NamdCentLB::Strategy(LDStats* stats)
00089 #else
00090
00091 CLBMigrateMsg* NamdCentLB::Strategy(LDStats* stats, int n_pes)
00092 #endif
00093 {
00094
00095
00096
00097 #if CHARM_VERSION > 60301
00098 int numProcessors = stats->nprocs();
00099 #else
00100 int numProcessors = stats->count;
00101 #endif
00102 int numPatches = PatchMap::Object()->numPatches();
00103 ComputeMap *computeMap = ComputeMap::Object();
00104 const int numComputes = computeMap->numComputes();
00105 const SimParameters* simParams = Node::Object()->simParameters;
00106
00107
00108 if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00109 if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00110 if ( ! computeArray ) computeArray = new computeInfo[numComputes];
00111
00112 int nMoveableComputes = buildData(stats);
00113
00114 #if LDB_DEBUG
00115 #define DUMP_LDBDATA 1
00116 #define LOAD_LDBDATA 1
00117 #endif
00118
00119 #if DUMP_LDBDATA
00120 dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00121 #elif LOAD_LDBDATA
00122 loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00123
00124 #endif
00125
00126 double averageLoad = 0.;
00127 double avgCompute;
00128 {
00129 int i;
00130 double total = 0.;
00131 double maxCompute = 0.;
00132 int maxi = 0;
00133 for (i=0; i<nMoveableComputes; i++) {
00134 double load = computeArray[i].load;
00135 total += load;
00136 if ( load > maxCompute ) { maxCompute = load; maxi = i; }
00137 }
00138 avgCompute = total / nMoveableComputes;
00139
00140 #if CHARM_VERSION > 60301
00141 int P = stats->nprocs();
00142 #else
00143 int P = stats->count;
00144 #endif
00145 int numPesAvailable = 0;
00146 for (i=0; i<P; i++) {
00147 if (processorArray[i].available) {
00148 ++numPesAvailable;
00149 total += processorArray[i].backgroundLoad;
00150 }
00151 }
00152 if (numPesAvailable == 0)
00153 NAMD_die("No processors available for load balancing!\n");
00154
00155 averageLoad = total/numPesAvailable;
00156 CkPrintf("LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
00157 computeArray[maxi].handle.id.id[0],
00158 maxCompute, 100. * maxCompute / averageLoad, averageLoad);
00159 CkPrintf("LDB: Average compute %f is %.1f%% of average load %f\n",
00160 avgCompute, 100. * avgCompute / averageLoad, averageLoad);
00161 }
00162
00163 if ( step() == 1 ) {
00164
00165
00166
00167 int maxParts = 10;
00168 #ifdef NAMD_CUDA
00169
00170 if (simParams->LCPOOn) {
00171 maxParts = 20;
00172 }
00173 #endif
00174 int totalAddedParts = 0;
00175 double maxCompute = averageLoad / 10.;
00176 if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
00177 if ( simParams->ldbRelativeGrainsize > 0. ) {
00178 maxCompute = averageLoad * simParams->ldbRelativeGrainsize;
00179 }
00180 CkPrintf("LDB: Partitioning computes with target load %f\n", maxCompute);
00181 double maxUnsplit = 0.;
00182 for (int i=0; i<nMoveableComputes; i++) {
00183 computeArray[i].processor = computeArray[i].oldProcessor;
00184 const int cid = computeArray[i].handle.id.id[0];
00185 const double load = computeArray[i].load;
00186 if ( computeMap->numPartitions(cid) == 0 ) {
00187 if ( load > maxUnsplit ) maxUnsplit = load;
00188 continue;
00189 }
00190 int nparts = (int) ceil(load / maxCompute);
00191 if ( nparts > maxParts ) nparts = maxParts;
00192 if ( nparts < 1 ) nparts = 1;
00193 if ( nparts > 1 ) {
00194 CkPrintf("LDB: Partitioning compute %d with load %f by %d\n",
00195 cid, load, nparts);
00196 }
00197 computeMap->setNewNumPartitions(cid,nparts);
00198 totalAddedParts += nparts - 1;
00199 }
00200 CkPrintf("LDB: Increased migratable compute count from %d to %d\n",
00201 nMoveableComputes,nMoveableComputes+totalAddedParts);
00202 CkPrintf("LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
00203 } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) {
00204 if (step() < 4)
00205 TorusLB(computeArray, patchArray, processorArray,
00206 nMoveableComputes, numPatches, numProcessors);
00207 else
00208 RefineTorusLB(computeArray, patchArray, processorArray,
00209 nMoveableComputes, numPatches, numProcessors, 1);
00210 } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
00211 TorusLB(computeArray, patchArray, processorArray,
00212 nMoveableComputes, numPatches, numProcessors);
00213 } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00214 RefineTorusLB(computeArray, patchArray, processorArray,
00215 nMoveableComputes, numPatches, numProcessors, 1);
00216 } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
00217 if (step() < 4)
00218 Alg7(computeArray, patchArray, processorArray,
00219 nMoveableComputes, numPatches, numProcessors);
00220 else
00221 RefineOnly(computeArray, patchArray, processorArray,
00222 nMoveableComputes, numPatches, numProcessors);
00223 }
00224
00225 #if LDB_DEBUG && USE_TOPOMAP
00226 TopoManager tmgr;
00227 int pe1, pe2, pe3, hops=0;
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238 for (int i=0; i<numPatches; i++) {
00239
00240 pe1 = patchArray[i].processor;
00241 Iterator nextProc;
00242 processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00243 while (p) {
00244 pe2 = p->Id;
00245 hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00246 p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00247 }
00248 }
00249 CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00250 #endif
00251
00252 #if DUMP_LDBDATA
00253 dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00254 #elif LOAD_LDBDATA
00255 dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00256
00257
00258 #endif
00259
00260
00261
00262 int i;
00263 #if 0
00264 int* computeCount = new int[numProcessors];
00265 for(i=0; i<numProcessors; i++)
00266 computeCount[i]=0;
00267 for(i=0; i<nMoveableComputes; i++)
00268 computeCount[computeArray[i].processor]++;
00269 for(i=0; i<numProcessors; i++) {
00270 if (computeCount[i]==0)
00271 iout << iINFO <<"Warning: Processor " << i
00272 << " has NO moveable computes.\n" << endi;
00273 }
00274 delete [] computeCount;
00275 #endif
00276
00277 CkVec<MigrateInfo *> migrateInfo;
00278 for(i=0;i<nMoveableComputes;i++) {
00279 if (computeArray[i].processor != computeArray[i].oldProcessor) {
00280
00281
00282
00283 MigrateInfo *migrateMe = new MigrateInfo;
00284 migrateMe->obj = computeArray[i].handle;
00285 migrateMe->from_pe = computeArray[i].oldProcessor;
00286 migrateMe->to_pe = computeArray[i].processor;
00287 migrateInfo.insertAtEnd(migrateMe);
00288
00289
00290 computeMap->setNewNode(computeArray[i].handle.id.id[0],
00291 computeArray[i].processor);
00292 }
00293 }
00294
00295 int migrate_count=migrateInfo.length();
00296
00297 CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
00298
00299 msg->n_moves = migrate_count;
00300 for(i=0; i < migrate_count; i++) {
00301 MigrateInfo* item = migrateInfo[i];
00302 msg->moves[i] = *item;
00303 delete item;
00304 migrateInfo[i] = 0;
00305 }
00306
00307 for (i=0; i<numProcessors; i++) {
00308 cpuloads[i] = processorArray[i].load;
00309 }
00310
00311 delete [] processorArray;
00312 delete [] patchArray;
00313 delete [] computeArray;
00314
00315 processorArray = NULL;
00316 patchArray = NULL;
00317 computeArray = NULL;
00318
00319 return msg;
00320 };
00321
00322 #ifndef WIN32
00323
00324 void NamdCentLB::dumpDataASCII(char *file, int numProcessors,
00325 int numPatches, int numComputes)
00326 {
00327 char filename[128];
00328 sprintf(filename, "%s.%d", file, step());
00329 FILE* fp = fopen(filename,"w");
00330 if (fp == NULL){
00331 perror("dumpLDStatsASCII");
00332 return;
00333 }
00334 CkPrintf("***** DUMP data to file: %s ***** \n", filename);
00335 fprintf(fp,"%d %d %d\n",numProcessors,numPatches,numComputes);
00336
00337 int i;
00338 for(i=0;i<numProcessors;i++) {
00339 processorInfo* p = processorArray + i;
00340 fprintf(fp,"%d %e %e %e %e\n",p->Id,p->load,p->backgroundLoad,p->computeLoad,p->idleTime);
00341 }
00342
00343 for(i=0;i < numPatches; i++) {
00344 patchInfo* p = patchArray + i;
00345 fprintf(fp,"%d %e %d %d\n",p->Id,p->load,p->processor,p->numAtoms);
00346 }
00347
00348 for(i=0; i < numComputes; i++) {
00349 computeInfo* c = computeArray + i;
00350 fprintf(fp,"%d %e %d %d %d %d",c->Id,c->load,c->patch1,c->patch2,
00351 c->processor,c->oldProcessor);
00352 fprintf(fp, "\n");
00353 }
00354
00355
00356 for (i=0; i< numProcessors; i++) {
00357 int num = processorArray[i].proxies.numElements();
00358 fprintf(fp, "%d %d: ", i, num);
00359 Iterator nextProxy;
00360 patchInfo *p = (patchInfo *)processorArray[i].proxies.
00361 iterator((Iterator *)&nextProxy);
00362 while (p) {
00363 fprintf(fp, "%d ", p->Id);
00364 p = (patchInfo *)processorArray[i].proxies.
00365 next((Iterator*)&nextProxy);
00366 }
00367 fprintf(fp, "\n");
00368 }
00369
00370 for (i=0; i<numPatches; i++) {
00371 int num = patchArray[i].proxiesOn.numElements();
00372 fprintf(fp, "%d %d: ", i, num);
00373 Iterator nextProc;
00374 processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.
00375 iterator((Iterator *)&nextProc);
00376 while (p) {
00377 fprintf(fp, "%d ", p->Id);
00378 p = (processorInfo *)patchArray[i].proxiesOn.
00379 next((Iterator*)&nextProc);
00380 }
00381 fprintf(fp, "\n");
00382 }
00383
00384 fclose(fp);
00385
00386 }
00387
00388 void NamdCentLB::loadDataASCII(char *file, int &numProcessors,
00389 int &numPatches, int &numComputes)
00390 {
00391 char filename[128];
00392
00393 sprintf(filename, "%s", file);
00394
00395 CkPrintf("***** Load ascii data from file: %s ***** \n", filename);
00396
00397 FILE* fp = fopen(filename, "r");
00398 if (fp == NULL){
00399 perror("loadDataASCII");
00400 return;
00401 }
00402
00403 fscanf(fp,"%d %d %d",&numProcessors,&numPatches,&numComputes);
00404
00405 printf("numProcs: %d numPatches: %d numComputes: %d\n", numProcessors,numPatches, numComputes);
00406
00407 delete [] processorArray;
00408 delete [] patchArray;
00409 delete [] computeArray;
00410 processorArray = new processorInfo[numProcessors];
00411 patchArray = new patchInfo[numPatches];
00412 computeArray = new computeInfo[numComputes];
00413
00414 int i;
00415 for(i=0;i<numProcessors;i++) {
00416 processorInfo* p = processorArray + i;
00417 fscanf(fp,"%d %le %le %le", &p->Id, &p->load, &p->backgroundLoad, &p->computeLoad);
00418 fscanf(fp,"%le\n", &p->idleTime);
00419 if (p->Id != i) CmiAbort("Reading processorArray error!");
00420
00421 }
00422
00423 for(i=0;i < numPatches; i++) {
00424 patchInfo* p = patchArray + i;
00425 fscanf(fp,"%d %le %d %d\n",&p->Id,&p->load,&p->processor,&p->numAtoms);
00426 if (p->Id != i || p->processor > numProcessors || p->processor < 0)
00427 CmiAbort("Reading patchArray error!");
00428 }
00429
00430 for(i=0; i < numComputes; i++) {
00431 computeInfo* c = computeArray + i;
00432 fscanf(fp,"%d %le %d %d %d %d",&c->Id,&c->load,&c->patch1,&c->patch2,
00433 &c->processor,&c->oldProcessor);
00434
00435 if (c->patch1 < 0 || c->patch1 > numPatches || c->patch2 < 0 || c->patch2 > numPatches)
00436 CmiAbort("Reading computeArray error!");
00437
00438 }
00439
00440
00441 for (i=0; i< numProcessors; i++) {
00442 int num, curp;
00443 fscanf(fp,"%d %d: ",&curp, &num);
00444 if(curp != i)
00445 CmiAbort("Reading patchsSet error!");
00446 for (int j=0; j<num; j++) {
00447 int id;
00448 fscanf(fp,"%d",&id);
00449 processorArray[i].proxies.unchecked_insert(&patchArray[id]);
00450 }
00451 }
00452
00453 for (i=0; i<numPatches; i++) {
00454 int num, curp;
00455 fscanf(fp,"%d %d: ",&curp, &num);
00456 if(curp != i)
00457 CmiAbort("Reading proxiesOn error!");
00458 for (int j=0; j<num; j++) {
00459 int id;
00460 fscanf(fp,"%d",&id);
00461 patchArray[i].proxiesOn.insert(&processorArray[id]);
00462 }
00463 }
00464
00465 fclose(fp);
00466 }
00467 #endif
00468
00469 extern int isPmeProcessor(int);
00470 #ifdef MEM_OPT_VERSION
00471 extern int isOutputProcessor(int);
00472 #endif
00473
00474 int NamdCentLB::buildData(LDStats* stats)
00475 {
00476 #if CHARM_VERSION > 60301
00477 int n_pes = stats->nprocs();
00478 #else
00479 int n_pes = stats->count;
00480 #endif
00481
00482 PatchMap* patchMap = PatchMap::Object();
00483 ComputeMap* computeMap = ComputeMap::Object();
00484 const SimParameters* simParams = Node::Object()->simParameters;
00485
00486 BigReal bgfactor = simParams->ldbBackgroundScaling;
00487 BigReal pmebgfactor = simParams->ldbPMEBackgroundScaling;
00488 BigReal homebgfactor = simParams->ldbHomeBackgroundScaling;
00489 int pmeOn = simParams->PMEOn;
00490 int unLoadPme = simParams->ldbUnloadPME;
00491 int pmeBarrier = simParams->PMEBarrier;
00492 int unLoadZero = simParams->ldbUnloadZero;
00493 int unLoadOne = simParams->ldbUnloadOne;
00494 int unLoadIO= simParams->ldbUnloadOutputPEs;
00495 int i;
00496 for (i=0; i<n_pes; ++i) {
00497 processorArray[i].Id = i;
00498 processorArray[i].available = CmiTrue;
00499 if ( pmeOn && isPmeProcessor(i) ) {
00500 processorArray[i].backgroundLoad = pmebgfactor * stats->procs[i].bg_walltime;
00501 } else if (patchMap->numPatchesOnNode(i) > 0) {
00502 processorArray[i].backgroundLoad = homebgfactor * stats->procs[i].bg_walltime;
00503 } else {
00504 processorArray[i].backgroundLoad = bgfactor * stats->procs[i].bg_walltime;
00505 }
00506 processorArray[i].idleTime = stats->procs[i].idletime;
00507 processorArray[i].load = processorArray[i].computeLoad = 0.0;
00508 }
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551 if (unLoadZero) processorArray[0].available = CmiFalse;
00552 if (unLoadOne) processorArray[1].available = CmiFalse;
00553
00554
00555 if (pmeOn && unLoadPme) {
00556 for (i=0; i<n_pes; i++) {
00557 if (!isPmeProcessor(i)) break;
00558 }
00559 if (i == n_pes) {
00560 iout << iINFO << "Turned off unLoadPme flag!\n" << endi;
00561 unLoadPme = 0;
00562 }
00563 }
00564
00565 if (pmeOn && unLoadPme) {
00566 for (i=0; i<n_pes; i++) {
00567 if ((pmeBarrier && i==0) || isPmeProcessor(i))
00568 processorArray[i].available = CmiFalse;
00569 }
00570 }
00571
00572 #ifdef MEM_OPT_VERSION
00573
00574 if (unLoadIO) {
00575 if (simParams->numoutputprocs == n_pes) {
00576 iout << iINFO << "Turned off unLoadIO flag!\n" << endi;
00577 unLoadIO = 0;
00578 }
00579 }
00580 if (unLoadIO){
00581 iout << iINFO << "Testing for output processors!\n" << endi;
00582 for (i=0; i<n_pes; i++) {
00583 if (isOutputProcessor(stats->procs[i].pe))
00584 {
00585
00586 processorArray[i].available = CmiFalse;
00587 }
00588 else
00589 {
00590
00591 }
00592 }
00593 }
00594 #endif
00595
00596 int nMoveableComputes=0;
00597 int nProxies = 0;
00598 int nIdleComputes = 0;
00599
00600 int j;
00601 for (j=0; j < stats->n_objs; j++) {
00602 const LDObjData &this_obj = stats->objData[j];
00603 int frompe = stats->from_proc[j];
00604
00605
00606 if (this_obj.omID().id.idx != 1) {
00607
00608
00609 processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00610 continue;
00611 }
00612
00613 if (this_obj.id().id[1] == -2) {
00614 const int pid = this_obj.id().id[0];
00615 int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00616
00617 patchArray[pid].Id = pid;
00618 patchArray[pid].numAtoms = 0;
00619 patchArray[pid].processor = stats->from_proc[j];
00620 const int numProxies =
00621 #if USE_TOPOMAP
00622 requiredProxiesOnProcGrid(pid,neighborNodes);
00623 #else
00624 requiredProxies(pid, neighborNodes);
00625 #endif
00626
00627 nProxies += numProxies;
00628
00629 for (int k=0; k<numProxies; k++) {
00630 processorArray[neighborNodes[k]].proxies.unchecked_insert(&patchArray[pid]);
00631 patchArray[pid].proxiesOn.unchecked_insert(&processorArray[neighborNodes[k]]);
00632 }
00633 processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00634 } else if (this_obj.id().id[1] == -3) {
00635 processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00636 } else if (this_obj.migratable) {
00637 if ( this_obj.wallTime == 0. ) {
00638 ++nIdleComputes;
00639 } else {
00640 const int cid = this_obj.id().id[0];
00641 const int p0 = computeMap->pid(cid,0);
00642
00643
00644 int p1;
00645 if (computeMap->numPids(cid) > 1)
00646 p1 = computeMap->pid(cid,1);
00647 else p1 = p0;
00648 computeArray[nMoveableComputes].Id = cid;
00649 computeArray[nMoveableComputes].oldProcessor = stats->from_proc[j];
00650 processorArray[stats->from_proc[j]].computeLoad += this_obj.wallTime;
00651 computeArray[nMoveableComputes].processor = -1;
00652 computeArray[nMoveableComputes].patch1 = p0;
00653 computeArray[nMoveableComputes].patch2 = p1;
00654 computeArray[nMoveableComputes].handle = this_obj.handle;
00655 computeArray[nMoveableComputes].load = this_obj.wallTime;
00656 nMoveableComputes++;
00657 }
00658 } else {
00659 processorArray[stats->from_proc[j]].backgroundLoad += this_obj.wallTime;
00660 }
00661 }
00662
00663 if ( nIdleComputes )
00664 CkPrintf("LDB: %d computes have load of zero\n", nIdleComputes);
00665
00666
00667
00668
00669
00670
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680
00681
00682 for (i=0; i<n_pes; i++) {
00683 processorArray[i].load = processorArray[i].backgroundLoad + processorArray[i].computeLoad;
00684 }
00685 stats->clear();
00686 return nMoveableComputes;
00687 }
00688
00689
00690
00691
00692
00693 int NamdCentLB::requiredProxies(PatchID id, int neighborNodes[])
00694 {
00695 PatchMap* patchMap = PatchMap::Object();
00696 int myNode = patchMap->node(id);
00697 int nProxyNodes = 0;
00698
00699 #define IF_NEW_NODE \
00700 int j; \
00701 for ( j=0; j<nProxyNodes && neighborNodes[j] != proxyNode; ++j ); \
00702 if ( j == nProxyNodes )
00703
00704 PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00705 neighbors[0] = id;
00706 int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00707 for ( int i = 0; i < numNeighbors; ++i ) {
00708 const int proxyNode = patchMap->basenode(neighbors[i]);
00709 if ( proxyNode != myNode ) {
00710 IF_NEW_NODE {
00711 neighborNodes[nProxyNodes] = proxyNode;
00712 nProxyNodes++;
00713 }
00714 }
00715 }
00716
00717
00718
00719
00720
00721
00722
00723
00724 #if 1
00725 int numPes = CkNumPes();
00726 int numPatches = patchMap->numPatches();
00727 int emptyNodes = numPes - numPatches;
00728 if ( emptyNodes > numPatches ) {
00729 int nodesPerPatch = nProxyNodes + 1 + (emptyNodes-1) / numPatches;
00730 int maxNodesPerPatch = PatchMap::MaxOneAway + PatchMap::MaxTwoAway;
00731 if ( nodesPerPatch > maxNodesPerPatch ) nodesPerPatch = maxNodesPerPatch;
00732 int proxyNode = (myNode + 1) % numPes;
00733 while ( nProxyNodes < nodesPerPatch &&
00734 ! patchMap->numPatchesOnNode(proxyNode) ) {
00735 if ( proxyNode != myNode ) {
00736 IF_NEW_NODE {
00737 neighborNodes[nProxyNodes] = proxyNode;
00738 nProxyNodes++;
00739 }
00740 }
00741 proxyNode = (proxyNode + 1) % numPes;
00742 }
00743 proxyNode = (myNode - 1 + numPes) % numPes;
00744 while ( nProxyNodes < nodesPerPatch &&
00745 ! patchMap->numPatchesOnNode(proxyNode) ) {
00746 if ( proxyNode != myNode ) {
00747 IF_NEW_NODE {
00748 neighborNodes[nProxyNodes] = proxyNode;
00749 nProxyNodes++;
00750 }
00751 }
00752 proxyNode = (proxyNode - 1 + numPes) % numPes;
00753 }
00754 proxyNode = (myNode + 1) % numPes;
00755 int count = 0;
00756 while ( nProxyNodes < nodesPerPatch ) {
00757 if ( ! patchMap->numPatchesOnNode(proxyNode) && proxyNode != myNode ) {
00758 IF_NEW_NODE {
00759 neighborNodes[nProxyNodes] = proxyNode;
00760 nProxyNodes++;
00761 }
00762 }
00763 proxyNode = (proxyNode + 1) % numPes;
00764 count ++; if (count == numPes) break;
00765 }
00766 } else {
00767 int proxyNode = myNode - 1;
00768 if ( proxyNode >= 0 && ! patchMap->numPatchesOnNode(proxyNode) ) {
00769 if ( proxyNode != myNode ) {
00770 IF_NEW_NODE {
00771 neighborNodes[nProxyNodes] = proxyNode;
00772 nProxyNodes++;
00773 }
00774 }
00775 }
00776 proxyNode = myNode + 1;
00777 if ( proxyNode < numPes && ! patchMap->numPatchesOnNode(proxyNode) ) {
00778 if ( proxyNode != myNode ) {
00779 IF_NEW_NODE {
00780 neighborNodes[nProxyNodes] = proxyNode;
00781 nProxyNodes++;
00782 }
00783 }
00784 }
00785 }
00786 #endif
00787
00788 return nProxyNodes;
00789 }
00790
00791 #if USE_TOPOMAP
00792
00793
00794
00795
00796
00797
00798 int NamdCentLB::requiredProxiesOnProcGrid(PatchID id, int neighborNodes[])
00799 {
00800 enum proxyHere { No, Yes };
00801 int numPes = CkNumPes();
00802 proxyHere *proxyNodes = new proxyHere[numPes];
00803 int nProxyNodes;
00804 int i, j, k, l;
00805
00806 int xsize = 0, ysize = 0, zsize = 0, tsize = 0;
00807 int my_x = 0, my_y = 0, my_z = 0, my_t = 0;
00808
00809 PatchMap* patchMap = PatchMap::Object();
00810 int myNode = patchMap->node(id);
00811
00812 TopoManager tmgr;
00813 xsize = tmgr.getDimNX();
00814 ysize = tmgr.getDimNY();
00815 zsize = tmgr.getDimNZ();
00816 tsize = tmgr.getDimNT();
00817
00818 tmgr.rankToCoordinates(myNode, my_x, my_y, my_z, my_t);
00819
00820 if(xsize * ysize * zsize * tsize != CkNumPes()) {
00821 delete [] proxyNodes;
00822 return requiredProxies(id, neighborNodes);
00823 }
00824
00825
00826 for ( i = 0; i < numPes; ++i )
00827 {
00828 proxyNodes[i] = No;
00829 }
00830 nProxyNodes = 0;
00831
00832
00833
00834
00835 PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00836
00837
00838
00839 neighbors[0] = id;
00840 int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00841
00842
00843
00844
00845 CmiBool smallFlag = CmiFalse;
00846 double pnodes = CkNumPes();
00847 pnodes *= 0.25;
00848 smallFlag = (patchMap->numPatches() > pnodes )?1:0;
00849
00850
00851
00852
00853 for ( i = 1; i < numNeighbors; ++i )
00854 {
00855 int proxyNode = patchMap->basenode(neighbors[i]);
00856
00857 if (proxyNode != myNode)
00858 if (proxyNodes[proxyNode] == No)
00859 {
00860 proxyNodes[proxyNode] = Yes;
00861 neighborNodes[nProxyNodes] = proxyNode;
00862 nProxyNodes++;
00863 }
00864 }
00865
00866
00867 if (step() > 2) {
00868 delete [] proxyNodes;
00869 return nProxyNodes;
00870 }
00871
00872
00873
00874 int numPatches = patchMap->numPatches();
00875 int emptyNodes = numPes - numPatches;
00876
00877
00878 int nodesPerPatch = nProxyNodes + 4 * (emptyNodes-1) / numPatches + 1;
00879 int proxyNode = 0 ;
00880 int proxy_x=0, proxy_y=0, proxy_z=0;
00881
00882
00883
00884
00885
00886 for(k=-1; k<= 1; k++) {
00887 proxy_z = (my_z + k + zsize) % zsize;
00888 for(j=-1; j <= 1; j++) {
00889 proxy_y = (my_y + j + ysize) % ysize;
00890 for(i = -1; i <= 1; i++) {
00891 proxy_x = (my_x + i + xsize) % xsize;
00892 for(l = 0; l < tsize; l++) {
00893 if(i == 0 && j == 0 && k == 0 && l == 0)
00894 continue;
00895
00896 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z, l);
00897
00898 if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00899 proxyNodes[proxyNode] == No) {
00900 proxyNodes[proxyNode] = Yes;
00901 neighborNodes[nProxyNodes] = proxyNode;
00902 nProxyNodes++;
00903 }
00904
00905 if(nProxyNodes >= nodesPerPatch ||
00906 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00907 break;
00908 }
00909
00910 if(nProxyNodes >= nodesPerPatch ||
00911 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00912 break;
00913 }
00914
00915 if(nProxyNodes >= nodesPerPatch ||
00916 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00917 break;
00918 }
00919
00920 if(nProxyNodes >= nodesPerPatch ||
00921 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00922 break;
00923 }
00924
00925 #if 1
00926 if(!smallFlag) {
00927 for(k=-2; k<= 2; k+=2) {
00928 proxy_z = (my_z + k + zsize) % zsize;
00929 for(j=-2; j <= 2; j+=2) {
00930 proxy_y = (my_y + j + ysize) % ysize;
00931 for(i = -2; i <= 2; i+=2) {
00932 proxy_x = (my_x + i + xsize) % xsize;
00933 for(l = 0; l < tsize; l++) {
00934 if(i == 0 && j == 0 && k == 0 && l == 0)
00935 continue;
00936
00937 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z, l);
00938
00939 if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00940 proxyNodes[proxyNode] == No) {
00941 proxyNodes[proxyNode] = Yes;
00942 neighborNodes[nProxyNodes] = proxyNode;
00943 nProxyNodes++;
00944 }
00945
00946 if(nProxyNodes >= nodesPerPatch ||
00947 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00948 break;
00949 }
00950
00951 if(nProxyNodes >= nodesPerPatch ||
00952 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00953 break;
00954 }
00955
00956 if(nProxyNodes >= nodesPerPatch ||
00957 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00958 break;
00959 }
00960
00961 if(nProxyNodes >= nodesPerPatch ||
00962 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00963 break;
00964 }
00965 }
00966
00967 #else
00968 #if 0
00969 const SimParameters* params = Node::Object()->simParameters;
00970
00971 if(!smallFlag) {
00972
00973 if(patchMap->numaway_a() == 2) {
00974 proxy_y = (my_y + 2) % ysize;
00975 proxy_x = my_x % xsize;
00976 proxy_z = my_z % zsize;
00977
00978 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00979 if(proxyNodes[proxyNode] == No) {
00980 proxyNodes[proxyNode] = Yes;
00981 neighborNodes[nProxyNodes] = proxyNode;
00982 nProxyNodes++;
00983 }
00984
00985 proxy_y = (my_y - 2 + ysize) % ysize;
00986 proxy_x = my_x % xsize;
00987 proxy_z = my_z % zsize;
00988
00989 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00990 if(proxyNodes[proxyNode] == No) {
00991 proxyNodes[proxyNode] = Yes;
00992 neighborNodes[nProxyNodes] = proxyNode;
00993 nProxyNodes++;
00994 }
00995 }
00996
00997
00998 if(patchMap->numaway_b() == 2) {
00999 proxy_y = my_y % ysize;
01000 proxy_x = my_x % xsize;
01001 proxy_z = (my_z + 2) % zsize;
01002
01003 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01004 if(proxyNodes[proxyNode] == No) {
01005 proxyNodes[proxyNode] = Yes;
01006 neighborNodes[nProxyNodes] = proxyNode;
01007 nProxyNodes++;
01008 }
01009
01010 proxy_y = my_y % ysize;
01011 proxy_x = my_x % xsize;
01012 proxy_z = (my_z - 2 + zsize) % zsize;
01013
01014 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01015 if(proxyNodes[proxyNode] == No) {
01016 proxyNodes[proxyNode] = Yes;
01017 neighborNodes[nProxyNodes] = proxyNode;
01018 nProxyNodes++;
01019 }
01020 }
01021
01022
01023 if(patchMap->numaway_c() == 2) {
01024 proxy_y = my_y % ysize;
01025 proxy_x = (my_x + 2) % xsize;
01026 proxy_z = my_z % zsize;
01027
01028 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01029 if(proxyNodes[proxyNode] == No) {
01030 proxyNodes[proxyNode] = Yes;
01031 neighborNodes[nProxyNodes] = proxyNode;
01032 nProxyNodes++;
01033 }
01034
01035 proxy_y = my_y % ysize;
01036 proxy_x = (my_x - 2 + xsize) % xsize;
01037 proxy_z = my_z % zsize;
01038
01039 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
01040 if(proxyNodes[proxyNode] == No) {
01041 proxyNodes[proxyNode] = Yes;
01042 neighborNodes[nProxyNodes] = proxyNode;
01043 nProxyNodes++;
01044 }
01045 }
01046 }
01047 #endif
01048 #endif
01049
01050
01051
01052 delete [] proxyNodes;
01053 return nProxyNodes;
01054 }
01055
01056 #endif