00001
00002
00003
00004
00005
00006
00007
#if !defined(WIN32) || defined(__CYGWIN__)
#include <unistd.h>
#endif
#include <fcntl.h>
#include <stdio.h>

#include "InfoStream.h"
#include "NamdCentLB.h"
#include "NamdCentLB.def.h"
#include "Node.h"
#include "PatchMap.h"
#include "ComputeMap.h"
#include "LdbCoordinator.h"
00020
00021
00022
00023
00024 double *cpuloads = NULL;
00025
00026 void CreateNamdCentLB()
00027 {
00028
00029 loadbalancer = CProxy_NamdCentLB::ckNew();
00030
00031 cpuloads = new double[CkNumPes()];
00032 for (int i=0; i<CkNumPes(); i++) cpuloads[i] = 0.0;
00033 }
00034
00035 #if CHARM_VERSION > 050610
00036 NamdCentLB::NamdCentLB(): CentralLB(CkLBOptions(-1))
00037 #else
00038 NamdCentLB::NamdCentLB()
00039 #endif
00040 {
00041
00042
00043 processorArray = 0;
00044 patchArray = 0;
00045 computeArray = 0;
00046 }
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 CmiBool NamdCentLB::QueryBalanceNow(int _step)
00058 {
00059
00060 if ( LdbCoordinator::Object()->takingLdbData ) {
00061 return CmiTrue;
00062 } else {
00063 return CmiFalse;
00064 }
00065 }
00066
00067 CmiBool NamdCentLB::QueryDumpData()
00068 {
00069 #if 0
00070 if (LdbCoordinator::Object()->ldbCycleNum == 1) return CmiTrue;
00071 if (LdbCoordinator::Object()->ldbCycleNum == 2) return CmiTrue;
00072 #endif
00073 return CmiFalse;
00074 }
00075
00076 CLBMigrateMsg* NamdCentLB::Strategy(CentralLB::LDStats* stats, int count)
00077 {
00078
00079
00080
00081 int numProcessors = count;
00082 int numPatches = PatchMap::Object()->numPatches();
00083 ComputeMap *computeMap = ComputeMap::Object();
00084 const int numComputes = computeMap->numComputes();
00085 const SimParameters* simParams = Node::Object()->simParameters;
00086
00087
00088 if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00089 if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00090 if ( ! computeArray ) computeArray = new computeInfo[numComputes];
00091
00092 int nMoveableComputes = buildData(stats,count);
00093
00094 #if LDB_DEBUG
00095 #define DUMP_LDBDATA 1
00096 #define LOAD_LDBDATA 1
00097 #endif
00098
00099 #if DUMP_LDBDATA
00100 dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00101 #elif LOAD_LDBDATA
00102 loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00103
00104 #endif
00105
00106 if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00107 RefineOnly(computeArray, patchArray, processorArray,
00108 nMoveableComputes, numPatches, numProcessors);
00109 } else if (simParams->ldbStrategy == LDBSTRAT_ALG7) {
00110 Alg7(computeArray, patchArray, processorArray,
00111 nMoveableComputes, numPatches, numProcessors);
00112 } else if (simParams->ldbStrategy == LDBSTRAT_ASB8) {
00113 if (step() < 2)
00114 TorusLB(computeArray, patchArray, processorArray,
00115 nMoveableComputes, numPatches, numProcessors);
00116 else
00117 RefineTorusLB(computeArray, patchArray, processorArray,
00118 nMoveableComputes, numPatches, numProcessors, 1);
00119 } else if (simParams->ldbStrategy == LDBSTRAT_OTHER) {
00120 if (step() < 2)
00121 Alg7(computeArray, patchArray, processorArray,
00122 nMoveableComputes, numPatches, numProcessors);
00123 else
00124 RefineOnly(computeArray, patchArray, processorArray,
00125 nMoveableComputes, numPatches, numProcessors);
00126 }
00127
00128 #if LDB_DEBUG && USE_TOPOMAP
00129 TopoManager tmgr;
00130 int pe1, pe2, pe3, hops=0;
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 for (int i=0; i<numPatches; i++) {
00142
00143 pe1 = patchArray[i].processor;
00144 Iterator nextProc;
00145 processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00146 while (p) {
00147 pe2 = p->Id;
00148 hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00149 p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00150 }
00151 }
00152 CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00153 #endif
00154
00155 #if DUMP_LDBDATA
00156 dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00157 #elif LOAD_LDBDATA
00158 dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00159
00160
00161 #endif
00162
00163
00164
00165 int i;
00166 #if 0
00167 int* computeCount = new int[numProcessors];
00168 for(i=0; i<numProcessors; i++)
00169 computeCount[i]=0;
00170 for(i=0; i<nMoveableComputes; i++)
00171 computeCount[computeArray[i].processor]++;
00172 for(i=0; i<numProcessors; i++) {
00173 if (computeCount[i]==0)
00174 iout << iINFO <<"Warning: Processor " << i
00175 << " has NO moveable computes.\n" << endi;
00176 }
00177 delete [] computeCount;
00178 #endif
00179
00180 CkVec<MigrateInfo *> migrateInfo;
00181 for(i=0;i<nMoveableComputes;i++) {
00182 if (computeArray[i].processor != computeArray[i].oldProcessor) {
00183
00184
00185
00186 MigrateInfo *migrateMe = new MigrateInfo;
00187 migrateMe->obj = computeArray[i].handle;
00188 migrateMe->from_pe = computeArray[i].oldProcessor;
00189 migrateMe->to_pe = computeArray[i].processor;
00190 migrateInfo.insertAtEnd(migrateMe);
00191
00192
00193 computeMap->setNewNode(computeArray[i].handle.id.id[0],
00194 computeArray[i].processor);
00195 }
00196 }
00197
00198 int migrate_count=migrateInfo.length();
00199
00200 #if CHARM_VERSION > 050611
00201 CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
00202 #else
00203 CLBMigrateMsg* msg = new(&migrate_count,1) CLBMigrateMsg;
00204 #endif
00205 msg->n_moves = migrate_count;
00206 for(i=0; i < migrate_count; i++) {
00207 MigrateInfo* item = migrateInfo[i];
00208 msg->moves[i] = *item;
00209 delete item;
00210 migrateInfo[i] = 0;
00211 }
00212
00213 for (i=0; i<numProcessors; i++) {
00214 cpuloads[i] = processorArray[i].load;
00215 }
00216
00217 delete [] processorArray;
00218 delete [] patchArray;
00219 delete [] computeArray;
00220
00221 processorArray = NULL;
00222 patchArray = NULL;
00223 computeArray = NULL;
00224
00225 return msg;
00226 };
00227
00228 #ifndef WIN32
00229
00230 void NamdCentLB::dumpDataASCII(char *file, int numProcessors,
00231 int numPatches, int numComputes)
00232 {
00233 char filename[128];
00234 sprintf(filename, "%s.%d", file, step());
00235 FILE* fp = fopen(filename,"w");
00236 if (fp == NULL){
00237 perror("dumpLDStatsASCII");
00238 return;
00239 }
00240 CkPrintf("***** DUMP data to file: %s ***** \n", filename);
00241 fprintf(fp,"%d %d %d\n",numProcessors,numPatches,numComputes);
00242
00243 int i;
00244 for(i=0;i<numProcessors;i++) {
00245 processorInfo* p = processorArray + i;
00246 fprintf(fp,"%d %e %e %e %e\n",p->Id,p->load,p->backgroundLoad,p->computeLoad,p->idleTime);
00247 }
00248
00249 for(i=0;i < numPatches; i++) {
00250 patchInfo* p = patchArray + i;
00251 fprintf(fp,"%d %e %d %d\n",p->Id,p->load,p->processor,p->numAtoms);
00252 }
00253
00254 for(i=0; i < numComputes; i++) {
00255 computeInfo* c = computeArray + i;
00256 fprintf(fp,"%d %e %d %d %d %d",c->Id,c->load,c->patch1,c->patch2,
00257 c->processor,c->oldProcessor);
00258 #if CHARM_VERSION > 50910
00259 fprintf(fp, " %e %e", c->minTime, c->maxTime);
00260 #endif
00261 fprintf(fp, "\n");
00262 }
00263
00264
00265 for (i=0; i< numProcessors; i++) {
00266 int num = processorArray[i].proxies.numElements();
00267 fprintf(fp, "%d %d: ", i, num);
00268 Iterator nextProxy;
00269 patchInfo *p = (patchInfo *)processorArray[i].proxies.
00270 iterator((Iterator *)&nextProxy);
00271 while (p) {
00272 fprintf(fp, "%d ", p->Id);
00273 p = (patchInfo *)processorArray[i].proxies.
00274 next((Iterator*)&nextProxy);
00275 }
00276 fprintf(fp, "\n");
00277 }
00278
00279 for (i=0; i<numPatches; i++) {
00280 int num = patchArray[i].proxiesOn.numElements();
00281 fprintf(fp, "%d %d: ", i, num);
00282 Iterator nextProc;
00283 processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.
00284 iterator((Iterator *)&nextProc);
00285 while (p) {
00286 fprintf(fp, "%d ", p->Id);
00287 p = (processorInfo *)patchArray[i].proxiesOn.
00288 next((Iterator*)&nextProc);
00289 }
00290 fprintf(fp, "\n");
00291 }
00292
00293 fclose(fp);
00294
00295 }
00296
00297 void NamdCentLB::loadDataASCII(char *file, int &numProcessors,
00298 int &numPatches, int &numComputes)
00299 {
00300 char filename[128];
00301
00302 sprintf(filename, "%s", file);
00303
00304 CkPrintf("***** Load ascii data from file: %s ***** \n", filename);
00305
00306 FILE* fp = fopen(filename, "r");
00307 if (fp == NULL){
00308 perror("loadDataASCII");
00309 return;
00310 }
00311
00312 fscanf(fp,"%d %d %d",&numProcessors,&numPatches,&numComputes);
00313
00314 printf("numProcs: %d numPatches: %d numComputes: %d\n", numProcessors,numPatches, numComputes);
00315
00316 delete [] processorArray;
00317 delete [] patchArray;
00318 delete [] computeArray;
00319 processorArray = new processorInfo[numProcessors];
00320 patchArray = new patchInfo[numPatches];
00321 computeArray = new computeInfo[numComputes];
00322
00323 int i;
00324 for(i=0;i<numProcessors;i++) {
00325 processorInfo* p = processorArray + i;
00326 fscanf(fp,"%d %le %le %le", &p->Id, &p->load, &p->backgroundLoad, &p->computeLoad);
00327 fscanf(fp,"%le\n", &p->idleTime);
00328 if (p->Id != i) CmiAbort("Reading processorArray error!");
00329
00330 }
00331
00332 for(i=0;i < numPatches; i++) {
00333 patchInfo* p = patchArray + i;
00334 fscanf(fp,"%d %le %d %d\n",&p->Id,&p->load,&p->processor,&p->numAtoms);
00335 if (p->Id != i || p->processor > numProcessors || p->processor < 0)
00336 CmiAbort("Reading patchArray error!");
00337 }
00338
00339 for(i=0; i < numComputes; i++) {
00340 computeInfo* c = computeArray + i;
00341 fscanf(fp,"%d %le %d %d %d %d",&c->Id,&c->load,&c->patch1,&c->patch2,
00342 &c->processor,&c->oldProcessor);
00343 #if CHARM_VERSION > 50910
00344 fscanf(fp, " %le %le", &c->minTime, &c->maxTime);
00345 #endif
00346 if (c->patch1 < 0 || c->patch1 > numPatches || c->patch2 < 0 || c->patch2 > numPatches)
00347 CmiAbort("Reading computeArray error!");
00348
00349 }
00350
00351
00352 for (i=0; i< numProcessors; i++) {
00353 int num = processorArray[i].proxies.numElements();
00354 fscanf(fp,"%d",&num);
00355 for (int j=0; j<num; j++) {
00356 int id;
00357 fscanf(fp,"%d",&id);
00358 processorArray[i].proxies.insert(&patchArray[id]);
00359 }
00360 }
00361
00362 for (i=0; i<numPatches; i++) {
00363 int num;
00364 fscanf(fp,"%d",&num);
00365 for (int j=0; j<num; j++) {
00366 int id;
00367 fscanf(fp,"%d",&id);
00368 patchArray[i].proxiesOn.insert(&processorArray[id]);
00369 }
00370 }
00371
00372 fclose(fp);
00373 }
00374 #endif
00375
00376 extern int isPmeProcessor(int);
00377
00378 int NamdCentLB::buildData(CentralLB::LDStats* stats, int count)
00379 {
00380 PatchMap* patchMap = PatchMap::Object();
00381 ComputeMap* computeMap = ComputeMap::Object();
00382 const SimParameters* simParams = Node::Object()->simParameters;
00383
00384 BigReal bgfactor = simParams->ldbBackgroundScaling;
00385 BigReal pmebgfactor = simParams->ldbPMEBackgroundScaling;
00386 BigReal homebgfactor = simParams->ldbHomeBackgroundScaling;
00387 int pmeOn = simParams->PMEOn;
00388 int unLoadPme = simParams->ldbUnloadPME;
00389 int pmeBarrier = simParams->PMEBarrier;
00390 int unLoadZero = simParams->ldbUnloadZero;
00391 int unLoadOne = simParams->ldbUnloadOne;
00392 int unLoadRankZero = simParams->ldbUnloadRankZero;
00393 int unLoadSMP = simParams->ldbUnloadSMP;
00394
00395 int i;
00396 for (i=0; i<count; ++i) {
00397 processorArray[i].Id = i;
00398 processorArray[i].available = CmiTrue;
00399 if ( pmeOn && isPmeProcessor(i) ) {
00400 #if CHARM_VERSION > 050607
00401 processorArray[i].backgroundLoad = pmebgfactor * stats->procs[i].bg_walltime;
00402 #else
00403 processorArray[i].backgroundLoad = pmebgfactor * stats[i].bg_walltime;
00404 #endif
00405 } else if (patchMap->numPatchesOnNode(i) > 0) {
00406 #if CHARM_VERSION > 050607
00407 processorArray[i].backgroundLoad = homebgfactor * stats->procs[i].bg_walltime;
00408 #else
00409 processorArray[i].backgroundLoad = homebgfactor * stats[i].bg_walltime;
00410 #endif
00411 } else {
00412 #if CHARM_VERSION > 050607
00413 processorArray[i].backgroundLoad = bgfactor * stats->procs[i].bg_walltime;
00414 #else
00415 processorArray[i].backgroundLoad = bgfactor * stats[i].bg_walltime;
00416 #endif
00417 }
00418 processorArray[i].idleTime = stats->procs[i].idletime;
00419 processorArray[i].load = processorArray[i].computeLoad = 0.0;
00420 }
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471 if (unLoadZero) processorArray[0].available = CmiFalse;
00472 if (unLoadOne) processorArray[1].available = CmiFalse;
00473 if (unLoadRankZero) {
00474 for (int i=0; i<count; i+=4)
00475 processorArray[i].available = CmiFalse;
00476 }
00477
00478
00479 if (pmeOn && unLoadPme) {
00480 for (i=0; i<count; i++) {
00481 if (!isPmeProcessor(i)) break;
00482 }
00483 if (i==count) {
00484 iout << iINFO << "Turned off unLoadPme flag!\n" << endi;
00485 unLoadPme = 0;
00486 }
00487 }
00488
00489 if (pmeOn && unLoadPme) {
00490 for (i=0; i<count; i++) {
00491 if ((pmeBarrier && i==0) || isPmeProcessor(i))
00492 processorArray[i].available = CmiFalse;
00493 }
00494 }
00495
00496 if (unLoadSMP) {
00497 int ppn = simParams->procsPerNode;
00498 int unloadrank = simParams->ldbUnloadRank;
00499 for (int i=0; i<count; i+=ppn) {
00500 processorArray[i+unloadrank].available = CmiFalse;
00501 }
00502 }
00503
00504 int nMoveableComputes=0;
00505 int nProxies = 0;
00506 #if CHARM_VERSION > 050607
00507 int j;
00508 for (j=0; j < stats->n_objs; j++) {
00509 const LDObjData &this_obj = stats->objData[j];
00510 int frompe = stats->from_proc[j];
00511 #else
00512 for (i=0; i < count; i++) {
00513 int j;
00514 for (j=0; j < stats[i].n_objs; j++) {
00515 const LDObjData &this_obj = stats[i].objData[j];
00516 int frompe = i;
00517 #endif
00518
00519 #if CHARM_VERSION > 050405
00520 if (this_obj.omID().id.idx != 1) continue;
00521 #elif CHARM_VERSION > 050403
00522 if (this_obj.omID.id.idx != 1) continue;
00523 #else
00524 if (this_obj.omID.id != 1) continue;
00525 #endif
00526 #if CHARM_VERSION > 050405
00527 if (this_obj.id().id[1] == -2) {
00528 const int pid = this_obj.id().id[0];
00529 #else
00530 if (this_obj.id.id[1] == -2) {
00531 const int pid = this_obj.id.id[0];
00532 #endif
00533 int neighborNodes[PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00534
00535 patchArray[pid].Id = pid;
00536 patchArray[pid].numAtoms = 0;
00537 #if CHARM_VERSION > 050607
00538 patchArray[pid].processor = stats->from_proc[j];
00539 #else
00540 patchArray[pid].processor = i;
00541 #endif
00542 const int numProxies =
00543 #if USE_TOPOMAP
00544 requiredProxiesOnProcGrid(pid,neighborNodes);
00545 #else
00546 requiredProxies(pid, neighborNodes);
00547 #endif
00548
00549 nProxies += numProxies;
00550
00551 for (int k=0; k<numProxies; k++) {
00552 processorArray[neighborNodes[k]].proxies.insert(&patchArray[pid]);
00553 patchArray[pid].proxiesOn.insert(&processorArray[neighborNodes[k]]);
00554 }
00555 } else if (this_obj.migratable) {
00556 #if CHARM_VERSION > 050405
00557 const int cid = this_obj.id().id[0];
00558 #else
00559 const int cid = this_obj.id.id[0];
00560 #endif
00561 const int p0 = computeMap->pid(cid,0);
00562
00563
00564 int p1;
00565 if (computeMap->numPids(cid) > 1)
00566 p1 = computeMap->pid(cid,1);
00567 else p1 = p0;
00568 computeArray[nMoveableComputes].Id = cid;
00569 #if CHARM_VERSION > 050607
00570 computeArray[nMoveableComputes].oldProcessor = stats->from_proc[j];
00571 processorArray[stats->from_proc[j]].computeLoad += this_obj.wallTime;
00572 #else
00573 computeArray[nMoveableComputes].oldProcessor = i;
00574 processorArray[i].computeLoad += this_obj.wallTime;
00575 #endif
00576 computeArray[nMoveableComputes].processor = -1;
00577 computeArray[nMoveableComputes].patch1 = p0;
00578 computeArray[nMoveableComputes].patch2 = p1;
00579 computeArray[nMoveableComputes].handle = this_obj.handle;
00580 computeArray[nMoveableComputes].load = this_obj.wallTime;
00581 #if CHARM_VERSION > 50910
00582 computeArray[nMoveableComputes].minTime = this_obj.minWall;
00583 computeArray[nMoveableComputes].maxTime = this_obj.maxWall;
00584 #endif
00585 nMoveableComputes++;
00586 }
00587 }
00588 #if ! ( CHARM_VERSION > 050607 )
00589 }
00590 #endif
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607 for (i=0; i<count; i++) {
00608 processorArray[i].load = processorArray[i].backgroundLoad + processorArray[i].computeLoad;
00609 }
00610 stats->clear();
00611 return nMoveableComputes;
00612 }
00613
00614
00615
00616
00617
00618 int NamdCentLB::requiredProxies(PatchID id, int neighborNodes[])
00619 {
00620 enum proxyHere { No, Yes };
00621 int numNodes = CkNumPes();
00622 proxyHere *proxyNodes = new proxyHere[numNodes];
00623 int nProxyNodes;
00624 int i;
00625
00626
00627 for ( i = 0; i < numNodes; ++i )
00628 {
00629 proxyNodes[i] = No;
00630 }
00631 nProxyNodes=0;
00632
00633
00634
00635
00636 PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00637
00638 PatchMap* patchMap = PatchMap::Object();
00639
00640 int myNode = patchMap->node(id);
00641 neighbors[0] = id;
00642 int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00643 for ( i = 0; i < numNeighbors; ++i )
00644 {
00645 const int proxyNode = patchMap->basenode(neighbors[i]);
00646 if (proxyNode != myNode)
00647 if (proxyNodes[proxyNode] == No)
00648 {
00649 proxyNodes[proxyNode] = Yes;
00650 neighborNodes[nProxyNodes] = proxyNode;
00651 nProxyNodes++;
00652 }
00653 }
00654
00655
00656
00657
00658
00659
00660
00661
00662 #if 1
00663 int numPatches = patchMap->numPatches();
00664 int emptyNodes = numNodes - numPatches;
00665 if ( emptyNodes > numPatches ) {
00666 int nodesPerPatch = nProxyNodes + 1 + (emptyNodes-1) / numPatches;
00667 int proxyNode = (myNode + 1) % numNodes;
00668 while ( nProxyNodes < nodesPerPatch &&
00669 ! patchMap->numPatchesOnNode(proxyNode) ) {
00670 if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00671 proxyNodes[proxyNode] = Yes;
00672 neighborNodes[nProxyNodes] = proxyNode;
00673 nProxyNodes++;
00674 }
00675 proxyNode = (proxyNode + 1) % numNodes;
00676 }
00677 proxyNode = (myNode - 1 + numNodes) % numNodes;
00678 while ( nProxyNodes < nodesPerPatch &&
00679 ! patchMap->numPatchesOnNode(proxyNode) ) {
00680 if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00681 proxyNodes[proxyNode] = Yes;
00682 neighborNodes[nProxyNodes] = proxyNode;
00683 nProxyNodes++;
00684 }
00685 proxyNode = (proxyNode - 1 + numNodes) % numNodes;
00686 }
00687 proxyNode = (myNode + 1) % numNodes;
00688 int count = 0;
00689 while ( nProxyNodes < nodesPerPatch ) {
00690 if ( ! patchMap->numPatchesOnNode(proxyNode) &&
00691 proxyNode != myNode && proxyNodes[proxyNode] == No) {
00692 proxyNodes[proxyNode] = Yes;
00693 neighborNodes[nProxyNodes] = proxyNode;
00694 nProxyNodes++;
00695 }
00696 proxyNode = (proxyNode + 1) % numNodes;
00697 count ++; if (count == numNodes) break;
00698 }
00699 } else {
00700 int proxyNode = myNode - 1;
00701 if ( proxyNode >= 0 && ! patchMap->numPatchesOnNode(proxyNode) ) {
00702 if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00703 proxyNodes[proxyNode] = Yes;
00704 neighborNodes[nProxyNodes] = proxyNode;
00705 nProxyNodes++;
00706 }
00707 }
00708 proxyNode = myNode + 1;
00709 if ( proxyNode < numNodes && ! patchMap->numPatchesOnNode(proxyNode) ) {
00710 if (proxyNode != myNode && proxyNodes[proxyNode] == No) {
00711 proxyNodes[proxyNode] = Yes;
00712 neighborNodes[nProxyNodes] = proxyNode;
00713 nProxyNodes++;
00714 }
00715 }
00716 }
00717 #endif
00718
00719 delete [] proxyNodes;
00720 return nProxyNodes;
00721 }
00722
00723 #if USE_TOPOMAP
00724
00725
00726
00727
00728
00729
00730 int NamdCentLB::requiredProxiesOnProcGrid(PatchID id, int neighborNodes[])
00731 {
00732 enum proxyHere { No, Yes };
00733 int numNodes = CkNumPes();
00734 proxyHere *proxyNodes = new proxyHere[numNodes];
00735 int nProxyNodes;
00736 int i,j,k;
00737
00738 int xsize = 0, ysize = 0, zsize = 0;
00739 int my_x =0, my_y = 0, my_z = 0;
00740
00741 PatchMap* patchMap = PatchMap::Object();
00742 int myNode = patchMap->node(id);
00743
00744 TopoManager tmgr;
00745 xsize = tmgr.getDimX();
00746 ysize = tmgr.getDimY();
00747 zsize = tmgr.getDimZ();
00748
00749 tmgr.rankToCoordinates(myNode, my_x, my_y, my_z);
00750
00751 if(xsize * ysize * zsize != CkNumPes()) {
00752 delete [] proxyNodes;
00753 return requiredProxies(id, neighborNodes);
00754 }
00755
00756
00757
00758 for ( i = 0; i < numNodes; ++i )
00759 {
00760 proxyNodes[i] = No;
00761 }
00762 nProxyNodes=0;
00763
00764
00765
00766
00767 PatchID neighbors[1 + PatchMap::MaxOneAway + PatchMap::MaxTwoAway];
00768
00769
00770
00771
00772 neighbors[0] = id;
00773 int numNeighbors = 1 + patchMap->downstreamNeighbors(id,neighbors+1);
00774
00775
00776
00777
00778 CmiBool smallFlag = CmiFalse;
00779 double pnodes = CkNumPes();
00780 pnodes *= 0.25;
00781 smallFlag = (patchMap->numPatches() > pnodes )?1:0;
00782
00783
00784
00785
00786 for ( i = 1; i < numNeighbors; ++i )
00787 {
00788 int proxyNode = patchMap->basenode(neighbors[i]);
00789
00790 if (proxyNode != myNode)
00791 if (proxyNodes[proxyNode] == No)
00792 {
00793 proxyNodes[proxyNode] = Yes;
00794 neighborNodes[nProxyNodes] = proxyNode;
00795 nProxyNodes++;
00796 }
00797 }
00798
00799
00800 if (step() > 2) {
00801 delete [] proxyNodes;
00802 return nProxyNodes;
00803 }
00804
00805
00806
00807 int numPatches = patchMap->numPatches();
00808 int emptyNodes = numNodes - numPatches;
00809
00810
00811 int nodesPerPatch = nProxyNodes + 4 * (emptyNodes-1) / numPatches + 1;
00812 int proxyNode = 0 ;
00813 int proxy_x=0, proxy_y=0, proxy_z=0;
00814
00815
00816
00817
00818
00819 for(k=-1; k<= 1; k++) {
00820 proxy_z = (my_z + k + zsize) % zsize;
00821 for(j=-1; j <= 1; j++) {
00822 proxy_y = (my_y + j + ysize) % ysize;
00823 for(i = -1; i <= 1; i++) {
00824 if(i == 0 && j == 0 && k == 0)
00825 continue;
00826
00827 proxy_x = (my_x + i + xsize) % xsize;
00828 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00829
00830 if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00831 proxyNodes[proxyNode] == No) {
00832 proxyNodes[proxyNode] = Yes;
00833 neighborNodes[nProxyNodes] = proxyNode;
00834 nProxyNodes++;
00835 }
00836
00837 if(nProxyNodes >= nodesPerPatch ||
00838 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00839 break;
00840 }
00841
00842 if(nProxyNodes >= nodesPerPatch ||
00843 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00844 break;
00845 }
00846 if(nProxyNodes >= nodesPerPatch ||
00847 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00848 break;
00849 }
00850
00851 #if 1
00852 if(!smallFlag) {
00853 for(k=-2; k<= 2; k+=2) {
00854 proxy_z = (my_z + k + zsize) % zsize;
00855 for(j=-2; j <= 2; j+=2) {
00856 proxy_y = (my_y + j + ysize) % ysize;
00857 for(i = -2; i <= 2; i+=2) {
00858 if(i == 0 && j == 0 && k == 0)
00859 continue;
00860
00861 proxy_x = (my_x + i + xsize) % xsize;
00862 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00863
00864 if((! patchMap->numPatchesOnNode(proxyNode) || !smallFlag) &&
00865 proxyNodes[proxyNode] == No) {
00866 proxyNodes[proxyNode] = Yes;
00867 neighborNodes[nProxyNodes] = proxyNode;
00868 nProxyNodes++;
00869 }
00870
00871 if(nProxyNodes >= nodesPerPatch ||
00872 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00873 break;
00874 }
00875
00876 if(nProxyNodes >= nodesPerPatch ||
00877 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00878 break;
00879 }
00880 if(nProxyNodes >= nodesPerPatch ||
00881 nProxyNodes >= PatchMap::MaxOneAway + PatchMap::MaxTwoAway)
00882 break;
00883 }
00884 }
00885
00886 #else
00887 const SimParameters* params = Node::Object()->simParameters;
00888
00889 if(!smallFlag) {
00890
00891 if(patchMap->numaway_a() == 2) {
00892 proxy_y = (my_y + 2) % ysize;
00893 proxy_x = my_x % xsize;
00894 proxy_z = my_z % zsize;
00895
00896 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00897 if(proxyNodes[proxyNode] == No) {
00898 proxyNodes[proxyNode] = Yes;
00899 neighborNodes[nProxyNodes] = proxyNode;
00900 nProxyNodes++;
00901 }
00902
00903 proxy_y = (my_y - 2 + ysize) % ysize;
00904 proxy_x = my_x % xsize;
00905 proxy_z = my_z % zsize;
00906
00907 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00908 if(proxyNodes[proxyNode] == No) {
00909 proxyNodes[proxyNode] = Yes;
00910 neighborNodes[nProxyNodes] = proxyNode;
00911 nProxyNodes++;
00912 }
00913 }
00914
00915
00916 if(patchMap->numaway_b() == 2) {
00917 proxy_y = my_y % ysize;
00918 proxy_x = my_x % xsize;
00919 proxy_z = (my_z + 2) % zsize;
00920
00921 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00922 if(proxyNodes[proxyNode] == No) {
00923 proxyNodes[proxyNode] = Yes;
00924 neighborNodes[nProxyNodes] = proxyNode;
00925 nProxyNodes++;
00926 }
00927
00928 proxy_y = my_y % ysize;
00929 proxy_x = my_x % xsize;
00930 proxy_z = (my_z - 2 + zsize) % zsize;
00931
00932 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00933 if(proxyNodes[proxyNode] == No) {
00934 proxyNodes[proxyNode] = Yes;
00935 neighborNodes[nProxyNodes] = proxyNode;
00936 nProxyNodes++;
00937 }
00938 }
00939
00940
00941 if(patchMap->numaway_c() == 2) {
00942 proxy_y = my_y % ysize;
00943 proxy_x = (my_x + 2) % xsize;
00944 proxy_z = my_z % zsize;
00945
00946 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00947 if(proxyNodes[proxyNode] == No) {
00948 proxyNodes[proxyNode] = Yes;
00949 neighborNodes[nProxyNodes] = proxyNode;
00950 nProxyNodes++;
00951 }
00952
00953 proxy_y = my_y % ysize;
00954 proxy_x = (my_x - 2 + xsize) % xsize;
00955 proxy_z = my_z % zsize;
00956
00957 proxyNode = tmgr.coordinatesToRank(proxy_x, proxy_y, proxy_z);
00958 if(proxyNodes[proxyNode] == No) {
00959 proxyNodes[proxyNode] = Yes;
00960 neighborNodes[nProxyNodes] = proxyNode;
00961 nProxyNodes++;
00962 }
00963 }
00964 }
00965 #endif
00966
00967
00968
00969 delete [] proxyNodes;
00970 return nProxyNodes;
00971 }
00972
00973 #endif