00001
00007
00008
00009
00010
00011
00012
00013
00020 #include <stdio.h>
00021
00022 #include "InfoStream.h"
00023 #include "ProcessorPrivate.h"
00024 #include "BOCgroup.h"
00025 #include "WorkDistrib.decl.h"
00026 #include "WorkDistrib.h"
00027 #include "Lattice.h"
00028 #include "ComputeMsmMsa.h"
00029 #include "main.decl.h"
00030 #include "main.h"
00031 #include "Node.h"
00032 #include "PatchMgr.h"
00033 #include "PatchMap.inl"
00034 #include "NamdTypes.h"
00035 #include "PDB.h"
00036 #include "SimParameters.h"
00037 #include "Molecule.h"
00038 #include "NamdOneTools.h"
00039 #include "Compute.h"
00040 #include "ComputeMap.h"
00041 #include "RecBisection.h"
00042 #include "Random.h"
00043 #include "varsizemsg.h"
00044 #include "ProxyMgr.h"
00045 #include "Priorities.h"
00046 #include "SortAtoms.h"
00047
00048
00049 #define MIN_DEBUG_LEVEL 2
00050 #include "Debug.h"
00051
00052 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
00053 {
00054 public:
00055
00056 int numNewNodes;
00057 int numNewNumPartitions;
00058 int *newNodes;
00059 char *newNumPartitions;
00060
00061
00062 };
00063
00064
00065
00066
00067
00068
00069
00070 static int eventMachineProgress;
00071
00072
00073
00074
00075 WorkDistrib::WorkDistrib()
00076 {
00077 CkpvAccess(BOCclass_group).workDistrib = thisgroup;
00078 patchMapArrived = false;
00079 computeMapArrived = false;
00080 eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
00081 }
00082
00083
00084 WorkDistrib::~WorkDistrib(void)
00085 { }
00086
00087
00088
00089 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
00090 {
00091 saveComputeMapReturnEP = ep;
00092 saveComputeMapReturnChareID = chareID;
00093
00094 ComputeMap *computeMap = ComputeMap::Object();
00095
00096 int i;
00097 int nc = computeMap->numComputes();
00098
00099 ComputeMapChangeMsg *mapMsg = new (nc, nc, 0) ComputeMapChangeMsg ;
00100
00101 mapMsg->numNewNodes = nc;
00102 for(i=0; i<nc; i++)
00103 mapMsg->newNodes[i] = computeMap->newNode(i);
00104 mapMsg->numNewNumPartitions = nc;
00105 for(i=0; i<nc; i++)
00106 mapMsg->newNumPartitions[i] = computeMap->newNumPartitions(i);
00107
00108 CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118 }
00119
00120 void WorkDistrib::recvComputeMapChanges(ComputeMapChangeMsg *msg) {
00121
00122 if ( ! CkMyRank() ) {
00123 ComputeMap *computeMap = ComputeMap::Object();
00124 int nc = computeMap->numComputes();
00125 if ( nc != msg->numNewNodes ) NAMD_bug("recvComputeMapChanges 1");
00126 int i;
00127 for(i=0; i<nc; i++)
00128 computeMap->setNewNode(i,msg->newNodes[i]);
00129 if ( msg->numNewNumPartitions ) {
00130 if ( nc != msg->numNewNumPartitions ) NAMD_bug("recvComputeMapChanges 2");
00131 for(i=0; i<nc; i++)
00132 computeMap->setNewNumPartitions(i,msg->newNumPartitions[i]);
00133 }
00134 }
00135
00136 delete msg;
00137
00138 CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
00139 contribute(NULL, 0, CkReduction::random, cb);
00140 }
00141
00142 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
00143 delete msg;
00144
00145 CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
00146 }
00147
00148 #ifdef MEM_OPT_VERSION
00149
00150
00151
00152
00153 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
00154 PatchMap *patchMap = PatchMap::Object();
00155
00156 ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
00157 0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
00158 0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));
00159
00160 int n = alist.size();
00161 FullAtom *a = alist.begin();
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173 SimParameters *params = Node::Object()->simParameters;
00174 const Lattice lattice = params->lattice;
00175 Transform mother_transform;
00176 for(int j=0; j < n; j++)
00177 {
00178 int aid = a[j].id;
00179 a[j].nonbondedGroupSize = 0;
00180
00181 a[j].fixedPosition = a[j].position;
00182
00183 if ( a[j].migrationGroupSize ) {
00184 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
00185 Position pos = a[j].position;
00186 int mgs = a[j].migrationGroupSize;
00187 int c = 1;
00188
00189 for ( int k=a[j].hydrogenGroupSize; k<mgs;
00190 k+=a[j+k].hydrogenGroupSize ) {
00191 pos += a[j+k].position;
00192 ++c;
00193 }
00194
00195 pos *= 1./c;
00196 mother_transform = a[j].transform;
00197 pos = lattice.nearest(pos,center,&mother_transform);
00198 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00199 a[j].transform = mother_transform;
00200 } else {
00201 a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
00202 mother_transform = a[j].transform;
00203 }
00204 } else {
00205 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00206 a[j].transform = mother_transform;
00207 }
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 a[j].partition = 0;
00221
00222
00223 if(params->langevinOn) {
00224 BigReal bval = params->langevinDamping;
00225 if(!params->langevinHydrogen &&
00226 ((a[j].status & HydrogenAtom)!=0)) {
00227 bval = 0;
00228 }else if ((a[j].status & LonepairAtom)!=0) {
00229 bval = 0;
00230 }else if ((a[j].status & DrudeAtom)!=0) {
00231 bval = params->langevinDamping;
00232 }
00233 a[j].langevinParam = bval;
00234 }
00235
00236 }
00237
00238 int size, allfixed;
00239 for(int j=0; j < n; j+=size) {
00240 size = a[j].hydrogenGroupSize;
00241 if ( ! size ) {
00242 NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
00243 }
00244 allfixed = 1;
00245 for (int k = 0; k < size; ++k ) {
00246 allfixed = ( allfixed && (a[j+k].atomFixed) );
00247 }
00248 for (int k = 0; k < size; ++k ) {
00249 a[j+k].groupFixed = allfixed ? 1 : 0;
00250 }
00251 }
00252
00253 if ( params->outputPatchDetails ) {
00254 int patchId = pid;
00255 int numAtomsInPatch = n;
00256 int numFixedAtomsInPatch = 0;
00257 int numAtomsInFixedGroupsInPatch = 0;
00258 for(int j=0; j < n; j++) {
00259 numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
00260 numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
00261 }
00262 iout << "PATCH_DETAILS:"
00263 << " on proc " << CkMyPe()
00264 << " patch " << patchId
00265 << " atoms " << numAtomsInPatch
00266 << " fixed_atoms " << numFixedAtomsInPatch
00267 << " fixed_groups " << numAtomsInFixedGroupsInPatch
00268 << "\n" << endi;
00269 }
00270
00271 }
00272
00273 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
00274 {
00275 int i, j;
00276 BigReal kbT;
00277 BigReal randnum;
00278 BigReal kbToverM;
00279 SimParameters *simParams = Node::Object()->simParameters;
00280 Bool lesOn = simParams->lesOn;
00281 Random vel_random(simParams->randomSeed);
00282 int lesReduceTemp = lesOn && simParams->lesReduceTemp;
00283 BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
00284
00285 kbT = Temp*BOLTZMANN;
00286 int count=0;
00287 int totalAtoms = inAtoms.size();
00288 for(i=0;i<totalAtoms;i++)
00289 {
00290 Real atomMs=inAtoms[i].mass;
00291
00292 if (atomMs <= 0.) {
00293 kbToverM = 0.;
00294 } else {
00295
00296
00297
00298
00299
00300
00301
00302 kbToverM = sqrt(kbT * 1.0 / atomMs);
00303 }
00304 for (randnum=0.0, j=0; j<12; j++)
00305 {
00306 randnum += vel_random.uniform();
00307 }
00308
00309 randnum -= 6.0;
00310
00311 inAtoms[i].velocity.x = randnum*kbToverM;
00312
00313 for (randnum=0.0, j=0; j<12; j++)
00314 {
00315 randnum += vel_random.uniform();
00316 }
00317
00318 randnum -= 6.0;
00319
00320 inAtoms[i].velocity.y = randnum*kbToverM;
00321
00322 for (randnum=0.0, j=0; j<12; j++)
00323 {
00324 randnum += vel_random.uniform();
00325 }
00326
00327 randnum -= 6.0;
00328
00329 inAtoms[i].velocity.z = randnum*kbToverM;
00330 }
00331 }
00332 #endif
00333
00334
00335
00336
00337 FullAtomList *WorkDistrib::createAtomLists(void)
00338 {
00339 int i;
00340 StringList *current;
00341 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00342 Node *node = nd.ckLocalBranch();
00343 PatchMap *patchMap = PatchMap::Object();
00344 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00345 PatchMgr *patchMgr = pm.ckLocalBranch();
00346 SimParameters *params = node->simParameters;
00347 Molecule *molecule = node->molecule;
00348 PDB *pdb = node->pdb;
00349
00350 int numPatches = patchMap->numPatches();
00351 int numAtoms = pdb->num_atoms();
00352
00353 Vector *positions = new Position[numAtoms];
00354 pdb->get_all_positions(positions);
00355
00356 Vector *velocities = new Velocity[numAtoms];
00357
00358 if ( params->initialTemp < 0.0 ) {
00359 Bool binvels=FALSE;
00360
00361
00362 current = node->configList->find("velocities");
00363
00364 if (current == NULL) {
00365 current = node->configList->find("binvelocities");
00366 binvels = TRUE;
00367 }
00368
00369 if (!binvels) {
00370 velocities_from_PDB(current->data, velocities, numAtoms);
00371 }
00372 else {
00373 velocities_from_binfile(current->data, velocities, numAtoms);
00374 }
00375 }
00376 else {
00377
00378 random_velocities(params->initialTemp, molecule, velocities, numAtoms);
00379 }
00380
00381
00382 if (!(params->comMove)) {
00383 remove_com_motion(velocities, molecule, numAtoms);
00384 }
00385
00386 FullAtomList *atoms = new FullAtomList[numPatches];
00387
00388 const Lattice lattice = params->lattice;
00389
00390 if ( params->staticAtomAssignment ) {
00391 FullAtomList sortAtoms;
00392 for ( i=0; i < numAtoms; i++ ) {
00393 HydrogenGroupID &h = molecule->hydrogenGroup[i];
00394 if ( ! h.isMP ) continue;
00395 FullAtom a;
00396 a.id = i;
00397 a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00398 a.position = positions[h.atomID];
00399 sortAtoms.add(a);
00400 }
00401 int *order = new int[sortAtoms.size()];
00402 for ( i=0; i < sortAtoms.size(); i++ ) {
00403 order[i] = i;
00404 }
00405 int *breaks = new int[numPatches];
00406 sortAtomsForPatches(order,breaks,sortAtoms.begin(),
00407 sortAtoms.size(),numAtoms,
00408 patchMap->gridsize_c(),
00409 patchMap->gridsize_b(),
00410 patchMap->gridsize_a());
00411
00412 i = 0;
00413 for ( int pid = 0; pid < numPatches; ++pid ) {
00414 int iend = breaks[pid];
00415 for ( ; i<iend; ++i ) {
00416 FullAtom &sa = sortAtoms[order[i]];
00417 int mgs = sa.migrationGroupSize;
00418
00419
00420
00421
00422
00423
00424 for ( int k=0; k<mgs; ++k ) {
00425 HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
00426 int aid = h.atomID;
00427 FullAtom a;
00428 a.id = aid;
00429 a.position = positions[aid];
00430 a.velocity = velocities[aid];
00431 a.vdwType = molecule->atomvdwtype(aid);
00432 a.status = molecule->getAtoms()[aid].status;
00433 a.langevinParam = molecule->langevin_param(aid);
00434 a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
00435 a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00436 if(params->rigidBonds != RIGID_NONE) {
00437 a.rigidBondLength = molecule->rigid_bond_length(aid);
00438 }else{
00439 a.rigidBondLength = 0.0;
00440 }
00441 atoms[pid].add(a);
00442 }
00443 }
00444 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
00445 pid, patchMap->index_a(pid), patchMap->index_b(pid),
00446 patchMap->index_c(pid), atoms[pid].size());
00447 }
00448 delete [] order;
00449 delete [] breaks;
00450 } else
00451 {
00452
00453 int aid, pid=0;
00454 for(i=0; i < numAtoms; i++)
00455 {
00456
00457
00458
00459 HydrogenGroupID &h = molecule->hydrogenGroup[i];
00460 aid = h.atomID;
00461 FullAtom a;
00462 a.id = aid;
00463 a.position = positions[aid];
00464 a.velocity = velocities[aid];
00465 a.vdwType = molecule->atomvdwtype(aid);
00466 a.status = molecule->getAtoms()[aid].status;
00467 a.langevinParam = molecule->langevin_param(aid);
00468 a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
00469 a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00470 if(params->rigidBonds != RIGID_NONE) {
00471 a.rigidBondLength = molecule->rigid_bond_length(aid);
00472 }else{
00473 a.rigidBondLength = 0.0;
00474 }
00475 if (h.isMP) {
00476 pid = patchMap->assignToPatch(positions[aid],lattice);
00477 }
00478 atoms[pid].add(a);
00479 }
00480 }
00481
00482 delete [] positions;
00483 delete [] velocities;
00484
00485 for(i=0; i < numPatches; i++)
00486 {
00487 ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
00488 0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
00489 0.5*(patchMap->min_c(i)+patchMap->max_c(i)));
00490
00491 int n = atoms[i].size();
00492 FullAtom *a = atoms[i].begin();
00493 int j;
00494
00495 Bool alchFepOn = params->alchFepOn;
00496 Bool alchThermIntOn = params->alchThermIntOn;
00497
00498 Bool lesOn = params->lesOn;
00499
00500 Bool pairInteractionOn = params->pairInteractionOn;
00501
00502 Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
00503
00504 Transform mother_transform;
00505 for(j=0; j < n; j++)
00506 {
00507 int aid = a[j].id;
00508
00509 a[j].nonbondedGroupSize = 0;
00510
00511 a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
00512 a[j].fixedPosition = a[j].position;
00513
00514 if ( a[j].migrationGroupSize ) {
00515 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
00516 Position pos = a[j].position;
00517 int mgs = a[j].migrationGroupSize;
00518 int c = 1;
00519 for ( int k=a[j].hydrogenGroupSize; k<mgs;
00520 k+=a[j+k].hydrogenGroupSize ) {
00521 pos += a[j+k].position;
00522 ++c;
00523 }
00524 pos *= 1./c;
00525 mother_transform = a[j].transform;
00526 pos = lattice.nearest(pos,center,&mother_transform);
00527 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00528 a[j].transform = mother_transform;
00529 } else {
00530 a[j].position = lattice.nearest(
00531 a[j].position, center, &(a[j].transform));
00532 mother_transform = a[j].transform;
00533 }
00534 } else {
00535 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00536 a[j].transform = mother_transform;
00537 }
00538
00539 a[j].mass = molecule->atommass(aid);
00540 a[j].charge = molecule->atomcharge(aid);
00541
00542
00543 if ( alchFepOn || alchThermIntOn || lesOn || pairInteractionOn || pressureProfileTypes) {
00544 a[j].partition = molecule->get_fep_type(aid);
00545 }
00546 else {
00547 a[j].partition = 0;
00548 }
00549
00550
00551 }
00552
00553 int size, allfixed, k;
00554 for(j=0; j < n; j+=size) {
00555 size = a[j].hydrogenGroupSize;
00556 if ( ! size ) {
00557 NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
00558 }
00559 allfixed = 1;
00560 for ( k = 0; k < size; ++k ) {
00561 allfixed = ( allfixed && (a[j+k].atomFixed) );
00562 }
00563 for ( k = 0; k < size; ++k ) {
00564 a[j+k].groupFixed = allfixed ? 1 : 0;
00565 }
00566 }
00567
00568 if ( params->outputPatchDetails ) {
00569 int patchId = i;
00570 int numAtomsInPatch = n;
00571 int numFixedAtomsInPatch = 0;
00572 int numAtomsInFixedGroupsInPatch = 0;
00573 for(j=0; j < n; j++) {
00574 numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
00575 numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
00576 }
00577 iout << "PATCH_DETAILS:"
00578 << " patch " << patchId
00579 << " atoms " << numAtomsInPatch
00580 << " fixed_atoms " << numFixedAtomsInPatch
00581 << " fixed_groups " << numAtomsInFixedGroupsInPatch
00582 << "\n" << endi;
00583 }
00584 }
00585
00586 return atoms;
00587
00588 }
00589
00590
00591
00592
00593 void WorkDistrib::createHomePatches(void)
00594 {
00595 int i;
00596 PatchMap *patchMap = PatchMap::Object();
00597 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00598 PatchMgr *patchMgr = pm.ckLocalBranch();
00599
00600 int numPatches = patchMap->numPatches();
00601
00602 FullAtomList *atoms = createAtomLists();
00603
00604 #ifdef MEM_OPT_VERSION
00605
00606
00607
00608
00609
00610 #endif
00611
00612 int maxAtoms = -1;
00613 int maxPatch = -1;
00614 for(i=0; i < numPatches; i++) {
00615 int numAtoms = atoms[i].size();
00616 if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
00617 }
00618 iout << iINFO << "LARGEST PATCH (" << maxPatch <<
00619 ") HAS " << maxAtoms << " ATOMS\n" << endi;
00620
00621 for(i=0; i < numPatches; i++)
00622 {
00623 if ( ! ( i % 100 ) )
00624 {
00625 DebugM(3,"Created " << i << " patches so far.\n");
00626 }
00627
00628 patchMgr->createHomePatch(i,atoms[i]);
00629 }
00630
00631 delete [] atoms;
00632 }
00633
00634 void WorkDistrib::distributeHomePatches() {
00635
00636 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00637 Node *node = nd.ckLocalBranch();
00638 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00639 PatchMgr *patchMgr = pm.ckLocalBranch();
00640
00641 PatchMap *patchMap = PatchMap::Object();
00642
00643
00644 for(int i=0;i < patchMap->numPatches(); i++)
00645 {
00646 if (patchMap->node(i) != node->myid() )
00647 {
00648 DebugM(3,"patchMgr->movePatch("
00649 << i << "," << patchMap->node(i) << ")\n");
00650 patchMgr->movePatch(i,patchMap->node(i));
00651 }
00652 }
00653 patchMgr->sendMovePatches();
00654 }
00655
00656 void WorkDistrib::reinitAtoms() {
00657
00658 PatchMap *patchMap = PatchMap::Object();
00659 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00660 PatchMgr *patchMgr = pm.ckLocalBranch();
00661
00662 int numPatches = patchMap->numPatches();
00663
00664 FullAtomList *atoms = createAtomLists();
00665
00666 for(int i=0; i < numPatches; i++) {
00667 patchMgr->sendAtoms(i,atoms[i]);
00668 }
00669
00670 delete [] atoms;
00671
00672 }
00673
00674
00675
00676
00677 class PatchMapMsg : public CMessage_PatchMapMsg {
00678 public:
00679 char *patchMapData;
00680 };
00681
00682 void WorkDistrib::sendPatchMap(void)
00683 {
00684 if ( CkNumPes() == 1 ) {
00685 patchMapArrived = true;
00686 return;
00687 }
00688
00689
00690 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00691 Node *node = nd.ckLocalBranch();
00692 SimParameters *params = node->simParameters;
00693 if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
00694 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00695 || CkNumPes() > CkNumNodes()
00696 ) && ( CkNumNodes() > 1
00697 #endif
00698 ) && params->isSendSpanningTreeUnset() )
00699 ProxyMgr::Object()->setSendSpanning();
00700
00701 int size = PatchMap::Object()->packSize();
00702
00703 PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;
00704
00705 PatchMap::Object()->pack(mapMsg->patchMapData);
00706
00707 CProxy_WorkDistrib workProxy(thisgroup);
00708 workProxy[0].savePatchMap(mapMsg);
00709 }
00710
00711
00712 void WorkDistrib::savePatchMap(PatchMapMsg *msg)
00713 {
00714
00715
00716
00717
00718
00719
00720 if ( CkMyRank() ) patchMapArrived = true;
00721
00722 if ( patchMapArrived && CkMyPe() ) {
00723 PatchMap::Object()->unpack(msg->patchMapData);
00724
00725
00726 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00727 Node *node = nd.ckLocalBranch();
00728 SimParameters *params = node->simParameters;
00729 if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
00730 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00731 || CkNumPes() > CkNumNodes()
00732 ) && ( CkNumNodes() > 1
00733 #endif
00734 ) && params->isSendSpanningTreeUnset() )
00735 ProxyMgr::Object()->setSendSpanning();
00736 }
00737
00738 if ( patchMapArrived ) {
00739 if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
00740 ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
00741 } else {
00742 delete msg;
00743 }
00744 return;
00745 }
00746
00747 patchMapArrived = true;
00748
00749 int pids[3];
00750 int baseNid = 2 * CkMyNode() + 1;
00751 int npid = 0;
00752 if ( (baseNid+npid) < CkNumNodes() ) { pids[npid] = CkNodeFirst(baseNid + npid); ++npid; }
00753 if ( (baseNid+npid) < CkNumNodes() ) { pids[npid] = CkNodeFirst(baseNid + npid); ++npid; }
00754 pids[npid] = CkMyPe(); ++npid;
00755 CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
00756 }
00757
00758
00759 class ComputeMapMsg : public CMessage_ComputeMapMsg {
00760 public:
00761 int nComputes;
00762 ComputeMap::ComputeData *computeMapData;
00763 };
00764
00765 void WorkDistrib::sendComputeMap(void)
00766 {
00767 if ( CkNumNodes() == 1 ) {
00768 computeMapArrived = true;
00769 ComputeMap::Object()->initPtrs();
00770 return;
00771 }
00772
00773 int size = ComputeMap::Object()->numComputes();
00774
00775 ComputeMapMsg *mapMsg = new (size, 0) ComputeMapMsg;
00776
00777 mapMsg->nComputes = size;
00778 ComputeMap::Object()->pack(mapMsg->computeMapData);
00779
00780 CProxy_WorkDistrib workProxy(thisgroup);
00781 workProxy[0].saveComputeMap(mapMsg);
00782 }
00783
00784
00785 void WorkDistrib::saveComputeMap(ComputeMapMsg *msg)
00786 {
00787
00788
00789
00790
00791
00792
00793 if ( CkMyRank() ) {
00794 NAMD_bug("WorkDistrib::saveComputeMap called on non-rank-zero pe");
00795 }
00796
00797 if ( computeMapArrived && CkMyPe() ) {
00798 ComputeMap::Object()->unpack(msg->nComputes, msg->computeMapData);
00799 }
00800
00801 if ( computeMapArrived ) {
00802 delete msg;
00803 ComputeMap::Object()->initPtrs();
00804 return;
00805 }
00806
00807 computeMapArrived = true;
00808
00809 int pids[3];
00810 int baseNid = 2 * CkMyNode() + 1;
00811 int npid = 0;
00812 if ( (baseNid+npid) < CkNumNodes() ) { pids[npid] = CkNodeFirst(baseNid + npid); ++npid; }
00813 if ( (baseNid+npid) < CkNumNodes() ) { pids[npid] = CkNodeFirst(baseNid + npid); ++npid; }
00814 pids[npid] = CkMyPe(); ++npid;
00815 CProxy_WorkDistrib(thisgroup).saveComputeMap(msg,npid,pids);
00816 }
00817
00818
00819
00820 void WorkDistrib::patchMapInit(void)
00821 {
00822 PatchMap *patchMap = PatchMap::Object();
00823 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00824 Node *node = nd.ckLocalBranch();
00825 SimParameters *params = node->simParameters;
00826 Lattice lattice = params->lattice;
00827
00828 BigReal patchSize = params->patchDimension;
00829
00830 ScaledPosition xmin, xmax;
00831
00832 double maxNumPatches = 1.e9;
00833 if ( params->minAtomsPerPatch > 0 )
00834 #ifndef MEM_OPT_VERSION
00835 maxNumPatches = node->pdb->num_atoms() / params->minAtomsPerPatch;
00836 #else
00837 maxNumPatches = node->molecule->numAtoms / params->minAtomsPerPatch;
00838 #endif
00839
00840 DebugM(3,"Mapping patches\n");
00841 if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
00842 xmin = 0.; xmax = 0.;
00843 }
00844 else if ( params->FMAOn || params->MSMOn ) {
00845
00846 #if 0
00847 node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
00848 node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
00849 node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
00850 #endif
00851 node->pdb->find_extremes(lattice);
00852 node->pdb->get_extremes(xmin, xmax);
00853 #if 0
00854 printf("+++ center=%.4f %.4f %.4f\n",
00855 lattice.origin().x, lattice.origin().y, lattice.origin().z);
00856 printf("+++ xmin=%.4f xmax=%.4f\n", xmin.x, xmax.x);
00857 printf("+++ ymin=%.4f ymax=%.4f\n", xmin.y, xmax.y);
00858 printf("+++ zmin=%.4f zmax=%.4f\n", xmin.z, xmax.z);
00859 #endif
00860
00861 }
00862 else {
00863 #if 0
00864 node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
00865 node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
00866 node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
00867 #endif
00868 node->pdb->find_extremes(lattice, 0.9);
00869 node->pdb->get_extremes(xmin, xmax);
00870 }
00871
00872 #if 0
00873 BigReal origin_shift;
00874 origin_shift = lattice.a_r() * lattice.origin();
00875 xmin.x -= origin_shift;
00876 xmax.x -= origin_shift;
00877 origin_shift = lattice.b_r() * lattice.origin();
00878 xmin.y -= origin_shift;
00879 xmax.y -= origin_shift;
00880 origin_shift = lattice.c_r() * lattice.origin();
00881 xmin.z -= origin_shift;
00882 xmax.z -= origin_shift;
00883 #endif
00884
00885
00886 int twoAwayX = params->twoAwayX;
00887 int twoAwayY = params->twoAwayY;
00888 int twoAwayZ = params->twoAwayZ;
00889
00890
00891 if (params->LCPOOn) {
00892 if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
00893 iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
00894 }
00895 twoAwayX = twoAwayY = twoAwayZ = 0;
00896 }
00897
00898
00899 if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
00900 if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
00901 if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
00902 if ( params->maxPatches > 0 ) {
00903 maxNumPatches = params->maxPatches;
00904 iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
00905 maxNumPatches << "\n" << endi;
00906 }
00907
00908 int numpes = CkNumPes();
00909 SimParameters *simparam = Node::Object()->simParameters;
00910 if(simparam->simulateInitialMapping) {
00911 numpes = simparam->simulatedPEs;
00912 delete [] patchMap->nPatchesOnNode;
00913 patchMap->nPatchesOnNode = new int[numpes];
00914 memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));
00915 }
00916
00917 #ifdef NAMD_CUDA
00918
00919
00920 int numPatches = patchMap->sizeGrid(
00921 xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
00922 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
00923 if ( numPatches < numpes && twoAwayX < 0 ) {
00924 twoAwayX = 1;
00925 numPatches = patchMap->sizeGrid(
00926 xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
00927 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
00928 }
00929 if ( numPatches < numpes && twoAwayY < 0 ) {
00930 twoAwayY = 1;
00931 numPatches = patchMap->sizeGrid(
00932 xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
00933 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
00934 }
00935 if ( numPatches < numpes && twoAwayZ < 0 ) {
00936 twoAwayZ = 1;
00937 numPatches = patchMap->sizeGrid(
00938 xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
00939 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
00940 }
00941 if ( numPatches < numpes ) {
00942 NAMD_die("CUDA-enabled NAMD requires at least one patch per process.");
00943 }
00944 if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
00945 int exactFit = numPatches - numPatches % numpes;
00946 int newNumPatches = patchMap->sizeGrid(
00947 xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
00948 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
00949 if ( newNumPatches == exactFit ) {
00950 iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
00951 maxNumPatches = exactFit;
00952 }
00953 }
00954
00955 patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
00956 twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
00957
00958 #else
00959
00960 int availPes = numpes;
00961 if ( params->noPatchesOnZero && numpes > 1 ) {
00962 availPes -= 1;
00963 if(params->noPatchesOnOne && numpes > 2)
00964 availPes -= 1;
00965 }
00966
00967 int numPatches = patchMap->sizeGrid(
00968 xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
00969 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
00970 if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
00971 ) && twoAwayZ < 0 ) {
00972 twoAwayZ = 0;
00973 numPatches = patchMap->sizeGrid(
00974 xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
00975 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
00976 }
00977 if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
00978 ) && twoAwayY < 0 ) {
00979 twoAwayY = 0;
00980 numPatches = patchMap->sizeGrid(
00981 xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
00982 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
00983 }
00984 if ( ( numPatches > availPes || numPatches > maxNumPatches
00985 ) && twoAwayX < 0 ) {
00986 twoAwayX = 0;
00987 numPatches = patchMap->sizeGrid(
00988 xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
00989 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
00990 }
00991 if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
00992 int newNumPatches = patchMap->sizeGrid(
00993 xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
00994 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
00995 if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
00996 iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
00997 maxNumPatches = availPes;
00998 }
00999 }
01000
01001 patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01002 twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01003
01004 #endif
01005
01006 }
01007
01008
01009
01010 void WorkDistrib::assignNodeToPatch()
01011 {
01012 PatchMap *patchMap = PatchMap::Object();
01013 int nNodes = Node::Object()->numNodes();
01014 SimParameters *simparam = Node::Object()->simParameters;
01015 if(simparam->simulateInitialMapping) {
01016 nNodes = simparam->simulatedPEs;
01017 }
01018
01019 #if USE_TOPOMAP
01020 TopoManager tmgr;
01021 int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
01022 if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
01023 CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
01024 }
01025 else
01026 #endif
01027 if (nNodes > patchMap->numPatches())
01028 assignPatchesBitReversal();
01029 else
01030 #if ! (CMK_BLUEGENEP | CMK_BLUEGENEL)
01031 if ( simparam->PMEOn )
01032 assignPatchesSpaceFillingCurve();
01033 else
01034 #endif
01035 assignPatchesRecursiveBisection();
01036
01037
01038
01039 int *nAtoms = new int[nNodes];
01040 int numAtoms=0;
01041 int i;
01042 for(i=0; i < nNodes; i++)
01043 nAtoms[i] = 0;
01044
01045 for(i=0; i < patchMap->numPatches(); i++)
01046 {
01047
01048
01049
01050
01051
01052 #ifdef MEM_OPT_VERSION
01053 numAtoms += patchMap->numAtoms(i);
01054 nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);
01055 #else
01056 if (patchMap->patch(i)) {
01057 numAtoms += patchMap->patch(i)->getNumAtoms();
01058 nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();
01059 }
01060 #endif
01061 }
01062
01063 if ( numAtoms != Node::Object()->molecule->numAtoms ) {
01064 for(i=0; i < nNodes; i++)
01065 iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
01066 iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
01067 NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
01068 }
01069
01070 delete [] nAtoms;
01071
01072
01073 }
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109 void WorkDistrib::assignPatchesToLowestLoadNode()
01110 {
01111 int pid;
01112 int assignedNode = 0;
01113 PatchMap *patchMap = PatchMap::Object();
01114 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01115 Node *node = nd.ckLocalBranch();
01116 SimParameters *simParams = node->simParameters;
01117 int ncpus = node->numNodes();
01118 if(simParams->simulateInitialMapping) {
01119 ncpus = simParams->simulatedPEs;
01120 }
01121
01122 int *load = new int[ncpus];
01123 int *assignedNodes = new int[patchMap->numPatches()];
01124 for (int i=0; i<ncpus; i++) {
01125 load[i] = 0;
01126 }
01127
01128 int defaultNode = 0;
01129 if ( simParams->noPatchesOnZero && ncpus > 1 ){
01130 defaultNode = 1;
01131 if( simParams->noPatchesOnOne && ncpus > 2)
01132 defaultNode = 2;
01133 }
01134
01135 for(pid=0; pid < patchMap->numPatches(); pid++) {
01136 assignedNode = defaultNode;
01137 for (int i=assignedNode + 1; i < ncpus; i++) {
01138 if (load[i] < load[assignedNode]) assignedNode = i;
01139 }
01140 assignedNodes[pid] = assignedNode;
01141 #ifdef MEM_OPT_VERSION
01142 load[assignedNode] += patchMap->numAtoms(pid) + 1;
01143 #else
01144 load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
01145 #endif
01146 }
01147
01148 delete[] load;
01149 sortNodesAndAssign(assignedNodes);
01150 delete[] assignedNodes;
01151 }
01152
01153
01154 void WorkDistrib::assignPatchesBitReversal()
01155 {
01156 int pid;
01157 PatchMap *patchMap = PatchMap::Object();
01158 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01159 Node *node = nd.ckLocalBranch();
01160 SimParameters *simparam = node->simParameters;
01161
01162 int ncpus = node->numNodes();
01163 if(simparam->simulateInitialMapping) {
01164 ncpus = simparam->simulatedPEs;
01165 }
01166 int npatches = patchMap->numPatches();
01167 if ( ncpus <= npatches )
01168 NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
01169
01170
01171 int npow2 = 1; int nbits = 0;
01172 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
01173
01174
01175 SortableResizeArray<int> seq(ncpus);
01176
01177 int i = 1;
01178 for ( int icpu=0; icpu<(ncpus-1); ++icpu ) {
01179 int ri;
01180 for ( ri = ncpus; ri >= ncpus; ++i ) {
01181 ri = 0;
01182 int pow2 = 1;
01183 int rpow2 = npow2 / 2;
01184 for ( int j=0; j<nbits; ++j ) {
01185 ri += rpow2 * ( ( i / pow2 ) % 2 );
01186 pow2 *= 2; rpow2 /= 2;
01187 }
01188 }
01189 seq[icpu] = ri;
01190 }
01191
01192
01193 sortNodesAndAssign(seq.begin());
01194 if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
01195 }
01196
01197
01198 struct nodesort {
01199 int node;
01200 int a_total;
01201 int b_total;
01202 int c_total;
01203 int npatches;
01204 nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
01205 int operator==(const nodesort &o) const {
01206 float a1 = ((float)a_total)/((float)npatches);
01207 float a2 = ((float)o.a_total)/((float)o.npatches);
01208 float b1 = ((float)b_total)/((float)npatches);
01209 float b2 = ((float)o.b_total)/((float)o.npatches);
01210 float c1 = ((float)c_total)/((float)npatches);
01211 float c2 = ((float)o.c_total)/((float)o.npatches);
01212 return ((a1 == a2) && (b1 == b2) && (c1 == c2));
01213 }
01214 int operator<(const nodesort &o) const {
01215 float a1 = ((float)a_total)/((float)npatches);
01216 float a2 = ((float)o.a_total)/((float)o.npatches);
01217 float b1 = ((float)b_total)/((float)npatches);
01218 float b2 = ((float)o.b_total)/((float)o.npatches);
01219 float c1 = ((float)c_total)/((float)npatches);
01220 float c2 = ((float)o.c_total)/((float)o.npatches);
01221 return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
01222 ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
01223 }
01224 };
01225
01226 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
01227
01228
01229 int i, pid;
01230 PatchMap *patchMap = PatchMap::Object();
01231 int npatches = patchMap->numPatches();
01232 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01233 Node *node = nd.ckLocalBranch();
01234 int nnodes = node->numNodes();
01235 SimParameters *simparam = node->simParameters;
01236 if(simparam->simulateInitialMapping) {
01237 nnodes = simparam->simulatedPEs;
01238 }
01239
01240 ResizeArray<nodesort> allnodes(nnodes);
01241 for ( i=0; i < nnodes; ++i ) {
01242 allnodes[i].node = i;
01243 }
01244 for ( pid=0; pid<npatches; ++pid ) {
01245
01246 allnodes[assignedNode[pid]].npatches++;
01247 allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
01248 allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
01249 allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
01250 }
01251 SortableResizeArray<nodesort> usednodes(nnodes);
01252 usednodes.resize(0);
01253 for ( i=0; i < nnodes; ++i ) {
01254 if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
01255 }
01256 usednodes.sort();
01257 int nused = usednodes.size();
01258 int i2 = nused/2;
01259 for ( i=0; i < nnodes; ++i ) {
01260 if ( allnodes[i].npatches ) allnodes[usednodes[i2++].node].node = i;
01261 if ( i2 == nused ) i2 = 0;
01262 }
01263
01264 for ( pid=0; pid<npatches; ++pid ) {
01265
01266 if ( ! baseNodes ) {
01267 patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);
01268 }
01269 patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);
01270 }
01271 }
01272
01273
01274 void WorkDistrib::assignPatchesRoundRobin()
01275 {
01276 int pid;
01277 PatchMap *patchMap = PatchMap::Object();
01278 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01279 Node *node = nd.ckLocalBranch();
01280 SimParameters *simparam = node->simParameters;
01281 int ncpus = node->numNodes();
01282 if(simparam->simulateInitialMapping) {
01283 ncpus = simparam->simulatedPEs;
01284 }
01285 int *assignedNode = new int[patchMap->numPatches()];
01286
01287 for(pid=0; pid < patchMap->numPatches(); pid++) {
01288 assignedNode[pid] = pid % ncpus;
01289 }
01290
01291 sortNodesAndAssign(assignedNode);
01292 delete [] assignedNode;
01293 }
01294
01295
01296 void WorkDistrib::assignPatchesRecursiveBisection()
01297 {
01298 PatchMap *patchMap = PatchMap::Object();
01299 int *assignedNode = new int[patchMap->numPatches()];
01300 SimParameters *simParams = Node::Object()->simParameters;
01301 int numNodes = Node::Object()->numNodes();
01302 if(simParams->simulateInitialMapping) {
01303 numNodes = simParams->simulatedPEs;
01304 }
01305
01306 int usedNodes = numNodes;
01307 int unusedNodes = 0;
01308 if ( simParams->noPatchesOnZero && numNodes > 1 ){
01309 usedNodes -= 1;
01310 if(simParams->noPatchesOnOne && numNodes > 2)
01311 usedNodes -= 1;
01312 }
01313 unusedNodes = numNodes - usedNodes;
01314 RecBisection recBisec(usedNodes,PatchMap::Object());
01315 if ( recBisec.partition(assignedNode) ) {
01316 if ( unusedNodes !=0 ) {
01317 for ( int i=0; i<patchMap->numPatches(); ++i ) {
01318 assignedNode[i] += unusedNodes;
01319 }
01320 }
01321 sortNodesAndAssign(assignedNode);
01322 delete [] assignedNode;
01323 } else {
01324
01325
01326
01327 delete [] assignedNode;
01328
01329 iout << iWARN
01330 << "WorkDistrib: Recursive bisection fails, "
01331 << "invoking space-filling curve algorithm\n";
01332 assignPatchesSpaceFillingCurve();
01333 }
01334 }
01335
01336
01337 void WorkDistrib::assignPatchesSpaceFillingCurve()
01338 {
01339 PatchMap *patchMap = PatchMap::Object();
01340 int *assignedNode = new int[patchMap->numPatches()];
01341 int numNodes = Node::Object()->numNodes();
01342 SimParameters *simParams = Node::Object()->simParameters;
01343 if(simParams->simulateInitialMapping) {
01344 numNodes = simParams->simulatedPEs;
01345 }
01346 int usedNodes = numNodes;
01347 int unusedNodes = 0;
01348 if ( simParams->noPatchesOnZero && numNodes > 1 ){
01349 usedNodes -= 1;
01350 if(simParams->noPatchesOnOne && numNodes > 2)
01351 usedNodes -= 1;
01352 }
01353 unusedNodes = numNodes - usedNodes;
01354
01355 int numPatches = patchMap->numPatches();
01356 if ( numPatches < usedNodes )
01357 NAMD_bug("WorkDistrib::assignPatchesSpaceFillingCurve() called with more nodes than patches");
01358
01359 ResizeArray<double> patchLoads(numPatches);
01360 SortableResizeArray<double> sortedLoads(numPatches);
01361 for ( int i=0; i<numPatches; ++i ) {
01362 #ifdef MEM_OPT_VERSION
01363 double load = patchMap->numAtoms(i) + 10;
01364 #else
01365 double load = patchMap->patch(i)->getNumAtoms() + 10;
01366 #endif
01367 patchLoads[i] = load;
01368 sortedLoads[i] = load;
01369 }
01370 sortedLoads.sort();
01371
01372
01373 double sumLoad = 0;
01374 double maxPatchLoad = 1;
01375 for ( int i=0; i<numPatches; ++i ) {
01376 double load = sortedLoads[i];
01377 double total = sumLoad + (numPatches-i) * load;
01378 if ( usedNodes * load > total ) break;
01379 sumLoad += load;
01380 maxPatchLoad = load;
01381 }
01382 double totalLoad = 0;
01383 for ( int i=0; i<numPatches; ++i ) {
01384 if ( patchLoads[i] > maxPatchLoad ) patchLoads[i] = maxPatchLoad;
01385 totalLoad += patchLoads[i];
01386 }
01387 if ( usedNodes * maxPatchLoad > totalLoad )
01388 NAMD_bug("algorithm failure in WorkDistrib::assignPatchesSpaceFillingCurve()");
01389
01390
01391 sumLoad = 0;
01392 int node = 0;
01393 int adim = patchMap->gridsize_a();
01394 int bdim = patchMap->gridsize_b();
01395 int cdim = patchMap->gridsize_c();
01396 int b = 0;
01397 int c = 0;
01398 int binc = 1;
01399 int cinc = 1;
01400 for ( int a = 0; a < adim; ++a ) {
01401 while ( b >= 0 && b < bdim ) {
01402 while ( c >= 0 && c < cdim ) {
01403 int pid = patchMap->pid(a,b,c);
01404 assignedNode[pid] = node;
01405 sumLoad += patchLoads[pid];
01406 double targetLoad = (double)(node+1) / (double)usedNodes;
01407 targetLoad *= totalLoad;
01408 if ( node+1 < usedNodes && sumLoad >= targetLoad ) ++node;
01409 c += cinc;
01410 }
01411 cinc *= -1; c += cinc;
01412 b += binc;
01413 }
01414 binc *= -1; b += binc;
01415 }
01416
01417 if(unusedNodes>0) {
01418 for ( int i=0; i<patchMap->numPatches(); ++i ) {
01419 assignedNode[i] += unusedNodes;
01420 }
01421 }
01422
01423 sortNodesAndAssign(assignedNode);
01424 delete [] assignedNode;
01425 }
01426
01427
01428 void WorkDistrib::mapComputes(void)
01429 {
01430 PatchMap *patchMap = PatchMap::Object();
01431 ComputeMap *computeMap = ComputeMap::Object();
01432 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01433 Node *node = nd.ckLocalBranch();
01434
01435 DebugM(3,"Mapping computes\n");
01436
01437 computeMap->allocateCids();
01438
01439
01440 if ( node->simParameters->fullDirectOn )
01441 mapComputeHomePatches(computeFullDirectType);
01442 if ( node->simParameters->FMAOn )
01443 #ifdef DPMTA
01444 mapComputeHomePatches(computeDPMTAType);
01445 #else
01446 NAMD_die("This binary does not include DPMTA (FMA).");
01447 #endif
01448 if ( node->simParameters->PMEOn ) {
01449 #ifdef DPME
01450 if ( node->simParameters->useDPME )
01451 mapComputeHomePatches(computeDPMEType);
01452 else {
01453 if (node->simParameters->useOptPME) {
01454 mapComputeHomePatches(optPmeType);
01455 if ( node->simParameters->pressureProfileEwaldOn )
01456 mapComputeHomePatches(computeEwaldType);
01457 }
01458 else {
01459 mapComputeHomePatches(computePmeType);
01460 if ( node->simParameters->pressureProfileEwaldOn )
01461 mapComputeHomePatches(computeEwaldType);
01462 }
01463 }
01464 #else
01465 if (node->simParameters->useOptPME) {
01466 mapComputeHomePatches(optPmeType);
01467 if ( node->simParameters->pressureProfileEwaldOn )
01468 mapComputeHomePatches(computeEwaldType);
01469 }
01470 else {
01471 mapComputeHomePatches(computePmeType);
01472 if ( node->simParameters->pressureProfileEwaldOn )
01473 mapComputeHomePatches(computeEwaldType);
01474 }
01475 #endif
01476 }
01477
01478 if ( node->simParameters->globalForcesOn ) {
01479 DebugM(2,"adding ComputeGlobal\n");
01480 mapComputeHomePatches(computeGlobalType);
01481 }
01482
01483 if ( node->simParameters->extForcesOn )
01484 mapComputeHomePatches(computeExtType);
01485
01486 if ( node->simParameters->GBISserOn )
01487 mapComputeHomePatches(computeGBISserType);
01488
01489 if ( node->simParameters->MsmSerialOn )
01490 mapComputeHomePatches(computeMsmSerialType);
01491 #ifdef CHARM_HAS_MSA
01492 else if ( node->simParameters->MSMOn )
01493 mapComputeHomePatches(computeMsmMsaType);
01494 #else
01495 else if ( node->simParameters->MSMOn )
01496 mapComputeHomePatches(computeMsmType);
01497 #endif
01498
01499 #ifdef NAMD_CUDA
01500 mapComputeNode(computeNonbondedCUDAType);
01501 mapComputeHomeTuples(computeExclsType);
01502 mapComputePatch(computeSelfExclsType);
01503 #endif
01504
01505 mapComputeNonbonded();
01506
01507 if ( node->simParameters->LCPOOn ) {
01508 mapComputeLCPO();
01509 }
01510
01511
01512
01513 if ( !node->simParameters->pairInteractionOn ||
01514 node->simParameters->pairInteractionSelf) {
01515 mapComputeHomeTuples(computeBondsType);
01516 mapComputeHomeTuples(computeAnglesType);
01517 mapComputeHomeTuples(computeDihedralsType);
01518 mapComputeHomeTuples(computeImpropersType);
01519 mapComputeHomeTuples(computeCrosstermsType);
01520 mapComputePatch(computeSelfBondsType);
01521 mapComputePatch(computeSelfAnglesType);
01522 mapComputePatch(computeSelfDihedralsType);
01523 mapComputePatch(computeSelfImpropersType);
01524 mapComputePatch(computeSelfCrosstermsType);
01525 }
01526
01527 if ( node->simParameters->drudeOn ) {
01528 mapComputeHomeTuples(computeTholeType);
01529 mapComputePatch(computeSelfTholeType);
01530 mapComputeHomeTuples(computeAnisoType);
01531 mapComputePatch(computeSelfAnisoType);
01532 }
01533
01534 if ( node->simParameters->eFieldOn )
01535 mapComputePatch(computeEFieldType);
01536
01537 if ( node->simParameters->mgridforceOn )
01538 mapComputePatch(computeGridForceType);
01539
01540 if ( node->simParameters->stirOn )
01541 mapComputePatch(computeStirType);
01542 if ( node->simParameters->sphericalBCOn )
01543 mapComputePatch(computeSphericalBCType);
01544 if ( node->simParameters->cylindricalBCOn )
01545 mapComputePatch(computeCylindricalBCType);
01546 if ( node->simParameters->tclBCOn ) {
01547 mapComputeHomePatches(computeTclBCType);
01548 }
01549 if ( node->simParameters->constraintsOn )
01550 mapComputePatch(computeRestraintsType);
01551 if ( node->simParameters->consForceOn )
01552 mapComputePatch(computeConsForceType);
01553 if ( node->simParameters->consTorqueOn )
01554 mapComputePatch(computeConsTorqueType);
01555
01556
01557 SimParameters *simParams = Node::Object()->simParameters;
01558 if (simParams->storeComputeMap) {
01559 computeMap->saveComputeMap(simParams->computeMapFilename);
01560 }
01561
01562 if (simParams->loadComputeMap) {
01563 computeMap->loadComputeMap(simParams->computeMapFilename);
01564 CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
01565 }
01566 }
01567
01568
01569 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
01570 {
01571 PatchMap *patchMap = PatchMap::Object();
01572 ComputeMap *computeMap = ComputeMap::Object();
01573 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01574 Node *node = nd.ckLocalBranch();
01575
01576 int numNodes = node->numNodes();
01577 SimParameters *simparam = node->simParameters;
01578 if(simparam->simulateInitialMapping) {
01579 numNodes = simparam->simulatedPEs;
01580 }
01581
01582 char *isBaseNode = new char[numNodes];
01583 memset(isBaseNode,0,numNodes*sizeof(char));
01584
01585 int numPatches = patchMap->numPatches();
01586 for(int j=0; j<numPatches; j++) {
01587 isBaseNode[patchMap->basenode(j)] = 1;
01588 }
01589
01590 for(int i=0; i<numNodes; i++) {
01591 if ( isBaseNode[i] ) {
01592 computeMap->storeCompute(i,0,type);
01593 }
01594 }
01595
01596 delete [] isBaseNode;
01597 }
01598
01599
01600 void WorkDistrib::mapComputeHomePatches(ComputeType type)
01601 {
01602 PatchMap *patchMap = PatchMap::Object();
01603 ComputeMap *computeMap = ComputeMap::Object();
01604 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01605 Node *node = nd.ckLocalBranch();
01606
01607 int numNodes = node->numNodes();
01608 SimParameters *simparam = node->simParameters;
01609 if(simparam->simulateInitialMapping) {
01610 numNodes = simparam->simulatedPEs;
01611 }
01612
01613 for(int i=0; i<numNodes; i++) {
01614 if ( patchMap->numPatchesOnNode(i) ) {
01615 computeMap->storeCompute(i,0,type);
01616 }
01617 }
01618 }
01619
01620
01621 void WorkDistrib::mapComputePatch(ComputeType type)
01622 {
01623 PatchMap *patchMap = PatchMap::Object();
01624 ComputeMap *computeMap = ComputeMap::Object();
01625
01626 PatchID i;
01627 ComputeID cid;
01628
01629 for(i=0; i<patchMap->numPatches(); i++)
01630 {
01631 cid=computeMap->storeCompute(patchMap->node(i),1,type);
01632 computeMap->newPid(cid,i);
01633 patchMap->newCid(i,cid);
01634 }
01635
01636 }
01637
01638
01639
01640 void WorkDistrib::mapComputeNode(ComputeType type)
01641 {
01642 PatchMap *patchMap = PatchMap::Object();
01643 ComputeMap *computeMap = ComputeMap::Object();
01644
01645 PatchID i;
01646 ComputeID cid;
01647
01648 int ncpus = CkNumPes();
01649 SimParameters *simparam = Node::Object()->simParameters;
01650 if(simparam->simulateInitialMapping) {
01651 ncpus = simparam->simulatedPEs;
01652 }
01653
01654 for(int i=0; i<ncpus; i++) {
01655 computeMap->storeCompute(i,0,type);
01656 }
01657
01658 }
01659
01660
01661
01662 void WorkDistrib::mapComputeNonbonded(void)
01663 {
01664
01665
01666
01667
01668 PatchMap *patchMap = PatchMap::Object();
01669 ComputeMap *computeMap = ComputeMap::Object();
01670 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01671 Node *node = nd.ckLocalBranch();
01672 SimParameters *simParams = Node::Object()->simParameters;
01673 int ncpus = CkNumPes();
01674 int nodesize = CkMyNodeSize();
01675 if(simParams->simulateInitialMapping) {
01676 ncpus = simParams->simulatedPEs;
01677 nodesize = simParams->simulatedNodeSize;
01678 }
01679
01680 PatchID oneAway[PatchMap::MaxOneOrTwoAway];
01681 PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
01682 int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
01683
01684 PatchID i;
01685 ComputeID cid;
01686 int numNeighbors;
01687 int j;
01688 double partScaling = 1.0;
01689 if ( ncpus < patchMap->numPatches() ) {
01690 partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
01691 }
01692
01693 for(i=0; i<patchMap->numPatches(); i++)
01694 {
01695
01696 int numPartitions = 1;
01697 if ( simParams->ldBalancer == LDBAL_HYBRID ) {
01698 #ifdef MEM_OPT_VERSION
01699 int64 numFixed = patchMap->numFixedAtoms(i);
01700 int64 numAtoms = patchMap->numAtoms(i);
01701 #else
01702 int64 numFixed = patchMap->patch(i)->getNumFixedAtoms();
01703 int64 numAtoms = patchMap->patch(i)->getNumAtoms();
01704 #endif
01705
01706 int divide = node->simParameters->numAtomsSelf;
01707 if (divide > 0) {
01708 numPartitions = (int) ( partScaling * ( 0.5 +
01709 (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
01710 }
01711 if (numPartitions < 1) numPartitions = 1;
01712 if ( numPartitions > node->simParameters->maxSelfPart )
01713 numPartitions = node->simParameters->maxSelfPart;
01714
01715 DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
01716
01717 }
01718
01719 for(int partition=0; partition < numPartitions; partition++)
01720 {
01721 cid=computeMap->storeCompute(patchMap->node(i),1,
01722 computeNonbondedSelfType,
01723 partition,numPartitions);
01724 computeMap->newPid(cid,i);
01725 patchMap->newCid(i,cid);
01726 }
01727 }
01728
01729 for(int p1=0; p1 <patchMap->numPatches(); p1++)
01730 {
01731
01732 numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
01733 for(j=0;j<numNeighbors;j++)
01734 {
01735 int p2 = oneAway[j];
01736 int dsp = oneAwayDownstream[j];
01737
01738 int numPartitions = 1;
01739 if ( simParams->ldBalancer == LDBAL_HYBRID ) {
01740 #ifdef MEM_OPT_VERSION
01741 int64 numAtoms1 = patchMap->numAtoms(p1);
01742 int64 numAtoms2 = patchMap->numAtoms(p2);
01743 int64 numFixed1 = patchMap->numFixedAtoms(p1);
01744 int64 numFixed2 = patchMap->numFixedAtoms(p2);
01745 #else
01746 int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
01747 int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
01748 int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
01749 int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
01750 #endif
01751
01752
01753 const int t2 = oneAwayTrans[j];
01754 const int adim = patchMap->gridsize_a();
01755 const int bdim = patchMap->gridsize_b();
01756 const int cdim = patchMap->gridsize_c();
01757 const int nax = patchMap->numaway_a();
01758 const int nay = patchMap->numaway_b();
01759 const int naz = patchMap->numaway_c();
01760 const int ia1 = patchMap->index_a(p1);
01761 const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
01762 const int ib1 = patchMap->index_b(p1);
01763 const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
01764 const int ic1 = patchMap->index_c(p1);
01765 const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
01766
01767 if ( abs(ia2-ia1) > nax ||
01768 abs(ib2-ib1) > nay ||
01769 abs(ic2-ic1) > naz )
01770 NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
01771
01772 int distance = 3;
01773 if ( ia1 == ia2 ) --distance;
01774 else if ( ia1 == ia2 + nax - 1 ) --distance;
01775 else if ( ia1 + nax - 1 == ia2 ) --distance;
01776 if ( ib1 == ib2 ) --distance;
01777 else if ( ib1 == ib2 + nay - 1 ) --distance;
01778 else if ( ib1 + nay - 1 == ib2 ) --distance;
01779 if ( ic1 == ic2 ) --distance;
01780 else if ( ic1 == ic2 + naz - 1 ) --distance;
01781 else if ( ic1 + naz - 1 == ic2 ) --distance;
01782 int divide = 0;
01783 if ( distance == 0 ) {
01784 divide = node->simParameters->numAtomsSelf2;
01785 } else if (distance == 1) {
01786 divide = node->simParameters->numAtomsPair;
01787 } else {
01788 divide = node->simParameters->numAtomsPair2;
01789 }
01790 if (divide > 0) {
01791 numPartitions = (int) ( partScaling * ( 0.5 +
01792 (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
01793 }
01794 if ( numPartitions < 1 ) numPartitions = 1;
01795 if ( numPartitions > node->simParameters->maxPairPart )
01796 numPartitions = node->simParameters->maxPairPart;
01797
01798 }
01799 for(int partition=0; partition < numPartitions; partition++)
01800 {
01801 cid=computeMap->storeCompute( patchMap->basenode(dsp),
01802 2,computeNonbondedPairType,partition,numPartitions);
01803 computeMap->newPid(cid,p1);
01804 computeMap->newPid(cid,p2,oneAwayTrans[j]);
01805 patchMap->newCid(p1,cid);
01806 patchMap->newCid(p2,cid);
01807 }
01808 }
01809 }
01810 }
01811
01812
01813 void WorkDistrib::mapComputeLCPO(void) {
01814
01815
01816 PatchMap *patchMap = PatchMap::Object();
01817 ComputeMap *computeMap = ComputeMap::Object();
01818 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01819 Node *node = nd.ckLocalBranch();
01820 SimParameters *simParams = Node::Object()->simParameters;
01821 int ncpus = CkNumPes();
01822 int nodesize = CkMyNodeSize();
01823 const int maxPatches = 8;
01824
01825 int numPatchesInOctet;
01826 PatchID patchesInOctet[maxPatches];
01827 int oneAwayTrans[maxPatches];
01828
01829
01830 int numPartitions = 1;
01831
01832 PatchID i;
01833 ComputeID cid;
01834
01835
01836 for(i=0; i<patchMap->numPatches(); i++) {
01837 numPatchesInOctet =
01838 patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
01839
01840 for(int partition=0; partition < numPartitions; partition++) {
01841 cid=computeMap->storeCompute(patchMap->node(i),
01842 numPatchesInOctet,
01843 computeLCPOType,
01844 partition,
01845 numPartitions);
01846 for (int p = 0; p < numPatchesInOctet; p++) {
01847 computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
01848 }
01849 for (int p = 0; p < numPatchesInOctet; p++) {
01850 patchMap->newCid(patchesInOctet[p],cid);
01851 }
01852 }
01853 }
01854 }
01855
01856
01857 void WorkDistrib::messageEnqueueWork(Compute *compute) {
01858 LocalWorkMsg *msg = compute->localWorkMsg;
01859 int seq = compute->sequence();
01860 int gbisPhase = compute->getGBISPhase();
01861
01862 if ( seq < 0 ) {
01863 NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
01864 } else {
01865 SET_PRIORITY(msg,seq,compute->priority());
01866 }
01867
01868 msg->compute = compute;
01869 int type = compute->type();
01870 int cid = compute->cid;
01871
01872 CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
01873 switch ( type ) {
01874 case computeExclsType:
01875 case computeSelfExclsType:
01876 wdProxy[CkMyPe()].enqueueExcls(msg);
01877 break;
01878 case computeBondsType:
01879 case computeSelfBondsType:
01880 wdProxy[CkMyPe()].enqueueBonds(msg);
01881 break;
01882 case computeAnglesType:
01883 case computeSelfAnglesType:
01884 wdProxy[CkMyPe()].enqueueAngles(msg);
01885 break;
01886 case computeDihedralsType:
01887 case computeSelfDihedralsType:
01888 wdProxy[CkMyPe()].enqueueDihedrals(msg);
01889 break;
01890 case computeImpropersType:
01891 case computeSelfImpropersType:
01892 wdProxy[CkMyPe()].enqueueImpropers(msg);
01893 break;
01894 case computeTholeType:
01895 case computeSelfTholeType:
01896 wdProxy[CkMyPe()].enqueueThole(msg);
01897 break;
01898 case computeAnisoType:
01899 case computeSelfAnisoType:
01900 wdProxy[CkMyPe()].enqueueAniso(msg);
01901 break;
01902 case computeCrosstermsType:
01903 case computeSelfCrosstermsType:
01904 wdProxy[CkMyPe()].enqueueCrossterms(msg);
01905 break;
01906 case computeLCPOType:
01907 wdProxy[CkMyPe()].enqueueLCPO(msg);
01908 break;
01909 case computeNonbondedSelfType:
01910 switch ( seq % 2 ) {
01911 case 0:
01912
01913 switch ( gbisPhase ) {
01914 case 1:
01915 wdProxy[CkMyPe()].enqueueSelfA1(msg);
01916 break;
01917 case 2:
01918 wdProxy[CkMyPe()].enqueueSelfA2(msg);
01919 break;
01920 case 3:
01921 wdProxy[CkMyPe()].enqueueSelfA3(msg);
01922 break;
01923 }
01924 break;
01925 case 1:
01926
01927 switch ( gbisPhase ) {
01928 case 1:
01929 wdProxy[CkMyPe()].enqueueSelfB1(msg);
01930 break;
01931 case 2:
01932 wdProxy[CkMyPe()].enqueueSelfB2(msg);
01933 break;
01934 case 3:
01935 wdProxy[CkMyPe()].enqueueSelfB3(msg);
01936 break;
01937 }
01938 break;
01939 default:
01940 NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
01941 }
01942 break;
01943 case computeNonbondedPairType:
01944 switch ( seq % 2 ) {
01945 case 0:
01946
01947 switch ( gbisPhase ) {
01948 case 1:
01949 wdProxy[CkMyPe()].enqueueWorkA1(msg);
01950 break;
01951 case 2:
01952 wdProxy[CkMyPe()].enqueueWorkA2(msg);
01953 break;
01954 case 3:
01955 wdProxy[CkMyPe()].enqueueWorkA3(msg);
01956 break;
01957 }
01958 break;
01959 case 1:
01960
01961 switch ( gbisPhase ) {
01962 case 1:
01963 wdProxy[CkMyPe()].enqueueWorkB1(msg);
01964 break;
01965 case 2:
01966 wdProxy[CkMyPe()].enqueueWorkB2(msg);
01967 break;
01968 case 3:
01969 wdProxy[CkMyPe()].enqueueWorkB3(msg);
01970 break;
01971 }
01972 break;
01973 case 2:
01974 wdProxy[CkMyPe()].enqueueWorkC(msg);
01975 break;
01976 default:
01977 NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
01978 }
01979 break;
01980 case computeNonbondedCUDAType:
01981 #ifdef NAMD_CUDA
01982
01983
01984 switch ( gbisPhase ) {
01985 case 1:
01986 wdProxy[CkMyPe()].enqueueCUDA(msg);
01987 break;
01988 case 2:
01989 wdProxy[CkMyPe()].enqueueCUDAP2(msg);
01990 break;
01991 case 3:
01992 wdProxy[CkMyPe()].enqueueCUDAP3(msg);
01993 break;
01994 }
01995 #else
01996 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
01997 #endif
01998 break;
01999 case computePmeType:
02000
02001 #ifdef NAMD_CUDA
02002 wdProxy[CkMyPe()].enqueuePme(msg);
02003 #else
02004 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02005 #endif
02006 break;
02007 case optPmeType:
02008
02009 #ifdef NAMD_CUDA
02010 wdProxy[CkMyPe()].enqueuePme(msg);
02011 #else
02012 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02013 #endif
02014 break;
02015 default:
02016 wdProxy[CkMyPe()].enqueueWork(msg);
02017 }
02018 }
02019
02020 void WorkDistrib::enqueueWork(LocalWorkMsg *msg) {
02021 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02022 if ( msg->compute->localWorkMsg != msg )
02023 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02024 }
02025
02026 void WorkDistrib::enqueueExcls(LocalWorkMsg *msg) {
02027 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02028 if ( msg->compute->localWorkMsg != msg )
02029 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02030 }
02031
02032 void WorkDistrib::enqueueBonds(LocalWorkMsg *msg) {
02033 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02034 if ( msg->compute->localWorkMsg != msg )
02035 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02036 }
02037
02038 void WorkDistrib::enqueueAngles(LocalWorkMsg *msg) {
02039 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02040 if ( msg->compute->localWorkMsg != msg )
02041 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02042 }
02043
02044 void WorkDistrib::enqueueDihedrals(LocalWorkMsg *msg) {
02045 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02046 if ( msg->compute->localWorkMsg != msg )
02047 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02048 }
02049
02050 void WorkDistrib::enqueueImpropers(LocalWorkMsg *msg) {
02051 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02052 if ( msg->compute->localWorkMsg != msg )
02053 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02054 }
02055
02056 void WorkDistrib::enqueueThole(LocalWorkMsg *msg) {
02057 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02058 if ( msg->compute->localWorkMsg != msg )
02059 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02060 }
02061
02062 void WorkDistrib::enqueueAniso(LocalWorkMsg *msg) {
02063 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02064 if ( msg->compute->localWorkMsg != msg )
02065 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02066 }
02067
02068 void WorkDistrib::enqueueCrossterms(LocalWorkMsg *msg) {
02069 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02070 if ( msg->compute->localWorkMsg != msg )
02071 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02072 }
02073
02074 void WorkDistrib::enqueuePme(LocalWorkMsg *msg) {
02075 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02076 if ( msg->compute->localWorkMsg != msg )
02077 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02078 }
02079
02080 void WorkDistrib::enqueueLCPO(LocalWorkMsg *msg) {
02081 msg->compute->doWork();
02082 if ( msg->compute->localWorkMsg != msg )
02083 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02084 }
02085 void WorkDistrib::enqueueSelfA1(LocalWorkMsg *msg) {
02086 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02087 if ( msg->compute->localWorkMsg != msg )
02088 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02089 }
02090 void WorkDistrib::enqueueSelfA2(LocalWorkMsg *msg) {
02091 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02092 if ( msg->compute->localWorkMsg != msg )
02093 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02094 }
02095 void WorkDistrib::enqueueSelfA3(LocalWorkMsg *msg) {
02096 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02097 if ( msg->compute->localWorkMsg != msg )
02098 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02099 }
02100
02101 void WorkDistrib::enqueueSelfB1(LocalWorkMsg *msg) {
02102 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02103 if ( msg->compute->localWorkMsg != msg )
02104 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02105 }
02106 void WorkDistrib::enqueueSelfB2(LocalWorkMsg *msg) {
02107 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02108 if ( msg->compute->localWorkMsg != msg )
02109 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02110 }
02111 void WorkDistrib::enqueueSelfB3(LocalWorkMsg *msg) {
02112 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02113 if ( msg->compute->localWorkMsg != msg )
02114 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02115 }
02116
02117 void WorkDistrib::enqueueWorkA1(LocalWorkMsg *msg) {
02118 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02119 if ( msg->compute->localWorkMsg != msg )
02120 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02121 }
02122 void WorkDistrib::enqueueWorkA2(LocalWorkMsg *msg) {
02123 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02124 if ( msg->compute->localWorkMsg != msg )
02125 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02126 }
02127 void WorkDistrib::enqueueWorkA3(LocalWorkMsg *msg) {
02128 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02129 if ( msg->compute->localWorkMsg != msg )
02130 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02131 }
02132
02133 void WorkDistrib::enqueueWorkB1(LocalWorkMsg *msg) {
02134 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02135 if ( msg->compute->localWorkMsg != msg )
02136 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02137 }
02138 void WorkDistrib::enqueueWorkB2(LocalWorkMsg *msg) {
02139 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02140 if ( msg->compute->localWorkMsg != msg )
02141 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02142 }
02143 void WorkDistrib::enqueueWorkB3(LocalWorkMsg *msg) {
02144 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02145 if ( msg->compute->localWorkMsg != msg )
02146 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02147 }
02148
02149
02150
02151 void WorkDistrib::enqueueWorkC(LocalWorkMsg *msg) {
02152 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02153 if ( msg->compute->localWorkMsg != msg )
02154 NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02155 }
02156
02157 void WorkDistrib::enqueueCUDA(LocalWorkMsg *msg) {
02158 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02159
02160
02161
02162 }
02163 void WorkDistrib::enqueueCUDAP2(LocalWorkMsg *msg) {
02164 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02165 }
02166 void WorkDistrib::enqueueCUDAP3(LocalWorkMsg *msg) {
02167 msg->compute->doWork(); traceUserEvent(eventMachineProgress); CmiMachineProgressImpl();
02168 }
02169
02170
02171
02172
02173
02174
02175
02176
02177
02178
02179
02180
02181
02182
02183
02184
02185 void WorkDistrib::velocities_from_PDB(char *filename,
02186 Vector *v, int totalAtoms)
02187 {
02188 PDB *v_pdb;
02189 int i;
02190
02191
02192 v_pdb = new PDB(filename);
02193 if ( v_pdb == NULL )
02194 {
02195 NAMD_die("memory allocation failed in Node::velocities_from_PDB");
02196 }
02197
02198
02199
02200 if (v_pdb->num_atoms() != totalAtoms)
02201 {
02202 char err_msg[129];
02203
02204 sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
02205 v_pdb->num_atoms());
02206
02207 NAMD_die(err_msg);
02208 }
02209
02210
02211
02212 v_pdb->get_all_positions(v);
02213
02214 for (i=0; i<totalAtoms; i++)
02215 {
02216 v[i].x *= PDBVELINVFACTOR;
02217 v[i].y *= PDBVELINVFACTOR;
02218 v[i].z *= PDBVELINVFACTOR;
02219 }
02220
02221 delete v_pdb;
02222 }
02223
02224
02225
02226
02227
02228
02229
02230
02231
02232
02233
02234
02235
02236
02237
02238
02239 void WorkDistrib::velocities_from_binfile(char *fname, Vector *vels, int n)
02240 {
02241 read_binary_file(fname,vels,n);
02242 }
02243
02244
02245
02246
02247
02248
02249
02250
02251
02252
02253
02254
02255
02256
02257
02258
02259 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
02260 Vector *v, int totalAtoms)
02261 {
02262 int i, j;
02263 BigReal kbT;
02264 BigReal randnum;
02265 BigReal kbToverM;
02266 SimParameters *simParams = Node::Object()->simParameters;
02267 Bool lesOn = simParams->lesOn;
02268 Random vel_random(simParams->randomSeed);
02269
02270 int lesReduceTemp = lesOn && simParams->lesReduceTemp;
02271 BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
02272
02273 kbT = Temp*BOLTZMANN;
02274
02275
02276
02277 for (i=0; i<totalAtoms; i++)
02278 {
02279 if (structure->atommass(i) <= 0.) {
02280 kbToverM = 0.;
02281 } else {
02282 kbToverM = sqrt(kbT *
02283 ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
02284 structure->atommass(i) );
02285 }
02286
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296 for (randnum=0.0, j=0; j<12; j++)
02297 {
02298 randnum += vel_random.uniform();
02299 }
02300
02301 randnum -= 6.0;
02302
02303 v[i].x = randnum*kbToverM;
02304
02305 for (randnum=0.0, j=0; j<12; j++)
02306 {
02307 randnum += vel_random.uniform();
02308 }
02309
02310 randnum -= 6.0;
02311
02312 v[i].y = randnum*kbToverM;
02313
02314 for (randnum=0.0, j=0; j<12; j++)
02315 {
02316 randnum += vel_random.uniform();
02317 }
02318
02319 randnum -= 6.0;
02320
02321 v[i].z = randnum*kbToverM;
02322 }
02323 }
02324
02325
02326
02327
02328
02329
02330
02331
02332
02333
02334
02335
02336
02337 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
02338 {
02339 Vector mv(0,0,0);
02340 BigReal totalMass=0;
02341 int i;
02342
02343
02344 for (i=0; i<n; i++)
02345 {
02346 BigReal mass = structure->atommass(i);
02347 mv += mass * vel[i];
02348 totalMass += mass;
02349 }
02350
02351 mv /= totalMass;
02352
02353 iout << iINFO << "REMOVING COM VELOCITY "
02354 << ( PDBVELFACTOR * mv ) << "\n" << endi;
02355
02356 for (i=0; i<n; i++) { vel[i] -= mv; }
02357
02358 }
02359
02360
02361 #if USE_TOPOMAP
02362
02363
02364
02365 int WorkDistrib::assignPatchesTopoGridRecBisection() {
02366
02367 PatchMap *patchMap = PatchMap::Object();
02368 int *assignedNode = new int[patchMap->numPatches()];
02369 int numNodes = Node::Object()->numNodes();
02370 SimParameters *simParams = Node::Object()->simParameters;
02371 if(simParams->simulateInitialMapping) {
02372 numNodes = simParams->simulatedPEs;
02373 }
02374
02375 int usedNodes = numNodes;
02376
02377 if ( simParams->noPatchesOnZero && numNodes > 1 ) {
02378 usedNodes -= 1;
02379 if ( simParams->noPatchesOnOne && numNodes > 2 )
02380 usedNodes -= 1;
02381 }
02382 RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());
02383
02384 int xsize = 0, ysize = 0, zsize = 0;
02385
02386
02387 TopoManager tmgr;
02388 xsize = tmgr.getDimNX();
02389 ysize = tmgr.getDimNY();
02390 zsize = tmgr.getDimNZ();
02391
02392
02393 int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
02394
02395 delete [] assignedNode;
02396
02397 return rc;
02398 }
02399 #endif
02400
02401 #include "WorkDistrib.def.h"
02402