00001
00002 #include "largefiles.h"
00003
00004 #include <stdio.h>
00005 #include "BOCgroup.h"
00006 #include "Molecule.h"
00007 #include "Node.h"
00008 #include "Node.decl.h"
00009 #include "NamdState.h"
00010 #include "WorkDistrib.h"
00011 #include "PDB.h"
00012 #include "PatchMap.h"
00013 #include "PatchMap.inl"
00014 #include "packmsg.h"
00015 #include "HomePatch.h"
00016 #include "InfoStream.h"
00017 #include "CollectionMaster.h"
00018 #include "CollectionMgr.h"
00019
00020 #include "ParallelIOMgr.decl.h"
00021 #include "ParallelIOMgr.h"
00022
00023 #include "Output.h"
00024 #include "Random.h"
00025
00026 #include <algorithm>
00027 using namespace std;
00028
00029 ParallelIOMgr::ParallelIOMgr()
00030 {
00031 CkpvAccess(BOCclass_group).ioMgr = thisgroup;
00032
00033 numInputProcs=-1;
00034 inputProcArray = NULL;
00035 numOutputProcs=-1;
00036 outputProcArray = NULL;
00037
00038 procsReceived=0;
00039 hydroMsgRecved=0;
00040
00041 totalMV.x = totalMV.y = totalMV.z = 0.0;
00042 totalMass = 0.0;
00043 totalCharge = 0.0;
00044
00045 isOKToRecvHPAtoms = false;
00046 hpAtomsList = NULL;
00047
00048 clusterID = NULL;
00049 clusterSize = NULL;
00050
00051 #ifdef MEM_OPT_VERSION
00052 midCM = NULL;
00053 #endif
00054
00055 isWater = NULL;
00056
00057 numCSMAck = 0;
00058 numReqRecved = 0;
00059
00060 sendAtomsThread = 0;
00061
00062 #if COLLECT_PERFORMANCE_DATA
00063 numFixedAtomLookup = 0;
00064 #endif
00065 }
00066
00067 ParallelIOMgr::~ParallelIOMgr()
00068 {
00069 delete [] inputProcArray;
00070 delete [] outputProcArray;
00071 delete [] clusterID;
00072 delete [] clusterSize;
00073
00074 #ifdef MEM_OPT_VERSION
00075 delete midCM;
00076 #endif
00077
00078 delete [] isWater;
00079 }
00080
00081
00082
00083 void ParallelIOMgr::initialize(Node *node)
00084 {
00085 simParameters = node->simParameters;
00086 molecule = node->molecule;
00087
00088 numInputProcs = simParameters->numinputprocs;
00089 numOutputProcs = simParameters->numoutputprocs;
00090 numOutputWrts = simParameters->numoutputwrts;
00091
00092
00093 if(!CkMyPe()) {
00094 iout << iINFO << "Running with " <<numInputProcs<<" input processors.\n"<<endi;
00095 #if OUTPUT_SINGLE_FILE
00096 iout << iINFO << "Running with " <<numOutputProcs<<" output processors ("<<numOutputWrts<<" of them will output simultaneously).\n"<<endi;
00097 #else
00098 iout << iINFO << "Running with " <<numOutputProcs<<" output processors, and each of them will output to its own separate file.\n"<<endi;
00099 #endif
00100 }
00101
00102
00103 {
00104 inputProcArray = new int[numInputProcs];
00105 myInputRank = -1;
00106 int stride = (numInputProcs > 1) ? (CkNumPes()-1)/(numInputProcs-1) : 1;
00107 int startpe = 0;
00108 for(int i=0; i<numInputProcs; i++) {
00109 int pe = inputProcArray[i] = startpe + i*stride;
00110 if ( pe < 0 || pe >= CkNumPes() ) NAMD_bug("Input proc out of range");
00111 if ( pe == CkMyPe() ) {
00112 if ( myInputRank != -1 ) NAMD_bug("Duplicate input proc");
00113 myInputRank = i;
00114 }
00115 }
00116 }
00117
00118 if(myInputRank!=-1) {
00119
00120
00121 int numMyAtoms = numInitMyAtomsOnInput();
00122 float growthRate = 100.0f/numMyAtoms;
00123 initAtoms.setParams(numMyAtoms+100, growthRate);
00124 initAtoms.resize(numMyAtoms);
00125 tmpRecvAtoms.resize(0);
00126 } else {
00127 initAtoms.resize(0);
00128 tmpRecvAtoms.resize(0);
00129 }
00130 hpIDList.resize(0);
00131
00132
00133
00134 {
00135 outputProcArray = new int[numOutputProcs];
00136 int stride = CkNumPes()/numOutputProcs;
00137 int startpe = 0;
00138 for(int i=0; i<numOutputProcs; i++) {
00139 outputProcArray[i] = startpe + i*stride;
00140 }
00141
00142 int residue = (CkMyPe()-startpe)%stride;
00143 if(residue==0) {
00144 int rank = (CkMyPe()-startpe)/stride;
00145 myOutputRank = rank<numOutputProcs ? rank : -1;
00146 } else {
00147 myOutputRank = -1;
00148 }
00149 }
00150
00151 #ifdef MEM_OPT_VERSION
00152 if(myOutputRank!=-1) {
00153 midCM = new CollectionMidMaster(this);
00154 }
00155 remoteClusters.clear();
00156 csmBuf.resize(0);
00157 remoteCoors.clear();
00158 ccmBuf.resize(0);
00159
00160 mainMaster = CollectionMgr::Object()->getMasterChareID();
00161 #endif
00162 }
00163
00164 void ParallelIOMgr::readPerAtomInfo()
00165 {
00166 #ifdef MEM_OPT_VERSION
00167 if(myInputRank!=-1) {
00168 int myAtomLIdx, myAtomUIdx;
00169 getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
00170
00171
00172 molecule->read_binary_atom_info(myAtomLIdx, myAtomUIdx, initAtoms);
00173
00174
00175
00176
00177
00178 readCoordinatesAndVelocity();
00179
00180
00181
00182 int oRank=atomRankOnOutput(myAtomLIdx);
00183 for(int i=oRank; i<numOutputProcs; i++) {
00184 int lIdx, uIdx;
00185 getAtomsRangeOnOutput(lIdx, uIdx, i);
00186 if(lIdx > myAtomUIdx) break;
00187 int fid = lIdx>myAtomLIdx?lIdx:myAtomLIdx;
00188 int tid = uIdx>myAtomUIdx?myAtomUIdx:uIdx;
00189 for(int j=fid; j<=tid; j++) initAtoms[j-myAtomLIdx].outputRank = i;
00190 }
00191 }
00192
00193
00194 if(myOutputRank!=-1) {
00195
00196 if(!(simParameters->wrapAll || simParameters->wrapWater)) return;
00197 readInfoForParOutput();
00198 }
00199 #endif
00200 }
00201
00202 void ParallelIOMgr::readCoordinatesAndVelocity()
00203 {
00204 #ifdef MEM_OPT_VERSION
00205 int needFlip = 0;
00206 int myAtomLIdx, myAtomUIdx;
00207 getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
00208 int myNumAtoms = myAtomUIdx-myAtomLIdx+1;
00209
00210
00211 Vector *tmpData = new Vector[myNumAtoms];
00212
00213
00214
00215 FILE *ifp = fopen(simParameters->binCoorFile, "rb");
00216 if(!ifp) {
00217 char s[256];
00218 sprintf(s, "The binary coordinate file %s cannot be opened on proc %d\n", simParameters->binCoorFile, CkMyPe());
00219 NAMD_err(s);
00220 }
00221
00222 int filelen;
00223 fread(&filelen, sizeof(int32),1,ifp);
00224 char lenbuf[sizeof(int32)];
00225 memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
00226 flipNum(lenbuf, sizeof(int32), 1);
00227 if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
00228 iout << iWARN << "Number of atoms in binary file " << simParameters->binCoorFile
00229 <<" is palindromic, assuming same endian.\n" << endi;
00230 }
00231 if(filelen!=molecule->numAtoms) {
00232 needFlip = 1;
00233 memcpy((void *)&filelen, lenbuf,sizeof(int32));
00234 }
00235 if(filelen!=molecule->numAtoms) {
00236 char s[256];
00237 sprintf(s, "Incorrect atom count in binary file %s", simParameters->binCoorFile);
00238 NAMD_die(s);
00239 }
00240
00241 int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Position);
00242 #ifdef WIN32
00243 if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
00244 #else
00245 if ( fseeko(ifp, offsetPos, SEEK_CUR) )
00246 #endif
00247 {
00248 char s[256];
00249 sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
00250 NAMD_err(s);
00251 }
00252 size_t totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
00253 if(totalRead!=myNumAtoms) {
00254 char s[256];
00255 sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
00256 NAMD_err(s);
00257 }
00258 if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
00259 fclose(ifp);
00260 for(int i=0; i<myNumAtoms; i++) initAtoms[i].position = tmpData[i];
00261
00262
00263
00264 if(!simParameters->binVelFile) {
00265
00266 Node::Object()->workDistrib->random_velocities_parallel(simParameters->initialTemp, initAtoms);
00267 } else {
00268 ifp = fopen(simParameters->binVelFile, "rb");
00269 if(!ifp) {
00270 char s[256];
00271 sprintf(s, "The binary velocity file %s cannot be opened on proc %d\n", simParameters->binCoorFile, CkMyPe());
00272 NAMD_err(s);
00273 }
00274
00275 fread(&filelen, sizeof(int32),1,ifp);
00276 memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
00277 flipNum(lenbuf, sizeof(int32), 1);
00278 if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
00279 iout << iWARN << "Number of atoms in binary file " << simParameters->binVelFile
00280 <<" is palindromic, assuming same endian.\n" << endi;
00281 }
00282 if(filelen!=molecule->numAtoms) {
00283 needFlip = 1;
00284 memcpy((void *)&filelen, lenbuf,sizeof(int32));
00285 }
00286 if(filelen!=molecule->numAtoms) {
00287 char s[256];
00288 sprintf(s, "Incorrect atom count in binary file %s", simParameters->binVelFile);
00289 NAMD_die(s);
00290 }
00291
00292
00293 int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Velocity);
00294 #ifdef WIN32
00295 if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
00296 #else
00297 if ( fseeko(ifp, offsetPos, SEEK_CUR) )
00298 #endif
00299 {
00300 char s[256];
00301 sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
00302 NAMD_err(s);
00303 }
00304 totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
00305 if(totalRead!=myNumAtoms) {
00306 char s[256];
00307 sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
00308 NAMD_err(s);
00309 }
00310 if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
00311 fclose(ifp);
00312 for(int i=0; i<myNumAtoms; i++) initAtoms[i].velocity = tmpData[i];
00313 }
00314
00315 delete [] tmpData;
00316 #endif
00317 }
00318
00319 void ParallelIOMgr::readInfoForParOutput()
00320 {
00321 int fromIdx, toIdx;
00322 getMyAtomsRangeOnOutput(fromIdx,toIdx);
00323 int numMyAtoms = toIdx-fromIdx+1;
00324
00325 clusterID = new int[numMyAtoms];
00326 clusterSize = new int[numMyAtoms];
00327
00328
00329
00330 FILE *ifp = fopen(simParameters->binAtomFile, "rb");
00331
00332 int needFlip = 0;
00333 int magicNum;
00334 fread(&magicNum, sizeof(int), 1, ifp);
00335 if (magicNum!=COMPRESSED_PSF_MAGICNUM) {
00336 needFlip = 1;
00337 }
00338
00339
00340 isWater = new char[numMyAtoms];
00341
00342 int64 offset = sizeof(char)*((int64)(fromIdx-molecule->numAtoms));
00343 #ifdef WIN32
00344 if ( _fseeki64(ifp, offset, SEEK_END) )
00345 #else
00346 if ( fseeko(ifp, offset, SEEK_END) )
00347 #endif
00348 {
00349 char s[256];
00350 sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
00351 NAMD_err(s);
00352 }
00353 fread(isWater, sizeof(char), numMyAtoms, ifp);
00354
00355
00356
00357 offset = sizeof(int)*((int64)(fromIdx-molecule->numAtoms))
00358 - sizeof(char)*((int64)(molecule->numAtoms));
00359 #ifdef WIN32
00360 if ( _fseeki64(ifp, offset, SEEK_END) )
00361 #else
00362 if ( fseeko(ifp, offset, SEEK_END) )
00363 #endif
00364 {
00365 char s[256];
00366 sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
00367 NAMD_err(s);
00368 }
00369 fread(clusterID, sizeof(int), numMyAtoms, ifp);
00370 if(needFlip) flipNum((char *)clusterID, sizeof(int), numMyAtoms);
00371 fclose(ifp);
00372
00373
00374 ClusterElem one;
00375 for(int i=0; i<numMyAtoms; i++) {
00376 clusterSize[i] = 0;
00377 int cid = clusterID[i];
00378
00379
00380
00381
00382
00383 CmiAssert(cid<=toIdx);
00384 if(cid<fromIdx) {
00385
00386 one.clusterId = cid;
00387 ClusterElem *ret = remoteClusters.find(one);
00388 if(ret==NULL) {
00389 one.atomsCnt = 1;
00390 remoteClusters.add(one);
00391 } else {
00392 ret->atomsCnt++;
00393 }
00394 } else {
00395 int lidx = cid-fromIdx;
00396 CmiAssert(lidx<=i);
00397 clusterSize[lidx]++;
00398 }
00399 }
00400
00401
00402
00403
00404 #if 0
00405 printf("output[%d]=%d: prepare to send %d remote msgs for cluster size\n",
00406 myOutputRank, CkMyPe(), remoteClusters.size());
00407 #endif
00408
00409 numRemoteClusters = remoteClusters.size();
00410 numCSMAck = 0;
00411 CProxy_ParallelIOMgr pIO(thisgroup);
00412 ClusterSetIter iter(remoteClusters);
00413 for(iter=iter.begin(); iter!=iter.end(); iter++) {
00414 ClusterSizeMsg *msg = new ClusterSizeMsg;
00415 msg->srcRank = myOutputRank;
00416 msg->clusterId = iter->clusterId;
00417 msg->atomsCnt = iter->atomsCnt;
00418 int dstRank = atomRankOnOutput(iter->clusterId);
00419 pIO[outputProcArray[dstRank]].recvClusterSize(msg);
00420 }
00421 }
00422
00423 void ParallelIOMgr::recvClusterSize(ClusterSizeMsg *msg)
00424 {
00425 csmBuf.add(msg);
00426
00427
00428
00429 }
00430
00431 void ParallelIOMgr::integrateClusterSize()
00432 {
00433 if(myOutputRank==-1) return;
00434 if(!(simParameters->wrapAll || simParameters->wrapWater)) return;
00435
00436 int fromIdx, toIdx;
00437 getMyAtomsRangeOnOutput(fromIdx,toIdx);
00438
00439
00440 for(int i=0; i<csmBuf.size(); i++) {
00441 ClusterSizeMsg *msg = csmBuf[i];
00442 int lidx = msg->clusterId - fromIdx;
00443 clusterSize[lidx] += msg->atomsCnt;
00444 }
00445
00446 CProxy_ParallelIOMgr pIO(thisgroup);
00447 for(int i=0; i<csmBuf.size(); i++) {
00448 ClusterSizeMsg *msg = csmBuf[i];
00449 int lidx = msg->clusterId - fromIdx;
00450 msg->atomsCnt = clusterSize[lidx];
00451 pIO[outputProcArray[msg->srcRank]].recvFinalClusterSize(msg);
00452 }
00453 numRemoteReqs = csmBuf.size();
00454 csmBuf.resize(0);
00455
00456
00457
00458
00459
00460
00461 if(numRemoteClusters!=0){
00462 recvFinalClusterSize(NULL);
00463 }else{
00464
00465 int numMyAtoms = toIdx-fromIdx+1;
00466 for(int i=0; i<numMyAtoms; i++) {
00467 int lidx = clusterID[i]-fromIdx;
00468 clusterSize[i] = clusterSize[lidx];
00469 }
00470
00471 #if 0 //write out cluster debug info
00472 char fname[128];
00473 sprintf(fname, "cluster.par.%d", CkMyPe());
00474 FILE *ofp = fopen(fname, "w");
00475 for(int i=0; i<numMyAtoms; i++) {
00476 fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
00477 }
00478 fclose(ofp);
00479 #endif
00480 }
00481 }
00482
00483 void ParallelIOMgr::recvFinalClusterSize(ClusterSizeMsg *msg)
00484 {
00485
00486 if(msg!=NULL) {
00487
00488 ClusterElem one(msg->clusterId);
00489 ClusterElem *ret = remoteClusters.find(one);
00490 CmiAssert(ret!=NULL);
00491 ret->atomsCnt = msg->atomsCnt;
00492 }
00493 delete msg;
00494
00495
00496 if(++numCSMAck == (numRemoteClusters+1)) {
00497
00498 int fromIdx, toIdx;
00499 getMyAtomsRangeOnOutput(fromIdx,toIdx);
00500 int numMyAtoms = toIdx-fromIdx+1;
00501 ClusterElem tmp;
00502 for(int i=0; i<numMyAtoms; i++) {
00503 int cid = clusterID[i];
00504 int lidx = cid-fromIdx;
00505 if(lidx<0) {
00506
00507 tmp.clusterId = cid;
00508 ClusterElem *fone = remoteClusters.find(tmp);
00509 clusterSize[i] = fone->atomsCnt;
00510 } else {
00511 clusterSize[i] = clusterSize[lidx];
00512 }
00513 }
00514 numCSMAck = 0;
00515 remoteClusters.clear();
00516
00517 #if 0 //write out cluster debug info
00518 char fname[128];
00519 sprintf(fname, "cluster.par.%d", CkMyPe());
00520 FILE *ofp = fopen(fname, "w");
00521 for(int i=0; i<numMyAtoms; i++) {
00522 fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
00523 }
00524 fclose(ofp);
00525 #endif
00526
00527 }
00528 }
00529
00530 void ParallelIOMgr::migrateAtomsMGrp()
00531 {
00532 if(myInputRank==-1) return;
00533
00534
00535
00536
00537 AtomIDList toMigrateList;
00538
00539 int maxOffset = 0;
00540 for(int i=0; i<initAtoms.size(); i++) {
00541
00542 int parentRank = atomInitRankOnInput(initAtoms[i].MPID);
00543 if(parentRank != myInputRank) {
00544 toMigrateList.add(i);
00545 initAtoms[i].isValid = false;
00546 int tmp = parentRank - myInputRank;
00547 tmp = tmp>0 ? tmp : -tmp;
00548 if(tmp > maxOffset) maxOffset = tmp;
00549 }
00550 }
00551
00552
00553
00554
00555
00556 InputAtomList *migLists = new InputAtomList[2*maxOffset+1];
00557 for(int i=0; i<toMigrateList.size(); i++) {
00558 int idx = toMigrateList[i];
00559 int parentRank = atomInitRankOnInput(initAtoms[idx].MPID);
00560
00561 int offset = parentRank - myInputRank + maxOffset;
00562 migLists[offset].add(initAtoms[idx]);
00563 }
00564
00565 CProxy_ParallelIOMgr pIO(thisgroup);
00566 for(int i=0; i<2*maxOffset+1; i++) {
00567 int migLen = migLists[i].size();
00568 if(migLen>0) {
00569 MoveInputAtomsMsg *msg = new (migLen, 0)MoveInputAtomsMsg;
00570 msg->length = migLen;
00571 memcpy(msg->atomList, migLists[i].begin(), sizeof(InputAtom)*migLen);
00572 int destRank = i-maxOffset+myInputRank;
00573 pIO[inputProcArray[destRank]].recvAtomsMGrp(msg);
00574 migLists[i].clear();
00575 }
00576 }
00577
00578 toMigrateList.clear();
00579 delete [] migLists;
00580 }
00581
00582 void ParallelIOMgr::recvAtomsMGrp(MoveInputAtomsMsg *msg)
00583 {
00584 for(int i=0; i<msg->length; i++) {
00585 tmpRecvAtoms.add((msg->atomList)[i]);
00586 }
00587 delete msg;
00588 }
00589
00590 void ParallelIOMgr::integrateMigratedAtoms()
00591 {
00592 if(myInputRank==-1) return;
00593
00594 for(int i=0; i<tmpRecvAtoms.size(); i++) {
00595 tmpRecvAtoms[i].isValid = true;
00596 initAtoms.add(tmpRecvAtoms[i]);
00597 }
00598 tmpRecvAtoms.clear();
00599
00600
00601 std::sort(initAtoms.begin(), initAtoms.end());
00602
00603
00604
00605
00606 int numFixedRigidBonds = 0;
00607 if(molecule->numRigidBonds){
00608 int parentIsFixed = 0;
00609 for(int i=0; i<initAtoms.size(); i++) {
00610 InputAtom *one = &(initAtoms[i]);
00611 if(!one->isValid) continue;
00612 if(one->isGP) {
00613 parentIsFixed = one->atomFixed;
00614 InputAtom *a1 = &(initAtoms[i+1]);
00615 InputAtom *a2 = &(initAtoms[i+2]);
00616 if((one->rigidBondLength>0.0) &&
00617 a1->atomFixed && a2->atomFixed) {
00618 numFixedRigidBonds++;
00619 }
00620 }else{
00621 if((one->rigidBondLength>0.0) &&
00622 one->atomFixed && parentIsFixed) {
00623 numFixedRigidBonds++;
00624 }
00625 }
00626 }
00627 }
00628
00629 int numFixedGroups = 0;
00630 if(molecule->numFixedAtoms){
00631 for(int i=0; i<initAtoms.size();) {
00632 InputAtom *one = &(initAtoms[i]);
00633 if(!one->isValid){
00634 i++;
00635 continue;
00636 }
00637 if(one->isGP) {
00638 int allFixed = 1;
00639 for(int j=0; j<one->hydrogenGroupSize; j++){
00640 InputAtom *a1 = &(initAtoms[i+j]);
00641 allFixed = allFixed & a1->atomFixed;
00642 if(!allFixed) break;
00643 }
00644 if(allFixed) numFixedGroups++;
00645 i += one->hydrogenGroupSize;
00646 }
00647 }
00648 }
00649
00650 CProxy_ParallelIOMgr pIO(thisgroup);
00651 HydroBasedMsg *msg = new HydroBasedMsg;
00652 msg->numFixedGroups = numFixedGroups;
00653 msg->numFixedRigidBonds = numFixedRigidBonds;
00654 pIO[0].recvHydroBasedCounter(msg);
00655 }
00656
00657 void ParallelIOMgr::updateMolInfo()
00658 {
00659 #ifdef MEM_OPT_VERSION
00660 if(myInputRank==-1) return;
00661
00662 CProxy_ParallelIOMgr pIO(thisgroup);
00663
00664 MolInfoMsg *msg = new MolInfoMsg;
00665 msg->numBonds = msg->numCalcBonds = 0;
00666 msg->numAngles = msg->numCalcAngles = 0;
00667 msg->numDihedrals = msg->numCalcDihedrals = 0;
00668 msg->numImpropers = msg->numCalcImpropers = 0;
00669 msg->numCrossterms = msg->numCalcCrossterms = 0;
00670 msg->numExclusions = msg->numCalcExclusions = 0;
00671 msg->numRigidBonds = 0;
00672 msg->totalMass = 0.0;
00673 msg->totalCharge = 0.0;
00674
00675
00676 AtomSignature *atomSigPool = molecule->atomSigPool;
00677 ExclusionSignature *exclSigPool = molecule->exclSigPool;
00678 for(int i=0; i<initAtoms.size(); i++) {
00679 AtomSignature *thisSig = &atomSigPool[initAtoms[i].sigId];
00680 msg->numBonds += thisSig->bondCnt;
00681 msg->numAngles += thisSig->angleCnt;
00682 msg->numDihedrals += thisSig->dihedralCnt;
00683 msg->numImpropers += thisSig->improperCnt;
00684 msg->numCrossterms += thisSig->crosstermCnt;
00685
00686 ExclusionSignature *exclSig = &exclSigPool[initAtoms[i].exclId];
00687 msg->numExclusions += (exclSig->fullExclCnt + exclSig->modExclCnt);
00688
00689 if(initAtoms[i].rigidBondLength > 0.0) msg->numRigidBonds++;
00690
00691 msg->totalMass += initAtoms[i].mass;
00692 msg->totalCharge += initAtoms[i].charge;
00693 }
00694
00695
00696 if(molecule->numFixedAtoms>0) {
00697
00698
00699
00700
00701
00702 int sAId = initAtoms[0].id;
00703 int remoteCnt=0;
00704 for(int i=0; i<initAtoms.size(); i++) {
00705
00706
00707 int myAId = initAtoms[i].id;
00708 AtomSignature *thisSig = &atomSigPool[initAtoms[i].sigId];
00709 ExclusionSignature *exclSig = &exclSigPool[initAtoms[i].exclId];
00710 if(!initAtoms[i].atomFixed) {
00711 msg->numCalcBonds += thisSig->bondCnt;
00712 msg->numCalcAngles += thisSig->angleCnt;
00713 msg->numCalcDihedrals += thisSig->dihedralCnt;
00714 msg->numCalcImpropers += thisSig->improperCnt;
00715 msg->numCalcCrossterms += thisSig->crosstermCnt;
00716 msg->numCalcExclusions+=(exclSig->fullExclCnt+exclSig->modExclCnt);
00717 continue;
00718 }
00719
00720
00721 for(int j=0; j<thisSig->bondCnt; j++) {
00722 TupleSignature *bsig = &(thisSig->bondSigs[j]);
00723 int a1 = myAId + bsig->offset[0];
00724 if(!isAtomFixed(sAId, a1)) msg->numCalcBonds++;
00725 }
00726
00727
00728 for(int j=0; j<thisSig->angleCnt; j++) {
00729 TupleSignature *bsig = &(thisSig->angleSigs[j]);
00730 int a1 = myAId + bsig->offset[0];
00731 int a2 = myAId + bsig->offset[1];
00732 if(!isAtomFixed(sAId, a1) || !isAtomFixed(sAId, a2))
00733 msg->numCalcAngles++;
00734 }
00735
00736
00737 for(int j=0; j<thisSig->dihedralCnt; j++) {
00738 TupleSignature *bsig = &(thisSig->dihedralSigs[j]);
00739 int a1 = myAId + bsig->offset[0];
00740 int a2 = myAId + bsig->offset[1];
00741 int a3 = myAId + bsig->offset[2];
00742 if(!isAtomFixed(sAId, a1) ||
00743 !isAtomFixed(sAId, a2) ||
00744 !isAtomFixed(sAId, a3))
00745 msg->numCalcDihedrals++;
00746 }
00747
00748
00749 for(int j=0; j<thisSig->improperCnt; j++) {
00750 TupleSignature *bsig = &(thisSig->improperSigs[j]);
00751 int a1 = myAId + bsig->offset[0];
00752 int a2 = myAId + bsig->offset[1];
00753 int a3 = myAId + bsig->offset[2];
00754 if(!isAtomFixed(sAId, a1) ||
00755 !isAtomFixed(sAId, a2) ||
00756 !isAtomFixed(sAId, a3))
00757 msg->numCalcImpropers++;
00758 }
00759
00760
00761 for(int j=0; j<thisSig->crosstermCnt; j++) {
00762 TupleSignature *bsig = &(thisSig->crosstermSigs[j]);
00763 int a1 = myAId + bsig->offset[0];
00764 int a2 = myAId + bsig->offset[1];
00765 int a3 = myAId + bsig->offset[2];
00766 int a4 = myAId + bsig->offset[3];
00767 int a5 = myAId + bsig->offset[4];
00768 int a6 = myAId + bsig->offset[5];
00769 int a7 = myAId + bsig->offset[6];
00770
00771 if(!isAtomFixed(sAId, a1) ||
00772 !isAtomFixed(sAId, a2) ||
00773 !isAtomFixed(sAId, a3) ||
00774 !isAtomFixed(sAId, a4) ||
00775 !isAtomFixed(sAId, a5) ||
00776 !isAtomFixed(sAId, a6) ||
00777 !isAtomFixed(sAId, a7))
00778 msg->numCalcDihedrals++;
00779 }
00780
00781
00782
00783 for(int j=0; j<exclSig->fullExclCnt; j++) {
00784 int thisAId = exclSig->fullOffset[j]+myAId;
00785 if(!isAtomFixed(sAId, thisAId)) msg->numCalcExclusions++;
00786 }
00787 for(int j=0; j<exclSig->modExclCnt; j++) {
00788 int thisAId = exclSig->modOffset[j]+myAId;
00789 if(!isAtomFixed(sAId, thisAId)) msg->numCalcExclusions++;
00790 }
00791 }
00792 #if COLLECT_PERFORMANCE_DATA
00793 printf("Num fixedAtom lookup on proc %d is %d\n", CkMyPe(), numFixedAtomLookup);
00794 #endif
00795 } else {
00796
00797 msg->numCalcBonds = msg->numBonds;
00798 msg->numCalcAngles = msg->numAngles;
00799 msg->numCalcDihedrals = msg->numDihedrals;
00800 msg->numCalcImpropers = msg->numImpropers;
00801 msg->numCalcCrossterms = msg->numCrossterms;
00802 msg->numCalcExclusions = msg->numExclusions;
00803 }
00804
00805
00806 if(!simParameters->comMove) {
00807
00808
00809
00810
00811 (msg->totalMV).x = 0.0;
00812 (msg->totalMV).y = 0.0;
00813 (msg->totalMV).z = 0.0;
00814 for (int i=0; i<initAtoms.size(); i++) {
00815 msg->totalMV += initAtoms[i].mass * initAtoms[i].velocity;
00816 }
00817 }
00818
00819
00820 pIO[0].recvMolInfo(msg);
00821 #endif
00822 }
00823
00824
00825 void ParallelIOMgr::recvMolInfo(MolInfoMsg *msg)
00826 {
00827 molecule->numBonds += msg->numBonds;
00828 molecule->numCalcBonds += msg->numCalcBonds;
00829 molecule->numAngles += msg->numAngles;
00830 molecule->numCalcAngles += msg->numCalcAngles;
00831 molecule->numDihedrals += msg->numDihedrals;
00832 molecule->numCalcDihedrals += msg->numCalcDihedrals;
00833 molecule->numImpropers += msg->numImpropers;
00834 molecule->numCalcImpropers += msg->numCalcImpropers;
00835 molecule->numCrossterms += msg->numCrossterms;
00836 molecule->numCalcCrossterms += msg->numCalcCrossterms;
00837 molecule->numTotalExclusions += msg->numExclusions;
00838 molecule->numCalcExclusions += msg->numCalcExclusions;
00839 molecule->numRigidBonds += msg->numRigidBonds;
00840
00841 totalMass += msg->totalMass;
00842 totalCharge += msg->totalCharge;
00843
00844 if(!simParameters->comMove) {
00845 totalMV += msg->totalMV;
00846 }
00847
00848 if(++procsReceived == numInputProcs) {
00849
00850 msg->numBonds = molecule->numBonds;
00851 msg->numCalcBonds = molecule->numCalcBonds;
00852 msg->numAngles = molecule->numAngles;
00853 msg->numCalcAngles = molecule->numCalcAngles;
00854 msg->numDihedrals = molecule->numDihedrals;
00855 msg->numCalcDihedrals = molecule->numCalcDihedrals;
00856 msg->numImpropers = molecule->numImpropers;
00857 msg->numCalcImpropers = molecule->numCalcImpropers;
00858 msg->numCrossterms = molecule->numCrossterms;
00859 msg->numCalcCrossterms = molecule->numCalcCrossterms;
00860 msg->numExclusions = molecule->numTotalExclusions/2;
00861 msg->numCalcExclusions = molecule->numCalcExclusions/2;
00862 msg->numRigidBonds = molecule->numRigidBonds;
00863
00864 msg->totalMass = totalMass;
00865 msg->totalCharge = totalCharge;
00866
00867 if(!simParameters->comMove) {
00868 msg->totalMV = totalMV;
00869 }
00870
00871 CProxy_ParallelIOMgr pIO(thisgroup);
00872 pIO.bcastMolInfo(msg);
00873
00874
00875 procsReceived = 0;
00876 } else delete msg;
00877 }
00878
00879 void ParallelIOMgr::bcastMolInfo(MolInfoMsg *msg)
00880 {
00881 #ifdef MEM_OPT_VERSION
00882 if(myInputRank!=-1) {
00883 if(!simParameters->comMove) {
00884
00885 Vector val = msg->totalMV / msg->totalMass;
00886 for (int i=0; i<initAtoms.size(); i++) initAtoms[i].velocity -= val;
00887 }
00888 }
00889
00890
00891 if(CmiMyRank()) {
00892 delete msg;
00893 return;
00894 }
00895
00896 molecule->numBonds = msg->numBonds;
00897 molecule->numCalcBonds = msg->numCalcBonds;
00898 molecule->numAngles = msg->numAngles;
00899 molecule->numCalcAngles = msg->numCalcAngles;
00900 molecule->numDihedrals = msg->numDihedrals;
00901 molecule->numCalcDihedrals = msg->numCalcDihedrals;
00902 molecule->numImpropers = msg->numImpropers;
00903 molecule->numCalcImpropers = msg->numCalcImpropers;
00904 molecule->numCrossterms = msg->numCrossterms;
00905 molecule->numCalcCrossterms = msg->numCalcCrossterms;
00906
00907 molecule->numTotalExclusions = msg->numExclusions;
00908 molecule->numCalcExclusions = msg->numCalcExclusions;
00909
00910 molecule->numRigidBonds = msg->numRigidBonds;
00911 delete msg;
00912
00913 if(!CkMyPe()) {
00914 if(!simParameters->comMove) {
00915 iout << iINFO << "REMOVING COM VELOCITY "
00916 << (PDBVELFACTOR * (msg->totalMV / msg->totalMass))<< "\n" <<endi;
00917 }
00918 }
00919 #endif
00920 }
00921
00922
00923 void ParallelIOMgr::recvHydroBasedCounter(HydroBasedMsg *msg){
00924 molecule->numFixedRigidBonds += msg->numFixedRigidBonds;
00925 molecule->numFixedGroups += msg->numFixedGroups;
00926
00927 if(++hydroMsgRecved == numInputProcs){
00928 msg->numFixedRigidBonds = molecule->numFixedRigidBonds;
00929 msg->numFixedGroups = molecule->numFixedGroups;
00930 CProxy_ParallelIOMgr pIO(thisgroup);
00931 pIO.bcastHydroBasedCounter(msg);
00932 hydroMsgRecved = 0;
00933 }else delete msg;
00934 }
00935
00936 void ParallelIOMgr::bcastHydroBasedCounter(HydroBasedMsg *msg){
00937 #ifdef MEM_OPT_VERSION
00938
00939 if(CmiMyRank()) {
00940 delete msg;
00941 return;
00942 }
00943 molecule->numFixedRigidBonds = msg->numFixedRigidBonds;
00944 molecule->numFixedGroups = msg->numFixedGroups;
00945 delete msg;
00946
00947 if(!CkMyPe()) {
00948 iout << iINFO << "****************************\n";
00949 iout << iINFO << "STRUCTURE SUMMARY:\n";
00950 iout << iINFO << molecule->numAtoms << " ATOMS\n";
00951 iout << iINFO << molecule->numBonds << " BONDS\n";
00952 iout << iINFO << molecule->numAngles << " ANGLES\n";
00953 iout << iINFO << molecule->numDihedrals << " DIHEDRALS\n";
00954 iout << iINFO << molecule->numImpropers << " IMPROPERS\n";
00955 iout << iINFO << molecule->numCrossterms << " CROSSTERMS\n";
00956 iout << iINFO << molecule->numExclusions << " EXCLUSIONS\n";
00957
00958
00959 if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeXplorOn)){
00960 iout << iINFO << molecule->numMultipleDihedrals
00961 << " DIHEDRALS WITH MULTIPLE PERIODICITY (BASED ON PSF FILE)\n";
00962 }
00963 if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeCharmmOn)){
00964 iout << iINFO << molecule->numMultipleDihedrals
00965 << " DIHEDRALS WITH MULTIPLE PERIODICITY IGNORED (BASED ON PSF FILE) \n";
00966 iout << iINFO
00967 << " CHARMM MULTIPLICITIES BASED ON PARAMETER FILE INFO! \n";
00968 }
00969
00970
00971 if (molecule->numMultipleImpropers){
00972 iout << iINFO << molecule->numMultipleImpropers
00973 << " IMPROPERS WITH MULTIPLE PERIODICITY\n";
00974 }
00975
00976 if (simParameters->fixedAtomsOn)
00977 iout << iINFO << molecule->numFixedAtoms << " FIXED ATOMS\n";
00978
00979
00980 if (simParameters->rigidBonds)
00981 iout << iINFO << molecule->numRigidBonds << " RIGID BONDS\n";
00982
00983 if (simParameters->fixedAtomsOn && simParameters->rigidBonds)
00984 iout << iINFO << molecule->numFixedRigidBonds <<
00985 " RIGID BONDS BETWEEN FIXED ATOMS\n";
00986
00987 iout << iINFO << molecule->num_deg_freedom(1)
00988 << " DEGREES OF FREEDOM\n";
00989
00990 iout << iINFO << molecule->numHydrogenGroups << " HYDROGEN GROUPS\n";
00991 iout << iINFO << molecule->maxHydrogenGroupSize
00992 << " ATOMS IN LARGEST HYDROGEN GROUP\n";
00993 iout << iINFO << molecule->numMigrationGroups << " MIGRATION GROUPS\n";
00994 iout << iINFO << molecule->maxMigrationGroupSize
00995 << " ATOMS IN LARGEST MIGRATION GROUP\n";
00996 if (simParameters->fixedAtomsOn)
00997 {
00998 iout << iINFO << molecule->numFixedGroups <<
00999 " HYDROGEN GROUPS WITH ALL ATOMS FIXED\n";
01000 }
01001
01002 iout << iINFO << "TOTAL MASS = " << totalMass << " amu\n";
01003 iout << iINFO << "TOTAL CHARGE = " << totalCharge << " e\n";
01004
01005 BigReal volume = simParameters->lattice.volume();
01006 if ( volume ) {
01007 iout << iINFO << "MASS DENSITY = "
01008 << ((totalMass/volume) / 0.6022) << " g/cm^3\n";
01009 iout << iINFO << "ATOM DENSITY = "
01010 << (molecule->numAtoms/volume) << " atoms/A^3\n";
01011 }
01012
01013 iout << iINFO << "*****************************\n";
01014 iout << endi;
01015 fflush(stdout);
01016 }
01017 #endif
01018 }
01019
01020 void ParallelIOMgr::calcAtomsInEachPatch()
01021 {
01022 if(myInputRank==-1) return;
01023
01024 PatchMap *patchMap = PatchMap::Object();
01025 int numPatches = patchMap->numPatches();
01026
01027 patchMap->initTmpPatchAtomsList();
01028
01029
01030 vector<int> *eachPatchAtomList = patchMap->getTmpPatchAtomsList();
01031
01032 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
01033 PatchMgr *patchMgr = pm.ckLocalBranch();
01034
01035 int pid=0;
01036 const Lattice lattice = simParameters->lattice;
01037 for(int i=0; i<initAtoms.size(); i++) {
01038 InputAtom *atom = &(initAtoms[i]);
01039 if(!atom->isValid) continue;
01040 if(atom->isMP) {
01041 pid = patchMap->assignToPatch(atom->position, lattice);
01042 }
01043 eachPatchAtomList[pid].push_back(i);
01044 }
01045
01046 CProxy_ParallelIOMgr pIO(thisgroup);
01047
01048 int patchCnt = 0;
01049 for(int i=0; i<numPatches; i++) {
01050 int cursize = eachPatchAtomList[i].size();
01051 if(cursize>0) patchCnt++;
01052 }
01053
01054 AtomsCntPerPatchMsg *msg = NULL;
01055 if(simParameters->fixedAtomsOn) {
01056 msg = new (patchCnt, patchCnt, patchCnt, 0)AtomsCntPerPatchMsg;
01057 } else {
01058 msg = new (patchCnt, patchCnt, 0, 0)AtomsCntPerPatchMsg;
01059 }
01060
01061 msg->length = patchCnt;
01062 patchCnt = 0;
01063 for(int i=0; i<numPatches; i++) {
01064 int cursize = eachPatchAtomList[i].size();
01065 if(cursize>0) {
01066 if ( cursize > USHRT_MAX ) {
01067 char errstr[512];
01068 sprintf(errstr, "Patch %d exceeds %d atoms.", i, USHRT_MAX);
01069 NAMD_die(errstr);
01070 }
01071 msg->pidList[patchCnt] = i;
01072 msg->atomsCntList[patchCnt] = cursize;
01073 patchCnt++;
01074 }
01075 }
01076
01077 if(simParameters->fixedAtomsOn) {
01078 patchCnt = 0;
01079 for(int i=0; i<numPatches; i++) {
01080 int cursize = eachPatchAtomList[i].size();
01081 if(cursize>0) {
01082 int fixedCnt = 0;
01083 for(int j=0; j<cursize; j++) {
01084 int aid = eachPatchAtomList[i][j];
01085
01086 fixedCnt += initAtoms[aid].atomFixed;
01087 }
01088 msg->fixedAtomsCntList[patchCnt] = fixedCnt;
01089 patchCnt++;
01090 }
01091 }
01092 }
01093
01094 pIO[0].recvAtomsCntPerPatch(msg);
01095
01096 }
01097
01098 void ParallelIOMgr::recvAtomsCntPerPatch(AtomsCntPerPatchMsg *msg)
01099 {
01100 #ifdef MEM_OPT_VERSION
01101 PatchMap *patchMap = PatchMap::Object();
01102 for(int i=0; i<msg->length; i++) {
01103 int pid = msg->pidList[i];
01104 int oldNum = patchMap->numAtoms(pid);
01105 if ( oldNum + msg->atomsCntList[i] > USHRT_MAX ) {
01106 char errstr[512];
01107 sprintf(errstr, "Patch %d exceeds %d atoms.", pid, USHRT_MAX);
01108 NAMD_die(errstr);
01109 }
01110 patchMap->setNumAtoms(pid, oldNum+msg->atomsCntList[i]);
01111 if(simParameters->fixedAtomsOn) {
01112 oldNum = patchMap->numFixedAtoms(pid);
01113 patchMap->setNumFixedAtoms(pid, oldNum+msg->fixedAtomsCntList[i]);
01114 }
01115 }
01116 delete msg;
01117
01118 if(++procsReceived == numInputProcs) {
01119
01120 int maxAtoms = -1;
01121 int maxPatch = -1;
01122 int totalAtoms = 0;
01123 for(int i=0; i<patchMap->numPatches(); i++) {
01124 int cnt = patchMap->numAtoms(i);
01125 totalAtoms += cnt;
01126 if(cnt>maxAtoms) {
01127 maxAtoms = cnt;
01128 maxPatch = i;
01129 }
01130 }
01131 procsReceived = 0;
01132 iout << iINFO << "LARGEST PATCH (" << maxPatch <<
01133 ") HAS " << maxAtoms << " ATOMS\n" << endi;
01134 if ( totalAtoms != Node::Object()->molecule->numAtoms ) {
01135 char errstr[512];
01136 sprintf(errstr, "Incorrect atom count in void ParallelIOMgr::recvAtomsCntPerPatch: %d vs %d", totalAtoms, Node::Object()->molecule->numAtoms);
01137 NAMD_die(errstr);
01138 }
01139 }
01140 #endif
01141 }
01142
01143 void call_sendAtomsToHomePatchProcs(void *arg)
01144 {
01145 ((ParallelIOMgr*)arg)->sendAtomsToHomePatchProcs();
01146 }
01147
01148 void ParallelIOMgr::sendAtomsToHomePatchProcs()
01149 {
01150 #ifdef MEM_OPT_VERSION
01151 if(myInputRank==-1) return;
01152
01153 if ( sendAtomsThread == 0 ) {
01154 sendAtomsThread = CthCreate((CthVoidFn)call_sendAtomsToHomePatchProcs,this,0);
01155 CthAwaken(sendAtomsThread);
01156 return;
01157 }
01158 sendAtomsThread = 0;
01159 numAcksOutstanding = 0;
01160
01161 PatchMap *patchMap = PatchMap::Object();
01162 int numPatches = patchMap->numPatches();
01163 vector<int> *eachPatchAtomList = patchMap->getTmpPatchAtomsList();
01164
01165
01166
01167 ResizeArray<int> *procList = new ResizeArray<int>[CkNumPes()];
01168 ResizeArray<int> pesToSend;
01169 for(int i=0; i<numPatches; i++) {
01170 if(eachPatchAtomList[i].size()==0) continue;
01171 int onPE = patchMap->node(i);
01172 if ( procList[onPE].size() == 0 ) pesToSend.add(onPE);
01173 procList[onPE].add(i);
01174 }
01175
01176 Random(CkMyPe()).reorder(pesToSend.begin(),pesToSend.size());
01177
01178
01179
01180
01181 CProxy_ParallelIOMgr pIO(thisgroup);
01182 for(int k=0; k<pesToSend.size(); k++) {
01183 const int i = pesToSend[k];
01184 int len = procList[i].size();
01185 if(len==0) continue;
01186
01187 if ( numAcksOutstanding >= 10 ) {
01188
01189
01190 sendAtomsThread = CthSelf();
01191 CthSuspend();
01192 }
01193 ++numAcksOutstanding;
01194
01195
01196 int patchCnt = len;
01197 int totalAtomCnt = 0;
01198 for(int j=0; j<len; j++) {
01199 int pid = procList[i][j];
01200 int atomCnt = eachPatchAtomList[pid].size();
01201 totalAtomCnt += atomCnt;
01202 }
01203
01204 MovePatchAtomsMsg *msg = new (patchCnt, patchCnt, totalAtomCnt, 0)MovePatchAtomsMsg;
01205 msg->from = CkMyPe();
01206 msg->patchCnt = patchCnt;
01207 int atomIdx = 0;
01208 for(int j=0; j<len; j++) {
01209 int pid = procList[i][j];
01210 int atomCnt = eachPatchAtomList[pid].size();
01211 msg->pidList[j] = pid;
01212 msg->sizeList[j] = atomCnt;
01213 for(int k=0; k<atomCnt; k++, atomIdx++) {
01214 int aid = eachPatchAtomList[pid][k];
01215 FullAtom one = initAtoms[aid];
01216
01217
01218 one.hydVal = initAtoms[aid].hydList;
01219 msg->allAtoms[atomIdx] = one;
01220 }
01221 }
01222 pIO[i].recvAtomsToHomePatchProcs(msg);
01223
01224 procList[i].clear();
01225 }
01226
01227
01228 delete [] procList;
01229 patchMap->delTmpPatchAtomsList();
01230
01231
01232 initAtoms.clear();
01233 #endif
01234 }
01235
01236 void ParallelIOMgr::ackAtomsToHomePatchProcs()
01237 {
01238 --numAcksOutstanding;
01239 if ( sendAtomsThread ) {
01240 CthAwaken(sendAtomsThread);
01241 sendAtomsThread = 0;
01242 }
01243 }
01244
01245 void ParallelIOMgr::recvAtomsToHomePatchProcs(MovePatchAtomsMsg *msg)
01246 {
01247 CProxy_ParallelIOMgr pIO(thisgroup);
01248 pIO[msg->from].ackAtomsToHomePatchProcs();
01249
01250 if(!isOKToRecvHPAtoms) {
01251 prepareHomePatchAtomList();
01252 isOKToRecvHPAtoms = true;
01253 }
01254
01255 int numRecvPatches = msg->patchCnt;
01256 int aid = 0;
01257 for(int i=0; i<numRecvPatches; i++) {
01258 int pid = msg->pidList[i];
01259 int size = msg->sizeList[i];
01260 int idx = binaryFindHPID(pid);
01261 for(int j=0; j<size; j++, aid++) {
01262 hpAtomsList[idx].add(msg->allAtoms[aid]);
01263 }
01264 }
01265
01266 delete msg;
01267 }
01268
01269 void ParallelIOMgr::prepareHomePatchAtomList()
01270 {
01271 PatchMap *patchMap = PatchMap::Object();
01272 for(int i=0; i<patchMap->numPatches(); i++) {
01273 if(patchMap->node(i)==CkMyPe()) {
01274 hpIDList.add(i);
01275 }
01276 }
01277 if(hpIDList.size()>0)
01278 hpAtomsList = new FullAtomList[hpIDList.size()];
01279 }
01280
01281 int ParallelIOMgr::binaryFindHPID(int pid)
01282 {
01283
01284 int lIdx, rIdx;
01285 int retIdx = -1;
01286
01287 rIdx=0;
01288 lIdx=hpIDList.size()-1;
01289
01290 while(rIdx<=lIdx ) {
01291 int idx = (rIdx+lIdx)/2;
01292 int curPid = hpIDList[idx];
01293 if(pid>curPid) {
01294
01295 rIdx = idx+1;
01296 } else if(pid<curPid) {
01297
01298 lIdx = idx-1;
01299 } else {
01300
01301 retIdx = idx;
01302 break;
01303 }
01304 }
01305 CmiAssert(retIdx!=-1);
01306 return retIdx;
01307 }
01308
01309 void ParallelIOMgr::createHomePatches()
01310 {
01311 #ifdef MEM_OPT_VERSION
01312
01313 int assignedPids = PatchMap::Object()->numPatchesOnNode(CkMyPe());
01314 int numPids = hpIDList.size();
01315 if(numPids==0){
01316
01317 if(assignedPids == 0) return;
01318
01319
01320
01321
01322 CmiAssert(isOKToRecvHPAtoms == false);
01323 PatchMap *patchMap = PatchMap::Object();
01324 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
01325 PatchMgr *patchMgr = pm.ckLocalBranch();
01326 for(int i=0; i<patchMap->numPatches(); i++) {
01327 if(patchMap->node(i)==CkMyPe()) {
01328 FullAtomList emptyone;
01329 patchMgr->createHomePatch(i, emptyone);
01330 }
01331 }
01332 return;
01333 }
01334
01335 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
01336 PatchMgr *patchMgr = pm.ckLocalBranch();
01337
01338
01339 for(int i=0; i<numPids; i++) {
01340 int pid = hpIDList[i];
01341
01342
01343 std::sort(hpAtomsList[i].begin(), hpAtomsList[i].end());
01344 Node::Object()->workDistrib->fillAtomListForOnePatch(pid, hpAtomsList[i]);
01345 patchMgr->createHomePatch(pid, hpAtomsList[i]);
01346 }
01347
01348 hpIDList.clear();
01349 delete [] hpAtomsList;
01350
01351 hpAtomsList = NULL;
01352 #endif
01353 }
01354
01355 void ParallelIOMgr::freeMolSpace()
01356 {
01357 #ifdef MEM_OPT_VERSION
01358 molecule->delAtomNames();
01359 molecule->delChargeSpace();
01360
01361
01362
01363 if(!CkMyPe() && !simParameters->freeEnergyOn)
01364 molecule->delMassSpace();
01365
01366 molecule->delFixedAtoms();
01367 #endif
01368 }
01369
01370
01371
01372
01373
01374 int ParallelIOMgr::numMyAtoms(int rank, int numProcs)
01375 {
01376 if(rank==-1) return -1;
01377 int avgNum = molecule->numAtoms/numProcs;
01378 int remainder = molecule->numAtoms%numProcs;
01379 if(rank<remainder) return avgNum+1;
01380 else return avgNum;
01381 }
01382
01383 int ParallelIOMgr::atomRank(int atomID, int numProcs)
01384 {
01385 int avgNum = molecule->numAtoms/numProcs;
01386 int remainder = molecule->numAtoms%numProcs;
01387 int midLimit = remainder*(avgNum+1);
01388 int idx;
01389 if(atomID<midLimit) {
01390 idx = atomID/(avgNum+1);
01391 } else {
01392 idx = remainder+(atomID-midLimit)/avgNum;
01393 }
01394 return idx;
01395 }
01396
01397 void ParallelIOMgr::getMyAtomsRange(int &lowerIdx, int &upperIdx, int rank, int numProcs)
01398 {
01399 if(rank==-1) {
01400
01401 lowerIdx=-1;
01402 upperIdx=-3;
01403 return;
01404 }
01405 int avgNum = molecule->numAtoms/numProcs;
01406 int remainder = molecule->numAtoms%numProcs;
01407 if(rank<remainder) {
01408 lowerIdx = rank*(avgNum+1);
01409 upperIdx = lowerIdx+avgNum;
01410 } else {
01411 int midLimit = remainder*(avgNum+1);
01412 lowerIdx = midLimit+(rank-remainder)*avgNum;
01413 upperIdx = lowerIdx+avgNum-1;
01414 }
01415 }
01416
01417 void ParallelIOMgr::receivePositions(CollectVectorVarMsg *msg)
01418 {
01419 #ifdef MEM_OPT_VERSION
01420 int ready = midCM->receivePositions(msg);
01421 if(ready) {
01422 CProxy_CollectionMaster cm(mainMaster);
01423 cm.receiveOutputPosReady(msg->seq);
01424 }
01425 delete msg;
01426 #endif
01427 }
01428
01429 void ParallelIOMgr::receiveVelocities(CollectVectorVarMsg *msg)
01430 {
01431 #ifdef MEM_OPT_VERSION
01432 int ready = midCM->receiveVelocities(msg);
01433 if(ready) {
01434 CProxy_CollectionMaster cm(mainMaster);
01435 cm.receiveOutputVelReady(msg->seq);
01436 }
01437 delete msg;
01438 #endif
01439 }
01440
01441 void ParallelIOMgr::receiveForces(CollectVectorVarMsg *msg)
01442 {
01443 #ifdef MEM_OPT_VERSION
01444 int ready = midCM->receiveForces(msg);
01445 if(ready) {
01446 CProxy_CollectionMaster cm(mainMaster);
01447 cm.receiveOutputForceReady(msg->seq);
01448 }
01449 delete msg;
01450 #endif
01451 }
01452
01453
01454 void ParallelIOMgr::disposePositions(int seq, double prevT)
01455 {
01456 #ifdef MEM_OPT_VERSION
01457 double iotime = CmiWallTimer();
01458 midCM->disposePositions(seq);
01459 iotime = CmiWallTimer()-iotime+prevT;
01460
01461 #if OUTPUT_SINGLE_FILE
01462
01463 if(myOutputRank == getMyOutputGroupHighestRank()) {
01464
01465 CProxy_CollectionMaster cm(mainMaster);
01466 cm.startNextRoundOutputPos(iotime);
01467 } else {
01468 CProxy_ParallelIOMgr io(thisgroup);
01469 io[outputProcArray[myOutputRank+1]].disposePositions(seq, iotime);
01470 }
01471 #else
01472
01473 CProxy_CollectionMaster cm(mainMaster);
01474 cm.startNextRoundOutputPos(iotime);
01475 #endif
01476
01477 #endif
01478 }
01479
01480 void ParallelIOMgr::disposeVelocities(int seq, double prevT)
01481 {
01482 #ifdef MEM_OPT_VERSION
01483 double iotime = CmiWallTimer();
01484 midCM->disposeVelocities(seq);
01485 iotime = CmiWallTimer()-iotime+prevT;
01486
01487 #if OUTPUT_SINGLE_FILE
01488
01489 if(myOutputRank==getMyOutputGroupHighestRank()) {
01490
01491 CProxy_CollectionMaster cm(mainMaster);
01492 cm.startNextRoundOutputVel(iotime);
01493 } else {
01494 CProxy_ParallelIOMgr io(thisgroup);
01495 io[outputProcArray[myOutputRank+1]].disposeVelocities(seq, iotime);
01496 }
01497 #else
01498
01499 CProxy_CollectionMaster cm(mainMaster);
01500 cm.startNextRoundOutputVel(iotime);
01501 #endif
01502
01503 #endif
01504 }
01505
01506 void ParallelIOMgr::disposeForces(int seq, double prevT)
01507 {
01508 #ifdef MEM_OPT_VERSION
01509 double iotime = CmiWallTimer();
01510 midCM->disposeForces(seq);
01511 iotime = CmiWallTimer()-iotime+prevT;
01512
01513 #if OUTPUT_SINGLE_FILE
01514
01515 if(myOutputRank==getMyOutputGroupHighestRank()) {
01516
01517 CProxy_CollectionMaster cm(mainMaster);
01518 cm.startNextRoundOutputForce(iotime);
01519 } else {
01520 CProxy_ParallelIOMgr io(thisgroup);
01521 io[outputProcArray[myOutputRank+1]].disposeForces(seq, iotime);
01522 }
01523 #else
01524
01525 CProxy_CollectionMaster cm(mainMaster);
01526 cm.startNextRoundOutputForce(iotime);
01527 #endif
01528
01529 #endif
01530 }
01531
01532
01533 void ParallelIOMgr::wrapCoor(int seq, Lattice lat)
01534 {
01535 #ifdef MEM_OPT_VERSION
01536 coorInstance = midCM->getReadyPositions(seq);
01537
01538 coorInstance->lattice = lat;
01539 int fromAtomID = coorInstance->fromAtomID;
01540 int toAtomID = coorInstance->toAtomID;
01541
01542
01543 ResizeArray<Vector> data = coorInstance->data;
01544 ResizeArray<FloatVector> fdata = coorInstance->fdata;
01545
01546
01547
01548 int dsize = data.size();
01549 int numMyAtoms = toAtomID-fromAtomID+1;
01550 tmpCoorCon = new Vector[numMyAtoms];
01551 ClusterCoorElem one;
01552
01553 for(int i=0; i<numMyAtoms; i++){
01554 tmpCoorCon[i] = 0.0;
01555 int cid = clusterID[i];
01556 if(cid<fromAtomID){
01557
01558 one.clusterId = cid;
01559 ClusterCoorElem *ret = remoteCoors.find(one);
01560 if(ret==NULL){
01561 if(dsize==0)
01562 one.dsum = fdata[i];
01563 else
01564 one.dsum = data[i];
01565
01566 remoteCoors.add(one);
01567 }else{
01568 if(dsize==0)
01569 ret->dsum += fdata[i];
01570 else
01571 ret->dsum += data[i];
01572 }
01573 }else{
01574 if(dsize==0)
01575 tmpCoorCon[cid-fromAtomID] += fdata[i];
01576 else
01577 tmpCoorCon[cid-fromAtomID] += data[i];
01578 }
01579 }
01580
01581
01582
01583 CmiAssert(numRemoteClusters == remoteCoors.size());
01584 numCSMAck = 0;
01585 CProxy_ParallelIOMgr pIO(thisgroup);
01586 ClusterCoorSetIter iter(remoteCoors);
01587 for(iter=iter.begin(); iter!=iter.end(); iter++){
01588 ClusterCoorMsg *msg = new ClusterCoorMsg;
01589 msg->srcRank = myOutputRank;
01590 msg->clusterId = iter->clusterId;
01591 msg->dsum = iter->dsum;
01592 int dstRank = atomRankOnOutput(iter->clusterId);
01593 pIO[outputProcArray[dstRank]].recvClusterCoor(msg);
01594 }
01595
01596
01597
01598 recvClusterCoor(NULL);
01599 #endif
01600 }
01601
01602
01603
01604
01605
01606
01607 void ParallelIOMgr::recvClusterCoor(ClusterCoorMsg *msg){
01608
01609 if(msg!=NULL) ccmBuf.add(msg);
01610
01611
01612 if(++numReqRecved == (numRemoteReqs+1)){
01613 numReqRecved = 0;
01614 integrateClusterCoor();
01615 }
01616 }
01617
01618 void ParallelIOMgr::integrateClusterCoor(){
01619 #ifdef MEM_OPT_VERSION
01620 int fromIdx = coorInstance->fromAtomID;
01621 int toIdx = coorInstance->toAtomID;
01622 for(int i=0; i<ccmBuf.size(); i++){
01623 ClusterCoorMsg *msg = ccmBuf[i];
01624 int lidx = msg->clusterId - fromIdx;
01625 tmpCoorCon[lidx] += msg->dsum;
01626 }
01627
01628
01629 CProxy_ParallelIOMgr pIO(thisgroup);
01630 for(int i=0; i<ccmBuf.size(); i++){
01631 ClusterCoorMsg *msg = ccmBuf[i];
01632 int lidx = msg->clusterId - fromIdx;
01633 if(simParameters->wrapAll || isWater[lidx]) {
01634 Lattice *lat = &(coorInstance->lattice);
01635 Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
01636 msg->dsum = (simParameters->wrapNearest ?
01637 lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
01638 }else{
01639 msg->dsum = 0.0;
01640 }
01641 pIO[outputProcArray[msg->srcRank]].recvFinalClusterCoor(msg);
01642 }
01643 ccmBuf.resize(0);
01644
01645
01646
01647
01648 if(numRemoteClusters!=0){
01649 recvFinalClusterCoor(NULL);
01650 } else {
01651
01652
01653 int numMyAtoms = toIdx-fromIdx+1;
01654 ResizeArray<Vector> data = coorInstance->data;
01655 ResizeArray<FloatVector> fdata = coorInstance->fdata;
01656 for(int i=0; i<numMyAtoms; i++){
01657 if(!simParameters->wrapAll && !isWater[i]) continue;
01658 int lidx = clusterID[i]-fromIdx;
01659 if(lidx==i){
01660
01661 Lattice *lat = &(coorInstance->lattice);
01662 Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
01663 tmpCoorCon[lidx] = (simParameters->wrapNearest ?
01664 lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
01665 }
01666 if(data.size()) data[i] += tmpCoorCon[lidx];
01667
01668 if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
01669 }
01670
01671 delete [] tmpCoorCon;
01672 tmpCoorCon = NULL;
01673 CProxy_CollectionMaster cm(mainMaster);
01674 cm.wrapCoorFinished();
01675 }
01676 #endif
01677 }
01678
01679 void ParallelIOMgr::recvFinalClusterCoor(ClusterCoorMsg *msg){
01680 #ifdef MEM_OPT_VERSION
01681 if(msg!=NULL){
01682
01683 ClusterCoorElem one(msg->clusterId);
01684 ClusterCoorElem *ret = remoteCoors.find(one);
01685 ret->dsum = msg->dsum;
01686 delete msg;
01687 }
01688
01689 if(++numCSMAck == (numRemoteClusters+1)){
01690
01691 int fromIdx = coorInstance->fromAtomID;
01692 int toIdx = coorInstance->toAtomID;
01693 int numMyAtoms = toIdx-fromIdx+1;
01694 ResizeArray<Vector> data = coorInstance->data;
01695 ResizeArray<FloatVector> fdata = coorInstance->fdata;
01696 ClusterCoorElem tmp;
01697 for(int i=0; i<numMyAtoms; i++){
01698 if(!simParameters->wrapAll && !isWater[i]) continue;
01699 int cid = clusterID[i];
01700 int lidx = cid-fromIdx;
01701 if(lidx<0){
01702
01703 tmp.clusterId = cid;
01704 ClusterCoorElem *fone = remoteCoors.find(tmp);
01705 if(data.size()) data[i] += fone->dsum;
01706 if(fdata.size()) fdata[i] = fdata[i] + fone->dsum;
01707 }else{
01708 if(lidx==i){
01709 Lattice *lat = &(coorInstance->lattice);
01710 Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
01711 tmpCoorCon[lidx] = (simParameters->wrapNearest ?
01712 lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
01713 }
01714 if(data.size()) data[i] += tmpCoorCon[lidx];
01715 if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
01716 }
01717 }
01718
01719 delete [] tmpCoorCon;
01720 tmpCoorCon = NULL;
01721 CProxy_CollectionMaster cm(mainMaster);
01722 cm.wrapCoorFinished();
01723 numCSMAck = 0;
01724 remoteCoors.clear();
01725 }
01726 #endif
01727 }
01728 #include "ParallelIOMgr.def.h"