20 #include "ParallelIOMgr.decl.h"
29 #if !(defined(__NVCC__) || defined(__HIPCC__))
46     if ( s == -10 ) NAMD_bug("seq == free in CollectionMgr");
49     remaining = numClients;
58     if ( msg->status != vstatus ) {
59       NAMD_bug("CollectProxyVectorInstance vstatus mismatch");
61     if ( msg->seq != seq ) {
62       NAMD_bug("CollectProxyVectorInstance seq mismatch");
65     for( int i = 0; i < size; ++i ) { aid.add(msg->aid[i]); }
68     for( int i = 0; i < size; ++i ) { data.add(msg->data[i]); }
72     for( int i = 0; i < size; ++i ) { fdata.add(msg->fdata[i]); }
74     const int atoms_per_message_target = 100000;
75     return ( ! --remaining || aid.size() > atoms_per_message_target );
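// CollectProxyVectorInstance::append() buffers the atom IDs and the Vector and/or
// FloatVector payload of each client's CollectVectorVarMsg, after checking that the
// message's data status and sequence number match this instance.  It returns true
// (ready to flush) once every client has reported or once more than
// atoms_per_message_target (100000) atom IDs have accumulated.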
79     int numAtoms = aid.size();
85       for(int j=0; j<numAtoms; j++) {
87         msg->data[j] = data[j];
91       for(int j=0; j<numAtoms; j++) {
93         msg->fdata[j] = fdata[j];
97       for(int j=0; j<numAtoms; j++) {
99         msg->data[j] = data[j];
100        msg->fdata[j] = fdata[j];
105    msg->size = numAtoms;
108    if ( remaining ) reset(seq,vstatus,remaining);
131    for( ; c != c_e && (*c)->seq != msg->seq; ++c );
134      for( ; c != c_e && (*c)->notfree(); ++c );
141    if ( (*c)->append(msg) ) {
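// CollectProxyVectorSequence::submitData() first looks for an instance already
// collecting this msg->seq; failing that it reuses the first free instance, then
// appends the message.  When append() returns true, the buffered data is ready to
// be packaged into a combined message (buildMsg(), above) for forwarding.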
159    CkpvAccess(BOCclass_group).ioMgr = thisgroup;
162    inputProcArray = NULL;
164    outputProcArray = NULL;
169    totalMV.x = totalMV.y = totalMV.z = 0.0;
172    numTotalExclusions = 0;
173    numCalcExclusions = 0;
174    numCalcFullExclusions = 0;
176    isOKToRecvHPAtoms = false;
182 #ifdef MEM_OPT_VERSION
193 #if COLLECT_PERFORMANCE_DATA
194    numFixedAtomLookup = 0;
200    delete [] inputProcArray;
201    delete [] outputProcArray;
203    delete [] clusterSize;
205 #ifdef MEM_OPT_VERSION
212 #ifndef OUTPUT_SINGLE_FILE
213 #error OUTPUT_SINGLE_FILE not defined!
222    numInputProcs = simParameters->numinputprocs;
223    numOutputProcs = simParameters->numoutputprocs;
224    numOutputWrts = simParameters->numoutputwrts;
226    numProxiesPerOutputProc = std::min((int)sqrt(CkNumPes()),(CkNumPes()-1)/numOutputProcs-1);
227    if ( numProxiesPerOutputProc < 2 ) numProxiesPerOutputProc = 0;
230      iout << iINFO << "Running with " << numInputProcs << " input processors.\n" << endi;
231 #if OUTPUT_SINGLE_FILE
232      iout << iINFO << "Running with " << numOutputProcs << " output processors (" << numOutputWrts << " of them will output simultaneously).\n" << endi;
234      iout << iINFO << "Running with " << numOutputProcs << " output processors, and each of them will output to its own separate file.\n" << endi;
236      if ( numProxiesPerOutputProc ) {
237        iout << iINFO << "Running with " << numProxiesPerOutputProc << " proxies per output processor.\n" << endi;
243    inputProcArray = new int[numInputProcs];
245    for(int i=0; i<numInputProcs; ++i) {
248    std::sort(inputProcArray, inputProcArray+numInputProcs);
249    for(int i=0; i<numInputProcs; ++i) {
250      if ( CkMyPe() == inputProcArray[i] ) {
251        if ( myInputRank != -1 ) NAMD_bug("Duplicate input proc");
257      iout << iINFO << "INPUT PROC LOCATIONS:";
259      for ( i=0; i<numInputProcs && i < 10; ++i ) {
260        iout << " " << inputProcArray[i];
262      if ( i<numInputProcs ) iout << " ... " << inputProcArray[numInputProcs-1];
267    if(myInputRank!=-1) {
270      int numMyAtoms = numInitMyAtomsOnInput();
271      initAtoms.resize(numMyAtoms+100);
272      initAtoms.resize(numMyAtoms);
273      tmpRecvAtoms.resize(0);
276      tmpRecvAtoms.resize(0);
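// Input-side initialization: the PEs chosen as input processors are recorded in
// inputProcArray, sorted, and checked for duplicates; this PE learns its own
// myInputRank (or -1).  Each input proc then sizes initAtoms to its share of the
// atoms (numInitMyAtomsOnInput()); the resize to numMyAtoms+100 followed by a
// shrink back to numMyAtoms presumably leaves some spare capacity for atoms that
// migrate in later.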
283    outputProcArray = new int[numOutputProcs];
284    outputProcFlags = new char[CkNumPes()];
285    outputProxyArray = new int[numOutputProcs*numProxiesPerOutputProc];
286    myOutputProxies = new int[numOutputProcs];
288    myOutputProxyRank = -1;
289    for(int i=0; i<numOutputProcs; ++i) {
292    std::sort(outputProcArray, outputProcArray+numOutputProcs);
293    for(int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
296    std::sort(outputProxyArray, outputProxyArray+numOutputProcs*numProxiesPerOutputProc,
298    for(int i=0; i<CkNumPes(); ++i) {
299      outputProcFlags[i] = 0;
301    for(int i=0; i<numOutputProcs; ++i) {
302      outputProcFlags[outputProcArray[i]] = 1;
303      if ( CkMyPe() == outputProcArray[i] ) {
304        if ( myOutputRank != -1 ) NAMD_bug("Duplicate output proc");
308    for(int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
309      if ( CkMyPe() == outputProxyArray[i] ) {
310        if ( myOutputRank != -1 ) NAMD_bug("Output proxy is also output proc");
311        if ( myOutputProxyRank != -1 ) NAMD_bug("Duplicate output proxy");
312        myOutputProxyRank = i;
316    for(int i=0; i<numOutputProcs; ++i) {
317      if ( numProxiesPerOutputProc ) {
318        myOutputProxies[i] = outputProxyArray[myProxySet*numOutputProcs+i];
320        myOutputProxies[i] = outputProcArray[i];
325    myOutputProxyPositions = 0;
326    myOutputProxyVelocities = 0;
327    myOutputProxyForces = 0;
330      iout << iINFO << "OUTPUT PROC LOCATIONS:";
332      for ( i=0; i<numOutputProcs && i < 10; ++i ) {
333        iout << " " << outputProcArray[i];
335      if ( i<numOutputProcs ) iout << " ... " << outputProcArray[numOutputProcs-1];
340 #ifdef MEM_OPT_VERSION
341    if(myOutputRank!=-1) {
342      midCM = new CollectionMidMaster(this);
344    remoteClusters.clear();
354    return outputProcFlags[pe];
358    return CProxy_ParallelIOMgr::ckLocalBranch(CkpvAccess(BOCclass_group).ioMgr)->isOutputProcessor(pe);
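// Output-side initialization: outputProcArray lists the output PEs, and
// outputProcFlags is a per-PE lookup table so isOutputProcessor(pe) is O(1)
// (the static wrapper at line 358 routes through the local branch of the group).
// When enough PEs are available, an extra layer of output proxies is set up
// (numProxiesPerOutputProc per output proc); myOutputProxies[i] is the proxy (or,
// without proxies, the output proc itself) that this PE sends its collection data
// for output rank i to.  For example, with 1024 PEs and 32 output procs,
// numProxiesPerOutputProc = min(32, 1023/32 - 1) = 30.  Output procs also create
// their CollectionMidMaster (midCM) here.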
365 #ifdef MEM_OPT_VERSION
366    if(myInputRank!=-1) {
367      int myAtomLIdx, myAtomUIdx;
368      getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
371      molecule->read_binary_atom_info(myAtomLIdx, myAtomUIdx, initAtoms);
377      readCoordinatesAndVelocity();
381      int oRank=atomRankOnOutput(myAtomLIdx);
382      for(int i=oRank; i<numOutputProcs; i++) {
384        getAtomsRangeOnOutput(lIdx, uIdx, i);
385        if(lIdx > myAtomUIdx) break;
386        int fid = lIdx>myAtomLIdx?lIdx:myAtomLIdx;
387        int tid = uIdx>myAtomUIdx?myAtomUIdx:uIdx;
388        for(int j=fid; j<=tid; j++) initAtoms[j-myAtomLIdx].outputRank = i;
393    if(myOutputRank!=-1) {
395      if(!(simParameters->wrapAll || simParameters->wrapWater)) return;
396      readInfoForParOutput();
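// readPerAtomInfo(): each input proc loads its slice [myAtomLIdx, myAtomUIdx] of
// the compressed per-atom records plus coordinates/velocities, then stamps every
// atom with the output rank that will later collect it, by intersecting its own
// input range with each output rank's range.  Output procs additionally call
// readInfoForParOutput(), but only if coordinate wrapping (wrapAll/wrapWater) is on.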
401 void ParallelIOMgr::readCoordinatesAndVelocity()
403 #ifdef MEM_OPT_VERSION
405    int myAtomLIdx, myAtomUIdx;
406    getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
407    int myNumAtoms = myAtomUIdx-myAtomLIdx+1;
414    FILE *ifp = fopen(simParameters->binCoorFile, "rb");
417      sprintf(s, "The binary coordinate file %s cannot be opened on proc %d\n", simParameters->binCoorFile, CkMyPe());
422    fread(&filelen, sizeof(int32),1,ifp);
423    char lenbuf[sizeof(int32)];
424    memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
426    if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
427      iout << iWARN << "Number of atoms in binary file " << simParameters->binCoorFile
428           << " is palindromic, assuming same endian.\n" << endi;
430    if(filelen!=molecule->numAtoms) {
432      memcpy((void *)&filelen, lenbuf, sizeof(int32));
434    if(filelen!=molecule->numAtoms) {
436      sprintf(s, "Incorrect atom count in binary file %s", simParameters->binCoorFile);
442    if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
444    if ( fseeko(ifp, offsetPos, SEEK_CUR) )
448      sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
451    size_t totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
452    if(totalRead!=myNumAtoms) {
454      sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
457    if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
459    for(int i=0; i<myNumAtoms; i++) initAtoms[i].position = tmpData[i];
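// Endianness handling: the 4-byte atom count at the head of the file is compared,
// via lenbuf, against a byte-swapped copy of itself; if the two agree the count is
// palindromic and cannot reveal the file's byte order, hence the warning.  If the
// count does not match molecule->numAtoms as read, the byte-swapped value kept in
// lenbuf is tried (needFlip); if it still disagrees the run dies with "Incorrect
// atom count".  When needFlip is set, flipNum() byte-swaps the myNumAtoms*3 BigReal
// values just read.  Each input proc seeks past the atoms owned by lower input
// ranks (offsetPos) and reads only its own myNumAtoms Vectors.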
463    if(!simParameters->binVelFile) {
467    ifp = fopen(simParameters->binVelFile, "rb");
470      sprintf(s, "The binary velocity file %s cannot be opened on proc %d\n", simParameters->binVelFile, CkMyPe());
474    fread(&filelen, sizeof(int32),1,ifp);
475    memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
477    if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
478      iout << iWARN << "Number of atoms in binary file " << simParameters->binVelFile
479           << " is palindromic, assuming same endian.\n" << endi;
481    if(filelen!=molecule->numAtoms) {
483      memcpy((void *)&filelen, lenbuf, sizeof(int32));
485    if(filelen!=molecule->numAtoms) {
487      sprintf(s, "Incorrect atom count in binary file %s", simParameters->binVelFile);
494    if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
496    if ( fseeko(ifp, offsetPos, SEEK_CUR) )
500      sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
503    totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
504    if(totalRead!=myNumAtoms) {
506      sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
509    if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
511    for(int i=0; i<myNumAtoms; i++) initAtoms[i].velocity = tmpData[i];
516    if(!simParameters->binRefFile) {
517      for(int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = initAtoms[i].position;
519    ifp = fopen(simParameters->binRefFile, "rb");
522      sprintf(s, "The binary reference coordinate file %s cannot be opened on proc %d\n", simParameters->binRefFile, CkMyPe());
526    fread(&filelen, sizeof(int32),1,ifp);
527    memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
529    if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
530      iout << iWARN << "Number of atoms in binary file " << simParameters->binRefFile
531           << " is palindromic, assuming same endian.\n" << endi;
533    if(filelen!=molecule->numAtoms) {
535      memcpy((void *)&filelen, lenbuf, sizeof(int32));
537    if(filelen!=molecule->numAtoms) {
539      sprintf(s, "Incorrect atom count in binary file %s", simParameters->binRefFile);
546    if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
548    if ( fseeko(ifp, offsetPos, SEEK_CUR) )
552      sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
555    totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
556    if(totalRead!=myNumAtoms) {
558      sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
561    if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
563    for(int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = tmpData[i];
570 void ParallelIOMgr::readInfoForParOutput()
573    getMyAtomsRangeOnOutput(fromIdx,toIdx);
574    int numMyAtoms = toIdx-fromIdx+1;
576    clusterID = new int[numMyAtoms];
577    clusterSize = new int[numMyAtoms];
581    FILE *ifp = fopen(simParameters->binAtomFile, "rb");
585    fread(&magicNum, sizeof(int), 1, ifp);
591    isWater = new char[numMyAtoms];
593    int64 offset = sizeof(char)*((int64)(fromIdx-molecule->numAtoms));
595    if ( _fseeki64(ifp, offset, SEEK_END) )
597    if ( fseeko(ifp, offset, SEEK_END) )
601      sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
604    fread(isWater, sizeof(char), numMyAtoms, ifp);
608    offset = sizeof(int)*((int64)(fromIdx-molecule->numAtoms))
609             - sizeof(char)*((int64)(molecule->numAtoms));
611    if ( _fseeki64(ifp, offset, SEEK_END) )
613    if ( fseeko(ifp, offset, SEEK_END) )
617      sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
620    fread(clusterID, sizeof(int), numMyAtoms, ifp);
621    if(needFlip) flipNum((char *)clusterID, sizeof(int), numMyAtoms);
626    for(int i=0; i<numMyAtoms; i++) {
628      int cid = clusterID[i];
634      CmiAssert(cid<=toIdx);
641        remoteClusters.add(one);
646        int lidx = cid-fromIdx;
656    printf("output[%d]=%d: prepare to send %d remote msgs for cluster size\n",
657           myOutputRank, CkMyPe(), remoteClusters.size());
660    numRemoteClusters = remoteClusters.size();
662    CProxy_ParallelIOMgr pIO(thisgroup);
664    for(iter=iter.begin(); iter!=iter.end(); iter++) {
669      int dstRank = atomRankOnOutput(iter->clusterId);
670      pIO[outputProcArray[dstRank]].recvClusterSize(msg);
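// readInfoForParOutput(): each output proc reads its slice of the per-atom isWater
// flags and cluster IDs directly from the binary atom file, seeking backwards from
// SEEK_END (the two arrays are stored as trailing sections of the file), and
// byte-swaps the IDs if needFlip is set.  A cluster whose first atom (its cluster
// ID) lies below fromIdx is owned by a lower output rank (note the assert
// cid <= toIdx), so it is recorded in remoteClusters and a ClusterSizeMsg is sent
// to the owning rank via recvClusterSize(); locally owned clusters are counted
// through lidx = cid - fromIdx.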
684    if(myOutputRank==-1) return;
685    if(!(simParameters->wrapAll || simParameters->wrapWater)) return;
688    getMyAtomsRangeOnOutput(fromIdx,toIdx);
691    for(int i=0; i<csmBuf.size(); i++) {
697    CProxy_ParallelIOMgr pIO(thisgroup);
698    for(int i=0; i<csmBuf.size(); i++) {
702      pIO[outputProcArray[msg->srcRank]].recvFinalClusterSize(msg);
704    numRemoteReqs = csmBuf.size();
712    if(numRemoteClusters!=0){
713      recvFinalClusterSize(NULL);
716      int numMyAtoms = toIdx-fromIdx+1;
717      for(int i=0; i<numMyAtoms; i++) {
718        int lidx = clusterID[i]-fromIdx;
719        clusterSize[i] = clusterSize[lidx];
722 #if 0 //write out cluster debug info
724    sprintf(fname, "cluster.par.%d", CkMyPe());
725    FILE *ofp = fopen(fname, "w");
726    for(int i=0; i<numMyAtoms; i++) {
727      fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
741    CmiAssert(ret!=NULL);
747    if(++numCSMAck == (numRemoteClusters+1)) {
750      getMyAtomsRangeOnOutput(fromIdx,toIdx);
751      int numMyAtoms = toIdx-fromIdx+1;
753      for(int i=0; i<numMyAtoms; i++) {
754        int cid = clusterID[i];
755        int lidx = cid-fromIdx;
762        clusterSize[i] = clusterSize[lidx];
766      remoteClusters.clear();
768 #if 0 //write out cluster debug info
770      sprintf(fname, "cluster.par.%d", CkMyPe());
771      FILE *ofp = fopen(fname, "w");
772      for(int i=0; i<numMyAtoms; i++) {
773        fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
783    if(myInputRank==-1) return;
791    for(int i=0; i<initAtoms.size(); i++) {
793      int parentRank = atomInitRankOnInput(initAtoms[i].MPID);
794      if(parentRank != myInputRank) {
795        toMigrateList.add(i);
796        initAtoms[i].isValid = false;
797        int tmp = parentRank - myInputRank;
798        tmp = tmp>0 ? tmp : -tmp;
799        if(tmp > maxOffset) maxOffset = tmp;
808    for(int i=0; i<toMigrateList.size(); i++) {
809      int idx = toMigrateList[i];
810      int parentRank = atomInitRankOnInput(initAtoms[idx].MPID);
812      int offset = parentRank - myInputRank + maxOffset;
813      migLists[offset].add(initAtoms[idx]);
816    CProxy_ParallelIOMgr pIO(thisgroup);
817    for(int i=0; i<2*maxOffset+1; i++) {
818      int migLen = migLists[i].size();
823      int destRank = i-maxOffset+myInputRank;
824      pIO[inputProcArray[destRank]].recvAtomsMGrp(msg);
829    toMigrateList.clear();
835    for(int i=0; i<msg->length; i++) {
836      tmpRecvAtoms.add((msg->atomList)[i]);
843    if(myInputRank==-1) return;
845    for(int i=0; i<tmpRecvAtoms.size(); i++) {
846      tmpRecvAtoms[i].isValid = true;
847      initAtoms.add(tmpRecvAtoms[i]);
849    tmpRecvAtoms.clear();
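// Atom migration between input procs: an atom whose migration-group parent (MPID)
// is owned by a different input rank is marked invalid locally and queued in
// toMigrateList.  The queued atoms are then bucketed by rank offset into
// migLists[0 .. 2*maxOffset], and one MoveInputAtomsMsg is sent per destination
// rank.  recvAtomsMGrp() buffers incoming atoms in tmpRecvAtoms, and
// integrateMigratedAtoms() re-validates them, appends them to initAtoms, and
// sorts the result.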
852    std::sort(initAtoms.begin(), initAtoms.end());
857    int numFixedRigidBonds = 0;
858    if(molecule->numRigidBonds){
859      int parentIsFixed = 0;
860      for(int i=0; i<initAtoms.size(); i++) {
869          numFixedRigidBonds++;
874          numFixedRigidBonds++;
880    int numFixedGroups = 0;
881    if(molecule->numFixedAtoms){
882      for(int i=0; i<initAtoms.size();) {
895        if(allFixed) numFixedGroups++;
901    CProxy_ParallelIOMgr pIO(thisgroup);
905    pIO[0].recvHydroBasedCounter(msg);
910 #ifdef MEM_OPT_VERSION
911    if(myInputRank==-1) return;
913    CProxy_ParallelIOMgr pIO(thisgroup);
933    for(int i=0; i<initAtoms.size(); i++) {
955    if(molecule->numFixedAtoms>0 && ! simParameters->fixedAtomsForces) {
961      int sAId = initAtoms[0].id;
963      for(int i=0; i<initAtoms.size(); i++) {
966        int myAId = initAtoms[i].id;
969        if(!initAtoms[i].atomFixed) {
981        for(int j=0; j<thisSig->bondCnt; j++) {
983          int a1 = myAId + bsig->offset[0];
988        for(int j=0; j<thisSig->angleCnt; j++) {
990          int a1 = myAId + bsig->offset[0];
991          int a2 = myAId + bsig->offset[1];
992          if(!isAtomFixed(sAId, a1) || !isAtomFixed(sAId, a2))
999          int a1 = myAId + bsig->offset[0];
1000         int a2 = myAId + bsig->offset[1];
1001         int a3 = myAId + bsig->offset[2];
1002         if(!isAtomFixed(sAId, a1) ||
1003            !isAtomFixed(sAId, a2) ||
1004            !isAtomFixed(sAId, a3))
1011         int a1 = myAId + bsig->offset[0];
1012         int a2 = myAId + bsig->offset[1];
1013         int a3 = myAId + bsig->offset[2];
1014         if(!isAtomFixed(sAId, a1) ||
1015            !isAtomFixed(sAId, a2) ||
1016            !isAtomFixed(sAId, a3))
1023         int a1 = myAId + bsig->offset[0];
1024         int a2 = myAId + bsig->offset[1];
1025         int a3 = myAId + bsig->offset[2];
1026         int a4 = myAId + bsig->offset[3];
1027         int a5 = myAId + bsig->offset[4];
1028         int a6 = myAId + bsig->offset[5];
1029         int a7 = myAId + bsig->offset[6];
1031         if(!isAtomFixed(sAId, a1) ||
1032            !isAtomFixed(sAId, a2) ||
1033            !isAtomFixed(sAId, a3) ||
1034            !isAtomFixed(sAId, a4) ||
1035            !isAtomFixed(sAId, a5) ||
1036            !isAtomFixed(sAId, a6) ||
1037            !isAtomFixed(sAId, a7))
1048           int thisAId = exclSig->modOffset[j]+myAId;
1055         int a1 = myAId + bsig->offset[0];
1056         int a2 = myAId + bsig->offset[1];
1057         if(!isAtomFixed(sAId, a1) ||
1058            !isAtomFixed(sAId, a2))
1062 #if COLLECT_PERFORMANCE_DATA
1063    printf("Num fixedAtom lookup on proc %d is %d\n", CkMyPe(), numFixedAtomLookup);
1077   if(!simParameters->comMove) {
1085     for (int i=0; i<initAtoms.size(); i++) {
1086       msg->totalMV += initAtoms[i].mass * initAtoms[i].velocity;
1091   pIO[0].recvMolInfo(msg);
1098   molecule->numBonds += msg->numBonds;
1116   if(!simParameters->comMove) {
1120   if(++procsReceived == numInputProcs) {
1122     msg->numBonds = molecule->numBonds;
1140     if(!simParameters->comMove) {
1144     CProxy_ParallelIOMgr pIO(thisgroup);
1145     pIO.bcastMolInfo(msg);
1154 #ifdef MEM_OPT_VERSION
1155   if(myInputRank!=-1) {
1156     if(!simParameters->comMove) {
1159       for (int i=0; i<initAtoms.size(); i++) initAtoms[i].velocity -= val;
1188     iout << iINFO << "LOADED " << molecule->numTotalExclusions << " TOTAL EXCLUSIONS\n" << endi;
1189     if(!simParameters->comMove) {
1190       iout << iINFO << "REMOVING COM VELOCITY "
1202   if(++hydroMsgRecved == numInputProcs){
1205     CProxy_ParallelIOMgr pIO(thisgroup);
1206     pIO.bcastHydroBasedCounter(msg);
1212 #ifdef MEM_OPT_VERSION
1223   iout << iINFO << "****************************\n";
1224   iout << iINFO << "STRUCTURE SUMMARY:\n";
1225   iout << iINFO << molecule->numAtoms << " ATOMS\n";
1226   iout << iINFO << molecule->numBonds << " BONDS\n";
1227   iout << iINFO << molecule->numAngles << " ANGLES\n";
1228   iout << iINFO << molecule->numDihedrals << " DIHEDRALS\n";
1229   iout << iINFO << molecule->numImpropers << " IMPROPERS\n";
1230   iout << iINFO << molecule->numCrossterms << " CROSSTERMS\n";
1231   iout << iINFO << molecule->numExclusions << " EXCLUSIONS\n";
1234   if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeXplorOn)){
1235     iout << iINFO << molecule->numMultipleDihedrals
1236          << " DIHEDRALS WITH MULTIPLE PERIODICITY (BASED ON PSF FILE)\n";
1238   if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeCharmmOn)){
1239     iout << iINFO << molecule->numMultipleDihedrals
1240          << " DIHEDRALS WITH MULTIPLE PERIODICITY IGNORED (BASED ON PSF FILE) \n";
1242          << " CHARMM MULTIPLICITIES BASED ON PARAMETER FILE INFO! \n";
1246   if (molecule->numMultipleImpropers){
1247     iout << iINFO << molecule->numMultipleImpropers
1248          << " IMPROPERS WITH MULTIPLE PERIODICITY\n";
1251   if (simParameters->fixedAtomsOn)
1252     iout << iINFO << molecule->numFixedAtoms << " FIXED ATOMS\n";
1255   if (simParameters->rigidBonds)
1256     iout << iINFO << molecule->numRigidBonds << " RIGID BONDS\n";
1258   if (simParameters->fixedAtomsOn && simParameters->rigidBonds)
1259     iout << iINFO << molecule->numFixedRigidBonds <<
1260             " RIGID BONDS BETWEEN FIXED ATOMS\n";
1262   iout << iINFO << molecule->num_deg_freedom(1)
1263        << " DEGREES OF FREEDOM\n";
1265   iout << iINFO << molecule->numHydrogenGroups << " HYDROGEN GROUPS\n";
1266   iout << iINFO << molecule->maxHydrogenGroupSize
1267        << " ATOMS IN LARGEST HYDROGEN GROUP\n";
1268   iout << iINFO << molecule->numMigrationGroups << " MIGRATION GROUPS\n";
1269   iout << iINFO << molecule->maxMigrationGroupSize
1270        << " ATOMS IN LARGEST MIGRATION GROUP\n";
1271   if (simParameters->fixedAtomsOn)
1273     iout << iINFO << molecule->numFixedGroups <<
1274             " HYDROGEN GROUPS WITH ALL ATOMS FIXED\n";
1277   iout << iINFO << "TOTAL MASS = " << totalMass << " amu\n";
1278   iout << iINFO << "TOTAL CHARGE = " << totalCharge << " e\n";
1280   BigReal volume = simParameters->lattice.volume();
1283        << ((totalMass/volume) / 0.6022) << " g/cm^3\n";
1285        << (molecule->numAtoms/volume) << " atoms/A^3\n";
1288   iout << iINFO << "*****************************\n";
1297   if(myInputRank==-1) return;
1307   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1308   PatchMgr *patchMgr = pm.ckLocalBranch();
1311   const Lattice lattice = simParameters->lattice;
1312   for(int i=0; i<initAtoms.size(); i++) {
1318     eachPatchAtomList[pid].push_back(i);
1321   CProxy_ParallelIOMgr pIO(thisgroup);
1324   for(int i=0; i<numPatches; i++) {
1325     int cursize = eachPatchAtomList[i].size();
1326     if(cursize>0) patchCnt++;
1330   if(simParameters->fixedAtomsOn) {
1338   for(int i=0; i<numPatches; i++) {
1339     int cursize = eachPatchAtomList[i].size();
1341     if ( cursize > USHRT_MAX ) {
1343       sprintf(errstr, "Patch %d exceeds %d atoms.", i, USHRT_MAX);
1352   if(simParameters->fixedAtomsOn) {
1354     for(int i=0; i<numPatches; i++) {
1355       int cursize = eachPatchAtomList[i].size();
1358       for(int j=0; j<cursize; j++) {
1359         int aid = eachPatchAtomList[i][j];
1361         fixedCnt += initAtoms[aid].atomFixed;
1369   pIO[0].recvAtomsCntPerPatch(msg);
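// calcAtomsInEachPatch(): each input proc bins its atoms into patches
// (eachPatchAtomList, indexed by patch id), counts how many of its patches are
// non-empty, and fills an AtomsCntPerPatchMsg with per-patch atom counts (plus
// per-patch fixed-atom counts when fixedAtomsOn).  The counts travel as unsigned
// shorts, hence the hard error when any single patch exceeds USHRT_MAX atoms.
// The message goes to the first element of the group (pIO[0]), which accumulates
// the global per-patch totals in recvAtomsCntPerPatch().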
1375 #ifdef MEM_OPT_VERSION
1377   for(int i=0; i<msg->length; i++) {
1379     int oldNum = patchMap->numAtoms(pid);
1382       sprintf(errstr, "Patch %d exceeds %d atoms.", pid, USHRT_MAX);
1385     patchMap->setNumAtoms(pid, oldNum+msg->atomsCntList[i]);
1386     if(simParameters->fixedAtomsOn) {
1387       oldNum = patchMap->numFixedAtoms(pid);
1393   if(++procsReceived == numInputProcs) {
1398     for(int i=0; i<patchMap->numPatches(); i++) {
1399       int cnt = patchMap->numAtoms(i);
1407     iout << iINFO << "LARGEST PATCH (" << maxPatch <<
1408             ") HAS " << maxAtoms << " ATOMS\n" << endi;
1409     if ( totalAtoms != Node::Object()->molecule->numAtoms ) {
1411       sprintf(errstr, "Incorrect atom count in void ParallelIOMgr::recvAtomsCntPerPatch: %d vs %d", totalAtoms, Node::Object()->molecule->numAtoms);
1425 #ifdef MEM_OPT_VERSION
1426   if(myInputRank==-1) return;
1428   if ( sendAtomsThread == 0 ) {
1430     CthAwaken(sendAtomsThread);
1433   sendAtomsThread = 0;
1434   numAcksOutstanding = 0;
1444   for(int i=0; i<numPatches; i++) {
1445     if(eachPatchAtomList[i].size()==0) continue;
1446     int onPE = patchMap->node(i);
1447     if ( procList[onPE].size() == 0 ) pesToSend.add(onPE);
1448     procList[onPE].add(i);
1456   CProxy_ParallelIOMgr pIO(thisgroup);
1457   for(int k=0; k<pesToSend.size(); k++) {
1458     const int i = pesToSend[k];
1459     int len = procList[i].size();
1460     if(len==0) continue;
1464     for(int j=0; j<len; j++) {
1465       int pid = procList[i][j];
1466       int atomCnt = eachPatchAtomList[pid].size();
1468     if ( numAcksOutstanding >= 10 ) {
1469       sendAtomsThread = CthSelf();
1472     ++numAcksOutstanding;
1475     msg->from = CkMyPe();
1480     for(int k=0; k<atomCnt; k++, atomIdx++) {
1481       int aid = eachPatchAtomList[pid][k];
1485       one.hydVal = initAtoms[aid].hydList;
1488     pIO[i].recvAtomsToHomePatchProcs(msg);
1491     procList[i].clear();
1505   --numAcksOutstanding;
1506   if ( sendAtomsThread ) {
1507     CthAwaken(sendAtomsThread);
1508     sendAtomsThread = 0;
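// sendAtomsToHomePatchProcs(): the input proc groups its patches by home PE
// (procList, keyed by patchMap->node(pid)) and sends one MovePatchAtomsMsg per
// destination PE.  Sends are throttled: with 10 or more unacknowledged messages
// outstanding the Charm++ thread records itself in sendAtomsThread and suspends;
// each ackAtomsToHomePatchProcs() decrements numAcksOutstanding and awakens the
// suspended thread so sending can resume.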
1517   if(!isOKToRecvHPAtoms) {
1518     prepareHomePatchAtomList();
1519     isOKToRecvHPAtoms = true;
1522   int numRecvPatches = msg->patchCnt;
1524   for(int i=0; i<numRecvPatches; i++) {
1527     int idx = binaryFindHPID(pid);
1528     for(int j=0; j<size; j++, aid++) {
1529       hpAtomsList[idx].add(msg->allAtoms[aid]);
1536 void ParallelIOMgr::prepareHomePatchAtomList()
1539   for(int i=0; i<patchMap->numPatches(); i++) {
1540     if(patchMap->node(i)==CkMyPe()) {
1544   if(hpIDList.size()>0)
1548 int ParallelIOMgr::binaryFindHPID(int pid)
1555   lIdx=hpIDList.size()-1;
1557   while(rIdx<=lIdx ) {
1558     int idx = (rIdx+lIdx)/2;
1559     int curPid = hpIDList[idx];
1563     } else if(pid<curPid) {
1572   CmiAssert(retIdx!=-1);
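// recvAtomsToHomePatchProcs() lazily builds hpIDList, the sorted list of patch IDs
// homed on this PE (prepareHomePatchAtomList), then scatters the incoming atoms
// into hpAtomsList by patch.  binaryFindHPID() is a plain binary search over
// hpIDList and asserts that the requested patch ID is actually homed here.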
1578 #ifdef MEM_OPT_VERSION
1581   int numPids = hpIDList.size();
1584     if(assignedPids == 0) return;
1589     CmiAssert(isOKToRecvHPAtoms == false);
1591     CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1592     PatchMgr *patchMgr = pm.ckLocalBranch();
1593     for(int i=0; i<patchMap->numPatches(); i++) {
1594       if(patchMap->node(i)==CkMyPe()) {
1602   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1603   PatchMgr *patchMgr = pm.ckLocalBranch();
1606   for(int i=0; i<numPids; i++) {
1607     int pid = hpIDList[i];
1610     std::sort(hpAtomsList[i].begin(), hpAtomsList[i].end());
1616   delete [] hpAtomsList;
1624 #ifdef MEM_OPT_VERSION
1625   molecule->delAtomNames();
1626   molecule->delChargeSpace();
1630   if(!CkMyPe() && !simParameters->freeEnergyOn)
1631     molecule->delMassSpace();
1633   molecule->delFixedAtoms();
1641 int ParallelIOMgr::numMyAtoms(int rank, int numProcs)
1643   if(rank==-1) return -1;
1644   int avgNum = molecule->numAtoms/numProcs;
1645   int remainder = molecule->numAtoms%numProcs;
1646   if(rank<remainder) return avgNum+1;
1650 int ParallelIOMgr::atomRank(int atomID, int numProcs)
1652   int avgNum = molecule->numAtoms/numProcs;
1653   int remainder = molecule->numAtoms%numProcs;
1654   int midLimit = remainder*(avgNum+1);
1656   if(atomID<midLimit) {
1657     idx = atomID/(avgNum+1);
1659     idx = remainder+(atomID-midLimit)/avgNum;
1664 void ParallelIOMgr::getMyAtomsRange(int &lowerIdx, int &upperIdx, int rank, int numProcs)
1672   int avgNum = molecule->numAtoms/numProcs;
1673   int remainder = molecule->numAtoms%numProcs;
1674   if(rank<remainder) {
1675     lowerIdx = rank*(avgNum+1);
1676     upperIdx = lowerIdx+avgNum;
1678     int midLimit = remainder*(avgNum+1);
1679     lowerIdx = midLimit+(rank-remainder)*avgNum;
1680     upperIdx = lowerIdx+avgNum-1;
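// numMyAtoms(), atomRank(), and getMyAtomsRange() implement the same block
// decomposition: the first (numAtoms % numProcs) ranks each own
// (numAtoms/numProcs)+1 consecutive atoms, the remaining ranks own
// numAtoms/numProcs.  Illustrative example (not from the source): numAtoms = 10,
// numProcs = 3 gives avgNum = 3, remainder = 1, so rank 0 owns atoms [0..3],
// rank 1 owns [4..6], rank 2 owns [7..9], and atomRank(5, 3) = 1.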
1684 int ParallelIOMgr::calcMyOutputProxyClients() {
1686   int myOutputProxyClients = 0;
1687   int myset = myOutputProxyRank / numOutputProcs;
1688   for(int i=0; i<CkNumPes(); ++i) {
1689     if ( (i*numProxiesPerOutputProc)/CkNumPes() == myset &&
1691       ++myOutputProxyClients;
1694   return myOutputProxyClients;
1699 #ifdef MEM_OPT_VERSION
1700   if ( myOutputRank != -1 ) {
1701     int ready = midCM->receivePositions(msg);
1703       CProxy_CollectionMaster cm(mainMaster);
1704       cm.receiveOutputPosReady(msg->seq);
1707   } else if ( myOutputProxyRank != -1 ) {
1708     if ( ! myOutputProxyPositions ) {
1712     if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receivePositions(newmsg);
1715     NAMD_bug("ParallelIOMgr::receivePositions on bad pe");
1722 #ifdef MEM_OPT_VERSION
1723   if ( myOutputRank != -1 ) {
1724     int ready = midCM->receiveVelocities(msg);
1726       CProxy_CollectionMaster cm(mainMaster);
1727       cm.receiveOutputVelReady(msg->seq);
1730   } else if ( myOutputProxyRank != -1 ) {
1731     if ( ! myOutputProxyVelocities ) {
1735     if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveVelocities(newmsg);
1738     NAMD_bug("ParallelIOMgr::receiveVelocities on bad pe");
1745 #ifdef MEM_OPT_VERSION
1746   if ( myOutputRank != -1 ) {
1747     int ready = midCM->receiveForces(msg);
1749       CProxy_CollectionMaster cm(mainMaster);
1750       cm.receiveOutputForceReady(msg->seq);
1753   } else if ( myOutputProxyRank != -1 ) {
1754     if ( ! myOutputProxyForces ) {
1758     if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveForces(newmsg);
1761     NAMD_bug("ParallelIOMgr::receiveForces on bad pe");
1769 #ifdef MEM_OPT_VERSION
1770   double iotime = CmiWallTimer();
1771   midCM->disposePositions(seq);
1772   iotime = CmiWallTimer()-iotime+prevT;
1774 #if OUTPUT_SINGLE_FILE
1776   if(myOutputRank == getMyOutputGroupHighestRank()) {
1778     CProxy_CollectionMaster cm(mainMaster);
1779     cm.startNextRoundOutputPos(iotime);
1781     CProxy_ParallelIOMgr io(thisgroup);
1782     io[outputProcArray[myOutputRank+1]].disposePositions(seq, iotime);
1786   CProxy_CollectionMaster cm(mainMaster);
1787   cm.startNextRoundOutputPos(iotime);
1795 #ifdef MEM_OPT_VERSION
1796   double iotime = CmiWallTimer();
1797   midCM->disposeVelocities(seq);
1798   iotime = CmiWallTimer()-iotime+prevT;
1800 #if OUTPUT_SINGLE_FILE
1802   if(myOutputRank==getMyOutputGroupHighestRank()) {
1804     CProxy_CollectionMaster cm(mainMaster);
1805     cm.startNextRoundOutputVel(iotime);
1807     CProxy_ParallelIOMgr io(thisgroup);
1808     io[outputProcArray[myOutputRank+1]].disposeVelocities(seq, iotime);
1812   CProxy_CollectionMaster cm(mainMaster);
1813   cm.startNextRoundOutputVel(iotime);
1821 #ifdef MEM_OPT_VERSION
1822   double iotime = CmiWallTimer();
1823   midCM->disposeForces(seq);
1824   iotime = CmiWallTimer()-iotime+prevT;
1826 #if OUTPUT_SINGLE_FILE
1828   if(myOutputRank==getMyOutputGroupHighestRank()) {
1830     CProxy_CollectionMaster cm(mainMaster);
1831     cm.startNextRoundOutputForce(iotime);
1833     CProxy_ParallelIOMgr io(thisgroup);
1834     io[outputProcArray[myOutputRank+1]].disposeForces(seq, iotime);
1838   CProxy_CollectionMaster cm(mainMaster);
1839   cm.startNextRoundOutputForce(iotime);
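// disposePositions/Velocities/Forces: each output proc writes its slice through
// the CollectionMidMaster and adds the elapsed I/O time to the accumulated prevT.
// With OUTPUT_SINGLE_FILE the writes are serialized within a write group: a rank
// that is not the highest in its group (getMyOutputGroupHighestRank) forwards the
// call to the next output proc, carrying the accumulated time, and the last rank
// reports the total to the CollectionMaster to start the next output round.
// Without OUTPUT_SINGLE_FILE every output proc writes its own file and reports
// directly.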
1848 #ifdef MEM_OPT_VERSION
1849   coorInstance = midCM->getReadyPositions(seq);
1851   coorInstance->lattice = lat;
1852   int fromAtomID = coorInstance->fromAtomID;
1853   int toAtomID = coorInstance->toAtomID;
1861   int dsize = data.size();
1862   int numMyAtoms = toAtomID-fromAtomID+1;
1863   tmpCoorCon = new Vector[numMyAtoms];
1866   for(int i=0; i<numMyAtoms; i++){
1867     tmpCoorCon[i] = 0.0;
1868     int cid = clusterID[i];
1875         one.dsum = fdata[i];
1879         remoteCoors.add(one);
1882         ret->dsum += fdata[i];
1884         ret->dsum += data[i];
1888       tmpCoorCon[cid-fromAtomID] += fdata[i];
1890       tmpCoorCon[cid-fromAtomID] += data[i];
1896   CmiAssert(numRemoteClusters == remoteCoors.size());
1898   CProxy_ParallelIOMgr pIO(thisgroup);
1900   for(iter=iter.begin(); iter!=iter.end(); iter++){
1904     msg->dsum = iter->dsum;
1905     int dstRank = atomRankOnOutput(iter->clusterId);
1906     pIO[outputProcArray[dstRank]].recvClusterCoor(msg);
1911   recvClusterCoor(NULL);
1922   if(msg!=NULL) ccmBuf.add(msg);
1925   if(++numReqRecved == (numRemoteReqs+1)){
1927     integrateClusterCoor();
1931 void ParallelIOMgr::integrateClusterCoor(){
1932 #ifdef MEM_OPT_VERSION
1933   int fromIdx = coorInstance->fromAtomID;
1934   int toIdx = coorInstance->toAtomID;
1935   for(int i=0; i<ccmBuf.size(); i++){
1938     tmpCoorCon[lidx] += msg->dsum;
1942   CProxy_ParallelIOMgr pIO(thisgroup);
1943   for(int i=0; i<ccmBuf.size(); i++){
1946     if(simParameters->wrapAll || isWater[lidx]) {
1947       Lattice *lat = &(coorInstance->lattice);
1948       Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1949       msg->dsum = (simParameters->wrapNearest ?
1954     pIO[outputProcArray[msg->srcRank]].recvFinalClusterCoor(msg);
1961   if(numRemoteClusters!=0){
1962     recvFinalClusterCoor(NULL);
1966   int numMyAtoms = toIdx-fromIdx+1;
1969   for(int i=0; i<numMyAtoms; i++){
1970     if(!simParameters->wrapAll && !isWater[i]) continue;
1971     int lidx = clusterID[i]-fromIdx;
1974       Lattice *lat = &(coorInstance->lattice);
1975       Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1976       tmpCoorCon[lidx] = (simParameters->wrapNearest ?
1979     if(data.size()) data[i] += tmpCoorCon[lidx];
1981     if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
1984   delete [] tmpCoorCon;
1986   CProxy_CollectionMaster cm(mainMaster);
1987   cm.wrapCoorFinished();
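// wrapCoor()/integrateClusterCoor(): tmpCoorCon accumulates, per locally owned
// cluster, the sum of its atoms' coordinates; contributions belonging to clusters
// owned by another output rank travel via ClusterCoorMsg (recvClusterCoor /
// recvFinalClusterCoor).  Once all contributions are in, each cluster's center
// tmpCoorCon[lidx]/clusterSize[lidx] is turned into a periodic translation with
// wrap_nearest_delta() or wrap_delta() (depending on wrapNearest), and that
// translation is added to every atom of the cluster in data/fdata.  Unless
// wrapAll is set, only water clusters (isWater) are wrapped.  When finished, the
// CollectionMaster is told via wrapCoorFinished().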
1993 #ifdef MEM_OPT_VERSION
2002   if(++numCSMAck == (numRemoteClusters+1)){
2004     int fromIdx = coorInstance->fromAtomID;
2005     int toIdx = coorInstance->toAtomID;
2006     int numMyAtoms = toIdx-fromIdx+1;
2010     for(int i=0; i<numMyAtoms; i++){
2011       if(!simParameters->wrapAll && !isWater[i]) continue;
2012       int cid = clusterID[i];
2013       int lidx = cid-fromIdx;
2018         if(data.size()) data[i] += fone->dsum;
2019         if(fdata.size()) fdata[i] = fdata[i] + fone->dsum;
2022         Lattice *lat = &(coorInstance->lattice);
2023         Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
2024         tmpCoorCon[lidx] = (simParameters->wrapNearest ?
2027         if(data.size()) data[i] += tmpCoorCon[lidx];
2028         if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
2032     delete [] tmpCoorCon;
2034     CProxy_CollectionMaster cm(mainMaster);
2035     cm.wrapCoorFinished();
2037     remoteCoors.clear();
2041 #include "ParallelIOMgr.def.h"