20 #include "ParallelIOMgr.decl.h"
41 if ( s == -10 )
NAMD_bug(
"seq == free in CollectionMgr");
44 remaining = numClients;
53 if ( msg->
status != vstatus ) {
54 NAMD_bug(
"CollectProxyVectorInstance vstatus mismatch");
56 if ( msg->
seq != seq ) {
57 NAMD_bug(
"CollectProxyVectorInstance seq mismatch");
60 for(
int i = 0; i < size; ++i ) { aid.add(msg->
aid[i]); }
63 for(
int i = 0; i < size; ++i ) { data.add(msg->
data[i]); }
67 for(
int i = 0; i < size; ++i ) { fdata.add(msg->
fdata[i]); }
69 const int atoms_per_message_target = 100000;
70 return ( ! --remaining || aid.size() > atoms_per_message_target );
74 int numAtoms = aid.
size();
80 for(
int j=0; j<numAtoms; j++) {
82 msg->
data[j] = data[j];
86 for(
int j=0; j<numAtoms; j++) {
88 msg->
fdata[j] = fdata[j];
92 for(
int j=0; j<numAtoms; j++) {
94 msg->
data[j] = data[j];
95 msg->
fdata[j] = fdata[j];
100 msg->
size = numAtoms;
103 if ( remaining ) reset(seq,vstatus,remaining);
126 for( ; c != c_e && (*c)->
seq != msg->
seq; ++c );
129 for( ; c != c_e && (*c)->
notfree(); ++c );
136 if ( (*c)->append(msg) ) {
154 CkpvAccess(BOCclass_group).ioMgr = thisgroup;
157 inputProcArray = NULL;
159 outputProcArray = NULL;
164 totalMV.x = totalMV.y = totalMV.z = 0.0;
167 numTotalExclusions = 0;
168 numCalcExclusions = 0;
169 numCalcFullExclusions = 0;
171 isOKToRecvHPAtoms =
false;
177 #ifdef MEM_OPT_VERSION
188 #if COLLECT_PERFORMANCE_DATA
189 numFixedAtomLookup = 0;
195 delete [] inputProcArray;
196 delete [] outputProcArray;
198 delete [] clusterSize;
200 #ifdef MEM_OPT_VERSION
207 #ifndef OUTPUT_SINGLE_FILE
208 #error OUTPUT_SINGLE_FILE not defined!
217 numInputProcs = simParameters->numinputprocs;
218 numOutputProcs = simParameters->numoutputprocs;
219 numOutputWrts = simParameters->numoutputwrts;
221 numProxiesPerOutputProc = std::min((
int)sqrt(CkNumPes()),(CkNumPes()-1)/numOutputProcs-1);
222 if ( numProxiesPerOutputProc < 2 ) numProxiesPerOutputProc = 0;
225 iout <<
iINFO <<
"Running with " <<numInputProcs<<
" input processors.\n"<<
endi;
226 #if OUTPUT_SINGLE_FILE
227 iout <<
iINFO <<
"Running with " <<numOutputProcs<<
" output processors ("<<numOutputWrts<<
" of them will output simultaneously).\n"<<
endi;
229 iout <<
iINFO <<
"Running with " <<numOutputProcs<<
" output processors, and each of them will output to its own separate file.\n"<<
endi;
231 if ( numProxiesPerOutputProc ) {
232 iout <<
iINFO <<
"Running with " <<numProxiesPerOutputProc<<
" proxies per output processor.\n"<<
endi;
238 inputProcArray =
new int[numInputProcs];
240 for(
int i=0; i<numInputProcs; ++i) {
243 std::sort(inputProcArray, inputProcArray+numInputProcs);
244 for(
int i=0; i<numInputProcs; ++i) {
245 if ( CkMyPe() == inputProcArray[i] ) {
246 if ( myInputRank != -1 )
NAMD_bug(
"Duplicate input proc");
252 iout <<
iINFO <<
"INPUT PROC LOCATIONS:";
254 for ( i=0; i<numInputProcs && i < 10; ++i ) {
255 iout <<
" " << inputProcArray[i];
257 if ( i<numInputProcs )
iout <<
" ... " << inputProcArray[numInputProcs-1];
262 if(myInputRank!=-1) {
265 int numMyAtoms = numInitMyAtomsOnInput();
266 initAtoms.resize(numMyAtoms+100);
267 initAtoms.resize(numMyAtoms);
268 tmpRecvAtoms.resize(0);
271 tmpRecvAtoms.resize(0);
278 outputProcArray =
new int[numOutputProcs];
279 outputProcFlags =
new char[CkNumPes()];
280 outputProxyArray =
new int[numOutputProcs*numProxiesPerOutputProc];
281 myOutputProxies =
new int[numOutputProcs];
283 myOutputProxyRank = -1;
284 for(
int i=0; i<numOutputProcs; ++i) {
287 std::sort(outputProcArray, outputProcArray+numOutputProcs);
288 for(
int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
291 std::sort(outputProxyArray, outputProxyArray+numOutputProcs*numProxiesPerOutputProc,
293 for(
int i=0; i<CkNumPes(); ++i) {
294 outputProcFlags[i] = 0;
296 for(
int i=0; i<numOutputProcs; ++i) {
297 outputProcFlags[outputProcArray[i]] = 1;
298 if ( CkMyPe() == outputProcArray[i] ) {
299 if ( myOutputRank != -1 )
NAMD_bug(
"Duplicate output proc");
303 for(
int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
304 if ( CkMyPe() == outputProxyArray[i] ) {
305 if ( myOutputRank != -1 )
NAMD_bug(
"Output proxy is also output proc");
306 if ( myOutputProxyRank != -1 )
NAMD_bug(
"Duplicate output proxy");
307 myOutputProxyRank = i;
311 for(
int i=0; i<numOutputProcs; ++i) {
312 if ( numProxiesPerOutputProc ) {
313 myOutputProxies[i] = outputProxyArray[myProxySet*numOutputProcs+i];
315 myOutputProxies[i] = outputProcArray[i];
320 myOutputProxyPositions = 0;
321 myOutputProxyVelocities = 0;
322 myOutputProxyForces = 0;
325 iout <<
iINFO <<
"OUTPUT PROC LOCATIONS:";
327 for ( i=0; i<numOutputProcs && i < 10; ++i ) {
328 iout <<
" " << outputProcArray[i];
330 if ( i<numOutputProcs )
iout <<
" ... " << outputProcArray[numOutputProcs-1];
335 #ifdef MEM_OPT_VERSION
336 if(myOutputRank!=-1) {
337 midCM =
new CollectionMidMaster(
this);
339 remoteClusters.clear();
349 return outputProcFlags[pe];
353 return CProxy_ParallelIOMgr::ckLocalBranch(CkpvAccess(BOCclass_group).ioMgr)->isOutputProcessor(pe);
360 #ifdef MEM_OPT_VERSION
361 if(myInputRank!=-1) {
362 int myAtomLIdx, myAtomUIdx;
363 getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
366 molecule->read_binary_atom_info(myAtomLIdx, myAtomUIdx, initAtoms);
372 readCoordinatesAndVelocity();
376 int oRank=atomRankOnOutput(myAtomLIdx);
377 for(
int i=oRank; i<numOutputProcs; i++) {
379 getAtomsRangeOnOutput(lIdx, uIdx, i);
380 if(lIdx > myAtomUIdx)
break;
381 int fid = lIdx>myAtomLIdx?lIdx:myAtomLIdx;
382 int tid = uIdx>myAtomUIdx?myAtomUIdx:uIdx;
383 for(
int j=fid; j<=tid; j++) initAtoms[j-myAtomLIdx].outputRank = i;
388 if(myOutputRank!=-1) {
390 if(!(simParameters->wrapAll || simParameters->wrapWater))
return;
391 readInfoForParOutput();
396 void ParallelIOMgr::readCoordinatesAndVelocity()
398 #ifdef MEM_OPT_VERSION
400 int myAtomLIdx, myAtomUIdx;
401 getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
402 int myNumAtoms = myAtomUIdx-myAtomLIdx+1;
409 FILE *ifp = fopen(simParameters->binCoorFile,
"rb");
412 sprintf(s,
"The binary coordinate file %s cannot be opened on proc %d\n", simParameters->binCoorFile, CkMyPe());
417 fread(&filelen,
sizeof(
int32),1,ifp);
418 char lenbuf[
sizeof(
int32)];
419 memcpy(lenbuf, (
const char *)&filelen,
sizeof(
int32));
421 if(!memcmp(lenbuf, (
const char *)&filelen,
sizeof(
int32))) {
422 iout <<
iWARN <<
"Number of atoms in binary file " << simParameters->binCoorFile
423 <<
" is palindromic, assuming same endian.\n" <<
endi;
425 if(filelen!=molecule->numAtoms) {
427 memcpy((
void *)&filelen, lenbuf,
sizeof(
int32));
429 if(filelen!=molecule->numAtoms) {
431 sprintf(s,
"Incorrect atom count in binary file %s", simParameters->binCoorFile);
437 if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
439 if ( fseeko(ifp, offsetPos, SEEK_CUR) )
443 sprintf(s,
"Error in seeking binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
446 size_t totalRead = fread(tmpData,
sizeof(
Vector), myNumAtoms, ifp);
447 if(totalRead!=myNumAtoms) {
449 sprintf(s,
"Error in reading binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
452 if(needFlip)
flipNum((
char *)tmpData,
sizeof(
BigReal), myNumAtoms*3);
454 for(
int i=0; i<myNumAtoms; i++) initAtoms[i].position = tmpData[i];
458 if(!simParameters->binVelFile) {
462 ifp = fopen(simParameters->binVelFile,
"rb");
465 sprintf(s,
"The binary velocity file %s cannot be opened on proc %d\n", simParameters->binVelFile, CkMyPe());
469 fread(&filelen,
sizeof(
int32),1,ifp);
470 memcpy(lenbuf, (
const char *)&filelen,
sizeof(
int32));
472 if(!memcmp(lenbuf, (
const char *)&filelen,
sizeof(
int32))) {
473 iout <<
iWARN <<
"Number of atoms in binary file " << simParameters->binVelFile
474 <<
" is palindromic, assuming same endian.\n" <<
endi;
476 if(filelen!=molecule->numAtoms) {
478 memcpy((
void *)&filelen, lenbuf,
sizeof(
int32));
480 if(filelen!=molecule->numAtoms) {
482 sprintf(s,
"Incorrect atom count in binary file %s", simParameters->binVelFile);
489 if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
491 if ( fseeko(ifp, offsetPos, SEEK_CUR) )
495 sprintf(s,
"Error in seeking binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
498 totalRead = fread(tmpData,
sizeof(
Vector), myNumAtoms, ifp);
499 if(totalRead!=myNumAtoms) {
501 sprintf(s,
"Error in reading binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
504 if(needFlip)
flipNum((
char *)tmpData,
sizeof(
BigReal), myNumAtoms*3);
506 for(
int i=0; i<myNumAtoms; i++) initAtoms[i].velocity = tmpData[i];
511 if(!simParameters->binRefFile) {
512 for(
int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = initAtoms[i].position;
514 ifp = fopen(simParameters->binRefFile,
"rb");
517 sprintf(s,
"The binary reference coordinate file %s cannot be opened on proc %d\n", simParameters->binRefFile, CkMyPe());
521 fread(&filelen,
sizeof(
int32),1,ifp);
522 memcpy(lenbuf, (
const char *)&filelen,
sizeof(
int32));
524 if(!memcmp(lenbuf, (
const char *)&filelen,
sizeof(
int32))) {
525 iout <<
iWARN <<
"Number of atoms in binary file " << simParameters->binRefFile
526 <<
" is palindromic, assuming same endian.\n" <<
endi;
528 if(filelen!=molecule->numAtoms) {
530 memcpy((
void *)&filelen, lenbuf,
sizeof(
int32));
532 if(filelen!=molecule->numAtoms) {
534 sprintf(s,
"Incorrect atom count in binary file %s", simParameters->binRefFile);
541 if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
543 if ( fseeko(ifp, offsetPos, SEEK_CUR) )
547 sprintf(s,
"Error in seeking binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
550 totalRead = fread(tmpData,
sizeof(
Vector), myNumAtoms, ifp);
551 if(totalRead!=myNumAtoms) {
553 sprintf(s,
"Error in reading binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
556 if(needFlip)
flipNum((
char *)tmpData,
sizeof(
BigReal), myNumAtoms*3);
558 for(
int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = tmpData[i];
565 void ParallelIOMgr::readInfoForParOutput()
568 getMyAtomsRangeOnOutput(fromIdx,toIdx);
569 int numMyAtoms = toIdx-fromIdx+1;
571 clusterID =
new int[numMyAtoms];
572 clusterSize =
new int[numMyAtoms];
576 FILE *ifp = fopen(simParameters->binAtomFile,
"rb");
580 fread(&magicNum,
sizeof(
int), 1, ifp);
586 isWater =
new char[numMyAtoms];
588 int64 offset =
sizeof(char)*((
int64)(fromIdx-molecule->numAtoms));
590 if ( _fseeki64(ifp, offset, SEEK_END) )
592 if ( fseeko(ifp, offset, SEEK_END) )
596 sprintf(s,
"Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
599 fread(isWater,
sizeof(
char), numMyAtoms, ifp);
603 offset =
sizeof(int)*((
int64)(fromIdx-molecule->numAtoms))
604 -
sizeof(char)*((
int64)(molecule->numAtoms));
606 if ( _fseeki64(ifp, offset, SEEK_END) )
608 if ( fseeko(ifp, offset, SEEK_END) )
612 sprintf(s,
"Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
615 fread(clusterID,
sizeof(
int), numMyAtoms, ifp);
616 if(needFlip)
flipNum((
char *)clusterID,
sizeof(
int), numMyAtoms);
621 for(
int i=0; i<numMyAtoms; i++) {
623 int cid = clusterID[i];
629 CmiAssert(cid<=toIdx);
636 remoteClusters.add(one);
641 int lidx = cid-fromIdx;
651 printf(
"output[%d]=%d: prepare to send %d remote msgs for cluster size\n",
652 myOutputRank, CkMyPe(), remoteClusters.size());
655 numRemoteClusters = remoteClusters.size();
657 CProxy_ParallelIOMgr pIO(thisgroup);
659 for(iter=iter.begin(); iter!=iter.end(); iter++) {
664 int dstRank = atomRankOnOutput(iter->clusterId);
665 pIO[outputProcArray[dstRank]].recvClusterSize(msg);
679 if(myOutputRank==-1)
return;
680 if(!(simParameters->wrapAll || simParameters->wrapWater))
return;
683 getMyAtomsRangeOnOutput(fromIdx,toIdx);
686 for(
int i=0; i<csmBuf.size(); i++) {
692 CProxy_ParallelIOMgr pIO(thisgroup);
693 for(
int i=0; i<csmBuf.size(); i++) {
697 pIO[outputProcArray[msg->
srcRank]].recvFinalClusterSize(msg);
699 numRemoteReqs = csmBuf.size();
707 if(numRemoteClusters!=0){
708 recvFinalClusterSize(NULL);
711 int numMyAtoms = toIdx-fromIdx+1;
712 for(
int i=0; i<numMyAtoms; i++) {
713 int lidx = clusterID[i]-fromIdx;
714 clusterSize[i] = clusterSize[lidx];
717 #if 0 //write out cluster debug info
719 sprintf(fname,
"cluster.par.%d", CkMyPe());
720 FILE *ofp = fopen(fname,
"w");
721 for(
int i=0; i<numMyAtoms; i++) {
722 fprintf(ofp,
"%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
736 CmiAssert(ret!=NULL);
742 if(++numCSMAck == (numRemoteClusters+1)) {
745 getMyAtomsRangeOnOutput(fromIdx,toIdx);
746 int numMyAtoms = toIdx-fromIdx+1;
748 for(
int i=0; i<numMyAtoms; i++) {
749 int cid = clusterID[i];
750 int lidx = cid-fromIdx;
757 clusterSize[i] = clusterSize[lidx];
761 remoteClusters.clear();
763 #if 0 //write out cluster debug info
765 sprintf(fname,
"cluster.par.%d", CkMyPe());
766 FILE *ofp = fopen(fname,
"w");
767 for(
int i=0; i<numMyAtoms; i++) {
768 fprintf(ofp,
"%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
778 if(myInputRank==-1)
return;
786 for(
int i=0; i<initAtoms.size(); i++) {
788 int parentRank = atomInitRankOnInput(initAtoms[i].MPID);
789 if(parentRank != myInputRank) {
790 toMigrateList.
add(i);
791 initAtoms[i].isValid =
false;
792 int tmp = parentRank - myInputRank;
793 tmp = tmp>0 ? tmp : -tmp;
794 if(tmp > maxOffset) maxOffset = tmp;
803 for(
int i=0; i<toMigrateList.
size(); i++) {
804 int idx = toMigrateList[i];
805 int parentRank = atomInitRankOnInput(initAtoms[idx].MPID);
807 int offset = parentRank - myInputRank + maxOffset;
808 migLists[offset].
add(initAtoms[idx]);
811 CProxy_ParallelIOMgr pIO(thisgroup);
812 for(
int i=0; i<2*maxOffset+1; i++) {
813 int migLen = migLists[i].
size();
818 int destRank = i-maxOffset+myInputRank;
819 pIO[inputProcArray[destRank]].recvAtomsMGrp(msg);
824 toMigrateList.
clear();
830 for(
int i=0; i<msg->
length; i++) {
831 tmpRecvAtoms.add((msg->
atomList)[i]);
838 if(myInputRank==-1)
return;
840 for(
int i=0; i<tmpRecvAtoms.size(); i++) {
841 tmpRecvAtoms[i].isValid =
true;
842 initAtoms.add(tmpRecvAtoms[i]);
844 tmpRecvAtoms.clear();
847 std::sort(initAtoms.begin(), initAtoms.end());
852 int numFixedRigidBonds = 0;
853 if(molecule->numRigidBonds){
854 int parentIsFixed = 0;
855 for(
int i=0; i<initAtoms.size(); i++) {
864 numFixedRigidBonds++;
869 numFixedRigidBonds++;
875 int numFixedGroups = 0;
876 if(molecule->numFixedAtoms){
877 for(
int i=0; i<initAtoms.size();) {
890 if(allFixed) numFixedGroups++;
896 CProxy_ParallelIOMgr pIO(thisgroup);
900 pIO[0].recvHydroBasedCounter(msg);
905 #ifdef MEM_OPT_VERSION
906 if(myInputRank==-1)
return;
908 CProxy_ParallelIOMgr pIO(thisgroup);
928 for(
int i=0; i<initAtoms.size(); i++) {
950 if(molecule->numFixedAtoms>0 && ! simParameters->fixedAtomsForces) {
956 int sAId = initAtoms[0].id;
958 for(
int i=0; i<initAtoms.size(); i++) {
961 int myAId = initAtoms[i].id;
964 if(!initAtoms[i].atomFixed) {
976 for(
int j=0; j<thisSig->
bondCnt; j++) {
978 int a1 = myAId + bsig->
offset[0];
983 for(
int j=0; j<thisSig->
angleCnt; j++) {
985 int a1 = myAId + bsig->
offset[0];
986 int a2 = myAId + bsig->
offset[1];
987 if(!isAtomFixed(sAId, a1) || !isAtomFixed(sAId, a2))
994 int a1 = myAId + bsig->
offset[0];
995 int a2 = myAId + bsig->
offset[1];
996 int a3 = myAId + bsig->
offset[2];
997 if(!isAtomFixed(sAId, a1) ||
998 !isAtomFixed(sAId, a2) ||
999 !isAtomFixed(sAId, a3))
1006 int a1 = myAId + bsig->
offset[0];
1007 int a2 = myAId + bsig->
offset[1];
1008 int a3 = myAId + bsig->
offset[2];
1009 if(!isAtomFixed(sAId, a1) ||
1010 !isAtomFixed(sAId, a2) ||
1011 !isAtomFixed(sAId, a3))
1018 int a1 = myAId + bsig->
offset[0];
1019 int a2 = myAId + bsig->
offset[1];
1020 int a3 = myAId + bsig->
offset[2];
1021 int a4 = myAId + bsig->
offset[3];
1022 int a5 = myAId + bsig->
offset[4];
1023 int a6 = myAId + bsig->
offset[5];
1024 int a7 = myAId + bsig->
offset[6];
1026 if(!isAtomFixed(sAId, a1) ||
1027 !isAtomFixed(sAId, a2) ||
1028 !isAtomFixed(sAId, a3) ||
1029 !isAtomFixed(sAId, a4) ||
1030 !isAtomFixed(sAId, a5) ||
1031 !isAtomFixed(sAId, a6) ||
1032 !isAtomFixed(sAId, a7))
1043 int thisAId = exclSig->
modOffset[j]+myAId;
1050 int a1 = myAId + bsig->
offset[0];
1051 int a2 = myAId + bsig->
offset[1];
1052 if(!isAtomFixed(sAId, a1) ||
1053 !isAtomFixed(sAId, a2))
1057 #if COLLECT_PERFORMANCE_DATA
1058 printf(
"Num fixedAtom lookup on proc %d is %d\n", CkMyPe(), numFixedAtomLookup);
1072 if(!simParameters->comMove) {
1080 for (
int i=0; i<initAtoms.size(); i++) {
1081 msg->
totalMV += initAtoms[i].mass * initAtoms[i].velocity;
1086 pIO[0].recvMolInfo(msg);
1093 molecule->numBonds += msg->
numBonds;
1111 if(!simParameters->comMove) {
1115 if(++procsReceived == numInputProcs) {
1117 msg->
numBonds = molecule->numBonds;
1135 if(!simParameters->comMove) {
1139 CProxy_ParallelIOMgr pIO(thisgroup);
1140 pIO.bcastMolInfo(msg);
1149 #ifdef MEM_OPT_VERSION
1150 if(myInputRank!=-1) {
1151 if(!simParameters->comMove) {
1154 for (
int i=0; i<initAtoms.size(); i++) initAtoms[i].velocity -= val;
1183 iout <<
iINFO <<
"LOADED " << molecule->numTotalExclusions <<
" TOTAL EXCLUSIONS\n" <<
endi;
1184 if(!simParameters->comMove) {
1185 iout <<
iINFO <<
"REMOVING COM VELOCITY "
1197 if(++hydroMsgRecved == numInputProcs){
1200 CProxy_ParallelIOMgr pIO(thisgroup);
1201 pIO.bcastHydroBasedCounter(msg);
1207 #ifdef MEM_OPT_VERSION
1218 iout <<
iINFO <<
"****************************\n";
1219 iout << iINFO <<
"STRUCTURE SUMMARY:\n";
1220 iout << iINFO << molecule->numAtoms <<
" ATOMS\n";
1221 iout << iINFO << molecule->numBonds <<
" BONDS\n";
1222 iout << iINFO << molecule->numAngles <<
" ANGLES\n";
1223 iout << iINFO << molecule->numDihedrals <<
" DIHEDRALS\n";
1224 iout << iINFO << molecule->numImpropers <<
" IMPROPERS\n";
1225 iout << iINFO << molecule->numCrossterms <<
" CROSSTERMS\n";
1226 iout << iINFO << molecule->numExclusions <<
" EXCLUSIONS\n";
1229 if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeXplorOn)){
1230 iout << iINFO << molecule->numMultipleDihedrals
1231 <<
" DIHEDRALS WITH MULTIPLE PERIODICITY (BASED ON PSF FILE)\n";
1233 if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeCharmmOn)){
1234 iout << iINFO << molecule->numMultipleDihedrals
1235 <<
" DIHEDRALS WITH MULTIPLE PERIODICITY IGNORED (BASED ON PSF FILE) \n";
1237 <<
" CHARMM MULTIPLICITIES BASED ON PARAMETER FILE INFO! \n";
1241 if (molecule->numMultipleImpropers){
1242 iout << iINFO << molecule->numMultipleImpropers
1243 <<
" IMPROPERS WITH MULTIPLE PERIODICITY\n";
1246 if (simParameters->fixedAtomsOn)
1247 iout << iINFO << molecule->numFixedAtoms <<
" FIXED ATOMS\n";
1250 if (simParameters->rigidBonds)
1251 iout << iINFO << molecule->numRigidBonds <<
" RIGID BONDS\n";
1253 if (simParameters->fixedAtomsOn && simParameters->rigidBonds)
1254 iout << iINFO << molecule->numFixedRigidBonds <<
1255 " RIGID BONDS BETWEEN FIXED ATOMS\n";
1257 iout << iINFO << molecule->num_deg_freedom(1)
1258 <<
" DEGREES OF FREEDOM\n";
1260 iout << iINFO << molecule->numHydrogenGroups <<
" HYDROGEN GROUPS\n";
1261 iout << iINFO << molecule->maxHydrogenGroupSize
1262 <<
" ATOMS IN LARGEST HYDROGEN GROUP\n";
1263 iout << iINFO << molecule->numMigrationGroups <<
" MIGRATION GROUPS\n";
1264 iout << iINFO << molecule->maxMigrationGroupSize
1265 <<
" ATOMS IN LARGEST MIGRATION GROUP\n";
1266 if (simParameters->fixedAtomsOn)
1268 iout << iINFO << molecule->numFixedGroups <<
1269 " HYDROGEN GROUPS WITH ALL ATOMS FIXED\n";
1272 iout << iINFO <<
"TOTAL MASS = " << totalMass <<
" amu\n";
1273 iout << iINFO <<
"TOTAL CHARGE = " << totalCharge <<
" e\n";
1275 BigReal volume = simParameters->lattice.volume();
1277 iout << iINFO <<
"MASS DENSITY = "
1278 << ((totalMass/volume) / 0.6022) <<
" g/cm^3\n";
1279 iout << iINFO <<
"ATOM DENSITY = "
1280 << (molecule->numAtoms/volume) <<
" atoms/A^3\n";
1283 iout << iINFO <<
"*****************************\n";
1292 if(myInputRank==-1)
return;
1302 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1303 PatchMgr *patchMgr = pm.ckLocalBranch();
1306 const Lattice lattice = simParameters->lattice;
1307 for(
int i=0; i<initAtoms.size(); i++) {
1313 eachPatchAtomList[pid].push_back(i);
1316 CProxy_ParallelIOMgr pIO(thisgroup);
1320 int cursize = eachPatchAtomList[i].size();
1321 if(cursize>0) patchCnt++;
1325 if(simParameters->fixedAtomsOn) {
1334 int cursize = eachPatchAtomList[i].size();
1336 if ( cursize > USHRT_MAX ) {
1338 sprintf(errstr,
"Patch %d exceeds %d atoms.", i, USHRT_MAX);
1347 if(simParameters->fixedAtomsOn) {
1350 int cursize = eachPatchAtomList[i].size();
1353 for(
int j=0; j<cursize; j++) {
1354 int aid = eachPatchAtomList[i][j];
1356 fixedCnt += initAtoms[aid].atomFixed;
1364 pIO[0].recvAtomsCntPerPatch(msg);
1370 #ifdef MEM_OPT_VERSION
1372 for(
int i=0; i<msg->
length; i++) {
1374 int oldNum = patchMap->numAtoms(pid);
1377 sprintf(errstr,
"Patch %d exceeds %d atoms.", pid, USHRT_MAX);
1380 patchMap->setNumAtoms(pid, oldNum+msg->
atomsCntList[i]);
1381 if(simParameters->fixedAtomsOn) {
1382 oldNum = patchMap->numFixedAtoms(pid);
1388 if(++procsReceived == numInputProcs) {
1393 for(
int i=0; i<patchMap->
numPatches(); i++) {
1394 int cnt = patchMap->numAtoms(i);
1402 iout <<
iINFO <<
"LARGEST PATCH (" << maxPatch <<
1403 ") HAS " << maxAtoms <<
" ATOMS\n" <<
endi;
1404 if ( totalAtoms !=
Node::Object()->molecule->numAtoms ) {
1406 sprintf(errstr,
"Incorrect atom count in void ParallelIOMgr::recvAtomsCntPerPatch: %d vs %d", totalAtoms,
Node::Object()->molecule->numAtoms);
1420 #ifdef MEM_OPT_VERSION
1421 if(myInputRank==-1)
return;
1423 if ( sendAtomsThread == 0 ) {
1425 CthAwaken(sendAtomsThread);
1428 sendAtomsThread = 0;
1429 numAcksOutstanding = 0;
1440 if(eachPatchAtomList[i].size()==0)
continue;
1441 int onPE = patchMap->
node(i);
1442 if ( procList[onPE].size() == 0 ) pesToSend.
add(onPE);
1443 procList[onPE].
add(i);
1451 CProxy_ParallelIOMgr pIO(thisgroup);
1452 for(
int k=0; k<pesToSend.
size(); k++) {
1453 const int i = pesToSend[k];
1454 int len = procList[i].
size();
1455 if(len==0)
continue;
1459 for(
int j=0; j<len; j++) {
1460 int pid = procList[i][j];
1461 int atomCnt = eachPatchAtomList[pid].
size();
1463 if ( numAcksOutstanding >= 10 ) {
1464 sendAtomsThread = CthSelf();
1467 ++numAcksOutstanding;
1470 msg->
from = CkMyPe();
1475 for(
int k=0; k<atomCnt; k++, atomIdx++) {
1476 int aid = eachPatchAtomList[pid][k];
1480 one.hydVal = initAtoms[aid].hydList;
1483 pIO[i].recvAtomsToHomePatchProcs(msg);
1486 procList[i].
clear();
1500 --numAcksOutstanding;
1501 if ( sendAtomsThread ) {
1502 CthAwaken(sendAtomsThread);
1503 sendAtomsThread = 0;
1509 CProxy_ParallelIOMgr pIO(thisgroup);
1510 pIO[msg->
from].ackAtomsToHomePatchProcs();
1512 if(!isOKToRecvHPAtoms) {
1513 prepareHomePatchAtomList();
1514 isOKToRecvHPAtoms =
true;
1517 int numRecvPatches = msg->
patchCnt;
1519 for(
int i=0; i<numRecvPatches; i++) {
1522 int idx = binaryFindHPID(pid);
1523 for(
int j=0; j<size; j++, aid++) {
1524 hpAtomsList[idx].add(msg->
allAtoms[aid]);
1531 void ParallelIOMgr::prepareHomePatchAtomList()
1534 for(
int i=0; i<patchMap->
numPatches(); i++) {
1535 if(patchMap->
node(i)==CkMyPe()) {
1539 if(hpIDList.size()>0)
1543 int ParallelIOMgr::binaryFindHPID(
int pid)
1550 lIdx=hpIDList.size()-1;
1552 while(rIdx<=lIdx ) {
1553 int idx = (rIdx+lIdx)/2;
1554 int curPid = hpIDList[idx];
1558 }
else if(pid<curPid) {
1567 CmiAssert(retIdx!=-1);
1573 #ifdef MEM_OPT_VERSION
1576 int numPids = hpIDList.size();
1579 if(assignedPids == 0)
return;
1584 CmiAssert(isOKToRecvHPAtoms ==
false);
1586 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1587 PatchMgr *patchMgr = pm.ckLocalBranch();
1588 for(
int i=0; i<patchMap->
numPatches(); i++) {
1589 if(patchMap->
node(i)==CkMyPe()) {
1597 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1598 PatchMgr *patchMgr = pm.ckLocalBranch();
1601 for(
int i=0; i<numPids; i++) {
1602 int pid = hpIDList[i];
1605 std::sort(hpAtomsList[i].begin(), hpAtomsList[i].end());
1611 delete [] hpAtomsList;
1619 #ifdef MEM_OPT_VERSION
1620 molecule->delAtomNames();
1621 molecule->delChargeSpace();
1625 if(!CkMyPe() && !simParameters->freeEnergyOn)
1626 molecule->delMassSpace();
1628 molecule->delFixedAtoms();
1636 int ParallelIOMgr::numMyAtoms(
int rank,
int numProcs)
1638 if(rank==-1)
return -1;
1639 int avgNum = molecule->numAtoms/numProcs;
1640 int remainder = molecule->numAtoms%numProcs;
1641 if(rank<remainder)
return avgNum+1;
1645 int ParallelIOMgr::atomRank(
int atomID,
int numProcs)
1647 int avgNum = molecule->numAtoms/numProcs;
1648 int remainder = molecule->numAtoms%numProcs;
1649 int midLimit = remainder*(avgNum+1);
1651 if(atomID<midLimit) {
1652 idx = atomID/(avgNum+1);
1654 idx = remainder+(atomID-midLimit)/avgNum;
1659 void ParallelIOMgr::getMyAtomsRange(
int &lowerIdx,
int &upperIdx,
int rank,
int numProcs)
1667 int avgNum = molecule->numAtoms/numProcs;
1668 int remainder = molecule->numAtoms%numProcs;
1669 if(rank<remainder) {
1670 lowerIdx = rank*(avgNum+1);
1671 upperIdx = lowerIdx+avgNum;
1673 int midLimit = remainder*(avgNum+1);
1674 lowerIdx = midLimit+(rank-remainder)*avgNum;
1675 upperIdx = lowerIdx+avgNum-1;
1679 int ParallelIOMgr::calcMyOutputProxyClients() {
1681 int myOutputProxyClients = 0;
1682 int myset = myOutputProxyRank / numOutputProcs;
1683 for(
int i=0; i<CkNumPes(); ++i) {
1684 if ( (i*numProxiesPerOutputProc)/CkNumPes() == myset &&
1686 ++myOutputProxyClients;
1689 return myOutputProxyClients;
1694 #ifdef MEM_OPT_VERSION
1695 if ( myOutputRank != -1 ) {
1696 int ready = midCM->receivePositions(msg);
1698 CProxy_CollectionMaster cm(mainMaster);
1699 cm.receiveOutputPosReady(msg->
seq);
1702 }
else if ( myOutputProxyRank != -1 ) {
1703 if ( ! myOutputProxyPositions ) {
1707 if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receivePositions(newmsg);
1710 NAMD_bug(
"ParallelIOMgr::receivePositions on bad pe");
1717 #ifdef MEM_OPT_VERSION
1718 if ( myOutputRank != -1 ) {
1719 int ready = midCM->receiveVelocities(msg);
1721 CProxy_CollectionMaster cm(mainMaster);
1722 cm.receiveOutputVelReady(msg->
seq);
1725 }
else if ( myOutputProxyRank != -1 ) {
1726 if ( ! myOutputProxyVelocities ) {
1730 if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveVelocities(newmsg);
1733 NAMD_bug(
"ParallelIOMgr::receiveVelocities on bad pe");
1740 #ifdef MEM_OPT_VERSION
1741 if ( myOutputRank != -1 ) {
1742 int ready = midCM->receiveForces(msg);
1744 CProxy_CollectionMaster cm(mainMaster);
1745 cm.receiveOutputForceReady(msg->
seq);
1748 }
else if ( myOutputProxyRank != -1 ) {
1749 if ( ! myOutputProxyForces ) {
1753 if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveForces(newmsg);
1756 NAMD_bug(
"ParallelIOMgr::receiveForces on bad pe");
1764 #ifdef MEM_OPT_VERSION
1765 double iotime = CmiWallTimer();
1766 midCM->disposePositions(seq);
1767 iotime = CmiWallTimer()-iotime+prevT;
1769 #if OUTPUT_SINGLE_FILE
1771 if(myOutputRank == getMyOutputGroupHighestRank()) {
1773 CProxy_CollectionMaster cm(mainMaster);
1774 cm.startNextRoundOutputPos(iotime);
1776 CProxy_ParallelIOMgr io(thisgroup);
1777 io[outputProcArray[myOutputRank+1]].disposePositions(seq, iotime);
1781 CProxy_CollectionMaster cm(mainMaster);
1782 cm.startNextRoundOutputPos(iotime);
1790 #ifdef MEM_OPT_VERSION
1791 double iotime = CmiWallTimer();
1792 midCM->disposeVelocities(seq);
1793 iotime = CmiWallTimer()-iotime+prevT;
1795 #if OUTPUT_SINGLE_FILE
1797 if(myOutputRank==getMyOutputGroupHighestRank()) {
1799 CProxy_CollectionMaster cm(mainMaster);
1800 cm.startNextRoundOutputVel(iotime);
1802 CProxy_ParallelIOMgr io(thisgroup);
1803 io[outputProcArray[myOutputRank+1]].disposeVelocities(seq, iotime);
1807 CProxy_CollectionMaster cm(mainMaster);
1808 cm.startNextRoundOutputVel(iotime);
1816 #ifdef MEM_OPT_VERSION
1817 double iotime = CmiWallTimer();
1818 midCM->disposeForces(seq);
1819 iotime = CmiWallTimer()-iotime+prevT;
1821 #if OUTPUT_SINGLE_FILE
1823 if(myOutputRank==getMyOutputGroupHighestRank()) {
1825 CProxy_CollectionMaster cm(mainMaster);
1826 cm.startNextRoundOutputForce(iotime);
1828 CProxy_ParallelIOMgr io(thisgroup);
1829 io[outputProcArray[myOutputRank+1]].disposeForces(seq, iotime);
1833 CProxy_CollectionMaster cm(mainMaster);
1834 cm.startNextRoundOutputForce(iotime);
1843 #ifdef MEM_OPT_VERSION
1844 coorInstance = midCM->getReadyPositions(seq);
1846 coorInstance->lattice = lat;
1847 int fromAtomID = coorInstance->fromAtomID;
1848 int toAtomID = coorInstance->toAtomID;
1856 int dsize = data.
size();
1857 int numMyAtoms = toAtomID-fromAtomID+1;
1858 tmpCoorCon =
new Vector[numMyAtoms];
1861 for(
int i=0; i<numMyAtoms; i++){
1862 tmpCoorCon[i] = 0.0;
1863 int cid = clusterID[i];
1870 one.
dsum = fdata[i];
1874 remoteCoors.
add(one);
1877 ret->
dsum += fdata[i];
1879 ret->
dsum += data[i];
1883 tmpCoorCon[cid-fromAtomID] += fdata[i];
1885 tmpCoorCon[cid-fromAtomID] += data[i];
1891 CmiAssert(numRemoteClusters == remoteCoors.size());
1893 CProxy_ParallelIOMgr pIO(thisgroup);
1895 for(iter=iter.
begin(); iter!=iter.
end(); iter++){
1899 msg->
dsum = iter->dsum;
1900 int dstRank = atomRankOnOutput(iter->clusterId);
1901 pIO[outputProcArray[dstRank]].recvClusterCoor(msg);
1906 recvClusterCoor(NULL);
1917 if(msg!=NULL) ccmBuf.add(msg);
1920 if(++numReqRecved == (numRemoteReqs+1)){
1922 integrateClusterCoor();
1926 void ParallelIOMgr::integrateClusterCoor(){
1927 #ifdef MEM_OPT_VERSION
1928 int fromIdx = coorInstance->fromAtomID;
1929 int toIdx = coorInstance->toAtomID;
1930 for(
int i=0; i<ccmBuf.size(); i++){
1933 tmpCoorCon[lidx] += msg->
dsum;
1937 CProxy_ParallelIOMgr pIO(thisgroup);
1938 for(
int i=0; i<ccmBuf.size(); i++){
1941 if(simParameters->wrapAll || isWater[lidx]) {
1942 Lattice *lat = &(coorInstance->lattice);
1943 Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1944 msg->
dsum = (simParameters->wrapNearest ?
1949 pIO[outputProcArray[msg->
srcRank]].recvFinalClusterCoor(msg);
1956 if(numRemoteClusters!=0){
1957 recvFinalClusterCoor(NULL);
1961 int numMyAtoms = toIdx-fromIdx+1;
1964 for(
int i=0; i<numMyAtoms; i++){
1965 if(!simParameters->wrapAll && !isWater[i])
continue;
1966 int lidx = clusterID[i]-fromIdx;
1969 Lattice *lat = &(coorInstance->lattice);
1970 Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1971 tmpCoorCon[lidx] = (simParameters->wrapNearest ?
1974 if(data.
size()) data[i] += tmpCoorCon[lidx];
1976 if(fdata.
size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
1979 delete [] tmpCoorCon;
1981 CProxy_CollectionMaster cm(mainMaster);
1982 cm.wrapCoorFinished();
1988 #ifdef MEM_OPT_VERSION
1997 if(++numCSMAck == (numRemoteClusters+1)){
1999 int fromIdx = coorInstance->fromAtomID;
2000 int toIdx = coorInstance->toAtomID;
2001 int numMyAtoms = toIdx-fromIdx+1;
2005 for(
int i=0; i<numMyAtoms; i++){
2006 if(!simParameters->wrapAll && !isWater[i])
continue;
2007 int cid = clusterID[i];
2008 int lidx = cid-fromIdx;
2013 if(data.
size()) data[i] += fone->
dsum;
2014 if(fdata.
size()) fdata[i] = fdata[i] + fone->
dsum;
2017 Lattice *lat = &(coorInstance->lattice);
2018 Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
2019 tmpCoorCon[lidx] = (simParameters->wrapNearest ?
2022 if(data.
size()) data[i] += tmpCoorCon[lidx];
2023 if(fdata.
size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
2027 delete [] tmpCoorCon;
2029 CProxy_CollectionMaster cm(mainMaster);
2030 cm.wrapCoorFinished();
2032 remoteCoors.clear();
2036 #include "ParallelIOMgr.def.h"
int64 numCalcFullExclusions
static CollectionMgr * Object()
unsigned short * fixedAtomsCntList
std::ostream & iINFO(std::ostream &s)
void sendAtomsToHomePatchProcs()
void flipNum(char *elem, int elemSize, int numElems)
PatchID assignToPatch(Position p, const Lattice &l)
#define COMPRESSED_PSF_MAGICNUM
void NAMD_err(const char *err_msg)
void initTmpPatchAtomsList()
unsigned int hydrogenGroupSize
static int * peCompactOrdering
void recvClusterSize(ClusterSizeMsg *msg)
static PatchMap * Object()
ResizeArray< CollectProxyVectorInstance * > data
TupleSignature * improperSigs
SimParameters * simParameters
void integrateClusterSize()
CollectVectorVarMsg * buildMsg()
void recvAtomsMGrp(MoveInputAtomsMsg *msg)
Vector wrap_delta(const Position &pos1) const
void createHomePatch(PatchID pid, FullAtomList &a)
TupleSignature * dihedralSigs
std::ostream & endi(std::ostream &s)
void disposePositions(int seq, double prevT)
std::ostream & iWARN(std::ostream &s)
TupleSignature * crosstermSigs
void wrapCoor(int seq, Lattice lat)
void receiveForces(CollectVectorVarMsg *msg)
UniqueSetIter< T > begin(void) const
CollectVectorVarMsg::DataStatus vstatus
void recvAtomsToHomePatchProcs(MovePatchAtomsMsg *msg)
void integrateMigratedAtoms()
void reorder(Elem *a, int n)
CkChareID getMasterChareID()
TupleSignature * gromacsPairSigs
void disposeForces(int seq, double prevT)
void recvClusterCoor(ClusterCoorMsg *msg)
int append(CollectVectorVarMsg *msg)
void NAMD_bug(const char *err_msg)
void call_sendAtomsToHomePatchProcs(void *arg)
void recvAtomsCntPerPatch(AtomsCntPerPatchMsg *msg)
void recvHydroBasedCounter(HydroBasedMsg *msg)
void recvFinalClusterCoor(ClusterCoorMsg *msg)
std::vector< int > * getTmpPatchAtomsList()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
TupleSignature * bondSigs
void NAMD_die(const char *err_msg)
CollectProxyVectorSequence(int nc)
void ackAtomsToHomePatchProcs()
static int * peDiffuseOrdering
void initialize(Node *node)
int add(const Elem &elem)
CollectProxyVectorInstance()
UniqueSetIter< T > end(void) const
static int * peCompactOrderingIndex
BlockRadixSort::TempStorage sort
WorkDistrib * workDistrib
int numPatches(void) const
void recvFinalClusterSize(ClusterSizeMsg *msg)
ResizeArray< FloatVector > fdata
void disposeVelocities(int seq, double prevT)
void calcAtomsInEachPatch()
unsigned short * atomsCntList
bool isOutputProcessor(int pe)
int numPatchesOnNode(int node)
ResizeArray< Vector > data
void delTmpPatchAtomsList()
Vector wrap_nearest_delta(Position pos1) const
void bcastHydroBasedCounter(HydroBasedMsg *msg)
void receiveVelocities(CollectVectorVarMsg *msg)
void recvMolInfo(MolInfoMsg *msg)
int isOutputProcessor(int pe)
void bcastMolInfo(MolInfoMsg *msg)
TupleSignature * angleSigs
void reset(int s, CollectVectorVarMsg::DataStatus v, int numClients)
CollectVectorVarMsg * submitData(CollectVectorVarMsg *msg)
void receivePositions(CollectVectorVarMsg *msg)
HashPool< AtomSigInfo > atomSigPool