ParallelIOMgr.C

#include "largefiles.h" // must be first!

#include <stdio.h>
#include "BOCgroup.h"
#include "Molecule.h"
#include "Node.h"
#include "Node.decl.h"
#include "NamdState.h"
#include "WorkDistrib.h"
#include "PDB.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "packmsg.h"
#include "HomePatch.h"
#include "InfoStream.h"
#include "CollectionMaster.h"
#include "CollectionMgr.h"

#include "ParallelIOMgr.decl.h"
#include "ParallelIOMgr.h"

#include "Output.h"
#include "Random.h"

#include <algorithm>
using namespace std;

#if !(defined(__NVCC__) || defined(__HIPCC__))
#include <pup.h>
#endif

// data structures for IO proxy, all destined for same IO processor

class CollectProxyVectorInstance {
  public:

    CollectProxyVectorInstance() : seq(-10) { ; }

    void free() { seq = -10; }
    int notfree() { return ( seq != -10 ); }

    void reset(int s, CollectVectorVarMsg::DataStatus v, int numClients) {
        if ( s == -10 ) NAMD_bug("seq == free in CollectionMgr");
        seq = s;
        vstatus = v;
        remaining = numClients;
        aid.resize(0);
        data.resize(0);
        fdata.resize(0);
    }

    // true -> send it and delete it!
    int append(CollectVectorVarMsg *msg)
    {
        if ( msg->status != vstatus ) {
            NAMD_bug("CollectProxyVectorInstance vstatus mismatch");
        }
        if ( msg->seq != seq ) {
            NAMD_bug("CollectProxyVectorInstance seq mismatch");
        }
        int size = msg->size;
        for( int i = 0; i < size; ++i ) { aid.add(msg->aid[i]); }
        if ( vstatus == CollectVectorVarMsg::VectorValid ||
             vstatus == CollectVectorVarMsg::BothValid ) {
            for( int i = 0; i < size; ++i ) { data.add(msg->data[i]); }
        }
        if ( vstatus == CollectVectorVarMsg::FloatVectorValid ||
             vstatus == CollectVectorVarMsg::BothValid ) {
            for( int i = 0; i < size; ++i ) { fdata.add(msg->fdata[i]); }
        }
        const int atoms_per_message_target = 100000;
        return ( ! --remaining || aid.size() > atoms_per_message_target );
    }
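
    // Illustrative note: with numClients = 4 client PEs feeding this
    // instance, the combined message is flushed either when the fourth
    // client's contribution arrives (remaining hits 0) or as soon as more
    // than atoms_per_message_target atom IDs have accumulated, whichever
    // happens first.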

    CollectVectorVarMsg* buildMsg() {
        int numAtoms = aid.size();
        CollectVectorVarMsg *msg;
        if ( ! numAtoms ) {
            msg = 0;
        } else if ( vstatus == CollectVectorVarMsg::VectorValid) {
            msg = new(numAtoms, numAtoms, 0, 0) CollectVectorVarMsg;
            for(int j=0; j<numAtoms; j++) {
                msg->aid[j] = aid[j];
                msg->data[j] = data[j];
            }
        } else if (vstatus == CollectVectorVarMsg::FloatVectorValid) {
            msg = new(numAtoms, 0, numAtoms, 0) CollectVectorVarMsg;
            for(int j=0; j<numAtoms; j++) {
                msg->aid[j] = aid[j];
                msg->fdata[j] = fdata[j];
            }
        } else {
            msg = new(numAtoms, numAtoms, numAtoms, 0) CollectVectorVarMsg;
            for(int j=0; j<numAtoms; j++) {
                msg->aid[j] = aid[j];
                msg->data[j] = data[j];
                msg->fdata[j] = fdata[j];
            }
        }
        if ( msg ) {
            msg->seq = seq;
            msg->size = numAtoms;
            msg->status = vstatus;
        }
        if ( remaining ) reset(seq,vstatus,remaining);
        else free();
        return msg;
    }

    int seq;
    CollectVectorVarMsg::DataStatus vstatus;
    AtomIDList aid;
    ResizeArray<Vector> data;
    ResizeArray<FloatVector> fdata;

  private:
    int remaining;

};

class CollectProxyVectorSequence {
  public:
    CollectProxyVectorSequence(int nc) : numClients(nc) { ; }

    CollectVectorVarMsg* submitData(CollectVectorVarMsg *msg) {
        CollectProxyVectorInstance **c = data.begin();
        CollectProxyVectorInstance **c_e = data.end();
        for( ; c != c_e && (*c)->seq != msg->seq; ++c );
        if ( c == c_e ) {
            c = data.begin();
            for( ; c != c_e && (*c)->notfree(); ++c );
            if ( c == c_e ) {
                data.add(new CollectProxyVectorInstance);
                c = data.end() - 1;
            }
            (*c)->reset(msg->seq,msg->status,numClients);
        }
        if ( (*c)->append(msg) ) {
            CollectVectorVarMsg *newmsg = (*c)->buildMsg();
            return newmsg;
        } else {
            return 0;
        }
    }

    ResizeArray<CollectProxyVectorInstance*> data;

  private:
    int numClients;

};

ParallelIOMgr::ParallelIOMgr()
{
    CkpvAccess(BOCclass_group).ioMgr = thisgroup;

    numInputProcs=-1;
    inputProcArray = NULL;
    numOutputProcs=-1;
    outputProcArray = NULL;

    procsReceived=0;
    hydroMsgRecved=0;

    totalMV.x = totalMV.y = totalMV.z = 0.0;
    totalMass = 0.0;
    totalCharge = 0.0;
    numTotalExclusions = 0;
    numCalcExclusions = 0;
    numCalcFullExclusions = 0;

    isOKToRecvHPAtoms = false;
    hpAtomsList = NULL;

    clusterID = NULL;
    clusterSize = NULL;

#ifdef MEM_OPT_VERSION
    midCM = NULL;
#endif

    isWater = NULL;

    numCSMAck = 0;
    numReqRecved = 0;

    sendAtomsThread = 0;

#if COLLECT_PERFORMANCE_DATA
    numFixedAtomLookup = 0;
#endif
}

ParallelIOMgr::~ParallelIOMgr()
{
    delete [] inputProcArray;
    delete [] outputProcArray;
    delete [] clusterID;
    delete [] clusterSize;

#ifdef MEM_OPT_VERSION
    delete midCM;
#endif

    delete [] isWater;
}

#ifndef OUTPUT_SINGLE_FILE
#error OUTPUT_SINGLE_FILE not defined!
#endif

// initialization needed for the parallel IO manager; entry point through ScriptTcl
void ParallelIOMgr::initialize(Node *node)
{
    simParameters = node->simParameters;
    molecule = node->molecule;

    numInputProcs = simParameters->numinputprocs;
    numOutputProcs = simParameters->numoutputprocs;
    numOutputWrts = simParameters->numoutputwrts;

    numProxiesPerOutputProc = std::min((int)sqrt(CkNumPes()),(CkNumPes()-1)/numOutputProcs-1);
    if ( numProxiesPerOutputProc < 2 ) numProxiesPerOutputProc = 0;
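
    // Illustrative sizing: on 1024 PEs with 31 output procs this picks
    // min((int)sqrt(1024), 1023/31 - 1) = min(32, 32) = 32 proxies per
    // output proc; whenever the formula yields fewer than 2, proxies are
    // disabled entirely.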

    if(!CkMyPe()) {
        iout << iINFO << "Running with " << numInputProcs << " input processors.\n" << endi;
        #if OUTPUT_SINGLE_FILE
        iout << iINFO << "Running with " << numOutputProcs << " output processors (" << numOutputWrts << " of them will output simultaneously).\n" << endi;
        #else
        iout << iINFO << "Running with " << numOutputProcs << " output processors, and each of them will output to its own separate file.\n" << endi;
        #endif
        if ( numProxiesPerOutputProc ) {
            iout << iINFO << "Running with " << numProxiesPerOutputProc << " proxies per output processor.\n" << endi;
        }
    }

    //build inputProcArray
    {
        inputProcArray = new int[numInputProcs];
        myInputRank = -1;
        for(int i=0; i<numInputProcs; ++i) {
            inputProcArray[i] = WorkDistrib::peDiffuseOrdering[(1+numOutputProcs+i)%CkNumPes()];
        }
        std::sort(inputProcArray, inputProcArray+numInputProcs);
        for(int i=0; i<numInputProcs; ++i) {
            if ( CkMyPe() == inputProcArray[i] ) {
                if ( myInputRank != -1 ) NAMD_bug("Duplicate input proc");
                myInputRank = i;
            }
        }

        if(!CkMyPe()) {
            iout << iINFO << "INPUT PROC LOCATIONS:";
            int i;
            for ( i=0; i<numInputProcs && i < 10; ++i ) {
                iout << " " << inputProcArray[i];
            }
            if ( i<numInputProcs ) iout << " ... " << inputProcArray[numInputProcs-1];
            iout << "\n" << endi;
        }
    }

    if(myInputRank!=-1) {
        //NOTE: this could be further optimized by pre-allocating the memory
        //for incoming atoms --Chao Mei
        int numMyAtoms = numInitMyAtomsOnInput();
        initAtoms.resize(numMyAtoms+100); // extra space for orphan hydrogens
        initAtoms.resize(numMyAtoms);
        tmpRecvAtoms.resize(0);
    } else {
        initAtoms.resize(0);
        tmpRecvAtoms.resize(0);
    }
    hpIDList.resize(0);

    //build outputProcArray
    //spread the output processors across all the processors
    {
        outputProcArray = new int[numOutputProcs];
        outputProcFlags = new char[CkNumPes()];
        outputProxyArray = new int[numOutputProcs*numProxiesPerOutputProc];
        myOutputProxies = new int[numOutputProcs];
        myOutputRank = -1;
        myOutputProxyRank = -1;
        for(int i=0; i<numOutputProcs; ++i) {
            outputProcArray[i] = WorkDistrib::peDiffuseOrdering[(1+i)%CkNumPes()];
        }
        std::sort(outputProcArray, outputProcArray+numOutputProcs);
        for(int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
            outputProxyArray[i] = WorkDistrib::peDiffuseOrdering[(1+numOutputProcs+i)%CkNumPes()];
        }
        std::sort(outputProxyArray, outputProxyArray+numOutputProcs*numProxiesPerOutputProc,
                  pe_sortop_compact());
        for(int i=0; i<CkNumPes(); ++i) {
            outputProcFlags[i] = 0;
        }
        for(int i=0; i<numOutputProcs; ++i) {
            outputProcFlags[outputProcArray[i]] = 1;
            if ( CkMyPe() == outputProcArray[i] ) {
                if ( myOutputRank != -1 ) NAMD_bug("Duplicate output proc");
                myOutputRank = i;
            }
        }
        for(int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
            if ( CkMyPe() == outputProxyArray[i] ) {
                if ( myOutputRank != -1 ) NAMD_bug("Output proxy is also output proc");
                if ( myOutputProxyRank != -1 ) NAMD_bug("Duplicate output proxy");
                myOutputProxyRank = i;
            }
        }
        int myProxySet = (WorkDistrib::peCompactOrderingIndex[CkMyPe()]*numProxiesPerOutputProc)/CkNumPes();
        for(int i=0; i<numOutputProcs; ++i) {
            if ( numProxiesPerOutputProc ) {
                myOutputProxies[i] = outputProxyArray[myProxySet*numOutputProcs+i];
            } else {
                myOutputProxies[i] = outputProcArray[i];
            }
        }

        // delay building sequences until after PatchMap is initialized
        myOutputProxyPositions = 0;
        myOutputProxyVelocities = 0;
        myOutputProxyForces = 0;

        if(!CkMyPe()) {
            iout << iINFO << "OUTPUT PROC LOCATIONS:";
            int i;
            for ( i=0; i<numOutputProcs && i < 10; ++i ) {
                iout << " " << outputProcArray[i];
            }
            if ( i<numOutputProcs ) iout << " ... " << outputProcArray[numOutputProcs-1];
            iout << "\n" << endi;
        }
    }

#ifdef MEM_OPT_VERSION
    if(myOutputRank!=-1) {
        midCM = new CollectionMidMaster(this);
    }
    remoteClusters.clear();
    csmBuf.resize(0);
    remoteCoors.clear();
    ccmBuf.resize(0);

    mainMaster = CollectionMgr::Object()->getMasterChareID();
#endif
}

int ParallelIOMgr::isOutputProcessor(int pe) {
    return outputProcFlags[pe];
}

int isOutputProcessor(int pe){
    return CProxy_ParallelIOMgr::ckLocalBranch(CkpvAccess(BOCclass_group).ioMgr)->isOutputProcessor(pe);
}


void ParallelIOMgr::readPerAtomInfo()
{
#ifdef MEM_OPT_VERSION
    if(myInputRank!=-1) {
        int myAtomLIdx, myAtomUIdx;
        getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);

        //1. read the file that contains per-atom info such as signature index
        molecule->read_binary_atom_info(myAtomLIdx, myAtomUIdx, initAtoms);

        //2. read coordinates and velocities of each atom if the velocity file
        //exists; otherwise the velocity of each atom is randomly generated.
        //This has to be DONE AFTER THE FIRST STEP because the atom mass is
        //required if the velocity is generated randomly.
        readCoordinatesAndVelocity();

        //3. set every atom's output processor rank, i.e. the dest pe this
        //atom will be sent to for writing positions and velocities etc.
        int oRank=atomRankOnOutput(myAtomLIdx);
        for(int i=oRank; i<numOutputProcs; i++) {
            int lIdx, uIdx; //indicates the range of atom ids outputProcArray[i] has
            getAtomsRangeOnOutput(lIdx, uIdx, i);
            if(lIdx > myAtomUIdx) break;
            int fid = lIdx>myAtomLIdx?lIdx:myAtomLIdx;
            int tid = uIdx>myAtomUIdx?myAtomUIdx:uIdx;
            for(int j=fid; j<=tid; j++) initAtoms[j-myAtomLIdx].outputRank = i;
        }
    }

    //read clusters
    if(myOutputRank!=-1) {
        //cluster info is required only when wrapAll or wrapWater is set
        if(!(simParameters->wrapAll || simParameters->wrapWater)) return;
        readInfoForParOutput();
    }
#endif
}

void ParallelIOMgr::readCoordinatesAndVelocity()
{
#ifdef MEM_OPT_VERSION
    int needFlip = 0;
    int myAtomLIdx, myAtomUIdx;
    getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
    int myNumAtoms = myAtomUIdx-myAtomLIdx+1;

    //contains the data for Position and Velocity
    Vector *tmpData = new Vector[myNumAtoms];

    //begin to read coordinates
    //step1: open the file
    FILE *ifp = fopen(simParameters->binCoorFile, "rb");
    if(!ifp) {
        char s[256];
        sprintf(s, "The binary coordinate file %s cannot be opened on proc %d\n", simParameters->binCoorFile, CkMyPe());
        NAMD_err(s);
    }
    //step2: check whether flip is needed
    int filelen;
    fread(&filelen, sizeof(int32),1,ifp);
    char lenbuf[sizeof(int32)];
    memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
    flipNum(lenbuf, sizeof(int32), 1);
    if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
        iout << iWARN << "Number of atoms in binary file " << simParameters->binCoorFile
             << " is palindromic, assuming same endian.\n" << endi;
    }
    if(filelen!=molecule->numAtoms) {
        needFlip = 1;
        memcpy((void *)&filelen, lenbuf,sizeof(int32));
    }
    if(filelen!=molecule->numAtoms) {
        char s[256];
        sprintf(s, "Incorrect atom count in binary file %s", simParameters->binCoorFile);
        NAMD_die(s);
    }
    //step3: read the part of the file specified by the range
    int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Position);
#ifdef WIN32
    if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
#else
    if ( fseeko(ifp, offsetPos, SEEK_CUR) )
#endif
    {
        char s[256];
        sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
        NAMD_err(s);
    }
    size_t totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
    if(totalRead!=myNumAtoms) {
        char s[256];
        sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
        NAMD_err(s);
    }
    if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
    fclose(ifp);
    for(int i=0; i<myNumAtoms; i++) initAtoms[i].position = tmpData[i];

    //begin to read velocity
    //step1: generate velocity randomly or open the file
    if(!simParameters->binVelFile) {
        //generate velocity randomly
        Node::Object()->workDistrib->random_velocities_parallel(simParameters->initialTemp, initAtoms);
    } else {
        ifp = fopen(simParameters->binVelFile, "rb");
        if(!ifp) {
            char s[256];
            sprintf(s, "The binary velocity file %s cannot be opened on proc %d\n", simParameters->binVelFile, CkMyPe());
            NAMD_err(s);
        }
        //step2: check whether flip is needed
        fread(&filelen, sizeof(int32),1,ifp);
        memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
        flipNum(lenbuf, sizeof(int32), 1);
        if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
            iout << iWARN << "Number of atoms in binary file " << simParameters->binVelFile
                 << " is palindromic, assuming same endian.\n" << endi;
        }
        if(filelen!=molecule->numAtoms) {
            needFlip = 1;
            memcpy((void *)&filelen, lenbuf,sizeof(int32));
        }
        if(filelen!=molecule->numAtoms) {
            char s[256];
            sprintf(s, "Incorrect atom count in binary file %s", simParameters->binVelFile);
            NAMD_die(s);
        }

        //step3: read the part of the file specified by the range
        int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Velocity);
#ifdef WIN32
        if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
#else
        if ( fseeko(ifp, offsetPos, SEEK_CUR) )
#endif
        {
            char s[256];
            sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
            NAMD_err(s);
        }
        totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
        if(totalRead!=myNumAtoms) {
            char s[256];
            sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
            NAMD_err(s);
        }
        if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
        fclose(ifp);
        for(int i=0; i<myNumAtoms; i++) initAtoms[i].velocity = tmpData[i];
    }

    //begin to read reference coordinates
    //step1: use initial positions or open the file
    if(!simParameters->binRefFile) {
        for(int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = initAtoms[i].position;
    } else {
        ifp = fopen(simParameters->binRefFile, "rb");
        if(!ifp) {
            char s[256];
            sprintf(s, "The binary reference coordinate file %s cannot be opened on proc %d\n", simParameters->binRefFile, CkMyPe());
            NAMD_err(s);
        }
        //step2: check whether flip is needed
        fread(&filelen, sizeof(int32),1,ifp);
        memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
        flipNum(lenbuf, sizeof(int32), 1);
        if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
            iout << iWARN << "Number of atoms in binary file " << simParameters->binRefFile
                 << " is palindromic, assuming same endian.\n" << endi;
        }
        if(filelen!=molecule->numAtoms) {
            needFlip = 1;
            memcpy((void *)&filelen, lenbuf,sizeof(int32));
        }
        if(filelen!=molecule->numAtoms) {
            char s[256];
            sprintf(s, "Incorrect atom count in binary file %s", simParameters->binRefFile);
            NAMD_die(s);
        }

        //step3: read the part of the file specified by the range
        int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Position);
#ifdef WIN32
        if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
#else
        if ( fseeko(ifp, offsetPos, SEEK_CUR) )
#endif
        {
            char s[256];
            sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
            NAMD_err(s);
        }
        totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
        if(totalRead!=myNumAtoms) {
            char s[256];
            sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
            NAMD_err(s);
        }
        if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
        fclose(ifp);
        for(int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = tmpData[i];
    }

    delete [] tmpData;
#endif
}

void ParallelIOMgr::readInfoForParOutput()
{
    int fromIdx, toIdx; //atoms' range
    getMyAtomsRangeOnOutput(fromIdx,toIdx);
    int numMyAtoms = toIdx-fromIdx+1;

    clusterID = new int[numMyAtoms];
    clusterSize = new int[numMyAtoms];

    //Since the input proc also reads this file, all the checks
    //(file version check, atom record size check) are omitted here
    FILE *ifp = fopen(simParameters->binAtomFile, "rb");
    //read magic number to set needFlip
    int needFlip = 0;
    int magicNum;
    fread(&magicNum, sizeof(int), 1, ifp);
    if (magicNum!=COMPRESSED_PSF_MAGICNUM) {
        needFlip = 1;
    }

    //need to load isWater info
    isWater = new char[numMyAtoms];
    //seek from the end of the file (note the offset is negative!)
    int64 offset = sizeof(char)*((int64)(fromIdx-molecule->numAtoms));
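    // Layout note (inferred from the two offset computations here): the
    // per-atom clusterID ints sit just before the per-atom isWater chars at
    // the tail of the compressed .psf binary, so (fromIdx - numAtoms), a
    // negative element count, seeks backward from SEEK_END to this rank's
    // slice of each array.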
#ifdef WIN32
    if ( _fseeki64(ifp, offset, SEEK_END) )
#else
    if ( fseeko(ifp, offset, SEEK_END) )
#endif
    {
        char s[256];
        sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
        NAMD_err(s);
    }
    fread(isWater, sizeof(char), numMyAtoms, ifp);
    //there's no need for flipping as it's a char array

    //seek from the end of the file (note the offset is negative!)
    offset = sizeof(int)*((int64)(fromIdx-molecule->numAtoms))
             - sizeof(char)*((int64)(molecule->numAtoms));
#ifdef WIN32
    if ( _fseeki64(ifp, offset, SEEK_END) )
#else
    if ( fseeko(ifp, offset, SEEK_END) )
#endif
    {
        char s[256];
        sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
        NAMD_err(s);
    }
    fread(clusterID, sizeof(int), numMyAtoms, ifp);
    if(needFlip) flipNum((char *)clusterID, sizeof(int), numMyAtoms);
    fclose(ifp);

    //calculate cluster size (communication may be necessary)
    ClusterElem one;
    for(int i=0; i<numMyAtoms; i++) {
        clusterSize[i] = 0;
        int cid = clusterID[i];
        //check whether the cluster id (i.e. the id of the head atom of
        //the cluster) is in my local repository. If it is not, it must
        //belong to an output processor ahead of this one because of the
        //way cluster info is determined for the molecular system.
        CmiAssert(cid<=toIdx);
        if(cid<fromIdx) {
            //on output procs ahead of me
            one.clusterId = cid;
            ClusterElem *ret = remoteClusters.find(one);
            if(ret==NULL) {
                one.atomsCnt = 1;
                remoteClusters.add(one);
            } else {
                ret->atomsCnt++;
            }
        } else {
            int lidx = cid-fromIdx;
            CmiAssert(lidx<=i);
            clusterSize[lidx]++;
        }
    }

    //Prepare to send msgs to remote output procs to reduce the cluster size.
    //Since the number of msgs is expected to be very small, msgs to the same
    //proc are not aggregated. --Chao Mei
#if 0
    printf("output[%d]=%d: prepare to send %d remote msgs for cluster size\n",
           myOutputRank, CkMyPe(), remoteClusters.size());
#endif

    numRemoteClusters = remoteClusters.size();
    numCSMAck = 0; //set to 0 to prepare for receiving the final cluster size update
    CProxy_ParallelIOMgr pIO(thisgroup);
    ClusterSetIter iter(remoteClusters);
    for(iter=iter.begin(); iter!=iter.end(); iter++) {
        ClusterSizeMsg *msg = new ClusterSizeMsg;
        msg->srcRank = myOutputRank;
        msg->clusterId = iter->clusterId;
        msg->atomsCnt = iter->atomsCnt;
        int dstRank = atomRankOnOutput(iter->clusterId);
        pIO[outputProcArray[dstRank]].recvClusterSize(msg);
    }
}

void ParallelIOMgr::recvClusterSize(ClusterSizeMsg *msg)
{
    csmBuf.add(msg); //added to buffer for reuse when sending back to src

    //updating the cluster size has to be delayed to the integration step to
    //prevent a data race where clusterSize has not yet been allocated
}

void ParallelIOMgr::integrateClusterSize()
{
    if(myOutputRank==-1) return;
    if(!(simParameters->wrapAll || simParameters->wrapWater)) return;

    int fromIdx, toIdx; //atoms' range
    getMyAtomsRangeOnOutput(fromIdx,toIdx);

    //calculate the final cluster size
    for(int i=0; i<csmBuf.size(); i++) {
        ClusterSizeMsg *msg = csmBuf[i];
        int lidx = msg->clusterId - fromIdx;
        clusterSize[lidx] += msg->atomsCnt;
    }

    CProxy_ParallelIOMgr pIO(thisgroup);
    for(int i=0; i<csmBuf.size(); i++) {
        ClusterSizeMsg *msg = csmBuf[i];
        int lidx = msg->clusterId - fromIdx;
        msg->atomsCnt = clusterSize[lidx];
        pIO[outputProcArray[msg->srcRank]].recvFinalClusterSize(msg);
    }
    numRemoteReqs = csmBuf.size();
    csmBuf.resize(0);

    //There's a possible message race here: recvFinalClusterSize could
    //execute before integrateClusterSize because another proc finishes
    //calculating its cluster sizes faster, yet recvFinalClusterSize must
    //run after integrateClusterSize. To avoid this, a self message is
    //sent to participate in the reduction.
    if(numRemoteClusters!=0){
        recvFinalClusterSize(NULL);
    }else{
        //this output proc already has the final cluster size for each atom
        int numMyAtoms = toIdx-fromIdx+1;
        for(int i=0; i<numMyAtoms; i++) {
            int lidx = clusterID[i]-fromIdx;
            clusterSize[i] = clusterSize[lidx];
        }

        #if 0 //write out cluster debug info
        char fname[128];
        sprintf(fname, "cluster.par.%d", CkMyPe());
        FILE *ofp = fopen(fname, "w");
        for(int i=0; i<numMyAtoms; i++) {
            fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
        }
        fclose(ofp);
        #endif
    }
}

void ParallelIOMgr::recvFinalClusterSize(ClusterSizeMsg *msg)
{
    //only process messages sent by other procs
    if(msg!=NULL) {
        //indicating a message from another proc
        ClusterElem one(msg->clusterId);
        ClusterElem *ret = remoteClusters.find(one);
        CmiAssert(ret!=NULL);
        ret->atomsCnt = msg->atomsCnt;
    }
    delete msg;

    //count the msg this proc sent to itself toward the reduction
    if(++numCSMAck == (numRemoteClusters+1)) {
        //received all the msgs needed to finally update the cluster size of each atom
        int fromIdx, toIdx; //atoms' range
        getMyAtomsRangeOnOutput(fromIdx,toIdx);
        int numMyAtoms = toIdx-fromIdx+1;
        ClusterElem tmp;
        for(int i=0; i<numMyAtoms; i++) {
            int cid = clusterID[i];
            int lidx = cid-fromIdx;
            if(lidx<0) {
                //this cid should be inside remoteClusters
                tmp.clusterId = cid;
                ClusterElem *fone = remoteClusters.find(tmp);
                clusterSize[i] = fone->atomsCnt;
            } else {
                clusterSize[i] = clusterSize[lidx];
            }
        }
        numCSMAck = 0;
        remoteClusters.clear();

#if 0 //write out cluster debug info
        char fname[128];
        sprintf(fname, "cluster.par.%d", CkMyPe());
        FILE *ofp = fopen(fname, "w");
        for(int i=0; i<numMyAtoms; i++) {
            fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
        }
        fclose(ofp);
#endif

    }
}

void ParallelIOMgr::migrateAtomsMGrp()
{
    if(myInputRank==-1) return;

    //1. first get the list of atoms to be migrated,
    //which should be few compared with the number of atoms
    //initially assigned to this input proc.
    AtomIDList toMigrateList; //the list of atoms to be migrated
    //the max distance from this processor of atoms to be sent
    int maxOffset = 0;
    for(int i=0; i<initAtoms.size(); i++) {
        //returns the proc id on which atom MPID resides
        int parentRank = atomInitRankOnInput(initAtoms[i].MPID);
        if(parentRank != myInputRank) {
            toMigrateList.add(i);
            initAtoms[i].isValid = false;
            int tmp = parentRank - myInputRank;
            tmp = tmp>0 ? tmp : -tmp;
            if(tmp > maxOffset) maxOffset = tmp;
        }
    }

    //2. prepare atom migration messages
    //the messages are indexed as [-maxOffset,..., -1,0,1,..., maxOffset]
    //where the "0" entry is not used at all; it is added for the sake of
    //computing the index easily.
    InputAtomList *migLists = new InputAtomList[2*maxOffset+1];
    for(int i=0; i<toMigrateList.size(); i++) {
        int idx = toMigrateList[i];
        int parentRank = atomInitRankOnInput(initAtoms[idx].MPID);
        //decide which migList to put this atom in
        int offset = parentRank - myInputRank + maxOffset;
        migLists[offset].add(initAtoms[idx]);
    }

    CProxy_ParallelIOMgr pIO(thisgroup);
    for(int i=0; i<2*maxOffset+1; i++) {
        int migLen = migLists[i].size();
        if(migLen>0) {
            MoveInputAtomsMsg *msg = new (migLen, 0)MoveInputAtomsMsg;
            msg->length = migLen;
            memcpy(msg->atomList, migLists[i].begin(), sizeof(InputAtom)*migLen);
            int destRank = i-maxOffset+myInputRank;
            pIO[inputProcArray[destRank]].recvAtomsMGrp(msg);
            migLists[i].clear();
        }
    }

    toMigrateList.clear();
    delete [] migLists;
}

void ParallelIOMgr::recvAtomsMGrp(MoveInputAtomsMsg *msg)
{
    for(int i=0; i<msg->length; i++) {
        tmpRecvAtoms.add((msg->atomList)[i]);
    }
    delete msg;
}

void ParallelIOMgr::integrateMigratedAtoms()
{
    if(myInputRank==-1) return;

    for(int i=0; i<tmpRecvAtoms.size(); i++) {
        tmpRecvAtoms[i].isValid = true;
        initAtoms.add(tmpRecvAtoms[i]);
    }
    tmpRecvAtoms.clear();

    //sort the atom list based on the hydrogenList value
    std::sort(initAtoms.begin(), initAtoms.end());

    //now compute the counters inside Molecule, such as numFixedRigidBonds,
    //that are based on the hydrogen group info

    int numFixedRigidBonds = 0;
    if(molecule->numRigidBonds){
        int parentIsFixed = 0;
        for(int i=0; i<initAtoms.size(); i++) {
            InputAtom *one = &(initAtoms[i]);
            if(!one->isValid) continue;
            if(one->isGP) {
                parentIsFixed = one->atomFixed;
                InputAtom *a1 = &(initAtoms[i+1]);
                InputAtom *a2 = &(initAtoms[i+2]);
                if((one->rigidBondLength>0.0) &&
                   a1->atomFixed && a2->atomFixed) {
                    numFixedRigidBonds++;
                }
            }else{
                if((one->rigidBondLength>0.0) &&
                   one->atomFixed && parentIsFixed) {
                    numFixedRigidBonds++;
                }
            }
        }
    }

    int numFixedGroups = 0;
    if(molecule->numFixedAtoms){
        for(int i=0; i<initAtoms.size();) {
            InputAtom *one = &(initAtoms[i]);
            if(!one->isValid){
                i++;
                continue;
            }
            if(one->isGP) {
                int allFixed = 1;
                for(int j=0; j<one->hydrogenGroupSize; j++){
                    InputAtom *a1 = &(initAtoms[i+j]);
                    allFixed = allFixed & a1->atomFixed;
                    if(!allFixed) break;
                }
                if(allFixed) numFixedGroups++;
                i += one->hydrogenGroupSize;
            }
        }
    }

    CProxy_ParallelIOMgr pIO(thisgroup);
    HydroBasedMsg *msg = new HydroBasedMsg;
    msg->numFixedGroups = numFixedGroups;
    msg->numFixedRigidBonds = numFixedRigidBonds;
    pIO[0].recvHydroBasedCounter(msg);
}

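// Point-to-point reduction of molecule-wide counters: each input proc
// tallies the tuples of its atoms into a MolInfoMsg, proc 0 accumulates
// the partial sums in recvMolInfo(), and the final totals are broadcast
// back to every PE via bcastMolInfo().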
void ParallelIOMgr::updateMolInfo()
{
#ifdef MEM_OPT_VERSION
    if(myInputRank==-1) return;

    CProxy_ParallelIOMgr pIO(thisgroup);

    MolInfoMsg *msg = new MolInfoMsg;
    msg->numBonds = msg->numCalcBonds = 0;
    msg->numAngles = msg->numCalcAngles = 0;
    msg->numDihedrals = msg->numCalcDihedrals = 0;
    msg->numImpropers = msg->numCalcImpropers = 0;
    msg->numCrossterms = msg->numCalcCrossterms = 0;
    msg->numExclusions = msg->numCalcExclusions = 0;
    int64 numFullExclusions = msg->numCalcFullExclusions = 0;
    // JLai
    msg->numLJPairs = msg->numCalcLJPairs = 0;
    // End of JLai
    msg->numRigidBonds = 0;
    msg->totalMass = 0.0;
    msg->totalCharge = 0.0;

    //calculate the tuples this input processor has
    AtomSignature *atomSigPool = molecule->atomSigPool;
    ExclusionSignature *exclSigPool = molecule->exclSigPool;
    for(int i=0; i<initAtoms.size(); i++) {
        AtomSignature *thisSig = &atomSigPool[initAtoms[i].sigId];
        msg->numBonds += thisSig->bondCnt;
        msg->numAngles += thisSig->angleCnt;
        msg->numDihedrals += thisSig->dihedralCnt;
        msg->numImpropers += thisSig->improperCnt;
        msg->numCrossterms += thisSig->crosstermCnt;
        // JLai
        msg->numLJPairs += thisSig->gromacsPairCnt;
        // End of JLai

        ExclusionSignature *exclSig = &exclSigPool[initAtoms[i].exclId];
        msg->numExclusions += (exclSig->fullExclCnt + exclSig->modExclCnt);
        numFullExclusions += exclSig->fullExclCnt;

        if(initAtoms[i].rigidBondLength > 0.0) msg->numRigidBonds++;

        msg->totalMass += initAtoms[i].mass;
        msg->totalCharge += initAtoms[i].charge;
    }

    //deal with the numCalc* counters, which are affected by fixed atoms
    if(molecule->numFixedAtoms>0 && ! simParameters->fixedAtomsForces) {
        //if there are fixed atoms, the numCalc* counters need to be computed.
        //Since it's possible that an atom inside this exclusion set is on
        //another input processor, we have to resort to the global fixed atom
        //info inside the Molecule object. The number of such accesses should
        //be very small! --Chao Mei
        int sAId = initAtoms[0].id;
        int remoteCnt=0; //stats info
        for(int i=0; i<initAtoms.size(); i++) {
            //When all the atoms in the set are fixed, the elem (Bond etc.)
            //is not counted as a calc*.
            int myAId = initAtoms[i].id;
            AtomSignature *thisSig = &atomSigPool[initAtoms[i].sigId];
            ExclusionSignature *exclSig = &exclSigPool[initAtoms[i].exclId];
            if(!initAtoms[i].atomFixed) {
                msg->numCalcBonds += thisSig->bondCnt;
                msg->numCalcAngles += thisSig->angleCnt;
                msg->numCalcDihedrals += thisSig->dihedralCnt;
                msg->numCalcImpropers += thisSig->improperCnt;
                msg->numCalcCrossterms += thisSig->crosstermCnt;
                msg->numCalcExclusions+=(exclSig->fullExclCnt+exclSig->modExclCnt);
                msg->numCalcFullExclusions+=(exclSig->fullExclCnt);
                continue;
            }

            //1. Bonds
            for(int j=0; j<thisSig->bondCnt; j++) {
                TupleSignature *bsig = &(thisSig->bondSigs[j]);
                int a1 = myAId + bsig->offset[0];
                if(!isAtomFixed(sAId, a1)) msg->numCalcBonds++;
            }

            //2. Angles
            for(int j=0; j<thisSig->angleCnt; j++) {
                TupleSignature *bsig = &(thisSig->angleSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                if(!isAtomFixed(sAId, a1) || !isAtomFixed(sAId, a2))
                    msg->numCalcAngles++;
            }

            //3. Dihedrals
            for(int j=0; j<thisSig->dihedralCnt; j++) {
                TupleSignature *bsig = &(thisSig->dihedralSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                int a3 = myAId + bsig->offset[2];
                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2) ||
                   !isAtomFixed(sAId, a3))
                    msg->numCalcDihedrals++;
            }

            //4. Impropers
            for(int j=0; j<thisSig->improperCnt; j++) {
                TupleSignature *bsig = &(thisSig->improperSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                int a3 = myAId + bsig->offset[2];
                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2) ||
                   !isAtomFixed(sAId, a3))
                    msg->numCalcImpropers++;
            }

            //5. Crossterms
            for(int j=0; j<thisSig->crosstermCnt; j++) {
                TupleSignature *bsig = &(thisSig->crosstermSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                int a3 = myAId + bsig->offset[2];
                int a4 = myAId + bsig->offset[3];
                int a5 = myAId + bsig->offset[4];
                int a6 = myAId + bsig->offset[5];
                int a7 = myAId + bsig->offset[6];

                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2) ||
                   !isAtomFixed(sAId, a3) ||
                   !isAtomFixed(sAId, a4) ||
                   !isAtomFixed(sAId, a5) ||
                   !isAtomFixed(sAId, a6) ||
                   !isAtomFixed(sAId, a7))
                    msg->numCalcCrossterms++;
            }

            //6: Exclusions
            //this atom is fixed, check atoms in the exclusion set
            for(int j=0; j<exclSig->fullExclCnt; j++) {
                int thisAId = exclSig->fullOffset[j]+myAId;
                if(!isAtomFixed(sAId, thisAId)) { msg->numCalcExclusions++; msg->numCalcFullExclusions++; }
            }
            for(int j=0; j<exclSig->modExclCnt; j++) {
                int thisAId = exclSig->modOffset[j]+myAId;
                if(!isAtomFixed(sAId, thisAId)) msg->numCalcExclusions++;
            }

            //7: GromacsPair
            for(int j=0; j<thisSig->gromacsPairCnt; j++) {
                TupleSignature *bsig = &(thisSig->gromacsPairSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2))
                    msg->numCalcLJPairs++;
            }
        }
#if COLLECT_PERFORMANCE_DATA
        printf("Num fixedAtom lookup on proc %d is %d\n", CkMyPe(), numFixedAtomLookup);
#endif
    } else {
        //no fixed atoms, so every numCalc* equals the corresponding num*
        msg->numCalcBonds = msg->numBonds;
        msg->numCalcAngles = msg->numAngles;
        msg->numCalcDihedrals = msg->numDihedrals;
        msg->numCalcImpropers = msg->numImpropers;
        msg->numCalcCrossterms = msg->numCrossterms;
        msg->numCalcExclusions = msg->numExclusions;
        msg->numCalcFullExclusions = numFullExclusions;
    }


    if(!simParameters->comMove) {
        //to remove the center of mass motion from a molecule,
        //first calculate the values on every input proc, then reduce.
        //For more info, refer to WorkDistrib::remove_com_motion
        //-Chao Mei
        (msg->totalMV).x = 0.0;
        (msg->totalMV).y = 0.0;
        (msg->totalMV).z = 0.0;
        for (int i=0; i<initAtoms.size(); i++) {
            msg->totalMV += initAtoms[i].mass * initAtoms[i].velocity;
        }
    }

    //always send to the master processor (proc 0)
    pIO[0].recvMolInfo(msg);
#endif
}

//only executed on proc 0
void ParallelIOMgr::recvMolInfo(MolInfoMsg *msg)
{
    molecule->numBonds += msg->numBonds;
    molecule->numCalcBonds += msg->numCalcBonds;
    molecule->numAngles += msg->numAngles;
    molecule->numCalcAngles += msg->numCalcAngles;
    molecule->numDihedrals += msg->numDihedrals;
    molecule->numCalcDihedrals += msg->numCalcDihedrals;
    molecule->numImpropers += msg->numImpropers;
    molecule->numCalcImpropers += msg->numCalcImpropers;
    molecule->numCrossterms += msg->numCrossterms;
    molecule->numCalcCrossterms += msg->numCalcCrossterms;
    numTotalExclusions += msg->numExclusions;
    numCalcExclusions += msg->numCalcExclusions;
    numCalcFullExclusions += msg->numCalcFullExclusions;
    molecule->numRigidBonds += msg->numRigidBonds;

    totalMass += msg->totalMass;
    totalCharge += msg->totalCharge;

    if(!simParameters->comMove) {
        totalMV += msg->totalMV;
    }

    if(++procsReceived == numInputProcs) {
        //received all the counters
        msg->numBonds = molecule->numBonds;
        msg->numCalcBonds = molecule->numCalcBonds;
        msg->numAngles = molecule->numAngles;
        msg->numCalcAngles = molecule->numCalcAngles;
        msg->numDihedrals = molecule->numDihedrals;
        msg->numCalcDihedrals = molecule->numCalcDihedrals;
        msg->numImpropers = molecule->numImpropers;
        msg->numCalcImpropers = molecule->numCalcImpropers;
        msg->numCrossterms = molecule->numCrossterms;
        msg->numCalcCrossterms = molecule->numCalcCrossterms;
        msg->numExclusions = numTotalExclusions/2;
        msg->numCalcExclusions = numCalcExclusions/2;
        msg->numCalcFullExclusions = numCalcFullExclusions/2;
        msg->numRigidBonds = molecule->numRigidBonds;

        msg->totalMass = totalMass;
        msg->totalCharge = totalCharge;

        if(!simParameters->comMove) {
            msg->totalMV = totalMV;
        }

        CProxy_ParallelIOMgr pIO(thisgroup);
        pIO.bcastMolInfo(msg);

        //reset to 0 for the next p2p-based reduction on input procs
        procsReceived = 0;
    } else delete msg;
}

void ParallelIOMgr::bcastMolInfo(MolInfoMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if(myInputRank!=-1) {
        if(!simParameters->comMove) {
            //needs to remove the center of mass motion from the molecule
            Vector val = msg->totalMV / msg->totalMass;
            for (int i=0; i<initAtoms.size(); i++) initAtoms[i].velocity -= val;
        }
    }

    //only rank 0 of each SMP node updates the Molecule object
    if(CmiMyRank()) {
        delete msg;
        return;
    }

    molecule->numBonds = msg->numBonds;
    molecule->numCalcBonds = msg->numCalcBonds;
    molecule->numAngles = msg->numAngles;
    molecule->numCalcAngles = msg->numCalcAngles;
    molecule->numDihedrals = msg->numDihedrals;
    molecule->numCalcDihedrals = msg->numCalcDihedrals;
    molecule->numImpropers = msg->numImpropers;
    molecule->numCalcImpropers = msg->numCalcImpropers;
    molecule->numCrossterms = msg->numCrossterms;
    molecule->numCalcCrossterms = msg->numCalcCrossterms;

    molecule->numTotalExclusions = msg->numExclusions;
    molecule->numCalcExclusions = msg->numCalcExclusions;
    molecule->numCalcFullExclusions = msg->numCalcFullExclusions;

    molecule->numRigidBonds = msg->numRigidBonds;

    if(!CkMyPe()) {
        iout << iINFO << "LOADED " << molecule->numTotalExclusions << " TOTAL EXCLUSIONS\n" << endi;
        if(!simParameters->comMove) {
            iout << iINFO << "REMOVING COM VELOCITY "
                 << (PDBVELFACTOR * (msg->totalMV / msg->totalMass)) << "\n" << endi;
        }
    }
    delete msg; //deleted only after the last use of msg->totalMV above
#endif
}

//only called on PE 0
void ParallelIOMgr::recvHydroBasedCounter(HydroBasedMsg *msg){
    molecule->numFixedRigidBonds += msg->numFixedRigidBonds;
    molecule->numFixedGroups += msg->numFixedGroups;

    if(++hydroMsgRecved == numInputProcs){
        msg->numFixedRigidBonds = molecule->numFixedRigidBonds;
        msg->numFixedGroups = molecule->numFixedGroups;
        CProxy_ParallelIOMgr pIO(thisgroup);
        pIO.bcastHydroBasedCounter(msg);
        hydroMsgRecved = 0;
    }else delete msg;
}

void ParallelIOMgr::bcastHydroBasedCounter(HydroBasedMsg *msg){
#ifdef MEM_OPT_VERSION
    //only rank 0 of each SMP node updates the Molecule object
    if(CmiMyRank()) {
        delete msg;
        return;
    }
    molecule->numFixedRigidBonds = msg->numFixedRigidBonds;
    molecule->numFixedGroups = msg->numFixedGroups;
    delete msg;

    if(!CkMyPe()) {
        iout << iINFO << "****************************\n";
        iout << iINFO << "STRUCTURE SUMMARY:\n";
        iout << iINFO << molecule->numAtoms << " ATOMS\n";
        iout << iINFO << molecule->numBonds << " BONDS\n";
        iout << iINFO << molecule->numAngles << " ANGLES\n";
        iout << iINFO << molecule->numDihedrals << " DIHEDRALS\n";
        iout << iINFO << molecule->numImpropers << " IMPROPERS\n";
        iout << iINFO << molecule->numCrossterms << " CROSSTERMS\n";
        iout << iINFO << molecule->numExclusions << " EXCLUSIONS\n";

        //****** BEGIN CHARMM/XPLOR type changes
        if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeXplorOn)){
            iout << iINFO << molecule->numMultipleDihedrals
                 << " DIHEDRALS WITH MULTIPLE PERIODICITY (BASED ON PSF FILE)\n";
        }
        if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeCharmmOn)){
            iout << iINFO << molecule->numMultipleDihedrals
                 << " DIHEDRALS WITH MULTIPLE PERIODICITY IGNORED (BASED ON PSF FILE) \n";
            iout << iINFO
                 << " CHARMM MULTIPLICITIES BASED ON PARAMETER FILE INFO! \n";
        }
        //****** END CHARMM/XPLOR type changes

        if (molecule->numMultipleImpropers){
            iout << iINFO << molecule->numMultipleImpropers
                 << " IMPROPERS WITH MULTIPLE PERIODICITY\n";
        }

        if (simParameters->fixedAtomsOn)
            iout << iINFO << molecule->numFixedAtoms << " FIXED ATOMS\n";

        if (simParameters->rigidBonds)
            iout << iINFO << molecule->numRigidBonds << " RIGID BONDS\n";

        if (simParameters->fixedAtomsOn && simParameters->rigidBonds)
            iout << iINFO << molecule->numFixedRigidBonds <<
                 " RIGID BONDS BETWEEN FIXED ATOMS\n";

        iout << iINFO << molecule->num_deg_freedom(1)
             << " DEGREES OF FREEDOM\n";

        iout << iINFO << molecule->numHydrogenGroups << " HYDROGEN GROUPS\n";
        iout << iINFO << molecule->maxHydrogenGroupSize
             << " ATOMS IN LARGEST HYDROGEN GROUP\n";
        iout << iINFO << molecule->numMigrationGroups << " MIGRATION GROUPS\n";
        iout << iINFO << molecule->maxMigrationGroupSize
             << " ATOMS IN LARGEST MIGRATION GROUP\n";
        if (simParameters->fixedAtomsOn)
        {
            iout << iINFO << molecule->numFixedGroups <<
                 " HYDROGEN GROUPS WITH ALL ATOMS FIXED\n";
        }

        iout << iINFO << "TOTAL MASS = " << totalMass << " amu\n";
        iout << iINFO << "TOTAL CHARGE = " << totalCharge << " e\n";

        BigReal volume = simParameters->lattice.volume();
        if ( volume ) {
            iout << iINFO << "MASS DENSITY = "
                 << ((totalMass/volume) / 0.6022) << " g/cm^3\n";
            iout << iINFO << "ATOM DENSITY = "
                 << (molecule->numAtoms/volume) << " atoms/A^3\n";
        }

        iout << iINFO << "****************************\n";
        iout << endi;
        fflush(stdout);
    }
#endif
}

void ParallelIOMgr::calcAtomsInEachPatch()
{
    if(myInputRank==-1) return;

    PatchMap *patchMap = PatchMap::Object();
    int numPatches = patchMap->numPatches();

    patchMap->initTmpPatchAtomsList();

    //each list contains the atom index into initAtoms
    vector<int> *eachPatchAtomList = patchMap->getTmpPatchAtomsList();

    CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
    PatchMgr *patchMgr = pm.ckLocalBranch();

    int pid=0;
    const Lattice lattice = simParameters->lattice;
    for(int i=0; i<initAtoms.size(); i++) {
        InputAtom *atom = &(initAtoms[i]);
        if(!atom->isValid) continue;
        if(atom->isMP) {
            pid = patchMap->assignToPatch(atom->position, lattice);
        }
        eachPatchAtomList[pid].push_back(i);
    }

    CProxy_ParallelIOMgr pIO(thisgroup);

    int patchCnt = 0;
    for(int i=0; i<numPatches; i++) {
        int cursize = eachPatchAtomList[i].size();
        if(cursize>0) patchCnt++;
    }

    AtomsCntPerPatchMsg *msg = NULL;
    if(simParameters->fixedAtomsOn) {
        msg = new (patchCnt, patchCnt, patchCnt, 0)AtomsCntPerPatchMsg;
    } else {
        msg = new (patchCnt, patchCnt, 0, 0)AtomsCntPerPatchMsg;
    }

    msg->length = patchCnt;
    patchCnt = 0;
    for(int i=0; i<numPatches; i++) {
        int cursize = eachPatchAtomList[i].size();
        if(cursize>0) {
            if ( cursize > USHRT_MAX ) {
                char errstr[512];
                sprintf(errstr, "Patch %d exceeds %d atoms.", i, USHRT_MAX);
                NAMD_die(errstr);
            }
            msg->pidList[patchCnt] = i;
            msg->atomsCntList[patchCnt] = cursize;
            patchCnt++;
        }
    }

    if(simParameters->fixedAtomsOn) {
        patchCnt = 0;
        for(int i=0; i<numPatches; i++) {
            int cursize = eachPatchAtomList[i].size();
            if(cursize>0) {
                int fixedCnt = 0;
                for(int j=0; j<cursize; j++) {
                    int aid = eachPatchAtomList[i][j];
                    //atomFixed is either 0 or 1
                    fixedCnt += initAtoms[aid].atomFixed;
                }
                msg->fixedAtomsCntList[patchCnt] = fixedCnt;
                patchCnt++;
            }
        }
    }

    pIO[0].recvAtomsCntPerPatch(msg);

}

void ParallelIOMgr::recvAtomsCntPerPatch(AtomsCntPerPatchMsg *msg)
{
#ifdef MEM_OPT_VERSION
    PatchMap *patchMap = PatchMap::Object();
    for(int i=0; i<msg->length; i++) {
        int pid = msg->pidList[i];
        int oldNum = patchMap->numAtoms(pid);
        if ( oldNum + msg->atomsCntList[i] > USHRT_MAX ) {
            char errstr[512];
            sprintf(errstr, "Patch %d exceeds %d atoms.", pid, USHRT_MAX);
            NAMD_die(errstr);
        }
        patchMap->setNumAtoms(pid, oldNum+msg->atomsCntList[i]);
        if(simParameters->fixedAtomsOn) {
            oldNum = patchMap->numFixedAtoms(pid);
            patchMap->setNumFixedAtoms(pid, oldNum+msg->fixedAtomsCntList[i]);
        }
    }
    delete msg;

    if(++procsReceived == numInputProcs) {
        //print max PATCH info
        int maxAtoms = -1;
        int maxPatch = -1;
        int totalAtoms = 0;
        for(int i=0; i<patchMap->numPatches(); i++) {
            int cnt = patchMap->numAtoms(i);
            totalAtoms += cnt;
            if(cnt>maxAtoms) {
                maxAtoms = cnt;
                maxPatch = i;
            }
        }
        procsReceived = 0;
        iout << iINFO << "LARGEST PATCH (" << maxPatch <<
             ") HAS " << maxAtoms << " ATOMS\n" << endi;
        if ( totalAtoms != Node::Object()->molecule->numAtoms ) {
            char errstr[512];
            sprintf(errstr, "Incorrect atom count in ParallelIOMgr::recvAtomsCntPerPatch: %d vs %d", totalAtoms, Node::Object()->molecule->numAtoms);
            NAMD_die(errstr);
        }
    }
#endif
}

void call_sendAtomsToHomePatchProcs(void *arg)
{
    ((ParallelIOMgr*)arg)->sendAtomsToHomePatchProcs();
}

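// Flow control for the sends below: the first call spawns a Charm++
// CthThread that re-enters this function; the thread then suspends itself
// whenever 10 patch messages are unacknowledged and is awakened again from
// ackAtomsToHomePatchProcs().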
void ParallelIOMgr::sendAtomsToHomePatchProcs()
{
#ifdef MEM_OPT_VERSION
    if(myInputRank==-1) return;

    if ( sendAtomsThread == 0 ) {
        sendAtomsThread = CthCreate((CthVoidFn)call_sendAtomsToHomePatchProcs,this,0);
        CthAwaken(sendAtomsThread);
        return;
    }
    sendAtomsThread = 0;
    numAcksOutstanding = 0;

    PatchMap *patchMap = PatchMap::Object();
    int numPatches = patchMap->numPatches();
    vector<int> *eachPatchAtomList = patchMap->getTmpPatchAtomsList();

    //each element (proc) contains the list of ids of patches which will stay
    //on that processor
    ResizeArray<int> *procList = new ResizeArray<int>[CkNumPes()];
    ResizeArray<int> pesToSend;
    for(int i=0; i<numPatches; i++) {
        if(eachPatchAtomList[i].size()==0) continue;
        int onPE = patchMap->node(i);
        if ( procList[onPE].size() == 0 ) pesToSend.add(onPE);
        procList[onPE].add(i);
    }

    Random(CkMyPe()).reorder(pesToSend.begin(),pesToSend.size());
    //CkPrintf("Pe %d ParallelIOMgr::sendAtomsToHomePatchProcs sending to %d pes\n",CkMyPe(),pesToSend.size());

    //go over every processor to send a message if necessary
    //TODO: Optimization for local home patches to save temp memory usage??? -CHAOMEI
    CProxy_ParallelIOMgr pIO(thisgroup);
    for(int k=0; k<pesToSend.size(); k++) {
        const int i = pesToSend[k];
        int len = procList[i].size();
        if(len==0) continue;

        // Sending one message per pe can result in very large messages
        // that break Converse so send one message per patch instead.
        for(int j=0; j<len; j++) {
            int pid = procList[i][j];
            int atomCnt = eachPatchAtomList[pid].size();

            if ( numAcksOutstanding >= 10 ) {
                sendAtomsThread = CthSelf();
                CthSuspend();
            }
            ++numAcksOutstanding;

            MovePatchAtomsMsg *msg = new (1, 1, atomCnt, 0)MovePatchAtomsMsg;
            msg->from = CkMyPe();
            msg->patchCnt = 1;
            int atomIdx = 0;
            msg->pidList[0] = pid;
            msg->sizeList[0] = atomCnt;
            for(int m=0; m<atomCnt; m++, atomIdx++) {
                int aid = eachPatchAtomList[pid][m];
                FullAtom one = initAtoms[aid];
                //HACK to re-sort the atom list after receiving the atom list
                //on the home patch processor -Chao Mei
                one.hydVal = initAtoms[aid].hydList;
                msg->allAtoms[atomIdx] = one;
            }
            pIO[i].recvAtomsToHomePatchProcs(msg);
        }

        procList[i].clear();
    }

    //clean up to free space
    delete [] procList;
    patchMap->delTmpPatchAtomsList();

    //free the space occupied by the list that contains the input atoms
    initAtoms.clear();
#endif
}

void ParallelIOMgr::ackAtomsToHomePatchProcs()
{
    --numAcksOutstanding;
    if ( sendAtomsThread ) {
        CthAwaken(sendAtomsThread);
        sendAtomsThread = 0;
    }
}

void ParallelIOMgr::recvAtomsToHomePatchProcs(MovePatchAtomsMsg *msg)
{
    CProxy_ParallelIOMgr pIO(thisgroup);
    pIO[msg->from].ackAtomsToHomePatchProcs();

    if(!isOKToRecvHPAtoms) {
        prepareHomePatchAtomList();
        isOKToRecvHPAtoms = true;
    }

    int numRecvPatches = msg->patchCnt;
    int aid = 0;
    for(int i=0; i<numRecvPatches; i++) {
        int pid = msg->pidList[i];
        int size = msg->sizeList[i];
        int idx = binaryFindHPID(pid);
        for(int j=0; j<size; j++, aid++) {
            hpAtomsList[idx].add(msg->allAtoms[aid]);
        }
    }
    //CkPrintf("Pe %d recvAtomsToHomePatchProcs for %d patches %d atoms\n",CkMyPe(),numRecvPatches,aid);
    delete msg;
}

void ParallelIOMgr::prepareHomePatchAtomList()
{
    PatchMap *patchMap = PatchMap::Object();
    for(int i=0; i<patchMap->numPatches(); i++) {
        if(patchMap->node(i)==CkMyPe()) {
            hpIDList.add(i);
        }
    }
    if(hpIDList.size()>0)
        hpAtomsList = new FullAtomList[hpIDList.size()];
}

int ParallelIOMgr::binaryFindHPID(int pid)
{
    //hpIDList should be in increasing order!!
    //note: despite their names, rIdx is the lower bound and lIdx the upper
    int lIdx, rIdx;
    int retIdx = -1;

    rIdx=0;
    lIdx=hpIDList.size()-1;

    while(rIdx<=lIdx ) {
        int idx = (rIdx+lIdx)/2;
        int curPid = hpIDList[idx];
        if(pid>curPid) {
            //in the upper half
            rIdx = idx+1;
        } else if(pid<curPid) {
            //in the lower half
            lIdx = idx-1;
        } else {
            //found!
            retIdx = idx;
            break;
        }
    }
    CmiAssert(retIdx!=-1);
    return retIdx;
}
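
// Illustrative trace: with hpIDList = {2, 5, 9, 14}, a lookup of pid = 9
// probes index 1 (value 5), narrows to the upper half, then probes index 2
// (value 9) and returns 2; a pid absent from the list trips the CmiAssert.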

void ParallelIOMgr::createHomePatches()
{
#ifdef MEM_OPT_VERSION

    int assignedPids = PatchMap::Object()->numPatchesOnNode(CkMyPe());
    int numPids = hpIDList.size();
    if(numPids==0){
        //this node actually contains no home patches
        if(assignedPids == 0) return;

        //Handle the rare case in which all the home patches assigned to this
        //node are empty, so that "recvAtomsToHomePatchProcs" is never called.
        //We still need to create those empty home patches!
        CmiAssert(isOKToRecvHPAtoms == false);
        PatchMap *patchMap = PatchMap::Object();
        CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
        PatchMgr *patchMgr = pm.ckLocalBranch();
        for(int i=0; i<patchMap->numPatches(); i++) {
            if(patchMap->node(i)==CkMyPe()) {
                FullAtomList emptyone;
                patchMgr->createHomePatch(i, emptyone);
            }
        }
        return;
    }

    CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
    PatchMgr *patchMgr = pm.ckLocalBranch();

    //go through the home patch list
    for(int i=0; i<numPids; i++) {
        int pid = hpIDList[i];

        //re-sort the atom list of this patch
        std::sort(hpAtomsList[i].begin(), hpAtomsList[i].end());
        Node::Object()->workDistrib->fillAtomListForOnePatch(pid, hpAtomsList[i]);
        patchMgr->createHomePatch(pid, hpAtomsList[i]);
    }

    hpIDList.clear();
    delete [] hpAtomsList;

    hpAtomsList = NULL;
#endif
}

void ParallelIOMgr::freeMolSpace()
{
#ifdef MEM_OPT_VERSION
    molecule->delAtomNames();
    molecule->delChargeSpace();

    //???TODO NOT SURE WHETHER freeEnergyOn IS SUPPORTED IN MEM_OPT_VERSION
    //-CHAOMEI
    if(!CkMyPe() && !simParameters->freeEnergyOn)
        molecule->delMassSpace();

    molecule->delFixedAtoms();
#endif
}

//The initial distribution of atoms:
//(avgNum+1),...,(avgNum+1),avgNum,...,avgNum where
//avgNum equals the total number of atoms in the system
//divided by the number of input processors --Chao Mei
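//Illustrative example: with numAtoms = 10 and numProcs = 3, avgNum = 3 and
//remainder = 1, so the ranks hold 4, 3, and 3 atoms; rank 0 owns atom IDs
//0..3, rank 1 owns 4..6, and rank 2 owns 7..9.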
int ParallelIOMgr::numMyAtoms(int rank, int numProcs)
{
    if(rank==-1) return -1;
    int avgNum = molecule->numAtoms/numProcs;
    int remainder = molecule->numAtoms%numProcs;
    if(rank<remainder) return avgNum+1;
    else return avgNum;
}

int ParallelIOMgr::atomRank(int atomID, int numProcs)
{
    int avgNum = molecule->numAtoms/numProcs;
    int remainder = molecule->numAtoms%numProcs;
    int midLimit = remainder*(avgNum+1);
    int idx;
    if(atomID<midLimit) {
        idx = atomID/(avgNum+1);
    } else {
        idx = remainder+(atomID-midLimit)/avgNum;
    }
    return idx;
}
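
//Illustrative inverse mapping for the same example: midLimit = 1*(3+1) = 4,
//so atomID 3 maps to rank 3/4 = 0 and atomID 5 maps to rank 1 + (5-4)/3 = 1,
//consistent with numMyAtoms() above.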

void ParallelIOMgr::getMyAtomsRange(int &lowerIdx, int &upperIdx, int rank, int numProcs)
{
    if(rank==-1) {
        //just to make sure upperIdx-lowerIdx+1 == -1
        lowerIdx=-1;
        upperIdx=-3;
        return;
    }
    int avgNum = molecule->numAtoms/numProcs;
    int remainder = molecule->numAtoms%numProcs;
    if(rank<remainder) {
        lowerIdx = rank*(avgNum+1);
        upperIdx = lowerIdx+avgNum;
    } else {
        int midLimit = remainder*(avgNum+1);
        lowerIdx = midLimit+(rank-remainder)*avgNum;
        upperIdx = lowerIdx+avgNum-1;
    }
}
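
//Checking the same example once more: rank 2 takes the else branch, giving
//lowerIdx = 4 + (2-1)*3 = 7 and upperIdx = 9, i.e. the last 3 atoms.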

int ParallelIOMgr::calcMyOutputProxyClients() {
    PatchMap *patchMap = PatchMap::Object();
    int myOutputProxyClients = 0;
    int myset = myOutputProxyRank / numOutputProcs;
    for(int i=0; i<CkNumPes(); ++i) {
        if ( (i*numProxiesPerOutputProc)/CkNumPes() == myset &&
             patchMap->numPatchesOnNode(i) > 0 ) {
            ++myOutputProxyClients;
        }
    }
    return myOutputProxyClients;
}

void ParallelIOMgr::receivePositions(CollectVectorVarMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if ( myOutputRank != -1 ) {
        int ready = midCM->receivePositions(msg);
        if(ready) {
            CProxy_CollectionMaster cm(mainMaster);
            cm.receiveOutputPosReady(msg->seq);
        }
        delete msg;
    } else if ( myOutputProxyRank != -1 ) {
        if ( ! myOutputProxyPositions ) {
            myOutputProxyPositions = new CollectProxyVectorSequence(calcMyOutputProxyClients());
        }
        CollectVectorVarMsg *newmsg = myOutputProxyPositions->submitData(msg);
        if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receivePositions(newmsg);
        delete msg;
    } else {
        NAMD_bug("ParallelIOMgr::receivePositions on bad pe");
    }
#endif
}

void ParallelIOMgr::receiveVelocities(CollectVectorVarMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if ( myOutputRank != -1 ) {
        int ready = midCM->receiveVelocities(msg);
        if(ready) {
            CProxy_CollectionMaster cm(mainMaster);
            cm.receiveOutputVelReady(msg->seq);
        }
        delete msg;
    } else if ( myOutputProxyRank != -1 ) {
        if ( ! myOutputProxyVelocities ) {
            myOutputProxyVelocities = new CollectProxyVectorSequence(calcMyOutputProxyClients());
        }
        CollectVectorVarMsg *newmsg = myOutputProxyVelocities->submitData(msg);
        if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveVelocities(newmsg);
        delete msg;
    } else {
        NAMD_bug("ParallelIOMgr::receiveVelocities on bad pe");
    }
#endif
}

void ParallelIOMgr::receiveForces(CollectVectorVarMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if ( myOutputRank != -1 ) {
        int ready = midCM->receiveForces(msg);
        if(ready) {
            CProxy_CollectionMaster cm(mainMaster);
            cm.receiveOutputForceReady(msg->seq);
        }
        delete msg;
    } else if ( myOutputProxyRank != -1 ) {
        if ( ! myOutputProxyForces ) {
            myOutputProxyForces = new CollectProxyVectorSequence(calcMyOutputProxyClients());
        }
        CollectVectorVarMsg *newmsg = myOutputProxyForces->submitData(msg);
        if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveForces(newmsg);
        delete msg;
    } else {
        NAMD_bug("ParallelIOMgr::receiveForces on bad pe");
    }
#endif
}

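// Token-based output in single-file mode: within each write group the output
// procs dispose their slices in rank order, forwarding the accumulated I/O
// time to the next rank, and the highest rank in the group notifies
// CollectionMaster to start the next round.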
void ParallelIOMgr::disposePositions(int seq, double prevT)
{
#ifdef MEM_OPT_VERSION
    double iotime = CmiWallTimer();
    midCM->disposePositions(seq);
    iotime = CmiWallTimer()-iotime+prevT;

#if OUTPUT_SINGLE_FILE
    //Token-based file output
    if(myOutputRank == getMyOutputGroupHighestRank()) {
        //notify the CollectionMaster to start the next round
        CProxy_CollectionMaster cm(mainMaster);
        cm.startNextRoundOutputPos(iotime);
    } else {
        CProxy_ParallelIOMgr io(thisgroup);
        io[outputProcArray[myOutputRank+1]].disposePositions(seq, iotime);
    }
#else
    //notify the CollectionMaster to start the next round
    CProxy_CollectionMaster cm(mainMaster);
    cm.startNextRoundOutputPos(iotime);
#endif

#endif
}
1792 
1793 void ParallelIOMgr::disposeVelocities(int seq, double prevT)
1794 {
1795 #ifdef MEM_OPT_VERSION
1796  double iotime = CmiWallTimer();
1797  midCM->disposeVelocities(seq);
1798  iotime = CmiWallTimer()-iotime+prevT;
1799 
1800 #if OUTPUT_SINGLE_FILE
1801  //Token-based file output
1802  if(myOutputRank==getMyOutputGroupHighestRank()) {
1803  //notify the CollectionMaster to start the next round
1804  CProxy_CollectionMaster cm(mainMaster);
1805  cm.startNextRoundOutputVel(iotime);
1806  } else {
1807  CProxy_ParallelIOMgr io(thisgroup);
1808  io[outputProcArray[myOutputRank+1]].disposeVelocities(seq, iotime);
1809  }
1810 #else
1811  //notify the CollectionMaster to start the next round
1812  CProxy_CollectionMaster cm(mainMaster);
1813  cm.startNextRoundOutputVel(iotime);
1814 #endif
1815 
1816 #endif
1817 }
1818 
1819 void ParallelIOMgr::disposeForces(int seq, double prevT)
1820 {
1821 #ifdef MEM_OPT_VERSION
1822  double iotime = CmiWallTimer();
1823  midCM->disposeForces(seq);
1824  iotime = CmiWallTimer()-iotime+prevT;
1825 
1826 #if OUTPUT_SINGLE_FILE
1827  //Token-based file output
1828  if(myOutputRank==getMyOutputGroupHighestRank()) {
1829  //notify the CollectionMaster to start the next round
1830  CProxy_CollectionMaster cm(mainMaster);
1831  cm.startNextRoundOutputForce(iotime);
1832  } else {
1833  CProxy_ParallelIOMgr io(thisgroup);
1834  io[outputProcArray[myOutputRank+1]].disposeForces(seq, iotime);
1835  }
1836 #else
1837  //notify the CollectionMaster to start the next round
1838  CProxy_CollectionMaster cm(mainMaster);
1839  cm.startNextRoundOutputForce(iotime);
1840 #endif
1841 
1842 #endif
1843 }
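// Under OUTPUT_SINGLE_FILE the three dispose* methods above implement
// token-based output: output procs write their slice of the file in
// rank order, each adding its own I/O time to the accumulated prevT
// before passing the token to outputProcArray[myOutputRank+1]. The
// highest rank in the group reports the total time to the
// CollectionMaster, which starts the next output round. In the
// one-file-per-proc build every output proc writes independently and
// notifies the CollectionMaster right away.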
1844 
1845 
1846 void ParallelIOMgr::wrapCoor(int seq, Lattice lat)
1847 {
1848 #ifdef MEM_OPT_VERSION
1849  coorInstance = midCM->getReadyPositions(seq);
1850 
1851  coorInstance->lattice = lat; //record the lattice to use for wrapAll/wrapWater!
1852  int fromAtomID = coorInstance->fromAtomID;
1853  int toAtomID = coorInstance->toAtomID;
1854 
1855  //these are references, not copies
1856  ResizeArray<Vector> &data = coorInstance->data;
1857  ResizeArray<FloatVector> &fdata = coorInstance->fdata;
1858  //If both data and fdata are non-empty, they hold the same values and
1859  //differ only in precision, so we only need to compute the
1860  //higher-precision coordinate array. -Chao Mei
1861  int dsize = data.size();
1862  int numMyAtoms = toAtomID-fromAtomID+1;
1863  tmpCoorCon = new Vector[numMyAtoms];
1864  ClusterCoorElem one;
1865  //1. accumulate per-cluster coordinate sums locally
1866  for(int i=0; i<numMyAtoms; i++){
1867  tmpCoorCon[i] = 0.0;
1868  int cid = clusterID[i];
1869  if(cid<fromAtomID){
1870  //on output procs ahead of me
1871  one.clusterId = cid;
1872  ClusterCoorElem *ret = remoteCoors.find(one);
1873  if(ret==NULL){
1874  if(dsize==0)
1875  one.dsum = fdata[i];
1876  else
1877  one.dsum = data[i];
1878 
1879  remoteCoors.add(one);
1880  }else{
1881  if(dsize==0)
1882  ret->dsum += fdata[i];
1883  else
1884  ret->dsum += data[i];
1885  }
1886  }else{
1887  if(dsize==0)
1888  tmpCoorCon[cid-fromAtomID] += fdata[i];
1889  else
1890  tmpCoorCon[cid-fromAtomID] += data[i];
1891  }
1892  }
1893 
1894  //2. Send msgs to remote output procs to reduce the coordinate
1895  //sums of each cluster
1896  CmiAssert(numRemoteClusters == remoteCoors.size());
1897  numCSMAck = 0; //reset before receiving the final coor updates
1898  CProxy_ParallelIOMgr pIO(thisgroup);
1899  ClusterCoorSetIter iter(remoteCoors);
1900  for(iter=iter.begin(); iter!=iter.end(); iter++){
1901  ClusterCoorMsg *msg = new ClusterCoorMsg;
1902  msg->srcRank = myOutputRank;
1903  msg->clusterId = iter->clusterId;
1904  msg->dsum = iter->dsum;
1905  int dstRank = atomRankOnOutput(iter->clusterId);
1906  pIO[outputProcArray[dstRank]].recvClusterCoor(msg);
1907  }
1908 
1909  //Send a local NULL msg to indicate that local coordinate wrapping
1910  //has finished.
1911  recvClusterCoor(NULL);
1912 #endif
1913 }
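// How the wrapping works (a rough sketch): each cluster's coordinate
// sum, divided by clusterSize, gives the cluster's geometric center c;
// wrap_delta(c) or wrap_nearest_delta(c) then returns the lattice
// translation that brings c back into the periodic cell. Roughly, for
// an orthorhombic box of side L centered at the origin, the x component
// of that delta is about -L*rint(c.x/L) (see Lattice.h for the general
// triclinic case). The same delta is later added to every atom of the
// cluster, so clusters are wrapped rigidly rather than atom by atom.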
1914 
1915 //On the output proc, it's possible (though rare) that recvClusterCoor
1916 //executes before wrapCoor, so we must make sure the local tmpCoorCon
1917 //has been calculated. This is why (numRemoteReqs+1) msgs are expected:
1918 //the additional one is sent by the proc itself when it finishes
1919 //wrapping coordinates. --Chao Mei
1920 void ParallelIOMgr::recvClusterCoor(ClusterCoorMsg *msg){
1921  //only buffer msgs arriving from remote procs
1922  if(msg!=NULL) ccmBuf.add(msg);
1923 
1924  //include a msg sent by itself
1925  if(++numReqRecved == (numRemoteReqs+1)){
1926  numReqRecved = 0;
1927  integrateClusterCoor();
1928  }
1929 }
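// A small counting example for the (numRemoteReqs+1) test above
// (illustrative only): if atoms on two other output procs belong to
// clusters whose head atoms live here, numRemoteReqs==2. This proc then
// buffers those two remote ClusterCoorMsgs plus its own NULL
// self-message from wrapCoor, and only the third arrival triggers
// integrateClusterCoor(). The NULL self-message is what guarantees
// tmpCoorCon is fully accumulated before remote sums are folded in.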
1930 
1931 void ParallelIOMgr::integrateClusterCoor(){
1932 #ifdef MEM_OPT_VERSION
1933  int fromIdx = coorInstance->fromAtomID;
1934  int toIdx = coorInstance->toAtomID;
1935  for(int i=0; i<ccmBuf.size(); i++){
1936  ClusterCoorMsg *msg = ccmBuf[i];
1937  int lidx = msg->clusterId - fromIdx;
1938  tmpCoorCon[lidx] += msg->dsum;
1939  }
1940 
1941  //send back those msgs
1942  CProxy_ParallelIOMgr pIO(thisgroup);
1943  for(int i=0; i<ccmBuf.size(); i++){
1944  ClusterCoorMsg *msg = ccmBuf[i];
1945  int lidx = msg->clusterId - fromIdx;
1946  if(simParameters->wrapAll || isWater[lidx]) {
1947  Lattice *lat = &(coorInstance->lattice);
1948  Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1949  msg->dsum = (simParameters->wrapNearest ?
1950  lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
1951  }else{
1952  msg->dsum = 0.0;
1953  }
1954  pIO[outputProcArray[msg->srcRank]].recvFinalClusterCoor(msg);
1955  }
1956  ccmBuf.resize(0);
1957 
1958  //It's possible that recvFinalClusterCoor executes before integrateClusterCoor
1959  //on this processor (another proc may finish wrapCoor faster). To avoid
1960  //this msg race, send a message to itself to participate in the reduction.
1961  if(numRemoteClusters!=0){
1962  recvFinalClusterCoor(NULL);
1963  } else {
1964  //this output proc now has the sum of coordinates of each cluster
1965  //it owns, so it is ready to do the final wrap coor computation
1966  int numMyAtoms = toIdx-fromIdx+1;
1967  ResizeArray<Vector> &data = coorInstance->data;
1968  ResizeArray<FloatVector> &fdata = coorInstance->fdata;
1969  for(int i=0; i<numMyAtoms; i++){
1970  if(!simParameters->wrapAll && !isWater[i]) continue;
1971  int lidx = clusterID[i]-fromIdx;
1972  if(lidx==i){
1973  //the head atom of the cluster
1974  Lattice *lat = &(coorInstance->lattice);
1975  Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1976  tmpCoorCon[lidx] = (simParameters->wrapNearest ?
1977  lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
1978  }
1979  if(data.size()) data[i] += tmpCoorCon[lidx];
1980  //no operator += (FloatVector, Vector)
1981  if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
1982  }
1983 
1984  delete [] tmpCoorCon;
1985  tmpCoorCon = NULL;
1986  CProxy_CollectionMaster cm(mainMaster);
1987  cm.wrapCoorFinished();
1988  }
1989 #endif
1990 }
1991 
1992 void ParallelIOMgr::recvFinalClusterCoor(ClusterCoorMsg *msg){
1993 #ifdef MEM_OPT_VERSION
1994  if(msg!=NULL){
1995  //only process messages sent from other procs!
1996  ClusterCoorElem one(msg->clusterId);
1997  ClusterCoorElem *ret = remoteCoors.find(one);
1998  ret->dsum = msg->dsum;
1999  delete msg;
2000  }
2001 
2002  if(++numCSMAck == (numRemoteClusters+1)){
2003  //final wrap coor computation
2004  int fromIdx = coorInstance->fromAtomID;
2005  int toIdx = coorInstance->toAtomID;
2006  int numMyAtoms = toIdx-fromIdx+1;
2007  ResizeArray<Vector> &data = coorInstance->data;
2008  ResizeArray<FloatVector> &fdata = coorInstance->fdata;
2009  ClusterCoorElem tmp;
2010  for(int i=0; i<numMyAtoms; i++){
2011  if(!simParameters->wrapAll && !isWater[i]) continue;
2012  int cid = clusterID[i];
2013  int lidx = cid-fromIdx;
2014  if(lidx<0){
2015  //this cid should be inside remoteCoors
2016  tmp.clusterId = cid;
2017  ClusterCoorElem *fone = remoteCoors.find(tmp);
2018  if(data.size()) data[i] += fone->dsum;
2019  if(fdata.size()) fdata[i] = fdata[i] + fone->dsum;
2020  }else{
2021  if(lidx==i){
2022  Lattice *lat = &(coorInstance->lattice);
2023  Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
2024  tmpCoorCon[lidx] = (simParameters->wrapNearest ?
2025  lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
2026  }
2027  if(data.size()) data[i] += tmpCoorCon[lidx];
2028  if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
2029  }
2030  }
2031 
2032  delete [] tmpCoorCon;
2033  tmpCoorCon = NULL;
2034  CProxy_CollectionMaster cm(mainMaster);
2035  cm.wrapCoorFinished();
2036  numCSMAck = 0;
2037  remoteCoors.clear();
2038  }
2039 #endif
2040 }
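// The (numRemoteClusters+1) acknowledgement count mirrors the pattern
// in recvClusterCoor: numRemoteClusters replies carry wrap deltas for
// clusters whose head atoms live on other output procs, and one NULL
// self-message (sent from integrateClusterCoor) marks the local deltas
// as ready. For example, with numRemoteClusters==3 the final wrapping
// runs on the fourth call, after which the per-round state (tmpCoorCon,
// numCSMAck, remoteCoors) is reset for the next output.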
2041 #include "ParallelIOMgr.def.h"