NAMD
ParallelIOMgr.C

#include "largefiles.h"  // must be first!

#include <stdio.h>
#include "BOCgroup.h"
#include "Molecule.h"
#include "Node.h"
#include "Node.decl.h"
#include "NamdState.h"
#include "WorkDistrib.h"
#include "PDB.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "packmsg.h"
#include "HomePatch.h"
#include "InfoStream.h"
#include "CollectionMaster.h"
#include "CollectionMgr.h"

#include "ParallelIOMgr.decl.h"
#include "ParallelIOMgr.h"

#include "Output.h"
#include "Random.h"

#include <algorithm>
using namespace std;


// data structures for the IO proxy; all data is destined for the same IO processor

class CollectProxyVectorInstance {
  public:

    CollectProxyVectorInstance() : seq(-10) { ; }

    void free() { seq = -10; }
    int notfree() { return ( seq != -10 ); }

    void reset(int s, CollectVectorVarMsg::DataStatus v, int numClients) {
      if ( s == -10 ) NAMD_bug("seq == free in CollectionMgr");
      seq = s;
      vstatus = v;
      remaining = numClients;
      aid.resize(0);
      data.resize(0);
      fdata.resize(0);
    }

    // true -> send it and delete it!
    int append(CollectVectorVarMsg *msg)
    {
      if ( msg->status != vstatus ) {
        NAMD_bug("CollectProxyVectorInstance vstatus mismatch");
      }
      if ( msg->seq != seq ) {
        NAMD_bug("CollectProxyVectorInstance seq mismatch");
      }
      int size = msg->size;
      for( int i = 0; i < size; ++i ) { aid.add(msg->aid[i]); }
      if ( vstatus == CollectVectorVarMsg::VectorValid ||
           vstatus == CollectVectorVarMsg::BothValid ) {
        for( int i = 0; i < size; ++i ) { data.add(msg->data[i]); }
      }
      if ( vstatus == CollectVectorVarMsg::FloatVectorValid ||
           vstatus == CollectVectorVarMsg::BothValid ) {
        for( int i = 0; i < size; ++i ) { fdata.add(msg->fdata[i]); }
      }
      const int atoms_per_message_target = 100000;
      return ( ! --remaining || aid.size() > atoms_per_message_target );
    }

    CollectVectorVarMsg *buildMsg() {
      int numAtoms = aid.size();
      CollectVectorVarMsg *msg;
      if ( ! numAtoms ) {
        msg = 0;
      } else if ( vstatus == CollectVectorVarMsg::VectorValid) {
        msg = new(numAtoms, numAtoms, 0, 0) CollectVectorVarMsg;
        for(int j=0; j<numAtoms; j++) {
          msg->aid[j] = aid[j];
          msg->data[j] = data[j];
        }
      } else if (vstatus == CollectVectorVarMsg::FloatVectorValid) {
        msg = new(numAtoms, 0, numAtoms, 0) CollectVectorVarMsg;
        for(int j=0; j<numAtoms; j++) {
          msg->aid[j] = aid[j];
          msg->fdata[j] = fdata[j];
        }
      } else {
        msg = new(numAtoms, numAtoms, numAtoms, 0) CollectVectorVarMsg;
        for(int j=0; j<numAtoms; j++) {
          msg->aid[j] = aid[j];
          msg->data[j] = data[j];
          msg->fdata[j] = fdata[j];
        }
      }
      if ( msg ) {
        msg->seq = seq;
        msg->size = numAtoms;
        msg->status = vstatus;
      }
      if ( remaining ) reset(seq,vstatus,remaining);
      else free();
      return msg;
    }

    int seq;
    CollectVectorVarMsg::DataStatus vstatus;
    AtomIDList aid;
    ResizeArray<Vector> data;
    ResizeArray<FloatVector> fdata;

  private:
    int remaining;

};

class CollectProxyVectorSequence {
  public:
    CollectProxyVectorSequence(int nc) : numClients(nc) { ; }

    CollectVectorVarMsg *submitData(CollectVectorVarMsg *msg) {
      CollectProxyVectorInstance **c = data.begin();
      CollectProxyVectorInstance **c_e = data.end();
      for( ; c != c_e && (*c)->seq != msg->seq; ++c );
      if ( c == c_e ) {
        c = data.begin();
        for( ; c != c_e && (*c)->notfree(); ++c );
        if ( c == c_e ) {
          data.add(new CollectProxyVectorInstance);
          c = data.end() - 1;
        }
        (*c)->reset(msg->seq,msg->status,numClients);
      }
      if ( (*c)->append(msg) ) {
        CollectVectorVarMsg *newmsg = (*c)->buildMsg();
        return newmsg;
      } else {
        return 0;
      }
    }

    ResizeArray<CollectProxyVectorInstance*> data;

  private:
    int numClients;

};
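
// Usage sketch (illustrative only): an output proxy PE funnels per-client
// collection messages through a CollectProxyVectorSequence.  A non-NULL
// return from submitData() is an aggregated message ready to forward to the
// real output processor, as in receivePositions() further below:
//
//   CollectVectorVarMsg *ready = myOutputProxyPositions->submitData(msg);
//   if ( ready ) thisProxy[outputPe].receivePositions(ready);
//   delete msg;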

ParallelIOMgr::ParallelIOMgr()
{
    CkpvAccess(BOCclass_group).ioMgr = thisgroup;

    numInputProcs=-1;
    inputProcArray = NULL;
    numOutputProcs=-1;
    outputProcArray = NULL;

    procsReceived=0;
    hydroMsgRecved=0;

    totalMV.x = totalMV.y = totalMV.z = 0.0;
    totalMass = 0.0;
    totalCharge = 0.0;
    numTotalExclusions = 0;
    numCalcExclusions = 0;
    numCalcFullExclusions = 0;

    isOKToRecvHPAtoms = false;
    hpAtomsList = NULL;

    clusterID = NULL;
    clusterSize = NULL;

#ifdef MEM_OPT_VERSION
    midCM = NULL;
#endif

    isWater = NULL;

    numCSMAck = 0;
    numReqRecved = 0;

    sendAtomsThread = 0;

#if COLLECT_PERFORMANCE_DATA
    numFixedAtomLookup = 0;
#endif
}

ParallelIOMgr::~ParallelIOMgr()
{
    delete [] inputProcArray;
    delete [] outputProcArray;
    delete [] clusterID;
    delete [] clusterSize;

#ifdef MEM_OPT_VERSION
    delete midCM;
#endif

    delete [] isWater;
}

#ifndef OUTPUT_SINGLE_FILE
#error OUTPUT_SINGLE_FILE not defined!
#endif

// initialization needed for the parallel IO manager; entry point through ScriptTcl
void ParallelIOMgr::initialize(Node *node)
{
    simParameters = node->simParameters;
    molecule = node->molecule;

    numInputProcs = simParameters->numinputprocs;
    numOutputProcs = simParameters->numoutputprocs;
    numOutputWrts = simParameters->numoutputwrts;

    numProxiesPerOutputProc = std::min((int)sqrt(CkNumPes()),(CkNumPes()-1)/numOutputProcs-1);
    if ( numProxiesPerOutputProc < 2 ) numProxiesPerOutputProc = 0;
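
    // Worked example (hypothetical counts): on 1024 PEs with 32 output procs,
    // sqrt(1024) = 32 and (1024-1)/32 - 1 = 30, so each output proc gets 30
    // proxies; on 8 PEs with 4 output procs, (8-1)/4 - 1 = 0, which is below
    // the minimum of 2, so proxies are disabled entirely.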

    if(!CkMyPe()) {
        iout << iINFO << "Running with " <<numInputProcs<<" input processors.\n"<<endi;
        #if OUTPUT_SINGLE_FILE
        iout << iINFO << "Running with " <<numOutputProcs<<" output processors ("<<numOutputWrts<<" of them will output simultaneously).\n"<<endi;
        #else
        iout << iINFO << "Running with " <<numOutputProcs<<" output processors, and each of them will output to its own separate file.\n"<<endi;
        #endif
        if ( numProxiesPerOutputProc ) {
            iout << iINFO << "Running with " <<numProxiesPerOutputProc<<" proxies per output processor.\n"<<endi;
        }
    }

    //build inputProcArray
    {
        inputProcArray = new int[numInputProcs];
        myInputRank = -1;
        for(int i=0; i<numInputProcs; ++i) {
            inputProcArray[i] = WorkDistrib::peDiffuseOrdering[(1+numOutputProcs+i)%CkNumPes()];
        }
        std::sort(inputProcArray, inputProcArray+numInputProcs);
        for(int i=0; i<numInputProcs; ++i) {
            if ( CkMyPe() == inputProcArray[i] ) {
                if ( myInputRank != -1 ) NAMD_bug("Duplicate input proc");
                myInputRank = i;
            }
        }

        if(!CkMyPe()) {
            iout << iINFO << "INPUT PROC LOCATIONS:";
            int i;
            for ( i=0; i<numInputProcs && i < 10; ++i ) {
                iout << " " << inputProcArray[i];
            }
            if ( i<numInputProcs ) iout << " ... " << inputProcArray[numInputProcs-1];
            iout << "\n" << endi;
        }
    }

    if(myInputRank!=-1) {
        //NOTE: this could be optimized further by pre-allocating the memory
        //for incoming atoms --Chao Mei
        int numMyAtoms = numInitMyAtomsOnInput();
        initAtoms.resize(numMyAtoms+100);  // extra space for orphan hydrogens
        initAtoms.resize(numMyAtoms);
        tmpRecvAtoms.resize(0);
    } else {
        initAtoms.resize(0);
        tmpRecvAtoms.resize(0);
    }
    hpIDList.resize(0);

    //build outputProcArray
    //spread the output processors across all the processors
    {
        outputProcArray = new int[numOutputProcs];
        outputProcFlags = new char[CkNumPes()];
        outputProxyArray = new int[numOutputProcs*numProxiesPerOutputProc];
        myOutputProxies = new int[numOutputProcs];
        myOutputRank = -1;
        myOutputProxyRank = -1;
        for(int i=0; i<numOutputProcs; ++i) {
            outputProcArray[i] = WorkDistrib::peDiffuseOrdering[(1+i)%CkNumPes()];
        }
        std::sort(outputProcArray, outputProcArray+numOutputProcs);
        for(int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
            outputProxyArray[i] = WorkDistrib::peDiffuseOrdering[(1+numOutputProcs+i)%CkNumPes()];
        }
        std::sort(outputProxyArray, outputProxyArray+numOutputProcs*numProxiesPerOutputProc,
                  WorkDistrib::pe_sortop_compact());
        for(int i=0; i<CkNumPes(); ++i) {
            outputProcFlags[i] = 0;
        }
        for(int i=0; i<numOutputProcs; ++i) {
            outputProcFlags[outputProcArray[i]] = 1;
            if ( CkMyPe() == outputProcArray[i] ) {
                if ( myOutputRank != -1 ) NAMD_bug("Duplicate output proc");
                myOutputRank = i;
            }
        }
        for(int i=0; i<numOutputProcs*numProxiesPerOutputProc; ++i) {
            if ( CkMyPe() == outputProxyArray[i] ) {
                if ( myOutputRank != -1 ) NAMD_bug("Output proxy is also output proc");
                if ( myOutputProxyRank != -1 ) NAMD_bug("Duplicate output proxy");
                myOutputProxyRank = i;
            }
        }
        int myProxySet = (WorkDistrib::peCompactOrderingIndex[CkMyPe()]*numProxiesPerOutputProc)/CkNumPes();
        for(int i=0; i<numOutputProcs; ++i) {
            if ( numProxiesPerOutputProc ) {
                myOutputProxies[i] = outputProxyArray[myProxySet*numOutputProcs+i];
            } else {
                myOutputProxies[i] = outputProcArray[i];
            }
        }

        // delay building sequences until after PatchMap is initialized
        myOutputProxyPositions = 0;
        myOutputProxyVelocities = 0;
        myOutputProxyForces = 0;

        if(!CkMyPe()) {
            iout << iINFO << "OUTPUT PROC LOCATIONS:";
            int i;
            for ( i=0; i<numOutputProcs && i < 10; ++i ) {
                iout << " " << outputProcArray[i];
            }
            if ( i<numOutputProcs ) iout << " ... " << outputProcArray[numOutputProcs-1];
            iout << "\n" << endi;
        }
    }

#ifdef MEM_OPT_VERSION
    if(myOutputRank!=-1) {
        midCM = new CollectionMidMaster(this);
    }
    remoteClusters.clear();
    csmBuf.resize(0);
    remoteCoors.clear();
    ccmBuf.resize(0);

    mainMaster = CollectionMgr::Object()->getMasterChareID();
#endif
}

int ParallelIOMgr::isOutputProcessor(int pe) {
    return outputProcFlags[pe];
}

int isOutputProcessor(int pe){
    return CProxy_ParallelIOMgr::ckLocalBranch(CkpvAccess(BOCclass_group).ioMgr)->isOutputProcessor(pe);
}


void ParallelIOMgr::readPerAtomInfo()
{
#ifdef MEM_OPT_VERSION
    if(myInputRank!=-1) {
        int myAtomLIdx, myAtomUIdx;
        getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);

        //1. read the file that contains per-atom info such as signature index
        molecule->read_binary_atom_info(myAtomLIdx, myAtomUIdx, initAtoms);

        //2. read the coordinates and velocities of each atom if the velocity
        //file exists; otherwise the velocity of each atom is randomly
        //generated.  This has to be DONE AFTER THE FIRST STEP because the atom
        //mass is required when the velocity is generated randomly.
        readCoordinatesAndVelocity();

        //3. set every atom's output processor rank, i.e. the dest pe to which
        //this atom will be sent for writing positions, velocities, etc.
        int oRank=atomRankOnOutput(myAtomLIdx);
        for(int i=oRank; i<numOutputProcs; i++) {
            int lIdx, uIdx;  //the range of atom ids that outputProcArray[i] has
            getAtomsRangeOnOutput(lIdx, uIdx, i);
            if(lIdx > myAtomUIdx) break;
            int fid = lIdx>myAtomLIdx?lIdx:myAtomLIdx;
            int tid = uIdx>myAtomUIdx?myAtomUIdx:uIdx;
            for(int j=fid; j<=tid; j++) initAtoms[j-myAtomLIdx].outputRank = i;
        }
    }

    //read clusters
    if(myOutputRank!=-1) {
        //cluster info is required only when wrapAll or wrapWater is set
        if(!(simParameters->wrapAll || simParameters->wrapWater)) return;
        readInfoForParOutput();
    }
#endif
}

void ParallelIOMgr::readCoordinatesAndVelocity()
{
#ifdef MEM_OPT_VERSION
    int needFlip = 0;
    int myAtomLIdx, myAtomUIdx;
    getMyAtomsInitRangeOnInput(myAtomLIdx, myAtomUIdx);
    int myNumAtoms = myAtomUIdx-myAtomLIdx+1;

    //buffer that holds the Position and Velocity data
    Vector *tmpData = new Vector[myNumAtoms];

    //begin to read coordinates
    //step 1: open the file
    FILE *ifp = fopen(simParameters->binCoorFile, "rb");
    if(!ifp) {
        char s[256];
        sprintf(s, "The binary coordinate file %s cannot be opened on proc %d\n", simParameters->binCoorFile, CkMyPe());
        NAMD_err(s);
    }
    //step 2: check whether a byte flip is needed
    int filelen;
    fread(&filelen, sizeof(int32),1,ifp);
    char lenbuf[sizeof(int32)];
    memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
    flipNum(lenbuf, sizeof(int32), 1);
    if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
        iout << iWARN << "Number of atoms in binary file " << simParameters->binCoorFile
             << " is palindromic, assuming same endian.\n" << endi;
    }
    if(filelen!=molecule->numAtoms) {
        needFlip = 1;
        memcpy((void *)&filelen, lenbuf,sizeof(int32));
    }
    if(filelen!=molecule->numAtoms) {
        char s[256];
        sprintf(s, "Incorrect atom count in binary file %s", simParameters->binCoorFile);
        NAMD_die(s);
    }
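
    // Example of the check above (hypothetical counts): with numAtoms ==
    // 0x00000400 (1024), a same-endian file reads back 0x00000400 and no flip
    // is needed; an opposite-endian file reads back 0x00040000, which
    // mismatches, so the byte-swapped value is adopted and needFlip forces a
    // flip of every record that follows.  Only if both interpretations
    // mismatch does the read abort.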
    //step 3: read the section of the file specified by the range
    int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Position);
#ifdef WIN32
    if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
#else
    if ( fseeko(ifp, offsetPos, SEEK_CUR) )
#endif
    {
        char s[256];
        sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
        NAMD_err(s);
    }
    size_t totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
    if(totalRead!=myNumAtoms) {
        char s[256];
        sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binCoorFile, CkMyPe());
        NAMD_err(s);
    }
    if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
    fclose(ifp);
    for(int i=0; i<myNumAtoms; i++) initAtoms[i].position = tmpData[i];

    //begin to read velocities
    //step 1: generate velocities randomly or open the file
    if(!simParameters->binVelFile) {
        //generate velocities randomly
        Node::Object()->workDistrib->random_velocities_parallel(simParameters->initialTemp, initAtoms);
    } else {
        ifp = fopen(simParameters->binVelFile, "rb");
        if(!ifp) {
            char s[256];
            sprintf(s, "The binary velocity file %s cannot be opened on proc %d\n", simParameters->binVelFile, CkMyPe());
            NAMD_err(s);
        }
        //step 2: check whether a byte flip is needed
        fread(&filelen, sizeof(int32),1,ifp);
        memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
        flipNum(lenbuf, sizeof(int32), 1);
        if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
            iout << iWARN << "Number of atoms in binary file " << simParameters->binVelFile
                 << " is palindromic, assuming same endian.\n" << endi;
        }
        if(filelen!=molecule->numAtoms) {
            needFlip = 1;
            memcpy((void *)&filelen, lenbuf,sizeof(int32));
        }
        if(filelen!=molecule->numAtoms) {
            char s[256];
            sprintf(s, "Incorrect atom count in binary file %s", simParameters->binVelFile);
            NAMD_die(s);
        }

        //step 3: read the section of the file specified by the range
        int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Velocity);
#ifdef WIN32
        if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
#else
        if ( fseeko(ifp, offsetPos, SEEK_CUR) )
#endif
        {
            char s[256];
            sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
            NAMD_err(s);
        }
        totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
        if(totalRead!=myNumAtoms) {
            char s[256];
            sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binVelFile, CkMyPe());
            NAMD_err(s);
        }
        if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
        fclose(ifp);
        for(int i=0; i<myNumAtoms; i++) initAtoms[i].velocity = tmpData[i];
    }

    //begin to read reference coordinates
    //step 1: use the initial positions or open the file
    if(!simParameters->binRefFile) {
        for(int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = initAtoms[i].position;
    } else {
        ifp = fopen(simParameters->binRefFile, "rb");
        if(!ifp) {
            char s[256];
            sprintf(s, "The binary reference coordinate file %s cannot be opened on proc %d\n", simParameters->binRefFile, CkMyPe());
            NAMD_err(s);
        }
        //step 2: check whether a byte flip is needed
        fread(&filelen, sizeof(int32),1,ifp);
        memcpy(lenbuf, (const char *)&filelen, sizeof(int32));
        flipNum(lenbuf, sizeof(int32), 1);
        if(!memcmp(lenbuf, (const char *)&filelen, sizeof(int32))) {
            iout << iWARN << "Number of atoms in binary file " << simParameters->binRefFile
                 << " is palindromic, assuming same endian.\n" << endi;
        }
        if(filelen!=molecule->numAtoms) {
            needFlip = 1;
            memcpy((void *)&filelen, lenbuf,sizeof(int32));
        }
        if(filelen!=molecule->numAtoms) {
            char s[256];
            sprintf(s, "Incorrect atom count in binary file %s", simParameters->binRefFile);
            NAMD_die(s);
        }

        //step 3: read the section of the file specified by the range
        int64 offsetPos = ((int64)myAtomLIdx)*sizeof(Position);
#ifdef WIN32
        if ( _fseeki64(ifp, offsetPos, SEEK_CUR) )
#else
        if ( fseeko(ifp, offsetPos, SEEK_CUR) )
#endif
        {
            char s[256];
            sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
            NAMD_err(s);
        }
        totalRead = fread(tmpData, sizeof(Vector), myNumAtoms, ifp);
        if(totalRead!=myNumAtoms) {
            char s[256];
            sprintf(s, "Error in reading binary file %s on proc %d", simParameters->binRefFile, CkMyPe());
            NAMD_err(s);
        }
        if(needFlip) flipNum((char *)tmpData, sizeof(BigReal), myNumAtoms*3);
        fclose(ifp);
        for(int i=0; i<myNumAtoms; i++) initAtoms[i].fixedPosition = tmpData[i];
    }

    delete [] tmpData;
#endif
}

void ParallelIOMgr::readInfoForParOutput()
{
    int fromIdx, toIdx;  //atom range
    getMyAtomsRangeOnOutput(fromIdx,toIdx);
    int numMyAtoms = toIdx-fromIdx+1;

    clusterID = new int[numMyAtoms];
    clusterSize = new int[numMyAtoms];

    //Since the input procs also read this file, all the checks
    //(file version check, atom record size check) are omitted here
    FILE *ifp = fopen(simParameters->binAtomFile, "rb");
    //read the magic number to set needFlip
    int needFlip = 0;
    int magicNum;
    fread(&magicNum, sizeof(int), 1, ifp);
    if (magicNum!=COMPRESSED_PSF_MAGICNUM) {
        needFlip = 1;
    }

    //need to load isWater info
    isWater = new char[numMyAtoms];
    //seek from the end of the file (note the offset is negative!)
    int64 offset = sizeof(char)*((int64)(fromIdx-molecule->numAtoms));
#ifdef WIN32
    if ( _fseeki64(ifp, offset, SEEK_END) )
#else
    if ( fseeko(ifp, offset, SEEK_END) )
#endif
    {
        char s[256];
        sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
        NAMD_err(s);
    }
    fread(isWater, sizeof(char), numMyAtoms, ifp);
    //no flipping is needed since this is a char array

    //seek from the end of the file (note the offset is negative!)
    offset = sizeof(int)*((int64)(fromIdx-molecule->numAtoms))
             - sizeof(char)*((int64)(molecule->numAtoms));
#ifdef WIN32
    if ( _fseeki64(ifp, offset, SEEK_END) )
#else
    if ( fseeko(ifp, offset, SEEK_END) )
#endif
    {
        char s[256];
        sprintf(s, "Error in seeking binary file %s on proc %d", simParameters->binAtomFile, CkMyPe());
        NAMD_err(s);
    }
    fread(clusterID, sizeof(int), numMyAtoms, ifp);
    if(needFlip) flipNum((char *)clusterID, sizeof(int), numMyAtoms);
    fclose(ifp);

    //calculate cluster sizes (communication may be necessary)
    ClusterElem one;
    for(int i=0; i<numMyAtoms; i++) {
        clusterSize[i] = 0;
        int cid = clusterID[i];
        //Check whether the cluster id (i.e. the id of the head atom of the
        //cluster) is in my local repository.  If not, it must belong to an
        //output processor ahead of this one because of the way cluster info
        //is determined for the molecular system.
        CmiAssert(cid<=toIdx);
        if(cid<fromIdx) {
            //on output procs ahead of me
            one.clusterId = cid;
            ClusterElem *ret = remoteClusters.find(one);
            if(ret==NULL) {
                one.atomsCnt = 1;
                remoteClusters.add(one);
            } else {
                ret->atomsCnt++;
            }
        } else {
            int lidx = cid-fromIdx;
            CmiAssert(lidx<=i);
            clusterSize[lidx]++;
        }
    }

    //Prepare to send msgs to remote output procs to reduce the cluster sizes.
    //Since the number of msgs is expected to be very small, msgs to the same
    //proc are not aggregated. --Chao Mei
#if 0
    printf("output[%d]=%d: prepare to send %d remote msgs for cluster size\n",
           myOutputRank, CkMyPe(), remoteClusters.size());
#endif

    numRemoteClusters = remoteClusters.size();
    numCSMAck = 0;  //set to 0 to prepare for recving the final cluster size update
    CProxy_ParallelIOMgr pIO(thisgroup);
    ClusterSetIter iter(remoteClusters);
    for(iter=iter.begin(); iter!=iter.end(); iter++) {
        ClusterSizeMsg *msg = new ClusterSizeMsg;
        msg->srcRank = myOutputRank;
        msg->clusterId = iter->clusterId;
        msg->atomsCnt = iter->atomsCnt;
        int dstRank = atomRankOnOutput(iter->clusterId);
        pIO[outputProcArray[dstRank]].recvClusterSize(msg);
    }
}
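
// Cluster-size exchange, sketched (the message flow implemented below):
//   1. readInfoForParOutput(): each output proc sends partial counts for
//      clusters whose head atom lives on an earlier output proc.
//   2. recvClusterSize(): the owner buffers those partial counts.
//   3. integrateClusterSize(): the owner folds them in and replies with the
//      final size via recvFinalClusterSize(), plus one self-message so the
//      reduction also completes on procs that sent nothing.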

void ParallelIOMgr::recvClusterSize(ClusterSizeMsg *msg)
{
    csmBuf.add(msg);  //buffered for reuse when sending back to the src

    //updating the cluster sizes has to be delayed until integration to
    //prevent a data race where clusterSize has not yet been created!
}

void ParallelIOMgr::integrateClusterSize()
{
    if(myOutputRank==-1) return;
    if(!(simParameters->wrapAll || simParameters->wrapWater)) return;

    int fromIdx, toIdx;  //atom range
    getMyAtomsRangeOnOutput(fromIdx,toIdx);

    //calculate the final cluster sizes
    for(int i=0; i<csmBuf.size(); i++) {
        ClusterSizeMsg *msg = csmBuf[i];
        int lidx = msg->clusterId - fromIdx;
        clusterSize[lidx] += msg->atomsCnt;
    }

    CProxy_ParallelIOMgr pIO(thisgroup);
    for(int i=0; i<csmBuf.size(); i++) {
        ClusterSizeMsg *msg = csmBuf[i];
        int lidx = msg->clusterId - fromIdx;
        msg->atomsCnt = clusterSize[lidx];
        pIO[outputProcArray[msg->srcRank]].recvFinalClusterSize(msg);
    }
    numRemoteReqs = csmBuf.size();
    csmBuf.resize(0);

    //There is a possible msg race here: recvFinalClusterSize could execute
    //before integrateClusterSize because another proc finishes calculating
    //its cluster sizes faster.  recvFinalClusterSize must be executed after
    //integrateClusterSize, so a self message is sent to participate in the
    //reduction.
    if(numRemoteClusters!=0){
        recvFinalClusterSize(NULL);
    }else{
        //this output proc already has the final cluster size for each atom
        int numMyAtoms = toIdx-fromIdx+1;
        for(int i=0; i<numMyAtoms; i++) {
            int lidx = clusterID[i]-fromIdx;
            clusterSize[i] = clusterSize[lidx];
        }

#if 0  //write out cluster debug info
        char fname[128];
        sprintf(fname, "cluster.par.%d", CkMyPe());
        FILE *ofp = fopen(fname, "w");
        for(int i=0; i<numMyAtoms; i++) {
            fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
        }
        fclose(ofp);
#endif
    }
}

void ParallelIOMgr::recvFinalClusterSize(ClusterSizeMsg *msg)
{
    //only process messages sent by other procs
    if(msg!=NULL) {
        //a message from another proc
        ClusterElem one(msg->clusterId);
        ClusterElem *ret = remoteClusters.find(one);
        CmiAssert(ret!=NULL);
        ret->atomsCnt = msg->atomsCnt;
    }
    delete msg;

    //the count includes a msg this proc sent to itself for the reduction
    if(++numCSMAck == (numRemoteClusters+1)) {
        //received all the msgs needed to finally update the cluster size for each atom
        int fromIdx, toIdx;  //atom range
        getMyAtomsRangeOnOutput(fromIdx,toIdx);
        int numMyAtoms = toIdx-fromIdx+1;
        ClusterElem tmp;
        for(int i=0; i<numMyAtoms; i++) {
            int cid = clusterID[i];
            int lidx = cid-fromIdx;
            if(lidx<0) {
                //this cid should be inside remoteClusters
                tmp.clusterId = cid;
                ClusterElem *fone = remoteClusters.find(tmp);
                clusterSize[i] = fone->atomsCnt;
            } else {
                clusterSize[i] = clusterSize[lidx];
            }
        }
        numCSMAck = 0;
        remoteClusters.clear();

#if 0  //write out cluster debug info
        char fname[128];
        sprintf(fname, "cluster.par.%d", CkMyPe());
        FILE *ofp = fopen(fname, "w");
        for(int i=0; i<numMyAtoms; i++) {
            fprintf(ofp, "%d: %d: %d\n", i+fromIdx, clusterID[i], clusterSize[i]);
        }
        fclose(ofp);
#endif

    }
}

void ParallelIOMgr::migrateAtomsMGrp()
{
    if(myInputRank==-1) return;

    //1. first get the list of atoms to be migrated,
    //which should be few compared with the number of atoms
    //initially assigned to this input proc.
    AtomIDList toMigrateList;  //the list of atoms to be migrated
    //the max distance from this processor of atoms to be sent
    int maxOffset = 0;
    for(int i=0; i<initAtoms.size(); i++) {
        //the rank of the input proc on which atom MPID resides
        int parentRank = atomInitRankOnInput(initAtoms[i].MPID);
        if(parentRank != myInputRank) {
            toMigrateList.add(i);
            initAtoms[i].isValid = false;
            int tmp = parentRank - myInputRank;
            tmp = tmp>0 ? tmp : -tmp;
            if(tmp > maxOffset) maxOffset = tmp;
        }
    }

    //2. prepare atom migration messages
    //the messages are indexed as [-maxOffset,..., -1,0,1,..., maxOffset]
    //where the "0" is not used at all.  It is added for the sake of
    //computing the index easily.
    InputAtomList *migLists = new InputAtomList[2*maxOffset+1];
    for(int i=0; i<toMigrateList.size(); i++) {
        int idx = toMigrateList[i];
        int parentRank = atomInitRankOnInput(initAtoms[idx].MPID);
        //decide which migList this atom belongs in
        int offset = parentRank - myInputRank + maxOffset;
        migLists[offset].add(initAtoms[idx]);
    }

    CProxy_ParallelIOMgr pIO(thisgroup);
    for(int i=0; i<2*maxOffset+1; i++) {
        int migLen = migLists[i].size();
        if(migLen>0) {
            MoveInputAtomsMsg *msg = new (migLen, 0)MoveInputAtomsMsg;
            msg->length = migLen;
            memcpy(msg->atomList, migLists[i].begin(), sizeof(InputAtom)*migLen);
            int destRank = i-maxOffset+myInputRank;
            pIO[inputProcArray[destRank]].recvAtomsMGrp(msg);
            migLists[i].clear();
        }
    }

    toMigrateList.clear();
    delete [] migLists;
}
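
// Indexing example for migLists (hypothetical ranks): on input rank 3 with
// maxOffset 2, the five slots cover destination ranks 1..5; an atom whose
// parent rank is 1 lands in slot 1 - 3 + 2 = 0, and slot maxOffset (the "0"
// offset, i.e. rank 3 itself) stays empty by construction.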

void ParallelIOMgr::recvAtomsMGrp(MoveInputAtomsMsg *msg)
{
    for(int i=0; i<msg->length; i++) {
        tmpRecvAtoms.add((msg->atomList)[i]);
    }
    delete msg;
}

void ParallelIOMgr::integrateMigratedAtoms()
{
    if(myInputRank==-1) return;

    for(int i=0; i<tmpRecvAtoms.size(); i++) {
        tmpRecvAtoms[i].isValid = true;
        initAtoms.add(tmpRecvAtoms[i]);
    }
    tmpRecvAtoms.clear();

    //sort the atom list based on the hydrogenList value
    std::sort(initAtoms.begin(), initAtoms.end());

    //now compute the counters inside Molecule, such as numFixedRigidBonds,
    //which are based on the hydrogen group info

    int numFixedRigidBonds = 0;
    if(molecule->numRigidBonds){
        int parentIsFixed = 0;
        for(int i=0; i<initAtoms.size(); i++) {
            InputAtom *one = &(initAtoms[i]);
            if(!one->isValid) continue;
            if(one->isGP) {
                parentIsFixed = one->atomFixed;
                InputAtom *a1 = &(initAtoms[i+1]);
                InputAtom *a2 = &(initAtoms[i+2]);
                if((one->rigidBondLength>0.0) &&
                   a1->atomFixed && a2->atomFixed) {
                    numFixedRigidBonds++;
                }
            }else{
                if((one->rigidBondLength>0.0) &&
                   one->atomFixed && parentIsFixed) {
                    numFixedRigidBonds++;
                }
            }
        }
    }

    int numFixedGroups = 0;
    if(molecule->numFixedAtoms){
        //after sorting, each valid hydrogen group starts at its group parent,
        //so i always advances by whole groups here
        for(int i=0; i<initAtoms.size();) {
            InputAtom *one = &(initAtoms[i]);
            if(!one->isValid){
                i++;
                continue;
            }
            if(one->isGP) {
                int allFixed = 1;
                for(int j=0; j<one->hydrogenGroupSize; j++){
                    InputAtom *a1 = &(initAtoms[i+j]);
                    allFixed = allFixed & a1->atomFixed;
                    if(!allFixed) break;
                }
                if(allFixed) numFixedGroups++;
                i += one->hydrogenGroupSize;
            }
        }
    }

    CProxy_ParallelIOMgr pIO(thisgroup);
    HydroBasedMsg *msg = new HydroBasedMsg;
    msg->numFixedGroups = numFixedGroups;
    msg->numFixedRigidBonds = numFixedRigidBonds;
    pIO[0].recvHydroBasedCounter(msg);
}

void ParallelIOMgr::updateMolInfo()
{
#ifdef MEM_OPT_VERSION
    if(myInputRank==-1) return;

    CProxy_ParallelIOMgr pIO(thisgroup);

    MolInfoMsg *msg = new MolInfoMsg;
    msg->numBonds = msg->numCalcBonds = 0;
    msg->numAngles = msg->numCalcAngles = 0;
    msg->numDihedrals = msg->numCalcDihedrals = 0;
    msg->numImpropers = msg->numCalcImpropers = 0;
    msg->numCrossterms = msg->numCalcCrossterms = 0;
    msg->numExclusions = msg->numCalcExclusions = 0;
    int64 numFullExclusions = msg->numCalcFullExclusions = 0;
    // JLai
    msg->numLJPairs = msg->numCalcLJPairs = 0;
    // End of JLai
    msg->numRigidBonds = 0;
    msg->totalMass = 0.0;
    msg->totalCharge = 0.0;

    //count the tuples this input processor has
    AtomSignature *atomSigPool = molecule->atomSigPool;
    ExclusionSignature *exclSigPool = molecule->exclSigPool;
    for(int i=0; i<initAtoms.size(); i++) {
        AtomSignature *thisSig = &atomSigPool[initAtoms[i].sigId];
        msg->numBonds += thisSig->bondCnt;
        msg->numAngles += thisSig->angleCnt;
        msg->numDihedrals += thisSig->dihedralCnt;
        msg->numImpropers += thisSig->improperCnt;
        msg->numCrossterms += thisSig->crosstermCnt;
        // JLai
        msg->numLJPairs += thisSig->gromacsPairCnt;
        // End of JLai

        ExclusionSignature *exclSig = &exclSigPool[initAtoms[i].exclId];
        msg->numExclusions += (exclSig->fullExclCnt + exclSig->modExclCnt);
        numFullExclusions += exclSig->fullExclCnt;

        if(initAtoms[i].rigidBondLength > 0.0) msg->numRigidBonds++;

        msg->totalMass += initAtoms[i].mass;
        msg->totalCharge += initAtoms[i].charge;
    }

    //deal with the numCalc* counters, which are affected by fixed atoms!
    if(molecule->numFixedAtoms>0 && ! simParameters->fixedAtomsForces) {
        //If there are fixed atoms, the numCalc* counters need to be computed.
        //Since an atom inside an exclusion set may reside on another input
        //processor, we have to resort to the global fixed atom info inside
        //the Molecule object.  The number of such accesses should
        //be very small! --Chao Mei
        int sAId = initAtoms[0].id;
        int remoteCnt=0;  //stats info
        for(int i=0; i<initAtoms.size(); i++) {
            //When all the atoms in the set are fixed, the elem (Bond etc.)
            //is not counted as a calc*.
            int myAId = initAtoms[i].id;
            AtomSignature *thisSig = &atomSigPool[initAtoms[i].sigId];
            ExclusionSignature *exclSig = &exclSigPool[initAtoms[i].exclId];
            if(!initAtoms[i].atomFixed) {
                msg->numCalcBonds += thisSig->bondCnt;
                msg->numCalcAngles += thisSig->angleCnt;
                msg->numCalcDihedrals += thisSig->dihedralCnt;
                msg->numCalcImpropers += thisSig->improperCnt;
                msg->numCalcCrossterms += thisSig->crosstermCnt;
                msg->numCalcExclusions+=(exclSig->fullExclCnt+exclSig->modExclCnt);
                msg->numCalcFullExclusions+=(exclSig->fullExclCnt);
                continue;
            }

            //1. Bonds
            for(int j=0; j<thisSig->bondCnt; j++) {
                TupleSignature *bsig = &(thisSig->bondSigs[j]);
                int a1 = myAId + bsig->offset[0];
                if(!isAtomFixed(sAId, a1)) msg->numCalcBonds++;
            }

            //2. Angles
            for(int j=0; j<thisSig->angleCnt; j++) {
                TupleSignature *bsig = &(thisSig->angleSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                if(!isAtomFixed(sAId, a1) || !isAtomFixed(sAId, a2))
                    msg->numCalcAngles++;
            }

            //3. Dihedrals
            for(int j=0; j<thisSig->dihedralCnt; j++) {
                TupleSignature *bsig = &(thisSig->dihedralSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                int a3 = myAId + bsig->offset[2];
                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2) ||
                   !isAtomFixed(sAId, a3))
                    msg->numCalcDihedrals++;
            }

            //4. Impropers
            for(int j=0; j<thisSig->improperCnt; j++) {
                TupleSignature *bsig = &(thisSig->improperSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                int a3 = myAId + bsig->offset[2];
                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2) ||
                   !isAtomFixed(sAId, a3))
                    msg->numCalcImpropers++;
            }

            //5. Crossterms
            for(int j=0; j<thisSig->crosstermCnt; j++) {
                TupleSignature *bsig = &(thisSig->crosstermSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                int a3 = myAId + bsig->offset[2];
                int a4 = myAId + bsig->offset[3];
                int a5 = myAId + bsig->offset[4];
                int a6 = myAId + bsig->offset[5];
                int a7 = myAId + bsig->offset[6];

                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2) ||
                   !isAtomFixed(sAId, a3) ||
                   !isAtomFixed(sAId, a4) ||
                   !isAtomFixed(sAId, a5) ||
                   !isAtomFixed(sAId, a6) ||
                   !isAtomFixed(sAId, a7))
                    msg->numCalcCrossterms++;
            }

            //6. Exclusions
            //this atom is fixed, check atoms in the exclusion set
            for(int j=0; j<exclSig->fullExclCnt; j++) {
                int thisAId = exclSig->fullOffset[j]+myAId;
                if(!isAtomFixed(sAId, thisAId)) { msg->numCalcExclusions++; msg->numCalcFullExclusions++; }
            }
            for(int j=0; j<exclSig->modExclCnt; j++) {
                int thisAId = exclSig->modOffset[j]+myAId;
                if(!isAtomFixed(sAId, thisAId)) msg->numCalcExclusions++;
            }

            //7. GromacsPair
            for(int j=0; j<thisSig->gromacsPairCnt; j++) {
                TupleSignature *bsig = &(thisSig->gromacsPairSigs[j]);
                int a1 = myAId + bsig->offset[0];
                int a2 = myAId + bsig->offset[1];
                if(!isAtomFixed(sAId, a1) ||
                   !isAtomFixed(sAId, a2))
                    msg->numCalcLJPairs++;
            }
        }
#if COLLECT_PERFORMANCE_DATA
        printf("Num fixedAtom lookup on proc %d is %d\n", CkMyPe(), numFixedAtomLookup);
#endif
    } else {
        //no fixed atoms, so each numCalc* counter equals its num* counter
        msg->numCalcBonds = msg->numBonds;
        msg->numCalcAngles = msg->numAngles;
        msg->numCalcDihedrals = msg->numDihedrals;
        msg->numCalcImpropers = msg->numImpropers;
        msg->numCalcCrossterms = msg->numCrossterms;
        msg->numCalcExclusions = msg->numExclusions;
        msg->numCalcFullExclusions = numFullExclusions;
    }


    if(!simParameters->comMove) {
        //To remove the center of mass motion from a molecule,
        //first calculate the values on every input proc, then reduce.
        //For more info, refer to WorkDistrib::remove_com_motion
        //-Chao Mei
        (msg->totalMV).x = 0.0;
        (msg->totalMV).y = 0.0;
        (msg->totalMV).z = 0.0;
        for (int i=0; i<initAtoms.size(); i++) {
            msg->totalMV += initAtoms[i].mass * initAtoms[i].velocity;
        }
    }

    //always send to the master processor (proc 0)
    pIO[0].recvMolInfo(msg);
#endif
}

//executed only on proc 0
void ParallelIOMgr::recvMolInfo(MolInfoMsg *msg)
{
    molecule->numBonds += msg->numBonds;
    molecule->numCalcBonds += msg->numCalcBonds;
    molecule->numAngles += msg->numAngles;
    molecule->numCalcAngles += msg->numCalcAngles;
    molecule->numDihedrals += msg->numDihedrals;
    molecule->numCalcDihedrals += msg->numCalcDihedrals;
    molecule->numImpropers += msg->numImpropers;
    molecule->numCalcImpropers += msg->numCalcImpropers;
    molecule->numCrossterms += msg->numCrossterms;
    molecule->numCalcCrossterms += msg->numCalcCrossterms;
    numTotalExclusions += msg->numExclusions;
    numCalcExclusions += msg->numCalcExclusions;
    numCalcFullExclusions += msg->numCalcFullExclusions;
    molecule->numRigidBonds += msg->numRigidBonds;

    totalMass += msg->totalMass;
    totalCharge += msg->totalCharge;

    if(!simParameters->comMove) {
        totalMV += msg->totalMV;
    }

    if(++procsReceived == numInputProcs) {
        //received all the counters
        msg->numBonds = molecule->numBonds;
        msg->numCalcBonds = molecule->numCalcBonds;
        msg->numAngles = molecule->numAngles;
        msg->numCalcAngles = molecule->numCalcAngles;
        msg->numDihedrals = molecule->numDihedrals;
        msg->numCalcDihedrals = molecule->numCalcDihedrals;
        msg->numImpropers = molecule->numImpropers;
        msg->numCalcImpropers = molecule->numCalcImpropers;
        msg->numCrossterms = molecule->numCrossterms;
        msg->numCalcCrossterms = molecule->numCalcCrossterms;
        msg->numExclusions = numTotalExclusions/2;
        msg->numCalcExclusions = numCalcExclusions/2;
        msg->numCalcFullExclusions = numCalcFullExclusions/2;
        msg->numRigidBonds = molecule->numRigidBonds;

        msg->totalMass = totalMass;
        msg->totalCharge = totalCharge;

        if(!simParameters->comMove) {
            msg->totalMV = totalMV;
        }

        CProxy_ParallelIOMgr pIO(thisgroup);
        pIO.bcastMolInfo(msg);

        //reset to 0 for the next p2p-based reduction on input procs
        procsReceived = 0;
    } else delete msg;
}
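
// The exclusion totals above are halved because the per-atom signatures count
// each exclusion once from each of its two endpoints, so summing over all
// input procs counts every exclusion exactly twice.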

void ParallelIOMgr::bcastMolInfo(MolInfoMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if(myInputRank!=-1) {
        if(!simParameters->comMove) {
            //needs to remove the center of mass motion from a molecule
            Vector val = msg->totalMV / msg->totalMass;
            for (int i=0; i<initAtoms.size(); i++) initAtoms[i].velocity -= val;
        }
    }

    //only rank 0 in the SMP node updates the Molecule object
    if(CmiMyRank()) {
        delete msg;
        return;
    }

    molecule->numBonds = msg->numBonds;
    molecule->numCalcBonds = msg->numCalcBonds;
    molecule->numAngles = msg->numAngles;
    molecule->numCalcAngles = msg->numCalcAngles;
    molecule->numDihedrals = msg->numDihedrals;
    molecule->numCalcDihedrals = msg->numCalcDihedrals;
    molecule->numImpropers = msg->numImpropers;
    molecule->numCalcImpropers = msg->numCalcImpropers;
    molecule->numCrossterms = msg->numCrossterms;
    molecule->numCalcCrossterms = msg->numCalcCrossterms;

    molecule->numTotalExclusions = msg->numExclusions;
    molecule->numCalcExclusions = msg->numCalcExclusions;
    molecule->numCalcFullExclusions = msg->numCalcFullExclusions;

    molecule->numRigidBonds = msg->numRigidBonds;

    if(!CkMyPe()) {
        iout << iINFO << "LOADED " << molecule->numTotalExclusions << " TOTAL EXCLUSIONS\n" << endi;
        if(!simParameters->comMove) {
            iout << iINFO << "REMOVING COM VELOCITY "
                 << (PDBVELFACTOR * (msg->totalMV / msg->totalMass)) << "\n" << endi;
        }
    }
    //msg->totalMV is still read in the output above, so delete the msg last
    delete msg;
#endif
}

//called only on PE0
void ParallelIOMgr::recvHydroBasedCounter(HydroBasedMsg *msg){
    molecule->numFixedRigidBonds += msg->numFixedRigidBonds;
    molecule->numFixedGroups += msg->numFixedGroups;

    if(++hydroMsgRecved == numInputProcs){
        msg->numFixedRigidBonds = molecule->numFixedRigidBonds;
        msg->numFixedGroups = molecule->numFixedGroups;
        CProxy_ParallelIOMgr pIO(thisgroup);
        pIO.bcastHydroBasedCounter(msg);
        hydroMsgRecved = 0;
    }else delete msg;
}

void ParallelIOMgr::bcastHydroBasedCounter(HydroBasedMsg *msg){
#ifdef MEM_OPT_VERSION
    //only rank 0 in the SMP node updates the Molecule object
    if(CmiMyRank()) {
        delete msg;
        return;
    }
    molecule->numFixedRigidBonds = msg->numFixedRigidBonds;
    molecule->numFixedGroups = msg->numFixedGroups;
    delete msg;

    if(!CkMyPe()) {
        iout << iINFO << "****************************\n";
        iout << iINFO << "STRUCTURE SUMMARY:\n";
        iout << iINFO << molecule->numAtoms << " ATOMS\n";
        iout << iINFO << molecule->numBonds << " BONDS\n";
        iout << iINFO << molecule->numAngles << " ANGLES\n";
        iout << iINFO << molecule->numDihedrals << " DIHEDRALS\n";
        iout << iINFO << molecule->numImpropers << " IMPROPERS\n";
        iout << iINFO << molecule->numCrossterms << " CROSSTERMS\n";
        iout << iINFO << molecule->numExclusions << " EXCLUSIONS\n";

        //****** BEGIN CHARMM/XPLOR type changes
        if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeXplorOn)){
            iout << iINFO << molecule->numMultipleDihedrals
                 << " DIHEDRALS WITH MULTIPLE PERIODICITY (BASED ON PSF FILE)\n";
        }
        if ((molecule->numMultipleDihedrals) && (simParameters->paraTypeCharmmOn)){
            iout << iINFO << molecule->numMultipleDihedrals
                 << " DIHEDRALS WITH MULTIPLE PERIODICITY IGNORED (BASED ON PSF FILE) \n";
            iout << iINFO
                 << " CHARMM MULTIPLICITIES BASED ON PARAMETER FILE INFO! \n";
        }
        //****** END CHARMM/XPLOR type changes

        if (molecule->numMultipleImpropers){
            iout << iINFO << molecule->numMultipleImpropers
                 << " IMPROPERS WITH MULTIPLE PERIODICITY\n";
        }

        if (simParameters->fixedAtomsOn)
            iout << iINFO << molecule->numFixedAtoms << " FIXED ATOMS\n";

        if (simParameters->rigidBonds)
            iout << iINFO << molecule->numRigidBonds << " RIGID BONDS\n";

        if (simParameters->fixedAtomsOn && simParameters->rigidBonds)
            iout << iINFO << molecule->numFixedRigidBonds <<
                " RIGID BONDS BETWEEN FIXED ATOMS\n";

        iout << iINFO << molecule->num_deg_freedom(1)
             << " DEGREES OF FREEDOM\n";

        iout << iINFO << molecule->numHydrogenGroups << " HYDROGEN GROUPS\n";
        iout << iINFO << molecule->maxHydrogenGroupSize
             << " ATOMS IN LARGEST HYDROGEN GROUP\n";
        iout << iINFO << molecule->numMigrationGroups << " MIGRATION GROUPS\n";
        iout << iINFO << molecule->maxMigrationGroupSize
             << " ATOMS IN LARGEST MIGRATION GROUP\n";
        if (simParameters->fixedAtomsOn)
        {
            iout << iINFO << molecule->numFixedGroups <<
                " HYDROGEN GROUPS WITH ALL ATOMS FIXED\n";
        }

        iout << iINFO << "TOTAL MASS = " << totalMass << " amu\n";
        iout << iINFO << "TOTAL CHARGE = " << totalCharge << " e\n";

        BigReal volume = simParameters->lattice.volume();
        if ( volume ) {
            iout << iINFO << "MASS DENSITY = "
                 << ((totalMass/volume) / 0.6022) << " g/cm^3\n";
            iout << iINFO << "ATOM DENSITY = "
                 << (molecule->numAtoms/volume) << " atoms/A^3\n";
        }

        iout << iINFO << "*****************************\n";
        iout << endi;
        fflush(stdout);
    }
#endif
}

void ParallelIOMgr::calcAtomsInEachPatch()
{
    if(myInputRank==-1) return;

    PatchMap *patchMap = PatchMap::Object();
    int numPatches = patchMap->numPatches();

    patchMap->initTmpPatchAtomsList();

    //each list contains the indices of atoms in initAtoms
    vector<int> *eachPatchAtomList = patchMap->getTmpPatchAtomsList();

    CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
    PatchMgr *patchMgr = pm.ckLocalBranch();

    int pid=0;
    const Lattice lattice = simParameters->lattice;
    for(int i=0; i<initAtoms.size(); i++) {
        InputAtom *atom = &(initAtoms[i]);
        if(!atom->isValid) continue;
        if(atom->isMP) {
            pid = patchMap->assignToPatch(atom->position, lattice);
        }
        //atoms in the same migration group inherit the pid of the group parent
        eachPatchAtomList[pid].push_back(i);
    }

    CProxy_ParallelIOMgr pIO(thisgroup);

    int patchCnt = 0;
    for(int i=0; i<numPatches; i++) {
        int cursize = eachPatchAtomList[i].size();
        if(cursize>0) patchCnt++;
    }

    AtomsCntPerPatchMsg *msg = NULL;
    if(simParameters->fixedAtomsOn) {
        msg = new (patchCnt, patchCnt, patchCnt, 0)AtomsCntPerPatchMsg;
    } else {
        msg = new (patchCnt, patchCnt, 0, 0)AtomsCntPerPatchMsg;
    }

    msg->length = patchCnt;
    patchCnt = 0;
    for(int i=0; i<numPatches; i++) {
        int cursize = eachPatchAtomList[i].size();
        if(cursize>0) {
            if ( cursize > USHRT_MAX ) {
                char errstr[512];
                sprintf(errstr, "Patch %d exceeds %d atoms.", i, USHRT_MAX);
                NAMD_die(errstr);
            }
            msg->pidList[patchCnt] = i;
            msg->atomsCntList[patchCnt] = cursize;
            patchCnt++;
        }
    }

    if(simParameters->fixedAtomsOn) {
        patchCnt = 0;
        for(int i=0; i<numPatches; i++) {
            int cursize = eachPatchAtomList[i].size();
            if(cursize>0) {
                int fixedCnt = 0;
                for(int j=0; j<cursize; j++) {
                    int aid = eachPatchAtomList[i][j];
                    //atomFixed is either 0 or 1
                    fixedCnt += initAtoms[aid].atomFixed;
                }
                msg->fixedAtomsCntList[patchCnt] = fixedCnt;
                patchCnt++;
            }
        }
    }

    pIO[0].recvAtomsCntPerPatch(msg);

}

void ParallelIOMgr::recvAtomsCntPerPatch(AtomsCntPerPatchMsg *msg)
{
#ifdef MEM_OPT_VERSION
    PatchMap *patchMap = PatchMap::Object();
    for(int i=0; i<msg->length; i++) {
        int pid = msg->pidList[i];
        int oldNum = patchMap->numAtoms(pid);
        if ( oldNum + msg->atomsCntList[i] > USHRT_MAX ) {
            char errstr[512];
            sprintf(errstr, "Patch %d exceeds %d atoms.", pid, USHRT_MAX);
            NAMD_die(errstr);
        }
        patchMap->setNumAtoms(pid, oldNum+msg->atomsCntList[i]);
        if(simParameters->fixedAtomsOn) {
            oldNum = patchMap->numFixedAtoms(pid);
            patchMap->setNumFixedAtoms(pid, oldNum+msg->fixedAtomsCntList[i]);
        }
    }
    delete msg;

    if(++procsReceived == numInputProcs) {
        //print max PATCH info
        int maxAtoms = -1;
        int maxPatch = -1;
        int totalAtoms = 0;
        for(int i=0; i<patchMap->numPatches(); i++) {
            int cnt = patchMap->numAtoms(i);
            totalAtoms += cnt;
            if(cnt>maxAtoms) {
                maxAtoms = cnt;
                maxPatch = i;
            }
        }
        procsReceived = 0;
        iout << iINFO << "LARGEST PATCH (" << maxPatch <<
            ") HAS " << maxAtoms << " ATOMS\n" << endi;
        if ( totalAtoms != Node::Object()->molecule->numAtoms ) {
            char errstr[512];
            sprintf(errstr, "Incorrect atom count in ParallelIOMgr::recvAtomsCntPerPatch: %d vs %d", totalAtoms, Node::Object()->molecule->numAtoms);
            NAMD_die(errstr);
        }
    }
#endif
}

static void call_sendAtomsToHomePatchProcs(void *arg)
{
    ((ParallelIOMgr*)arg)->sendAtomsToHomePatchProcs();
}

void ParallelIOMgr::sendAtomsToHomePatchProcs()
{
#ifdef MEM_OPT_VERSION
    if(myInputRank==-1) return;

    if ( sendAtomsThread == 0 ) {
        sendAtomsThread = CthCreate((CthVoidFn)call_sendAtomsToHomePatchProcs,this,0);
        CthAwaken(sendAtomsThread);
        return;
    }
    sendAtomsThread = 0;
    numAcksOutstanding = 0;

    PatchMap *patchMap = PatchMap::Object();
    int numPatches = patchMap->numPatches();
    vector<int> *eachPatchAtomList = patchMap->getTmpPatchAtomsList();

    //each element (proc) contains the list of ids of patches that will stay
    //on that processor
    ResizeArray<int> *procList = new ResizeArray<int>[CkNumPes()];
    ResizeArray<int> pesToSend;
    for(int i=0; i<numPatches; i++) {
        if(eachPatchAtomList[i].size()==0) continue;
        int onPE = patchMap->node(i);
        if ( procList[onPE].size() == 0 ) pesToSend.add(onPE);
        procList[onPE].add(i);
    }

    Random(CkMyPe()).reorder(pesToSend.begin(),pesToSend.size());
    //CkPrintf("Pe %d ParallelIOMgr::sendAtomsToHomePatchProcs sending to %d pes\n",CkMyPe(),pesToSend.size());

    //go over every processor to send a message if necessary
    //TODO: Optimization for local home patches to save temp memory usage??? -CHAOMEI
    CProxy_ParallelIOMgr pIO(thisgroup);
    for(int k=0; k<pesToSend.size(); k++) {
        const int i = pesToSend[k];
        int len = procList[i].size();
        if(len==0) continue;

        // Sending one message per pe can result in very large messages
        // that break Converse so send one message per patch instead.
        for(int j=0; j<len; j++) {
            int pid = procList[i][j];
            int atomCnt = eachPatchAtomList[pid].size();

            if ( numAcksOutstanding >= 10 ) {
                sendAtomsThread = CthSelf();
                CthSuspend();
            }
            ++numAcksOutstanding;

            MovePatchAtomsMsg *msg = new (1, 1, atomCnt, 0)MovePatchAtomsMsg;
            msg->from = CkMyPe();
            msg->patchCnt = 1;
            int atomIdx = 0;
            msg->pidList[0] = pid;
            msg->sizeList[0] = atomCnt;
            for(int k=0; k<atomCnt; k++, atomIdx++) {
                int aid = eachPatchAtomList[pid][k];
                FullAtom one = initAtoms[aid];
                //HACK to re-sort the atom list after receiving the atom list on
                //the home patch processor -Chao Mei
                one.hydVal = initAtoms[aid].hydList;
                msg->allAtoms[atomIdx] = one;
            }
            pIO[i].recvAtomsToHomePatchProcs(msg);
        }

        procList[i].clear();
    }

    //clean up to free space
    delete [] procList;
    patchMap->delTmpPatchAtomsList();

    //free the space occupied by the list that contains the input atoms
    initAtoms.clear();
#endif
}
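
// Flow control, sketched: at most 10 atom messages may be unacknowledged at a
// time.  When the limit is hit, the Cth thread records itself and suspends;
// each ackAtomsToHomePatchProcs() below decrements the count and awakens the
// thread, which resumes exactly where it left off in the send loop above.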

void ParallelIOMgr::ackAtomsToHomePatchProcs()
{
    --numAcksOutstanding;
    if ( sendAtomsThread ) {
        CthAwaken(sendAtomsThread);
        sendAtomsThread = 0;
    }
}

void ParallelIOMgr::recvAtomsToHomePatchProcs(MovePatchAtomsMsg *msg)
{
    CProxy_ParallelIOMgr pIO(thisgroup);
    pIO[msg->from].ackAtomsToHomePatchProcs();

    if(!isOKToRecvHPAtoms) {
        prepareHomePatchAtomList();
        isOKToRecvHPAtoms = true;
    }

    int numRecvPatches = msg->patchCnt;
    int aid = 0;
    for(int i=0; i<numRecvPatches; i++) {
        int pid = msg->pidList[i];
        int size = msg->sizeList[i];
        int idx = binaryFindHPID(pid);
        for(int j=0; j<size; j++, aid++) {
            hpAtomsList[idx].add(msg->allAtoms[aid]);
        }
    }
    //CkPrintf("Pe %d recvAtomsToHomePatchProcs for %d patches %d atoms\n",CkMyPe(),numRecvPatches,aid);
    delete msg;
}

void ParallelIOMgr::prepareHomePatchAtomList()
{
    PatchMap *patchMap = PatchMap::Object();
    for(int i=0; i<patchMap->numPatches(); i++) {
        if(patchMap->node(i)==CkMyPe()) {
            hpIDList.add(i);
        }
    }
    if(hpIDList.size()>0)
        hpAtomsList = new FullAtomList[hpIDList.size()];
}

int ParallelIOMgr::binaryFindHPID(int pid)
{
    //hpIDList must be in increasing order!
    int lo, hi;
    int retIdx = -1;

    lo=0;
    hi=hpIDList.size()-1;

    while(lo<=hi) {
        int idx = (lo+hi)/2;
        int curPid = hpIDList[idx];
        if(pid>curPid) {
            //search the upper half
            lo = idx+1;
        } else if(pid<curPid) {
            //search the lower half
            hi = idx-1;
        } else {
            //found!
            retIdx = idx;
            break;
        }
    }
    CmiAssert(retIdx!=-1);
    return retIdx;
}
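
// Example (hypothetical ids): with hpIDList == {4, 9, 17}, binaryFindHPID(9)
// probes index 1 first and returns it; a pid not in the list trips the
// CmiAssert, since every received patch id must map to a local home patch.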

void ParallelIOMgr::createHomePatches()
{
#ifdef MEM_OPT_VERSION

    int assignedPids = PatchMap::Object()->numPatchesOnNode(CkMyPe());
    int numPids = hpIDList.size();
    if(numPids==0){
        //this node actually contains no home patches
        if(assignedPids == 0) return;

        //Entering the rare condition that all the home patches assigned to
        //this node are empty, so "recvAtomsToHomePatchProcs" is never called!
        //But we still need to create those empty home patches!
        CmiAssert(isOKToRecvHPAtoms == false);
        PatchMap *patchMap = PatchMap::Object();
        CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
        PatchMgr *patchMgr = pm.ckLocalBranch();
        for(int i=0; i<patchMap->numPatches(); i++) {
            if(patchMap->node(i)==CkMyPe()) {
                FullAtomList emptyone;
                patchMgr->createHomePatch(i, emptyone);
            }
        }
        return;
    }

    CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
    PatchMgr *patchMgr = pm.ckLocalBranch();

    //go through the home patch list
    for(int i=0; i<numPids; i++) {
        int pid = hpIDList[i];

        //re-sort the atom list of this patch
        std::sort(hpAtomsList[i].begin(), hpAtomsList[i].end());
        Node::Object()->workDistrib->fillAtomListForOnePatch(pid, hpAtomsList[i]);
        patchMgr->createHomePatch(pid, hpAtomsList[i]);
    }

    hpIDList.clear();
    delete [] hpAtomsList;

    hpAtomsList = NULL;
#endif
}

void ParallelIOMgr::freeMolSpace()
{
#ifdef MEM_OPT_VERSION
    molecule->delAtomNames();
    molecule->delChargeSpace();

    //???TODO NOT SURE WHETHER freeEnergyOn IS SUPPORTED IN MEM_OPT_VERSION
    //-CHAOMEI
    if(!CkMyPe() && !simParameters->freeEnergyOn)
        molecule->delMassSpace();

    molecule->delFixedAtoms();
#endif
}

//The initial distribution of atoms:
//(avgNum+1),...,(avgNum+1),avgNum,...,avgNum, where
//avgNum equals the total number of atoms in the system
//divided by the number of input processors --Chao Mei
int ParallelIOMgr::numMyAtoms(int rank, int numProcs)
{
    if(rank==-1) return -1;
    int avgNum = molecule->numAtoms/numProcs;
    int remainder = molecule->numAtoms%numProcs;
    if(rank<remainder) return avgNum+1;
    else return avgNum;
}

int ParallelIOMgr::atomRank(int atomID, int numProcs)
{
    int avgNum = molecule->numAtoms/numProcs;
    int remainder = molecule->numAtoms%numProcs;
    int midLimit = remainder*(avgNum+1);
    int idx;
    if(atomID<midLimit) {
        idx = atomID/(avgNum+1);
    } else {
        idx = remainder+(atomID-midLimit)/avgNum;
    }
    return idx;
}

void ParallelIOMgr::getMyAtomsRange(int &lowerIdx, int &upperIdx, int rank, int numProcs)
{
    if(rank==-1) {
        //just to make sure upperIdx-lowerIdx+1 == -1
        lowerIdx=-1;
        upperIdx=-3;
        return;
    }
    int avgNum = molecule->numAtoms/numProcs;
    int remainder = molecule->numAtoms%numProcs;
    if(rank<remainder) {
        lowerIdx = rank*(avgNum+1);
        upperIdx = lowerIdx+avgNum;
    } else {
        int midLimit = remainder*(avgNum+1);
        lowerIdx = midLimit+(rank-remainder)*avgNum;
        upperIdx = lowerIdx+avgNum-1;
    }
}
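
// Worked example (hypothetical counts): 10 atoms on 4 procs gives avgNum = 2
// and remainder = 2, so the ranks own 3,3,2,2 atoms with ranges [0,2], [3,5],
// [6,7], [8,9]; atomRank(7,4) computes midLimit = 6 and returns
// 2 + (7-6)/2 = 2, consistent with those ranges.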

int ParallelIOMgr::calcMyOutputProxyClients() {
    PatchMap *patchMap = PatchMap::Object();
    int myOutputProxyClients = 0;
    int myset = myOutputProxyRank / numOutputProcs;
    for(int i=0; i<CkNumPes(); ++i) {
        if ( (i*numProxiesPerOutputProc)/CkNumPes() == myset &&
             patchMap->numPatchesOnNode(i) ) {
            ++myOutputProxyClients;
        }
    }
    return myOutputProxyClients;
}

void ParallelIOMgr::receivePositions(CollectVectorVarMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if ( myOutputRank != -1 ) {
        int ready = midCM->receivePositions(msg);
        if(ready) {
            CProxy_CollectionMaster cm(mainMaster);
            cm.receiveOutputPosReady(msg->seq);
        }
        delete msg;
    } else if ( myOutputProxyRank != -1 ) {
        if ( ! myOutputProxyPositions ) {
            myOutputProxyPositions = new CollectProxyVectorSequence(calcMyOutputProxyClients());
        }
        CollectVectorVarMsg *newmsg = myOutputProxyPositions->submitData(msg);
        if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receivePositions(newmsg);
        delete msg;
    } else {
        NAMD_bug("ParallelIOMgr::receivePositions on bad pe");
    }
#endif
}

void ParallelIOMgr::receiveVelocities(CollectVectorVarMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if ( myOutputRank != -1 ) {
        int ready = midCM->receiveVelocities(msg);
        if(ready) {
            CProxy_CollectionMaster cm(mainMaster);
            cm.receiveOutputVelReady(msg->seq);
        }
        delete msg;
    } else if ( myOutputProxyRank != -1 ) {
        if ( ! myOutputProxyVelocities ) {
            myOutputProxyVelocities = new CollectProxyVectorSequence(calcMyOutputProxyClients());
        }
        CollectVectorVarMsg *newmsg = myOutputProxyVelocities->submitData(msg);
        if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveVelocities(newmsg);
        delete msg;
    } else {
        NAMD_bug("ParallelIOMgr::receiveVelocities on bad pe");
    }
#endif
}

void ParallelIOMgr::receiveForces(CollectVectorVarMsg *msg)
{
#ifdef MEM_OPT_VERSION
    if ( myOutputRank != -1 ) {
        int ready = midCM->receiveForces(msg);
        if(ready) {
            CProxy_CollectionMaster cm(mainMaster);
            cm.receiveOutputForceReady(msg->seq);
        }
        delete msg;
    } else if ( myOutputProxyRank != -1 ) {
        if ( ! myOutputProxyForces ) {
            myOutputProxyForces = new CollectProxyVectorSequence(calcMyOutputProxyClients());
        }
        CollectVectorVarMsg *newmsg = myOutputProxyForces->submitData(msg);
        if ( newmsg ) thisProxy[outputProcArray[myOutputProxyRank%numOutputProcs]].receiveForces(newmsg);
        delete msg;
    } else {
        NAMD_bug("ParallelIOMgr::receiveForces on bad pe");
    }
#endif
}
1760 
1761 
1762 void ParallelIOMgr::disposePositions(int seq, double prevT)
1763 {
1764 #ifdef MEM_OPT_VERSION
1765  double iotime = CmiWallTimer();
1766  midCM->disposePositions(seq);
1767  iotime = CmiWallTimer()-iotime+prevT;
1768 
1769 #if OUTPUT_SINGLE_FILE
1770  //Token-based file output
1771  if(myOutputRank == getMyOutputGroupHighestRank()) {
1772  //notify the CollectionMaster to start the next round
1773  CProxy_CollectionMaster cm(mainMaster);
1774  cm.startNextRoundOutputPos(iotime);
1775  } else {
1776  CProxy_ParallelIOMgr io(thisgroup);
1777  io[outputProcArray[myOutputRank+1]].disposePositions(seq, iotime);
1778  }
1779 #else
1780  //notify the CollectionMaster to start the next round
1781  CProxy_CollectionMaster cm(mainMaster);
1782  cm.startNextRoundOutputPos(iotime);
1783 #endif
1784 
1785 #endif
1786 }
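// Sketch of the token pass above (an added note, not part of the
// original source), which disposeVelocities and disposeForces repeat
// below: with OUTPUT_SINGLE_FILE, the output procs of a group write one
// at a time in rank order, each adding its own write time to the
// accumulated iotime before forwarding the call, e.g.
//   rank 0: write, iotime = t0, forward to rank 1
//   rank 1: write, iotime = t0+t1, forward to rank 2
//   highest rank in group: write, report total iotime to CollectionMaster
// so at most one proc touches the shared output file at any moment.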
1787 
1788 void ParallelIOMgr::disposeVelocities(int seq, double prevT)
1789 {
1790 #ifdef MEM_OPT_VERSION
1791  double iotime = CmiWallTimer();
1792  midCM->disposeVelocities(seq);
1793  iotime = CmiWallTimer()-iotime+prevT;
1794 
1795 #if OUTPUT_SINGLE_FILE
1796  //Token-based file output
1797  if(myOutputRank==getMyOutputGroupHighestRank()) {
1798  //notify the CollectionMaster to start the next round
1799  CProxy_CollectionMaster cm(mainMaster);
1800  cm.startNextRoundOutputVel(iotime);
1801  } else {
1802  CProxy_ParallelIOMgr io(thisgroup);
1803  io[outputProcArray[myOutputRank+1]].disposeVelocities(seq, iotime);
1804  }
1805 #else
1806  //notify the CollectionMaster to start the next round
1807  CProxy_CollectionMaster cm(mainMaster);
1808  cm.startNextRoundOutputVel(iotime);
1809 #endif
1810 
1811 #endif
1812 }
1813 
1814 void ParallelIOMgr::disposeForces(int seq, double prevT)
1815 {
1816 #ifdef MEM_OPT_VERSION
1817  double iotime = CmiWallTimer();
1818  midCM->disposeForces(seq);
1819  iotime = CmiWallTimer()-iotime+prevT;
1820 
1821 #if OUTPUT_SINGLE_FILE
1822  //Token-based file output
1823  if(myOutputRank==getMyOutputGroupHighestRank()) {
1824  //notify the CollectionMaster to start the next round
1825  CProxy_CollectionMaster cm(mainMaster);
1826  cm.startNextRoundOutputForce(iotime);
1827  } else {
1828  CProxy_ParallelIOMgr io(thisgroup);
1829  io[outputProcArray[myOutputRank+1]].disposeForces(seq, iotime);
1830  }
1831 #else
1832  //notify the CollectionMaster to start the next round
1833  CProxy_CollectionMaster cm(mainMaster);
1834  cm.startNextRoundOutputForce(iotime);
1835 #endif
1836 
1837 #endif
1838 }
1839 
1840 
1841 void ParallelIOMgr::wrapCoor(int seq, Lattice lat)
1842 {
1843 #ifdef MEM_OPT_VERSION
1844  coorInstance = midCM->getReadyPositions(seq);
1845 
1846  coorInstance->lattice = lat; //record the lattice to use for wrapAll/Water!
1847  int fromAtomID = coorInstance->fromAtomID;
1848  int toAtomID = coorInstance->toAtomID;
1849 
1850  //only reference copies
1851  ResizeArray<Vector> &data = coorInstance->data;
1852  ResizeArray<FloatVector> &fdata = coorInstance->fdata;
1853  //if both data and fdata are non-empty, they hold the same values and
1854  //differ only in precision. Therefore, we only need to compute
1855  //the higher-precision coordinate array. -Chao Mei
1856  int dsize = data.size();
1857  int numMyAtoms = toAtomID-fromAtomID+1;
1858  tmpCoorCon = new Vector[numMyAtoms];
1859  ClusterCoorElem one;
1860  //1. compute wrapped coordinates locally
1861  for(int i=0; i<numMyAtoms; i++){
1862  tmpCoorCon[i] = 0.0;
1863  int cid = clusterID[i];
1864  if(cid<fromAtomID){
1865  //on output procs ahead of me
1866  one.clusterId = cid;
1867  ClusterCoorElem *ret = remoteCoors.find(one);
1868  if(ret==NULL){
1869  if(dsize==0)
1870  one.dsum = fdata[i];
1871  else
1872  one.dsum = data[i];
1873 
1874  remoteCoors.add(one);
1875  }else{
1876  if(dsize==0)
1877  ret->dsum += fdata[i];
1878  else
1879  ret->dsum += data[i];
1880  }
1881  }else{
1882  if(dsize==0)
1883  tmpCoorCon[cid-fromAtomID] += fdata[i];
1884  else
1885  tmpCoorCon[cid-fromAtomID] += data[i];
1886  }
1887  }
1888 
1889  //2. Prepare to send msgs to remote output procs to reduce coordinates
1890  //values of a cluster
1891  CmiAssert(numRemoteClusters == remoteCoors.size());
1892  numCSMAck = 0; //set to 0 to prepare recving the final coor update
1893  CProxy_ParallelIOMgr pIO(thisgroup);
1894  ClusterCoorSetIter iter(remoteCoors);
1895  for(iter=iter.begin(); iter!=iter.end(); iter++){
1896  ClusterCoorMsg *msg = new ClusterCoorMsg;
1897  msg->srcRank = myOutputRank;
1898  msg->clusterId = iter->clusterId;
1899  msg->dsum = iter->dsum;
1900  int dstRank = atomRankOnOutput(iter->clusterId);
1901  pIO[outputProcArray[dstRank]].recvClusterCoor(msg);
1902  }
1903 
1904  //Just send a local NULL msg to indicate that the local coordinate
1905  //wrapping has finished.
1906  recvClusterCoor(NULL);
1907 #endif
1908 }
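// Summary of the cluster-wrapping protocol that begins here (an added
// note, not part of the original source):
// 1. each output proc accumulates per-cluster coordinate sums in
//    tmpCoorCon, keyed by the cluster head atom's index;
// 2. partial sums for clusters whose head atom lives on an earlier
//    output proc are mailed to that proc via recvClusterCoor;
// 3. the owning proc divides by clusterSize to get the cluster's mean
//    position and turns it into a wrap displacement, essentially
//      Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
//      Vector delta = simParameters->wrapNearest ?
//          lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni);
// 4. that displacement is sent back via recvFinalClusterCoor and added
//    to every atom of the cluster before positions are written out.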
1909 
1910 //On the output proc, it's possible (though rare) that recvClusterCoor
1911 //is executed before wrapCoor, so we need to make sure the local tmpCoorCon
1912 //has been calculated. This is why (numRemoteReqs+1) msgs are expected: the
1913 //additional one is sent by the proc itself when it finishes wrapping
1914 //coordinates. --Chao Mei
1915 void ParallelIOMgr::recvClusterCoor(ClusterCoorMsg *msg){
1916  //only add the msg from remote procs
1917  if(msg!=NULL) ccmBuf.add(msg);
1918 
1919  //include a msg sent by itself
1920  if(++numReqRecved == (numRemoteReqs+1)){
1921  numReqRecved = 0;
1922  integrateClusterCoor();
1923  }
1924 }
1925 
1926 void ParallelIOMgr::integrateClusterCoor(){
1927 #ifdef MEM_OPT_VERSION
1928  int fromIdx = coorInstance->fromAtomID;
1929  int toIdx = coorInstance->toAtomID;
1930  for(int i=0; i<ccmBuf.size(); i++){
1931  ClusterCoorMsg *msg = ccmBuf[i];
1932  int lidx = msg->clusterId - fromIdx;
1933  tmpCoorCon[lidx] += msg->dsum;
1934  }
1935 
1936  //send back those msgs
1937  CProxy_ParallelIOMgr pIO(thisgroup);
1938  for(int i=0; i<ccmBuf.size(); i++){
1939  ClusterCoorMsg *msg = ccmBuf[i];
1940  int lidx = msg->clusterId - fromIdx;
1941  if(simParameters->wrapAll || isWater[lidx]) {
1942  Lattice *lat = &(coorInstance->lattice);
1943  Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1944  msg->dsum = (simParameters->wrapNearest ?
1945  lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
1946  }else{
1947  msg->dsum = 0.0;
1948  }
1949  pIO[outputProcArray[msg->srcRank]].recvFinalClusterCoor(msg);
1950  }
1951  ccmBuf.resize(0);
1952 
1953  //It's possible that recvFinalClusterCoor is executed before integrateClusterCoor
1954  //on this processor (the other proc executes wrapCoor faster). So to avoid
1955  //this msg race, send a message to itself to participate in the reduction.
1956  if(numRemoteClusters!=0){
1957  recvFinalClusterCoor(NULL);
1958  } else {
1959  //this output proc already has the sum of coordinates of each cluster
1960  //it owns, so it is ready to do the final wrap coor computation
1961  int numMyAtoms = toIdx-fromIdx+1;
1962  ResizeArray<Vector> &data = coorInstance->data;
1963  ResizeArray<FloatVector> &fdata = coorInstance->fdata;
1964  for(int i=0; i<numMyAtoms; i++){
1965  if(!simParameters->wrapAll && !isWater[i]) continue;
1966  int lidx = clusterID[i]-fromIdx;
1967  if(lidx==i){
1968  //the head atom of the cluster
1969  Lattice *lat = &(coorInstance->lattice);
1970  Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
1971  tmpCoorCon[lidx] = (simParameters->wrapNearest ?
1972  lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
1973  }
1974  if(data.size()) data[i] += tmpCoorCon[lidx];
1975  //no operator += (FloatVector, Vector)
1976  if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
1977  }
1978 
1979  delete [] tmpCoorCon;
1980  tmpCoorCon = NULL;
1981  CProxy_CollectionMaster cm(mainMaster);
1982  cm.wrapCoorFinished();
1983  }
1984 #endif
1985 }
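// Note on the handler below (an added note, not part of the original
// source): it mirrors the (numRemoteReqs+1) trick used for
// recvClusterCoor. An output proc expects one recvFinalClusterCoor
// reply per remote cluster it reported plus one local NULL self-message,
// so the final position update runs only after all remote deltas have
// arrived and the local integrateClusterCoor pass has finished; hence
// the check against (numRemoteClusters+1) on numCSMAck.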
1986 
1987 void ParallelIOMgr::recvFinalClusterCoor(ClusterCoorMsg *msg){
1988 #ifdef MEM_OPT_VERSION
1989  if(msg!=NULL){
1990  //only process the message sent from other procs!
1991  ClusterCoorElem one(msg->clusterId);
1992  ClusterCoorElem *ret = remoteCoors.find(one);
1993  ret->dsum = msg->dsum;
1994  delete msg;
1995  }
1996 
1997  if(++numCSMAck == (numRemoteClusters+1)){
1998  //final wrap coor computation
1999  int fromIdx = coorInstance->fromAtomID;
2000  int toIdx = coorInstance->toAtomID;
2001  int numMyAtoms = toIdx-fromIdx+1;
2002  ResizeArray<Vector> &data = coorInstance->data;
2003  ResizeArray<FloatVector> &fdata = coorInstance->fdata;
2004  ClusterCoorElem tmp;
2005  for(int i=0; i<numMyAtoms; i++){
2006  if(!simParameters->wrapAll && !isWater[i]) continue;
2007  int cid = clusterID[i];
2008  int lidx = cid-fromIdx;
2009  if(lidx<0){
2010  //this cid should be inside remoteCoors
2011  tmp.clusterId = cid;
2012  ClusterCoorElem *fone = remoteCoors.find(tmp);
2013  if(data.size()) data[i] += fone->dsum;
2014  if(fdata.size()) fdata[i] = fdata[i] + fone->dsum;
2015  }else{
2016  if(lidx==i){
2017  Lattice *lat = &(coorInstance->lattice);
2018  Vector coni = tmpCoorCon[lidx]/clusterSize[lidx];
2019  tmpCoorCon[lidx] = (simParameters->wrapNearest ?
2020  lat->wrap_nearest_delta(coni) : lat->wrap_delta(coni));
2021  }
2022  if(data.size()) data[i] += tmpCoorCon[lidx];
2023  if(fdata.size()) fdata[i] = fdata[i] + tmpCoorCon[lidx];
2024  }
2025  }
2026 
2027  delete [] tmpCoorCon;
2028  tmpCoorCon = NULL;
2029  CProxy_CollectionMaster cm(mainMaster);
2030  cm.wrapCoorFinished();
2031  numCSMAck = 0;
2032  remoteCoors.clear();
2033  }
2034 #endif
2035 }
2036 #include "ParallelIOMgr.def.h"