NAMD
HomePatch.C
1 
8 /*
9  HomePatch owns the actual atoms of a Patch of space
10  Proxy(s) get messages via ProxyMgr from HomePatch(es)
11  to update lists of atoms and their coordinates
12  HomePatch(es) also have a Sequencer bound to them
13 
14  superclass: Patch
15 */
16 
17 #include "NamdTypes.h"
18 #include "common.h"
19 #include "time.h"
20 #include <math.h>
21 #include "charm++.h"
22 #include "qd.h"
23 
24 #include "SimParameters.h"
25 #include "HomePatch.h"
26 #include "AtomMap.h"
27 #include "Node.h"
28 #include "PatchMap.inl"
29 #include "main.h"
30 #include "ProxyMgr.decl.h"
31 #include "ProxyMgr.h"
32 #include "Migration.h"
33 #include "Molecule.h"
34 #include "PatchMgr.h"
35 #include "Sequencer.h"
36 #include "Broadcasts.h"
37 #include "LdbCoordinator.h"
38 #include "ReductionMgr.h"
39 #include "Sync.h"
40 #include "Random.h"
41 #include "Priorities.h"
42 #include "ComputeNonbondedUtil.h"
43 #include "ComputeGBIS.inl"
45 #include "SortAtoms.h"
46 #include "MigrationCUDAKernel.h"
47 
48 #include "ComputeQM.h"
49 #include "ComputeQMMgr.decl.h"
50 
51 #include "NamdEventsProfiling.h"
52 
53 //#define PRINT_COMP
54 #define TINY 1.0e-20
55 #define MAXHGS 10
56 #define MIN_DEBUG_LEVEL 2
57 //#define DEBUGM
58 //#define NL_DEBUG
59 #include "Debug.h"
60 
61 #include <vector>
62 #include <algorithm>
63 using namespace std;
64 
65 typedef int HGArrayInt[MAXHGS];
66 typedef BigReal HGArrayBigReal[MAXHGS];
67 typedef Vector HGArrayVector[MAXHGS];
68 typedef BigReal HGMatrixBigReal[MAXHGS][MAXHGS];
69 typedef Vector HGMatrixVector[MAXHGS][MAXHGS];
70 
71 int average(CompAtom *qtilde,const HGArrayVector &q,BigReal *lambda,const int n,const int m, const HGArrayBigReal &imass, const HGArrayBigReal &length2, const HGArrayInt &ial, const HGArrayInt &ibl, const HGArrayVector &refab, const BigReal tolf, const int ntrial);
72 
73 void mollify(CompAtom *qtilde,const HGArrayVector &q0,const BigReal *lambda, HGArrayVector &force,const int n, const int m, const HGArrayBigReal &imass,const HGArrayInt &ial,const HGArrayInt &ibl,const HGArrayVector &refab);
74 
75 void MSHAKE_CUDA(int*, const int size, const RattleParam* rattleParam,
76  BigReal *refx, BigReal *refy, BigReal *refz,
77  BigReal *posx, BigReal *posy, BigReal *posz,
78  const BigReal tol2, const int maxiter,
79  bool& done, bool& consFailure);
80 #define MASS_EPSILON (1.0e-35) //a very small floating point number
81 
82 
83 // DMK - Atom Separation (water vs. non-water)
84 #if NAMD_SeparateWaters != 0
85 
86 // Macro to test if a hydrogen group represents a water molecule.
87 // NOTE: This test is the same test in Molecule.C for setting the
88 // OxygenAtom flag in status.
89 // hgtype should be the number of atoms in a water hydrogen group
90 // It must now be set based on simulation parameters because we might
91 // be using tip4p
92 
93 // DJH: This will give false positive for full Drude model,
94 // e.g. O D H is not water but has hgs==3
95 #define IS_HYDROGEN_GROUP_WATER(hgs, mass) \
96  ((hgs >= 3) && ((mass >= 14.0) && (mass <= 18.0)))
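// For example, a TIP3P water group (O H H) has hgs == 3 and a parent
// oxygen mass of ~16, inside [14.0, 18.0], so the test passes, while a
// CH3 group fails the mass test since carbon is ~12. TIP4P waters carry
// an extra lone-pair site, which is why the test uses hgs >= 3 rather
// than hgs == 3.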
97 
98 #endif
99 
100 #ifdef TIMER_COLLECTION
101 const char *TimerSet::tlabel[TimerSet::NUMTIMERS] = {
102  "kick",
103  "maxmove",
104  "drift",
105  "piston",
106  "submithalf",
107  "velbbk1",
108  "velbbk2",
109  "rattle1",
110  "submitfull",
111  "submitcollect",
112 };
113 #endif
114 
115 HomePatch::HomePatch(PatchID pd, FullAtomList &al) : Patch(pd)
116 // DMK - Atom Separation (water vs. non-water)
117 #if NAMD_SeparateWaters != 0
118  ,tempAtom()
119 #endif
120 {
121  atom.swap(al);
122  settle_initialized = 0;
123 
124  doAtomUpdate = true;
125  rattleListValid = false;
126  rattleListValid_SOA = false;
127 
128  exchange_msg = 0;
129  exchange_req = -1;
130 
131  numGBISP1Arrived = 0;
132  numGBISP2Arrived = 0;
133  numGBISP3Arrived = 0;
134  phase1BoxClosedCalled = false;
135  phase2BoxClosedCalled = false;
136  phase3BoxClosedCalled = false;
137 
138  min.x = PatchMap::Object()->min_a(patchID);
139  min.y = PatchMap::Object()->min_b(patchID);
140  min.z = PatchMap::Object()->min_c(patchID);
141  max.x = PatchMap::Object()->max_a(patchID);
142  max.y = PatchMap::Object()->max_b(patchID);
143  max.z = PatchMap::Object()->max_c(patchID);
144  center = 0.5*(min+max);
145 
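 // The a/b/cAwayDist values computed below give the physical distance
 // spanned by the "numaway" neighbor patches along each lattice basis
 // direction; for a non-periodic dimension with few patches, the
 // configured patchDimension is used instead. These distances are used
 // by the later margin checks (doMarginCheck) that decide when atoms
 // must migrate.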
146  int aAway = PatchMap::Object()->numaway_a();
147  if ( PatchMap::Object()->periodic_a() ||
148  PatchMap::Object()->gridsize_a() > aAway + 1 ) {
149  aAwayDist = (max.x - min.x) * aAway;
150  } else {
151  aAwayDist = Node::Object()->simParameters->patchDimension;
152  }
153  int bAway = PatchMap::Object()->numaway_b();
154  if ( PatchMap::Object()->periodic_b() ||
155  PatchMap::Object()->gridsize_b() > bAway + 1 ) {
156  bAwayDist = (max.y - min.y) * bAway;
157  } else {
158  bAwayDist = Node::Object()->simParameters->patchDimension;
159  }
160  int cAway = PatchMap::Object()->numaway_c();
161  if ( PatchMap::Object()->periodic_c() ||
162  PatchMap::Object()->gridsize_c() > cAway + 1 ) {
163  cAwayDist = (max.z - min.z) * cAway;
164  } else {
165  cAwayDist = Node::Object()->simParameters->patchDimension;
166  }
167 
168  migrationSuspended = false;
169  allMigrationIn = false;
170  marginViolations = 0;
171  patchMapRead = 0; // We delay read of PatchMap data
172  // to make sure it is really valid
173  inMigration = false;
174  numMlBuf = 0;
175  flags.sequence = -1;
176  flags.maxForceUsed = -1;
177 
178  numAtoms = atom.size();
179  replacementForces = 0;
180 
181  SimParameters *simParams = Node::Object()->simParameters;
182  doPairlistCheck_newTolerance =
183  0.5 * ( simParams->pairlistDist - simParams->cutoff );
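 // Example with typical values: pairlistDist = 13.5 A and cutoff = 12 A
 // give an initial tolerance of 0.5 * 1.5 = 0.75 A. The 0.5 factor is
 // there because two atoms may each drift by the tolerance before a pair
 // just outside the pairlist distance could come within the cutoff.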
184 
185 
186  numFixedAtoms = 0;
187  if ( simParams->fixedAtomsOn ) {
188  for ( int i = 0; i < numAtoms; ++i ) {
189  numFixedAtoms += ( atom[i].atomFixed ? 1 : 0 );
190  }
191  }
192 
193  #if 0
194  cudaAtomList = NULL;
195  sizeCudaAtomList = 0;
196  #endif
197 
198 #ifdef NODEAWARE_PROXY_SPANNINGTREE
199  ptnTree.resize(0);
200  /*children = NULL;
201  numChild = 0;*/
202 #else
203  child = new int[proxySpanDim];
204  nChild = 0; // number of proxy spanning tree children
205 #endif
206 
207 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
208  nphs = 0;
209  localphs = NULL;
210  isProxyChanged = 0;
211 #endif
212 
213  if (simParams->SOAintegrateOn) {
214  PatchDataSOA_initialize( &patchDataSOA );
215  sort_solvent_atoms();
216  copy_atoms_to_SOA();
217 #if 0
218  if (simParams->rigidBonds != RIGID_NONE) {
219  buildRattleList_SOA();
220  rattleListValid_SOA = true;
221  }
222 #endif
223  }
224 
225  // DMK - Atom Separation (water vs. non-water)
226  #if NAMD_SeparateWaters != 0
227 
228  // Create the scratch memory for separating atoms
229  tempAtom.resize(numAtoms);
230  numWaterAtoms = -1;
231 
232  // Separate the current list of atoms
233  separateAtoms();
234 
235  #endif
236  // Handle unusual water models here
237  if (simParams->watmodel == WaterModel::TIP4) init_tip4();
238  else if (simParams->watmodel == WaterModel::SWM4) init_swm4();
239  gridForceIdxChecked=false;
240 
241  isNewProxyAdded = 0;
242 }
243 
244 void HomePatch::write_tip4_props() {
245  printf("Writing r_om and r_ohc: %f | %f\n", r_om, r_ohc);
246 }
247 
248 void HomePatch::init_tip4() {
249  // initialize the distances needed for the tip4p water model
250  Molecule *mol = Node::Object()->molecule;
251  r_om = mol->r_om;
252  r_ohc = mol->r_ohc;
253 }
254 
255 
256 void HomePatch::init_swm4() {
257  // initialize the distances needed for the SWM4 water model
258  Molecule *mol = Node::Object()->molecule;
259  r_om = mol->r_om;
260  r_ohc = mol->r_ohc;
261 }
262 
263 
264 void HomePatch::reinitAtoms(FullAtomList &al) {
265  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
266 
267  atom.swap(al);
268  numAtoms = atom.size();
269 
270  doAtomUpdate = true;
271  rattleListValid = false;
272  rattleListValid_SOA = false;
273 
274  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
275 
277  if (simParams->SOAintegrateOn) {
278  sort_solvent_atoms();
279  copy_atoms_to_SOA();
280 #if 0
281  if (simParams->rigidBonds != RIGID_NONE) {
283  rattleListValid_SOA = true;
284  }
285 #endif
286  }
287 
288  // DMK - Atom Separation (water vs. non-water)
289  #if NAMD_SeparateWaters != 0
290 
291  // Reset the numWaterAtoms value
292  numWaterAtoms = -1;
293 
294  // Separate the atoms
295  separateAtoms();
296 
297  #endif
298 }
299 
300 // Bind a Sequencer to this HomePatch
301 void HomePatch::useSequencer(Sequencer *sequencerPtr)
302 { sequencer=sequencerPtr; }
303 
304 // start simulation over this Patch of atoms
305 void HomePatch::runSequencer(void)
306 { sequencer->run(); }
307 
308 void HomePatch::readPatchMap() {
309  // iout << "Patch " << patchID << " has " << proxy.size() << " proxies.\n" << endi;
310  PatchMap *p = PatchMap::Object();
311  PatchID nnPatchID[PatchMap::MaxOneAway];
312 
313  patchMigrationCounter = numNeighbors
314  = PatchMap::Object()->oneAwayNeighbors(patchID, nnPatchID);
315  DebugM( 1, "NumNeighbors for pid " <<patchID<<" is "<< numNeighbors << "\n");
316  int n;
317  for (n=0; n<numNeighbors; n++) {
318  realInfo[n].destNodeID = p->node(realInfo[n].destPatchID = nnPatchID[n]);
319  DebugM( 1, " nnPatchID=" <<nnPatchID[n]<<" nnNodeID="<< realInfo[n].destNodeID<< "\n");
320  realInfo[n].mList.resize(0);
321  }
322 
323  // Make mapping from the 3x3x3 cube of pointers to real migration info
324  for (int i=0; i<3; i++)
325  for (int j=0; j<3; j++)
326  for (int k=0; k<3; k++)
327  {
328  int pid = p->pid(p->index_a(patchID)+i-1,
329  p->index_b(patchID)+j-1, p->index_c(patchID)+k-1);
330  if (pid < 0) {
331  DebugM(5, "ERROR, for patchID " << patchID <<" I got neigh pid = " << pid << "\n");
332  }
333  if (pid == patchID && ! (
334  ( (i-1) && p->periodic_a() ) ||
335  ( (j-1) && p->periodic_b() ) ||
336  ( (k-1) && p->periodic_c() ) )) {
337  mInfo[i][j][k] = NULL;
338  }
339  else {
340  // Does not work as expected for periodic with only two patches.
341  // Also need to check which image we want, but OK for now. -JCP
342  for (n = 0; n<numNeighbors; n++) {
343  if (pid == realInfo[n].destPatchID) {
344  mInfo[i][j][k] = &realInfo[n];
345  break;
346  }
347  }
348  if (n == numNeighbors) { // disaster!
349  DebugM(4,"BAD News, I could not find PID " << pid << "\n");
350  }
351  }
352  }
353 
354  DebugM(1,"Patch("<<patchID<<") # of neighbors = " << numNeighbors << "\n");
355 }
356 
357 HomePatch::~HomePatch()
358 {
359  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
360 #ifdef NODEAWARE_PROXY_SPANNINGTREE
361  ptnTree.resize(0);
362  #ifdef USE_NODEPATCHMGR
363  delete [] nodeChildren;
364  #endif
365 #endif
366  delete [] child;
367 }
368 
369 
370 void HomePatch::boxClosed(int box) {
371  // begin gbis
372  if (box == 5) {// end of phase 1
373  phase1BoxClosedCalled = true;
374  if (!psiSumBox.isOpen() && numGBISP1Arrived == proxy.size()) {
375  if (flags.doGBIS && flags.doNonbonded) {
376  // fprintf(stderr, "Calling awaken() on patch %d: 1\n", this->patchID);
377  sequencer->awaken();
378  }
379  } else {
380  //need to wait until proxies arrive before awakening
381  }
382  } else if (box == 6) {// intRad
383  //do nothing
384  } else if (box == 7) {// bornRad
385  //do nothing
386  } else if (box == 8) {// end of phase 2
387  phase2BoxClosedCalled = true;
388  //if no proxies, AfterP1 can't be called from receive
389  //so it will be called from here
390  if (!dEdaSumBox.isOpen() && numGBISP2Arrived == proxy.size()) {
391  if (flags.doGBIS && flags.doNonbonded) {
392  // fprintf(stderr, "Calling awaken() on patch %d: 2\n", this->patchID);
393  sequencer->awaken();
394  }
395  } else {
396  //need to wait until proxies arrive before awakening
397  }
398  } else if (box == 9) {
399  //do nothing
400  } else if (box == 10) {
401  //lcpoType Box closed: do nothing
402  } else {
403  //do nothing
404  }
405  // end gbis
406 
407  if ( ! --boxesOpen ) {
408  if ( replacementForces ) {
409  for ( int i = 0; i < numAtoms; ++i ) {
410  if ( replacementForces[i].replace ) {
411  for ( int j = 0; j < Results::maxNumForces; ++j ) { f[j][i] = 0; }
412  f[Results::normal][i] = replacementForces[i].force;
413  }
414  }
415  replacementForces = 0;
416  }
417  DebugM(1,patchID << ": " << CthSelf() << " awakening sequencer "
418  << sequencer->thread << "(" << patchID << ") @" << CmiTimer() << "\n");
419  // only awaken the sequencer thread if it is actually suspended
420 
421  phase3BoxClosedCalled = true;
422  if (flags.doGBIS) {
423  if (flags.doNonbonded) {
424  sequencer->awaken();
425  } else {
426  if (numGBISP1Arrived == proxy.size() &&
427  numGBISP2Arrived == proxy.size() &&
428  numGBISP3Arrived == proxy.size()) {
429  sequencer->awaken();//all boxes closed and all proxies arrived
430  }
431  }
432  } else {//non-gbis awaken
434  if(!simParams->CUDASOAintegrate) {
435  sequencer->awaken();
436  }
437  }
438  } else {
439  DebugM(1,patchID << ": " << boxesOpen << " boxes left to close.\n");
440  }
441 }
442 
443 void HomePatch::registerProxy(RegisterProxyMsg *msg) {
444  DebugM(4, "registerProxy("<<patchID<<") - adding node " <<msg->node<<"\n");
445  proxy.add(msg->node);
446  forceBox.clientAdd();
447 
448  isNewProxyAdded = 1;
449 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
450  isProxyChanged = 1;
451 #endif
452 
453  Random((patchID + 37) * 137).reorder(proxy.begin(),proxy.size());
454  delete msg;
455 }
456 
457 void HomePatch::unregisterProxy(UnregisterProxyMsg *msg) {
458 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
459  isProxyChanged = 1;
460 #endif
461  int n = msg->node;
462  NodeID *pe = proxy.begin();
463  for ( ; *pe != n; ++pe );  // linear scan; msg->node must be in the proxy list
464  forceBox.clientRemove();
465  proxy.del(pe - proxy.begin());
466  delete msg;
467 }
468 
469 #if USE_TOPOMAP && USE_SPANNING_TREE
470 
471 int HomePatch::findSubroots(int dim, int* subroots, int psize, int* pidscopy){
472  int nChild = 0;
473  int cones[6][proxySpanDim*proxySpanDim+proxySpanDim];
474  int conesizes[6] = {0,0,0,0,0,0};
475  int conecounters[6] = {0,0,0,0,0,0};
476  int childcounter = 0;
477  nChild = (psize>proxySpanDim)?proxySpanDim:psize;
478  TopoManager tmgr;
479  for(int i=0;i<psize;i++){
480  int cone = tmgr.getConeNumberForRank(pidscopy[i]);
481  cones[cone][conesizes[cone]++] = pidscopy[i];
482  }
483 
484  while(childcounter<nChild){
485  for(int i=0;i<6;i++){
486  if(conecounters[i]<conesizes[i]){
487  subroots[childcounter++] = cones[i][conecounters[i]++];
488  }
489  }
490  }
491  for(int i=nChild;i<proxySpanDim;i++)
492  subroots[i] = -1;
493  return nChild;
494 }
495 #endif // USE_TOPOMAP
496 
497 static int compDistance(const void *a, const void *b)
498 {
499  int d1 = abs(*(int *)a - CkMyPe());
500  int d2 = abs(*(int *)b - CkMyPe());
501  if (d1 < d2)
502  return -1;
503  else if (d1 == d2)
504  return 0;
505  else
506  return 1;
507 }
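// qsort comparator used when the spanning tree is first built (see the
// qsort call in buildSpanningTree below): orders PE ranks by
// |rank - CkMyPe()| so that ranks numerically closer to this PE come first.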
508 
509 void HomePatch::sendProxies()
510 {
511 #if USE_NODEPATCHMGR
512  CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
513  NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();
514  npm->sendProxyList(patchID, proxy.begin(), proxy.size());
515 #else
516  ProxyMgr::Object()->sendProxies(patchID, proxy.begin(), proxy.size());
517 #endif
518 }
519 
520 #ifdef NODEAWARE_PROXY_SPANNINGTREE
521 void HomePatch::buildNodeAwareSpanningTree(void){
522 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
523  DebugFileTrace *dft = DebugFileTrace::Object();
524  dft->openTrace();
525  dft->writeTrace("HomePatch[%d] has %d proxy on proc[%d] node[%d]\n", patchID, proxy.size(), CkMyPe(), CkMyNode());
526  dft->writeTrace("Proxies are: ");
527  for(int i=0; i<proxy.size(); i++) dft->writeTrace("%d(%d), ", proxy[i], CkNodeOf(proxy[i]));
528  dft->writeTrace("\n");
529  dft->closeTrace();
530 #endif
531 
532  //build the naive spanning tree for this home patch
533  if(! proxy.size()) {
534  //This case will not happen in practice; it can only occur in
535  //debugging runs where a spanning tree is enforced.
536  //Chao Mei
537  return;
538  }
539  ProxyMgr::buildSinglePatchNodeAwareSpanningTree(patchID, proxy, ptnTree);
540  //optimize on the naive spanning tree
541 
542  //setup the children
543  setupChildrenFromProxySpanningTree();
544  //send down to children
545  sendNodeAwareSpanningTree();
546 }
547 
548 void HomePatch::setupChildrenFromProxySpanningTree(){
549 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
550  isProxyChanged = 1;
551 #endif
552  if(ptnTree.size()==0) {
553  nChild = 0;
554  delete [] child;
555  child = NULL;
556  #ifdef USE_NODEPATCHMGR
557  numNodeChild = 0;
558  delete [] nodeChildren;
559  nodeChildren = NULL;
560  #endif
561  return;
562  }
563  proxyTreeNode *rootnode = &ptnTree.item(0);
564  CmiAssert(rootnode->peIDs[0] == CkMyPe());
565  //set up children
566  //1. add external children (the first proc inside the proxy tree node)
567  //2. add internal children (bounded by the proxySpanDim slots left over)
568  int internalChild = rootnode->numPes-1;
569  int externalChild = ptnTree.size()-1;
570  externalChild = (proxySpanDim>externalChild)?externalChild:proxySpanDim;
571  int internalSlots = proxySpanDim-externalChild;
572  if(internalChild>0){
573  if(internalSlots==0) {
574  //ensure at least one internal child
575  internalChild = 1;
576  }else{
577  internalChild = (internalSlots>internalChild)?internalChild:internalSlots;
578  }
579  }
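 // Illustrative example (assuming proxySpanDim = 4): with ptnTree.size()
 // == 3 (two remote nodes) and 4 PEs on the root node, externalChild = 2,
 // internalSlots = 2, internalChild = min(3,2) = 2, so nChild = 4: the
 // first PE of each remote node plus two same-node PEs become children.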
580 
581  nChild = externalChild+internalChild;
582  CmiAssert(nChild>0);
583 
584  //exclude the root node
585  delete [] child;
586  child = new int[nChild];
587 
588  for(int i=0; i<externalChild; i++) {
589  child[i] = ptnTree.item(i+1).peIDs[0];
590  }
591  for(int i=externalChild, j=1; i<nChild; i++, j++) {
592  child[i] = rootnode->peIDs[j];
593  }
594 
595 #ifdef USE_NODEPATCHMGR
596  //only register the cores that have proxy patches. The HomePatch's core
597  //doesn't need to be registered.
598  CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
599  NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();
600  if(rootnode->numPes==1){
601  npm->registerPatch(patchID, 0, NULL);
602  }
603  else{
604  npm->registerPatch(patchID, rootnode->numPes-1, &rootnode->peIDs[1]);
605  }
606 
607  //set up childrens in terms of node ids
608  numNodeChild = externalChild;
609  if(internalChild) numNodeChild++;
610  delete [] nodeChildren;
611  nodeChildren = new int[numNodeChild];
612  for(int i=0; i<externalChild; i++) {
613  nodeChildren[i] = ptnTree.item(i+1).nodeID;
614  }
615  //the last entry always stores this node id if there are proxies
616  //on other cores of the same node for this patch.
617  if(internalChild)
618  nodeChildren[numNodeChild-1] = rootnode->nodeID;
619 #endif
620 
621 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
622  DebugFileTrace *dft = DebugFileTrace::Object();
623  dft->openTrace();
624  dft->writeTrace("HomePatch[%d] has %d children: ", patchID, nChild);
625  for(int i=0; i<nChild; i++)
626  dft->writeTrace("%d ", child[i]);
627  dft->writeTrace("\n");
628  dft->closeTrace();
629 #endif
630 }
631 #endif
632 
633 #ifdef NODEAWARE_PROXY_SPANNINGTREE
634 //This is not an entry method, but takes an argument of message type
635 void HomePatch::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
636  //set up the whole tree ptnTree
637  int treesize = msg->numNodesWithProxies;
638  ptnTree.resize(treesize);
639  int *pAllPes = msg->allPes;
640  for(int i=0; i<treesize; i++) {
641  proxyTreeNode *oneNode = &ptnTree.item(i);
642  delete [] oneNode->peIDs;
643  oneNode->numPes = msg->numPesOfNode[i];
644  oneNode->nodeID = CkNodeOf(*pAllPes);
645  oneNode->peIDs = new int[oneNode->numPes];
646  for(int j=0; j<oneNode->numPes; j++) {
647  oneNode->peIDs[j] = *pAllPes;
648  pAllPes++;
649  }
650  }
651  //setup children
652  setupChildrenFromProxySpanningTree();
653  //send down to children
654  sendNodeAwareSpanningTree();
655 }
656 
657 void HomePatch::sendNodeAwareSpanningTree(){
658  if(ptnTree.size()==0) return;
659  ProxyNodeAwareSpanningTreeMsg *msg =
660  ProxyNodeAwareSpanningTreeMsg::getANewMsg(patchID, CkMyPe(), ptnTree.begin(), ptnTree.size());
661 
662  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
663  msg->printOut("HP::sendST");
664  #endif
665 
666  CmiAssert(CkMyPe() == msg->allPes[0]);
667  ProxyMgr::Object()->sendNodeAwareSpanningTree(msg);
668 
669 }
670 #else
671 void HomePatch::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){}
672 void HomePatch::sendNodeAwareSpanningTree(){}
673 #endif
674 
675 #ifndef NODEAWARE_PROXY_SPANNINGTREE
676 // recv a spanning tree from load balancer
677 void HomePatch::recvSpanningTree(int *t, int n)
678 {
679  int i;
680  nChild=0;
681  tree.resize(n);
682  for (i=0; i<n; i++) {
683  tree[i] = t[i];
684  }
685 
686  for (i=1; i<=proxySpanDim; i++) {
687  if (tree.size() <= i) break;
688  child[i-1] = tree[i];
689  nChild++;
690  }
691 
692 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
693  isProxyChanged = 1;
694 #endif
695 
696  // send down to children
697  sendSpanningTree();
698 }
699 
700 void HomePatch::sendSpanningTree()
701 {
702  if (tree.size() == 0) return;
703  ProxySpanningTreeMsg *msg = new ProxySpanningTreeMsg;
704  msg->patch = patchID;
705  msg->node = CkMyPe();
706  msg->tree.copy(tree); // copy data for thread safety
707  ProxyMgr::Object()->sendSpanningTree(msg);
708 }
709 #else
710 void HomePatch::recvSpanningTree(int *t, int n){}
711 void HomePatch::sendSpanningTree(){}
712 #endif
713 
714 #ifndef NODEAWARE_PROXY_SPANNINGTREE
715 void HomePatch::buildSpanningTree(void)
716 {
717  nChild = 0;
718  int psize = proxy.size();
719  if (psize == 0) return;
720  NodeIDList oldtree; oldtree.swap(tree);
721  int oldsize = oldtree.size();
722  tree.resize(psize + 1);
723  tree.setall(-1);
724  tree[0] = CkMyPe();
725  int s=1, e=psize+1;
726  NodeIDList::iterator pli;
727  int patchNodesLast =
728  ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
729  int nNonPatch = 0;
730 #if 1
731  // try to put it to the same old tree
732  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
733  {
734  int oldindex = oldtree.find(*pli);
735  if (oldindex != -1 && oldindex < psize) {
736  tree[oldindex] = *pli;
737  }
738  }
739  s=1; e=psize;
740  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
741  {
742  if (tree.find(*pli) != -1) continue; // already assigned
743  if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
744  while (tree[e] != -1) { e--; if (e==-1) e = psize; }
745  tree[e] = *pli;
746  } else {
747  while (tree[s] != -1) { s++; if (s==psize+1) s = 1; }
748  tree[s] = *pli;
749  nNonPatch++;
750  }
751  }
752 #if 1
753  if (oldsize==0 && nNonPatch) {
754  // first time, sort by distance
755  qsort(&tree[1], nNonPatch, sizeof(int), compDistance);
756  }
757 #endif
758 
759  //CkPrintf("home: %d:(%d) %d %d %d %d %d\n", patchID, tree.size(),tree[0],tree[1],tree[2],tree[3],tree[4]);
760 
761 #if USE_TOPOMAP && USE_SPANNING_TREE
762 
763  //Right now only works for spanning trees with two levels
764  int *treecopy = new int [psize];
765  int subroots[proxySpanDim];
766  int subsizes[proxySpanDim];
767  int subtrees[proxySpanDim][proxySpanDim];
768  int idxes[proxySpanDim];
769  int i = 0;
770 
771  for(i=0;i<proxySpanDim;i++){
772  subsizes[i] = 0;
773  idxes[i] = i;
774  }
775 
776  for(i=0;i<psize;i++){
777  treecopy[i] = tree[i+1];
778  }
779 
780  TopoManager tmgr;
781  tmgr.sortRanksByHops(treecopy,nNonPatch);
782  tmgr.sortRanksByHops(treecopy+nNonPatch,
783  psize-nNonPatch);
784 
785  /* build tree and subtrees */
786  nChild = findSubroots(proxySpanDim,subroots,psize,treecopy);
787  delete [] treecopy;
788 
789  for(int i=1;i<psize+1;i++){
790  int isSubroot=0;
791  for(int j=0;j<nChild;j++)
792  if(tree[i]==subroots[j]){
793  isSubroot=1;
794  break;
795  }
796  if(isSubroot) continue;
797 
798  int bAdded = 0;
799  tmgr.sortIndexByHops(tree[i], subroots,
800  idxes, proxySpanDim);
801  for(int j=0;j<proxySpanDim;j++){
802  if(subsizes[idxes[j]]<proxySpanDim){
803  subtrees[idxes[j]][(subsizes[idxes[j]])++] = tree[i];
804  bAdded = 1;
805  break;
806  }
807  }
808  if( psize > proxySpanDim && ! bAdded ) {
809  NAMD_bug("HomePatch BGL Spanning Tree error: Couldn't find subtree for leaf\n");
810  }
811  }
812 
813 #else /* USE_TOPOMAP && USE_SPANNING_TREE */
814 
815  for (int i=1; i<=proxySpanDim; i++) {
816  if (tree.size() <= i) break;
817  child[i-1] = tree[i];
818  nChild++;
819  }
820 #endif
821 #endif
822 
823 #if 0
824  // for debugging
825  CkPrintf("[%d] Spanning tree for %d with %d children %d nNonPatch %d\n", CkMyPe(), patchID, psize, nNonPatch);
826  for (int i=0; i<psize+1; i++) {
827  CkPrintf("%d ", tree[i]);
828  }
829  CkPrintf("\n");
830 #endif
831  // send to children nodes
832  sendSpanningTree();
833 }
834 #endif
835 
836 
837 void HomePatch::receiveResults(ProxyResultVarsizeMsg *msg) {
838 
839  numGBISP3Arrived++;
840  DebugM(4, "patchID("<<patchID<<") receiveRes() nodeID("<<msg->node<<")\n");
841  int n = msg->node;
842  Results *r = forceBox.clientOpen();
843 
844  char *iszeroPtr = msg->isZero;
845  Force *msgFPtr = msg->forceArr;
846 
847  for ( int k = 0; k < Results::maxNumForces; ++k )
848  {
849  Force *rfPtr = r->f[k];
850  for(int i=0; i<msg->flLen[k]; i++, rfPtr++, iszeroPtr++) {
851  if((*iszeroPtr)!=1) {
852  *rfPtr += *msgFPtr;
853  msgFPtr++;
854  }
855  }
856  }
857  forceBox.clientClose();
858  delete msg;
859 }
860 
861 void HomePatch::receiveResults(ProxyResultMsg *msg) {
862  numGBISP3Arrived++;
863  DebugM(4, "patchID("<<patchID<<") receiveRes() nodeID("<<msg->node<<")\n");
864  int n = msg->node;
865  Results *r = forceBox.clientOpen();
866  for ( int k = 0; k < Results::maxNumForces; ++k )
867  {
868  Force *f = r->f[k];
869  register ForceList::iterator f_i, f_e;
870  f_i = msg->forceList[k]->begin();
871  f_e = msg->forceList[k]->end();
872  for ( ; f_i != f_e; ++f_i, ++f ) *f += *f_i;
873  }
874  forceBox.clientClose();
875  delete msg;
876 }
877 
878 void HomePatch::receiveResults(ProxyCombinedResultRawMsg* msg)
879 {
880  numGBISP3Arrived++;
881  DebugM(4, "patchID("<<patchID<<") receiveRes() #nodes("<<msg->nodeSize<<")\n");
882  Results *r = forceBox.clientOpen(msg->nodeSize);
883  register char* isNonZero = msg->isForceNonZero;
884  register Force* f_i = msg->forceArr;
885  for ( int k = 0; k < Results::maxNumForces; ++k )
886  {
887  Force *f = r->f[k];
888  int nf = msg->flLen[k];
889 #ifdef ARCH_POWERPC
890 #pragma disjoint (*f_i, *f)
891 #endif
892  for (int count = 0; count < nf; count++) {
893  if(*isNonZero){
894  f[count].x += f_i->x;
895  f[count].y += f_i->y;
896  f[count].z += f_i->z;
897  f_i++;
898  }
899  isNonZero++;
900  }
901  }
902  forceBox.clientClose(msg->nodeSize);
903 
904  delete msg;
905 }
906 
907 void HomePatch::qmSwapAtoms()
908 {
909  // This is used for LSS in QM/MM simulations.
910  // Changes atom labels so that we effectively exchange solvent
911  // molecules between classical and quantum modes.
912 
913  SimParameters *simParams = Node::Object()->simParameters;
914  int numQMAtms = Node::Object()->molecule->get_numQMAtoms();
915  const Real * const qmAtomGroup = Node::Object()->molecule->get_qmAtomGroup() ;
916  const int *qmAtmIndx = Node::Object()->molecule->get_qmAtmIndx() ;
917  Real *qmAtmChrg = Node::Object()->molecule->get_qmAtmChrg() ;
918 
919  ComputeQMMgr *mgrP = CProxy_ComputeQMMgr::ckLocalBranch(
920  CkpvAccess(BOCclass_group).computeQMMgr) ;
921 
922  FullAtom *a_i = atom.begin();
923 
924  for (int i=0; i<numAtoms; ++i ) {
925 
926  LSSSubsDat *subP = lssSubs(mgrP).find( LSSSubsDat(a_i[i].id) ) ;
927 
928  if ( subP != NULL ) {
929  a_i[i].id = subP->newID ;
930  a_i[i].vdwType = subP->newVdWType ;
931 
932  // If we are swapping a classical atom with a QM one, the charge
933  // will need extra handling.
934  if (qmAtomGroup[subP->newID] > 0 && simParams->PMEOn) {
935  // We make sure that the last atom charge calculated for the
936  // QM atom being transferred here is available for PME
937  // in the next step.
938 
939  // Loops over all QM atoms (in all QM groups) comparing their
940  // global indices (the sequential atom ID from NAMD).
941  for (int qmIter=0; qmIter<numQMAtms; qmIter++) {
942 
943  if (qmAtmIndx[qmIter] == subP->newID) {
944  qmAtmChrg[qmIter] = subP->newCharge;
945  break;
946  }
947 
948  }
949 
950  // For QM atoms, the charge in the full atom structure is zero.
951  // Electrostatic interactions between QM atoms and their
952  // environment are handled in ComputeQM.
953  a_i[i].charge = 0;
954  }
955  else {
956  // If we are swapping a QM atom with a classical one, only the charge
957  // in the full atom structure needs updating, since it used to be zero.
958  a_i[i].charge = subP->newCharge ;
959  }
960  }
961  }
962 
963  return;
964 }
965 
966 
968 //
969 // begin SOA
970 //
971 void HomePatch::positionsReady_SOA(int doMigration)
972 {
973  SimParameters *simParams = Node::Object()->simParameters;
974  // char prbuf[32];
975  // sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::POSITIONS_READY_SOA], this->getPatchID());
976  // NAMD_EVENT_START_EX(1, NamdProfileEvent::POSITIONS_READY_SOA, prbuf);
977  if(!simParams->CUDASOAintegrate) flags.sequence++;
978  // flags.sequence++;
979  if (!patchMapRead) { readPatchMap(); }
980  if (numNeighbors && ! simParams->staticAtomAssignment) {
981  if (doMigration) {
982  // copy SOA updates to AOS
983  // XXX TODO:
984  copy_updates_to_AOS();
985  // make sure to invalidate RATTLE lists when atoms move
986  rattleListValid_SOA = false;
987  rattleListValid = false;
988  // this has a suspend
989  doAtomMigration();
990  } else {
991  // XXX TODO: Get rid of this marginCheck for every tstep
992  // move this to the GPU afterwards
993  if(!simParams->CUDASOAintegrate) doMarginCheck_SOA();
994  }
995  }
996 
997 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
998  char prbuf[32];
999  sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::POSITIONS_READY], this->getPatchID());
1000  NAMD_EVENT_START_EX(1, NamdProfileEvent::POSITIONS_READY, prbuf);
1001 #endif
1002 
1003 #if 0
1004  if (doMigration && simParams->qmLSSOn)
1005  qmSwapAtoms();
1006 #endif
1007 
1008 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1009  if ( doMigration ) {
1010  // XXX Since we just migrated FullAtom array is up-to-date
1011  int n = numAtoms;
1012  int * __restrict ao_SOA = patchDataSOA.sortOrder;
1013  int * __restrict unsort_SOA = patchDataSOA.unsortOrder;
1014  #if defined(NAMD_CUDA) || defined(NAMD_HIP) || (defined(NAMD_MIC) && MIC_SORT_ATOMS != 0)
1015  //#if 0
1016  int nfree_SOA;
1017  if ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces ) {
1018  int k = 0;
1019  int k2 = n;
1020  int * __restrict atomFixed = patchDataSOA.atomFixed;
1021  for ( int j=0; j<n; ++j ) {
1022  // put fixed atoms at end
1023  if ( atomFixed[j] ) ao_SOA[--k2] = j;
1024  else ao_SOA[k++] = j;
1025  }
1026  nfree_SOA = k;
1027  } else {
1028  nfree_SOA = n;
1029  for ( int j=0; j<n; ++j ) {
1030  ao_SOA[j] = j;
1031  }
1032  }
1033 #if 1
1034  sortAtomsForCUDA_SOA(ao_SOA, unsort_SOA,
1035  patchDataSOA.pos_x, patchDataSOA.pos_y, patchDataSOA.pos_z,
1036  nfree_SOA, n);
1037 #endif
1038 #if 0
1039  for (int i = 0; i < n; ++i) {
1040  ao_SOA[i] = i;
1041  printf("ao_SOA[%d] = %d\n", i, ao_SOA[i]);
1042  }
1043 #endif
1044 
1045 #else
1046  for (int i = 0; i < n; ++i) {
1047  ao_SOA[i] = i;
1048  }
1049 #endif
1050  }
1051  {
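  // Pre-scale each atom's charge by sqrt(COULOMB * scaling / dielectric)
  // so that the product of two scaled charges in the nonbonded kernels
  // already carries the full Coulomb prefactor (dielectric_1 is
  // 1/dielectric).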
1052  const double charge_scaling = sqrt(COULOMB * ComputeNonbondedUtil::scaling * ComputeNonbondedUtil::dielectric_1);
1053  const Vector ucenter = lattice.unscale(center);
1054  const BigReal ucenter_x = ucenter.x;
1055  const BigReal ucenter_y = ucenter.y;
1056  const BigReal ucenter_z = ucenter.z;
1057  const int n = numAtoms;
1058  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1059  int n_16 = n;
1060  n_16 = (n + 15) & (~15);
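  // (n + 15) & ~15 rounds the count up to a multiple of 16 (e.g. 100 -> 112)
  // so that each of the x/y/z/q component arrays carved out below starts on
  // a vector-friendly boundary for the MIC kernels.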
1061  cudaAtomList.resize(n_16);
1062  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1063  mic_position_t *atom_x = ((mic_position_t*)ac) + (0 * n_16);
1064  mic_position_t *atom_y = ((mic_position_t*)ac) + (1 * n_16);
1065  mic_position_t *atom_z = ((mic_position_t*)ac) + (2 * n_16);
1066  mic_position_t *atom_q = ((mic_position_t*)ac) + (3 * n_16);
1067  #elif defined(NAMD_AVXTILES)
1068  int n_avx = (n + 15) & (~15);
1069  cudaAtomList.resize(n_avx);
1070  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1071  tiles.realloc(n, ac);
1072  #else
1073  if(doMigration) cudaAtomList.resize(n);
1074  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1075  #endif
1076  double * __restrict pos_x = patchDataSOA.pos_x;
1077  double * __restrict pos_y = patchDataSOA.pos_y;
1078  double * __restrict pos_z = patchDataSOA.pos_z;
1079  float * __restrict charge = patchDataSOA.charge;
1080  int * __restrict ao_SOA = patchDataSOA.sortOrder;
1081 #ifndef NODEGROUP_FORCE_REGISTER
1082 //#if 1
1083  for ( int k=0; k<n; ++k ) {
1084  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1085  int j = ao_SOA[k];
1086  atom_x[k] = pos_x[j] - ucenter_x;
1087  atom_y[k] = pos_y[j] - ucenter_y;
1088  atom_z[k] = pos_z[j] - ucenter_z;
1089  atom_q[k] = charge_scaling * charge[j];
1090  #else
1091  // XXX TODO: This has to go
1092  int j = ao_SOA[k];
1093  // JM: Calculating single-precision patch-centered atomic coordinates as
1094  // offsets. By adding single-precision offsets to double-precision
1095  // patch-patch center distances, we maintain full precision
1096  // XXX NOTE: check where I can use this to use float instead of double in NAMD
1097  ac[k].x = pos_x[j] - ucenter_x;
1098  ac[k].y = pos_y[j] - ucenter_y;
1099  ac[k].z = pos_z[j] - ucenter_z;
1100  // XXX TODO: Compute charge scaling on GPUs and not here to avoid a copy
1101  // for every timestep
1102  // XXX TODO: Check when do we have to update this value and do it
1103  // on the gpu
1104  ac[k].q = charge_scaling * charge[j];
1105  #endif
1106  }
1107 #else
1108  if(!simParams->CUDASOAintegrate || doMigration){
1109  for ( int k=0; k<n; ++k ) {
1110  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1111  int j = ao_SOA[k];
1112  atom_x[k] = pos_x[j] - ucenter_x;
1113  atom_y[k] = pos_y[j] - ucenter_y;
1114  atom_z[k] = pos_z[j] - ucenter_z;
1115  atom_q[k] = charge_scaling * charge[j];
1116  #else
1117  // XXX TODO: This has to go
1118  int j = ao_SOA[k];
1119  // JM: Calculating single-precision patch-centered atomic coordinates as
1120  // offsets. By adding single-precision offsets to double-precision
1121  // patch-patch center distances, we maintain full precision
1122  ac[k].x = pos_x[j] - ucenter_x;
1123  ac[k].y = pos_y[j] - ucenter_y;
1124  ac[k].z = pos_z[j] - ucenter_z;
1125  // XXX TODO: Compute charge scaling on GPUs and not here to avoid a copy
1126  // for every timestep
1127  // XXX TODO: Check when do we have to update this value and do it
1128  // on the gpu
1129  ac[k].q = charge_scaling * charge[j];
1130  #endif
1131  }
1132  }
1133 #endif
1134  }
1135 #else
1136  doMigration = doMigration && numNeighbors;
1137 #endif
1138  doMigration = doMigration || ! patchMapRead;
1139 
1140  doMigration = doMigration || doAtomUpdate;
1141  doAtomUpdate = false;
1142 
1143  // Workaround for oversize groups:
1144  // reset nonbondedGroupSize (ngs) before force calculation,
1145  // making sure that the subset of the hydrogen group starting with
1146  // the parent atom is entirely within 0.5 * hgroupCutoff.
1147  // XXX hydrogenGroupSize remains constant but is checked for nonzero
1148  // XXX should be skipped for CUDA, ngs not used by CUDA kernels
1149  // XXX should this also be skipped for KNL kernels?
1150  // ngs used by ComputeNonbondedBase.h - CPU nonbonded kernels
1151  // ngs used by ComputeGBIS.C - CPU GB nonbonded kernels
1152 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1153  doGroupSizeCheck_SOA();
1154 #endif
1155 
1156  // Copy information needed by computes and proxys to Patch::p.
1157  if (doMigration) {
1158  // Atom data changes after migration, so must copy all of it.
1159  // Must resize CompAtom and CompAtomExt arrays to new atom count.
1160  p.resize(numAtoms);
1161  pExt.resize(numAtoms);
1162  CompAtom * __restrict p_i = p.begin();
1163  CompAtomExt * __restrict pExt_i = pExt.begin();
1164  const double * __restrict pos_x = patchDataSOA.pos_x;
1165  const double * __restrict pos_y = patchDataSOA.pos_y;
1166  const double * __restrict pos_z = patchDataSOA.pos_z;
1167  const float * __restrict charge = patchDataSOA.charge;
1168  const int * __restrict vdwType = patchDataSOA.vdwType;
1169  const int * __restrict partition = patchDataSOA.partition;
1170 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1171  const int * __restrict nonbondedGroupSize = patchDataSOA.nonbondedGroupSize;
1172 #endif
1173  const int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
1174  const int * __restrict isWater = patchDataSOA.isWater;
1175  int n = numAtoms;
1176  for (int i=0; i < n; i++) {
1177  p_i[i].position.x = pos_x[i];
1178  p_i[i].position.y = pos_y[i];
1179  p_i[i].position.z = pos_z[i];
1180  p_i[i].charge = charge[i];
1181  p_i[i].vdwType = vdwType[i];
1182  p_i[i].partition = partition[i];
1183 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1184  p_i[i].nonbondedGroupSize = nonbondedGroupSize[i];
1185 #endif
1186  p_i[i].hydrogenGroupSize = hydrogenGroupSize[i];
1187  p_i[i].isWater = isWater[i];
1188  }
1189 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1190  const int * __restrict sortOrder = patchDataSOA.sortOrder;
1191 #endif
1192  const int * __restrict id = patchDataSOA.id;
1193 #if defined(MEM_OPT_VERSION)
1194  const int * __restrict exclId = patchDataSOA.exclId;
1195  const int * __restrict sigId = patchDataSOA.sigId;
1196 #endif
1197  const int * __restrict atomFixed = patchDataSOA.atomFixed;
1198  const int * __restrict groupFixed = patchDataSOA.groupFixed;
1199  // Copy into CompAtomExt using typecast to temporary CompAtomExtCopy
1200  // to avoid loop vectorization bug in Intel 2018 compiler.
1201 #ifndef USE_NO_BITFIELDS
1202  CompAtomExtCopy *pExtCopy_i = (CompAtomExtCopy *) pExt_i;
1203 #endif // USE_NO_BITFIELDS
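  // With bitfields enabled, the 0/1 fixed-atom flags are folded into the
  // id (or sigId) word below: multiplying NAMD_ATOM_FIXED_MASK by the flag
  // yields either the mask bit or zero, avoiding a branch inside the loop.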
1204  for (int i=0; i < n; i++) {
1205 #ifndef USE_NO_BITFIELDS
1206 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1207  pExtCopy_i[i].sortOrder = sortOrder[i];
1208 #endif
1209 #if defined(MEM_OPT_VERSION)
1210  pExtCopy_i[i].id = id[i];
1211  pExtCopy_i[i].exclId = exclId[i];
1212  ASSERT(atomFixed[i] == 0 || atomFixed[i] == 1);
1213  ASSERT(groupFixed[i] == 0 || groupFixed[i] == 1);
1214  uint32 atomFixedBit = NAMD_ATOM_FIXED_MASK * atomFixed[i];
1215  uint32 groupFixedBit = NAMD_GROUP_FIXED_MASK * groupFixed[i];
1216  pExtCopy_i[i].sigId = (sigId[i] | atomFixedBit | groupFixedBit);
1217 #else
1218  ASSERT(atomFixed[i] == 0 || atomFixed[i] == 1);
1219  ASSERT(groupFixed[i] == 0 || groupFixed[i] == 1);
1220  uint32 atomFixedBit = NAMD_ATOM_FIXED_MASK * atomFixed[i];
1221  uint32 groupFixedBit = NAMD_GROUP_FIXED_MASK * groupFixed[i];
1222  pExtCopy_i[i].id = (id[i] | atomFixedBit | groupFixedBit);
1223 #endif // if defined(MEM_OPT_VERSION)
1224 #else
1225 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1226  pExt_i[i].sortOrder = sortOrder[i];
1227 #endif
1228  pExt_i[i].id = id[i];
1229 #if defined(MEM_OPT_VERSION)
1230  pExt_i[i].exclId = exclId[i];
1231  pExt_i[i].sigId = sigId[i];
1232 #endif
1233  pExt_i[i].atomFixed = atomFixed[i];
1234  pExt_i[i].groupFixed = groupFixed[i];
1235 #endif // USE_NO_BITFIELDS
1236  }
1237  }
1238  else {
1239  // JM: This is done for every timestep
1240  // Only need to copy positions, nonbondedGroupSize, and sortOrder.
1241  // Other data remains unchanged.
1242  CompAtom * __restrict p_i = p.begin();
1243  CompAtomExt * __restrict pExt_i = pExt.begin();
1244  const double * __restrict pos_x = patchDataSOA.pos_x;
1245  const double * __restrict pos_y = patchDataSOA.pos_y;
1246  const double * __restrict pos_z = patchDataSOA.pos_z;
1247 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1248  const int * __restrict nonbondedGroupSize = patchDataSOA.nonbondedGroupSize;
1249 #endif
1250  int n = numAtoms;
1251 #ifndef NODEGROUP_FORCE_REGISTER
1252 //#if 1
1253  for (int i=0; i < n; i++) {
1254  p_i[i].position.x = pos_x[i];
1255  p_i[i].position.y = pos_y[i];
1256  p_i[i].position.z = pos_z[i];
1257 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1258  p_i[i].nonbondedGroupSize = nonbondedGroupSize[i];
1259 #endif
1260  }
1261 #else
1262  if(!simParams->CUDASOAintegrate || doMigration){
1263  for (int i=0; i < n; i++) {
1264  p_i[i].position.x = pos_x[i];
1265  p_i[i].position.y = pos_y[i];
1266  p_i[i].position.z = pos_z[i];
1267 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1268  p_i[i].nonbondedGroupSize = nonbondedGroupSize[i];
1269 #endif
1270  }
1271  }
1272 #endif
1273 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1274  const int * __restrict sortOrder = patchDataSOA.sortOrder;
1275 #ifdef NODEGROUP_FORCE_REGISTER
1276 //#if 0
1277  if(!simParams->CUDASOAintegrate || doMigration){
1278  for (int i=0; i < n; i++) {
1279  pExt_i[i].sortOrder = sortOrder[i];
1280  }
1281  }
1282 #else
1283  //if(!simParams->CUDASOAintegrate || doMigration){
1284  for (int i=0; i < n; i++) {
1285  pExt_i[i].sortOrder = sortOrder[i];
1286  }
1287 #endif
1288 #endif
1289  } // end Copy information to Patch::p
1290 
1291  // Measure atom movement to test pairlist validity
1292  // XXX TODO: Check if this ever needs to be done if CUDASOAintegrate
1293 #ifdef NODEGROUP_FORCE_REGISTER
1294  if(!simParams->CUDASOAintegrate || flags.sequence == 0){
1295  doPairlistCheck();
1296  }
1297 #else
1298  doPairlistCheck();
1299 #endif
1300 
1301 #if 0
1302  if (flags.doMolly) mollyAverage();
1303  // BEGIN LA
1304  if (flags.doLoweAndersen) loweAndersenVelocities();
1305  // END LA
1306 
1307  if (flags.doGBIS) {
1308  //reset for next time step
1309  numGBISP1Arrived = 0;
1310  phase1BoxClosedCalled = false;
1311  numGBISP2Arrived = 0;
1312  phase2BoxClosedCalled = false;
1313  numGBISP3Arrived = 0;
1314  phase3BoxClosedCalled = false;
1315  if (doMigration || isNewProxyAdded)
1316  setGBISIntrinsicRadii();
1317  }
1318 
1319  if (flags.doLCPO) {
1320  if (doMigration || isNewProxyAdded) {
1321  setLcpoType();
1322  }
1323  }
1324 #endif
1325 
1326  // Must Add Proxy Changes when migration completed!
1327  NodeIDList::iterator pli;
1328  int *pids = NULL;
1329  int pidsPreAllocated = 1;
1330  int npid;
1331  if (proxySendSpanning == 0) {
1332  npid = proxy.size();
1333  pids = new int[npid];
1334  pidsPreAllocated = 0;
1335  int *pidi = pids;
1336  int *pide = pids + proxy.size();
1337  int patchNodesLast =
1338  ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
1339  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
1340  {
1341  if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
1342  *(--pide) = *pli;
1343  } else {
1344  *(pidi++) = *pli;
1345  }
1346  }
1347  }
1348  else {
1349 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1350  #ifdef USE_NODEPATCHMGR
1351  npid = numNodeChild;
1352  pids = nodeChildren;
1353  #else
1354  npid = nChild;
1355  pids = child;
1356  #endif
1357 #else
1358  npid = nChild;
1359  pidsPreAllocated = 0;
1360  pids = new int[proxySpanDim];
1361  for (int i=0; i<nChild; i++) pids[i] = child[i];
1362 #endif
1363  }
1364  if (npid) { //have proxies
1365  int seq = flags.sequence;
1366  int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
1367  //begin to prepare proxy msg and send it
1368  int pdMsgPLLen = p.size();
1369  int pdMsgAvgPLLen = 0;
1370 #if 0
1371  if(flags.doMolly) {
1372  pdMsgAvgPLLen = p_avg.size();
1373  }
1374 #endif
1375  // BEGIN LA
1376  int pdMsgVLLen = 0;
1377 #if 0
1378  if (flags.doLoweAndersen) {
1379  pdMsgVLLen = v.size();
1380  }
1381 #endif
1382  // END LA
1383 
1384  int intRadLen = 0;
1385 #if 0
1386  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
1387  intRadLen = numAtoms * 2;
1388  }
1389 #endif
1390 
1391  //LCPO
1392  int lcpoTypeLen = 0;
1393 #if 0
1394  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
1395  lcpoTypeLen = numAtoms;
1396  }
1397 #endif
1398 
1399  int pdMsgPLExtLen = 0;
1400  if(doMigration || isNewProxyAdded) {
1401  pdMsgPLExtLen = pExt.size();
1402  }
1403 
1404  int cudaAtomLen = 0;
1405 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1406  cudaAtomLen = numAtoms;
1407 #endif
1408 
1409  #ifdef NAMD_MIC
1410  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
1411  cudaAtomLen = numAtoms;
1412  #else
1413  cudaAtomLen = (numAtoms + 15) & (~15);
1414  #endif
1415  #endif
1416  ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgVLLen, intRadLen,
1417  lcpoTypeLen, pdMsgPLExtLen, cudaAtomLen, PRIORITY_SIZE) ProxyDataMsg; // BEGIN LA, END LA
1418 
1419  SET_PRIORITY(nmsg,seq,priority);
1420  nmsg->patch = patchID;
1421  nmsg->flags = flags;
1422  nmsg->plLen = pdMsgPLLen;
1423  //copying data to the newly created msg
1424  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
1425  memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
1426  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
1427  nmsg->avgPlLen = pdMsgAvgPLLen;
1428 #if 0
1429  if(flags.doMolly) {
1430  memcpy(nmsg->avgPositionList, p_avg.begin(), sizeof(CompAtom)*pdMsgAvgPLLen);
1431  }
1432 #endif
1433  // BEGIN LA
1434  nmsg->vlLen = pdMsgVLLen;
1435 #if 0
1436  if (flags.doLoweAndersen) {
1437  memcpy(nmsg->velocityList, v.begin(), sizeof(CompAtom)*pdMsgVLLen);
1438  }
1439 #endif
1440  // END LA
1441 
1442 #if 0
1443  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
1444  for (int i = 0; i < numAtoms * 2; i++) {
1445  nmsg->intRadList[i] = intRad[i];
1446  }
1447  }
1448 #endif
1449 
1450 #if 0
1451  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
1452  for (int i = 0; i < numAtoms; i++) {
1453  nmsg->lcpoTypeList[i] = lcpoType[i];
1454  }
1455  }
1456 #endif
1457  nmsg->plExtLen = pdMsgPLExtLen;
1458  if(doMigration || isNewProxyAdded){
1459  memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
1460  }
1461 
1462 // DMK
1463 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1464  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
1465  memcpy(nmsg->cudaAtomList, cudaAtomPtr, sizeof(CudaAtom)*cudaAtomLen);
1466  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
1467 #endif
1468 
1469 #if NAMD_SeparateWaters != 0
1470  //DMK - Atom Separation (water vs. non-water)
1471  nmsg->numWaterAtoms = numWaterAtoms;
1472 #endif
1473 
1474 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
1475  nmsg->isFromImmMsgCall = 0;
1476 #endif
1477 
1478  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1479  DebugFileTrace *dft = DebugFileTrace::Object();
1480  dft->openTrace();
1481  dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
1482  for(int i=0; i<npid; i++) {
1483  dft->writeTrace("%d ", pids[i]);
1484  }
1485  dft->writeTrace("\n");
1486  dft->closeTrace();
1487  #endif
1488 
1489 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1490  if (isProxyChanged || localphs == NULL)
1491  {
1492 //CmiPrintf("[%d] Build persistent: isProxyChanged: %d %p\n", CkMyPe(), isProxyChanged, localphs);
1493  //CmiAssert(isProxyChanged);
1494  if (nphs) {
1495  for (int i=0; i<nphs; i++) {
1496  CmiDestoryPersistent(localphs[i]);
1497  }
1498  delete [] localphs;
1499  }
1500  localphs = new PersistentHandle[npid];
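  // The persistent buffer must hold the largest message this patch can
  // send: envelope + ProxyDataMsg header + every variable-length payload
  // (positions, averaged positions, velocities, GB radii, LCPO types,
  // extended atom info, CUDA atoms) + priority bits + 2048 bytes of slack.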
1501  int persist_size = sizeof(envelope) + sizeof(ProxyDataMsg) + sizeof(CompAtom)*(pdMsgPLLen+pdMsgAvgPLLen+pdMsgVLLen) + intRadLen*sizeof(Real) + lcpoTypeLen*sizeof(int) + sizeof(CompAtomExt)*pdMsgPLExtLen + sizeof(CudaAtom)*cudaAtomLen + PRIORITY_SIZE/8 + 2048;
1502  for (int i=0; i<npid; i++) {
1503 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1504  if (proxySendSpanning)
1505  localphs[i] = CmiCreateNodePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
1506  else
1507 #endif
1508  localphs[i] = CmiCreatePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
1509  }
1510  nphs = npid;
1511  }
1512  CmiAssert(nphs == npid && localphs != NULL);
1513  CmiUsePersistentHandle(localphs, nphs);
1514 #endif
1515  if(doMigration || isNewProxyAdded) {
1516  ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
1517  }else{
1518  ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
1519  }
1520 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1521  CmiUsePersistentHandle(NULL, 0);
1522 #endif
1523  isNewProxyAdded = 0;
1524  }
1525  isProxyChanged = 0;
1526  if(!pidsPreAllocated) delete [] pids;
1527  DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");
1528 
1529 #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
1530  positionPtrBegin = p.begin();
1531  positionPtrEnd = p.end();
1532 #endif
1533 
1534 #if 0
1535  if(flags.doMolly) {
1536  avgPositionPtrBegin = p_avg.begin();
1537  avgPositionPtrEnd = p_avg.end();
1538  }
1539  // BEGIN LA
1540  if (flags.doLoweAndersen) {
1541  velocityPtrBegin = v.begin();
1542  velocityPtrEnd = v.end();
1543  }
1544  // END LA
1545 #endif
1546  // fprintf(stderr, "(Pe[%d] tstep %d)calling positionsReady on patch %d\n",
1547  // CkMyPe(), this->flags.step, this->patchID);
1548  Patch::positionsReady(doMigration);
1549 
1550  patchMapRead = 1;
1551 
1552  // gzheng
1553  NAMD_EVENT_STOP(1, NamdProfileEvent::POSITIONS_READY_SOA);
1554 
1555  Sync::Object()->PatchReady();
1556 
1557 #if 0
1558  fprintf(stderr, "Patch %d atom IDS\n", this->patchID);
1559  AtomMap* mapper = AtomMap::Object();
1560  for(int i = 0 ; i < numAtoms; i++){
1561  fprintf(stderr, "atom[%d] = %d %d %d\n", i, atom[i].id,
1562  mapper->localID(atom[i].id).pid, mapper->localID(atom[i].id).index);
1563  }
1564 #endif
1565 
1566 }
1567 //
1568 // end SOA
1569 //
1571 
1573 //
1574 // GPU migration code path
1575 //
1576 
1577 #ifdef NODEGROUP_FORCE_REGISTER
1578 
1579 void HomePatch::updateAtomCount(const int n, const int reallocate) {
1580  numAtoms = n;
1581  // See MigrationCUDAKernel.h
1582  if (n > MigrationCUDAKernel::kMaxAtomsPerPatch) {
1583  NAMD_die("Device migration does not currently support patches with more than 2048 atoms.\n"
1584  "Please run with a smaller margin or without device migration.\n");
1585  }
1586 }
1587 
1588 void HomePatch::reallocateAtoms() {
1589  atom.resize(numAtoms);
1590  cudaAtomList.resize(numAtoms);
1591  p.resize(numAtoms);
1592  pExt.resize(numAtoms);
1593 
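  // PatchDataSOA_set_size returns the byte count needed for numAtoms
  // entries (with capacity padded here to 2048 atoms); the backing buffer
  // is reallocated only when that byte count actually changes.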
1594  size_t nbytes = patchDataSOA.numBytes;
1595  if (nbytes != PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048)) {
1596 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1597  reallocate_host<unsigned char>(&soa_buffer,&soa_buffer_size, PatchDataSOA_set_size(&patchDataSOA, numAtoms));
1598  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer);
1599 #else
1600  soa_buffer.resize( PatchDataSOA_set_size(&patchDataSOA, numAtoms) );
1601  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer.begin() );
1602 #endif
1603  }
1604 }
1605 
1606 void HomePatch::clearAtomMap() {
1607  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
1608 }
1609 
1610 void HomePatch::positionsReady_GPU(int doMigration, int startup) {
1611  SimParameters *simParams = Node::Object()->simParameters;
1612  if (doMigration) {
1613  // make sure to invalidate RATTLE lists when atoms move
1614  rattleListValid_SOA = false;
1615  rattleListValid = false;
1616  //invalidate the grid force index list
1617  gridForceIdxChecked=false;
1618 
1619  }
1620  if (startup) {
1621  const int numAtomAlloc = 2048;
1622  atom.reserve(numAtomAlloc);
1623  cudaAtomList.reserve(numAtomAlloc);
1624  p.reserve(numAtomAlloc);
1625  pExt.reserve(numAtomAlloc);
1626 
1627  for ( int j = 0; j < Results::maxNumForces; ++j ) {
1628  f[j].reserve(numAtomAlloc);
1629  }
1630  }
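  // Reserving the 2048-atom maximum (cf. kMaxAtomsPerPatch) up front means
  // the resize() calls below never reallocate during device migration.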
1631 
1632  int n = numAtoms;
1633  atom.resize(n);
1634  cudaAtomList.resize(n);
1635  p.resize(n);
1636  pExt.resize(n);
1637 
1638  size_t nbytes = patchDataSOA.numBytes;
1639  if (nbytes != PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048)) {
1640 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1641  reallocate_host<unsigned char>(&soa_buffer,&soa_buffer_size, PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048));
1642  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer);
1643 #else
1644  soa_buffer.resize( PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048) );
1645  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer.begin() );
1646 #endif
1647  }
1648 
1649 
1650  doMigration = doMigration || ! patchMapRead;
1651 
1652  doMigration = doMigration || doAtomUpdate;
1653  doAtomUpdate = false;
1654 
1655 
1656  // This is set during startup depending on which features are enabled.
1657  const bool updateAtomMap = simParams->updateAtomMap;
1658 
1659  // If startup or update atom map, then we need to update some data structures.
1660  // For startup this is mainly for tuple generations
1661  if (startup || updateAtomMap) {
1662  // These are used in the atom map, so are needed for startup and updateAtomMap
1663  CompAtomExt * __restrict pExt_i = pExt.begin();
1664 #ifndef USE_NO_BITFIELDS
1665  CompAtomExtCopy *pExtCopy_i = (CompAtomExtCopy *) pExt_i;
1666 #endif
1667  for (int i=0; i < numAtoms; i++) {
1668  const uint32 atomFixed = uint32(atom[i].atomFixed);
1669  const uint32 groupFixed = uint32(atom[i].groupFixed);
1670  const uint32 id = uint32(atom[i].id);
1671 #ifndef USE_NO_BITFIELDS
1672  ASSERT(atomFixed == 0 || atomFixed == 1);
1673  ASSERT(groupFixed == 0 || groupFixed == 1);
1674  uint32 atomFixedBit = NAMD_ATOM_FIXED_MASK * atomFixed;
1675  uint32 groupFixedBit = NAMD_GROUP_FIXED_MASK * groupFixed;
1676  pExtCopy_i[i].id = (id | atomFixedBit | groupFixedBit);
1677 #else
1678  pExt_i[i].id = id;
1679  pExt_i[i].atomFixed = atomFixed;
1680  pExt_i[i].groupFixed = groupFixed;
1681 #endif // USE_NO_BITFIELDS
1682  }
1683  }
1684  if (startup) {
1685  // These are used in the exclusion tuple generation, so are only needed for startup
1686  CompAtom * __restrict p_i = p.begin();
1687  for (int i=0; i < numAtoms; i++) {
1688  p_i[i].vdwType = atom[i].vdwType;
1689  }
1690  if (simParams->alchOn) {
1691  for (int i=0; i < numAtoms; i++) {
1692  p_i[i].partition = atom[i].partition;
1693  }
1694  }
1695  // copy data from AOS into SOA
1696  for (int i=0; i < numAtoms; i++) {
1697  patchDataSOA.id[i] = int(atom[i].id);
1698  patchDataSOA.atomFixed[i] = int(atom[i].atomFixed);
1699  patchDataSOA.groupFixed[i] = int(atom[i].groupFixed);
1700  }
1701  } else if (updateAtomMap) {
1702  // This might be overkill for updating atom map
1703  copy_atoms_to_SOA();
1704  }
1705 
1706  // Measure atom movement to test pairlist validity
1707  // XXX TODO: Check if this ever needs to be done if CUDASOAintegrate
1708 #ifdef NODEGROUP_FORCE_REGISTER
1709  if(!simParams->CUDASOAintegrate || flags.sequence == 0){
1710  doPairlistCheck();
1711  }
1712 #else
1713  doPairlistCheck();
1714 #endif
1715 
1716  // Must Add Proxy Changes when migration completed!
1717  NodeIDList::iterator pli;
1718  int *pids = NULL;
1719  int pidsPreAllocated = 1;
1720  int npid;
1721  if (proxySendSpanning == 0) {
1722  npid = proxy.size();
1723  pids = new int[npid];
1724  pidsPreAllocated = 0;
1725  int *pidi = pids;
1726  int *pide = pids + proxy.size();
1727  int patchNodesLast =
1728  ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
1729  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
1730  {
1731  if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
1732  *(--pide) = *pli;
1733  } else {
1734  *(pidi++) = *pli;
1735  }
1736  }
1737  }
1738  else {
1739 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1740  #ifdef USE_NODEPATCHMGR
1741  npid = numNodeChild;
1742  pids = nodeChildren;
1743  #else
1744  npid = nChild;
1745  pids = child;
1746  #endif
1747 #else
1748  npid = nChild;
1749  pidsPreAllocated = 0;
1750  pids = new int[proxySpanDim];
1751  for (int i=0; i<nChild; i++) pids[i] = child[i];
1752 #endif
1753  }
1754  if (npid) { //have proxies
1755  int seq = flags.sequence;
1756  int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
1757  //begin to prepare proxy msg and send it
1758  int pdMsgPLLen = p.size();
1759  int pdMsgAvgPLLen = 0;
1760  // BEGIN LA
1761  int pdMsgVLLen = 0;
1762  // END LA
1763 
1764  int intRadLen = 0;
1765 
1766  //LCPO
1767  int lcpoTypeLen = 0;
1768 
1769  int pdMsgPLExtLen = 0;
1770  if(doMigration || isNewProxyAdded) {
1771  pdMsgPLExtLen = pExt.size();
1772  }
1773 
1774  int cudaAtomLen = 0;
1775 #ifdef NAMD_CUDA
1776  cudaAtomLen = numAtoms;
1777 #endif
1778 
1779  #ifdef NAMD_MIC
1780  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
1781  cudaAtomLen = numAtoms;
1782  #else
1783  cudaAtomLen = (numAtoms + 15) & (~15);
1784  #endif
1785  #endif
1786  ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgVLLen, intRadLen,
1787  lcpoTypeLen, pdMsgPLExtLen, cudaAtomLen, PRIORITY_SIZE) ProxyDataMsg; // BEGIN LA, END LA
1788 
1789  SET_PRIORITY(nmsg,seq,priority);
1790  nmsg->patch = patchID;
1791  nmsg->flags = flags;
1792  nmsg->plLen = pdMsgPLLen;
1793  //copying data to the newly created msg
1794  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
1795  memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
1796  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
1797  nmsg->avgPlLen = pdMsgAvgPLLen;
1798  // BEGIN LA
1799  nmsg->vlLen = pdMsgVLLen;
1800  // END LA
1801 
1802  nmsg->plExtLen = pdMsgPLExtLen;
1803  if(doMigration || isNewProxyAdded){
1804  memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
1805  }
1806 
1807 // DMK
1808 #if defined(NAMD_CUDA) || defined(NAMD_MIC)
1809  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
1810  memcpy(nmsg->cudaAtomList, cudaAtomPtr, sizeof(CudaAtom)*cudaAtomLen);
1811  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
1812 #endif
1813 
1814 #if NAMD_SeparateWaters != 0
1815  //DMK - Atom Separation (water vs. non-water)
1816  nmsg->numWaterAtoms = numWaterAtoms;
1817 #endif
1818 
1819 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
1820  nmsg->isFromImmMsgCall = 0;
1821 #endif
1822 
1823  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1824  DebugFileTrace *dft = DebugFileTrace::Object();
1825  dft->openTrace();
1826  dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
1827  for(int i=0; i<npid; i++) {
1828  dft->writeTrace("%d ", pids[i]);
1829  }
1830  dft->writeTrace("\n");
1831  dft->closeTrace();
1832  #endif
1833 
1834 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1835  if (isProxyChanged || localphs == NULL)
1836  {
1837  if (nphs) {
1838  for (int i=0; i<nphs; i++) {
1839  CmiDestoryPersistent(localphs[i]);
1840  }
1841  delete [] localphs;
1842  }
1843  localphs = new PersistentHandle[npid];
1844  int persist_size = sizeof(envelope) + sizeof(ProxyDataMsg) + sizeof(CompAtom)*(pdMsgPLLen+pdMsgAvgPLLen+pdMsgVLLen) + intRadLen*sizeof(Real) + lcpoTypeLen*sizeof(int) + sizeof(CompAtomExt)*pdMsgPLExtLen + sizeof(CudaAtom)*cudaAtomLen + PRIORITY_SIZE/8 + 2048;
1845  for (int i=0; i<npid; i++) {
1846 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1847  if (proxySendSpanning)
1848  localphs[i] = CmiCreateNodePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
1849  else
1850 #endif
1851  localphs[i] = CmiCreatePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
1852  }
1853  nphs = npid;
1854  }
1855  CmiAssert(nphs == npid && localphs != NULL);
1856  CmiUsePersistentHandle(localphs, nphs);
1857 #endif
1858  if(doMigration || isNewProxyAdded) {
1859  ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
1860  }else{
1861  ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
1862  }
1863 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1864  CmiUsePersistentHandle(NULL, 0);
1865 #endif
1866  isNewProxyAdded = 0;
1867  }
1868  isProxyChanged = 0;
1869  if(!pidsPreAllocated) delete [] pids;
1870  DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");
1871 
1872 #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
1873  positionPtrBegin = p.begin();
1874  positionPtrEnd = p.end();
1875 #endif
1876 
1877  Patch::positionsReady(doMigration, startup);
1878 
1879  patchMapRead = 1;
1880 
1881  // gzheng
1882  NAMD_EVENT_STOP(1, NamdProfileEvent::POSITIONS_READY_SOA);
1883 
1884  Sync::Object()->PatchReady();
1885 }
1886 
1887 #endif // NODEGROUP_FORCE_REGISTER
1888 
1889 //
1890 // end of GPU migration code path
1891 //
1893 
1894 
1895 void HomePatch::positionsReady(int doMigration)
1896 {
1897  SimParameters *simParams = Node::Object()->simParameters;
1898 
1899  flags.sequence++;
1900  if (doMigration)
1901  gridForceIdxChecked=false;
1902  if (!patchMapRead) { readPatchMap(); }
1903 
1904  if (numNeighbors && ! simParams->staticAtomAssignment) {
1905  if (doMigration) {
1906  rattleListValid = false;
1907  doAtomMigration();
1908  } else {
1909  doMarginCheck();
1910  }
1911  }
1912 
1913 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
1914  char prbuf[32];
1915  sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::POSITIONS_READY], this->getPatchID());
1916  NAMD_EVENT_START_EX(1, NamdProfileEvent::POSITIONS_READY, prbuf);
1917 #endif
1918 
1919  if (doMigration && simParams->qmLSSOn)
1920  qmSwapAtoms();
1921 
1922 #if defined(NAMD_CUDA) || defined(NAMD_MIC) || defined(NAMD_AVXTILES) || defined(NAMD_HIP)
1923  #ifdef NAMD_AVXTILES
1924  if ( simParams->useAVXTiles ) {
1925  #endif
1926  if ( doMigration ) {
1927  int n = numAtoms;
1928  FullAtom *a_i = atom.begin();
1929  #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_AVXTILES) || \
1930  (defined(NAMD_MIC) && MIC_SORT_ATOMS != 0)
1931  int *ao = new int[n];
1932  int nfree;
1933  if ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces ) {
1934  int k = 0;
1935  int k2 = n;
1936  for ( int j=0; j<n; ++j ) {
1937  // put fixed atoms at end
1938  if ( a_i[j].atomFixed ) ao[--k2] = j;
1939  else ao[k++] = j;
1940  }
1941  nfree = k;
1942  } else {
1943  nfree = n;
1944  for ( int j=0; j<n; ++j ) {
1945  ao[j] = j;
1946  }
1947  }
1948  sortAtomsForCUDA(ao,a_i,nfree,n);
1949  for ( int i=0; i<n; ++i ) {
1950  a_i[i].sortOrder = ao[i];
1951  }
1952  delete [] ao;
1953  #else
1954  for (int i = 0; i < n; ++i) {
1955  a_i[i].sortOrder = i;
1956  }
1957  #endif
1958  }
1959 
1960  {
1961  const double charge_scaling = sqrt(COULOMB * ComputeNonbondedUtil::scaling * ComputeNonbondedUtil::dielectric_1);
1962  const Vector ucenter = lattice.unscale(center);
1963  const BigReal ucenter_x = ucenter.x;
1964  const BigReal ucenter_y = ucenter.y;
1965  const BigReal ucenter_z = ucenter.z;
1966  const int n = numAtoms;
1967  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1968  int n_16 = n;
1969  n_16 = (n + 15) & (~15);
1970  cudaAtomList.resize(n_16);
1971  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1972  mic_position_t *atom_x = ((mic_position_t*)ac) + (0 * n_16);
1973  mic_position_t *atom_y = ((mic_position_t*)ac) + (1 * n_16);
1974  mic_position_t *atom_z = ((mic_position_t*)ac) + (2 * n_16);
1975  mic_position_t *atom_q = ((mic_position_t*)ac) + (3 * n_16);
1976  #elif defined(NAMD_AVXTILES)
1977  int n_avx = (n + 15) & (~15);
1978  cudaAtomList.resize(n_avx);
1979  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1980  tiles.realloc(n, ac);
1981  #else
1982  #if 1
1983  cudaAtomList.resize(n);
1984  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1985  #else
1986  reallocate_host<CudaAtom>(&cudaAtomList, &sizeCudaAtomList, n);
1987  CudaAtom *ac = cudaAtomPtr = &cudaAtomList[0];
1988  #endif
1989  #endif
1990  const FullAtom *a = atom.begin();
1991  for ( int k=0; k<n; ++k ) {
1992  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1993  int j = a[k].sortOrder;
1994  atom_x[k] = a[j].position.x - ucenter_x;
1995  atom_y[k] = a[j].position.y - ucenter_y;
1996  atom_z[k] = a[j].position.z - ucenter_z;
1997  atom_q[k] = charge_scaling * a[j].charge;
1998  #else
1999  int j = a[k].sortOrder;
2000  ac[k].x = a[j].position.x - ucenter_x;
2001  ac[k].y = a[j].position.y - ucenter_y;
2002  ac[k].z = a[j].position.z - ucenter_z;
2003  ac[k].q = charge_scaling * a[j].charge;
2004  #endif
2005  }
2006  #ifdef NAMD_AVXTILES
2007  {
2008  if (n > 0) {
2009  int j = a[n-1].sortOrder;
2010  for ( int k=n; k<n_avx; ++k ) {
2011  ac[k].x = a[j].position.x - ucenter_x;
2012  ac[k].y = a[j].position.y - ucenter_y;
2013  ac[k].z = a[j].position.z - ucenter_z;
2014  }
2015  }
2016  }
2017  #endif
2018  }
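// Note on charge_scaling above (a sketch of the algebra, for reference):
// charges are premultiplied by sqrt(COULOMB * scaling / dielectric), so a
// nonbonded kernel can form the electrostatic pair term as a bare product,
//
//   q'_i * q'_j / r = (COULOMB * scaling / dielectric) * q_i * q_j / r,
//
// which is the scaled Coulomb energy in internal units with no further
// constants needed on the device.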
2019  #ifdef NAMD_AVXTILES
2020  // If "Tiles" mode disabled, no use of the CUDA data structures
2021  } else doMigration = doMigration && numNeighbors;
2022  #endif
2023 #else
2024  doMigration = doMigration && numNeighbors;
2025 #endif
2026  doMigration = doMigration || ! patchMapRead;
2027 
2028  doMigration = doMigration || doAtomUpdate;
2029  doAtomUpdate = false;
2030 
 2031  // Workaround for oversize groups:
 2032  // reset nonbondedGroupSize (ngs) before force calculation,
 2033  // making sure that the subset of the hydrogen group starting with
 2034  // the parent atom lies entirely within 0.5 * hgroupCutoff.
 2035  // XXX hydrogenGroupSize remains constant but is checked for nonzero
2036  // XXX should be skipped for CUDA, ngs not used by CUDA kernels
2037  // XXX should this also be skipped for KNL kernels?
2038  // ngs used by ComputeNonbondedBase.h - CPU nonbonded kernels
2039  // ngs used by ComputeGBIS.C - CPU GB nonbonded kernels
2040 #if ! (defined(NAMD_CUDA) || defined(NAMD_HIP))
2041 #if defined(NAMD_AVXTILES)
2042  if (!simParams->useAVXTiles)
2043 #endif
2044  doGroupSizeCheck();
2045 #endif
2046 
2047  // Copy information needed by computes and proxys to Patch::p.
2048  // Resize only if atoms were migrated
2049  if (doMigration) {
2050  p.resize(numAtoms);
2051  pExt.resize(numAtoms);
2052  }
2053  CompAtom *p_i = p.begin();
2054  CompAtomExt *pExt_i = pExt.begin();
2055  FullAtom *a_i = atom.begin();
2056  int i; int n = numAtoms;
2057  for ( i=0; i<n; ++i ) {
2058  p_i[i] = a_i[i];
2059  pExt_i[i] = a_i[i];
2060  }
2061 
2062  // Measure atom movement to test pairlist validity
2063  doPairlistCheck();
2064 
2065  if (flags.doMolly) mollyAverage();
2066  // BEGIN LA
 2067  if (flags.doLoweAndersen) loweAndersenVelocities();
2068  // END LA
2069 
2070  if (flags.doGBIS) {
2071  //reset for next time step
2072  numGBISP1Arrived = 0;
2073  phase1BoxClosedCalled = false;
2074  numGBISP2Arrived = 0;
2075  phase2BoxClosedCalled = false;
2076  numGBISP3Arrived = 0;
2077  phase3BoxClosedCalled = false;
2078  if (doMigration || isNewProxyAdded)
 2079  setGBISIntrinsicRadii();
2080  }
2081 
2082  if (flags.doLCPO) {
2083  if (doMigration || isNewProxyAdded) {
2084  setLcpoType();
2085  }
2086  }
2087 
 2088  // Proxy changes must be added once migration has completed!
 2089  NodeIDList::iterator pli;
2090  int *pids = NULL;
2091  int pidsPreAllocated = 1;
2092  int npid;
2093  if (proxySendSpanning == 0) {
2094  npid = proxy.size();
2095  pids = new int[npid];
2096  pidsPreAllocated = 0;
2097  int *pidi = pids;
2098  int *pide = pids + proxy.size();
2099  int patchNodesLast =
2100  ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
2101  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
2102  {
2103  if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
2104  *(--pide) = *pli;
2105  } else {
2106  *(pidi++) = *pli;
2107  }
2108  }
2109  }
2110  else {
2111 #ifdef NODEAWARE_PROXY_SPANNINGTREE
2112  #ifdef USE_NODEPATCHMGR
2113  npid = numNodeChild;
2114  pids = nodeChildren;
2115  #else
2116  npid = nChild;
2117  pids = child;
2118  #endif
2119 #else
2120  npid = nChild;
2121  pidsPreAllocated = 0;
2122  pids = new int[proxySpanDim];
2123  for (int i=0; i<nChild; i++) pids[i] = child[i];
2124 #endif
2125  }
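// Ordering note for the non-spanning branch above: when fewer than ~70%
// of the PEs own patches, proxies that live on patch-owning PEs are
// written from the back of the pids array while the rest fill from the
// front, so the busier patch-owning destinations receive their proxy
// messages last.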
2126  if (npid) { //have proxies
2127  int seq = flags.sequence;
2128  int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
2129  //begin to prepare proxy msg and send it
2130  int pdMsgPLLen = p.size();
2131  int pdMsgAvgPLLen = 0;
2132  if(flags.doMolly) {
2133  pdMsgAvgPLLen = p_avg.size();
2134  }
2135  // BEGIN LA
2136  int pdMsgVLLen = 0;
2137  if (flags.doLoweAndersen) {
2138  pdMsgVLLen = v.size();
2139  }
2140  // END LA
2141 
2142  int intRadLen = 0;
2143  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
2144  intRadLen = numAtoms * 2;
2145  }
2146 
2147  //LCPO
2148  int lcpoTypeLen = 0;
2149  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
2150  lcpoTypeLen = numAtoms;
2151  }
2152 
2153  int pdMsgPLExtLen = 0;
2154  if(doMigration || isNewProxyAdded) {
2155  pdMsgPLExtLen = pExt.size();
2156  }
2157 
2158  int cudaAtomLen = 0;
2159 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2160  cudaAtomLen = numAtoms;
2161 #elif defined(NAMD_AVXTILES)
2162  if (simParams->useAVXTiles)
2163  cudaAtomLen = (numAtoms + 15) & (~15);
2164 #elif defined(NAMD_MIC)
2165  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
2166  cudaAtomLen = numAtoms;
2167  #else
2168  cudaAtomLen = (numAtoms + 15) & (~15);
2169  #endif
2170 #endif
2171 
2172  ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgVLLen, intRadLen,
2173  lcpoTypeLen, pdMsgPLExtLen, cudaAtomLen, PRIORITY_SIZE) ProxyDataMsg; // BEGIN LA, END LA
2174 
2175  SET_PRIORITY(nmsg,seq,priority);
2176  nmsg->patch = patchID;
2177  nmsg->flags = flags;
2178  nmsg->plLen = pdMsgPLLen;
2179  //copying data to the newly created msg
2180  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
2181  memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
2182  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
2183  nmsg->avgPlLen = pdMsgAvgPLLen;
2184  if(flags.doMolly) {
2185  memcpy(nmsg->avgPositionList, p_avg.begin(), sizeof(CompAtom)*pdMsgAvgPLLen);
2186  }
2187  // BEGIN LA
2188  nmsg->vlLen = pdMsgVLLen;
2189  if (flags.doLoweAndersen) {
2190  memcpy(nmsg->velocityList, v.begin(), sizeof(CompAtom)*pdMsgVLLen);
2191  }
2192  // END LA
2193 
2194  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
2195  for (int i = 0; i < numAtoms * 2; i++) {
2196  nmsg->intRadList[i] = intRad[i];
2197  }
2198  }
2199 
2200  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
2201  for (int i = 0; i < numAtoms; i++) {
2202  nmsg->lcpoTypeList[i] = lcpoType[i];
2203  }
2204  }
2205 
2206  nmsg->plExtLen = pdMsgPLExtLen;
2207  if(doMigration || isNewProxyAdded){
2208  memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
2209  }
2210 
2211 // DMK
2212 #if defined(NAMD_CUDA) || defined(NAMD_MIC) || defined(NAMD_AVXTILES) || defined(NAMD_HIP)
2213  #ifdef NAMD_AVXTILES
2214  if (simParams->useAVXTiles)
2215  #endif
2216  {
2217  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
2218  memcpy(nmsg->cudaAtomList, cudaAtomPtr, sizeof(CudaAtom)*cudaAtomLen);
2219  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
2220  }
2221 #endif
2222 
2223 #if NAMD_SeparateWaters != 0
2224  //DMK - Atom Separation (water vs. non-water)
2225  nmsg->numWaterAtoms = numWaterAtoms;
2226 #endif
2227 
2228 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
2229  nmsg->isFromImmMsgCall = 0;
2230 #endif
2231 
2232  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
2233  DebugFileTrace *dft = DebugFileTrace::Object();
2234  dft->openTrace();
2235  dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
2236  for(int i=0; i<npid; i++) {
2237  dft->writeTrace("%d ", pids[i]);
2238  }
2239  dft->writeTrace("\n");
2240  dft->closeTrace();
2241  #endif
2242 
2243 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
2244  if (isProxyChanged || localphs == NULL)
2245  {
2246 //CmiPrintf("[%d] Build persistent: isProxyChanged: %d %p\n", CkMyPe(), isProxyChanged, localphs);
2247  //CmiAssert(isProxyChanged);
2248  if (nphs) {
2249  for (int i=0; i<nphs; i++) {
2250  CmiDestoryPersistent(localphs[i]);
2251  }
2252  delete [] localphs;
2253  }
2254  localphs = new PersistentHandle[npid];
2255  int persist_size = sizeof(envelope) + sizeof(ProxyDataMsg) + sizeof(CompAtom)*(pdMsgPLLen+pdMsgAvgPLLen+pdMsgVLLen) + intRadLen*sizeof(Real) + lcpoTypeLen*sizeof(int) + sizeof(CompAtomExt)*pdMsgPLExtLen + sizeof(CudaAtom)*cudaAtomLen + PRIORITY_SIZE/8 + 2048;
2256  for (int i=0; i<npid; i++) {
2257 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
2258  if (proxySendSpanning)
2259  localphs[i] = CmiCreateNodePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
2260  else
2261 #endif
2262  localphs[i] = CmiCreatePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
2263  }
2264  nphs = npid;
2265  }
2266  CmiAssert(nphs == npid && localphs != NULL);
2267  CmiUsePersistentHandle(localphs, nphs);
2268 #endif
2269  if(doMigration || isNewProxyAdded) {
2270  ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
2271  }else{
2272  ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
2273  }
2274 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
2275  CmiUsePersistentHandle(NULL, 0);
2276 #endif
2277  isNewProxyAdded = 0;
2278  }
2279  isProxyChanged = 0;
2280  if(!pidsPreAllocated) delete [] pids;
2281  DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");
2282 
2283 #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
2284  positionPtrBegin = p.begin();
2285  positionPtrEnd = p.end();
2286 #endif
2287 
 2288  if(flags.doMolly) {
 2289  avgPositionPtrBegin = p_avg.begin();
 2290  avgPositionPtrEnd = p_avg.end();
 2291  }
2292  // BEGIN LA
2293  if (flags.doLoweAndersen) {
2294  velocityPtrBegin = v.begin();
2295  velocityPtrEnd = v.end();
2296  }
2297  // END LA
2298 
2299  Patch::positionsReady(doMigration);
2300 
2301  patchMapRead = 1;
2302 
2303  // gzheng
2304  Sync::Object()->PatchReady();
2305 
2306  NAMD_EVENT_STOP(1, NamdProfileEvent::POSITIONS_READY);
2307 
2308 }
2309 
 2310 void HomePatch::replaceForces(ExtForce *f)
2311 {
2312  replacementForces = f;
2313 }
2314 
2315 void HomePatch::saveForce(const int ftag)
2316 {
2317  f_saved[ftag].resize(numAtoms);
2318  for ( int i = 0; i < numAtoms; ++i )
2319  {
2320  f_saved[ftag][i] = f[ftag][i];
2321  }
2322 }
2323 
2324 
2325 void HomePatch::sort_solvent_atoms() {
2326 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2327  char ssabuf[32];
2328  sprintf(ssabuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::SORT_SOLVENT_ATOMS], this->getPatchID());
2329  NAMD_EVENT_START_EX(1, NamdProfileEvent::SORT_SOLVENT_ATOMS, ssabuf);
2330 #endif
2331 
2332 
2333  // goal is to move SETTLE water molecules to end of FullAtom array
2334  // first we need to count the number of SETTLE water molecules
2335  sortatom.resize(numAtoms);
2336  FullAtom * __restrict a = atom.begin();
2337  FullAtom * __restrict sa = sortatom.begin();
2338  numSolventAtoms = 0;
2339  numSoluteAtoms = 0;
2340  int i, j, k;
2341  for (i = 0; i < numAtoms; i++) {
2342  if (a[i].isWater) numSolventAtoms++;
2343  }
2344  j = 0; // starting index for solute
2345  numSoluteAtoms = numAtoms - numSolventAtoms;
2346  k = numSoluteAtoms; // starting index for water
2347  for (i = 0; i < numAtoms; i++) {
2348  if (a[i].isWater) {
2349  sa[k++] = a[i];
2350  }
2351  else {
2352  sa[j++] = a[i];
2353  }
2354  }
2355  // replace atom array by sorted array by swapping memory buffers
2356  atom.swap(sortatom);
2357  // XXX improve next line to avoid division
2358  // XXX should NOT assume 3 atoms per water
2359  numWaters = numSolventAtoms / 3;
2360 
2361  NAMD_EVENT_STOP(1, NamdProfileEvent::SORT_SOLVENT_ATOMS);
2362 
2363 }
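// The count-then-copy passes above amount to a stable two-bucket
// partition keyed on FullAtom::isWater. A minimal equivalent sketch with
// the standard library (assumes <algorithm>; shown for comparison only):
//
//   std::stable_partition(atom.begin(), atom.end(),
//       [](const FullAtom &a) { return !a.isWater; });
//
// The hand-rolled form is kept because it also tallies numSolventAtoms in
// the same sweep and reuses the preallocated sortatom scratch buffer.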
2364 
2365 
 2366 void PatchDataSOA_initialize(PatchDataSOA *p) {
2367  p->buffer = NULL;
2368  p->numBytes = 0;
2369  p->numAtoms = 0;
2370  p->maxAtoms = 0;
2371 }
2372 
 2373 size_t PatchDataSOA_set_size(
2374  PatchDataSOA *p,
2375  int natoms,
2376  int padding
2377  ) {
2378  if (natoms > p->maxAtoms) {
 2379  // maxAtoms rounds natoms up to the next multiple of padding
2380  p->maxAtoms = ((natoms + padding - 1) / padding) * padding;
2381  // set numBytes to allow each array to have maxAtoms space
2382  // count up total number of double, float, and int arrays
 2383  // XXX TODO : these magic numbers are bad for maintainability
 2384  SimParameters *simParams = Node::Object()->simParameters;
 2385  int numdoubles = (simParams->colvarsOn || simParams->tclForcesOn) ? 34 : 31;
2386  p->numBytes = p->maxAtoms *
2387  (numdoubles * sizeof(double) + 9*sizeof(float) + 17*sizeof(int));
2388  }
2389  p->numAtoms = natoms;
2390  return p->numBytes;
2391 }
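// Worked example of the rounding above (padding value illustrative): with
// natoms = 100 and padding = 32,
//
//   maxAtoms = ((100 + 32 - 1) / 32) * 32 = (131 / 32) * 32 = 4 * 32 = 128
//
// so every per-atom array reserves 128 slots, and no reallocation happens
// until the atom count crosses the next multiple of the padding.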
2392 
 2393 void PatchDataSOA_set_buffer(
2394  PatchDataSOA *p,
2395  void *mybuffer
2396  ) {
 2397  SimParameters *simParams = Node::Object()->simParameters;
2398  p->buffer = (unsigned char *) mybuffer;
2399  unsigned char *t = p->buffer;
2400  p->pos_x = (double *) t;
2401  t += p->maxAtoms * sizeof(double);
2402  p->pos_y = (double *) t;
2403  t += p->maxAtoms * sizeof(double);
2404  p->pos_z = (double *) t;
2405  t += p->maxAtoms * sizeof(double);
2406  p->charge = (float *) t;
2407  t += p->maxAtoms * sizeof(float);
2408  p->vdwType = (int *) t;
2409  t += p->maxAtoms * sizeof(int);
2410  p->partition = (int *) t;
2411  t += p->maxAtoms * sizeof(int);
2412  p->nonbondedGroupSize = (int *) t;
2413  t += p->maxAtoms * sizeof(int);
2414  p->hydrogenGroupSize = (int *) t;
2415  t += p->maxAtoms * sizeof(int);
2416  p->isWater = (int *) t;
2417  t += p->maxAtoms * sizeof(int);
2418  p->sortOrder = (int *) t;
2419  t += p->maxAtoms * sizeof(int);
2420  p->unsortOrder = (int *) t;
2421  t += p->maxAtoms * sizeof(int);
2422  p->id = (int *) t;
2423  t += p->maxAtoms * sizeof(int);
2424  p->exclId = (int *) t;
2425  t += p->maxAtoms * sizeof(int);
2426  p->sigId = (int *) t;
2427  t += p->maxAtoms * sizeof(int);
2428  p->atomFixed = (int *) t;
2429  t += p->maxAtoms * sizeof(int);
2430  p->groupFixed = (int *) t;
2431  t += p->maxAtoms * sizeof(int);
2432  p->vel_x = (double *) t;
2433  t += p->maxAtoms * sizeof(double);
2434  p->vel_y = (double *) t;
2435  t += p->maxAtoms * sizeof(double);
2436  p->vel_z = (double *) t;
2437  t += p->maxAtoms * sizeof(double);
2438  p->fixedPosition_x = (double *) t;
2439  t += p->maxAtoms * sizeof(double);
2440  p->fixedPosition_y = (double *) t;
2441  t += p->maxAtoms * sizeof(double);
2442  p->fixedPosition_z = (double *) t;
2443  t += p->maxAtoms * sizeof(double);
2444  p->recipMass = (double *) t;
2445  t += p->maxAtoms * sizeof(double);
2446  p->mass = (float *) t;
2447  t += p->maxAtoms * sizeof(float);
2448  p->langevinParam = (float *) t;
2449  t += p->maxAtoms * sizeof(float);
2450  p->status = (int *) t;
2451  t += p->maxAtoms * sizeof(int);
2452  p->transform_i = (int *) t;
2453  t += p->maxAtoms * sizeof(int);
2454  p->transform_j = (int *) t;
2455  t += p->maxAtoms * sizeof(int);
2456  p->transform_k = (int *) t;
2457  t += p->maxAtoms * sizeof(int);
2458  p->migrationGroupSize = (int *) t;
2459  t += p->maxAtoms * sizeof(int);
2460  p->rigidBondLength = (float *) t;
2461  t += p->maxAtoms * sizeof(float);
2462  p->langScalVelBBK2 = (float *) t;
2463  t += p->maxAtoms * sizeof(float);
2464  p->langScalRandBBK2 = (float *) t;
2465  t += p->maxAtoms * sizeof(float);
2466  p->gaussrand_x = (float *) t;
2467  t += p->maxAtoms * sizeof(float);
2468  p->gaussrand_y = (float *) t;
2469  t += p->maxAtoms * sizeof(float);
2470  p->gaussrand_z = (float *) t;
2471  t += p->maxAtoms * sizeof(float);
2472  p->f_normal_x = (double *) t;
2473  t += p->maxAtoms * sizeof(double);
2474  p->f_normal_y = (double *) t;
2475  t += p->maxAtoms * sizeof(double);
2476  p->f_normal_z = (double *) t;
2477  t += p->maxAtoms * sizeof(double);
2478  p->f_nbond_x = (double *) t;
2479  t += p->maxAtoms * sizeof(double);
2480  p->f_nbond_y = (double *) t;
2481  t += p->maxAtoms * sizeof(double);
2482  p->f_nbond_z = (double *) t;
2483  t += p->maxAtoms * sizeof(double);
2484  p->f_slow_x = (double *) t;
2485  t += p->maxAtoms * sizeof(double);
2486  p->f_slow_y = (double *) t;
2487  t += p->maxAtoms * sizeof(double);
2488  p->f_slow_z = (double *) t;
 2489  t += p->maxAtoms * sizeof(double);
2490  if (simParams->colvarsOn || simParams->tclForcesOn){
2491  p->f_global_x = (double *) t;
2492  t += p->maxAtoms * sizeof(double);
2493  p->f_global_y = (double *) t;
2494  t += p->maxAtoms * sizeof(double);
2495  p->f_global_z = (double *) t;
2496  t += p->maxAtoms * sizeof(double);
2497  }
2498  p->f_saved_nbond_x = (double *) t;
2499  t += p->maxAtoms * sizeof(double);
2500  p->f_saved_nbond_y = (double *) t;
2501  t += p->maxAtoms * sizeof(double);
2502  p->f_saved_nbond_z = (double *) t;
2503  t += p->maxAtoms * sizeof(double);
2504  p->f_saved_slow_x = (double *) t;
2505  t += p->maxAtoms * sizeof(double);
2506  p->f_saved_slow_y = (double *) t;
2507  t += p->maxAtoms * sizeof(double);
2508  p->f_saved_slow_z = (double *) t;
2509  t += p->maxAtoms * sizeof(double);
2510  p->velNew_x = (double *) t;
2511  t += p->maxAtoms * sizeof(double);
2512  p->velNew_y = (double *) t;
2513  t += p->maxAtoms * sizeof(double);
2514  p->velNew_z = (double *) t;
2515  t += p->maxAtoms * sizeof(double);
2516  p->posNew_x = (double *) t;
2517  t += p->maxAtoms * sizeof(double);
2518  p->posNew_y = (double *) t;
2519  t += p->maxAtoms * sizeof(double);
2520  p->posNew_z = (double *) t;
2521 }
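// The function above carves one contiguous allocation into per-field SOA
// arrays by advancing a byte cursor. A minimal self-contained sketch of
// the same pattern (hypothetical names, for illustration only):
//
//   struct MiniSOA { double *x; double *y; float *q; };
//   void mini_set_buffer(MiniSOA *s, unsigned char *buf, int maxAtoms) {
//     unsigned char *t = buf;
//     s->x = (double *) t;  t += maxAtoms * sizeof(double);
//     s->y = (double *) t;  t += maxAtoms * sizeof(double);
//     s->q = (float *) t;   // last array needs no cursor advance
//   }
//
// Because maxAtoms is padded up to a multiple of the padding width, every
// cursor advance is a multiple of 8 bytes, so each array start stays
// naturally aligned even as element types alternate among double, float,
// and int.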
2522 
2523 
2524 void HomePatch::copy_atoms_to_SOA() {
2525  // make sure that SOA data storage is big enough
 2526  // resize the buffer and reset the internal array pointers
 2527  // only when the required size changes
2528  size_t nbytes = patchDataSOA.numBytes;
2529  if (nbytes != PatchDataSOA_set_size(&patchDataSOA, numAtoms)) {
2530 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2531  reallocate_host<unsigned char>(&soa_buffer,&soa_buffer_size, PatchDataSOA_set_size(&patchDataSOA, numAtoms));
2532  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer);
2533 #else
2534  soa_buffer.resize( PatchDataSOA_set_size(&patchDataSOA, numAtoms) );
2535  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer.begin() );
2536 #endif
2537  }
2538 
2539  // copy data from AOS into SOA
2540  for (int i=0; i < numAtoms; i++) {
2541  patchDataSOA.pos_x[i] = atom[i].position.x;
2542  patchDataSOA.pos_y[i] = atom[i].position.y;
2543  patchDataSOA.pos_z[i] = atom[i].position.z;
2544  patchDataSOA.charge[i] = atom[i].charge;
2545  patchDataSOA.vdwType[i] = int(atom[i].vdwType);
2546  patchDataSOA.partition[i] = int(atom[i].partition);
2547  // XXX nonbondedGroupSize is always recalculated
2548  //patchDataSOA.nonbondedGroupSize[i] = int(atom[i].nonbondedGroupSize);
2549  patchDataSOA.hydrogenGroupSize[i] = int(atom[i].hydrogenGroupSize);
2550  patchDataSOA.isWater[i] = int(atom[i].isWater);
2551  // XXX sortOrder is recalculated
2552  // XXX and defined only for NAMD_CUDA or NAMD_MIC
2553  //patchDataSOA.sortOrder[i] = int(atom[i].sortOrder);
2554  patchDataSOA.id[i] = int(atom[i].id);
2555 #ifdef MEM_OPT_VERSION
2556  patchDataSOA.exclId[i] = int(atom[i].exclId);
2557  patchDataSOA.sigId[i] = int(atom[i].sigId);
2558 #endif
2559  patchDataSOA.atomFixed[i] = int(atom[i].atomFixed);
2560  patchDataSOA.groupFixed[i] = int(atom[i].groupFixed);
2561 
2562  patchDataSOA.vel_x[i] = atom[i].velocity.x;
2563  patchDataSOA.vel_y[i] = atom[i].velocity.y;
2564  patchDataSOA.vel_z[i] = atom[i].velocity.z;
2565  patchDataSOA.fixedPosition_x[i] = atom[i].fixedPosition.x;
2566  patchDataSOA.fixedPosition_y[i] = atom[i].fixedPosition.y;
2567  patchDataSOA.fixedPosition_z[i] = atom[i].fixedPosition.z;
2568  patchDataSOA.mass[i] = atom[i].mass;
2569  patchDataSOA.langevinParam[i] = atom[i].langevinParam;
2570  patchDataSOA.status[i] = atom[i].status;
2571  patchDataSOA.transform_i[i] = atom[i].transform.i;
2572  patchDataSOA.transform_j[i] = atom[i].transform.j;
2573  patchDataSOA.transform_k[i] = atom[i].transform.k;
2574  patchDataSOA.migrationGroupSize[i] = atom[i].migrationGroupSize;
2575  patchDataSOA.rigidBondLength[i] = atom[i].rigidBondLength;
2576  }
2577 
 2578  // calculate quantities derived from basic components
2579  calculate_derived_SOA();
2580 }
2581 
2582 
2583 void HomePatch::calculate_derived_SOA() {
 2584  SimParameters *simParams = Node::Object()->simParameters;
2585  for (int i=0; i < numAtoms; i++) {
2586  patchDataSOA.recipMass[i] = (atom[i].mass > 0 ? 1.f / atom[i].mass : 0);
2587  }
2588  if (simParams->langevinOn) {
2589  BigReal dt_fs = simParams->dt;
2590  BigReal dt = dt_fs * 0.001; // convert timestep to ps
2591  BigReal kbT = BOLTZMANN * simParams->langevinTemp;
2592  int lesReduceTemp = (simParams->lesOn && simParams->lesReduceTemp);
2593  BigReal tempFactor = (lesReduceTemp ? 1. / simParams->lesFactor : 1);
2594  for (int i=0; i < numAtoms; i++) {
2595  BigReal dt_gamma = dt * patchDataSOA.langevinParam[i];
2596  patchDataSOA.langScalRandBBK2[i] = (float) sqrt( 2 * dt_gamma * kbT *
2597  ( atom[i].partition ? tempFactor : 1 ) * patchDataSOA.recipMass[i] );
2598  patchDataSOA.langScalVelBBK2[i] = (float) (1 / (1 + 0.5 * dt_gamma));
2599  }
2600  }
2601 }
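// For reference, the two factors computed above are the BBK Langevin
// scalings (a sketch of the algebra, with gamma = langevinParam in 1/ps
// and dt in ps):
//
//   langScalRandBBK2 = sqrt( 2 * gamma * dt * kB*T / m )   // random kick
//   langScalVelBBK2  = 1 / (1 + gamma * dt / 2)            // drag rescale
//
// The random-kick magnitude enforces the fluctuation-dissipation balance
// for the bath temperature, and the drag factor lies in (0,1] for any
// non-negative gamma*dt.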
2602 
2603 
2604 void HomePatch::copy_forces_to_SOA() {
2605  NAMD_EVENT_START(1, NamdProfileEvent::COPY_FORCES_TO_SOA);
 2606  SimParameters *simParams = Node::Object()->simParameters;
2607  const int saveForce = (simParams->tclForcesOn || simParams->colvarsOn);
2608  const Force *fnormal = f[Results::normal].const_begin();
2609  for (int i=0; i < numAtoms; i++) {
 2610  // f_normal_x,y,z were zeroed before positionsReady_SOA was called in ComputeObjects, so += accumulates correctly here
2611  patchDataSOA.f_normal_x[i] += fnormal[i].x;
2612  patchDataSOA.f_normal_y[i] += fnormal[i].y;
2613  patchDataSOA.f_normal_z[i] += fnormal[i].z;
2614  }
2615  if (flags.doNonbonded) {
2616  const Force *fnbond = f[Results::nbond].const_begin();
2617  if(saveForce) {
2618  for (int i=0; i < numAtoms; i++) {
2619  Force f = fnbond[i];
2620  patchDataSOA.f_nbond_x[i] = f.x;
2621  patchDataSOA.f_nbond_y[i] = f.y;
2622  patchDataSOA.f_nbond_z[i] = f.z;
2623  patchDataSOA.f_saved_nbond_x[i] = f.x;
2624  patchDataSOA.f_saved_nbond_y[i] = f.y;
2625  patchDataSOA.f_saved_nbond_z[i] = f.z;
2626  }
2627  } else {
2628  for (int i=0; i < numAtoms; i++) {
2629  patchDataSOA.f_nbond_x[i] = fnbond[i].x;
2630  patchDataSOA.f_nbond_y[i] = fnbond[i].y;
2631  patchDataSOA.f_nbond_z[i] = fnbond[i].z;
2632  }
2633  }
2634  }
2635  else {
2636  for (int i=0; i < numAtoms; i++) {
2637  patchDataSOA.f_nbond_x[i] = 0;
2638  patchDataSOA.f_nbond_y[i] = 0;
2639  patchDataSOA.f_nbond_z[i] = 0;
2640  }
 2641  // we don't set the saved force to zero if we don't have nonbonded forces.
2642  }
 2643  if (flags.doFullElectrostatics) {
2644  const Force *fslow = f[Results::slow].const_begin();
2645  if(saveForce) {
2646  for (int i = 0; i < numAtoms; i++) {
2647  Force f = fslow[i];
2648  patchDataSOA.f_slow_x[i] = f.x;
2649  patchDataSOA.f_slow_y[i] = f.y;
2650  patchDataSOA.f_slow_z[i] = f.z;
2651  patchDataSOA.f_saved_slow_x[i] = f.x;
2652  patchDataSOA.f_saved_slow_y[i] = f.y;
2653  patchDataSOA.f_saved_slow_z[i] = f.z;
2654  }
2655  } else {
2656  for (int i = 0; i < numAtoms; i++) {
2657  patchDataSOA.f_slow_x[i] = fslow[i].x;
2658  patchDataSOA.f_slow_y[i] = fslow[i].y;
2659  patchDataSOA.f_slow_z[i] = fslow[i].z;
2660  }
2661  }
2662  }
2663  else {
2664  for (int i = 0; i < numAtoms; i++) {
2665  patchDataSOA.f_slow_x[i] = 0;
2666  patchDataSOA.f_slow_y[i] = 0;
2667  patchDataSOA.f_slow_z[i] = 0;
2668  }
 2669  // we don't set the saved force to zero if we don't have slow forces.
2670  }
2671  NAMD_EVENT_STOP(1, NamdProfileEvent::COPY_FORCES_TO_SOA);
2672 }
2673 
2674 
2675 void HomePatch::copy_updates_to_AOS() {
2676  for (int i=0; i < numAtoms; i++) {
2677  atom[i].velocity.x = patchDataSOA.vel_x[i];
2678  atom[i].velocity.y = patchDataSOA.vel_y[i];
2679  atom[i].velocity.z = patchDataSOA.vel_z[i];
2680  atom[i].position.x = patchDataSOA.pos_x[i];
2681  atom[i].position.y = patchDataSOA.pos_y[i];
2682  atom[i].position.z = patchDataSOA.pos_z[i];
2683  }
2684 }
2685 
2686 
2687 void HomePatch::copy_forces_to_AOS() {
2688  switch (flags.maxForceUsed) { // intentionally fallthrough
2689  case 2: {
2690  ForceList& current_force = f[2];
2691  for (int i=0; i < numAtoms; i++) {
2692  current_force[i].x = patchDataSOA.f_slow_x[i];
2693  current_force[i].y = patchDataSOA.f_slow_y[i];
2694  current_force[i].z = patchDataSOA.f_slow_z[i];
2695  }
2696  }
2697  case 1: {
2698  ForceList& current_force = f[1];
2699  for (int i=0; i < numAtoms; i++) {
2700  current_force[i].x = patchDataSOA.f_nbond_x[i];
2701  current_force[i].y = patchDataSOA.f_nbond_y[i];
2702  current_force[i].z = patchDataSOA.f_nbond_z[i];
2703  }
2704  }
2705  case 0: {
2706  ForceList& current_force = f[0];
2707  for (int i=0; i < numAtoms; i++) {
2708  current_force[i].x = patchDataSOA.f_normal_x[i];
2709  current_force[i].y = patchDataSOA.f_normal_y[i];
2710  current_force[i].z = patchDataSOA.f_normal_z[i];
2711  }
2712  }
2713  }
2714 }
2715 
2716 
2717 void HomePatch::zero_global_forces_SOA() {
2718  memset(this->patchDataSOA.f_global_x, 0, sizeof(double)*numAtoms);
2719  memset(this->patchDataSOA.f_global_y, 0, sizeof(double)*numAtoms);
2720  memset(this->patchDataSOA.f_global_z, 0, sizeof(double)*numAtoms);
2721 }
2722 
2723 #undef DEBUG_REDISTRIB_FORCE
2724 #undef DEBUG_REDISTRIB_FORCE_VERBOSE
2725 //#define DEBUG_REDISTRIB_FORCE
2726 
2741 void HomePatch::redistrib_colinear_lp_force(
2742  Vector& fi, Vector& fj, Vector& fk,
2743  const Vector& ri, const Vector& rj, const Vector& rk,
2744  Real distance_f, Real scale_f, Tensor *virial) {
2745  BigReal distance = distance_f;
2746  BigReal scale = scale_f;
2747  Vector r_jk = rj - rk;
2748  // TODO: Add a check for small distances?
2749  BigReal r_jk_rlength = r_jk.rlength();
2750  distance *= r_jk_rlength;
2751  BigReal fdot = distance*(fi*r_jk)*r_jk_rlength*r_jk_rlength;
2752  const Vector fja = (1. + scale + distance)*fi - r_jk*fdot;
2753  const Vector fka = fi - fja;
2754  if (virial) {
2755  *virial += outer(fja, rj) + outer(fka, rk) - outer(fi, ri);
2756  }
2757  fj += fja;
2758  fk += fka;
2759  fi = 0;
2760 }
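// Sketch of the projection above: with u = (rj - rk)/|rj - rk| the unit
// host-host axis and d = distance/|rj - rk|, the lone-pair force fi is
// split so that net force and net torque are both conserved:
//
//   fja = (1 + scale + d) * fi - d * (fi . u) * u
//   fka = fi - fja
//
// i.e. the component of fi along the host-host axis is traded between the
// two hosts in proportion to where the lone pair sits on that axis.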
2761 
2787 #define FIX_FOR_WATER
2788 void HomePatch::redistrib_relative_lp_force(
2789  Vector& fi, Vector& fj, Vector& fk, Vector& fl,
2790  const Vector& ri, const Vector& rj, const Vector& rk, const Vector& rl,
2791  Tensor *virial, int midpt) {
2792 #ifdef DEBUG_REDISTRIB_FORCE
2793  Vector foldnet, toldnet; // old net force, old net torque
2794  foldnet = fi + fj + fk + fl;
2795  toldnet = cross(ri,fi) + cross(rj,fj) + cross(rk,fk) + cross(rl,fl);
2796 #endif
2797  Vector fja(0), fka(0), fla(0);
2798 
2799  Vector r = ri - rj;
2800  BigReal invr2 = r.rlength();
2801  invr2 *= invr2;
2802  BigReal fdot = (fi*r) * invr2;
2803  Vector fr = r * fdot;
2804 
2805  fja += fr;
2806 
2807  Vector s, t;
2808  if (midpt) {
2809  s = rj - 0.5*(rk + rl);
2810  t = 0.5*(rk - rl);
2811  }
2812  else {
2813  s = rj - rk;
2814  t = rk - rl;
2815  }
2816  BigReal invs2 = s.rlength();
2817  invs2 *= invs2;
2818 
2819  Vector p = cross(r,s);
2820 #if !defined(FIX_FOR_WATER)
2821  BigReal invp = p.rlength();
2822 #else
2823  BigReal p2 = p.length2(); // fix division by zero above
2824 #endif
2825 
2826  Vector q = cross(s,t);
2827  BigReal invq = q.rlength();
2828 
2829 #if !defined(FIX_FOR_WATER)
2830  BigReal fpdot = (fi*p) * invp;
2831  Vector fp = p * fpdot;
2832  Vector ft = fi - fr - fp;
2833 #else
2834  BigReal fpdot;
2835  Vector fp, ft;
2836  if (p2 < 1e-6) { // vector is near zero, assume no fp contribution to force
2837  fpdot = 0;
2838  fp = 0;
2839  ft = fi - fr;
2840  }
2841  else {
2842  fpdot = (fi*p) / sqrt(p2);
2843  fp = p * fpdot;
2844  ft = fi - fr - fp;
2845  }
2846 #endif
2847 
2848  fja += ft;
2849  Vector v = cross(r,ft); // torque
2850  ft = cross(s,v) * invs2;
2851  fja -= ft;
2852 
2853  if (midpt) {
2854  fka += 0.5 * ft;
2855  fla += 0.5 * ft;
2856  }
2857  else {
2858  fka += ft;
2859  }
2860 
2861  BigReal srdot = (s*r) * invs2;
2862  Vector rr = r - s*srdot;
2863  BigReal rrdot = rr.length();
2864  BigReal stdot = (s*t) * invs2;
2865  Vector tt = t - s*stdot;
2866  BigReal invtt = tt.rlength();
2867  BigReal fact = rrdot*fpdot*invtt*invq;
2868  Vector fq = q * fact;
2869 
2870  fla += fq;
2871  fja += fp*(1+srdot) + fq*stdot;
2872 
2873  ft = fq*(1+stdot) + fp*srdot;
2874 
2875  if (midpt) {
2876  fka += -0.5*ft;
2877  fla += -0.5*ft;
2878  }
2879  else {
2880  fka -= ft;
2881  }
2882 
2883  if (virial) {
2884  Tensor va = outer(fja,rj);
2885  va += outer(fka,rk);
2886  va += outer(fla,rl);
2887  va -= outer(fi,ri);
2888  *virial += va;
2889  }
2890 
2891  fi = 0; // lone pair has zero force
2892  fj += fja;
2893  fk += fka;
2894  fl += fla;
2895 
2896 #ifdef DEBUG_REDISTRIB_FORCE
2897 #define TOL_REDISTRIB 1e-4
2898  Vector fnewnet, tnewnet; // new net force, new net torque
2899  fnewnet = fi + fj + fk + fl;
2900  tnewnet = cross(ri,fi) + cross(rj,fj) + cross(rk,fk) + cross(rl,fl);
2901  Vector fdiff = fnewnet - foldnet;
2902  Vector tdiff = tnewnet - toldnet;
2903  if (fdiff.length2() > TOL_REDISTRIB*TOL_REDISTRIB) {
2904  printf("Error: force redistribution for water exceeded tolerance: "
2905  "fdiff=(%f, %f, %f)\n", fdiff.x, fdiff.y, fdiff.z);
2906  }
2907  if (tdiff.length2() > TOL_REDISTRIB*TOL_REDISTRIB) {
2908  printf("Error: torque redistribution for water exceeded tolerance: "
2909  "tdiff=(%f, %f, %f)\n", tdiff.x, tdiff.y, tdiff.z);
2910  }
2911 #endif
2912 }
2913 
2914 void HomePatch::redistrib_ap_force(Vector& fi, Vector& fj) {
2915  // final state 'atom' force must transfer to initial state atom
2916  // in single topology FEP.
2917  Vector fi_old = fi;
2918  Vector fj_old = fj;
2919  fi = fi_old + fj_old;
2920  fj = fi_old + fj_old;
2921 }
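// Note: both partners leave with the identical summed force fi + fj;
// since reposition_alchpair() also co-locates the pair, the two
// alchemical images feel one common force and move together.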
2922 
2923 /* Redistribute forces from the massless lonepair charge particle onto
2924  * the other atoms of the water.
2925  *
2926  * This is done using the same algorithm as charmm uses for TIP4P lonepairs.
2927  *
2928  * Pass by reference the forces (O H1 H2 LP) to be modified,
2929  * pass by constant reference the corresponding positions,
2930  * and a pointer to virial.
2931  */
2932 void HomePatch::redistrib_lp_water_force(
2933  Vector& f_ox, Vector& f_h1, Vector& f_h2, Vector& f_lp,
2934  const Vector& p_ox, const Vector& p_h1, const Vector& p_h2,
2935  const Vector& p_lp, Tensor *virial) {
2936 
2937 #ifdef DEBUG_REDISTRIB_FORCE
2938  // Debug information to check against results at end
2939 
2940  // total force and torque relative to origin
2941  Vector totforce, tottorque;
2942  totforce = f_ox + f_h1 + f_h2 + f_lp;
2943  tottorque = cross(f_ox, p_ox) + cross(f_h1, p_h1) + cross(f_h2, p_h2);
2944  //printf("Torque without LP is %f/%f/%f\n",
2945  // tottorque.x, tottorque.y, tottorque.z);
2946  tottorque += cross(f_lp, p_lp);
2947  //printf("Torque with LP is %f/%f/%f\n",
2948  // tottorque.x, tottorque.y, tottorque.z);
2949 #endif
2950 
2951  // accumulate force adjustments
2952  Vector fad_ox(0), fad_h(0);
2953 
2954  // Calculate the radial component of the force and add it to the oxygen
2955  Vector r_ox_lp = p_lp - p_ox;
2956  BigReal invlen2_r_ox_lp = r_ox_lp.rlength();
2957  invlen2_r_ox_lp *= invlen2_r_ox_lp;
2958  BigReal rad_factor = (f_lp * r_ox_lp) * invlen2_r_ox_lp;
2959  Vector f_rad = r_ox_lp * rad_factor;
2960 
2961  fad_ox += f_rad;
2962 
2963  // Calculate the angular component
2964  Vector r_hcom_ox = p_ox - ( (p_h1 + p_h2) * 0.5 );
2965  // Vector r_h2_h1_2 = (p_h1 - p_h2) * 0.5; // half of r_h2_h1
2966 
2967  // deviation from collinearity of charge site
2968  //Vector r_oop = cross(r_ox_lp, r_hcom_ox);
2969  //
2970  // vector out of o-h-h plane
2971  //Vector r_perp = cross(r_hcom_ox, r_h2_h1_2);
2972 
2973  // Here we assume that Ox/Lp/Hcom are linear
2974  // If you want to correct for deviations, this is the place
2975 
2976 // printf("Deviation from linearity for ox %i: %f/%f/%f\n", oxind, r_oop.x, r_oop.y, r_oop.z);
2977 
2978  Vector f_ang = f_lp - f_rad; // leave the angular component
2979 
2980  // now split this component onto the other atoms
2981  BigReal len_r_ox_lp = r_ox_lp.length();
2982  BigReal invlen_r_hcom_ox = r_hcom_ox.rlength();
2983  BigReal oxcomp = (r_hcom_ox.length() - len_r_ox_lp) * invlen_r_hcom_ox;
2984  BigReal hydcomp = 0.5 * len_r_ox_lp * invlen_r_hcom_ox;
2985 
2986  fad_ox += (f_ang * oxcomp);
2987  fad_h += (f_ang * hydcomp); // adjustment for both hydrogens
2988 
2989  // Add virial contributions
2990  if (virial) {
2991  Tensor vir = outer(fad_ox, p_ox);
2992  vir += outer(fad_h, p_h1);
2993  vir += outer(fad_h, p_h2);
2994  vir -= outer(f_lp, p_lp);
2995  *virial += vir;
2996  }
2997 
2998  //Vector zerovec(0.0, 0.0, 0.0);
2999  f_lp = 0;
3000  f_ox += fad_ox;
3001  f_h1 += fad_h;
3002  f_h2 += fad_h;
3003 
3004 #ifdef DEBUG_REDISTRIB_FORCE
3005  // Check that the total force and torque come out right
3006  Vector newforce, newtorque;
3007  newforce = f_ox + f_h1 + f_h2;
3008  newtorque = cross(f_ox, p_ox) + cross(f_h1, p_h1) + cross(f_h2, p_h2);
3009  Vector fdiff = newforce - totforce;
3010  Vector tdiff = newtorque - tottorque;
3011  BigReal error = fdiff.length();
3012  if (error > 0.0001) {
3013  printf("Error: Force redistribution for water "
3014  "exceeded force tolerance: error=%f\n", error);
3015  }
3016 #ifdef DEBUG_REDISTRIB_FORCE_VERBOSE
3017  printf("Error in net force: %f\n", error);
3018 #endif
3019 
3020  error = tdiff.length();
3021  if (error > 0.0001) {
3022  printf("Error: Force redistribution for water "
3023  "exceeded torque tolerance: error=%f\n", error);
3024  }
3025 #ifdef DEBUG_REDISTRIB_FORCE_VERBOSE
3026  printf("Error in net torque: %f\n", error);
3027 #endif
3028 #endif /* DEBUG */
3029 }
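// Summary of the decomposition above (a sketch): with r = p_lp - p_ox,
// the lone-pair force splits into a radial part that transfers wholly to
// the oxygen,
//
//   f_rad = ((f_lp . r) / |r|^2) * r,
//
// and an angular remainder f_ang = f_lp - f_rad that is lever-arm
// weighted between the oxygen and the two hydrogens; the weights satisfy
// oxcomp + 2*hydcomp = 1, preserving both net force and net torque.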
3030 
3049 void HomePatch::reposition_colinear_lonepair(
3050  Vector& ri, const Vector& rj, const Vector& rk,
3051  Real distance_f, Real scale_f)
3052 {
3053  BigReal distance = distance_f;
3054  BigReal scale = scale_f;
3055  Vector r_jk = rj - rk;
3056  BigReal r2 = r_jk.length2();
3057  if (r2 < 1e-10 || 100. < r2) { // same low tolerance as used in CHARMM
3058  iout << iWARN << "Large/small distance between lonepair reference atoms: ("
3059  << rj << ") (" << rk << ")\n" << endi;
3060  }
3061  ri = rj + (scale + distance*r_jk.rlength())*r_jk;
3062 }
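// Worked example (values illustrative): with rj = (0,0,0), rk = (0,0,-1),
// distance = 0.5 and scale = 0, r_jk = (0,0,1) has unit length, so
//
//   ri = rj + (0 + 0.5 * 1) * r_jk = (0, 0, 0.5)
//
// placing the lone pair half an Angstrom beyond j, colinear with the
// k -> j axis.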
3063 
3078 void HomePatch::reposition_relative_lonepair(
3079  Vector& ri, const Vector& rj, const Vector& rk, const Vector& rl,
3080  Real distance, Real angle, Real dihedral)
3081 {
3082  if ( (rj-rk).length2() > 100. || (rj-rl).length2() > 100. ) {
3083  iout << iWARN << "Large distance between lonepair reference atoms: ("
3084  << rj << ") (" << rk << ") (" << rl << ")\n" << endi;
3085  }
3086  BigReal r, t, p, cst, snt, csp, snp, invlen;
3087  Vector v, w, a, b, c;
3088 
3089  if (distance >= 0) {
3090  v = rk;
3091  r = distance;
3092  }
3093  else {
3094  v = 0.5*(rk + rl);
3095  r = -distance;
3096  }
3097 
3098  t = angle;
3099  p = dihedral;
3100  cst = cos(t);
3101  snt = sin(t);
3102  csp = cos(p);
3103  snp = sin(p);
3104  a = v - rj;
3105  b = rl - v;
3106  invlen = a.rlength();
3107  a *= invlen;
3108  c = cross(b, a);
3109  invlen = c.rlength();
3110  c *= invlen;
3111  b = cross(a, c);
3112  w.x = r*cst;
3113  w.y = r*snt*csp;
3114  w.z = r*snt*snp;
3115  ri.x = rj.x + w.x*a.x + w.y*b.x + w.z*c.x;
3116  ri.y = rj.y + w.x*a.y + w.y*b.y + w.z*c.y;
3117  ri.z = rj.z + w.x*a.z + w.y*b.z + w.z*c.z;
3118 }
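// The placement above works in a local orthonormal frame at rj (a sketch
// of the geometry): a is the unit vector from rj toward v (rk, or the
// rk/rl midpoint when distance < 0), c is the unit normal to the plane
// spanned by (rl - v) and a, and b = a x c completes the right-handed
// triad. The lone pair then sits at spherical coordinates (r, angle,
// dihedral) in that frame:
//
//   w = ( r*cos(t), r*sin(t)*cos(p), r*sin(t)*sin(p) )
//   ri = rj + w.x*a + w.y*b + w.z*c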
3119 
3120 void HomePatch::reposition_alchpair(Vector& ri, Vector& rj, Mass& Mi, Mass& Mj) {
3121  Vector ri_old, rj_old;
3122  Mass mi, mj;
3123  ri_old.x = ri.x;
3124  ri_old.y = ri.y;
3125  ri_old.z = ri.z;
3126  rj_old.x = rj.x;
3127  rj_old.y = rj.y;
3128  rj_old.z = rj.z;
3129 
3130  mi = Mi; mj = Mj;
3131  ri.x = (mi * ri_old.x + mj * rj_old.x)/(mi + mj);
3132  ri.y = (mi * ri_old.y + mj * rj_old.y)/(mi + mj);
3133  ri.z = (mi * ri_old.z + mj * rj_old.z)/(mi + mj);
3134  rj.x = ri.x;
3135  rj.y = ri.y;
3136  rj.z = ri.z;
3137 }
3138 
3139 void HomePatch::reposition_all_lonepairs(void) {
3140  // ASSERT: simParams->lonepairs == TRUE
3141  for (int i=0; i < numAtoms; i++) {
3142  if (atom[i].mass < 0.01) {
3143  // found a lone pair
3144  AtomID aid = atom[i].id; // global atom ID of lp
3145  Lphost *lph = Node::Object()->molecule->get_lphost(aid); // its lphost
3146  if (lph == NULL) {
3147  char errmsg[512];
3148  sprintf(errmsg, "reposition lone pairs: "
3149  "no Lphost exists for LP %d\n", aid);
3150  NAMD_die(errmsg);
3151  }
3152  LocalID j = AtomMap::Object()->localID(lph->atom2);
3153  LocalID k = AtomMap::Object()->localID(lph->atom3);
3154  LocalID l = AtomMap::Object()->localID(lph->atom4);
3155  if (j.pid != patchID || k.pid != patchID || l.pid != patchID) {
3156  char errmsg[512];
3157  sprintf(errmsg, "reposition lone pairs: "
3158  "LP %d has some Lphost atom off patch\n", aid);
3159  NAMD_die(errmsg);
3160  }
3161  // reposition this lone pair
3162  if (lph->numhosts == 2) {
3163  reposition_colinear_lonepair(atom[i].position, atom[j.index].position,
3164  atom[k.index].position, lph->distance, lph->angle);
3165  }
3166  else if (lph->numhosts == 3) {
3167  reposition_relative_lonepair(atom[i].position, atom[j.index].position,
3168  atom[k.index].position, atom[l.index].position,
3169  lph->distance, lph->angle, lph->dihedral);
3170  }
3171  }
3172  }
3173 }
3174 
3175 void HomePatch::reposition_all_alchpairs(void) {
3176  Molecule *mol = Node::Object()->molecule;
3177  int numFepInitial = mol->numFepInitial;
3178  int alchPair_id;
3179  for (int i = 0; i < numAtoms; i++) {
3180  if (atom[i].partition == 4 ) {
3181  alchPair_id = atom[i].id + numFepInitial; //global id of the pair atom.
3182  LocalID j = AtomMap::Object()->localID(alchPair_id);
3183  reposition_alchpair(atom[i].position, atom[j.index].position, atom[i].mass, atom[j.index].mass);
3184  }
3185  }
3186 }
3187 
3188 void HomePatch::swm4_omrepos(Vector *ref, Vector *pos, Vector *vel,
3189  BigReal invdt) {
3190  // Reposition lonepair (Om) particle of Drude SWM4 water.
3191  // Same comments apply as to tip4_omrepos(), but the ordering of atoms
3192  // is different: O, D, LP, H1, H2.
3193  pos[2] = pos[0] + (0.5 * (pos[3] + pos[4]) - pos[0]) * (r_om / r_ohc);
3194  // Now, adjust velocity of particle to get it to appropriate place
3195  // during next integration "drift-step"
3196  if (invdt != 0) {
3197  vel[2] = (pos[2] - ref[2]) * invdt;
3198  }
3199  // No virial correction needed since lonepair is massless
3200 }
3201 
3202 void HomePatch::tip4_omrepos(Vector* ref, Vector* pos, Vector* vel, BigReal invdt) {
3203  /* Reposition the om particle of a tip4p water
3204  * A little geometry shows that the appropriate position is given by
 3205  * R_O + (r_om / r_ohc) * ( 0.5 (R_H1 + R_H2) - R_O )
3206  * Here r_om is the distance from the oxygen to Om site, and r_ohc
3207  * is the altitude from the oxygen to the hydrogen center of mass
3208  * Those quantities are precalculated upon initialization of HomePatch
3209  *
3210  * Ordering of TIP4P atoms: O, H1, H2, LP.
3211  */
3212 
3213  //printf("rom/rohc are %f %f and invdt is %f\n", r_om, r_ohc, invdt);
3214  //printf("Other positions are: \n 0: %f %f %f\n 1: %f %f %f\n 2: %f %f %f\n", pos[0].x, pos[0].y, pos[0].z, pos[1].x, pos[1].y, pos[1].z, pos[2].x, pos[2].y, pos[2].z);
3215  pos[3] = pos[0] + (0.5 * (pos[1] + pos[2]) - pos[0]) * (r_om / r_ohc);
3216  //printf("New position for lp is %f %f %f\n", pos[3].x, pos[3].y, pos[3].z);
3217 
3218  // Now, adjust the velocity of the particle to get it to the appropriate place
3219  if (invdt != 0) {
3220  vel[3] = (pos[3] - ref[3]) * invdt;
3221  }
3222 
3223  // No virial correction needed, since this is a massless particle
3224  return;
3225 }
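// Numeric check (typical TIP4P geometry, values for illustration only):
// with r_om ~ 0.15 A and r_ohc ~ 0.586 A (O-H 0.9572 A, H-O-H 104.52
// deg), the factor r_om/r_ohc ~ 0.256, so the lonepair lands about a
// quarter of the way from the oxygen toward the hydrogen midpoint, on the
// H-O-H bisector.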
3226 
3227 void HomePatch::redistrib_lonepair_forces(const int ftag, Tensor *virial) {
3228  // ASSERT: simParams->lonepairs == TRUE
3229  ForceList *f_mod = f;
3230  for (int i = 0; i < numAtoms; i++) {
3231  if (atom[i].mass < 0.01) {
3232  // found a lone pair
3233  AtomID aid = atom[i].id; // global atom ID of lp
3234  Lphost *lph = Node::Object()->molecule->get_lphost(aid); // its lphost
3235  if (lph == NULL) {
3236  char errmsg[512];
3237  sprintf(errmsg, "redistrib lone pair forces: "
3238  "no Lphost exists for LP %d\n", aid);
3239  NAMD_die(errmsg);
3240  }
3241  LocalID j = AtomMap::Object()->localID(lph->atom2);
3242  LocalID k = AtomMap::Object()->localID(lph->atom3);
3243  LocalID l = AtomMap::Object()->localID(lph->atom4);
3244  if (j.pid != patchID || k.pid != patchID || l.pid != patchID) {
3245  char errmsg[512];
3246  sprintf(errmsg, "redistrib lone pair forces: "
3247  "LP %d has some Lphost atom off patch\n", aid);
3248  NAMD_die(errmsg);
3249  }
3250  // redistribute forces from this lone pair
3251  if (lph->numhosts == 2) {
3252  redistrib_colinear_lp_force(f_mod[ftag][i], f_mod[ftag][j.index],
3253  f_mod[ftag][k.index], atom[i].position, atom[j.index].position,
3254  atom[k.index].position, lph->distance, lph->angle, virial);
3255  }
3256  else if (lph->numhosts == 3) {
3257  int midpt = (lph->distance < 0);
3258  redistrib_relative_lp_force(f_mod[ftag][i], f_mod[ftag][j.index],
3259  f_mod[ftag][k.index], f_mod[ftag][l.index],
3260  atom[i].position, atom[j.index].position,
3261  atom[k.index].position, atom[l.index].position, virial, midpt);
3262  }
3263  }
3264  }
3265 }
3266 
3267 void HomePatch::redistrib_alchpair_forces(const int ftag) { //Virial unchanged
 3268  SimParameters *simParams = Node::Object()->simParameters;
3269  Molecule *mol = Node::Object()->molecule;
3270  ForceList *f_mod = f;
3271  int numFepInitial = mol->numFepInitial;
3272  BigReal lambda = simParams->alchLambda;
3273  int alchPair_id;
3274  for (int i = 0; i < numAtoms; i++) {
3275  if (atom[i].partition == 4 ) {
3276  alchPair_id = atom[i].id + numFepInitial; //global id of the pair atom.
3277  LocalID j = AtomMap::Object()->localID(alchPair_id);
3278  redistrib_ap_force(f_mod[ftag][i],f_mod[ftag][j.index]);
3279  }
3280  }
3281 }
3282 
3283 void HomePatch::redistrib_swm4_forces(const int ftag, Tensor *virial) {
3284  // Loop over the patch's atoms and apply the appropriate corrections
3285  // to get all forces off of lone pairs
3286  ForceList *f_mod = f;
3287  for (int i = 0; i < numAtoms; i++) {
3288  if (atom[i].mass < 0.01) {
3289  // found lonepair
3290  redistrib_lp_water_force(f_mod[ftag][i-2], f_mod[ftag][i+1],
3291  f_mod[ftag][i+2], f_mod[ftag][i],
3292  atom[i-2].position, atom[i+1].position,
3293  atom[i+2].position, atom[i].position, virial);
3294  }
3295  }
3296 }
3297 
3298 void HomePatch::redistrib_tip4p_forces(const int ftag, Tensor* virial) {
3299  // Loop over the patch's atoms and apply the appropriate corrections
3300  // to get all forces off of lone pairs
3301  // Atom ordering: O H1 H2 LP
 3302  ForceList *f_mod = f;
3303  for (int i=0; i<numAtoms; i++) {
3304  if (atom[i].mass < 0.01) {
3305  // found lonepair
3306  redistrib_lp_water_force(f_mod[ftag][i-3], f_mod[ftag][i-2],
3307  f_mod[ftag][i-1], f_mod[ftag][i],
3308  atom[i-3].position, atom[i-2].position,
3309  atom[i-1].position, atom[i].position, virial);
3310  }
3311  }
3312 }
3313 
3314 
 3315 void HomePatch::addForceToMomentum(
3316  FullAtom * __restrict atom_arr,
3317  const Force * __restrict force_arr,
3318  const BigReal dt,
3319  int num_atoms
3320  ) {
 3321  SimParameters *simParams = Node::Object()->simParameters;
3322 
3323  if ( simParams->fixedAtomsOn ) {
3324  for ( int i = 0; i < num_atoms; ++i ) {
3325  if ( atom_arr[i].atomFixed ) {
3326  atom_arr[i].velocity = 0;
3327  } else {
3328  BigReal dt_mass = dt * atom_arr[i].recipMass; // dt/mass
3329  atom_arr[i].velocity.x += force_arr[i].x * dt_mass;
3330  atom_arr[i].velocity.y += force_arr[i].y * dt_mass;
3331  atom_arr[i].velocity.z += force_arr[i].z * dt_mass;
3332  }
3333  }
3334  } else {
3335  for ( int i = 0; i < num_atoms; ++i ) {
3336  BigReal dt_mass = dt * atom_arr[i].recipMass; // dt/mass
3337  atom_arr[i].velocity.x += force_arr[i].x * dt_mass;
3338  atom_arr[i].velocity.y += force_arr[i].y * dt_mass;
3339  atom_arr[i].velocity.z += force_arr[i].z * dt_mass;
3340  }
3341  }
3342 }
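// The loop above is the "kick" update of the integrator (a sketch, with
// recipMass = 1/m precomputed per atom):
//
//   v_i <- v_i + dt * F_i / m_i
//
// Fixed atoms are explicitly zeroed rather than skipped so they can never
// carry a stale velocity from an earlier step.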
3343 
 3344 void HomePatch::addForceToMomentum3(
3345  FullAtom * __restrict atom_arr,
3346  const Force * __restrict force_arr1,
3347  const Force * __restrict force_arr2,
3348  const Force * __restrict force_arr3,
3349  const BigReal dt1,
3350  const BigReal dt2,
3351  const BigReal dt3,
3352  int num_atoms
3353  ) {
 3354  SimParameters *simParams = Node::Object()->simParameters;
3355 
3356  if ( simParams->fixedAtomsOn ) {
3357  for ( int i = 0; i < num_atoms; ++i ) {
3358  if ( atom_arr[i].atomFixed ) {
3359  atom_arr[i].velocity = 0;
3360  } else {
3361  BigReal rmass = atom_arr[i].recipMass; // 1/mass
3362  atom_arr[i].velocity.x += (force_arr1[i].x*dt1
3363  + force_arr2[i].x*dt2 + force_arr3[i].x*dt3) * rmass;
3364  atom_arr[i].velocity.y += (force_arr1[i].y*dt1
3365  + force_arr2[i].y*dt2 + force_arr3[i].y*dt3) * rmass;
3366  atom_arr[i].velocity.z += (force_arr1[i].z*dt1
3367  + force_arr2[i].z*dt2 + force_arr3[i].z*dt3) * rmass;
3368  }
3369  }
3370  } else {
3371  for ( int i = 0; i < num_atoms; ++i ) {
3372  BigReal rmass = atom_arr[i].recipMass; // 1/mass
3373  atom_arr[i].velocity.x += (force_arr1[i].x*dt1
3374  + force_arr2[i].x*dt2 + force_arr3[i].x*dt3) * rmass;
3375  atom_arr[i].velocity.y += (force_arr1[i].y*dt1
3376  + force_arr2[i].y*dt2 + force_arr3[i].y*dt3) * rmass;
3377  atom_arr[i].velocity.z += (force_arr1[i].z*dt1
3378  + force_arr2[i].z*dt2 + force_arr3[i].z*dt3) * rmass;
3379  }
3380  }
3381 }
3382 
 3383 void HomePatch::addVelocityToPosition(
3384  FullAtom * __restrict atom_arr,
3385  const BigReal dt,
3386  int num_atoms
3387  ) {
 3388  SimParameters *simParams = Node::Object()->simParameters;
3389  if ( simParams->fixedAtomsOn ) {
3390  for ( int i = 0; i < num_atoms; ++i ) {
3391  if ( ! atom_arr[i].atomFixed ) {
3392  atom_arr[i].position.x += atom_arr[i].velocity.x * dt;
3393  atom_arr[i].position.y += atom_arr[i].velocity.y * dt;
3394  atom_arr[i].position.z += atom_arr[i].velocity.z * dt;
3395  }
3396  }
3397  } else {
3398  for ( int i = 0; i < num_atoms; ++i ) {
3399  atom_arr[i].position.x += atom_arr[i].velocity.x * dt;
3400  atom_arr[i].position.y += atom_arr[i].velocity.y * dt;
3401  atom_arr[i].position.z += atom_arr[i].velocity.z * dt;
3402  }
3403  }
3404 }
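// And this is the matching "drift" update, x_i <- x_i + dt * v_i, applied
// to every non-fixed atom; together with the kicks above it forms the
// usual kick-drift decomposition of the velocity Verlet step.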
3405 
3406 int HomePatch::hardWallDrude(const BigReal timestep, Tensor *virial,
3407  SubmitReduction *ppreduction)
3408 {
3409  Molecule *mol = Node::Object()->molecule;
 3410  SimParameters *simParams = Node::Object()->simParameters;
3411  const BigReal kbt=BOLTZMANN*simParams->drudeTemp;
3412  const int fixedAtomsOn = simParams->fixedAtomsOn;
3413  const BigReal dt = timestep / TIMEFACTOR;
3414  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
3415  int i, ia, ib, j;
3416  int dieOnError = simParams->rigidDie;
3417  Tensor wc; // constraint virial
 3418  BigReal idz, zmin, delta_T, maxtime = timestep, v_Bond;
3419  int nslabs;
3420 
3421  // start data for hard wall boundary between drude and its host atom
3422  // static int Count=0;
3423  int Idx;
3424  double r_wall, r_wall_SQ, rab, rab_SQ, dr, mass_a, mass_b, mass_sum;
3425  Vector v_ab, vb_1, vp_1, vb_2, vp_2, new_vel_a, new_vel_b, new_pos_a, new_pos_b, *new_pos, *new_vel;
3426  double dot_v_r_1, dot_v_r_2;
3427  double vb_cm, dr_a, dr_b;
3428  // end data for hard wall boundary between drude and its host atom
3429 
3430  // start calculation of hard wall boundary between drude and its host atom
3431  if (simParams->drudeHardWallOn) {
3432  if (ppreduction) {
3433  nslabs = simParams->pressureProfileSlabs;
3434  idz = nslabs/lattice.c().z;
3435  zmin = lattice.origin().z - 0.5*lattice.c().z;
3436  }
3437 
3438  r_wall = simParams->drudeBondLen;
3439  r_wall_SQ = r_wall*r_wall;
3440  // Count++;
3441  for (i=1; i<numAtoms; i++) {
3442  if ( (0.05 < atom[i].mass) && ((atom[i].mass < 1.0)) ) { // drude particle
3443  ia = i-1;
3444  ib = i;
3445 
3446  v_ab = atom[ib].position - atom[ia].position;
3447  rab_SQ = v_ab.x*v_ab.x + v_ab.y*v_ab.y + v_ab.z*v_ab.z;
3448 
3449  if (rab_SQ > r_wall_SQ) { // to impose the hard wall constraint
3450  rab = sqrt(rab_SQ);
3451  if ( (rab > (2.0*r_wall)) && dieOnError ) { // unexpected situation
3452  iout << iERROR << "HardWallDrude> "
3453  << "The drude is too far away from atom "
3454  << (atom[ia].id + 1) << " d = " << rab << "!\n" << endi;
3455  return -1; // triggers early exit
3456  }
3457 
3458  v_ab.x /= rab;
3459  v_ab.y /= rab;
3460  v_ab.z /= rab;
3461 
3462  if ( fixedAtomsOn && atom[ia].atomFixed ) { // the heavy atom is fixed
3463  if (atom[ib].atomFixed) { // the drude is fixed too
3464  continue;
3465  }
3466  else { // only the heavy atom is fixed
3467  dot_v_r_2 = atom[ib].velocity.x*v_ab.x
3468  + atom[ib].velocity.y*v_ab.y + atom[ib].velocity.z*v_ab.z;
3469  vb_2 = dot_v_r_2 * v_ab;
3470  vp_2 = atom[ib].velocity - vb_2;
3471 
3472  dr = rab - r_wall;
3473  if(dot_v_r_2 == 0.0) {
3474  delta_T = maxtime;
3475  }
3476  else {
3477  delta_T = dr/fabs(dot_v_r_2); // the time since the collision occurs
3478  if(delta_T > maxtime ) delta_T = maxtime; // make sure it is not crazy
3479  }
3480 
3481  dot_v_r_2 = -dot_v_r_2*sqrt(kbt/atom[ib].mass)/fabs(dot_v_r_2);
3482 
3483  vb_2 = dot_v_r_2 * v_ab;
3484 
3485  new_vel_a = atom[ia].velocity;
3486  new_vel_b = vp_2 + vb_2;
3487 
3488  dr_b = -dr + delta_T*dot_v_r_2; // L = L_0 + dT *v_new, v was flipped
3489 
3490  new_pos_a = atom[ia].position;
3491  new_pos_b = atom[ib].position + dr_b*v_ab; // correct the position
3492  }
3493  }
3494  else {
3495  mass_a = atom[ia].mass;
3496  mass_b = atom[ib].mass;
3497  mass_sum = mass_a+mass_b;
3498 
3499  dot_v_r_1 = atom[ia].velocity.x*v_ab.x
3500  + atom[ia].velocity.y*v_ab.y + atom[ia].velocity.z*v_ab.z;
3501  vb_1 = dot_v_r_1 * v_ab;
3502  vp_1 = atom[ia].velocity - vb_1;
3503 
3504  dot_v_r_2 = atom[ib].velocity.x*v_ab.x
3505  + atom[ib].velocity.y*v_ab.y + atom[ib].velocity.z*v_ab.z;
3506  vb_2 = dot_v_r_2 * v_ab;
3507  vp_2 = atom[ib].velocity - vb_2;
3508 
3509  vb_cm = (mass_a*dot_v_r_1 + mass_b*dot_v_r_2)/mass_sum;
3510 
3511  dot_v_r_1 -= vb_cm;
3512  dot_v_r_2 -= vb_cm;
3513 
3514  dr = rab - r_wall;
3515 
3516  if(dot_v_r_2 == dot_v_r_1) {
3517  delta_T = maxtime;
3518  }
3519  else {
3520  delta_T = dr/fabs(dot_v_r_2 - dot_v_r_1); // the time since the collision occurs
3521  if(delta_T > maxtime ) delta_T = maxtime; // make sure it is not crazy
3522  }
3523 
3524  // the relative velocity between ia and ib. Drawn according to T_Drude
3525  v_Bond = sqrt(kbt/mass_b);
3526 
3527  // reflect the velocity along bond vector and scale down
3528  dot_v_r_1 = -dot_v_r_1*v_Bond*mass_b/(fabs(dot_v_r_1)*mass_sum);
3529  dot_v_r_2 = -dot_v_r_2*v_Bond*mass_a/(fabs(dot_v_r_2)*mass_sum);
3530 
3531  dr_a = dr*mass_b/mass_sum + delta_T*dot_v_r_1;
3532  dr_b = -dr*mass_a/mass_sum + delta_T*dot_v_r_2;
3533 
3534  new_pos_a = atom[ia].position + dr_a*v_ab; // correct the position
3535  new_pos_b = atom[ib].position + dr_b*v_ab;
3536  // atom[ia].position += (dr_a*v_ab); // correct the position
3537  // atom[ib].position += (dr_b*v_ab);
3538 
3539  dot_v_r_1 += vb_cm;
3540  dot_v_r_2 += vb_cm;
3541 
3542  vb_1 = dot_v_r_1 * v_ab;
3543  vb_2 = dot_v_r_2 * v_ab;
3544 
3545  new_vel_a = vp_1 + vb_1;
3546  new_vel_b = vp_2 + vb_2;
3547  }
3548 
3549  int ppoffset, partition;
3550  if ( invdt == 0 ) {
3551  atom[ia].position = new_pos_a;
3552  atom[ib].position = new_pos_b;
3553  }
3554  else if ( virial == 0 ) {
3555  atom[ia].velocity = new_vel_a;
3556  atom[ib].velocity = new_vel_b;
3557  }
3558  else {
3559  for ( j = 0; j < 2; j++ ) {
3560  if (j==0) { // atom ia, heavy atom
3561  Idx = ia;
3562  new_pos = &new_pos_a;
3563  new_vel = &new_vel_a;
3564  }
3565  else if (j==1) { // atom ib, drude
3566  Idx = ib;
3567  new_pos = &new_pos_b;
3568  new_vel = &new_vel_b;
3569  }
3570  Force df = (*new_vel - atom[Idx].velocity) *
3571  ( atom[Idx].mass * invdt );
3572  Tensor vir = outer(df, atom[Idx].position);
3573  wc += vir;
3574  atom[Idx].velocity = *new_vel;
3575  atom[Idx].position = *new_pos;
3576 
3577  if (ppreduction) {
3578  if (!j) {
3579  BigReal z = new_pos->z;
3580  int partition = atom[Idx].partition;
3581  int slab = (int)floor((z-zmin)*idz);
3582  if (slab < 0) slab += nslabs;
3583  else if (slab >= nslabs) slab -= nslabs;
3584  ppoffset = 3*(slab + nslabs*partition);
3585  }
3586  ppreduction->item(ppoffset ) += vir.xx;
3587  ppreduction->item(ppoffset+1) += vir.yy;
3588  ppreduction->item(ppoffset+2) += vir.zz;
3589  }
3590 
3591  }
3592  }
3593  }
3594  }
3595  }
3596 
3597  // if ( (Count>10000) && (Count%10==0) ) {
3598  // v_ab = atom[1].position - atom[0].position;
3599  // rab_SQ = v_ab.x*v_ab.x + v_ab.y*v_ab.y + v_ab.z*v_ab.z;
3600  // iout << "DBG_R: " << Count << " " << sqrt(rab_SQ) << "\n" << endi;
3601  // }
3602 
3603  }
3604 
3605  // end calculation of hard-wall boundary between a Drude particle and its host atom
3606 
3607  if ( dt && virial ) *virial += wc;
3608 
3609  return 0;
3610 }
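// For reference: the hard-wall pass above repeatedly splits a velocity into
// its component along the unit bond vector v_ab and the transverse remainder.
// A minimal standalone sketch of that decomposition (illustrative only, so it
// is kept out of the build; u is assumed to be unit length):
#if 0
static void splitAlongBond(const Vector &u, const Vector &v,
                           Vector &v_bond, Vector &v_perp) {
  BigReal c = v.x*u.x + v.y*u.y + v.z*u.z;  // c = v . u
  v_bond = c * u;       // bond-parallel part; reflected and rescaled above
  v_perp = v - v_bond;  // transverse part; the wall leaves it unchanged
}
#endif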
3611 
3612 void HomePatch::buildRattleList() {
3613 #ifdef DEBUG_MINIMIZE
3614  if (patchID == 0) {
3615  printf("Step %d, patch %d: buildRattleList()\n",
3616  flags.step, (int)patchID);
3617  }
3618 #endif
3619  SimParameters *simParams = Node::Object()->simParameters;
3620  const int fixedAtomsOn = simParams->fixedAtomsOn;
3621  const int useSettle = simParams->useSettle;
3622 
3623  // Resize to contain numAtoms elements
3624  velNew.resize(numAtoms);
3625  posNew.resize(numAtoms);
3626 
3627  // Size of a hydrogen group for water
3628  const WaterModel watmodel = simParams->watmodel;
3629  const int wathgsize = getWaterModelGroupSize(watmodel);
3630 
3631  // Initialize the settle algorithm with water parameters
3632  // settle1() assumes all waters are identical,
3633  // and will generate bad results if they are not.
3634  // XXX this will move to Molecule::build_atom_status when that
3635  // version is debugged
3636  if ( ! settle_initialized ) {
3637  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
3638  // find a water
3639  if (atom[ig].rigidBondLength > 0) {
3640  int oatm;
3641  if (watmodel == WaterModel::SWM4) {
3642  oatm = ig+3; // skip over Drude and Lonepair
3643  //printf("ig=%d mass_ig=%g oatm=%d mass_oatm=%g\n",
3644  // ig, atom[ig].mass, oatm, atom[oatm].mass);
3645  }
3646  else {
3647  oatm = ig+1;
3648  // Avoid using the Om site to set this by mistake
3649  if (atom[ig].mass < 0.5 || atom[ig+1].mass < 0.5) {
3650  oatm += 1;
3651  }
3652  }
3653 
3654  // initialize settle water parameters
3655  settle1init(atom[ig].mass, atom[oatm].mass,
3656  atom[ig].rigidBondLength,
3657  atom[oatm].rigidBondLength,
3658  settle_mO, settle_mH,
3659  settle_mOrmT, settle_mHrmT, settle_ra,
3660  settle_rb, settle_rc, settle_rra);
3661  settle_initialized = 1;
3662  break; // done with init
3663  }
3664  }
3665  }
3666 
3667  Vector ref[10];
3668  BigReal rmass[10];
3669  BigReal dsq[10];
3670  int fixed[10];
3671  int ial[10];
3672  int ibl[10];
3673 
3674  int numSettle = 0;
3675  int numRattle = 0;
3676  int posRattleParam = 0;
3677 
3678  settleList.clear();
3679  rattleList.clear();
3680  noconstList.clear();
3681  rattleParam.clear();
3682 
3683  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
3684  int hgs = atom[ig].hydrogenGroupSize;
3685  if ( hgs == 1 ) {
3686  // only one atom in group
3687  noconstList.push_back(ig);
3688  continue;
3689  }
3690  int anyfixed = 0;
3691  for (int i = 0; i < hgs; ++i ) {
3692  ref[i] = atom[ig+i].position;
3693  rmass[i] = (atom[ig+i].mass > 0. ? 1. / atom[ig+i].mass : 0.);
3694  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
3695  if ( fixed[i] ) {
3696  anyfixed = 1;
3697  rmass[i] = 0.;
3698  }
3699  }
3700  int icnt = 0;
3701  BigReal tmp = atom[ig].rigidBondLength;
3702  if (tmp > 0.0) { // for water
3703  if (hgs != wathgsize) {
3704  char errmsg[256];
3705  sprintf(errmsg, "Water molecule starting with atom %d contains %d atoms "
3706  "but the specified water model requires %d atoms.\n",
3707  atom[ig].id+1, hgs, wathgsize);
3708  NAMD_die(errmsg);
3709  }
3710  // Use SETTLE for water unless some of the water atoms are fixed.
3711  if (useSettle && !anyfixed) {
3712  // Store to SETTLE list
3713  settleList.push_back(ig);
3714  continue;
3715  }
3716  if ( !(fixed[1] && fixed[2]) ) {
3717  dsq[icnt] = tmp * tmp;
3718  ial[icnt] = 1;
3719  ibl[icnt] = 2;
3720  ++icnt;
3721  }
3722  } // if (tmp > 0.0)
3723  for (int i = 1; i < hgs; ++i ) { // normal bonds to mother atom
3724  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
3725  if ( !(fixed[0] && fixed[i]) ) {
3726  dsq[icnt] = tmp * tmp;
3727  ial[icnt] = 0;
3728  ibl[icnt] = i;
3729  ++icnt;
3730  }
3731  }
3732  }
3733  if ( icnt == 0 ) {
3734  // no constraints
3735  noconstList.push_back(ig);
3736  continue;
3737  }
3738  // Store to RATTLE list
3739  RattleList rattleListElem;
3740  rattleListElem.ig = ig;
3741  rattleListElem.icnt = icnt;
3742  rattleList.push_back(rattleListElem);
3743  for (int i = 0; i < icnt; ++i ) {
3744  int a = ial[i];
3745  int b = ibl[i];
3746  RattleParam rattleParamElem;
3747  rattleParamElem.ia = a;
3748  rattleParamElem.ib = b;
3749  rattleParamElem.dsq = dsq[i];
3750  rattleParamElem.rma = rmass[a];
3751  rattleParamElem.rmb = rmass[b];
3752  rattleParam.push_back(rattleParamElem);
3753  }
3754  // pad rattleParam with zero-filled dummy entries so each group has exactly 4
3755  for (int i = icnt; i < 4; ++i ) {
3756  RattleParam rattleParamElem;
3757  rattleParamElem.ia = 0;
3758  rattleParamElem.ib = 0;
3759  rattleParamElem.dsq = 0;
3760  rattleParamElem.rma = 0;
3761  rattleParamElem.rmb = 0;
3762  rattleParam.push_back(rattleParamElem);
3763  }
3764 #if 0
3765  for(int i = 0; i < 4; ++i) {
3766  std::cout << rattleParam[i].ia << " " << rattleParam[i].ib << std::endl;
3767  }
3768  std::cout << std::endl;
3769 #endif
3770  }
3771 
3772 }
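// Because each constrained group stores exactly four RattleParam entries
// (real constraints first, zero-filled padding after), consumers can walk
// rattleParam with a fixed stride of 4 instead of a per-group prefix sum;
// rattle1() below does exactly this with posParam += 4.  A sketch of the
// access pattern implied by the padding (illustrative only):
#if 0
for (int j = 0; j < rattleList.size(); ++j) {
  const RattleParam *p = &rattleParam[4*j];  // fixed stride per group
  for (int k = 0; k < rattleList[j].icnt; ++k) {
    // p[k].ia and p[k].ib index atoms relative to rattleList[j].ig
  }
}
#endif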
3773 
3774 void HomePatch::addRattleForce(const BigReal invdt, Tensor& wc) {
3775  for (int ig = 0; ig < numAtoms; ++ig ) {
3776  Force df = (velNew[ig] - atom[ig].velocity) * ( atom[ig].mass * invdt );
3777  Tensor vir = outer(df, atom[ig].position);
3778  wc += vir;
3779  f[Results::normal][ig] += df;
3780  atom[ig].velocity = velNew[ig];
3781  }
3782 }
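// The constraint force added above is df = m * (vNew - v) / dt, and its
// virial contribution is the outer product with the atom position r,
// wc += df (x) r.  outer() expands component-wise as in this sketch, which
// is the same form written out explicitly in rattle1_SOA() further below:
#if 0
Tensor vir;
vir.xx = df.x*r.x;  vir.xy = df.x*r.y;  vir.xz = df.x*r.z;
vir.yx = df.y*r.x;  vir.yy = df.y*r.y;  vir.yz = df.y*r.z;
vir.zx = df.z*r.x;  vir.zy = df.z*r.y;  vir.zz = df.z*r.z;
#endif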
3783 
3784 int HomePatch::rattle1(const BigReal timestep, Tensor *virial,
3785  SubmitReduction *ppreduction) {
3786 
3787  SimParameters *simParams = Node::Object()->simParameters;
3788  if (simParams->watmodel != WaterModel::TIP3 || ppreduction) {
3789  // Fall back to the old rattle1() implementation
3790  return rattle1old(timestep, virial, ppreduction);
3791  }
3792 
3793  if (!rattleListValid) {
3794  buildRattleList();
3795  rattleListValid = true;
3796  }
3797 
3798  const int fixedAtomsOn = simParams->fixedAtomsOn;
3799  const int useSettle = simParams->useSettle;
3800  const BigReal dt = timestep / TIMEFACTOR;
3801  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
3802  const BigReal tol2 = 2.0 * simParams->rigidTol;
3803  int maxiter = simParams->rigidIter;
3804  int dieOnError = simParams->rigidDie;
3805 
3806  Vector ref[10]; // reference position
3807  Vector pos[10]; // new position
3808  Vector vel[10]; // new velocity
3809 
3810  // Manually unrolled: process two SETTLE waters per iteration
3811  int n = (settleList.size()/2)*2;
3812  for (int j=0;j < n;j+=2) {
3813  int ig;
3814  ig = settleList[j];
3815  for (int i = 0; i < 3; ++i ) {
3816  ref[i] = atom[ig+i].position;
3817  pos[i] = atom[ig+i].position + atom[ig+i].velocity * dt;
3818  }
3819  ig = settleList[j+1];
3820  for (int i = 0; i < 3; ++i ) {
3821  ref[i+3] = atom[ig+i].position;
3822  pos[i+3] = atom[ig+i].position + atom[ig+i].velocity * dt;
3823  }
3824  settle1_SIMD<2>(ref, pos,
3825  settle_mOrmT, settle_mHrmT, settle_ra,
3826  settle_rb, settle_rc, settle_rra);
3827 
3828  ig = settleList[j];
3829  for (int i = 0; i < 3; ++i ) {
3830  velNew[ig+i] = (pos[i] - ref[i])*invdt;
3831  posNew[ig+i] = pos[i];
3832  }
3833  ig = settleList[j+1];
3834  for (int i = 0; i < 3; ++i ) {
3835  velNew[ig+i] = (pos[i+3] - ref[i+3])*invdt;
3836  posNew[ig+i] = pos[i+3];
3837  }
3838 
3839  }
3840 
3841  if (settleList.size() % 2) {
3842  int ig = settleList[settleList.size()-1];
3843  for (int i = 0; i < 3; ++i ) {
3844  ref[i] = atom[ig+i].position;
3845  pos[i] = atom[ig+i].position + atom[ig+i].velocity * dt;
3846  }
3847  settle1_SIMD<1>(ref, pos,
3848  settle_mOrmT, settle_mHrmT, settle_ra,
3849  settle_rb, settle_rc, settle_rra);
3850  for (int i = 0; i < 3; ++i ) {
3851  velNew[ig+i] = (pos[i] - ref[i])*invdt;
3852  posNew[ig+i] = pos[i];
3853  }
3854  }
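// The two loops above use a standard pair-unrolling pattern: the main loop
// covers the largest even prefix two waters at a time so settle1_SIMD<2>
// can pack them, and the tail handles a possible odd leftover with
// settle1_SIMD<1>.  Generic shape of the pattern (sketch; process1,
// process2, item, and count are hypothetical placeholders):
#if 0
int n = (count/2)*2;                       // largest even prefix
for (int j = 0; j < n; j += 2)
  process2(item[j], item[j+1]);            // packed, two at a time
if (count % 2)
  process1(item[count-1]);                 // odd remainder
#endif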
3855 
3856  int posParam = 0;
3857  for (int j=0;j < rattleList.size();++j) {
3858 
3859  BigReal refx[10];
3860  BigReal refy[10];
3861  BigReal refz[10];
3862 
3863  BigReal posx[10];
3864  BigReal posy[10];
3865  BigReal posz[10];
3866 
3867  int ig = rattleList[j].ig;
3868  int icnt = rattleList[j].icnt;
3869  int hgs = atom[ig].hydrogenGroupSize;
3870  for (int i = 0; i < hgs; ++i ) {
3871  ref[i] = atom[ig+i].position;
3872  pos[i] = atom[ig+i].position;
3873  if (!(fixedAtomsOn && atom[ig+i].atomFixed)) {
3874  pos[i] += atom[ig+i].velocity * dt;
3875  }
3876  refx[i] = ref[i].x;
3877  refy[i] = ref[i].y;
3878  refz[i] = ref[i].z;
3879  posx[i] = pos[i].x;
3880  posy[i] = pos[i].y;
3881  posz[i] = pos[i].z;
3882  }
3883 
3884  bool done;
3885  bool consFailure;
3886  if (icnt == 1) {
3887  rattlePair<1>(&rattleParam[posParam],
3888  refx, refy, refz,
3889  posx, posy, posz,
3890  consFailure);
3891  done = true;
3892  } else {
3893  if (simParams->mshakeOn) {
3894  MSHAKEIterate(icnt, &rattleParam[posParam],
3895  refx, refy, refz,
3896  posx, posy, posz,
3897  tol2, maxiter,
3898  done, consFailure);
3899  }
3900  else if(simParams->lincsOn) {
3901  LINCS(icnt, &rattleParam[posParam],
3902  refx, refy, refz,
3903  posx, posy, posz,
3904  tol2, maxiter, done, consFailure);
3905  }
3906  else
3907  rattleN(icnt, &rattleParam[posParam],
3908  refx, refy, refz,
3909  posx, posy, posz,
3910  tol2, maxiter,
3911  done, consFailure);
3912  }
3913 
3914  // Advance position in rattleParam
3915  //posParam += icnt;
3916  posParam += 4;
3917  for (int i = 0; i < hgs; ++i ) {
3918  pos[i].x = posx[i];
3919  pos[i].y = posy[i];
3920  pos[i].z = posz[i];
3921  }
3922 
3923  for (int i = 0; i < hgs; ++i ) {
3924  velNew[ig+i] = (pos[i] - ref[i])*invdt;
3925  posNew[ig+i] = pos[i];
3926  }
3927 
3928  if ( consFailure ) {
3929  if ( dieOnError ) {
3930  iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
3931  << (atom[ig].id + 1) << "!\n" << endi;
3932  return -1; // triggers early exit
3933  } else {
3934  iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
3935  << (atom[ig].id + 1) << "!\n" << endi;
3936  }
3937  } else if ( ! done ) {
3938  if ( dieOnError ) {
3939  iout << iERROR << "Exceeded RATTLE iteration limit for atom "
3940  << (atom[ig].id + 1) << "!\n" << endi;
3941  return -1; // triggers early exit
3942  } else {
3943  iout << iWARN << "Exceeded RATTLE iteration limit for atom "
3944  << (atom[ig].id + 1) << "!\n" << endi;
3945  }
3946  }
3947  }
3948  // Finally, copy the positions and velocities of atoms not involved in RATTLE,
3949  // so that posNew and velNew are up to date for every atom.
3950  for (int j=0;j < noconstList.size();++j) {
3951  int ig = noconstList[j];
3952  int hgs = atom[ig].hydrogenGroupSize;
3953  for (int i = 0; i < hgs; ++i ) {
3954  velNew[ig+i] = atom[ig+i].velocity;
3955  posNew[ig+i] = atom[ig+i].position;
3956  }
3957  }
3958 
3959  if ( invdt == 0 ) {
3960  for (int ig = 0; ig < numAtoms; ++ig )
3961  atom[ig].position = posNew[ig];
3962  } else if ( virial == 0 ) {
3963  for (int ig = 0; ig < numAtoms; ++ig )
3964  atom[ig].velocity = velNew[ig];
3965  } else {
3966  Tensor wc; // constraint virial
3967  addRattleForce(invdt, wc);
3968  *virial += wc;
3969  }
3970 
3971  return 0;
3972 }
3973 
3974 // RATTLE algorithm from Allen & Tildesley
3975 int HomePatch::rattle1old(const BigReal timestep, Tensor *virial,
3976  SubmitReduction *ppreduction)
3977 {
3978  // CkPrintf("Call HomePatch::rattle1old\n");
3979  Molecule *mol = Node::Object()->molecule;
3980  SimParameters *simParams = Node::Object()->simParameters;
3981  const int fixedAtomsOn = simParams->fixedAtomsOn;
3982  const int useSettle = simParams->useSettle;
3983  const BigReal dt = timestep / TIMEFACTOR;
3984  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
3985  BigReal tol2 = 2.0 * simParams->rigidTol;
3986  int maxiter = simParams->rigidIter;
3987  int dieOnError = simParams->rigidDie;
3988  int i, iter;
3989  BigReal dsq[10], tmp;
3990  int ial[10], ibl[10];
3991  Vector ref[10]; // reference position
3992  Vector refab[10]; // reference vector
3993  Vector pos[10]; // new position
3994  Vector vel[10]; // new velocity
3995  Vector netdp[10]; // total momentum change from constraint
3996  BigReal rmass[10]; // 1 / mass
3997  int fixed[10]; // is atom fixed?
3998  Tensor wc; // constraint virial
3999  BigReal idz, zmin;
4000  int nslabs;
4001 
4002  // Size of a hydrogen group for water
4003  const WaterModel watmodel = simParams->watmodel;
4004  const int wathgsize = getWaterModelGroupSize(watmodel);
4005 
4006  // Initialize the settle algorithm with water parameters
4007  // settle1() assumes all waters are identical,
4008  // and will generate bad results if they are not.
4009  // XXX this will move to Molecule::build_atom_status when that
4010  // version is debugged
4011  if ( ! settle_initialized ) {
4012  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4013  // find a water
4014  if (atom[ig].rigidBondLength > 0) {
4015  int oatm;
4016  if (watmodel == WaterModel::SWM4) {
4017  oatm = ig+3; // skip over Drude and Lonepair
4018  //printf("ig=%d mass_ig=%g oatm=%d mass_oatm=%g\n",
4019  // ig, atom[ig].mass, oatm, atom[oatm].mass);
4020  }
4021  else {
4022  oatm = ig+1;
4023  // Avoid using the Om site to set this by mistake
4024  if (atom[ig].mass < 0.5 || atom[ig+1].mass < 0.5) {
4025  oatm += 1;
4026  }
4027  }
4028 
4029  // initialize settle water parameters
4030  settle1init(atom[ig].mass, atom[oatm].mass,
4031  atom[ig].rigidBondLength,
4032  atom[oatm].rigidBondLength,
4033  settle_mO, settle_mH,
4034  settle_mOrmT, settle_mHrmT, settle_ra,
4035  settle_rb, settle_rc, settle_rra);
4036  settle_initialized = 1;
4037  break; // done with init
4038  }
4039  }
4040  }
4041 
4042  if (ppreduction) {
4043  nslabs = simParams->pressureProfileSlabs;
4044  idz = nslabs/lattice.c().z;
4045  zmin = lattice.origin().z - 0.5*lattice.c().z;
4046  }
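// Pressure-profile bookkeeping: idz is slabs per unit length along the c
// axis and zmin is the lower cell face, so an atom's slab index is
// floor((z - zmin)*idz), wrapped periodically into [0, nslabs).  Worked
// example with hypothetical numbers: nslabs = 10 and c.z = 50 A give
// idz = 0.2/A and zmin = -25 A; then z = -27 A yields floor(-0.4) = -1,
// wrapped to slab 9.  Each slab holds three reduction items (xx, yy, zz):
#if 0
int slab = (int)floor((z - zmin)*idz);
if (slab < 0) slab += nslabs;              // wrap below the cell
else if (slab >= nslabs) slab -= nslabs;   // wrap above the cell
int ppoffset = 3*(slab + nslabs*partition);
#endif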
4047 
4048  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4049  int hgs = atom[ig].hydrogenGroupSize;
4050  if ( hgs == 1 ) continue; // only one atom in group
4051  // cache data in local arrays and integrate positions normally
4052  int anyfixed = 0;
4053  for ( i = 0; i < hgs; ++i ) {
4054  ref[i] = atom[ig+i].position;
4055  pos[i] = atom[ig+i].position;
4056  vel[i] = atom[ig+i].velocity;
4057  rmass[i] = (atom[ig+i].mass > 0. ? 1. / atom[ig+i].mass : 0.);
4058  //printf("rmass of %i is %f\n", ig+i, rmass[i]);
4059  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
4060  //printf("fixed status of %i is %i\n", i, fixed[i]);
4061  // undo addVelocityToPosition to get proper reference coordinates
4062  if ( fixed[i] ) { anyfixed = 1; rmass[i] = 0.; } else pos[i] += vel[i] * dt;
4063  }
4064  int icnt = 0;
4065  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
4066  if (hgs != wathgsize) {
4067  char errmsg[256];
4068  sprintf(errmsg, "Water molecule starting with atom %d contains %d atoms "
4069  "but the specified water model requires %d atoms.\n",
4070  atom[ig].id+1, hgs, wathgsize);
4071  NAMD_die(errmsg);
4072  }
4073  // Use SETTLE for water unless some of the water atoms are fixed.
4074  if (useSettle && !anyfixed) {
4075  if (watmodel == WaterModel::SWM4) {
4076  // SWM4 ordering: O D LP H1 H2
4077  // do swap(O,LP) and call settle with subarray O H1 H2
4078  // swap back after we return
4079  Vector lp_ref = ref[2];
4080  Vector lp_pos = pos[2];
4081  Vector lp_vel = vel[2];
4082  ref[2] = ref[0];
4083  pos[2] = pos[0];
4084  vel[2] = vel[0];
4085  settle1(ref+2, pos+2, vel+2, invdt,
4086  settle_mOrmT, settle_mHrmT, settle_ra,
4087  settle_rb, settle_rc, settle_rra);
4088  ref[0] = ref[2];
4089  pos[0] = pos[2];
4090  vel[0] = vel[2];
4091  ref[2] = lp_ref;
4092  pos[2] = lp_pos;
4093  vel[2] = lp_vel;
4094  // determine for LP updated pos and vel
4095  swm4_omrepos(ref, pos, vel, invdt);
4096  }
4097  else {
4098  settle1(ref, pos, vel, invdt,
4099  settle_mOrmT, settle_mHrmT, settle_ra,
4100  settle_rb, settle_rc, settle_rra);
4101  if (watmodel == WaterModel::TIP4) {
4102  tip4_omrepos(ref, pos, vel, invdt);
4103  }
4104  }
4105 
4106  // which slab the hydrogen group will belong to
4107  // for pressure-profile calculations.
4108  int ppoffset, partition;
4109  if ( invdt == 0 ) for ( i = 0; i < wathgsize; ++i ) {
4110  atom[ig+i].position = pos[i];
4111  } else if ( virial == 0 ) for ( i = 0; i < wathgsize; ++i ) {
4112  atom[ig+i].velocity = vel[i];
4113  } else for ( i = 0; i < wathgsize; ++i ) {
4114  Force df = (vel[i] - atom[ig+i].velocity) * ( atom[ig+i].mass * invdt );
4115  Tensor vir = outer(df, ref[i]);
4116  wc += vir;
4117  f[Results::normal][ig+i] += df;
4118  atom[ig+i].velocity = vel[i];
4119  if (ppreduction) {
4120  // put all the atoms from a water in the same slab. Atom 0
4121  // should be the parent atom.
4122  if (!i) {
4123  BigReal z = pos[i].z;
4124  partition = atom[ig].partition;
4125  int slab = (int)floor((z-zmin)*idz);
4126  if (slab < 0) slab += nslabs;
4127  else if (slab >= nslabs) slab -= nslabs;
4128  ppoffset = 3*(slab + nslabs*partition);
4129  }
4130  ppreduction->item(ppoffset ) += vir.xx;
4131  ppreduction->item(ppoffset+1) += vir.yy;
4132  ppreduction->item(ppoffset+2) += vir.zz;
4133  }
4134  }
4135  continue;
4136  }
4137  if ( !(fixed[1] && fixed[2]) ) {
4138  dsq[icnt] = tmp * tmp; ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
4139  }
4140  }
4141  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4142  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
4143  if ( !(fixed[0] && fixed[i]) ) {
4144  dsq[icnt] = tmp * tmp; ial[icnt] = 0; ibl[icnt] = i; ++icnt;
4145  }
4146  }
4147  }
4148  if ( icnt == 0 ) continue; // no constraints
4149  for ( i = 0; i < icnt; ++i ) {
4150  refab[i] = ref[ial[i]] - ref[ibl[i]];
4151  }
4152  for ( i = 0; i < hgs; ++i ) {
4153  netdp[i] = 0.;
4154  }
4155  int done;
4156  int consFailure;
4157  for ( iter = 0; iter < maxiter; ++iter ) {
4158 //if (iter > 0) CkPrintf("iteration %d\n", iter);
4159  done = 1;
4160  consFailure = 0;
4161  for ( i = 0; i < icnt; ++i ) {
4162  int a = ial[i]; int b = ibl[i];
4163  Vector pab = pos[a] - pos[b];
4164  BigReal pabsq = pab.x*pab.x + pab.y*pab.y + pab.z*pab.z;
4165  BigReal rabsq = dsq[i];
4166  BigReal diffsq = rabsq - pabsq;
4167  if ( fabs(diffsq) > (rabsq * tol2) ) {
4168  Vector &rab = refab[i];
4169  BigReal rpab = rab.x*pab.x + rab.y*pab.y + rab.z*pab.z;
4170  if ( rpab < ( rabsq * 1.0e-6 ) ) {
4171  done = 0;
4172  consFailure = 1;
4173  continue;
4174  }
4175  BigReal rma = rmass[a];
4176  BigReal rmb = rmass[b];
4177  BigReal gab = diffsq / ( 2.0 * ( rma + rmb ) * rpab );
4178  Vector dp = rab * gab;
4179  pos[a] += rma * dp;
4180  pos[b] -= rmb * dp;
4181  if ( invdt != 0. ) {
4182  dp *= invdt;
4183  netdp[a] += dp;
4184  netdp[b] -= dp;
4185  }
4186  done = 0;
4187  }
4188  }
4189  if ( done ) break;
4190  }
4191 
4192  if ( consFailure ) {
4193  if ( dieOnError ) {
4194  iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
4195  << (atom[ig].id + 1) << "!\n" << endi;
4196  return -1; // triggers early exit
4197  } else {
4198  iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
4199  << (atom[ig].id + 1) << "!\n" << endi;
4200  }
4201  } else if ( ! done ) {
4202  if ( dieOnError ) {
4203  iout << iERROR << "Exceeded RATTLE iteration limit for atom "
4204  << (atom[ig].id + 1) << "!\n" << endi;
4205  return -1; // triggers early exit
4206  } else {
4207  iout << iWARN << "Exceeded RATTLE iteration limit for atom "
4208  << (atom[ig].id + 1) << "!\n" << endi;
4209  }
4210  }
4211 
4212  // store data back to patch
4213  int ppoffset, partition;
4214  if ( invdt == 0 ) for ( i = 0; i < hgs; ++i ) {
4215  atom[ig+i].position = pos[i];
4216  } else if ( virial == 0 ) for ( i = 0; i < hgs; ++i ) {
4217  atom[ig+i].velocity = vel[i] + rmass[i] * netdp[i];
4218  } else for ( i = 0; i < hgs; ++i ) {
4219  Force df = netdp[i] * invdt;
4220  Tensor vir = outer(df, ref[i]);
4221  wc += vir;
4222  f[Results::normal][ig+i] += df;
4223  atom[ig+i].velocity = vel[i] + rmass[i] * netdp[i];
4224  if (ppreduction) {
4225  if (!i) {
4226  BigReal z = pos[i].z;
4227  int partition = atom[ig].partition;
4228  int slab = (int)floor((z-zmin)*idz);
4229  if (slab < 0) slab += nslabs;
4230  else if (slab >= nslabs) slab -= nslabs;
4231  ppoffset = 3*(slab + nslabs*partition);
4232  }
4233  ppreduction->item(ppoffset ) += vir.xx;
4234  ppreduction->item(ppoffset+1) += vir.yy;
4235  ppreduction->item(ppoffset+2) += vir.zz;
4236  }
4237  }
4238  }
4239  if ( dt && virial ) *virial += wc;
4240 
4241  return 0;
4242 }
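// The iteration above is the textbook SHAKE/RATTLE position update: for a
// constraint |r_ab|^2 = d^2 the Lagrange-multiplier step is
//   g = (d^2 - |p_ab|^2) / ( 2*(1/m_a + 1/m_b)*(r_ab . p_ab) ),
// after which atom a moves by +g*r_ab/m_a and atom b by -g*r_ab/m_b,
// conserving momentum while shrinking the constraint violation.  Compact
// restatement of one update using the local names above (sketch):
#if 0
BigReal rpab = rab.x*pab.x + rab.y*pab.y + rab.z*pab.z;
BigReal g = (dsq[i] - pab.length2()) / (2.0*(rmass[a] + rmass[b])*rpab);
Vector dp = rab * g;
pos[a] += rmass[a] * dp;
pos[b] -= rmass[b] * dp;
#endif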
4243 
4244 // RATTLE algorithm from Allen & Tildesley
4245 void HomePatch::rattle2(const BigReal timestep, Tensor *virial)
4246 {
4247  Molecule *mol = Node::Object()->molecule;
4248  SimParameters *simParams = Node::Object()->simParameters;
4249  const int fixedAtomsOn = simParams->fixedAtomsOn;
4250  const int useSettle = simParams->useSettle;
4251  const BigReal dt = timestep / TIMEFACTOR;
4252  Tensor wc; // constraint virial
4253  BigReal tol = simParams->rigidTol;
4254  int maxiter = simParams->rigidIter;
4255  int dieOnError = simParams->rigidDie;
4256  int i, iter;
4257  BigReal dsqi[10], tmp;
4258  int ial[10], ibl[10];
4259  Vector ref[10]; // reference position
4260  Vector refab[10]; // reference vector
4261  Vector vel[10]; // new velocity
4262  BigReal rmass[10]; // 1 / mass
4263  BigReal redmass[10]; // reduced mass
4264  int fixed[10]; // is atom fixed?
4265 
4266  // Size of a hydrogen group for water
4267  const WaterModel watmodel = simParams->watmodel;
4268  const int wathgsize = getWaterModelGroupSize(watmodel);
4269 
4270  // CkPrintf("In rattle2!\n");
4271  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4272  // CkPrintf("ig=%d\n",ig);
4273  int hgs = atom[ig].hydrogenGroupSize;
4274  if ( hgs == 1 ) continue; // only one atom in group
4275  // cache data in local arrays and integrate positions normally
4276  int anyfixed = 0;
4277  for ( i = 0; i < hgs; ++i ) {
4278  ref[i] = atom[ig+i].position;
4279  vel[i] = atom[ig+i].velocity;
4280  rmass[i] = atom[ig+i].mass > 0. ? 1. / atom[ig+i].mass : 0.;
4281  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
4282  if ( fixed[i] ) { anyfixed = 1; rmass[i] = 0.; }
4283  }
4284  int icnt = 0;
4285  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
4286  if (hgs != wathgsize) {
4287  NAMD_bug("Hydrogen group error caught in rattle2().");
4288  }
4289  // Use SETTLE for water unless some of the water atoms are fixed.
4290  if (useSettle && !anyfixed) {
4291  if (watmodel == WaterModel::SWM4) {
4292  // SWM4 ordering: O D LP H1 H2
4293  // do swap(O,LP) and call settle with subarray O H1 H2
4294  // swap back after we return
4295  Vector lp_ref = ref[2];
4296  // Vector lp_vel = vel[2];
4297  ref[2] = ref[0];
4298  vel[2] = vel[0];
4299  settle2(atom[ig].mass, atom[ig+3].mass, ref+2, vel+2, dt, virial);
4300  ref[0] = ref[2];
4301  vel[0] = vel[2];
4302  ref[2] = lp_ref;
4303  // vel[2] = vel[0]; // set LP vel to O vel
4304  }
4305  else {
4306  settle2(atom[ig].mass, atom[ig+1].mass, ref, vel, dt, virial);
4307  if (watmodel == WaterModel::TIP4) {
4308  vel[3] = vel[0];
4309  }
4310  }
4311  for (i=0; i<hgs; i++) {
4312  atom[ig+i].velocity = vel[i];
4313  }
4314  continue;
4315  }
4316  if ( !(fixed[1] && fixed[2]) ) {
4317  redmass[icnt] = 1. / (rmass[1] + rmass[2]);
4318  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
4319  }
4320  }
4321  // CkPrintf("Loop 2\n");
4322  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4323  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
4324  if ( !(fixed[0] && fixed[i]) ) {
4325  redmass[icnt] = 1. / (rmass[0] + rmass[i]);
4326  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 0;
4327  ibl[icnt] = i; ++icnt;
4328  }
4329  }
4330  }
4331  if ( icnt == 0 ) continue; // no constraints
4332  // CkPrintf("Loop 3\n");
4333  for ( i = 0; i < icnt; ++i ) {
4334  refab[i] = ref[ial[i]] - ref[ibl[i]];
4335  }
4336  // CkPrintf("Loop 4\n");
4337  int done;
4338  for ( iter = 0; iter < maxiter; ++iter ) {
4339  done = 1;
4340  for ( i = 0; i < icnt; ++i ) {
4341  int a = ial[i]; int b = ibl[i];
4342  Vector vab = vel[a] - vel[b];
4343  Vector &rab = refab[i];
4344  BigReal rabsqi = dsqi[i];
4345  BigReal rvab = rab.x*vab.x + rab.y*vab.y + rab.z*vab.z;
4346  if ( (fabs(rvab) * dt * rabsqi) > tol ) {
4347  Vector dp = rab * (-rvab * redmass[i] * rabsqi);
4348  wc += outer(dp,rab);
4349  vel[a] += rmass[a] * dp;
4350  vel[b] -= rmass[b] * dp;
4351  done = 0;
4352  }
4353  }
4354  if ( done ) break;
4355  //if (done) { if (iter > 0) CkPrintf("iter=%d\n", iter); break; }
4356  }
4357  if ( ! done ) {
4358  if ( dieOnError ) {
4359  NAMD_die("Exceeded maximum number of iterations in rattle2().");
4360  } else {
4361  iout << iWARN <<
4362  "Exceeded maximum number of iterations in rattle2().\n" << endi;
4363  }
4364  }
4365  // store data back to patch
4366  for ( i = 0; i < hgs; ++i ) {
4367  atom[ig+i].velocity = vel[i];
4368  }
4369  }
4370  // CkPrintf("Leaving rattle2!\n");
4371  // check that there isn't a constant needed here!
4372  *virial += wc / ( 0.5 * dt );
4373 
4374 }
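// rattle2() enforces the time derivative of each distance constraint,
// r_ab . v_ab = 0.  Whenever |r_ab . v_ab| exceeds the tolerance, the
// update is
//   dp = -(r_ab . v_ab) * r_ab / ( d^2 * (1/m_a + 1/m_b) ),
// applied as +dp/m_a to a and -dp/m_b to b, which removes the
// bond-parallel relative velocity while conserving momentum.  Restated
// with the local names above (sketch; redmass = 1/(1/m_a + 1/m_b),
// dsqi = 1/d^2):
#if 0
BigReal rvab = rab.x*vab.x + rab.y*vab.y + rab.z*vab.z;
Vector dp = rab * (-rvab * redmass[i] * dsqi[i]);
vel[a] += rmass[a] * dp;
vel[b] -= rmass[b] * dp;
#endif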
4375 
4376 
4377 // Adjust gradients for minimizer
4378 void HomePatch::minimize_rattle2(const BigReal timestep, Tensor *virial, bool forces)
4379 {
4380  Molecule *mol = Node::Object()->molecule;
4381  SimParameters *simParams = Node::Object()->simParameters;
4382  Force *f1 = f[Results::normal].begin();
4383  const int fixedAtomsOn = simParams->fixedAtomsOn;
4384  const int useSettle = simParams->useSettle;
4385  const BigReal dt = timestep / TIMEFACTOR;
4386  Tensor wc; // constraint virial
4387  BigReal tol = simParams->rigidTol;
4388  int maxiter = simParams->rigidIter;
4389  int dieOnError = simParams->rigidDie;
4390  int i, iter;
4391  BigReal dsqi[10], tmp;
4392  int ial[10], ibl[10];
4393  Vector ref[10]; // reference position
4394  Vector refab[10]; // reference vector
4395  Vector vel[10]; // new velocity
4396  BigReal rmass[10]; // 1 / mass
4397  BigReal redmass[10]; // reduced mass
4398  int fixed[10]; // is atom fixed?
4399 
4400  // Size of a hydrogen group for water
4401  const WaterModel watmodel = simParams->watmodel;
4402  const int wathgsize = getWaterModelGroupSize(watmodel);
4403 
4404  // CkPrintf("In rattle2!\n");
4405  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4406  // CkPrintf("ig=%d\n",ig);
4407  int hgs = atom[ig].hydrogenGroupSize;
4408  if ( hgs == 1 ) continue; // only one atom in group
4409  // cache data in local arrays and integrate positions normally
4410  int anyfixed = 0;
4411  for ( i = 0; i < hgs; ++i ) {
4412  ref[i] = atom[ig+i].position;
4413  vel[i] = ( forces ? f1[ig+i] : atom[ig+i].velocity );
4414  rmass[i] = 1.0;
4415  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
4416  if ( fixed[i] ) { anyfixed = 1; rmass[i] = 0.; }
4417  }
4418  int icnt = 0;
4419  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
4420  if (hgs != wathgsize) {
4421  NAMD_bug("Hydrogen group error caught in minimize_rattle2().");
4422  }
4423  // Use SETTLE for water unless some of the water atoms are fixed.
4424  if (useSettle && !anyfixed) {
4425  if (watmodel == WaterModel::SWM4) {
4426  // SWM4 ordering: O D LP H1 H2
4427  // do swap(O,LP) and call settle with subarray O H1 H2
4428  // swap back after we return
4429  Vector lp_ref = ref[2];
4430  // Vector lp_vel = vel[2];
4431  ref[2] = ref[0];
4432  vel[2] = vel[0];
4433  settle2(1.0, 1.0, ref+2, vel+2, dt, virial);
4434  ref[0] = ref[2];
4435  vel[0] = vel[2];
4436  ref[2] = lp_ref;
4437  // vel[2] = vel[0]; // set LP vel to O vel
4438  }
4439  else {
4440  settle2(1.0, 1.0, ref, vel, dt, virial);
4441  if (watmodel == WaterModel::TIP4) {
4442  vel[3] = vel[0];
4443  }
4444  }
4445  for (i=0; i<hgs; i++) {
4446  ( forces ? f1[ig+i] : atom[ig+i].velocity ) = vel[i];
4447  }
4448  continue;
4449  }
4450  if ( !(fixed[1] && fixed[2]) ) {
4451  redmass[icnt] = 1. / (rmass[1] + rmass[2]);
4452  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
4453  }
4454  }
4455  // CkPrintf("Loop 2\n");
4456  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4457  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
4458  if ( !(fixed[0] && fixed[i]) ) {
4459  redmass[icnt] = 1. / (rmass[0] + rmass[i]);
4460  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 0;
4461  ibl[icnt] = i; ++icnt;
4462  }
4463  }
4464  }
4465  if ( icnt == 0 ) continue; // no constraints
4466  // CkPrintf("Loop 3\n");
4467  for ( i = 0; i < icnt; ++i ) {
4468  refab[i] = ref[ial[i]] - ref[ibl[i]];
4469  }
4470  // CkPrintf("Loop 4\n");
4471  int done;
4472  for ( iter = 0; iter < maxiter; ++iter ) {
4473  done = 1;
4474  for ( i = 0; i < icnt; ++i ) {
4475  int a = ial[i]; int b = ibl[i];
4476  Vector vab = vel[a] - vel[b];
4477  Vector &rab = refab[i];
4478  BigReal rabsqi = dsqi[i];
4479  BigReal rvab = rab.x*vab.x + rab.y*vab.y + rab.z*vab.z;
4480  if ( (fabs(rvab) * dt * rabsqi) > tol ) {
4481  Vector dp = rab * (-rvab * redmass[i] * rabsqi);
4482  wc += outer(dp,rab);
4483  vel[a] += rmass[a] * dp;
4484  vel[b] -= rmass[b] * dp;
4485  done = 0;
4486  }
4487  }
4488  if ( done ) break;
4489  //if (done) { if (iter > 0) CkPrintf("iter=%d\n", iter); break; }
4490  }
4491  if ( ! done ) {
4492  if ( dieOnError ) {
4493  NAMD_die("Exceeded maximum number of iterations in minimize_rattle2().");
4494  } else {
4495  iout << iWARN <<
4496  "Exceeded maximum number of iterations in minimize_rattle2().\n" << endi;
4497  }
4498  }
4499  // store data back to patch
4500  for ( i = 0; i < hgs; ++i ) {
4501  ( forces ? f1[ig+i] : atom[ig+i].velocity ) = vel[i];
4502  }
4503  }
4504  // CkPrintf("Leaving rattle2!\n");
4505  // check that there isn't a constant needed here!
4506  *virial += wc / ( 0.5 * dt );
4507 
4508 }
4509 
4510 
4512 //
4513 // begin SOA rattle
4514 //
4515 
4517 #ifdef DEBUG_MINIMIZE
4518  if (patchID == 0) {
4519  printf("Step %d, patch %d: buildRattleList_SOA()\n",
4520  flags.step, (int)patchID);
4521  }
4522 #endif
4523 
4524  // Called when rattleListValid_SOA is false.
4525  // List will stay valid until atom migration or some other event,
4526  // such as exchanging replicas, SCRIPT_REVERT from Tcl, or reinitializing atoms.
4527 
4528  const double * __restrict pos_x = patchDataSOA.pos_x;
4529  const double * __restrict pos_y = patchDataSOA.pos_y;
4530  const double * __restrict pos_z = patchDataSOA.pos_z;
4531  const float * __restrict mass = patchDataSOA.mass;
4532  const double * __restrict recipMass = patchDataSOA.recipMass;
4533  const float * __restrict rigidBondLength = patchDataSOA.rigidBondLength;
4534  const int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
4535 
4536  SimParameters *simParams = Node::Object()->simParameters;
4537  const int fixedAtomsOn = simParams->fixedAtomsOn;
4538  const int useSettle = simParams->useSettle;
4539 
4540  // Size of a hydrogen group for water
4541  const WaterModel watmodel = simParams->watmodel;
4542  const int wathgsize = getWaterModelGroupSize(watmodel);
4543 
4544  // Initialize the settle algorithm with water parameters
4545  // settle1() assumes all waters are identical,
4546  // and will generate bad results if they are not.
4547  // XXX this will move to Molecule::build_atom_status when that
4548  // version is debugged
4549  if ( ! settle_initialized ) {
4550  for (int ig = numSoluteAtoms;
4551  ig < numAtoms;
4552  ig += hydrogenGroupSize[ig]) {
4553  // find a water
4554  if (rigidBondLength[ig] > 0) {
4555  int oatm;
4556  if (watmodel == WaterModel::SWM4) {
4557  oatm = ig+3; // skip over Drude and Lonepair
4558  //printf("ig=%d mass_ig=%g oatm=%d mass_oatm=%g\n",
4559  // ig, atom[ig].mass, oatm, atom[oatm].mass);
4560  }
4561  else {
4562  oatm = ig+1;
4563  // Avoid using the Om site to set this by mistake
4564  if (mass[ig] < 0.5 || mass[ig+1] < 0.5) {
4565  oatm += 1;
4566  }
4567  }
4568 
4569  // initialize settle water parameters
4570  settle1init(mass[ig], mass[oatm],
4571  rigidBondLength[ig],
4572  rigidBondLength[oatm],
4573  settle_mO, settle_mH,
4574  settle_mOrmT, settle_mHrmT, settle_ra,
4575  settle_rb, settle_rc, settle_rra);
4576  settle_initialized = 1;
4577  break; // done with init
4578  }
4579  }
4580  }
4581 
4582  BigReal dsq[10];
4583  int ial[10];
4584  int ibl[10];
4585 
4586  rattleList.clear();
4587  noconstList.clear();
4588  rattleParam.clear();
4589 
4590  for (int ig = 0; ig < numSoluteAtoms; ig += hydrogenGroupSize[ig]) {
4591  int hgs = hydrogenGroupSize[ig];
4592  if ( hgs == 1 ) {
4593  // only one atom in group
4594  noconstList.push_back(ig);
4595  continue;
4596  }
4597  int icnt = 0;
4598  // XXX convert rigid bond length to double to square it?
4599  BigReal tmp = rigidBondLength[ig];
4600  if (tmp > 0.0) { // for water
4601  dsq[icnt] = tmp * tmp;
4602  ial[icnt] = 1;
4603  ibl[icnt] = 2;
4604  ++icnt;
4605  }
4606  for (int i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4607  if ( ( tmp = rigidBondLength[ig+i] ) > 0 ) {
4608  dsq[icnt] = tmp * tmp;
4609  ial[icnt] = 0;
4610  ibl[icnt] = i;
4611  ++icnt;
4612  }
4613  }
4614  if ( icnt == 0 ) {
4615  // no constraints
4616  noconstList.push_back(ig);
4617  continue;
4618  }
4619  // Store to RATTLE list
4620  RattleList rattleListElem;
4621  rattleListElem.ig = ig;
4622  rattleListElem.icnt = icnt;
4623  rattleList.push_back(rattleListElem);
4624  for (int i = 0; i < icnt; ++i ) {
4625  int a = ial[i];
4626  int b = ibl[i];
4627  RattleParam rattleParamElem;
4628  rattleParamElem.ia = a;
4629  rattleParamElem.ib = b;
4630  rattleParamElem.dsq = dsq[i];
4631  rattleParamElem.rma = recipMass[ig+a];
4632  rattleParamElem.rmb = recipMass[ig+b];
4633  rattleParam.push_back(rattleParamElem);
4634  }
4635  // pad rattleParam with zero-filled dummy entries so each group has exactly 4
4636  for (int i = icnt; i < 4; ++i )
4637  {
4638  RattleParam rattleParamElem;
4639  rattleParamElem.ia = 0;
4640  rattleParamElem.ib = 0;
4641  rattleParamElem.dsq = 0;
4642  rattleParamElem.rma = 0;
4643  rattleParamElem.rmb = 0;
4644  rattleParam.push_back(rattleParamElem);
4645  }
4646 
4647  }
4648 }
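// The SOA variants assume sort_solvent_atoms() has already partitioned the
// patch so that the numSoluteAtoms non-water atoms come first and all water
// sites form one contiguous tail.  That is why the loop above only scans
// solute groups and rattle1_SOA() below can hand the whole tail to
// settle1_SOA() in a single call.  Index layout (sketch; firstWater and
// numWaterMols are hypothetical names):
#if 0
// [ solute atoms ............... | water sites, wathgsize per molecule ]
//   0 .. numSoluteAtoms-1         numSoluteAtoms .. numAtoms-1
int firstWater   = numSoluteAtoms;
int numWaterMols = numSolventAtoms / wathgsize;
#endif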
4649 
4650 // dt scaled by 1/TIMEFACTOR
4651 // Removed code handling fixed atoms.
4652 // XXX ppreduction == NULL
4653 int HomePatch::rattle1_SOA(const BigReal dt, Tensor *virial,
4654  SubmitReduction *ppreduction) {
4655 
4656  SimParameters *simParams = Node::Object()->simParameters;
4657 
4658 #if 1
4659  if (!rattleListValid_SOA) {
4660  buildRattleList_SOA();
4661  rattleListValid_SOA = true;
4662  }
4663 #endif
4664 
4665  double * __restrict pos_x = patchDataSOA.pos_x;
4666  double * __restrict pos_y = patchDataSOA.pos_y;
4667  double * __restrict pos_z = patchDataSOA.pos_z;
4668  double * __restrict vel_x = patchDataSOA.vel_x;
4669  double * __restrict vel_y = patchDataSOA.vel_y;
4670  double * __restrict vel_z = patchDataSOA.vel_z;
4671  double * __restrict posNew_x = patchDataSOA.posNew_x;
4672  double * __restrict posNew_y = patchDataSOA.posNew_y;
4673  double * __restrict posNew_z = patchDataSOA.posNew_z;
4674  double * __restrict velNew_x = patchDataSOA.velNew_x;
4675  double * __restrict velNew_y = patchDataSOA.velNew_y;
4676  double * __restrict velNew_z = patchDataSOA.velNew_z;
4677  const int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
4678 #ifdef __INTEL_COMPILER
4679  __assume_aligned(pos_x,64);
4680  __assume_aligned(pos_y,64);
4681  __assume_aligned(pos_z,64);
4682  __assume_aligned(vel_x,64);
4683  __assume_aligned(vel_y,64);
4684  __assume_aligned(vel_z,64);
4685  __assume_aligned(posNew_x,64);
4686  __assume_aligned(posNew_y,64);
4687  __assume_aligned(posNew_z,64);
4688  __assume_aligned(velNew_x,64);
4689  __assume_aligned(velNew_y,64);
4690  __assume_aligned(velNew_z,64);
4691  __assume_aligned(hydrogenGroupSize,64);
4692 #endif
4693 
4694  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
4695  const BigReal tol2 = 2.0 * simParams->rigidTol;
4696  int maxiter = simParams->rigidIter;
4697  int dieOnError = simParams->rigidDie;
4698 
4699  // calculate full step update to all positions
4700  for (int i=0; i < numAtoms; i++) {
4701  posNew_x[i] = pos_x[i] + vel_x[i] * dt;
4702  posNew_y[i] = pos_y[i] + vel_y[i] * dt;
4703  posNew_z[i] = pos_z[i] + vel_z[i] * dt;
4704  }
4705 
4706  // call settle to process all waters at once
4707  // XXX this assumes sorting waters into consecutive part of list
4708  //int numWaters = settleList.size();
4709  if (numSolventAtoms > 0) {
4710  int n = numSoluteAtoms; // index of first water in list is past solute
4711  settle1_SOA(&pos_x[n], &pos_y[n], &pos_z[n],
4712  &posNew_x[n], &posNew_y[n], &posNew_z[n],
4713  numWaters,
4714  settle_mOrmT, settle_mHrmT, settle_ra,
4715  settle_rb, settle_rc, settle_rra);
4716  }
4717  int posParam = 0;
4718  for (int j=0;j < rattleList.size();++j) {
4719  int ig = rattleList[j].ig;
4720  int icnt = rattleList[j].icnt;
4721  bool done;
4722  bool consFailure;
4723  if (icnt == 1) {
4724  rattlePair<1>(&rattleParam[posParam],
4725  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4726  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4727  consFailure
4728  );
4729  done = true;
4730  } else {
4731  if (simParams->mshakeOn) {
4732  //buildConstantMatrix();
4733  MSHAKEIterate(icnt, &rattleParam[posParam],
4734  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4735  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4736  tol2, maxiter,
4737  done, consFailure);
4738  }
4739  else if(simParams->lincsOn) {
4740  LINCS(icnt, &rattleParam[posParam],
4741  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4742  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4743  tol2, maxiter, done, consFailure);
4744  }
4745 
4746  else
4747  rattleN(icnt, &rattleParam[posParam],
4748  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4749  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4750  tol2, maxiter,
4751  done, consFailure);
4752  }
4753 
4754  // Advance position in rattleParam
4755 // posParam += icnt;
4756  posParam += 4;
4757  if ( consFailure ) {
4758  if ( dieOnError ) {
4759  iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
4760  << (atom[ig].id + 1) << "!\n" << endi;
4761  return -1; // triggers early exit
4762  } else {
4763  iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
4764  << (atom[ig].id + 1) << "!\n" << endi;
4765  }
4766  } else if ( ! done ) {
4767  if ( dieOnError ) {
4768  iout << iERROR << "Exceeded RATTLE iteration limit for atom "
4769  << (atom[ig].id + 1) << "!\n" << endi;
4770  return -1; // triggers early exit
4771  } else {
4772  iout << iWARN << "Exceeded RATTLE iteration limit for atom "
4773  << (atom[ig].id + 1) << "!\n" << endi;
4774  }
4775  }
4776 
4777  } // end rattle
4778  // Now that all new positions are known, determine the new velocities
4779  // needed to reach the new positions.
4780  for (int i=0; i < numAtoms; i++) {
4781  velNew_x[i] = (posNew_x[i] - pos_x[i]) * invdt;
4782  velNew_y[i] = (posNew_y[i] - pos_y[i]) * invdt;
4783  velNew_z[i] = (posNew_z[i] - pos_z[i]) * invdt;
4784  }
4785 
4786  // For unconstrained groups, copy the reference positions and velocities into posNew and velNew.
4787  // No need to check hydrogen group size, since there are no fixed atoms.
4788  int numNoconst = noconstList.size();
4789  for (int j=0; j < numNoconst; j++) {
4790  int ig = noconstList[j];
4791  posNew_x[ig] = pos_x[ig];
4792  posNew_y[ig] = pos_y[ig];
4793  posNew_z[ig] = pos_z[ig];
4794  velNew_x[ig] = vel_x[ig];
4795  velNew_y[ig] = vel_y[ig];
4796  velNew_z[ig] = vel_z[ig];
4797  }
4798 
4799  if ( invdt == 0 ) {
4800  for (int ig = 0; ig < numAtoms; ++ig ) {
4801  pos_x[ig] = posNew_x[ig];
4802  pos_y[ig] = posNew_y[ig];
4803  pos_z[ig] = posNew_z[ig];
4804  }
4805  }
4806  else if ( virial == 0 ) {
4807  for (int ig = 0; ig < numAtoms; ++ig ) {
4808  vel_x[ig] = velNew_x[ig];
4809  vel_y[ig] = velNew_y[ig];
4810  vel_z[ig] = velNew_z[ig];
4811  }
4812  }
4813  else {
4814  const float * __restrict mass = patchDataSOA.mass;
4815  double * __restrict f_normal_x = patchDataSOA.f_normal_x;
4816  double * __restrict f_normal_y = patchDataSOA.f_normal_y;
4817  double * __restrict f_normal_z = patchDataSOA.f_normal_z;
4818 #ifdef __INTEL_COMPILER
4819  __assume_aligned(mass,64);
4820  __assume_aligned(f_normal_x,64);
4821  __assume_aligned(f_normal_y,64);
4822  __assume_aligned(f_normal_z,64);
4823 #endif
4824  Tensor vir; // = 0
4825  for (int ig = 0; ig < numAtoms; ig++) {
4826  BigReal df_x = (velNew_x[ig] - vel_x[ig]) * ( mass[ig] * invdt );
4827  BigReal df_y = (velNew_y[ig] - vel_y[ig]) * ( mass[ig] * invdt );
4828  BigReal df_z = (velNew_z[ig] - vel_z[ig]) * ( mass[ig] * invdt );
4829  f_normal_x[ig] += df_x;
4830  f_normal_y[ig] += df_y;
4831  f_normal_z[ig] += df_z;
4832  vir.xx += df_x * pos_x[ig];
4833  vir.xy += df_x * pos_y[ig];
4834  vir.xz += df_x * pos_z[ig];
4835  vir.yx += df_y * pos_x[ig];
4836  vir.yy += df_y * pos_y[ig];
4837  vir.yz += df_y * pos_z[ig];
4838  vir.zx += df_z * pos_x[ig];
4839  vir.zy += df_z * pos_y[ig];
4840  vir.zz += df_z * pos_z[ig];
4841  }
4842  *virial += vir;
4843  for (int ig = 0; ig < numAtoms; ig++) {
4844  vel_x[ig] = velNew_x[ig];
4845  vel_y[ig] = velNew_y[ig];
4846  vel_z[ig] = velNew_z[ig];
4847  }
4848  }
4849 
4850  return 0;
4851 }
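// Why the SOA ("structure of arrays") path exists: keeping x, y, and z in
// separate 64-byte-aligned arrays lets the per-atom loops above compile to
// unit-stride vector code, whereas the AoS path in rattle1() strides over
// whole FullAtom records.  Minimal sketch of the difference:
#if 0
// AoS: atom[i].position.x / .y / .z interleaved within one record
// SoA: pos_x[i], pos_y[i], pos_z[i] in three contiguous arrays
for (int i = 0; i < numAtoms; i++)   // unit stride, trivially vectorizable
  pos_x[i] += vel_x[i] * dt;
#endif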
4852 
4853 //
4854 // end SOA rattle
4855 //
4857 
4858 
4859 // BEGIN LA
4860 void HomePatch::loweAndersenVelocities()
4861 {
4862  DebugM(2, "loweAndersenVelocities\n");
4863  Molecule *mol = Node::Object()->molecule;
4864  SimParameters *simParams = Node::Object()->simParameters;
4865  v.resize(numAtoms);
4866  for (int i = 0; i < numAtoms; ++i) {
4867  //v[i] = p[i];
4868  // co-opt CompAtom structure to pass velocity and mass information
4869  v[i].position = atom[i].velocity;
4870  v[i].charge = atom[i].mass;
4871  }
4872  DebugM(2, "loweAndersenVelocities\n");
4873 }
4874 
4875 void HomePatch::loweAndersenFinish()
4876 {
4877  DebugM(2, "loweAndersenFinish\n");
4878  v.resize(0);
4879 }
4880 // END LA
4881 
4882 //LCPO
4883 void HomePatch::setLcpoType() {
4884  Molecule *mol = Node::Object()->molecule;
4885  const int *lcpoParamType = mol->getLcpoParamType();
4886 
4887  lcpoType.resize(numAtoms);
4888  for (int i = 0; i < numAtoms; i++) {
4889  lcpoType[i] = lcpoParamType[pExt[i].id];
4890  }
4891 }
4892 
4893 // set intrinsic radii of atoms when doMigration is set
4894 void HomePatch::setGBISIntrinsicRadii() {
4895  intRad.resize(numAtoms*2);
4896  intRad.setall(0);
4897  Molecule *mol = Node::Object()->molecule;
4898  SimParameters *simParams = Node::Object()->simParameters;
4899  Real offset = simParams->coulomb_radius_offset;
4900  for (int i = 0; i < numAtoms; i++) {
4901  Real rad = MassToRadius(atom[i].mass);//in ComputeGBIS.inl
4902  Real screen = MassToScreen(atom[i].mass);//same
4903  intRad[2*i+0] = rad - offset;//r0
4904  intRad[2*i+1] = screen*(rad - offset);//s0
4905  }
4906 }
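// intRad stores two numbers per atom: r0 = rad - offset (the offset-reduced
// intrinsic radius, rho_i0) and s0 = screen*r0 (its screened counterpart),
// the inputs to the GBIS pairwise descreening sums.  For example, with a
// hypothetical rad = 1.7 A, offset = 0.09 A, and screen = 0.8, this gives
// r0 = 1.61 A and s0 = 1.288 A.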
4907 
4908 //compute born radius after phase 1, before phase 2
4909 void HomePatch::gbisComputeAfterP1() {
4910 
4911  SimParameters *simParams = Node::Object()->simParameters;
4912  BigReal alphaMax = simParams->alpha_max;
4913  BigReal delta = simParams->gbis_delta;
4914  BigReal beta = simParams->gbis_beta;
4915  BigReal gamma = simParams->gbis_gamma;
4916  BigReal coulomb_radius_offset = simParams->coulomb_radius_offset;
4917 
4918  BigReal rhoi;
4919  BigReal rhoi0;
4920  //calculate bornRad from psiSum
4921  for (int i = 0; i < numAtoms; i++) {
4922  rhoi0 = intRad[2*i];
4923  rhoi = rhoi0+coulomb_radius_offset;
4924  psiFin[i] += psiSum[i];
4925  psiFin[i] *= rhoi0;
4926  bornRad[i]=1/(1/rhoi0-1/rhoi*tanh(psiFin[i]*(delta+psiFin[i]*(-beta+gamma*psiFin[i]))));
4927  bornRad[i] = (bornRad[i] > alphaMax) ? alphaMax : bornRad[i];
4928 #ifdef PRINT_COMP
4929  CkPrintf("BORNRAD(%04i)[%04i] = % .4e\n",flags.sequence,pExt[i].id,bornRad[i]);
4930 #endif
4931  }
4932 
4933  gbisP2Ready();
4934 }
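// The update above is the GB-OBC style Born radius used by GBIS: with
// psi_i the accumulated, rho_i0-rescaled descreening sum,
//   alpha_i = 1 / ( 1/rho_i0 - tanh(delta*psi - beta*psi^2 + gamma*psi^3)/rho_i ),
// clamped to alphaMax.  The Horner form in the code,
// psi*(delta + psi*(-beta + gamma*psi)), is the same cubic
// delta*psi - beta*psi^2 + gamma*psi^3.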
4935 
4936 //compute dHdrPrefix after phase 2, before phase 3
4937 void HomePatch::gbisComputeAfterP2() {
4938 
4939  SimParameters *simParams = Node::Object()->simParameters;
4940  BigReal delta = simParams->gbis_delta;
4941  BigReal beta = simParams->gbis_beta;
4942  BigReal gamma = simParams->gbis_gamma;
4943  BigReal epsilon_s = simParams->solvent_dielectric;
4944  BigReal epsilon_p = simParams->dielectric;
4945  BigReal epsilon_s_i = 1/simParams->solvent_dielectric;
4946  BigReal epsilon_p_i = 1/simParams->dielectric;
4947  BigReal coulomb_radius_offset = simParams->coulomb_radius_offset;
4948  BigReal kappa = simParams->kappa;
4949  BigReal fij, expkappa, Dij, dEdai, dedasum;
4950  BigReal rhoi, rhoi0, psii, nbetapsi;
4951  BigReal gammapsi2, tanhi, daidr;
4952  for (int i = 0; i < numAtoms; i++) {
4953  //add diagonal dEda term
4954  dHdrPrefix[i] += dEdaSum[i];//accumulated from proxies
4955  fij = bornRad[i];//inf
4956  expkappa = exp(-kappa*fij);//0
4957  Dij = epsilon_p_i - expkappa*epsilon_s_i;//dielectric term
4958  //calculate dHij prefix
4959  dEdai = -0.5*COULOMB*atom[i].charge*atom[i].charge
4960  *(kappa*epsilon_s_i*expkappa-Dij/fij)/bornRad[i];
4961  dHdrPrefix[i] += dEdai;
4962  dedasum = dHdrPrefix[i];
4963 
4964  rhoi0 = intRad[2*i];
4965  rhoi = rhoi0+coulomb_radius_offset;
4966  psii = psiFin[i];
4967  nbetapsi = -beta*psii;
4968  gammapsi2 = gamma*psii*psii;
4969  tanhi = tanh(psii*(delta+nbetapsi+gammapsi2));
4970  daidr = bornRad[i]*bornRad[i]*rhoi0/rhoi*(1-tanhi*tanhi)
4971  * (delta+nbetapsi+nbetapsi+gammapsi2+gammapsi2+gammapsi2);
4972  dHdrPrefix[i] *= daidr;//dHdrPrefix previously equaled dEda
4973 #ifdef PRINT_COMP
4974  CkPrintf("DHDR(%04i)[%04i] = % .4e\n",flags.sequence,pExt[i].id,dHdrPrefix[i]);
4975 #endif
4976  }
4977  gbisP3Ready();
4978 }
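// Chain rule behind dHdrPrefix: the loop first accumulates dE/d(alpha_i)
// (the diagonal self term plus the proxy dEdaSum contributions), then
// multiplies by d(alpha_i)/d(psi_i).  Differentiating the Born-radius
// expression above, with t = delta*psi - beta*psi^2 + gamma*psi^3, gives
//   d(alpha)/d(psi) = alpha^2 * (rho_i0/rho_i) * (1 - tanh^2(t))
//                     * (delta - 2*beta*psi + 3*gamma*psi^2),
// which is exactly the 'daidr' factor (the repeated nbetapsi and gammapsi2
// terms spell out the 2x and 3x coefficients).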
4979 
4980 //send born radius to proxies to begin phase 2
4981 void HomePatch::gbisP2Ready() {
4982  if (proxy.size() > 0) {
4983  CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
4984  for (int i = 0; i < proxy.size(); i++) {
4985  int node = proxy[i];
4986  ProxyGBISP2DataMsg *msg = new (numAtoms, PRIORITY_SIZE) ProxyGBISP2DataMsg;
4987  msg->patch = patchID;
4988  msg->origPe = CkMyPe();
4989  memcpy(msg->bornRad,bornRad.begin(),numAtoms*sizeof(Real));
4990  msg->destPe = node;
4991  int seq = flags.sequence;
4993  SET_PRIORITY(msg,seq,priority);
4994  cp[node].recvData(msg);
4995  }
4996  }
4997  Patch::gbisP2Ready();
4998 }
4999 
5000 //send dHdrPrefix to proxies to begin phase 3
5001 void HomePatch::gbisP3Ready() {
5002  if (proxy.size() > 0) {
5003  CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
5004  // a nonzero-length message is sent only when doFullElectrostatics is set
5005  int msgAtoms = (flags.doFullElectrostatics) ? numAtoms : 0;
5006  for (int i = 0; i < proxy.size(); i++) {
5007  int node = proxy[i];
5008  ProxyGBISP3DataMsg *msg = new (msgAtoms, PRIORITY_SIZE) ProxyGBISP3DataMsg;
5009  msg->patch = patchID;
5010  msg->dHdrPrefixLen = msgAtoms;
5011  msg->origPe = CkMyPe();
5012  memcpy(msg->dHdrPrefix, dHdrPrefix.begin(), msgAtoms*sizeof(Real));
5013  msg->destPe = node;
5014  int seq = flags.sequence;
5016  SET_PRIORITY(msg,seq,priority);
5017  cp[node].recvData(msg);
5018  }
5019  }
5020  Patch::gbisP3Ready();
5021 }
5022 
5023 //receive proxy results from phase 1
5024 void HomePatch::receiveResult(ProxyGBISP1ResultMsg *msg) {
5025  ++numGBISP1Arrived;
5026  for ( int i = 0; i < msg->psiSumLen; ++i ) {
5027  psiFin[i] += msg->psiSum[i];
5028  }
5029  delete msg;
5030 
5031  if (flags.doNonbonded) {
5032  //awaken if phase 1 done
5033  if (phase1BoxClosedCalled == true &&
5034  numGBISP1Arrived==proxy.size() ) {
5035  // fprintf(stderr, "Calling awaken() on patch %d: 4\n", this->patchID);
5036  sequencer->awaken();
5037  }
5038  } else {
5039  //awaken if all phases done on noWork step
5040  if (boxesOpen == 0 &&
5041  numGBISP1Arrived == proxy.size() &&
5042  numGBISP2Arrived == proxy.size() &&
5043  numGBISP3Arrived == proxy.size()) {
5044  // fprintf(stderr, "Calling awaken() on patch %d: 5\n", this->patchID);
5045  sequencer->awaken();
5046  }
5047  }
5048 }
5049 
5050 //receive proxy results from phase 2
5051 void HomePatch::receiveResult(ProxyGBISP2ResultMsg *msg) {
5052  ++numGBISP2Arrived;
5053  //accumulate dEda
5054  for ( int i = 0; i < msg->dEdaSumLen; ++i ) {
5055  dHdrPrefix[i] += msg->dEdaSum[i];
5056  }
5057  delete msg;
5058 
5059  if (flags.doNonbonded) {
5060  //awaken if phase 2 done
5061  if (phase2BoxClosedCalled == true &&
5062  numGBISP2Arrived==proxy.size() ) {
5063  // fprintf(stderr, "Calling awaken() on patch %d: 6\n", this->patchID);
5064  sequencer->awaken();
5065  }
5066  } else {
5067  //awaken if all phases done on noWork step
5068  if (boxesOpen == 0 &&
5069  numGBISP1Arrived == proxy.size() &&
5070  numGBISP2Arrived == proxy.size() &&
5071  numGBISP3Arrived == proxy.size()) {
5072  // fprintf(stderr, "Calling awaken() on patch %d: 7\n", this->patchID);
5073  sequencer->awaken();
5074  }
5075  }
5076 }
5077 
5078 // MOLLY algorithm part 1
5079 void HomePatch::mollyAverage()
5080 {
5081  Molecule *mol = Node::Object()->molecule;
5082  SimParameters *simParams = Node::Object()->simParameters;
5083  BigReal tol = simParams->mollyTol;
5084  int maxiter = simParams->mollyIter;
5085  int i, iter;
5086  HGArrayBigReal dsq;
5087  BigReal tmp;
5088  HGArrayInt ial, ibl;
5089  HGArrayVector ref; // reference position
5090  HGArrayVector refab; // reference vector
5091  HGArrayBigReal rmass; // 1 / mass
5092  BigReal *lambda; // Lagrange multipliers
5093  CompAtom *avg; // averaged position
5094  int numLambdas = 0;
5095  HGArrayInt fixed; // is atom fixed?
5096 
5097  // iout<<iINFO << "mollyAverage: "<<std::endl<<endi;
5098  p_avg.resize(numAtoms);
5099  for ( i=0; i<numAtoms; ++i ) p_avg[i] = p[i];
5100 
5101  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
5102  int hgs = atom[ig].hydrogenGroupSize;
5103  if ( hgs == 1 ) continue; // only one atom in group
5104  for ( i = 0; i < hgs; ++i ) {
5105  ref[i] = atom[ig+i].position;
5106  rmass[i] = 1. / atom[ig+i].mass;
5107  fixed[i] = ( simParams->fixedAtomsOn && atom[ig+i].atomFixed );
5108  if ( fixed[i] ) rmass[i] = 0.;
5109  }
5110  avg = &(p_avg[ig]);
5111  int icnt = 0;
5112 
5113  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
5114  if ( hgs != 3 ) {
5115  NAMD_die("Hydrogen group error caught in mollyAverage(). It's a bug!\n");
5116  }
5117  if ( !(fixed[1] && fixed[2]) ) {
5118  dsq[icnt] = tmp * tmp; ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
5119  }
5120  }
5121  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
5122  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
5123  if ( !(fixed[0] && fixed[i]) ) {
5124  dsq[icnt] = tmp * tmp; ial[icnt] = 0; ibl[icnt] = i; ++icnt;
5125  }
5126  }
5127  }
5128  if ( icnt == 0 ) continue; // no constraints
5129  numLambdas += icnt;
5130  molly_lambda.resize(numLambdas);
5131  lambda = &(molly_lambda[numLambdas - icnt]);
5132  for ( i = 0; i < icnt; ++i ) {
5133  refab[i] = ref[ial[i]] - ref[ibl[i]];
5134  }
5135  // iout<<iINFO<<"hgs="<<hgs<<" m="<<icnt<<std::endl<<endi;
5136  iter=average(avg,ref,lambda,hgs,icnt,rmass,dsq,ial,ibl,refab,tol,maxiter);
5137  if ( iter == maxiter ) {
5138  iout << iWARN << "Exceeded maximum number of iterations in mollyAverage().\n"<<endi;
5139  }
5140  }
5141 
5142  // for ( i=0; i<numAtoms; ++i ) {
5143  // if ( ( p_avg[i].position - p[i].position ).length2() > 1.0 ) {
5144  // iout << iERROR << "MOLLY moved atom " << (p[i].id + 1) << " from "
5145  // << p[i].position << " to " << p_avg[i].position << "\n" << endi;
5146  // }
5147  // }
5148 
5149 }
5150 
5151 
5152 // MOLLY algorithm part 2
5153 void HomePatch::mollyMollify(Tensor *virial)
5154 {
5155  Molecule *mol = Node::Object()->molecule;
5156  SimParameters *simParams = Node::Object()->simParameters;
5157  Tensor wc; // constraint virial
5158  int i;
5159  HGArrayInt ial, ibl;
5160  HGArrayVector ref; // reference position
5161  CompAtom *avg; // averaged position
5162  HGArrayVector refab; // reference vector
5163  HGArrayVector force; // new force
5164  HGArrayBigReal rmass; // 1 / mass
5165  BigReal *lambda; // Lagrange multipliers
5166  int numLambdas = 0;
5167  HGArrayInt fixed; // is atom fixed?
5168 
5169  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
5170  int hgs = atom[ig].hydrogenGroupSize;
5171  if (hgs == 1 ) continue; // only one atom in group
5172  for ( i = 0; i < hgs; ++i ) {
5173  ref[i] = atom[ig+i].position;
5174  force[i] = f[Results::slow][ig+i];
5175  rmass[i] = 1. / atom[ig+i].mass;
5176  fixed[i] = ( simParams->fixedAtomsOn && atom[ig+i].atomFixed );
5177  if ( fixed[i] ) rmass[i] = 0.;
5178  }
5179  int icnt = 0;
5180  // c-ji I'm only going to mollify water for now
5181  if ( atom[ig].rigidBondLength > 0 ) { // for water
5182  if ( hgs != 3 ) {
5183  NAMD_die("Hydrogen group error caught in mollyMollify(). It's a bug!\n");
5184  }
5185  if ( !(fixed[1] && fixed[2]) ) {
5186  ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
5187  }
5188  }
5189  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
5190  if ( atom[ig+i].rigidBondLength > 0 ) {
5191  if ( !(fixed[0] && fixed[i]) ) {
5192  ial[icnt] = 0; ibl[icnt] = i; ++icnt;
5193  }
5194  }
5195  }
5196 
5197  if ( icnt == 0 ) continue; // no constraints
5198  lambda = &(molly_lambda[numLambdas]);
5199  numLambdas += icnt;
5200  for ( i = 0; i < icnt; ++i ) {
5201  refab[i] = ref[ial[i]] - ref[ibl[i]];
5202  }
5203  avg = &(p_avg[ig]);
5204  mollify(avg,ref,lambda,force,hgs,icnt,rmass,ial,ibl,refab);
5205  // store data back to patch
5206  for ( i = 0; i < hgs; ++i ) {
5207  wc += outer(force[i]-f[Results::slow][ig+i],ref[i]);
5208  f[Results::slow][ig+i] = force[i];
5209  }
5210  }
5211  // check that there isn't a constant needed here!
5212  *virial += wc;
5213  p_avg.resize(0);
5214 }
5215 
5216 void HomePatch::checkpoint(void) {
5217  checkpoint_atom.copy(atom);
5218  checkpoint_lattice = lattice;
5219 
5220  // DMK - Atom Separation (water vs. non-water)
5221  #if NAMD_SeparateWaters != 0
5222  checkpoint_numWaterAtoms = numWaterAtoms;
5223  #endif
5224 }
5225 
5226 void HomePatch::revert(void) {
5227  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
5228 
5229  atom.copy(checkpoint_atom);
5230  numAtoms = atom.size();
5231  lattice = checkpoint_lattice;
5232 
5233  doAtomUpdate = true;
5234  rattleListValid = false;
5235  rattleListValid_SOA = false;
5236 
5237  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
5238 
5239  SimParameters *simParams = Node::Object()->simParameters;
5240  if (simParams->SOAintegrateOn) {
5241  sort_solvent_atoms();
5242  copy_atoms_to_SOA();
5243 #if 0
5244  if (simParams->rigidBonds != RIGID_NONE) {
5245  buildRattleList_SOA();
5246  rattleListValid_SOA = true;
5247  }
5248 #endif
5249  }
5250 
5251  // DMK - Atom Separation (water vs. non-water)
5252  #if NAMD_SeparateWaters != 0
5253  numWaterAtoms = checkpoint_numWaterAtoms;
5254  #endif
5255 }
5256 
5257 void HomePatch::exchangeCheckpoint(int scriptTask, int &bpc) { // initiating replica
5258  SimParameters *simParams = Node::Object()->simParameters;
5259  checkpoint_task = scriptTask;
5260  const int remote = simParams->scriptIntArg1;
5261  const char *key = simParams->scriptStringArg1;
5262  PatchMgr::Object()->sendCheckpointReq(patchID, remote, key, scriptTask);
5263 }
5264 
5265 void HomePatch::recvCheckpointReq(int task, const char *key, int replica, int pe) { // responding replica
5266  if ( task == SCRIPT_CHECKPOINT_FREE ) {
5267  if ( ! checkpoints.count(key) ) {
5268  NAMD_die("Unable to free checkpoint, requested key was never stored.");
5269  }
5270  delete checkpoints[key];
5271  checkpoints.erase(key);
5272  PatchMgr::Object()->sendCheckpointAck(patchID, replica, pe);
5273  return;
5274  }
5275  CheckpointAtomsMsg *msg;
5276  if ( task == SCRIPT_CHECKPOINT_LOAD || task == SCRIPT_CHECKPOINT_SWAP ) {
5277  if ( ! checkpoints.count(key) ) {
5278  NAMD_die("Unable to load checkpoint, requested key was never stored.");
5279  }
5280  checkpoint_t &cp = *checkpoints[key];
5281  msg = new (cp.numAtoms,1,0) CheckpointAtomsMsg;
5282  msg->lattice = cp.lattice;
5283  msg->berendsenPressure_count = cp.berendsenPressure_count;
5284  msg->numAtoms = cp.numAtoms;
5285  memcpy(msg->atoms,cp.atoms.begin(),cp.numAtoms*sizeof(FullAtom));
5286  } else {
5287  msg = new (0,1,0) CheckpointAtomsMsg;
5288  }
5289  msg->pid = patchID;
5290  msg->replica = CmiMyPartition();
5291  msg->pe = CkMyPe();
5292  PatchMgr::Object()->sendCheckpointLoad(msg, replica, pe);
5293 }
5294 
5295 void HomePatch::recvCheckpointLoad(CheckpointAtomsMsg *msg) { // initiating replica
5296  SimParameters *simParams = Node::Object()->simParameters;
5297  const int remote = simParams->scriptIntArg1;
5298  const char *key = simParams->scriptStringArg1;
5299  if ( checkpoint_task == SCRIPT_CHECKPOINT_FREE ) {
5300  NAMD_bug("HomePatch::recvCheckpointLoad called during checkpointFree.");
5301  }
5302  if ( msg->replica != remote ) {
5303  NAMD_bug("HomePatch::recvCheckpointLoad message from wrong replica.");
5304  }
5305  if ( checkpoint_task == SCRIPT_CHECKPOINT_STORE || checkpoint_task == SCRIPT_CHECKPOINT_SWAP ) {
5306  CheckpointAtomsMsg *newmsg = new (numAtoms,1+strlen(key),0) CheckpointAtomsMsg;
5307  strcpy(newmsg->key,key);
5308  newmsg->lattice = lattice;
5309  newmsg->berendsenPressure_count = sequencer->berendsenPressure_count;
5310  newmsg->pid = patchID;
5311  newmsg->pe = CkMyPe();
5312  newmsg->replica = CmiMyPartition();
5313  newmsg->numAtoms = numAtoms;
5314  memcpy(newmsg->atoms,atom.begin(),numAtoms*sizeof(FullAtom));
5315  PatchMgr::Object()->sendCheckpointStore(newmsg, remote, msg->pe);
5316  }
5317  if ( checkpoint_task == SCRIPT_CHECKPOINT_LOAD || checkpoint_task == SCRIPT_CHECKPOINT_SWAP ) {
5318  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
5319  lattice = msg->lattice;
5320  sequencer->berendsenPressure_count = msg->berendsenPressure_count;
5321  numAtoms = msg->numAtoms;
5322  atom.resize(numAtoms);
5323  memcpy(atom.begin(),msg->atoms,numAtoms*sizeof(FullAtom));
5324  doAtomUpdate = true;
5325  rattleListValid = false;
5326  rattleListValid_SOA = false;
5327  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
5328  if (simParams->SOAintegrateOn) {
5329  sort_solvent_atoms();
5330  copy_atoms_to_SOA();
5331 #if 0
5332  if (simParams->rigidBonds != RIGID_NONE) {
5334  rattleListValid_SOA = true;
5335  }
5336 #endif
5337  }
5338  }
5339  if ( checkpoint_task != SCRIPT_CHECKPOINT_FREE ) {
5340  PatchMgr::Object()->sendCheckpointAck(patchID, remote, msg->pe);
5341  }
5342  delete msg;
5343 }
5344 
5345 void HomePatch::recvCheckpointStore(CheckpointAtomsMsg *msg) { // responding replica
5346  if ( ! checkpoints.count(msg->key) ) {
5347  checkpoints[msg->key] = new checkpoint_t;
5348  }
5349  checkpoint_t &cp = *checkpoints[msg->key];
5350  cp.lattice = msg->lattice;
5351  cp.berendsenPressure_count = msg->berendsenPressure_count;
5352  cp.numAtoms = msg->numAtoms;
5353  cp.atoms.resize(cp.numAtoms);
5354  memcpy(cp.atoms.begin(),msg->atoms,cp.numAtoms*sizeof(FullAtom));
5355  PatchMgr::Object()->sendCheckpointAck(patchID, msg->replica, msg->pe);
5356  delete msg;
5357 }
5358 
5359 void HomePatch::recvCheckpointAck() { // initiating replica
5360  CkpvAccess(_qd)->process();
5361 }
5362 
5363 
5364 void HomePatch::exchangeAtoms(int scriptTask) {
5365  SimParameters *simParams = Node::Object()->simParameters;
5366  // CkPrintf("exchangeAtoms %d %d %d %d\n", CmiMyPartition(), scriptTask, (int)(simParams->scriptArg1), (int)(simParams->scriptArg2));
5367  if ( scriptTask == SCRIPT_ATOMSEND || scriptTask == SCRIPT_ATOMSENDRECV ) {
5368  exchange_dst = (int) simParams->scriptArg1;
5369  // create and save outgoing message
5370  exchange_msg = new (numAtoms,0) ExchangeAtomsMsg;
5371  exchange_msg->lattice = lattice;
5372  exchange_msg->pid = patchID;
5373  exchange_msg->numAtoms = numAtoms;
5374  memcpy(exchange_msg->atoms,atom.begin(),numAtoms*sizeof(FullAtom));
5375  if ( exchange_req >= 0 ) {
5376  recvExchangeReq(exchange_req);
5377  }
5378  }
5379  if ( scriptTask == SCRIPT_ATOMRECV || scriptTask == SCRIPT_ATOMSENDRECV ) {
5380  exchange_src = (int) simParams->scriptArg2;
5381  PatchMgr::Object()->sendExchangeReq(patchID, exchange_src);
5382  }
5383 }
5384 
5385 void HomePatch::recvExchangeReq(int req) {
5386  exchange_req = req;
5387  if ( exchange_msg ) {
5388  // CkPrintf("recvExchangeReq %d %d\n", CmiMyPartition(), exchange_dst);
5389  PatchMgr::Object()->sendExchangeMsg(exchange_msg, exchange_dst, exchange_req);
5390  exchange_msg = 0;
5391  exchange_req = -1;
5392  CkpvAccess(_qd)->process();
5393  }
5394 }
5395 
5396 void HomePatch::recvExchangeMsg(ExchangeAtomsMsg *msg) {
5397  // CkPrintf("recvExchangeMsg %d %d\n", CmiMyPartition(), exchange_src);
5398  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
5399  lattice = msg->lattice;
5400  numAtoms = msg->numAtoms;
5401  atom.resize(numAtoms);
5402  memcpy(atom.begin(),msg->atoms,numAtoms*sizeof(FullAtom));
5403  delete msg;
5404  CkpvAccess(_qd)->process();
5405  doAtomUpdate = true;
5406  rattleListValid = false;
5407  rattleListValid_SOA = false;
5408  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
5409  SimParameters *simParams = Node::Object()->simParameters;
5410  if (simParams->SOAintegrateOn) {
5411  sort_solvent_atoms();
5412  copy_atoms_to_SOA();
5413 #if 0
5414  if (simParams->rigidBonds != RIGID_NONE) {
5415  buildRattleList_SOA();
5416  rattleListValid_SOA = true;
5417  }
5418 #endif
5419  }
5420 }
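// Pairing note for the atom exchange above: whichever side is ready first
// wins. exchangeAtoms() parks the outgoing message in exchange_msg, and
// recvExchangeReq() forwards it once both the message and the request from
// the receiving partition are present; recvExchangeMsg() then adopts the
// incoming atoms much as recvCheckpointLoad() does, invalidating rattle
// lists and scheduling an atom update.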
5421 
5422 void HomePatch::submitLoadStats(int timestep)
5423 {
5424  LdbCoordinator::Object()->patchLoad(patchID, numAtoms, timestep);
5425 }
5426 
5427 
5428 //
5429 // XXX operates on CompAtom, not FullAtom
5430 //
5431 // XXX TODO: This operation could be moved to the gpu (?)
5432 void HomePatch::doPairlistCheck()
5433 {
5434 #if 0
5435 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
5436  char dpcbuf[32];
5437  sprintf(dpcbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::DO_PAIRLIST_CHECK], this->getPatchID());
5438  NAMD_EVENT_START_EX(1, NamdProfileEvent::DO_PAIRLIST_CHECK, dpcbuf);
5439 #endif
5440 #endif
5441 
5442  SimParameters *simParams = Node::Object()->simParameters;
5443 
5444  if ( numAtoms == 0 || ! flags.usePairlists ) {
5445  flags.pairlistTolerance = 0.;
5446  flags.maxAtomMovement = 99999.;
5447 #if 0
5448  NAMD_EVENT_STOP(1, NamdProfileEvent::DO_PAIRLIST_CHECK);
5449 #endif
5450  return;
5451  }
5452 
5453  int i; int n = numAtoms;
5454  CompAtom *p_i = p.begin();
5455 
5456  if ( flags.savePairlists ) {
5457  flags.pairlistTolerance = doPairlistCheck_newTolerance;
5458  flags.maxAtomMovement = 0.;
5459  doPairlistCheck_newTolerance *= (1. - simParams->pairlistShrink);
5460  doPairlistCheck_lattice = lattice;
5461  doPairlistCheck_positions.resize(numAtoms);
5462  CompAtom *psave_i = doPairlistCheck_positions.begin();
5463  for ( i=0; i<n; ++i ) { psave_i[i] = p_i[i]; }
5464 #if 0
5465  NAMD_EVENT_STOP(1, NamdProfileEvent::DO_PAIRLIST_CHECK);
5466 #endif
5467  return;
5468  }
5469 
5470  Lattice &lattice_old = doPairlistCheck_lattice;
5471  Position center_cur = lattice.unscale(center);
5472  Position center_old = lattice_old.unscale(center);
5473  Vector center_delta = center_cur - center_old;
5474 
5475  // find max deviation to corner (any neighbor shares a corner)
5476  BigReal max_cd = 0.;
5477  for ( i=0; i<2; ++i ) {
5478  for ( int j=0; j<2; ++j ) {
5479  for ( int k=0; k<2; ++k ) {
5480  ScaledPosition corner( i ? min.x : max.x , j ? min.y : max.y , k ? min.z : max.z );
5481  Vector corner_delta = lattice.unscale(corner) - lattice_old.unscale(corner);
5482  corner_delta -= center_delta;
5483  BigReal cd = corner_delta.length2();
5484  if ( cd > max_cd ) max_cd = cd;
5485  }
5486  }
5487  }
5488  max_cd = sqrt(max_cd);
5489 
5490  // find max deviation of atoms relative to center
5491  BigReal max_pd = 0.;
5492  CompAtom *p_old_i = doPairlistCheck_positions.begin();
5493  for ( i=0; i<n; ++i ) {
5494  // JM: Calculating position difference and making it patch-centered
5495  Vector p_delta = p_i[i].position - p_old_i[i].position;
5496  p_delta -= center_delta;
5497  BigReal pd = p_delta.length2();
5498  if ( pd > max_pd ) max_pd = pd;
5499  }
5500  max_pd = sqrt(max_pd);
5501 
5502  BigReal max_tol = max_pd + max_cd;
5503 
5504  flags.maxAtomMovement = max_tol;
5505 
5506  // if ( max_tol > flags.pairlistTolerance ) iout << "tolerance " << max_tol << " > " << flags.pairlistTolerance << "\n" << endi;
5507 
5508  if ( max_tol > ( (1. - simParams->pairlistTrigger) *
5509  doPairlistCheck_newTolerance ) ) {
5510  //if(this->getPatchID() == 0) fprintf(stderr, "CPU: Increasing pairList tolerance(%lf %lf)\n",
5511  // max_tol, doPairlistCheck_newTolerance);
5512  doPairlistCheck_newTolerance *= (1. + simParams->pairlistGrow);
5513  }
5514 
5515  if ( max_tol > doPairlistCheck_newTolerance ) {
5516  //if(this->getPatchID() == 0) fprintf(stderr, "CPU: Decreasing pairList tolerance(%lf %lf)\n",
5517  // max_tol, doPairlistCheck_newTolerance);
5518  doPairlistCheck_newTolerance = max_tol / (1. - simParams->pairlistTrigger);
5519  }
5520 
5521  //if(this->getPatchID() == 0) fprintf(stderr, "CPU: New patchTolerance: %lf\n", doPairlistCheck_newTolerance);
5522 
5523 // NAMD_EVENT_STOP(1, NamdProfileEvent::DO_PAIRLIST_CHECK);
5524 }
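// The tolerance update above is a feedback loop on
//   flags.maxAtomMovement = max_pd + max_cd,
// the worst patch-centered atom drift plus the worst corner drift from
// lattice rescaling. Illustration (assuming typical settings of
// pairlistTrigger = 0.3 and pairlistGrow = pairlistShrink = 0.01; the
// configured values come from SimParameters): with a tolerance of 10 A,
// movement beyond 0.7 * 10 = 7 A grows the next tolerance by 1%, and
// movement beyond 10 A resets it to max_tol / 0.7, while each pairlist
// save first shrinks the tolerance by 1%.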
5525 
5526 
5527 #ifdef SEQUENCER_SOA
5528 //
5529 // begin SOA
5530 //
5531 void HomePatch::doGroupSizeCheck_SOA()
5532 {
5533  if ( ! flags.doNonbonded ) return;
5534 
5535  SimParameters *simParams = Node::Object()->simParameters;
5536  BigReal hgcut = 0.5 * simParams->hgroupCutoff; hgcut *= hgcut;
5537  BigReal maxrad2 = 0.;
5538 
5539  double * __restrict pos_x = patchDataSOA.pos_x;
5540  double * __restrict pos_y = patchDataSOA.pos_y;
5541  double * __restrict pos_z = patchDataSOA.pos_z;
5542  int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
5543  int * __restrict nonbondedGroupSize = patchDataSOA.nonbondedGroupSize;
5544 
5545  int j=0;
5546  while (j < numAtoms) {
5547  const int hgs = hydrogenGroupSize[j];
5548  if ( ! hgs ) break; // avoid infinite loop on bug
5549  int ngs = hgs;
5550  if ( ngs > 5 ) ngs = 5; // XXX why? limit to at most 5 atoms per group
5551  BigReal x = pos_x[j];
5552  BigReal y = pos_y[j];
5553  BigReal z = pos_z[j];
5554  int i;
5555  for ( i = 1; i < ngs; ++i ) { // limit spatial extent
5556  nonbondedGroupSize[j+i] = 0;
5557  BigReal dx = pos_x[j+i] - x;
5558  BigReal dy = pos_y[j+i] - y;
5559  BigReal dz = pos_z[j+i] - z;
5560  BigReal r2 = dx * dx + dy * dy + dz * dz;
5561  if ( r2 > hgcut ) break;
5562  else if ( r2 > maxrad2 ) maxrad2 = r2;
5563  }
5564  nonbondedGroupSize[j] = i;
5565  for ( ; i < hgs; ++i ) {
5566  nonbondedGroupSize[j+i] = 1;
5567  }
5568  j += hgs;
5569  }
5570 
5571  if (j != numAtoms) {
5572  NAMD_bug("hydrogenGroupSize is zero in HomePatch::doGroupSizeCheck_SOA");
5573  }
5574 
5575  flags.maxGroupRadius = sqrt(maxrad2);
5576 
5577 }
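// Note: this SOA variant reads pos_x/pos_y/pos_z and the group-size arrays
// through __restrict pointers into patchDataSOA, keeping the hot loop on
// contiguous arrays; the AoS version below performs the identical grouping
// by walking FullAtom records instead.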
5578 //
5579 // end SOA
5580 //
5581 #endif // SEQUENCER_SOA
5582 
5583 
5584 void HomePatch::doGroupSizeCheck()
5585 {
5586  if ( ! flags.doNonbonded ) return;
5587 
5588  SimParameters *simParams = Node::Object()->simParameters;
5589  BigReal hgcut = 0.5 * simParams->hgroupCutoff; hgcut *= hgcut;
5590  BigReal maxrad2 = 0.;
5591 
5592  FullAtomList::iterator p_i = atom.begin();
5593  FullAtomList::iterator p_e = atom.end();
5594 
5595  while ( p_i != p_e ) {
5596  const int hgs = p_i->hydrogenGroupSize;
5597  if ( ! hgs ) break; // avoid infinite loop on bug
5598  int ngs = hgs;
5599  if ( ngs > 5 ) ngs = 5; // XXX why? limit to at most 5 atoms per group
5600  BigReal x = p_i->position.x;
5601  BigReal y = p_i->position.y;
5602  BigReal z = p_i->position.z;
5603  int i;
5604  for ( i = 1; i < ngs; ++i ) { // limit spatial extent
5605  p_i[i].nonbondedGroupSize = 0;
5606  BigReal dx = p_i[i].position.x - x;
5607  BigReal dy = p_i[i].position.y - y;
5608  BigReal dz = p_i[i].position.z - z;
5609  BigReal r2 = dx * dx + dy * dy + dz * dz;
5610  if ( r2 > hgcut ) break;
5611  else if ( r2 > maxrad2 ) maxrad2 = r2;
5612  }
5613  p_i->nonbondedGroupSize = i;
5614  for ( ; i < hgs; ++i ) {
5615  p_i[i].nonbondedGroupSize = 1;
5616  }
5617  p_i += hgs;
5618  }
5619 
5620  if ( p_i != p_e ) {
5621  NAMD_bug("hydrogenGroupSize is zero in HomePatch::doGroupSizeCheck");
5622  }
5623 
5624  flags.maxGroupRadius = sqrt(maxrad2);
5625 
5626 }
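// Worked example of the grouping above: for a three-site water (O H H)
// whose hydrogens lie within 0.5 * hgroupCutoff of the oxygen, the loop
// yields nonbondedGroupSize = 3, 0, 0, i.e. one nonbonded unit. The first
// atom that fails the radius test ends the group early, and it plus every
// later atom in the hydrogen group becomes its own group of size 1.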
5627 
5628 
5629 #ifdef SEQUENCER_SOA
5630 //
5631 // begin SOA
5632 //
5633 void HomePatch::doMarginCheck_SOA()
5634 {
5635  SimParameters *simParams = Node::Object()->simParameters;
5636 
5637  BigReal sysdima = lattice.a_r().unit() * lattice.a();
5638  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
5639  BigReal sysdimc = lattice.c_r().unit() * lattice.c();
5640 
5641  BigReal minSize = simParams->patchDimension - simParams->margin;
5642 
5643  if ( ( aAwayDist*sysdima < minSize*0.9999 ) ||
5644  ( bAwayDist*sysdimb < minSize*0.9999 ) ||
5645  ( cAwayDist*sysdimc < minSize*0.9999 ) ) {
5646 
5647  NAMD_die("Periodic cell has become too small for original patch grid!\n"
5648  "Possible solutions are to restart from a recent checkpoint,\n"
5649  "increase margin, or disable useFlexibleCell for liquid simulation.");
5650  }
5651 
5652  BigReal cutoff = simParams->cutoff;
5653 
5654  BigReal margina = 0.5 * ( aAwayDist - cutoff / sysdima );
5655  BigReal marginb = 0.5 * ( bAwayDist - cutoff / sysdimb );
5656  BigReal marginc = 0.5 * ( cAwayDist - cutoff / sysdimc );
5657 
5658  if ( (margina < -0.0001) || (marginb < -0.0001) || (marginc < -0.0001) ) {
5659  NAMD_die("Periodic cell has become too small for original patch grid!\n"
5660  "There are probably many margin violations already reported.\n"
5661  "Possible solutions are to restart from a recent checkpoint,\n"
5662  "increase margin, or disable useFlexibleCell for liquid simulation.");
5663  }
5664 
5665  BigReal minx = min.x - margina;
5666  BigReal miny = min.y - marginb;
5667  BigReal minz = min.z - marginc;
5668  BigReal maxx = max.x + margina;
5669  BigReal maxy = max.y + marginb;
5670  BigReal maxz = max.z + marginc;
5671 
5672  int xdev, ydev, zdev;
5673  int problemCount = 0;
5674 
5675  double * __restrict pos_x = patchDataSOA.pos_x;
5676  double * __restrict pos_y = patchDataSOA.pos_y;
5677  double * __restrict pos_z = patchDataSOA.pos_z;
5678  for (int i=0; i < numAtoms; i++) {
5679  Vector pos(pos_x[i],pos_y[i],pos_z[i]);
5680 
5681  ScaledPosition s = lattice.scale(pos);
5682 
5683  // check if atom is within bounds
5684  if (s.x < minx) xdev = 0;
5685  else if (maxx <= s.x) xdev = 2;
5686  else xdev = 1;
5687 
5688  if (s.y < miny) ydev = 0;
5689  else if (maxy <= s.y) ydev = 2;
5690  else ydev = 1;
5691 
5692  if (s.z < minz) zdev = 0;
5693  else if (maxz <= s.z) zdev = 2;
5694  else zdev = 1;
5695 
5696  if (mInfo[xdev][ydev][zdev]) { // somewhere else to be
5697  ++problemCount;
5698  }
5699 
5700  }
5701 
5702  marginViolations = problemCount;
5703  // if ( problemCount ) {
5704  // iout << iERROR <<
5705  // "Found " << problemCount << " margin violations!\n" << endi;
5706  // }
5707 
5708 }
5709 //
5710 // end SOA
5711 //
5712 #endif // SEQUENCER_SOA
5713 
5714 
5715 void HomePatch::doMarginCheck()
5716 {
5717  SimParameters *simParams = Node::Object()->simParameters;
5718 
5719  BigReal sysdima = lattice.a_r().unit() * lattice.a();
5720  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
5721  BigReal sysdimc = lattice.c_r().unit() * lattice.c();
5722 
5723  BigReal minSize = simParams->patchDimension - simParams->margin;
5724 
5725  if ( ( aAwayDist*sysdima < minSize*0.9999 ) ||
5726  ( bAwayDist*sysdimb < minSize*0.9999 ) ||
5727  ( cAwayDist*sysdimc < minSize*0.9999 ) ) {
5728 
5729  NAMD_die("Periodic cell has become too small for original patch grid!\n"
5730  "Possible solutions are to restart from a recent checkpoint,\n"
5731  "increase margin, or disable useFlexibleCell for liquid simulation.");
5732  }
5733 
5734  BigReal cutoff = simParams->cutoff;
5735 
5736  BigReal margina = 0.5 * ( aAwayDist - cutoff / sysdima );
5737  BigReal marginb = 0.5 * ( bAwayDist - cutoff / sysdimb );
5738  BigReal marginc = 0.5 * ( cAwayDist - cutoff / sysdimc );
5739 
5740  if ( (margina < -0.0001) || (marginb < -0.0001) || (marginc < -0.0001) ) {
5741  NAMD_die("Periodic cell has become too small for original patch grid!\n"
5742  "There are probably many margin violations already reported.\n"
5743  "Possible solutions are to restart from a recent checkpoint,\n"
5744  "increase margin, or disable useFlexibleCell for liquid simulation.");
5745  }
5746 
5747  BigReal minx = min.x - margina;
5748  BigReal miny = min.y - marginb;
5749  BigReal minz = min.z - marginc;
5750  BigReal maxx = max.x + margina;
5751  BigReal maxy = max.y + marginb;
5752  BigReal maxz = max.z + marginc;
5753 
5754  int xdev, ydev, zdev;
5755  int problemCount = 0;
5756 
5757  FullAtomList::iterator p_i = atom.begin();
5758  FullAtomList::iterator p_e = atom.end();
5759  for ( ; p_i != p_e; ++p_i ) {
5760 
5761  ScaledPosition s = lattice.scale(p_i->position);
5762 
5763  // check if atom is within bounds
5764  if (s.x < minx) xdev = 0;
5765  else if (maxx <= s.x) xdev = 2;
5766  else xdev = 1;
5767 
5768  if (s.y < miny) ydev = 0;
5769  else if (maxy <= s.y) ydev = 2;
5770  else ydev = 1;
5771 
5772  if (s.z < minz) zdev = 0;
5773  else if (maxz <= s.z) zdev = 2;
5774  else zdev = 1;
5775 
5776  if (mInfo[xdev][ydev][zdev]) { // somewhere else to be
5777  ++problemCount;
5778  }
5779 
5780  }
5781 
5782  marginViolations = problemCount;
5783  // if ( problemCount ) {
5784  // iout << iERROR <<
5785  // "Found " << problemCount << " margin violations!\n" << endi;
5786  // }
5787 
5788 }
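// Margin arithmetic used above: along the a axis a patch spans aAwayDist in
// scaled space but nonbonded correctness only requires cutoff / sysdima, so
// each face can absorb margina = 0.5 * (aAwayDist - cutoff / sysdima) of
// drift. Atoms whose scaled coordinates fall outside
// [min - margin, max + margin) map through mInfo to a neighboring patch and
// are counted as margin violations.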
5789 
5790 
5791 void
5792 HomePatch::doAtomMigration()
5793 {
5794 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
5795  char ambuf[32];
5796  sprintf(ambuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::ATOM_MIGRATIONS], this->getPatchID());
5797  NAMD_EVENT_START_EX(1, NamdProfileEvent::ATOM_MIGRATIONS, ambuf);
5798 #endif
5799 
5800  // every patch needs to call this once per migration step
5801  // XXX TODO: check if the cpu version also calls it once per tstep
5802  int i;
5803  for (i=0; i<numNeighbors; i++) {
5804  realInfo[i].mList.resize(0);
5805  }
5806 
5807  // Purge the AtomMap
5808  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
5809 
5810  // Determine atoms that need to migrate
5811 
5812  BigReal minx = min.x;
5813  BigReal miny = min.y;
5814  BigReal minz = min.z;
5815  BigReal maxx = max.x;
5816  BigReal maxy = max.y;
5817  BigReal maxz = max.z;
5818 
5819  int xdev, ydev, zdev;
5820  int delnum = 0;
5821 
5822  FullAtomList::iterator atom_i = atom.begin();
5823  FullAtomList::iterator atom_e = atom.end();
5824 
5825  // DMK - Atom Separation (water vs. non-water)
5826  #if NAMD_SeparateWaters != 0
5827  FullAtomList::iterator atom_first = atom_i;
5828  int numLostWaterAtoms = 0;
5829  #endif
5830 
5831  while ( atom_i != atom_e ) {
5832  // Even though this code iterates through all atoms successively
5833  // it moves entire hydrogen/migration groups as follows:
5834  // Only the parent atom of the hydrogen/migration group has
5835  // nonzero migrationGroupSize. Values determined for xdev,ydev,zdev
5836  // will persist through the remaining group members so that each
5837  // following atom will again be added to the same mList.
5838  if ( atom_i->migrationGroupSize ) {
5839  Position pos = atom_i->position;
5840  if ( atom_i->migrationGroupSize != atom_i->hydrogenGroupSize ) {
5841  // If there are multiple hydrogen groups in a migration group
5842  // (e.g. for supporting lone pairs)
5843  // the following code takes the average position (midpoint)
5844  // of their parents.
5845  int mgs = atom_i->migrationGroupSize;
5846  int c = 1;
5847  for ( int j=atom_i->hydrogenGroupSize; j<mgs;
5848  j+=(atom_i+j)->hydrogenGroupSize ) {
5849  pos += (atom_i+j)->position;
5850  ++c;
5851  }
5852  pos *= 1./c;
5853  // iout << "mgroup " << atom_i->id << " at " << pos << "\n" << endi;
5854  }
5855 
5856  // Scaling the position below transforms space within the patch from
5857  // what could have been a rotated parallelepiped into
5858  // orthogonal coordinates, where we can use minmax comparison
5859  // to detect which of our nearest neighbors this
5860  // parent atom might have entered.
5861  ScaledPosition s = lattice.scale(pos);
5862 
5863  // check if atom is within bounds
5864  if (s.x < minx) xdev = 0;
5865  else if (maxx <= s.x) xdev = 2;
5866  else xdev = 1;
5867 
5868  if (s.y < miny) ydev = 0;
5869  else if (maxy <= s.y) ydev = 2;
5870  else ydev = 1;
5871 
5872  if (s.z < minz) zdev = 0;
5873  else if (maxz <= s.z) zdev = 2;
5874  else zdev = 1;
5875 
5876  }
5877 
5878  if (mInfo[xdev][ydev][zdev]) { // process atom for migration
5879  // Don't migrate if destination is myself
5880 
5881  // See if we have a migration list already
5882  MigrationList &mCur = mInfo[xdev][ydev][zdev]->mList;
5883  DebugM(3,"Migrating atom " << atom_i->id << " from patch "
5884  << patchID << " with position " << atom_i->position << "\n");
5885  mCur.add(*atom_i);
5886  ++delnum;
5887 
5888 
5889  // DMK - Atom Separation (water vs. non-water)
5890  #if NAMD_SeparateWaters != 0
5891  // Check to see if this atom is part of a water molecule. If
5892  // so, numWaterAtoms needs to be adjusted to reflect the loss of
5893  // this atom.
5894  // NOTE: The atom separation code assumes that if the oxygen
5895  // atom of the hydrogen group making up the water molecule is
5896  // migrated to another HomePatch, the hydrogens will also
5897  // move!!!
5898  int atomIndex = atom_i - atom_first;
5899  if (atomIndex < numWaterAtoms)
5900  numLostWaterAtoms++;
5901  #endif
5902 
5903 
5904  } else {
5905  // By keeping track of delnum, the total number of atoms deleted so far
5906  // from the FullAtomList, the else clause allows us to fill holes as we visit each atom.
5907 
5908  if ( delnum ) { *(atom_i-delnum) = *atom_i; }
5909 
5910  }
5911 
5912  ++atom_i;
5913  }
5914 
5915  // DMK - Atom Separation (water vs. non-water)
5916  #if NAMD_SeparateWaters != 0
5917  numWaterAtoms -= numLostWaterAtoms;