// NAMD
// HomePatch.C
// (text below was extracted from a generated source-browser page;
//  embedded line numbers and some original lines are extraction artifacts)
8 /*
9  HomePatch owns the actual atoms of a Patch of space
10  Proxy(s) get messages via ProxyMgr from HomePatch(es)
11  to update lists of atoms and their coordinates
12  HomePatch(es) also have a Sequencer bound to them
13 
14  superclass: Patch
15 */
16 
17 #include "NamdTypes.h"
18 #include "common.h"
19 #include "time.h"
20 #include <math.h>
21 #include "charm++.h"
22 #include "qd.h"
23 
24 #include "SimParameters.h"
25 #include "HomePatch.h"
26 #include "AtomMap.h"
27 #include "Node.h"
28 #include "PatchMap.inl"
29 #include "main.h"
30 #include "ProxyMgr.decl.h"
31 #include "ProxyMgr.h"
32 #include "Migration.h"
33 #include "Molecule.h"
34 #include "PatchMgr.h"
35 #include "Sequencer.h"
36 #include "Broadcasts.h"
37 #include "LdbCoordinator.h"
38 #include "ReductionMgr.h"
39 #include "Sync.h"
40 #include "Random.h"
41 #include "Priorities.h"
42 #include "ComputeNonbondedUtil.h"
43 #include "ComputeGBIS.inl"
44 #include "Priorities.h"
45 #include "SortAtoms.h"
46 #include "MigrationCUDAKernel.h"
47 
48 #include "ComputeQM.h"
49 #include "ComputeQMMgr.decl.h"
50 
51 #include "NamdEventsProfiling.h"
52 
53 //#define PRINT_COMP
54 #define TINY 1.0e-20;
55 #define MAXHGS 10
56 #define MIN_DEBUG_LEVEL 2
57 //#define DEBUGM
58 //#define NL_DEBUG
59 #include "Debug.h"
60 
61 #include <vector>
62 #include <algorithm>
63 using namespace std;
64 
65 typedef int HGArrayInt[MAXHGS];
70 
71 int average(CompAtom *qtilde,const HGArrayVector &q,BigReal *lambda,const int n,const int m, const HGArrayBigReal &imass, const HGArrayBigReal &length2, const HGArrayInt &ial, const HGArrayInt &ibl, const HGArrayVector &refab, const BigReal tolf, const int ntrial);
72 
73 void mollify(CompAtom *qtilde,const HGArrayVector &q0,const BigReal *lambda, HGArrayVector &force,const int n, const int m, const HGArrayBigReal &imass,const HGArrayInt &ial,const HGArrayInt &ibl,const HGArrayVector &refab);
74 
75 void MSHAKE_CUDA(int*, const int size, const RattleParam* rattleParam,
76  BigReal *refx, BigReal *refy, BigReal *refz,
77  BigReal *posx, BigReal *posy, BigReal *posz,
78  const BigReal tol2, const int maxiter,
79  bool& done, bool& consFailure);
80 #define MASS_EPSILON (1.0e-35) //a very small floating point number
81 
82 
83 // DMK - Atom Separation (water vs. non-water)
84 #if NAMD_SeparateWaters != 0
85 
86 // Macro to test if a hydrogen group represents a water molecule.
87 // NOTE: This test is the same test in Molecule.C for setting the
88 // OxygenAtom flag in status.
89 // hgtype should be the number of atoms in a water hydrogen group
90 // It must now be set based on simulation parameters because we might
91 // be using tip4p
92 
93 // DJH: This will give false positive for full Drude model,
94 // e.g. O D H is not water but has hgs==3
95 #define IS_HYDROGEN_GROUP_WATER(hgs, mass) \
96  ((hgs >= 3) && ((mass >= 14.0) && (mass <= 18.0)))
97 
98 #endif
99 
100 #ifdef TIMER_COLLECTION
// Human-readable labels for the per-patch integration timers; order must
// match the TimerSet timer enumeration (NUMTIMERS entries).
const char *TimerSet::tlabel[TimerSet::NUMTIMERS] = {
  "kick",
  "maxmove",
  "drift",
  "piston",
  "submithalf",
  "velbbk1",
  "velbbk2",
  "rattle1",
  "submitfull",
  "submitcollect",
};
113 #endif
114 
// HomePatch constructor.
// Takes ownership of the atom list (swapped in, not copied), computes this
// patch's spatial bounds and per-axis migration distances from the PatchMap,
// and initializes proxy / migration / GBIS / SOA bookkeeping.
//   pd - ID of this patch
//   al - atoms assigned to this patch (emptied by the swap)
HomePatch::HomePatch(PatchID pd, FullAtomList &al) : Patch(pd)
// DMK - Atom Separation (water vs. non-water)
#if NAMD_SeparateWaters != 0
  ,tempAtom()
#endif
{
  atom.swap(al);  // steal the caller's storage instead of copying
  settle_initialized = 0;

  doAtomUpdate = true;
  rattleListValid = false;
  rattleListValid_SOA = false;

  exchange_msg = 0;
  exchange_req = -1;

  // GBIS: per-phase proxy arrival counters and box-closed flags
  numGBISP1Arrived = 0;
  numGBISP2Arrived = 0;
  numGBISP3Arrived = 0;
  phase1BoxClosedCalled = false;
  phase2BoxClosedCalled = false;
  phase3BoxClosedCalled = false;

  // Spatial extent of this patch in scaled (lattice) coordinates
  min.x = PatchMap::Object()->min_a(patchID);
  min.y = PatchMap::Object()->min_b(patchID);
  min.z = PatchMap::Object()->min_c(patchID);
  max.x = PatchMap::Object()->max_a(patchID);
  max.y = PatchMap::Object()->max_b(patchID);
  max.z = PatchMap::Object()->max_c(patchID);
  center = 0.5*(min+max);

  // Per-axis distance that counts as "one patch away" for migration:
  // patch width times numaway for periodic / large-enough grids,
  // otherwise fall back to the configured patchDimension.
  int aAway = PatchMap::Object()->numaway_a();
  if ( PatchMap::Object()->periodic_a() ||
       PatchMap::Object()->gridsize_a() > aAway + 1 ) {
    aAwayDist = (max.x - min.x) * aAway;
  } else {
    aAwayDist = Node::Object()->simParameters->patchDimension;
  }
  int bAway = PatchMap::Object()->numaway_b();
  if ( PatchMap::Object()->periodic_b() ||
       PatchMap::Object()->gridsize_b() > bAway + 1 ) {
    bAwayDist = (max.y - min.y) * bAway;
  } else {
    bAwayDist = Node::Object()->simParameters->patchDimension;
  }
  int cAway = PatchMap::Object()->numaway_c();
  if ( PatchMap::Object()->periodic_c() ||
       PatchMap::Object()->gridsize_c() > cAway + 1 ) {
    cAwayDist = (max.z - min.z) * cAway;
  } else {
    cAwayDist = Node::Object()->simParameters->patchDimension;
  }

  migrationSuspended = false;
  allMigrationIn = false;
  marginViolations = 0;
  patchMapRead = 0; // We delay read of PatchMap data
                    // to make sure it is really valid
  inMigration = false;
  numMlBuf = 0;
  flags.sequence = -1;
  flags.maxForceUsed = -1;

  numAtoms = atom.size();
  replacementForces = 0;

  // NOTE(review): a line declaring simParams (original line 181, presumably
  // SimParameters* from Node::Object()) was lost in extraction here.
  doPairlistCheck_newTolerance =
    0.5 * ( simParams->pairlistDist - simParams->cutoff );


  // Count fixed atoms only when the feature is enabled.
  numFixedAtoms = 0;
  if ( simParams->fixedAtomsOn ) {
    for ( int i = 0; i < numAtoms; ++i ) {
      numFixedAtoms += ( atom[i].atomFixed ? 1 : 0 );
    }
  }

  #if 0
  cudaAtomList = NULL;
  sizeCudaAtomList = 0;
  #endif

#ifdef NODEAWARE_PROXY_SPANNINGTREE
  ptnTree.resize(0);
  /*children = NULL;
  numChild = 0;*/
#else
  child = new int[proxySpanDim];
  nChild = 0; // number of proxy spanning tree children
#endif

#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
  nphs = 0;
  localphs = NULL;
  isProxyChanged = 0;
#endif

  // SOA integration: build the structure-of-arrays mirror of the atom data.
  if (simParams->SOAintegrateOn) {
    PatchDataSOA_initialize( &patchDataSOA );
    sort_solvent_atoms();
    copy_atoms_to_SOA();
#if 0
    if (simParams->rigidBonds != RIGID_NONE) {
      buildRattleList_SOA();
      rattleListValid_SOA = true;
    }
#endif
  }

  // DMK - Atom Separation (water vs. non-water)
  #if NAMD_SeparateWaters != 0

  // Create the scratch memory for separating atoms
  tempAtom.resize(numAtoms);
  numWaterAtoms = -1;

  // Separate the current list of atoms
  separateAtoms();

  #endif
  // Handle unusual water models here
  if (simParams->watmodel == WaterModel::TIP4) init_tip4();
  else if (simParams->watmodel == WaterModel::SWM4) init_swm4();
  gridForceIdxChecked=false;

  isNewProxyAdded = 0;
}
243 
244 void HomePatch::write_tip4_props() {
245  printf("Writing r_om and r_ohc: %f | %f\n", r_om, r_ohc);
246 }
247 
248 void HomePatch::init_tip4() {
249  // initialize the distances needed for the tip4p water model
250  Molecule *mol = Node::Object()->molecule;
251  r_om = mol->r_om;
252  r_ohc = mol->r_ohc;
253 }
254 
255 
256 void ::HomePatch::init_swm4() {
257  // initialize the distances needed for the SWM4 water model
258  Molecule *mol = Node::Object()->molecule;
259  r_om = mol->r_om;
260  r_ohc = mol->r_ohc;
261 }
262 
263 
// Replace this patch's atoms with a new list (swapped in, not copied) after
// a global redistribution, refreshing AtomMap registration, SOA mirrors,
// and water-separation bookkeeping.
void HomePatch::reinitAtoms(FullAtomList &al) {
  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());

  atom.swap(al);
  numAtoms = atom.size();

  // force list / rattle rebuilds on next use
  doAtomUpdate = true;
  rattleListValid = false;
  rattleListValid_SOA = false;

  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());

  // NOTE(review): original line 276 (presumably declaring simParams) was
  // lost in extraction here.
  if (simParams->SOAintegrateOn) {
    sort_solvent_atoms();
    copy_atoms_to_SOA();
#if 0
    if (simParams->rigidBonds != RIGID_NONE) {
      // NOTE(review): original line 282 (presumably buildRattleList_SOA())
      // was lost in extraction here.
      rattleListValid_SOA = true;
    }
#endif
  }

  // DMK - Atom Separation (water vs. non-water)
  #if NAMD_SeparateWaters != 0

  // Reset the numWaterAtoms value
  numWaterAtoms = -1;

  // Separate the atoms
  separateAtoms();

  #endif
}
299 
// Bind a Sequencer to this HomePatch so runSequencer()/awaken() can drive it.
// NOTE(review): the signature line (original 301, useSequencer) was lost in
// extraction; only the body survives below.
{ sequencer=sequencerPtr; }
303 
// start simulation over this Patch of atoms
// NOTE(review): the signature line (original 305, runSequencer) was lost in
// extraction; only the body survives below.
{ sequencer->run(); }
307 
// Build the neighbor tables used for atom migration: realInfo[] gets one
// migration record per one-away neighbor patch, and mInfo[3][3][3] maps each
// cell of the surrounding cube to the matching record (NULL = this patch).
void HomePatch::readPatchMap() {
  // iout << "Patch " << patchID << " has " << proxy.size() << " proxies.\n" << endi;
  // NOTE(review): a declaration of 'p' (original line 310, presumably
  // PatchMap *p = PatchMap::Object()) was lost in extraction.
  PatchID nnPatchID[PatchMap::MaxOneAway];

  patchMigrationCounter = numNeighbors
    = PatchMap::Object()->oneAwayNeighbors(patchID, nnPatchID);
  DebugM( 1, "NumNeighbors for pid " <<patchID<<" is "<< numNeighbors << "\n");
  int n;
  for (n=0; n<numNeighbors; n++) {
    realInfo[n].destNodeID = p->node(realInfo[n].destPatchID = nnPatchID[n]);
    DebugM( 1, " nnPatchID=" <<nnPatchID[n]<<" nnNodeID="<< realInfo[n].destNodeID<< "\n");
    realInfo[n].mList.resize(0);
  }

  // Make mapping from the 3x3x3 cube of pointers to real migration info
  for (int i=0; i<3; i++)
    for (int j=0; j<3; j++)
      for (int k=0; k<3; k++)
      {
        int pid = p->pid(p->index_a(patchID)+i-1,
          p->index_b(patchID)+j-1, p->index_c(patchID)+k-1);
        if (pid < 0) {
          DebugM(5, "ERROR, for patchID " << patchID <<" I got neigh pid = " << pid << "\n");
        }
        // The center cell maps to NULL (no migration) unless it wraps to
        // this patch through a periodic image.
        if (pid == patchID && ! (
            ( (i-1) && p->periodic_a() ) ||
            ( (j-1) && p->periodic_b() ) ||
            ( (k-1) && p->periodic_c() ) )) {
          mInfo[i][j][k] = NULL;
        }
        else {
          // Does not work as expected for periodic with only two patches.
          // Also need to check which image we want, but OK for now. -JCP
          for (n = 0; n<numNeighbors; n++) {
            if (pid == realInfo[n].destPatchID) {
              mInfo[i][j][k] = &realInfo[n];
              break;
            }
          }
          if (n == numNeighbors) { // disaster!
            DebugM(4,"BAD News, I could not find PID " << pid << "\n");
          }
        }
      }

  DebugM(1,"Patch("<<patchID<<") # of neighbors = " << numNeighbors << "\n");
}
356 
// HomePatch destructor body: unregister atoms and release spanning-tree
// child arrays.
// NOTE(review): the signature line (original 357, presumably
// HomePatch::~HomePatch) was lost in extraction.
{
  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
#ifdef NODEAWARE_PROXY_SPANNINGTREE
  ptnTree.resize(0);
  #ifdef USE_NODEPATCHMGR
  delete [] nodeChildren;
  #endif
#endif
  delete [] child;
}
368 
369 
// Called as each dependent compute closes its data box on this patch.
// GBIS phase boxes (5 and 8) may awaken the sequencer early once all proxy
// contributions have arrived; when ALL boxes are closed, replacement forces
// (if any) are applied and the sequencer is awakened for the next step.
void HomePatch::boxClosed(int box) {
  // begin gbis
  if (box == 5) {// end of phase 1
    phase1BoxClosedCalled = true;
    if (!psiSumBox.isOpen() && numGBISP1Arrived == proxy.size()) {
      if (flags.doGBIS && flags.doNonbonded) {
        // fprintf(stderr, "Calling awaken() on patch %d: 1\n", this->patchID);
        sequencer->awaken();
      }
    } else {
      //need to wait until proxies arrive before awakening
    }
  } else if (box == 6) {// intRad
    //do nothing
  } else if (box == 7) {// bornRad
    //do nothing
  } else if (box == 8) {// end of phase 2
    phase2BoxClosedCalled = true;
    //if no proxies, AfterP1 can't be called from receive
    //so it will be called from here
    if (!dEdaSumBox.isOpen() && numGBISP2Arrived == proxy.size()) {
      if (flags.doGBIS && flags.doNonbonded) {
        // fprintf(stderr, "Calling awaken() on patch %d: 2\n", this->patchID);
        sequencer->awaken();
      }
    } else {
      //need to wait until proxies arrive before awakening
    }
  } else if (box == 9) {
    //do nothing
  } else if (box == 10) {
    //lcpoType Box closed: do nothing
  } else {
    //do nothing
  }
  // end gbis

  if ( ! --boxesOpen ) {
    // All boxes closed: optionally overwrite per-atom forces with the
    // requested replacement forces before waking the sequencer.
    if ( replacementForces ) {
      for ( int i = 0; i < numAtoms; ++i ) {
        if ( replacementForces[i].replace ) {
          for ( int j = 0; j < Results::maxNumForces; ++j ) { f[j][i] = 0; }
          f[Results::normal][i] = replacementForces[i].force;
        }
      }
      replacementForces = 0;
    }
    DebugM(1,patchID << ": " << CthSelf() << " awakening sequencer "
      << sequencer->thread << "(" << patchID << ") @" << CmiTimer() << "\n");
    // only awaken suspended threads. Then say it is suspended.

    phase3BoxClosedCalled = true;
    if (flags.doGBIS) {
      if (flags.doNonbonded) {
        sequencer->awaken();
      } else {
        if (numGBISP1Arrived == proxy.size() &&
            numGBISP2Arrived == proxy.size() &&
            numGBISP3Arrived == proxy.size()) {
          sequencer->awaken();//all boxes closed and all proxies arrived
        }
      }
    } else {//non-gbis awaken
      // NOTE(review): original line 433 (presumably declaring simParams)
      // was lost in extraction here.
      if(!simParams->CUDASOAintegrate) {
        sequencer->awaken();
      }
    }
  } else {
    DebugM(1,patchID << ": " << boxesOpen << " boxes left to close.\n");
  }
}
442 
// Record a new proxy of this patch on node msg->node.
// NOTE(review): the signature line (original 443, registerProxy) was lost
// in extraction; only the body survives below.
  DebugM(4, "registerProxy("<<patchID<<") - adding node " <<msg->node<<"\n");
  proxy.add(msg->node);
  // NOTE(review): original line 446 was lost in extraction here.

  isNewProxyAdded = 1;
#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
  isProxyChanged = 1;
#endif

  // Shuffle the proxy list deterministically (seeded by patchID) to spread
  // communication load across destinations.
  Random((patchID + 37) * 137).reorder(proxy.begin(),proxy.size());
  delete msg;
}
456 
// Remove the proxy on msg->node from this patch's proxy list.
// NOTE(review): the signature line (original 457, unregisterProxy) was lost
// in extraction; only the body survives below.
#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
  isProxyChanged = 1;
#endif
  int n = msg->node;
  NodeID *pe = proxy.begin();
  // linear scan for the matching node id (assumes it is present)
  for ( ; *pe != n; ++pe );
  // NOTE(review): original line 464 was lost in extraction here.
  proxy.del(pe - proxy.begin());
  delete msg;
}
468 
469 #if USE_TOPOMAP && USE_SPANNING_TREE
470 
471 int HomePatch::findSubroots(int dim, int* subroots, int psize, int* pidscopy){
472  int nChild = 0;
473  int cones[6][proxySpanDim*proxySpanDim+proxySpanDim];
474  int conesizes[6] = {0,0,0,0,0,0};
475  int conecounters[6] = {0,0,0,0,0,0};
476  int childcounter = 0;
477  nChild = (psize>proxySpanDim)?proxySpanDim:psize;
478  TopoManager tmgr;
479  for(int i=0;i<psize;i++){
480  int cone = tmgr.getConeNumberForRank(pidscopy[i]);
481  cones[cone][conesizes[cone]++] = pidscopy[i];
482  }
483 
484  while(childcounter<nChild){
485  for(int i=0;i<6;i++){
486  if(conecounters[i]<conesizes[i]){
487  subroots[childcounter++] = cones[i][conecounters[i]++];
488  }
489  }
490  }
491  for(int i=nChild;i<proxySpanDim;i++)
492  subroots[i] = -1;
493  return nChild;
494 }
495 #endif // USE_TOPOMAP
496 
497 static int compDistance(const void *a, const void *b)
498 {
499  int d1 = abs(*(int *)a - CkMyPe());
500  int d2 = abs(*(int *)b - CkMyPe());
501  if (d1 < d2)
502  return -1;
503  else if (d1 == d2)
504  return 0;
505  else
506  return 1;
507 }
508 
// Publish this patch's current proxy list to the proxy manager (node-level
// manager when USE_NODEPATCHMGR is enabled, per-PE manager otherwise).
// NOTE(review): the signature line (original 509, sendProxies) was lost in
// extraction; only the body survives below.
{
#if USE_NODEPATCHMGR
  CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();
  npm->sendProxyList(patchID, proxy.begin(), proxy.size());
#else
  ProxyMgr::Object()->sendProxies(patchID, proxy.begin(), proxy.size());
#endif
}
519 
520 #ifdef NODEAWARE_PROXY_SPANNINGTREE
// Build (or rebuild) the node-aware proxy spanning tree for this patch in
// ptnTree, then derive the direct-child lists from it.
void HomePatch::buildNodeAwareSpanningTree(void){
#if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
  DebugFileTrace *dft = DebugFileTrace::Object();
  dft->openTrace();
  dft->writeTrace("HomePatch[%d] has %d proxy on proc[%d] node[%d]\n", patchID, proxy.size(), CkMyPe(), CkMyNode());
  dft->writeTrace("Proxies are: ");
  for(int i=0; i<proxy.size(); i++) dft->writeTrace("%d(%d), ", proxy[i], CkNodeOf(proxy[i]));
  dft->writeTrace("\n");
  dft->closeTrace();
#endif

  //build the naive spanning tree for this home patch
  if(! proxy.size()) {
    //this case will not happen in practice.
    //In debugging state where spanning tree is enforced, then this could happen
    //Chao Mei
    return;
  }
  ProxyMgr::buildSinglePatchNodeAwareSpanningTree(patchID, proxy, ptnTree);
  //optimize on the naive spanning tree

  //setup the children
  setupChildrenFromProxySpanningTree();
  //send down to children
  // NOTE(review): the send call (original line 545) was lost in extraction.
}
547 
// Derive this patch's direct spanning-tree children from ptnTree:
// child[] lists child PEs, and (with USE_NODEPATCHMGR) nodeChildren[]
// lists child nodes.  ptnTree entry 0 is this patch's own node, with
// peIDs[0] == this PE.
void HomePatch::setupChildrenFromProxySpanningTree(){
#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
  isProxyChanged = 1;
#endif
  if(ptnTree.size()==0) {
    // No tree: drop any previously built child lists.
    nChild = 0;
    delete [] child;
    child = NULL;
    #ifdef USE_NODEPATCHMGR
    numNodeChild = 0;
    delete [] nodeChildren;
    nodeChildren = NULL;
    #endif
    return;
  }
  proxyTreeNode *rootnode = &ptnTree.item(0);
  CmiAssert(rootnode->peIDs[0] == CkMyPe());
  //set up children
  //1. add external children (the first proc inside the proxy tree node)
  //2. add internal children (with threshold that would enable spanning
  int internalChild = rootnode->numPes-1;
  int externalChild = ptnTree.size()-1;
  externalChild = (proxySpanDim>externalChild)?externalChild:proxySpanDim;
  int internalSlots = proxySpanDim-externalChild;
  if(internalChild>0){
    if(internalSlots==0) {
      //at least having one internal child
      internalChild = 1;
    }else{
      internalChild = (internalSlots>internalChild)?internalChild:internalSlots;
    }
  }

  nChild = externalChild+internalChild;
  CmiAssert(nChild>0);

  //exclude the root node
  delete [] child;
  child = new int[nChild];

  // External children: the root PE of each child tree node.
  for(int i=0; i<externalChild; i++) {
    child[i] = ptnTree.item(i+1).peIDs[0];
  }
  // Internal children: additional PEs on this node (peIDs[0] is this PE).
  for(int i=externalChild, j=1; i<nChild; i++, j++) {
    child[i] = rootnode->peIDs[j];
  }

#ifdef USE_NODEPATCHMGR
  //only register the cores that have proxy patches. The HomePatch's core
  //doesn't need to be registered.
  CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();
  if(rootnode->numPes==1){
    npm->registerPatch(patchID, 0, NULL);
  }
  else{
    npm->registerPatch(patchID, rootnode->numPes-1, &rootnode->peIDs[1]);
  }

  //set up childrens in terms of node ids
  numNodeChild = externalChild;
  if(internalChild) numNodeChild++;
  delete [] nodeChildren;
  nodeChildren = new int[numNodeChild];
  for(int i=0; i<externalChild; i++) {
    nodeChildren[i] = ptnTree.item(i+1).nodeID;
  }
  //the last entry always stores this node id if there are proxies
  //on other cores of the same node for this patch.
  if(internalChild)
    nodeChildren[numNodeChild-1] = rootnode->nodeID;
#endif

#if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
  DebugFileTrace *dft = DebugFileTrace::Object();
  dft->openTrace();
  dft->writeTrace("HomePatch[%d] has %d children: ", patchID, nChild);
  for(int i=0; i<nChild; i++)
    dft->writeTrace("%d ", child[i]);
  dft->writeTrace("\n");
  dft->closeTrace();
#endif
}
631 #endif
632 
633 #ifdef NODEAWARE_PROXY_SPANNINGTREE
//This is not an entry method, but takes an argument of message type
// Rebuild ptnTree from a received spanning-tree message, then derive the
// child lists and (in the original) forward the tree to the children.
// NOTE(review): the signature line (original 635) was lost in extraction.
  //set up the whole tree ptnTree
  int treesize = msg->numNodesWithProxies;
  ptnTree.resize(treesize);
  int *pAllPes = msg->allPes;
  for(int i=0; i<treesize; i++) {
    proxyTreeNode *oneNode = &ptnTree.item(i);
    delete [] oneNode->peIDs;  // discard any previous PE list
    oneNode->numPes = msg->numPesOfNode[i];
    oneNode->nodeID = CkNodeOf(*pAllPes);
    oneNode->peIDs = new int[oneNode->numPes];
    for(int j=0; j<oneNode->numPes; j++) {
      oneNode->peIDs[j] = *pAllPes;
      pAllPes++;
    }
  }
  //setup children
  setupChildrenFromProxySpanningTree();
  //send down to children
  // NOTE(review): the send call (original line 654) was lost in extraction.
}
656 
// Forward this patch's node-aware spanning tree to its children.
// NOTE(review): the signature line (original 657), the declaration of
// 'msg' receiving getANewMsg's result (original 659), and the send call
// (original 668) were all lost in extraction.
  if(ptnTree.size()==0) return;
  ProxyNodeAwareSpanningTreeMsg::getANewMsg(patchID, CkMyPe(), ptnTree.begin(), ptnTree.size());

  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
  msg->printOut("HP::sendST");
  #endif

  CmiAssert(CkMyPe() == msg->allPes[0]);

}
670 #else
673 #endif
674 
675 #ifndef NODEAWARE_PROXY_SPANNINGTREE
// recv a spanning tree from load balancer
// Copies the received flat tree (tree[0] is this PE) and takes entries
// 1..proxySpanDim as this patch's direct children.
void HomePatch::recvSpanningTree(int *t, int n)
{
  int i;
  nChild=0;
  tree.resize(n);
  for (i=0; i<n; i++) {
    tree[i] = t[i];
  }

  // first proxySpanDim entries after the root become direct children
  for (i=1; i<=proxySpanDim; i++) {
    if (tree.size() <= i) break;
    child[i-1] = tree[i];
    nChild++;
  }

#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
  isProxyChanged = 1;
#endif

  // send down to children
  // NOTE(review): the send call (original line 697) was lost in extraction.
}
699 
// Forward the flat spanning tree to children.
// NOTE(review): the signature line (original 700, sendSpanningTree), the
// allocation of 'msg' (original 703), and the send call (original 707)
// were all lost in extraction.
{
  if (tree.size() == 0) return;
  msg->patch = patchID;
  msg->node = CkMyPe();
  msg->tree.copy(tree); // copy data for thread safety
}
709 #else
// Node-aware spanning trees are in use: flat per-PE trees from the load
// balancer are ignored.
void HomePatch::recvSpanningTree(int *t, int n){}
712 #endif
713 
714 #ifndef NODEAWARE_PROXY_SPANNINGTREE
// Build a proxy spanning tree for this patch: slot proxies into tree[]
// (reusing old positions where possible, patch-hosting PEs last), then
// either build a topology-aware two-level tree (USE_TOPOMAP) or take the
// first proxySpanDim entries as direct children.
// NOTE(review): the signature line (original 715, buildSpanningTree) was
// lost in extraction; only the body survives below.
{
  nChild = 0;
  int psize = proxy.size();
  if (psize == 0) return;
  NodeIDList oldtree; oldtree.swap(tree);
  int oldsize = oldtree.size();
  tree.resize(psize + 1);
  tree.setall(-1);
  tree[0] = CkMyPe();
  int s=1, e=psize+1;
  // NOTE(review): original line 726 (presumably declaring the iterator
  // 'pli') was lost in extraction.
  int patchNodesLast =
    ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
  int nNonPatch = 0;
#if 1
  // try to put it to the same old tree
  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
  {
    int oldindex = oldtree.find(*pli);
    if (oldindex != -1 && oldindex < psize) {
      tree[oldindex] = *pli;
    }
  }
  s=1; e=psize;
  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
  {
    if (tree.find(*pli) != -1) continue; // already assigned
    if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
      // patch-hosting PEs fill the tree from the back
      while (tree[e] != -1) { e--; if (e==-1) e = psize; }
      tree[e] = *pli;
    } else {
      while (tree[s] != -1) { s++; if (s==psize+1) s = 1; }
      tree[s] = *pli;
      nNonPatch++;
    }
  }
#if 1
  if (oldsize==0 && nNonPatch) {
    // first time, sort by distance
    qsort(&tree[1], nNonPatch, sizeof(int), compDistance);
  }
#endif

  //CkPrintf("home: %d:(%d) %d %d %d %d %d\n", patchID, tree.size(),tree[0],tree[1],tree[2],tree[3],tree[4]);

#if USE_TOPOMAP && USE_SPANNING_TREE

  //Right now only works for spanning trees with two levels
  int *treecopy = new int [psize];
  int subroots[proxySpanDim];
  int subsizes[proxySpanDim];
  int subtrees[proxySpanDim][proxySpanDim];
  int idxes[proxySpanDim];
  int i = 0;

  for(i=0;i<proxySpanDim;i++){
    subsizes[i] = 0;
    idxes[i] = i;
  }

  for(i=0;i<psize;i++){
    treecopy[i] = tree[i+1];
  }

  // sort both halves (non-patch then patch PEs) by network distance
  TopoManager tmgr;
  tmgr.sortRanksByHops(treecopy,nNonPatch);
  tmgr.sortRanksByHops(treecopy+nNonPatch,
    psize-nNonPatch);

  /* build tree and subtrees */
  nChild = findSubroots(proxySpanDim,subroots,psize,treecopy);
  delete [] treecopy;

  // attach each remaining PE to the closest subroot with a free slot
  for(int i=1;i<psize+1;i++){
    int isSubroot=0;
    for(int j=0;j<nChild;j++)
      if(tree[i]==subroots[j]){
        isSubroot=1;
        break;
      }
    if(isSubroot) continue;

    int bAdded = 0;
    tmgr.sortIndexByHops(tree[i], subroots,
      idxes, proxySpanDim);
    for(int j=0;j<proxySpanDim;j++){
      if(subsizes[idxes[j]]<proxySpanDim){
        subtrees[idxes[j]][(subsizes[idxes[j]])++] = tree[i];
        bAdded = 1;
        break;
      }
    }
    if( psize > proxySpanDim && ! bAdded ) {
      NAMD_bug("HomePatch BGL Spanning Tree error: Couldn't find subtree for leaf\n");
    }
  }

#else /* USE_TOPOMAP && USE_SPANNING_TREE */

  for (int i=1; i<=proxySpanDim; i++) {
    if (tree.size() <= i) break;
    child[i-1] = tree[i];
    nChild++;
  }
#endif
#endif

#if 0
  // for debugging
  CkPrintf("[%d] Spanning tree for %d with %d children %d nNonPatch %d\n", CkMyPe(), patchID, psize, nNonPatch);
  for (int i=0; i<psize+1; i++) {
    CkPrintf("%d ", tree[i]);
  }
  CkPrintf("\n");
#endif
  // send to children nodes
  // NOTE(review): the send call (original line 832) was lost in extraction.
}
834 #endif
835 
836 
838 
// Accumulate proxy forces from a compressed raw result message: per-force
// isZero flags with only the nonzero forces actually transmitted.
// NOTE(review): the signature line (original 837) was lost in extraction;
// only the body survives below.
  numGBISP3Arrived++;
  DebugM(4, "patchID("<<patchID<<") receiveRes() nodeID("<<msg->node<<")\n");
  int n = msg->node;
  Results *r = forceBox.clientOpen();

  char *iszeroPtr = msg->isZero;
  Force *msgFPtr = msg->forceArr;

  for ( int k = 0; k < Results::maxNumForces; ++k )
  {
    Force *rfPtr = r->f[k];
    for(int i=0; i<msg->flLen[k]; i++, rfPtr++, iszeroPtr++) {
      // msgFPtr advances only for transmitted (nonzero) forces
      if((*iszeroPtr)!=1) {
        *rfPtr += *msgFPtr;
        msgFPtr++;
      }
    }
  }
  // NOTE(review): original line 857 (presumably closing the force box) was
  // lost in extraction here.
  delete msg;
}
860 
// Accumulate proxy forces from a ProxyResultMsg carrying full force lists.
// NOTE(review): the signature line (original 861) was lost in extraction;
// only the body survives below.
  numGBISP3Arrived++;
  DebugM(4, "patchID("<<patchID<<") receiveRes() nodeID("<<msg->node<<")\n");
  int n = msg->node;
  Results *r = forceBox.clientOpen();
  for ( int k = 0; k < Results::maxNumForces; ++k )
  {
    Force *f = r->f[k];
    register ForceList::iterator f_i, f_e;
    f_i = msg->forceList[k]->begin();
    f_e = msg->forceList[k]->end();
    for ( ; f_i != f_e; ++f_i, ++f ) *f += *f_i;
  }
  // NOTE(review): original line 874 (presumably closing the force box) was
  // lost in extraction here.
  delete msg;
}
877 
// Accumulate proxy forces from a combined (node-aggregated) raw result
// message with per-force nonzero flags.
// NOTE(review): the signature line (original 878) was lost in extraction;
// only the body survives below.
{
  numGBISP3Arrived++;
  DebugM(4, "patchID("<<patchID<<") receiveRes() #nodes("<<msg->nodeSize<<")\n");
  Results *r = forceBox.clientOpen(msg->nodeSize);
  register char* isNonZero = msg->isForceNonZero;
  register Force* f_i = msg->forceArr;
  for ( int k = 0; k < Results::maxNumForces; ++k )
  {
    Force *f = r->f[k];
    int nf = msg->flLen[k];
#ifdef ARCH_POWERPC
#pragma disjoint (*f_i, *f)
#endif
    // f_i advances only for transmitted (nonzero) forces
    for (int count = 0; count < nf; count++) {
      if(*isNonZero){
        f[count].x += f_i->x;
        f[count].y += f_i->y;
        f[count].z += f_i->z;
        f_i++;
      }
      isNonZero++;
    }
  }
  // NOTE(review): original line 902 (presumably closing the force box) was
  // lost in extraction here.

  delete msg;
}
906 
// LSS in QM/MM: relabel atoms so solvent molecules effectively swap between
// the classical and quantum regions, fixing up charges as needed.
// NOTE(review): the signature line (original 907, qmSwapAtoms) was lost in
// extraction; only the body survives below.
{
  // This is used for LSS in QM/MM simulations.
  // Changes atom labels so that we effectively exchange solvent
  // molecules between classical and quantum modes.

  // NOTE(review): original line 913 (presumably declaring simParams) was
  // lost in extraction; simParams->PMEOn is read below.
  int numQMAtms = Node::Object()->molecule->get_numQMAtoms();
  const Real * const qmAtomGroup = Node::Object()->molecule->get_qmAtomGroup() ;
  const int *qmAtmIndx = Node::Object()->molecule->get_qmAtmIndx() ;
  Real *qmAtmChrg = Node::Object()->molecule->get_qmAtmChrg() ;

  ComputeQMMgr *mgrP = CProxy_ComputeQMMgr::ckLocalBranch(
    CkpvAccess(BOCclass_group).computeQMMgr) ;

  FullAtom *a_i = atom.begin();

  for (int i=0; i<numAtoms; ++i ) {

    // Look up a pending label substitution for this atom's id.
    LSSSubsDat *subP = lssSubs(mgrP).find( LSSSubsDat(a_i[i].id) ) ;

    if ( subP != NULL ) {
      a_i[i].id = subP->newID ;
      a_i[i].vdwType = subP->newVdWType ;

      // If we are swapping a classical atom with a QM one, the charge
      // will need extra handling.
      if (qmAtomGroup[subP->newID] > 0 && simParams->PMEOn) {
        // We make sure that the last atom charge calculated for the
        // QM atom being transfered here is available for PME
        // in the next step.

        // Loops over all QM atoms (in all QM groups) comparing their
        // global indices (the sequential atom ID from NAMD).
        for (int qmIter=0; qmIter<numQMAtms; qmIter++) {

          if (qmAtmIndx[qmIter] == subP->newID) {
            qmAtmChrg[qmIter] = subP->newCharge;
            break;
          }

        }

        // For QM atoms, the charge in the full atom structure is zero.
        // Electrostatic interactions between QM atoms and their
        // environment is handled in ComputeQM.
        a_i[i].charge = 0;
      }
      else {
        // If we are swapping a QM atom with a Classical one, only the charge
        // in the full atom structure needs updating, since it used to be zero.
        a_i[i].charge = subP->newCharge ;
      }
    }
  }

  return;
}
965 
966 
968 //
969 // begin SOA
970 //
971 void HomePatch::positionsReady_SOA(int doMigration)
972 {
974  // char prbuf[32];
975  // sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::POSITIONS_READY_SOA], this->getPatchID());
976  // NAMD_EVENT_START_EX(1, NamdProfileEvent::POSITIONS_READY_SOA, prbuf);
977  if(!simParams->CUDASOAintegrate) flags.sequence++;
978  // flags.sequence++;
979  if (!patchMapRead) { readPatchMap(); }
980  if (numNeighbors && ! simParams->staticAtomAssignment) {
981  if (doMigration) {
982  // copy SOA updates to AOS
983  // XXX TODO:
984  copy_updates_to_AOS();
985  // make sure to invalidate RATTLE lists when atoms move
986  rattleListValid_SOA = false;
987  rattleListValid = false;
988  // this has a suspend
989  doAtomMigration();
990  } else {
991  // XXX TODO: Get rid of this marginCheck for every tstep
992  // move this to the GPU afterwards
993  if(!simParams->CUDASOAintegrate) doMarginCheck_SOA();
994  }
995  }
996 
997 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
998  char prbuf[32];
999  sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::POSITIONS_READY], this->getPatchID());
1000  NAMD_EVENT_START_EX(1, NamdProfileEvent::POSITIONS_READY, prbuf);
1001 #endif
1002 
1003 #if 0
1004  if (doMigration && simParams->qmLSSOn)
1005  qmSwapAtoms();
1006 #endif
1007 
1008 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1009  if ( doMigration ) {
1010  // XXX Since we just migrated FullAtom array is up-to-date
1011  int n = numAtoms;
1012  int * __restrict ao_SOA = patchDataSOA.sortOrder;
1013  int * __restrict unsort_SOA = patchDataSOA.unsortOrder;
1014  #if defined(NAMD_CUDA) || defined(NAMD_HIP) || (defined(NAMD_MIC) && MIC_SORT_ATOMS != 0)
1015  //#if 0
1016  int nfree_SOA;
1017  if ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces ) {
1018  int k = 0;
1019  int k2 = n;
1020  int * __restrict atomFixed = patchDataSOA.atomFixed;
1021  for ( int j=0; j<n; ++j ) {
1022  // put fixed atoms at end
1023  if ( atomFixed[j] ) ao_SOA[--k2] = j;
1024  else ao_SOA[k++] = j;
1025  }
1026  nfree_SOA = k;
1027  } else {
1028  nfree_SOA = n;
1029  for ( int j=0; j<n; ++j ) {
1030  ao_SOA[j] = j;
1031  }
1032  }
1033 #if 1
1034  sortAtomsForCUDA_SOA(ao_SOA, unsort_SOA,
1035  patchDataSOA.pos_x, patchDataSOA.pos_y, patchDataSOA.pos_z,
1036  nfree_SOA, n);
1037 #endif
1038 #if 0
1039  for (int i = 0; i < n; ++i) {
1040  ao_SOA[i] = i;
1041  printf("ao_SOA[%d] = %d\n", i, ao_SOA[i]);
1042  }
1043 #endif
1044 
1045 #else
1046  for (int i = 0; i < n; ++i) {
1047  ao_SOA[i] = i;
1048  }
1049 #endif
1050  }
1051  {
1052  const double charge_scaling = sqrt(COULOMB * ComputeNonbondedUtil::scaling * ComputeNonbondedUtil::dielectric_1);
1053  const Vector ucenter = lattice.unscale(center);
1054  const BigReal ucenter_x = ucenter.x;
1055  const BigReal ucenter_y = ucenter.y;
1056  const BigReal ucenter_z = ucenter.z;
1057  const int n = numAtoms;
1058  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1059  int n_16 = n;
1060  n_16 = (n + 15) & (~15);
1061  cudaAtomList.resize(n_16);
1062  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1063  mic_position_t *atom_x = ((mic_position_t*)ac) + (0 * n_16);
1064  mic_position_t *atom_y = ((mic_position_t*)ac) + (1 * n_16);
1065  mic_position_t *atom_z = ((mic_position_t*)ac) + (2 * n_16);
1066  mic_position_t *atom_q = ((mic_position_t*)ac) + (3 * n_16);
1067  #elif defined(NAMD_AVXTILES)
1068  int n_avx = (n + 15) & (~15);
1069  cudaAtomList.resize(n_avx);
1070  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1071  tiles.realloc(n, ac);
1072  #else
1073  if(doMigration) cudaAtomList.resize(n);
1074  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1075  #endif
1076  double * __restrict pos_x = patchDataSOA.pos_x;
1077  double * __restrict pos_y = patchDataSOA.pos_y;
1078  double * __restrict pos_z = patchDataSOA.pos_z;
1079  float * __restrict charge = patchDataSOA.charge;
1080  int * __restrict ao_SOA = patchDataSOA.sortOrder;
1081 #ifndef NODEGROUP_FORCE_REGISTER
1082 //#if 1
1083  for ( int k=0; k<n; ++k ) {
1084  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1085  int j = ao_SOA[k];
1086  atom_x[k] = pos_x[j] - ucenter_x;
1087  atom_y[k] = pos_y[j] - ucenter_y;
1088  atom_z[k] = pos_z[j] - ucenter_z;
1089  atom_q[k] = charge_scaling * charge[j];
1090  #else
1091  // XXX TODO: This has to go
1092  int j = ao_SOA[k];
1093  // JM: Calculating single-precision patch-centered atomic coordinates as
1094  // offsets. By adding single-precision offsets to double-precision
1095  // patch-patch center distances, we maintain full precision
1096  // XXX NOTE: check where I can use this to use float instead of double in NAMD
1097  ac[k].x = pos_x[j] - ucenter_x;
1098  ac[k].y = pos_y[j] - ucenter_y;
1099  ac[k].z = pos_z[j] - ucenter_z;
1100  // XXX TODO: Compute charge scaling on GPUs and not here to avoid a copy
1101  // for every timestep
1102  // XXX TODO: Check when do we have to update this value and do it
1103  // on the gpu
1104  ac[k].q = charge_scaling * charge[j];
1105  #endif
1106  }
1107 #else
1108  if(!simParams->CUDASOAintegrate || doMigration){
1109  for ( int k=0; k<n; ++k ) {
1110  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1111  int j = ao_SOA[k];
1112  atom_x[k] = pos_x[j] - ucenter_x;
1113  atom_y[k] = pos_y[j] - ucenter_y;
1114  atom_z[k] = pos_z[j] - ucenter_z;
1115  atom_q[k] = charge_scaling * charge[j];
1116  #else
1117  // XXX TODO: This has to go
1118  int j = ao_SOA[k];
1119  // JM: Calculating single-precision patch-centered atomic coordinates as
1120  // offsets. By adding single-precision offsets to double-precision
1121  // patch-patch center distances, we maintain full precision
1122  ac[k].x = pos_x[j] - ucenter_x;
1123  ac[k].y = pos_y[j] - ucenter_y;
1124  ac[k].z = pos_z[j] - ucenter_z;
1125  // XXX TODO: Compute charge scaling on GPUs and not here to avoid a copy
1126  // for every timestep
1127  // XXX TODO: Check when do we have to update this value and do it
1128  // on the gpu
1129  ac[k].q = charge_scaling * charge[j];
1130  #endif
1131  }
1132  }
1133 #endif
1134  }
1135 #else
1136  doMigration = doMigration && numNeighbors;
1137 #endif
1138  doMigration = doMigration || ! patchMapRead;
1139 
1140  doMigration = doMigration || doAtomUpdate;
1141  doAtomUpdate = false;
1142 
1143  // Workaround for oversize groups:
1144  // reset nonbondedGroupSize (ngs) before force calculation,
1145  // making sure that subset of hydrogen group starting with
1146  // parent atom are all within 0.5 * hgroupCutoff.
1147  // XXX hydrogentGroupSize remains constant but is checked for nonzero
1148  // XXX should be skipped for CUDA, ngs not used by CUDA kernels
1149  // XXX should this also be skipped for KNL kernels?
1150  // ngs used by ComputeNonbondedBase.h - CPU nonbonded kernels
1151  // ngs used by ComputeGBIS.C - CPU GB nonbonded kernels
1152 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1154 #endif
1155 
1156  // Copy information needed by computes and proxys to Patch::p.
1157  if (doMigration) {
1158  // Atom data changes after migration, so must copy all of it.
1159  // Must resize CompAtom and CompAtomExt arrays to new atom count.
1160  p.resize(numAtoms);
1161  pExt.resize(numAtoms);
1162  CompAtom * __restrict p_i = p.begin();
1163  CompAtomExt * __restrict pExt_i = pExt.begin();
1164  const double * __restrict pos_x = patchDataSOA.pos_x;
1165  const double * __restrict pos_y = patchDataSOA.pos_y;
1166  const double * __restrict pos_z = patchDataSOA.pos_z;
1167  const float * __restrict charge = patchDataSOA.charge;
1168  const int * __restrict vdwType = patchDataSOA.vdwType;
1169  const int * __restrict partition = patchDataSOA.partition;
1170 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1171  const int * __restrict nonbondedGroupSize = patchDataSOA.nonbondedGroupSize;
1172 #endif
1173  const int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
1174  const int * __restrict isWater = patchDataSOA.isWater;
1175  int n = numAtoms;
1176  for (int i=0; i < n; i++) {
1177  p_i[i].position.x = pos_x[i];
1178  p_i[i].position.y = pos_y[i];
1179  p_i[i].position.z = pos_z[i];
1180  p_i[i].charge = charge[i];
1181  p_i[i].vdwType = vdwType[i];
1182  p_i[i].partition = partition[i];
1183 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1184  p_i[i].nonbondedGroupSize = nonbondedGroupSize[i];
1185 #endif
1186  p_i[i].hydrogenGroupSize = hydrogenGroupSize[i];
1187  p_i[i].isWater = isWater[i];
1188  }
1189 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1190  const int * __restrict sortOrder = patchDataSOA.sortOrder;
1191 #endif
1192  const int * __restrict id = patchDataSOA.id;
1193 #if defined(MEM_OPT_VERSION)
1194  const int * __restrict exclId = patchDataSOA.exclId;
1195  const int * __restrict sigId = patchDataSOA.sigId;
1196 #endif
1197  const int * __restrict atomFixed = patchDataSOA.atomFixed;
1198  const int * __restrict groupFixed = patchDataSOA.groupFixed;
1199  // Copy into CompAtomExt using typecast to temporary CompAtomExtCopy
1200  // to avoid loop vectorization bug in Intel 2018 compiler.
1201 #ifndef USE_NO_BITFIELDS
1202  CompAtomExtCopy *pExtCopy_i = (CompAtomExtCopy *) pExt_i;
1203 #endif // USE_NO_BITFIELDS
1204  for (int i=0; i < n; i++) {
1205 #ifndef USE_NO_BITFIELDS
1206 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1207  pExtCopy_i[i].sortOrder = sortOrder[i];
1208 #endif
1209 #if defined(MEM_OPT_VERSION)
1210  pExtCopy_i[i].id = id[i];
1211  pExtCopy_i[i].exclId = exclId[i];
1212  ASSERT(atomFixed[i] == 0 || atomFixed[i] == 1);
1213  ASSERT(groupFixed[i] == 0 || groupFixed[i] == 1);
1214  uint32 atomFixedBit = NAMD_ATOM_FIXED_MASK * atomFixed[i];
1215  uint32 groupFixedBit = NAMD_GROUP_FIXED_MASK * groupFixed[i];
1216  pExtCopy_i[i].sigId = (sigId[i] | atomFixedBit | groupFixedBit);
1217 #else
1218  ASSERT(atomFixed[i] == 0 || atomFixed[i] == 1);
1219  ASSERT(groupFixed[i] == 0 || groupFixed[i] == 1);
1220  uint32 atomFixedBit = NAMD_ATOM_FIXED_MASK * atomFixed[i];
1221  uint32 groupFixedBit = NAMD_GROUP_FIXED_MASK * groupFixed[i];
1222  pExtCopy_i[i].id = (id[i] | atomFixedBit | groupFixedBit);
1223 #endif // if defined(MEM_OPT_VERSION)
1224 #else
1225 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1226  pExt_i[i].sortOrder = sortOrder[i];
1227 #endif
1228  pExt_i[i].id = id[i];
1229 #if defined(MEM_OPT_VERSION)
1230  pExt_i[i].exclId = exclId[i];
1231  pExt_i[i].sigId = sigId[i];
1232 #endif
1233  pExt_i[i].atomFixed = atomFixed[i];
1234  pExt_i[i].groupFixed = groupFixed[i];
1235 #endif // USE_NO_BITFIELDS
1236  }
1237  }
1238  else {
1239  // JM: This is done for every timestep
1240  // Only need to copy positions, nonbondedGroupSize, and sortOrder.
1241  // Other data remains unchanged.
1242  CompAtom * __restrict p_i = p.begin();
1243  CompAtomExt * __restrict pExt_i = pExt.begin();
1244  const double * __restrict pos_x = patchDataSOA.pos_x;
1245  const double * __restrict pos_y = patchDataSOA.pos_y;
1246  const double * __restrict pos_z = patchDataSOA.pos_z;
1247 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1248  const int * __restrict nonbondedGroupSize = patchDataSOA.nonbondedGroupSize;
1249 #endif
1250  int n = numAtoms;
1251 #ifndef NODEGROUP_FORCE_REGISTER
1252 //#if 1
1253  for (int i=0; i < n; i++) {
1254  p_i[i].position.x = pos_x[i];
1255  p_i[i].position.y = pos_y[i];
1256  p_i[i].position.z = pos_z[i];
1257 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1258  p_i[i].nonbondedGroupSize = nonbondedGroupSize[i];
1259 #endif
1260  }
1261 #else
1262  if(!simParams->CUDASOAintegrate || doMigration){
1263  for (int i=0; i < n; i++) {
1264  p_i[i].position.x = pos_x[i];
1265  p_i[i].position.y = pos_y[i];
1266  p_i[i].position.z = pos_z[i];
1267 #if !defined(NAMD_CUDA) && !defined(NAMD_HIP)
1268  p_i[i].nonbondedGroupSize = nonbondedGroupSize[i];
1269 #endif
1270  }
1271  }
1272 #endif
1273 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1274  const int * __restrict sortOrder = patchDataSOA.sortOrder;
1275 #ifdef NODEGROUP_FORCE_REGISTER
1276 //#if 0
1277  if(!simParams->CUDASOAintegrate || doMigration){
1278  for (int i=0; i < n; i++) {
1279  pExt_i[i].sortOrder = sortOrder[i];
1280  }
1281  }
1282 #else
1283  //if(!simParams->CUDASOAintegrate || doMigration){
1284  for (int i=0; i < n; i++) {
1285  pExt_i[i].sortOrder = sortOrder[i];
1286  }
1287 #endif
1288 #endif
1289  } // end Copy information to Patch::p
1290 
1291  // Measure atom movement to test pairlist validity
1292  // XXX TODO: Check if this ever needs to be done if CUDASOAintegrate
1293 #ifdef NODEGROUP_FORCE_REGISTER
1294  if(!simParams->CUDASOAintegrate || flags.sequence == 0){
1295  doPairlistCheck();
1296  }
1297 #else
1298  doPairlistCheck();
1299 #endif
1300 
1301 #if 0
1302  if (flags.doMolly) mollyAverage();
1303  // BEGIN LA
1305  // END LA
1306 
1307  if (flags.doGBIS) {
1308  //reset for next time step
1309  numGBISP1Arrived = 0;
1310  phase1BoxClosedCalled = false;
1311  numGBISP2Arrived = 0;
1312  phase2BoxClosedCalled = false;
1313  numGBISP3Arrived = 0;
1314  phase3BoxClosedCalled = false;
1315  if (doMigration || isNewProxyAdded)
1317  }
1318 
1319  if (flags.doLCPO) {
1320  if (doMigration || isNewProxyAdded) {
1321  setLcpoType();
1322  }
1323  }
1324 #endif
1325 
1326  // Must Add Proxy Changes when migration completed!
1328  int *pids = NULL;
1329  int pidsPreAllocated = 1;
1330  int npid;
1331  if (proxySendSpanning == 0) {
1332  npid = proxy.size();
1333  pids = new int[npid];
1334  pidsPreAllocated = 0;
1335  int *pidi = pids;
1336  int *pide = pids + proxy.size();
1337  int patchNodesLast =
1338  ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
1339  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
1340  {
1341  if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
1342  *(--pide) = *pli;
1343  } else {
1344  *(pidi++) = *pli;
1345  }
1346  }
1347  }
1348  else {
1349 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1350  #ifdef USE_NODEPATCHMGR
1351  npid = numNodeChild;
1352  pids = nodeChildren;
1353  #else
1354  npid = nChild;
1355  pids = child;
1356  #endif
1357 #else
1358  npid = nChild;
1359  pidsPreAllocated = 0;
1360  pids = new int[proxySpanDim];
1361  for (int i=0; i<nChild; i++) pids[i] = child[i];
1362 #endif
1363  }
1364  if (npid) { //have proxies
1365  int seq = flags.sequence;
1366  int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
1367  //begin to prepare proxy msg and send it
1368  int pdMsgPLLen = p.size();
1369  int pdMsgAvgPLLen = 0;
1370 #if 0
1371  if(flags.doMolly) {
1372  pdMsgAvgPLLen = p_avg.size();
1373  }
1374 #endif
1375  // BEGIN LA
1376  int pdMsgVLLen = 0;
1377 #if 0
1378  if (flags.doLoweAndersen) {
1379  pdMsgVLLen = v.size();
1380  }
1381 #endif
1382  // END LA
1383 
1384  int intRadLen = 0;
1385 #if 0
1386  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
1387  intRadLen = numAtoms * 2;
1388  }
1389 #endif
1390 
1391  //LCPO
1392  int lcpoTypeLen = 0;
1393 #if 0
1394  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
1395  lcpoTypeLen = numAtoms;
1396  }
1397 #endif
1398 
1399  int pdMsgPLExtLen = 0;
1400  if(doMigration || isNewProxyAdded) {
1401  pdMsgPLExtLen = pExt.size();
1402  }
1403 
1404  int cudaAtomLen = 0;
1405 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1406  cudaAtomLen = numAtoms;
1407 #endif
1408 
1409  #ifdef NAMD_MIC
1410  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
1411  cudaAtomLen = numAtoms;
1412  #else
1413  cudaAtomLen = (numAtoms + 15) & (~15);
1414  #endif
1415  #endif
1416  ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgVLLen, intRadLen,
1417  lcpoTypeLen, pdMsgPLExtLen, cudaAtomLen, PRIORITY_SIZE) ProxyDataMsg; // BEGIN LA, END LA
1418 
1419  SET_PRIORITY(nmsg,seq,priority);
1420  nmsg->patch = patchID;
1421  nmsg->flags = flags;
1422  nmsg->plLen = pdMsgPLLen;
1423  //copying data to the newly created msg
1424  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
1425  memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
1426  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
1427  nmsg->avgPlLen = pdMsgAvgPLLen;
1428 #if 0
1429  if(flags.doMolly) {
1430  memcpy(nmsg->avgPositionList, p_avg.begin(), sizeof(CompAtom)*pdMsgAvgPLLen);
1431  }
1432 #endif
1433  // BEGIN LA
1434  nmsg->vlLen = pdMsgVLLen;
1435 #if 0
1436  if (flags.doLoweAndersen) {
1437  memcpy(nmsg->velocityList, v.begin(), sizeof(CompAtom)*pdMsgVLLen);
1438  }
1439 #endif
1440  // END LA
1441 
1442 #if 0
1443  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
1444  for (int i = 0; i < numAtoms * 2; i++) {
1445  nmsg->intRadList[i] = intRad[i];
1446  }
1447  }
1448 #endif
1449 
1450 #if 0
1451  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
1452  for (int i = 0; i < numAtoms; i++) {
1453  nmsg->lcpoTypeList[i] = lcpoType[i];
1454  }
1455  }
1456 #endif
1457  nmsg->plExtLen = pdMsgPLExtLen;
1458  if(doMigration || isNewProxyAdded){
1459  memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
1460  }
1461 
1462 // DMK
1463 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1464  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
1465  memcpy(nmsg->cudaAtomList, cudaAtomPtr, sizeof(CudaAtom)*cudaAtomLen);
1466  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
1467 #endif
1468 
1469 #if NAMD_SeparateWaters != 0
1470  //DMK - Atom Separation (water vs. non-water)
1471  nmsg->numWaterAtoms = numWaterAtoms;
1472 #endif
1473 
1474 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
1475  nmsg->isFromImmMsgCall = 0;
1476 #endif
1477 
1478  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
1479  DebugFileTrace *dft = DebugFileTrace::Object();
1480  dft->openTrace();
1481  dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
1482  for(int i=0; i<npid; i++) {
1483  dft->writeTrace("%d ", pids[i]);
1484  }
1485  dft->writeTrace("\n");
1486  dft->closeTrace();
1487  #endif
1488 
1489 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1490  if (isProxyChanged || localphs == NULL)
1491  {
1492 //CmiPrintf("[%d] Build persistent: isProxyChanged: %d %p\n", CkMyPe(), isProxyChanged, localphs);
1493  //CmiAssert(isProxyChanged);
1494  if (nphs) {
1495  for (int i=0; i<nphs; i++) {
1496  CmiDestoryPersistent(localphs[i]);
1497  }
1498  delete [] localphs;
1499  }
1500  localphs = new PersistentHandle[npid];
1501  int persist_size = sizeof(envelope) + sizeof(ProxyDataMsg) + sizeof(CompAtom)*(pdMsgPLLen+pdMsgAvgPLLen+pdMsgVLLen) + intRadLen*sizeof(Real) + lcpoTypeLen*sizeof(int) + sizeof(CompAtomExt)*pdMsgPLExtLen + sizeof(CudaAtom)*cudaAtomLen + PRIORITY_SIZE/8 + 2048;
1502  for (int i=0; i<npid; i++) {
1503 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
1504  if (proxySendSpanning)
1505  localphs[i] = CmiCreateNodePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
1506  else
1507 #endif
1508  localphs[i] = CmiCreatePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
1509  }
1510  nphs = npid;
1511  }
1512  CmiAssert(nphs == npid && localphs != NULL);
1513  CmiUsePersistentHandle(localphs, nphs);
1514 #endif
1515  if(doMigration || isNewProxyAdded) {
1516  ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
1517  }else{
1518  ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
1519  }
1520 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
1521  CmiUsePersistentHandle(NULL, 0);
1522 #endif
1523  isNewProxyAdded = 0;
1524  }
1525  isProxyChanged = 0;
1526  if(!pidsPreAllocated) delete [] pids;
1527  DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");
1528 
1529 #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
1530  positionPtrBegin = p.begin();
1531  positionPtrEnd = p.end();
1532 #endif
1533 
1534 #if 0
1535  if(flags.doMolly) {
1538  }
1539  // BEGIN LA
1540  if (flags.doLoweAndersen) {
1541  velocityPtrBegin = v.begin();
1542  velocityPtrEnd = v.end();
1543  }
1544  // END LA
1545 #endif
1546  // fprintf(stderr, "(Pe[%d] tstep %d)calling positionsReady on patch %d\n",
1547  // CkMyPe(), this->flags.step, this->patchID);
1548  Patch::positionsReady(doMigration);
1549 
1550  patchMapRead = 1;
1551 
1552  // gzheng
1553  NAMD_EVENT_STOP(1, NamdProfileEvent::POSITIONS_READY_SOA);
1554 
1555  Sync::Object()->PatchReady();
1556 
1557 #if 0
1558  fprintf(stderr, "Patch %d atom IDS\n", this->patchID);
1559  AtomMap* mapper = AtomMap::Object();
1560  for(int i = 0 ; i < numAtoms; i++){
1561  fprintf(stderr, "atom[%d] = %d %d %d\n", i, atom[i].id,
1562  mapper->localID(atom[i].id).pid, mapper->localID(atom[i].id).index);
1563  }
1564 #endif
1565 
1566 }
1567 //
1568 // end SOA
1569 //
1571 
1573 //
1574 // GPU migration code path
1575 //
1576 
1577 #ifdef NODEGROUP_FORCE_REGISTER
1578 
1579 void HomePatch::updateAtomCount(const int n, const int reallocate) {
1580  numAtoms = n;
1581  // See MigrationCUDAKernel.h
1582  if (n > MigrationCUDAKernel::kMaxAtomsPerPatch) {
1583  NAMD_die("Device migration does not currently support patches with greater than 2048 atoms.\n"
1584  "Please run with a smaller margin or without device migration.\n");
1585  }
1586 }
1587 
1589  atom.resize(numAtoms);
1590  cudaAtomList.resize(numAtoms);
1591  p.resize(numAtoms);
1592  pExt.resize(numAtoms);
1593 
1594  size_t nbytes = patchDataSOA.numBytes;
1595  if (nbytes != PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048)) {
1596 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1597  reallocate_host<unsigned char>(&soa_buffer,&soa_buffer_size, PatchDataSOA_set_size(&patchDataSOA, numAtoms));
1598  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer);
1599 #else
1600  soa_buffer.resize( PatchDataSOA_set_size(&patchDataSOA, numAtoms) );
1601  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer.begin() );
1602 #endif
1603  }
1604 }
1605 
1606 void HomePatch::clearAtomMap() {
1607  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
1608 }
1609 
/**
 * GPU-resident (NODEGROUP_FORCE_REGISTER) analogue of positionsReady() /
 * positionsReady_SOA(): refresh host-side per-patch arrays after device
 * migration and publish the patch data to all of its proxies.
 *
 * Work visible in this body:
 *  - invalidate RATTLE and grid-force bookkeeping when atoms migrated;
 *  - (re)size atom/cudaAtomList/p/pExt and the SOA buffer to numAtoms;
 *  - refresh CompAtomExt id/fixed bits (and at startup, vdwType/partition
 *    and the SOA id/fixed columns used for tuple generation and atom map);
 *  - assemble a ProxyDataMsg and send it via ProxyMgr;
 *  - finish with Patch::positionsReady() and Sync bookkeeping.
 *
 * @param doMigration nonzero when atoms may have moved between patches;
 *                    forces positionExtList to be resent to proxies.
 * @param startup     nonzero on the first call; pre-reserves buffers and
 *                    seeds SOA/tuple data from the FullAtom AOS array.
 */
void HomePatch::positionsReady_GPU(int doMigration, int startup) {
  if (doMigration) {
    // make sure to invalidate RATTLE lists when atoms move
    rattleListValid_SOA = false;
    rattleListValid = false;
    //invalidate gridforced list
    gridForceIdxChecked=false;

  }
  if (startup) {
    // Pre-reserve host arrays at the device migration per-patch capacity
    // (2048, matching MigrationCUDAKernel limits) to avoid regrowth later.
    const int numAtomAlloc = 2048;
    atom.reserve(numAtomAlloc);
    cudaAtomList.reserve(numAtomAlloc);
    p.reserve(numAtomAlloc);
    pExt.reserve(numAtomAlloc);

    for ( int j = 0; j < Results::maxNumForces; ++j ) {
      f[j].reserve(numAtomAlloc);
    }
  }

  // Resize all per-atom host arrays to the current atom count.
  int n = numAtoms;
  atom.resize(n);
  cudaAtomList.resize(n);
  p.resize(n);
  pExt.resize(n);

  // Grow the SOA backing buffer only when the padded size actually changed.
  // PatchDataSOA_set_size recomputes patchDataSOA.numBytes for numAtoms
  // (padded to 2048) and returns the new byte count.
  size_t nbytes = patchDataSOA.numBytes;
  if (nbytes != PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048)) {
#if defined(NAMD_CUDA) || defined(NAMD_HIP)
    // Pinned host allocation for efficient device transfers.
    reallocate_host<unsigned char>(&soa_buffer,&soa_buffer_size, PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048));
    PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer);
#else
    soa_buffer.resize( PatchDataSOA_set_size(&patchDataSOA, numAtoms, 2048) );
    PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer.begin() );
#endif
  }


  doMigration = doMigration || ! patchMapRead;

  doMigration = doMigration || doAtomUpdate;
  doAtomUpdate = false;


  // This is set during startup depending on which features are enabled.
  const bool updateAtomMap = simParams->updateAtomMap;

  // If startup or update atom map, then we need to update some data structures.
  // For startup this is mainly for tuple generations
  if (startup || updateAtomMap) {
    // These are used in the atom map, so are needed for startup and updateAtomMap
    CompAtomExt * __restrict pExt_i = pExt.begin();
#ifndef USE_NO_BITFIELDS
    // Write through CompAtomExtCopy instead of the bitfield struct
    // (same workaround as the SOA path uses for an Intel 2018 compiler bug).
    CompAtomExtCopy *pExtCopy_i = (CompAtomExtCopy *) pExt_i;
#endif
    for (int i=0; i < numAtoms; i++) {
      const uint32 atomFixed = uint32(atom[i].atomFixed);
      const uint32 groupFixed = uint32(atom[i].groupFixed);
      const uint32 id = uint32(atom[i].id);
#ifndef USE_NO_BITFIELDS
      // Pack the fixed-atom flags into high bits of the id word.
      ASSERT(atomFixed == 0 || atomFixed == 1);
      ASSERT(groupFixed == 0 || groupFixed == 1);
      uint32 atomFixedBit = NAMD_ATOM_FIXED_MASK * atomFixed;
      uint32 groupFixedBit = NAMD_GROUP_FIXED_MASK * groupFixed;
      pExtCopy_i[i].id = (id | atomFixedBit | groupFixedBit);
#else
      pExt_i[i].id = id;
      pExt_i[i].atomFixed = atomFixed;
      pExt_i[i].groupFixed = groupFixed;
#endif // USE_NO_BITFIELDS
    }
  }
  if (startup) {
    // These are used in the exclusion tuple generation, so are only needed for startup
    CompAtom * __restrict p_i = p.begin();
    for (int i=0; i < numAtoms; i++) {
      p_i[i].vdwType = atom[i].vdwType;
    }
    if (simParams->alchOn) {
      // Alchemical partition assignments are also needed for tuple generation.
      for (int i=0; i < numAtoms; i++) {
        p_i[i].partition = atom[i].partition;
      }
    }
    // copy data from AOS into SOA
    for (int i=0; i < numAtoms; i++) {
      patchDataSOA.id[i] = int(atom[i].id);
      patchDataSOA.atomFixed[i] = int(atom[i].atomFixed);
      patchDataSOA.groupFixed[i] = int(atom[i].groupFixed);
    }
  } else if (updateAtomMap) {
    // This might be overkill for updating atom map
    copy_atoms_to_SOA();
  }

  // Measure atom movement to test pairlist validity
  // XXX TODO: Check if this ever needs to be done if CUDASOAintegrate
  // NOTE(review): this whole function is compiled only under
  // NODEGROUP_FORCE_REGISTER (see enclosing #ifdef), so the #else branch
  // below is dead code kept for symmetry with positionsReady_SOA.
#ifdef NODEGROUP_FORCE_REGISTER
  if(!simParams->CUDASOAintegrate || flags.sequence == 0){
    doPairlistCheck();
  }
#else
  doPairlistCheck();
#endif

  // Must Add Proxy Changes when migration completed!
  // Build the list of destination PEs (pids) for this patch's proxy message.
  int *pids = NULL;
  int pidsPreAllocated = 1;
  int npid;
  if (proxySendSpanning == 0) {
    // Direct send to every proxy; optionally order PEs that own patches last
    // so patch-less PEs receive data first.
    npid = proxy.size();
    pids = new int[npid];
    pidsPreAllocated = 0;
    int *pidi = pids;
    int *pide = pids + proxy.size();
    int patchNodesLast =
      ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
    for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
    {
      if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
        *(--pide) = *pli;
      } else {
        *(pidi++) = *pli;
      }
    }
  }
  else {
    // Spanning-tree send: only this patch's immediate children get the
    // message directly; they forward it down the tree.
#ifdef NODEAWARE_PROXY_SPANNINGTREE
  #ifdef USE_NODEPATCHMGR
    npid = numNodeChild;
    pids = nodeChildren;
  #else
    npid = nChild;
    pids = child;
  #endif
#else
    npid = nChild;
    pidsPreAllocated = 0;
    pids = new int[proxySpanDim];
    for (int i=0; i<nChild; i++) pids[i] = child[i];
#endif
  }
  if (npid) { //have proxies
    int seq = flags.sequence;
    int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
    //begin to prepare proxy msg and send it
    // Variable-length section sizes for the ProxyDataMsg allocation.
    // Molly/LA/GBIS/LCPO sections are unused on this GPU path (lengths 0).
    int pdMsgPLLen = p.size();
    int pdMsgAvgPLLen = 0;
    // BEGIN LA
    int pdMsgVLLen = 0;
    // END LA

    int intRadLen = 0;

    //LCPO
    int lcpoTypeLen = 0;

    // Extended atom data is only resent when it may have changed.
    int pdMsgPLExtLen = 0;
    if(doMigration || isNewProxyAdded) {
      pdMsgPLExtLen = pExt.size();
    }

    int cudaAtomLen = 0;
    // NOTE(review): the SOA path guards this with
    // defined(NAMD_CUDA) || defined(NAMD_HIP); HIP appears omitted here --
    // confirm whether HIP builds ever take this code path.
#ifdef NAMD_CUDA
    cudaAtomLen = numAtoms;
#endif

  #ifdef NAMD_MIC
  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
    cudaAtomLen = numAtoms;
  #else
    // MIC SOA layout pads the atom count to a multiple of 16.
    cudaAtomLen = (numAtoms + 15) & (~15);
  #endif
  #endif
    ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgVLLen, intRadLen,
      lcpoTypeLen, pdMsgPLExtLen, cudaAtomLen, PRIORITY_SIZE) ProxyDataMsg; // BEGIN LA, END LA

    SET_PRIORITY(nmsg,seq,priority);
    nmsg->patch = patchID;
    nmsg->flags = flags;
    nmsg->plLen = pdMsgPLLen;
    //copying data to the newly created msg
    NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
    memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
    NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
    nmsg->avgPlLen = pdMsgAvgPLLen;
    // BEGIN LA
    nmsg->vlLen = pdMsgVLLen;
    // END LA

    nmsg->plExtLen = pdMsgPLExtLen;
    if(doMigration || isNewProxyAdded){
      memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
    }

// DMK
    // NOTE(review): the SOA path also includes NAMD_HIP in this guard --
    // confirm whether the HIP omission here is intentional.
#if defined(NAMD_CUDA) || defined(NAMD_MIC)
    NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
    memcpy(nmsg->cudaAtomList, cudaAtomPtr, sizeof(CudaAtom)*cudaAtomLen);
    NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
#endif

#if NAMD_SeparateWaters != 0
    //DMK - Atom Separation (water vs. non-water)
    nmsg->numWaterAtoms = numWaterAtoms;
#endif

#if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
    nmsg->isFromImmMsgCall = 0;
#endif

  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
    DebugFileTrace *dft = DebugFileTrace::Object();
    dft->openTrace();
    dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
    for(int i=0; i<npid; i++) {
      dft->writeTrace("%d ", pids[i]);
    }
    dft->writeTrace("\n");
    dft->closeTrace();
  #endif

#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
    // (Re)build Charm++ persistent-communication handles when the proxy set
    // changed; persist_size over-estimates the message footprint (+2048 slop).
    if (isProxyChanged || localphs == NULL)
    {
      if (nphs) {
        for (int i=0; i<nphs; i++) {
          CmiDestoryPersistent(localphs[i]);
        }
        delete [] localphs;
      }
      localphs = new PersistentHandle[npid];
      int persist_size = sizeof(envelope) + sizeof(ProxyDataMsg) + sizeof(CompAtom)*(pdMsgPLLen+pdMsgAvgPLLen+pdMsgVLLen) + intRadLen*sizeof(Real) + lcpoTypeLen*sizeof(int) + sizeof(CompAtomExt)*pdMsgPLExtLen + sizeof(CudaAtom)*cudaAtomLen + PRIORITY_SIZE/8 + 2048;
      for (int i=0; i<npid; i++) {
#if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
        if (proxySendSpanning)
          localphs[i] = CmiCreateNodePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
        else
#endif
          localphs[i] = CmiCreatePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
      }
      nphs = npid;
    }
    CmiAssert(nphs == npid && localphs != NULL);
    CmiUsePersistentHandle(localphs, nphs);
#endif
    // Full data (sendProxyAll) after migration or for a new proxy;
    // otherwise positions only (sendProxyData).
    if(doMigration || isNewProxyAdded) {
      ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
    }else{
      ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
    }
#if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
    CmiUsePersistentHandle(NULL, 0);
#endif
    isNewProxyAdded = 0;
  }
  isProxyChanged = 0;
  if(!pidsPreAllocated) delete [] pids;
  DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");

#ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
  positionPtrBegin = p.begin();
  positionPtrEnd = p.end();
#endif

  Patch::positionsReady(doMigration, startup);

  patchMapRead = 1;

  // gzheng
  NAMD_EVENT_STOP(1, NamdProfileEvent::POSITIONS_READY_SOA);

  Sync::Object()->PatchReady();
}
1886 
1887 #endif // NODEGROUP_FORCE_REGISTER
1888 
1889 //
1890 // end of GPU migration code path
1891 //
1893 
1894 
1895 void HomePatch::positionsReady(int doMigration)
1896 {
1898 
1899  flags.sequence++;
1900  if (doMigration)
1901  gridForceIdxChecked=false;
1902  if (!patchMapRead) { readPatchMap(); }
1903 
1904  if (numNeighbors && ! simParams->staticAtomAssignment) {
1905  if (doMigration) {
1906  rattleListValid = false;
1907  doAtomMigration();
1908  } else {
1909  doMarginCheck();
1910  }
1911  }
1912 
1913 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
1914  char prbuf[32];
1915  sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::POSITIONS_READY], this->getPatchID());
1916  NAMD_EVENT_START_EX(1, NamdProfileEvent::POSITIONS_READY, prbuf);
1917 #endif
1918 
1919  if (doMigration && simParams->qmLSSOn)
1920  qmSwapAtoms();
1921 
1922 #if defined(NAMD_CUDA) || defined(NAMD_MIC) || defined(NAMD_AVXTILES) || defined(NAMD_HIP)
1923  #ifdef NAMD_AVXTILES
1924  if ( simParams->useAVXTiles ) {
1925  #endif
1926  if ( doMigration ) {
1927  int n = numAtoms;
1928  FullAtom *a_i = atom.begin();
1929  #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_AVXTILES) || \
1930  (defined(NAMD_MIC) && MIC_SORT_ATOMS != 0)
1931  int *ao = new int[n];
1932  int nfree;
1933  if ( simParams->fixedAtomsOn && ! simParams->fixedAtomsForces ) {
1934  int k = 0;
1935  int k2 = n;
1936  for ( int j=0; j<n; ++j ) {
1937  // put fixed atoms at end
1938  if ( a_i[j].atomFixed ) ao[--k2] = j;
1939  else ao[k++] = j;
1940  }
1941  nfree = k;
1942  } else {
1943  nfree = n;
1944  for ( int j=0; j<n; ++j ) {
1945  ao[j] = j;
1946  }
1947  }
1948  sortAtomsForCUDA(ao,a_i,nfree,n);
1949  for ( int i=0; i<n; ++i ) {
1950  a_i[i].sortOrder = ao[i];
1951  }
1952  delete [] ao;
1953  #else
1954  for (int i = 0; i < n; ++i) {
1955  a_i[i].sortOrder = i;
1956  }
1957  #endif
1958  }
1959 
1960  {
1961  const double charge_scaling = sqrt(COULOMB * ComputeNonbondedUtil::scaling * ComputeNonbondedUtil::dielectric_1);
1962  const Vector ucenter = lattice.unscale(center);
1963  const BigReal ucenter_x = ucenter.x;
1964  const BigReal ucenter_y = ucenter.y;
1965  const BigReal ucenter_z = ucenter.z;
1966  const int n = numAtoms;
1967  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1968  int n_16 = n;
1969  n_16 = (n + 15) & (~15);
1970  cudaAtomList.resize(n_16);
1971  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1972  mic_position_t *atom_x = ((mic_position_t*)ac) + (0 * n_16);
1973  mic_position_t *atom_y = ((mic_position_t*)ac) + (1 * n_16);
1974  mic_position_t *atom_z = ((mic_position_t*)ac) + (2 * n_16);
1975  mic_position_t *atom_q = ((mic_position_t*)ac) + (3 * n_16);
1976  #elif defined(NAMD_AVXTILES)
1977  int n_avx = (n + 15) & (~15);
1978  cudaAtomList.resize(n_avx);
1979  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1980  tiles.realloc(n, ac);
1981  #else
1982  #if 1
1983  cudaAtomList.resize(n);
1984  CudaAtom *ac = cudaAtomPtr = cudaAtomList.begin();
1985  #else
1986  reallocate_host<CudaAtom>(&cudaAtomList, &sizeCudaAtomList, n);
1987  CudaAtom *ac = cudaAtomPtr = &cudaAtomList[0];
1988  #endif
1989  #endif
1990  const FullAtom *a = atom.begin();
1991  for ( int k=0; k<n; ++k ) {
1992  #if defined(NAMD_MIC) && (MIC_HANDCODE_FORCE_SOA_VS_AOS == 0)
1993  int j = a[k].sortOrder;
1994  atom_x[k] = a[j].position.x - ucenter_x;
1995  atom_y[k] = a[j].position.y - ucenter_y;
1996  atom_z[k] = a[j].position.z - ucenter_z;
1997  atom_q[k] = charge_scaling * a[j].charge;
1998  #else
1999  int j = a[k].sortOrder;
2000  ac[k].x = a[j].position.x - ucenter_x;
2001  ac[k].y = a[j].position.y - ucenter_y;
2002  ac[k].z = a[j].position.z - ucenter_z;
2003  ac[k].q = charge_scaling * a[j].charge;
2004  #endif
2005  }
2006  #ifdef NAMD_AVXTILES
2007  {
2008  if (n > 0) {
2009  int j = a[n-1].sortOrder;
2010  for ( int k=n; k<n_avx; ++k ) {
2011  ac[k].x = a[j].position.x - ucenter_x;
2012  ac[k].y = a[j].position.y - ucenter_y;
2013  ac[k].z = a[j].position.z - ucenter_z;
2014  }
2015  }
2016  }
2017  #endif
2018  }
2019  #ifdef NAMD_AVXTILES
2020  // If "Tiles" mode disabled, no use of the CUDA data structures
2021  } else doMigration = doMigration && numNeighbors;
2022  #endif
2023 #else
2024  doMigration = doMigration && numNeighbors;
2025 #endif
2026  doMigration = doMigration || ! patchMapRead;
2027 
2028  doMigration = doMigration || doAtomUpdate;
2029  doAtomUpdate = false;
2030 
2031  // Workaround for oversize groups:
2032  // reset nonbondedGroupSize (ngs) before force calculation,
2033  // making sure that subset of hydrogen group starting with
2034  // parent atom are all within 0.5 * hgroupCutoff.
2035  // XXX hydrogentGroupSize remains constant but is checked for nonzero
2036  // XXX should be skipped for CUDA, ngs not used by CUDA kernels
2037  // XXX should this also be skipped for KNL kernels?
2038  // ngs used by ComputeNonbondedBase.h - CPU nonbonded kernels
2039  // ngs used by ComputeGBIS.C - CPU GB nonbonded kernels
2040 #if ! (defined(NAMD_CUDA) || defined(NAMD_HIP))
2041 #if defined(NAMD_AVXTILES)
2042  if (!simParams->useAVXTiles)
2043 #endif
2044  doGroupSizeCheck();
2045 #endif
2046 
2047  // Copy information needed by computes and proxys to Patch::p.
2048  // Resize only if atoms were migrated
2049  if (doMigration) {
2050  p.resize(numAtoms);
2051  pExt.resize(numAtoms);
2052  }
2053  CompAtom *p_i = p.begin();
2054  CompAtomExt *pExt_i = pExt.begin();
2055  FullAtom *a_i = atom.begin();
2056  int i; int n = numAtoms;
2057  for ( i=0; i<n; ++i ) {
2058  p_i[i] = a_i[i];
2059  pExt_i[i] = a_i[i];
2060  }
2061 
2062  // Measure atom movement to test pairlist validity
2063  doPairlistCheck();
2064 
2065  if (flags.doMolly) mollyAverage();
2066  // BEGIN LA
2068  // END LA
2069 
2070  if (flags.doGBIS) {
2071  //reset for next time step
2072  numGBISP1Arrived = 0;
2073  phase1BoxClosedCalled = false;
2074  numGBISP2Arrived = 0;
2075  phase2BoxClosedCalled = false;
2076  numGBISP3Arrived = 0;
2077  phase3BoxClosedCalled = false;
2078  if (doMigration || isNewProxyAdded)
2080  }
2081 
2082  if (flags.doLCPO) {
2083  if (doMigration || isNewProxyAdded) {
2084  setLcpoType();
2085  }
2086  }
2087 
2088  // Must Add Proxy Changes when migration completed!
2090  int *pids = NULL;
2091  int pidsPreAllocated = 1;
2092  int npid;
2093  if (proxySendSpanning == 0) {
2094  npid = proxy.size();
2095  pids = new int[npid];
2096  pidsPreAllocated = 0;
2097  int *pidi = pids;
2098  int *pide = pids + proxy.size();
2099  int patchNodesLast =
2100  ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
2101  for ( pli = proxy.begin(); pli != proxy.end(); ++pli )
2102  {
2103  if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(*pli) ) {
2104  *(--pide) = *pli;
2105  } else {
2106  *(pidi++) = *pli;
2107  }
2108  }
2109  }
2110  else {
2111 #ifdef NODEAWARE_PROXY_SPANNINGTREE
2112  #ifdef USE_NODEPATCHMGR
2113  npid = numNodeChild;
2114  pids = nodeChildren;
2115  #else
2116  npid = nChild;
2117  pids = child;
2118  #endif
2119 #else
2120  npid = nChild;
2121  pidsPreAllocated = 0;
2122  pids = new int[proxySpanDim];
2123  for (int i=0; i<nChild; i++) pids[i] = child[i];
2124 #endif
2125  }
2126  if (npid) { //have proxies
2127  int seq = flags.sequence;
2128  int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
2129  //begin to prepare proxy msg and send it
2130  int pdMsgPLLen = p.size();
2131  int pdMsgAvgPLLen = 0;
2132  if(flags.doMolly) {
2133  pdMsgAvgPLLen = p_avg.size();
2134  }
2135  // BEGIN LA
2136  int pdMsgVLLen = 0;
2137  if (flags.doLoweAndersen) {
2138  pdMsgVLLen = v.size();
2139  }
2140  // END LA
2141 
2142  int intRadLen = 0;
2143  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
2144  intRadLen = numAtoms * 2;
2145  }
2146 
2147  //LCPO
2148  int lcpoTypeLen = 0;
2149  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
2150  lcpoTypeLen = numAtoms;
2151  }
2152 
2153  int pdMsgPLExtLen = 0;
2154  if(doMigration || isNewProxyAdded) {
2155  pdMsgPLExtLen = pExt.size();
2156  }
2157 
2158  int cudaAtomLen = 0;
2159 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2160  cudaAtomLen = numAtoms;
2161 #elif defined(NAMD_AVXTILES)
2162  if (simParams->useAVXTiles)
2163  cudaAtomLen = (numAtoms + 15) & (~15);
2164 #elif defined(NAMD_MIC)
2165  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
2166  cudaAtomLen = numAtoms;
2167  #else
2168  cudaAtomLen = (numAtoms + 15) & (~15);
2169  #endif
2170 #endif
2171 
2172  ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgVLLen, intRadLen,
2173  lcpoTypeLen, pdMsgPLExtLen, cudaAtomLen, PRIORITY_SIZE) ProxyDataMsg; // BEGIN LA, END LA
2174 
2175  SET_PRIORITY(nmsg,seq,priority);
2176  nmsg->patch = patchID;
2177  nmsg->flags = flags;
2178  nmsg->plLen = pdMsgPLLen;
2179  //copying data to the newly created msg
2180  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
2181  memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
2182  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
2183  nmsg->avgPlLen = pdMsgAvgPLLen;
2184  if(flags.doMolly) {
2185  memcpy(nmsg->avgPositionList, p_avg.begin(), sizeof(CompAtom)*pdMsgAvgPLLen);
2186  }
2187  // BEGIN LA
2188  nmsg->vlLen = pdMsgVLLen;
2189  if (flags.doLoweAndersen) {
2190  memcpy(nmsg->velocityList, v.begin(), sizeof(CompAtom)*pdMsgVLLen);
2191  }
2192  // END LA
2193 
2194  if (flags.doGBIS && (doMigration || isNewProxyAdded)) {
2195  for (int i = 0; i < numAtoms * 2; i++) {
2196  nmsg->intRadList[i] = intRad[i];
2197  }
2198  }
2199 
2200  if (flags.doLCPO && (doMigration || isNewProxyAdded)) {
2201  for (int i = 0; i < numAtoms; i++) {
2202  nmsg->lcpoTypeList[i] = lcpoType[i];
2203  }
2204  }
2205 
2206  nmsg->plExtLen = pdMsgPLExtLen;
2207  if(doMigration || isNewProxyAdded){
2208  memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
2209  }
2210 
2211 // DMK
2212 #if defined(NAMD_CUDA) || defined(NAMD_MIC) || defined(NAMD_AVXTILES) || defined(NAMD_HIP)
2213  #ifdef NAMD_AVXTILES
2214  if (simParams->useAVXTiles)
2215  #endif
2216  {
2217  NAMD_EVENT_START(1, NamdProfileEvent::MEMCPY);
2218  memcpy(nmsg->cudaAtomList, cudaAtomPtr, sizeof(CudaAtom)*cudaAtomLen);
2219  NAMD_EVENT_STOP(1, NamdProfileEvent::MEMCPY);
2220  }
2221 #endif
2222 
2223 #if NAMD_SeparateWaters != 0
2224  //DMK - Atom Separation (water vs. non-water)
2225  nmsg->numWaterAtoms = numWaterAtoms;
2226 #endif
2227 
2228 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)
2229  nmsg->isFromImmMsgCall = 0;
2230 #endif
2231 
2232  #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
2233  DebugFileTrace *dft = DebugFileTrace::Object();
2234  dft->openTrace();
2235  dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
2236  for(int i=0; i<npid; i++) {
2237  dft->writeTrace("%d ", pids[i]);
2238  }
2239  dft->writeTrace("\n");
2240  dft->closeTrace();
2241  #endif
2242 
2243 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
2244  if (isProxyChanged || localphs == NULL)
2245  {
2246 //CmiPrintf("[%d] Build persistent: isProxyChanged: %d %p\n", CkMyPe(), isProxyChanged, localphs);
2247  //CmiAssert(isProxyChanged);
2248  if (nphs) {
2249  for (int i=0; i<nphs; i++) {
2250  CmiDestoryPersistent(localphs[i]);
2251  }
2252  delete [] localphs;
2253  }
2254  localphs = new PersistentHandle[npid];
2255  int persist_size = sizeof(envelope) + sizeof(ProxyDataMsg) + sizeof(CompAtom)*(pdMsgPLLen+pdMsgAvgPLLen+pdMsgVLLen) + intRadLen*sizeof(Real) + lcpoTypeLen*sizeof(int) + sizeof(CompAtomExt)*pdMsgPLExtLen + sizeof(CudaAtom)*cudaAtomLen + PRIORITY_SIZE/8 + 2048;
2256  for (int i=0; i<npid; i++) {
2257 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
2258  if (proxySendSpanning)
2259  localphs[i] = CmiCreateNodePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
2260  else
2261 #endif
2262  localphs[i] = CmiCreatePersistent(pids[i], persist_size, sizeof(envelope)+sizeof(ProxyDataMsg));
2263  }
2264  nphs = npid;
2265  }
2266  CmiAssert(nphs == npid && localphs != NULL);
2267  CmiUsePersistentHandle(localphs, nphs);
2268 #endif
2269  if(doMigration || isNewProxyAdded) {
2270  ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
2271  }else{
2272  ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
2273  }
2274 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE
2275  CmiUsePersistentHandle(NULL, 0);
2276 #endif
2277  isNewProxyAdded = 0;
2278  }
2279  isProxyChanged = 0;
2280  if(!pidsPreAllocated) delete [] pids;
2281  DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");
2282 
2283 #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
2284  positionPtrBegin = p.begin();
2285  positionPtrEnd = p.end();
2286 #endif
2287 
2288  if(flags.doMolly) {
2291  }
2292  // BEGIN LA
2293  if (flags.doLoweAndersen) {
2294  velocityPtrBegin = v.begin();
2295  velocityPtrEnd = v.end();
2296  }
2297  // END LA
2298 
2299  Patch::positionsReady(doMigration);
2300 
2301  patchMapRead = 1;
2302 
2303  // gzheng
2304  Sync::Object()->PatchReady();
2305 
2306  NAMD_EVENT_STOP(1, NamdProfileEvent::POSITIONS_READY);
2307 
2308 }
2309 
2311 {
2312  replacementForces = f;
2313 }
2314 
2315 void HomePatch::saveForce(const int ftag)
2316 {
2317  f_saved[ftag].resize(numAtoms);
2318  for ( int i = 0; i < numAtoms; ++i )
2319  {
2320  f_saved[ftag][i] = f[ftag][i];
2321  }
2322 }
2323 
2324 
2325 void HomePatch::sort_solvent_atoms() {
2326 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2327  char ssabuf[32];
2328  sprintf(ssabuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::SORT_SOLVENT_ATOMS], this->getPatchID());
2329  NAMD_EVENT_START_EX(1, NamdProfileEvent::SORT_SOLVENT_ATOMS, ssabuf);
2330 #endif
2331 
2332 
2333  // goal is to move SETTLE water molecules to end of FullAtom array
2334  // first we need to count the number of SETTLE water molecules
2335  sortatom.resize(numAtoms);
2336  FullAtom * __restrict a = atom.begin();
2337  FullAtom * __restrict sa = sortatom.begin();
2338  numSolventAtoms = 0;
2339  numSoluteAtoms = 0;
2340  int i, j, k;
2341  for (i = 0; i < numAtoms; i++) {
2342  if (a[i].isWater) numSolventAtoms++;
2343  }
2344  j = 0; // starting index for solute
2345  numSoluteAtoms = numAtoms - numSolventAtoms;
2346  k = numSoluteAtoms; // starting index for water
2347  for (i = 0; i < numAtoms; i++) {
2348  if (a[i].isWater) {
2349  sa[k++] = a[i];
2350  }
2351  else {
2352  sa[j++] = a[i];
2353  }
2354  }
2355  // replace atom array by sorted array by swapping memory buffers
2356  atom.swap(sortatom);
2357  // XXX improve next line to avoid division
2358  // XXX should NOT assume 3 atoms per water
2359  numWaters = numSolventAtoms / 3;
2360 
2361  NAMD_EVENT_STOP(1, NamdProfileEvent::SORT_SOLVENT_ATOMS);
2362 
2363 }
2364 
2365 
2367  p->buffer = NULL;
2368  p->numBytes = 0;
2369  p->numAtoms = 0;
2370  p->maxAtoms = 0;
2371 }
2372 
2374  PatchDataSOA *p,
2375  int natoms,
2376  int padding
2377  ) {
2378  if (natoms > p->maxAtoms) {
2379  // maxAtoms extends numAtoms to next multiple of padding
2380  p->maxAtoms = ((natoms + padding - 1) / padding) * padding;
2381  // set numBytes to allow each array to have maxAtoms space
2382  // count up total number of double, float, and int arrays
2383  // XXX TODO : these magic numbers are bad for maintability
2385  // XXX TODO: For Colvars, tclForces, IMD or any GlobalMaster
2386  // client that requires the global forces, we need extra 3 buffers
2387  // to save the x, y and z components of the forces, and therefore
2388  // the number of double fields should be increased by 3 (from 31 to 34).
2389  int numdoubles = (simParams->colvarsOn || simParams->tclForcesOn || simParams->IMDon) ? 34: 31;
2390  p->numBytes = p->maxAtoms *
2391  (numdoubles * sizeof(double) + 9*sizeof(float) + 17*sizeof(int));
2392  }
2393  p->numAtoms = natoms;
2394  return p->numBytes;
2395 }
2396 
2398  PatchDataSOA *p,
2399  void *mybuffer
2400  ) {
2402  p->buffer = (unsigned char *) mybuffer;
2403  unsigned char *t = p->buffer;
2404  p->pos_x = (double *) t;
2405  t += p->maxAtoms * sizeof(double);
2406  p->pos_y = (double *) t;
2407  t += p->maxAtoms * sizeof(double);
2408  p->pos_z = (double *) t;
2409  t += p->maxAtoms * sizeof(double);
2410  p->charge = (float *) t;
2411  t += p->maxAtoms * sizeof(float);
2412  p->vdwType = (int *) t;
2413  t += p->maxAtoms * sizeof(int);
2414  p->partition = (int *) t;
2415  t += p->maxAtoms * sizeof(int);
2416  p->nonbondedGroupSize = (int *) t;
2417  t += p->maxAtoms * sizeof(int);
2418  p->hydrogenGroupSize = (int *) t;
2419  t += p->maxAtoms * sizeof(int);
2420  p->isWater = (int *) t;
2421  t += p->maxAtoms * sizeof(int);
2422  p->sortOrder = (int *) t;
2423  t += p->maxAtoms * sizeof(int);
2424  p->unsortOrder = (int *) t;
2425  t += p->maxAtoms * sizeof(int);
2426  p->id = (int *) t;
2427  t += p->maxAtoms * sizeof(int);
2428  p->exclId = (int *) t;
2429  t += p->maxAtoms * sizeof(int);
2430  p->sigId = (int *) t;
2431  t += p->maxAtoms * sizeof(int);
2432  p->atomFixed = (int *) t;
2433  t += p->maxAtoms * sizeof(int);
2434  p->groupFixed = (int *) t;
2435  t += p->maxAtoms * sizeof(int);
2436  p->vel_x = (double *) t;
2437  t += p->maxAtoms * sizeof(double);
2438  p->vel_y = (double *) t;
2439  t += p->maxAtoms * sizeof(double);
2440  p->vel_z = (double *) t;
2441  t += p->maxAtoms * sizeof(double);
2442  p->fixedPosition_x = (double *) t;
2443  t += p->maxAtoms * sizeof(double);
2444  p->fixedPosition_y = (double *) t;
2445  t += p->maxAtoms * sizeof(double);
2446  p->fixedPosition_z = (double *) t;
2447  t += p->maxAtoms * sizeof(double);
2448  p->recipMass = (double *) t;
2449  t += p->maxAtoms * sizeof(double);
2450  p->mass = (float *) t;
2451  t += p->maxAtoms * sizeof(float);
2452  p->langevinParam = (float *) t;
2453  t += p->maxAtoms * sizeof(float);
2454  p->status = (int *) t;
2455  t += p->maxAtoms * sizeof(int);
2456  p->transform_i = (int *) t;
2457  t += p->maxAtoms * sizeof(int);
2458  p->transform_j = (int *) t;
2459  t += p->maxAtoms * sizeof(int);
2460  p->transform_k = (int *) t;
2461  t += p->maxAtoms * sizeof(int);
2462  p->migrationGroupSize = (int *) t;
2463  t += p->maxAtoms * sizeof(int);
2464  p->rigidBondLength = (float *) t;
2465  t += p->maxAtoms * sizeof(float);
2466  p->langScalVelBBK2 = (float *) t;
2467  t += p->maxAtoms * sizeof(float);
2468  p->langScalRandBBK2 = (float *) t;
2469  t += p->maxAtoms * sizeof(float);
2470  p->gaussrand_x = (float *) t;
2471  t += p->maxAtoms * sizeof(float);
2472  p->gaussrand_y = (float *) t;
2473  t += p->maxAtoms * sizeof(float);
2474  p->gaussrand_z = (float *) t;
2475  t += p->maxAtoms * sizeof(float);
2476  p->f_normal_x = (double *) t;
2477  t += p->maxAtoms * sizeof(double);
2478  p->f_normal_y = (double *) t;
2479  t += p->maxAtoms * sizeof(double);
2480  p->f_normal_z = (double *) t;
2481  t += p->maxAtoms * sizeof(double);
2482  p->f_nbond_x = (double *) t;
2483  t += p->maxAtoms * sizeof(double);
2484  p->f_nbond_y = (double *) t;
2485  t += p->maxAtoms * sizeof(double);
2486  p->f_nbond_z = (double *) t;
2487  t += p->maxAtoms * sizeof(double);
2488  p->f_slow_x = (double *) t;
2489  t += p->maxAtoms * sizeof(double);
2490  p->f_slow_y = (double *) t;
2491  t += p->maxAtoms * sizeof(double);
2492  p->f_slow_z = (double *) t;
2493  t += p->maxAtoms * sizeof(float);
2494  if (simParams->colvarsOn || simParams->tclForcesOn || (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces))){
2495  p->f_global_x = (double *) t;
2496  t += p->maxAtoms * sizeof(double);
2497  p->f_global_y = (double *) t;
2498  t += p->maxAtoms * sizeof(double);
2499  p->f_global_z = (double *) t;
2500  t += p->maxAtoms * sizeof(double);
2501  }
2502  p->f_saved_nbond_x = (double *) t;
2503  t += p->maxAtoms * sizeof(double);
2504  p->f_saved_nbond_y = (double *) t;
2505  t += p->maxAtoms * sizeof(double);
2506  p->f_saved_nbond_z = (double *) t;
2507  t += p->maxAtoms * sizeof(double);
2508  p->f_saved_slow_x = (double *) t;
2509  t += p->maxAtoms * sizeof(double);
2510  p->f_saved_slow_y = (double *) t;
2511  t += p->maxAtoms * sizeof(double);
2512  p->f_saved_slow_z = (double *) t;
2513  t += p->maxAtoms * sizeof(double);
2514  p->velNew_x = (double *) t;
2515  t += p->maxAtoms * sizeof(double);
2516  p->velNew_y = (double *) t;
2517  t += p->maxAtoms * sizeof(double);
2518  p->velNew_z = (double *) t;
2519  t += p->maxAtoms * sizeof(double);
2520  p->posNew_x = (double *) t;
2521  t += p->maxAtoms * sizeof(double);
2522  p->posNew_y = (double *) t;
2523  t += p->maxAtoms * sizeof(double);
2524  p->posNew_z = (double *) t;
2525 }
2526 
2527 
2528 void HomePatch::copy_atoms_to_SOA() {
2529  // make sure that SOA data storage is big enough
2530  // do not resize buffer and reset internal array pointers
2531  // unless the buffer size changes
2532  size_t nbytes = patchDataSOA.numBytes;
2533  if (nbytes != PatchDataSOA_set_size(&patchDataSOA, numAtoms)) {
2534 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2535  reallocate_host<unsigned char>(&soa_buffer,&soa_buffer_size, PatchDataSOA_set_size(&patchDataSOA, numAtoms));
2536  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer);
2537 #else
2538  soa_buffer.resize( PatchDataSOA_set_size(&patchDataSOA, numAtoms) );
2539  PatchDataSOA_set_buffer( &patchDataSOA, soa_buffer.begin() );
2540 #endif
2541  }
2542 
2543  // copy data from AOS into SOA
2544  for (int i=0; i < numAtoms; i++) {
2545  patchDataSOA.pos_x[i] = atom[i].position.x;
2546  patchDataSOA.pos_y[i] = atom[i].position.y;
2547  patchDataSOA.pos_z[i] = atom[i].position.z;
2548  patchDataSOA.charge[i] = atom[i].charge;
2549  patchDataSOA.vdwType[i] = int(atom[i].vdwType);
2550  patchDataSOA.partition[i] = int(atom[i].partition);
2551  // XXX nonbondedGroupSize is always recalculated
2552  //patchDataSOA.nonbondedGroupSize[i] = int(atom[i].nonbondedGroupSize);
2553  patchDataSOA.hydrogenGroupSize[i] = int(atom[i].hydrogenGroupSize);
2554  patchDataSOA.isWater[i] = int(atom[i].isWater);
2555  // XXX sortOrder is recalculated
2556  // XXX and defined only for NAMD_CUDA or NAMD_MIC
2557  //patchDataSOA.sortOrder[i] = int(atom[i].sortOrder);
2558  patchDataSOA.id[i] = int(atom[i].id);
2559 #ifdef MEM_OPT_VERSION
2560  patchDataSOA.exclId[i] = int(atom[i].exclId);
2561  patchDataSOA.sigId[i] = int(atom[i].sigId);
2562 #endif
2563  patchDataSOA.atomFixed[i] = int(atom[i].atomFixed);
2564  patchDataSOA.groupFixed[i] = int(atom[i].groupFixed);
2565 
2566  patchDataSOA.vel_x[i] = atom[i].velocity.x;
2567  patchDataSOA.vel_y[i] = atom[i].velocity.y;
2568  patchDataSOA.vel_z[i] = atom[i].velocity.z;
2569  patchDataSOA.fixedPosition_x[i] = atom[i].fixedPosition.x;
2570  patchDataSOA.fixedPosition_y[i] = atom[i].fixedPosition.y;
2571  patchDataSOA.fixedPosition_z[i] = atom[i].fixedPosition.z;
2572  patchDataSOA.mass[i] = atom[i].mass;
2573  patchDataSOA.langevinParam[i] = atom[i].langevinParam;
2574  patchDataSOA.status[i] = atom[i].status;
2575  patchDataSOA.transform_i[i] = atom[i].transform.i;
2576  patchDataSOA.transform_j[i] = atom[i].transform.j;
2577  patchDataSOA.transform_k[i] = atom[i].transform.k;
2578  patchDataSOA.migrationGroupSize[i] = atom[i].migrationGroupSize;
2579  patchDataSOA.rigidBondLength[i] = atom[i].rigidBondLength;
2580  }
2581 
2582  // calculate quantities derived basic components
2583  calculate_derived_SOA();
2584 }
2585 
2586 
2587 void HomePatch::calculate_derived_SOA() {
2589  for (int i=0; i < numAtoms; i++) {
2590  patchDataSOA.recipMass[i] = (atom[i].mass > 0 ? 1.f / atom[i].mass : 0);
2591  }
2592  if (simParams->langevinOn) {
2593  BigReal dt_fs = simParams->dt;
2594  BigReal dt = dt_fs * 0.001; // convert timestep to ps
2595  BigReal kbT = BOLTZMANN * simParams->langevinTemp;
2596  int lesReduceTemp = (simParams->lesOn && simParams->lesReduceTemp);
2597  BigReal tempFactor = (lesReduceTemp ? 1. / simParams->lesFactor : 1);
2598  for (int i=0; i < numAtoms; i++) {
2599  BigReal dt_gamma = dt * patchDataSOA.langevinParam[i];
2600  patchDataSOA.langScalRandBBK2[i] = (float) sqrt( 2 * dt_gamma * kbT *
2601  ( atom[i].partition ? tempFactor : 1 ) * patchDataSOA.recipMass[i] );
2602  patchDataSOA.langScalVelBBK2[i] = (float) (1 / (1 + 0.5 * dt_gamma));
2603  }
2604  }
2605 }
2606 
2607 
// Copy the per-level AOS force arrays (normal, nbond, slow) into the
// SOA force buffers; optionally also mirror nbond/slow into the
// f_saved_* arrays for force-reading clients.
void HomePatch::copy_forces_to_SOA() {
  NAMD_EVENT_START(1, NamdProfileEvent::COPY_FORCES_TO_SOA);
  // saveForce is nonzero when a client that reads back forces is active
  // (tclForces, colvars, or interactive MD not ignoring forces)
  const int saveForce = (simParams->tclForcesOn || simParams->colvarsOn || (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces)));
  const Force *fnormal = f[Results::normal].const_begin();
  // normal forces are accumulated (+=), unlike nbond/slow which are assigned
  for (int i=0; i < numAtoms; i++) {
    // we set the f_normal_x,y,z to zero before calling positionsReady_SOA in ComputeObjects
    patchDataSOA.f_normal_x[i] += fnormal[i].x;
    patchDataSOA.f_normal_y[i] += fnormal[i].y;
    patchDataSOA.f_normal_z[i] += fnormal[i].z;
  }
  if (flags.doNonbonded) {
    const Force *fnbond = f[Results::nbond].const_begin();
    if(saveForce) {
      for (int i=0; i < numAtoms; i++) {
        Force f = fnbond[i]; // local copy; shadows the member array f[]
        patchDataSOA.f_nbond_x[i] = f.x;
        patchDataSOA.f_nbond_y[i] = f.y;
        patchDataSOA.f_nbond_z[i] = f.z;
        patchDataSOA.f_saved_nbond_x[i] = f.x;
        patchDataSOA.f_saved_nbond_y[i] = f.y;
        patchDataSOA.f_saved_nbond_z[i] = f.z;
      }
    } else {
      for (int i=0; i < numAtoms; i++) {
        patchDataSOA.f_nbond_x[i] = fnbond[i].x;
        patchDataSOA.f_nbond_y[i] = fnbond[i].y;
        patchDataSOA.f_nbond_z[i] = fnbond[i].z;
      }
    }
  }
  else {
    for (int i=0; i < numAtoms; i++) {
      patchDataSOA.f_nbond_x[i] = 0;
      patchDataSOA.f_nbond_y[i] = 0;
      patchDataSOA.f_nbond_z[i] = 0;
    }
    // we dont set the saved force to zero if we dont have nonbonded force.
  }
  // NOTE(review): the condition opening this slow-force section is on a
  // line not visible in this extract (presumably flags.doFullElectrostatics,
  // mirroring the doNonbonded block above) -- confirm against full source.
  const Force *fslow = f[Results::slow].const_begin();
  if(saveForce) {
    for (int i = 0; i < numAtoms; i++) {
      Force f = fslow[i]; // local copy; shadows the member array f[]
      patchDataSOA.f_slow_x[i] = f.x;
      patchDataSOA.f_slow_y[i] = f.y;
      patchDataSOA.f_slow_z[i] = f.z;
      patchDataSOA.f_saved_slow_x[i] = f.x;
      patchDataSOA.f_saved_slow_y[i] = f.y;
      patchDataSOA.f_saved_slow_z[i] = f.z;
    }
  } else {
    for (int i = 0; i < numAtoms; i++) {
      patchDataSOA.f_slow_x[i] = fslow[i].x;
      patchDataSOA.f_slow_y[i] = fslow[i].y;
      patchDataSOA.f_slow_z[i] = fslow[i].z;
    }
  }
  }
  else {
    for (int i = 0; i < numAtoms; i++) {
      patchDataSOA.f_slow_x[i] = 0;
      patchDataSOA.f_slow_y[i] = 0;
      patchDataSOA.f_slow_z[i] = 0;
    }
    // we dont set the saved force to zero if we dont have slow force.
  }
  NAMD_EVENT_STOP(1, NamdProfileEvent::COPY_FORCES_TO_SOA);
}
2677 
2678 
2679 void HomePatch::copy_updates_to_AOS() {
2680  for (int i=0; i < numAtoms; i++) {
2681  atom[i].velocity.x = patchDataSOA.vel_x[i];
2682  atom[i].velocity.y = patchDataSOA.vel_y[i];
2683  atom[i].velocity.z = patchDataSOA.vel_z[i];
2684  atom[i].position.x = patchDataSOA.pos_x[i];
2685  atom[i].position.y = patchDataSOA.pos_y[i];
2686  atom[i].position.z = patchDataSOA.pos_z[i];
2687  }
2688 }
2689 
2690 
2691 void HomePatch::copy_forces_to_AOS() {
2692  switch (flags.maxForceUsed) { // intentionally fallthrough
2693  case 2: {
2694  ForceList& current_force = f[2];
2695  for (int i=0; i < numAtoms; i++) {
2696  current_force[i].x = patchDataSOA.f_slow_x[i];
2697  current_force[i].y = patchDataSOA.f_slow_y[i];
2698  current_force[i].z = patchDataSOA.f_slow_z[i];
2699  }
2700  }
2701  case 1: {
2702  ForceList& current_force = f[1];
2703  for (int i=0; i < numAtoms; i++) {
2704  current_force[i].x = patchDataSOA.f_nbond_x[i];
2705  current_force[i].y = patchDataSOA.f_nbond_y[i];
2706  current_force[i].z = patchDataSOA.f_nbond_z[i];
2707  }
2708  }
2709  case 0: {
2710  ForceList& current_force = f[0];
2711  for (int i=0; i < numAtoms; i++) {
2712  current_force[i].x = patchDataSOA.f_normal_x[i];
2713  current_force[i].y = patchDataSOA.f_normal_y[i];
2714  current_force[i].z = patchDataSOA.f_normal_z[i];
2715  }
2716  }
2717  }
2718 }
2719 
2720 
2721 void HomePatch::zero_global_forces_SOA() {
2722  memset(this->patchDataSOA.f_global_x, 0, sizeof(double)*numAtoms);
2723  memset(this->patchDataSOA.f_global_y, 0, sizeof(double)*numAtoms);
2724  memset(this->patchDataSOA.f_global_z, 0, sizeof(double)*numAtoms);
2725 }
2726 
2727 #undef DEBUG_REDISTRIB_FORCE
2728 #undef DEBUG_REDISTRIB_FORCE_VERBOSE
2729 //#define DEBUG_REDISTRIB_FORCE
2730 
2745 void HomePatch::redistrib_colinear_lp_force(
2746  Vector& fi, Vector& fj, Vector& fk,
2747  const Vector& ri, const Vector& rj, const Vector& rk,
2748  Real distance_f, Real scale_f, Tensor *virial) {
2749  BigReal distance = distance_f;
2750  BigReal scale = scale_f;
2751  Vector r_jk = rj - rk;
2752  // TODO: Add a check for small distances?
2753  BigReal r_jk_rlength = r_jk.rlength();
2754  distance *= r_jk_rlength;
2755  BigReal fdot = distance*(fi*r_jk)*r_jk_rlength*r_jk_rlength;
2756  const Vector fja = (1. + scale + distance)*fi - r_jk*fdot;
2757  const Vector fka = fi - fja;
2758  if (virial) {
2759  *virial += outer(fja, rj) + outer(fka, rk) - outer(fi, ri);
2760  }
2761  fj += fja;
2762  fk += fka;
2763  fi = 0;
2764 }
2765 
2791 #define FIX_FOR_WATER
// Redistribute the force on a lone pair (atom i), positioned relative
// to hosts j, k, l, onto those hosts so that net force and torque are
// preserved; the lone pair force is zeroed.  midpt selects the bisector
// convention (reference frame built from the midpoint of rk and rl).
void HomePatch::redistrib_relative_lp_force(
  Vector& fi, Vector& fj, Vector& fk, Vector& fl,
  const Vector& ri, const Vector& rj, const Vector& rk, const Vector& rl,
  Tensor *virial, int midpt) {
#ifdef DEBUG_REDISTRIB_FORCE
  Vector foldnet, toldnet; // old net force, old net torque
  foldnet = fi + fj + fk + fl;
  toldnet = cross(ri,fi) + cross(rj,fj) + cross(rk,fk) + cross(rl,fl);
#endif
  // force adjustments to accumulate for atoms j, k, l
  Vector fja(0), fka(0), fla(0);

  // radial component of fi along the j->i axis goes entirely to atom j
  Vector r = ri - rj;
  BigReal invr2 = r.rlength();
  invr2 *= invr2;
  BigReal fdot = (fi*r) * invr2;
  Vector fr = r * fdot;

  fja += fr;

  // s, t span the host frame; midpt uses the k/l midpoint convention
  Vector s, t;
  if (midpt) {
    s = rj - 0.5*(rk + rl);
    t = 0.5*(rk - rl);
  }
  else {
    s = rj - rk;
    t = rk - rl;
  }
  BigReal invs2 = s.rlength();
  invs2 *= invs2;

  Vector p = cross(r,s);  // out-of-plane direction
#if !defined(FIX_FOR_WATER)
  BigReal invp = p.rlength();
#else
  BigReal p2 = p.length2(); // fix division by zero above
#endif

  Vector q = cross(s,t);
  BigReal invq = q.rlength();

  // split the remaining force into out-of-plane (fp) and in-plane (ft)
#if !defined(FIX_FOR_WATER)
  BigReal fpdot = (fi*p) * invp;
  Vector fp = p * fpdot;
  Vector ft = fi - fr - fp;
#else
  BigReal fpdot;
  Vector fp, ft;
  if (p2 < 1e-6) { // vector is near zero, assume no fp contribution to force
    fpdot = 0;
    fp = 0;
    ft = fi - fr;
  }
  else {
    fpdot = (fi*p) / sqrt(p2);
    fp = p * fpdot;
    ft = fi - fr - fp;
  }
#endif

  // in-plane component: apply to j, and cancel the induced torque via
  // an opposite contribution on k (and l in the midpoint convention)
  fja += ft;
  Vector v = cross(r,ft); // torque
  ft = cross(s,v) * invs2;
  fja -= ft;

  if (midpt) {
    fka += 0.5 * ft;
    fla += 0.5 * ft;
  }
  else {
    fka += ft;
  }

  // out-of-plane component: distribute between j and l (and k) so the
  // dihedral torque balance is preserved
  BigReal srdot = (s*r) * invs2;
  Vector rr = r - s*srdot;
  BigReal rrdot = rr.length();
  BigReal stdot = (s*t) * invs2;
  Vector tt = t - s*stdot;
  BigReal invtt = tt.rlength();
  BigReal fact = rrdot*fpdot*invtt*invq;
  Vector fq = q * fact;

  fla += fq;
  fja += fp*(1+srdot) + fq*stdot;

  ft = fq*(1+stdot) + fp*srdot;

  if (midpt) {
    fka += -0.5*ft;
    fla += -0.5*ft;
  }
  else {
    fka -= ft;
  }

  // virial correction: new contributions minus the removed lone pair term
  if (virial) {
    Tensor va = outer(fja,rj);
    va += outer(fka,rk);
    va += outer(fla,rl);
    va -= outer(fi,ri);
    *virial += va;
  }

  fi = 0; // lone pair has zero force
  fj += fja;
  fk += fka;
  fl += fla;

#ifdef DEBUG_REDISTRIB_FORCE
#define TOL_REDISTRIB 1e-4
  // verify that net force and torque are unchanged within tolerance
  Vector fnewnet, tnewnet; // new net force, new net torque
  fnewnet = fi + fj + fk + fl;
  tnewnet = cross(ri,fi) + cross(rj,fj) + cross(rk,fk) + cross(rl,fl);
  Vector fdiff = fnewnet - foldnet;
  Vector tdiff = tnewnet - toldnet;
  if (fdiff.length2() > TOL_REDISTRIB*TOL_REDISTRIB) {
    printf("Error: force redistribution for water exceeded tolerance: "
        "fdiff=(%f, %f, %f)\n", fdiff.x, fdiff.y, fdiff.z);
  }
  if (tdiff.length2() > TOL_REDISTRIB*TOL_REDISTRIB) {
    printf("Error: torque redistribution for water exceeded tolerance: "
        "tdiff=(%f, %f, %f)\n", tdiff.x, tdiff.y, tdiff.z);
  }
#endif
}
2917 
2918 void HomePatch::redistrib_ap_force(Vector& fi, Vector& fj) {
2919  // final state 'atom' force must transfer to initial state atom
2920  // in single topology FEP.
2921  Vector fi_old = fi;
2922  Vector fj_old = fj;
2923  fi = fi_old + fj_old;
2924  fj = fi_old + fj_old;
2925 }
2926 
2927 /* Redistribute forces from the massless lonepair charge particle onto
2928  * the other atoms of the water.
2929  *
2930  * This is done using the same algorithm as charmm uses for TIP4P lonepairs.
2931  *
2932  * Pass by reference the forces (O H1 H2 LP) to be modified,
2933  * pass by constant reference the corresponding positions,
2934  * and a pointer to virial.
2935  */
// Redistribute the force on the massless TIP4P-style lone pair onto the
// oxygen and both hydrogens (same algorithm as CHARMM), preserving net
// force and torque; the lone pair force is zeroed afterwards.
void HomePatch::redistrib_lp_water_force(
  Vector& f_ox, Vector& f_h1, Vector& f_h2, Vector& f_lp,
  const Vector& p_ox, const Vector& p_h1, const Vector& p_h2,
  const Vector& p_lp, Tensor *virial) {

#ifdef DEBUG_REDISTRIB_FORCE
  // Debug information to check against results at end

  // total force and torque relative to origin
  Vector totforce, tottorque;
  totforce = f_ox + f_h1 + f_h2 + f_lp;
  tottorque = cross(f_ox, p_ox) + cross(f_h1, p_h1) + cross(f_h2, p_h2);
  //printf("Torque without LP is %f/%f/%f\n",
  //    tottorque.x, tottorque.y, tottorque.z);
  tottorque += cross(f_lp, p_lp);
  //printf("Torque with LP is %f/%f/%f\n",
  //    tottorque.x, tottorque.y, tottorque.z);
#endif

  // accumulate force adjustments
  Vector fad_ox(0), fad_h(0);

  // Calculate the radial component of the force and add it to the oxygen
  Vector r_ox_lp = p_lp - p_ox;
  BigReal invlen2_r_ox_lp = r_ox_lp.rlength();
  invlen2_r_ox_lp *= invlen2_r_ox_lp;
  BigReal rad_factor = (f_lp * r_ox_lp) * invlen2_r_ox_lp;
  Vector f_rad = r_ox_lp * rad_factor;

  fad_ox += f_rad;

  // Calculate the angular component
  Vector r_hcom_ox = p_ox - ( (p_h1 + p_h2) * 0.5 );
  // Vector r_h2_h1_2 = (p_h1 - p_h2) * 0.5; // half of r_h2_h1

  // deviation from collinearity of charge site
  //Vector r_oop = cross(r_ox_lp, r_hcom_ox);
  //
  // vector out of o-h-h plane
  //Vector r_perp = cross(r_hcom_ox, r_h2_h1_2);

  // Here we assume that Ox/Lp/Hcom are linear
  // If you want to correct for deviations, this is the place

// printf("Deviation from linearity for ox %i: %f/%f/%f\n", oxind, r_oop.x, r_oop.y, r_oop.z);

  Vector f_ang = f_lp - f_rad; // leave the angular component

  // now split this component onto the other atoms
  // lever-arm weights: oxygen gets the remainder, each hydrogen half
  // of the lone pair's share
  BigReal len_r_ox_lp = r_ox_lp.length();
  BigReal invlen_r_hcom_ox = r_hcom_ox.rlength();
  BigReal oxcomp = (r_hcom_ox.length() - len_r_ox_lp) * invlen_r_hcom_ox;
  BigReal hydcomp = 0.5 * len_r_ox_lp * invlen_r_hcom_ox;

  fad_ox += (f_ang * oxcomp);
  fad_h += (f_ang * hydcomp); // adjustment for both hydrogens

  // Add virial contributions
  if (virial) {
    Tensor vir = outer(fad_ox, p_ox);
    vir += outer(fad_h, p_h1);
    vir += outer(fad_h, p_h2);
    vir -= outer(f_lp, p_lp);
    *virial += vir;
  }

  //Vector zerovec(0.0, 0.0, 0.0);
  f_lp = 0;
  f_ox += fad_ox;
  f_h1 += fad_h;
  f_h2 += fad_h;

#ifdef DEBUG_REDISTRIB_FORCE
  // Check that the total force and torque come out right
  Vector newforce, newtorque;
  newforce = f_ox + f_h1 + f_h2;
  newtorque = cross(f_ox, p_ox) + cross(f_h1, p_h1) + cross(f_h2, p_h2);
  Vector fdiff = newforce - totforce;
  Vector tdiff = newtorque - tottorque;
  BigReal error = fdiff.length();
  if (error > 0.0001) {
    printf("Error: Force redistribution for water "
        "exceeded force tolerance: error=%f\n", error);
  }
#ifdef DEBUG_REDISTRIB_FORCE_VERBOSE
  printf("Error in net force: %f\n", error);
#endif

  error = tdiff.length();
  if (error > 0.0001) {
    printf("Error: Force redistribution for water "
        "exceeded torque tolerance: error=%f\n", error);
  }
#ifdef DEBUG_REDISTRIB_FORCE_VERBOSE
  printf("Error in net torque: %f\n", error);
#endif
#endif /* DEBUG */
}
3034 
3053 void HomePatch::reposition_colinear_lonepair(
3054  Vector& ri, const Vector& rj, const Vector& rk,
3055  Real distance_f, Real scale_f)
3056 {
3057  BigReal distance = distance_f;
3058  BigReal scale = scale_f;
3059  Vector r_jk = rj - rk;
3060  BigReal r2 = r_jk.length2();
3061  if (r2 < 1e-10 || 100. < r2) { // same low tolerance as used in CHARMM
3062  iout << iWARN << "Large/small distance between lonepair reference atoms: ("
3063  << rj << ") (" << rk << ")\n" << endi;
3064  }
3065  ri = rj + (scale + distance*r_jk.rlength())*r_jk;
3066 }
3067 
3082 void HomePatch::reposition_relative_lonepair(
3083  Vector& ri, const Vector& rj, const Vector& rk, const Vector& rl,
3084  Real distance, Real angle, Real dihedral)
3085 {
3086  if ( (rj-rk).length2() > 100. || (rj-rl).length2() > 100. ) {
3087  iout << iWARN << "Large distance between lonepair reference atoms: ("
3088  << rj << ") (" << rk << ") (" << rl << ")\n" << endi;
3089  }
3090  BigReal r, t, p, cst, snt, csp, snp, invlen;
3091  Vector v, w, a, b, c;
3092 
3093  if (distance >= 0) {
3094  v = rk;
3095  r = distance;
3096  }
3097  else {
3098  v = 0.5*(rk + rl);
3099  r = -distance;
3100  }
3101 
3102  t = angle;
3103  p = dihedral;
3104  cst = cos(t);
3105  snt = sin(t);
3106  csp = cos(p);
3107  snp = sin(p);
3108  a = v - rj;
3109  b = rl - v;
3110  invlen = a.rlength();
3111  a *= invlen;
3112  c = cross(b, a);
3113  invlen = c.rlength();
3114  c *= invlen;
3115  b = cross(a, c);
3116  w.x = r*cst;
3117  w.y = r*snt*csp;
3118  w.z = r*snt*snp;
3119  ri.x = rj.x + w.x*a.x + w.y*b.x + w.z*c.x;
3120  ri.y = rj.y + w.x*a.y + w.y*b.y + w.z*c.y;
3121  ri.z = rj.z + w.x*a.z + w.y*b.z + w.z*c.z;
3122 }
3123 
3124 void HomePatch::reposition_alchpair(Vector& ri, Vector& rj, Mass& Mi, Mass& Mj) {
3125  Vector ri_old, rj_old;
3126  Mass mi, mj;
3127  ri_old.x = ri.x;
3128  ri_old.y = ri.y;
3129  ri_old.z = ri.z;
3130  rj_old.x = rj.x;
3131  rj_old.y = rj.y;
3132  rj_old.z = rj.z;
3133 
3134  mi = Mi; mj = Mj;
3135  ri.x = (mi * ri_old.x + mj * rj_old.x)/(mi + mj);
3136  ri.y = (mi * ri_old.y + mj * rj_old.y)/(mi + mj);
3137  ri.z = (mi * ri_old.z + mj * rj_old.z)/(mi + mj);
3138  rj.x = ri.x;
3139  rj.y = ri.y;
3140  rj.z = ri.z;
3141 }
3142 
3143 void HomePatch::reposition_all_lonepairs(void) {
3144  // ASSERT: simParams->lonepairs == TRUE
3145  for (int i=0; i < numAtoms; i++) {
3146  if (atom[i].mass < 0.01) {
3147  // found a lone pair
3148  AtomID aid = atom[i].id; // global atom ID of lp
3149  Lphost *lph = Node::Object()->molecule->get_lphost(aid); // its lphost
3150  if (lph == NULL) {
3151  char errmsg[512];
3152  sprintf(errmsg, "reposition lone pairs: "
3153  "no Lphost exists for LP %d\n", aid);
3154  NAMD_die(errmsg);
3155  }
3156  LocalID j = AtomMap::Object()->localID(lph->atom2);
3157  LocalID k = AtomMap::Object()->localID(lph->atom3);
3158  LocalID l = AtomMap::Object()->localID(lph->atom4);
3159  if (j.pid != patchID || k.pid != patchID || l.pid != patchID) {
3160  char errmsg[512];
3161  sprintf(errmsg, "reposition lone pairs: "
3162  "LP %d has some Lphost atom off patch\n", aid);
3163  NAMD_die(errmsg);
3164  }
3165  // reposition this lone pair
3166  if (lph->numhosts == 2) {
3167  reposition_colinear_lonepair(atom[i].position, atom[j.index].position,
3168  atom[k.index].position, lph->distance, lph->angle);
3169  }
3170  else if (lph->numhosts == 3) {
3171  reposition_relative_lonepair(atom[i].position, atom[j.index].position,
3172  atom[k.index].position, atom[l.index].position,
3173  lph->distance, lph->angle, lph->dihedral);
3174  }
3175  }
3176  }
3177 }
3178 
3179 void HomePatch::reposition_all_alchpairs(void) {
3180  Molecule *mol = Node::Object()->molecule;
3181  int numFepInitial = mol->numFepInitial;
3182  int alchPair_id;
3183  for (int i = 0; i < numAtoms; i++) {
3184  if (atom[i].partition == 4 ) {
3185  alchPair_id = atom[i].id + numFepInitial; //global id of the pair atom.
3186  LocalID j = AtomMap::Object()->localID(alchPair_id);
3187  reposition_alchpair(atom[i].position, atom[j.index].position, atom[i].mass, atom[j.index].mass);
3188  }
3189  }
3190 }
3191 
3192 void HomePatch::swm4_omrepos(Vector *ref, Vector *pos, Vector *vel,
3193  BigReal invdt) {
3194  // Reposition lonepair (Om) particle of Drude SWM4 water.
3195  // Same comments apply as to tip4_omrepos(), but the ordering of atoms
3196  // is different: O, D, LP, H1, H2.
3197  pos[2] = pos[0] + (0.5 * (pos[3] + pos[4]) - pos[0]) * (r_om / r_ohc);
3198  // Now, adjust velocity of particle to get it to appropriate place
3199  // during next integration "drift-step"
3200  if (invdt != 0) {
3201  vel[2] = (pos[2] - ref[2]) * invdt;
3202  }
3203  // No virial correction needed since lonepair is massless
3204 }
3205 
3206 void HomePatch::tip4_omrepos(Vector* ref, Vector* pos, Vector* vel, BigReal invdt) {
3207  /* Reposition the om particle of a tip4p water
3208  * A little geometry shows that the appropriate position is given by
3209  * R_O + (1 / 2 r_ohc) * ( 0.5 (R_H1 + R_H2) - R_O )
3210  * Here r_om is the distance from the oxygen to Om site, and r_ohc
3211  * is the altitude from the oxygen to the hydrogen center of mass
3212  * Those quantities are precalculated upon initialization of HomePatch
3213  *
3214  * Ordering of TIP4P atoms: O, H1, H2, LP.
3215  */
3216 
3217  //printf("rom/rohc are %f %f and invdt is %f\n", r_om, r_ohc, invdt);
3218  //printf("Other positions are: \n 0: %f %f %f\n 1: %f %f %f\n 2: %f %f %f\n", pos[0].x, pos[0].y, pos[0].z, pos[1].x, pos[1].y, pos[1].z, pos[2].x, pos[2].y, pos[2].z);
3219  pos[3] = pos[0] + (0.5 * (pos[1] + pos[2]) - pos[0]) * (r_om / r_ohc);
3220  //printf("New position for lp is %f %f %f\n", pos[3].x, pos[3].y, pos[3].z);
3221 
3222  // Now, adjust the velocity of the particle to get it to the appropriate place
3223  if (invdt != 0) {
3224  vel[3] = (pos[3] - ref[3]) * invdt;
3225  }
3226 
3227  // No virial correction needed, since this is a massless particle
3228  return;
3229 }
3230 
3231 void HomePatch::redistrib_lonepair_forces(const int ftag, Tensor *virial) {
3232  // ASSERT: simParams->lonepairs == TRUE
3233  ForceList *f_mod = f;
3234  for (int i = 0; i < numAtoms; i++) {
3235  if (atom[i].mass < 0.01) {
3236  // found a lone pair
3237  AtomID aid = atom[i].id; // global atom ID of lp
3238  Lphost *lph = Node::Object()->molecule->get_lphost(aid); // its lphost
3239  if (lph == NULL) {
3240  char errmsg[512];
3241  sprintf(errmsg, "redistrib lone pair forces: "
3242  "no Lphost exists for LP %d\n", aid);
3243  NAMD_die(errmsg);
3244  }
3245  LocalID j = AtomMap::Object()->localID(lph->atom2);
3246  LocalID k = AtomMap::Object()->localID(lph->atom3);
3247  LocalID l = AtomMap::Object()->localID(lph->atom4);
3248  if (j.pid != patchID || k.pid != patchID || l.pid != patchID) {
3249  char errmsg[512];
3250  sprintf(errmsg, "redistrib lone pair forces: "
3251  "LP %d has some Lphost atom off patch\n", aid);
3252  NAMD_die(errmsg);
3253  }
3254  // redistribute forces from this lone pair
3255  if (lph->numhosts == 2) {
3256  redistrib_colinear_lp_force(f_mod[ftag][i], f_mod[ftag][j.index],
3257  f_mod[ftag][k.index], atom[i].position, atom[j.index].position,
3258  atom[k.index].position, lph->distance, lph->angle, virial);
3259  }
3260  else if (lph->numhosts == 3) {
3261  int midpt = (lph->distance < 0);
3262  redistrib_relative_lp_force(f_mod[ftag][i], f_mod[ftag][j.index],
3263  f_mod[ftag][k.index], f_mod[ftag][l.index],
3264  atom[i].position, atom[j.index].position,
3265  atom[k.index].position, atom[l.index].position, virial, midpt);
3266  }
3267  }
3268  }
3269 }
3270 
3271 void HomePatch::redistrib_alchpair_forces(const int ftag) { //Virial unchanged
3273  Molecule *mol = Node::Object()->molecule;
3274  ForceList *f_mod = f;
3275  int numFepInitial = mol->numFepInitial;
3276  BigReal lambda = simParams->alchLambda;
3277  int alchPair_id;
3278  for (int i = 0; i < numAtoms; i++) {
3279  if (atom[i].partition == 4 ) {
3280  alchPair_id = atom[i].id + numFepInitial; //global id of the pair atom.
3281  LocalID j = AtomMap::Object()->localID(alchPair_id);
3282  redistrib_ap_force(f_mod[ftag][i],f_mod[ftag][j.index]);
3283  }
3284  }
3285 }
3286 
3287 void HomePatch::redistrib_swm4_forces(const int ftag, Tensor *virial) {
3288  // Loop over the patch's atoms and apply the appropriate corrections
3289  // to get all forces off of lone pairs
3290  ForceList *f_mod = f;
3291  for (int i = 0; i < numAtoms; i++) {
3292  if (atom[i].mass < 0.01) {
3293  // found lonepair
3294  redistrib_lp_water_force(f_mod[ftag][i-2], f_mod[ftag][i+1],
3295  f_mod[ftag][i+2], f_mod[ftag][i],
3296  atom[i-2].position, atom[i+1].position,
3297  atom[i+2].position, atom[i].position, virial);
3298  }
3299  }
3300 }
3301 
3302 void HomePatch::redistrib_tip4p_forces(const int ftag, Tensor* virial) {
3303  // Loop over the patch's atoms and apply the appropriate corrections
3304  // to get all forces off of lone pairs
3305  // Atom ordering: O H1 H2 LP
3306  ForceList *f_mod =f;
3307  for (int i=0; i<numAtoms; i++) {
3308  if (atom[i].mass < 0.01) {
3309  // found lonepair
3310  redistrib_lp_water_force(f_mod[ftag][i-3], f_mod[ftag][i-2],
3311  f_mod[ftag][i-1], f_mod[ftag][i],
3312  atom[i-3].position, atom[i-2].position,
3313  atom[i-1].position, atom[i].position, virial);
3314  }
3315  }
3316 }
3317 
3318 
    // NOTE(review): the signature line for this method is missing from this
    // view (extraction artifact); the parameter list below appears to belong
    // to HomePatch::addForceToMomentum -- confirm against HomePatch.h.
    FullAtom * __restrict atom_arr,     // atoms whose velocities are updated in place
    const Force * __restrict force_arr, // per-atom force, same indexing as atom_arr
    const BigReal dt,                   // timestep factor applied to each force
    int num_atoms                       // number of entries in both arrays
    ) {

  // v += F * dt / m for every atom; fixed atoms get their velocity zeroed.
  if ( simParams->fixedAtomsOn ) {
    for ( int i = 0; i < num_atoms; ++i ) {
      if ( atom_arr[i].atomFixed ) {
        // Fixed atoms never move; any force on them is discarded.
        atom_arr[i].velocity = 0;
      } else {
        BigReal dt_mass = dt * atom_arr[i].recipMass;  // dt/mass
        atom_arr[i].velocity.x += force_arr[i].x * dt_mass;
        atom_arr[i].velocity.y += force_arr[i].y * dt_mass;
        atom_arr[i].velocity.z += force_arr[i].z * dt_mass;
      }
    }
  } else {
    // No fixed atoms: tight loop without the per-atom branch.
    for ( int i = 0; i < num_atoms; ++i ) {
      BigReal dt_mass = dt * atom_arr[i].recipMass;  // dt/mass
      atom_arr[i].velocity.x += force_arr[i].x * dt_mass;
      atom_arr[i].velocity.y += force_arr[i].y * dt_mass;
      atom_arr[i].velocity.z += force_arr[i].z * dt_mass;
    }
  }
}
3347 
    // NOTE(review): the signature line for this method is missing from this
    // view (extraction artifact); the parameter list below appears to belong
    // to HomePatch::addForceToMomentum3 -- confirm against HomePatch.h.
    FullAtom * __restrict atom_arr,      // atoms whose velocities are updated in place
    const Force * __restrict force_arr1, // first per-atom force array
    const Force * __restrict force_arr2, // second per-atom force array
    const Force * __restrict force_arr3, // third per-atom force array
    const BigReal dt1,                   // timestep factor for force_arr1
    const BigReal dt2,                   // timestep factor for force_arr2
    const BigReal dt3,                   // timestep factor for force_arr3
    int num_atoms                        // number of entries in all arrays
    ) {

  // v += (F1*dt1 + F2*dt2 + F3*dt3) / m, fusing the three force
  // contributions in one pass; fixed atoms get their velocity zeroed.
  if ( simParams->fixedAtomsOn ) {
    for ( int i = 0; i < num_atoms; ++i ) {
      if ( atom_arr[i].atomFixed ) {
        // Fixed atoms never move; any force on them is discarded.
        atom_arr[i].velocity = 0;
      } else {
        BigReal rmass = atom_arr[i].recipMass;  // 1/mass
        atom_arr[i].velocity.x += (force_arr1[i].x*dt1
            + force_arr2[i].x*dt2 + force_arr3[i].x*dt3) * rmass;
        atom_arr[i].velocity.y += (force_arr1[i].y*dt1
            + force_arr2[i].y*dt2 + force_arr3[i].y*dt3) * rmass;
        atom_arr[i].velocity.z += (force_arr1[i].z*dt1
            + force_arr2[i].z*dt2 + force_arr3[i].z*dt3) * rmass;
      }
    }
  } else {
    // No fixed atoms: tight loop without the per-atom branch.
    for ( int i = 0; i < num_atoms; ++i ) {
      BigReal rmass = atom_arr[i].recipMass;  // 1/mass
      atom_arr[i].velocity.x += (force_arr1[i].x*dt1
          + force_arr2[i].x*dt2 + force_arr3[i].x*dt3) * rmass;
      atom_arr[i].velocity.y += (force_arr1[i].y*dt1
          + force_arr2[i].y*dt2 + force_arr3[i].y*dt3) * rmass;
      atom_arr[i].velocity.z += (force_arr1[i].z*dt1
          + force_arr2[i].z*dt2 + force_arr3[i].z*dt3) * rmass;
    }
  }
}
3386 
    // NOTE(review): the signature line for this method is missing from this
    // view (extraction artifact); the parameter list below appears to belong
    // to HomePatch::addVelocityToPosition -- confirm against HomePatch.h.
    FullAtom * __restrict atom_arr,  // atoms whose positions are updated in place
    const BigReal dt,                // timestep factor applied to each velocity
    int num_atoms                    // number of atoms in the array
    ) {
  // x += v * dt for every atom; fixed atoms do not move.
  if ( simParams->fixedAtomsOn ) {
    for ( int i = 0; i < num_atoms; ++i ) {
      if ( ! atom_arr[i].atomFixed ) {
        atom_arr[i].position.x += atom_arr[i].velocity.x * dt;
        atom_arr[i].position.y += atom_arr[i].velocity.y * dt;
        atom_arr[i].position.z += atom_arr[i].velocity.z * dt;
      }
    }
  } else {
    // No fixed atoms: tight loop without the per-atom branch.
    for ( int i = 0; i < num_atoms; ++i ) {
      atom_arr[i].position.x += atom_arr[i].velocity.x * dt;
      atom_arr[i].position.y += atom_arr[i].velocity.y * dt;
      atom_arr[i].position.z += atom_arr[i].velocity.z * dt;
    }
  }
}
3409 
// Enforce the Drude "hard wall": whenever a Drude particle has strayed
// farther than drudeBondLen from its host heavy atom, reflect the pair's
// relative velocity along the bond (rescaled to the Drude thermal speed)
// and correct the positions back to the wall.  Optionally accumulates the
// constraint virial and pressure-profile contributions.
// Returns 0 on success, -1 (early exit) when a Drude is more than twice
// the wall distance from its host and rigidDie is set.
int HomePatch::hardWallDrude(const BigReal timestep, Tensor *virial,
    SubmitReduction *ppreduction)
{
  Molecule *mol = Node::Object()->molecule;
  const BigReal kbt=BOLTZMANN*simParams->drudeTemp;
  const int fixedAtomsOn = simParams->fixedAtomsOn;
  const BigReal dt = timestep / TIMEFACTOR;
  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt;  // precalc 1/dt
  int i, ia, ib, j;
  int dieOnError = simParams->rigidDie;
  Tensor wc;  // constraint virial
  BigReal idz, zmin, delta_T, maxtime=timestep,v_Bond;
  int nslabs;

  // start data for hard wall boundary between drude and its host atom
  // static int Count=0;
  int Idx;
  double r_wall, r_wall_SQ, rab, rab_SQ, dr, mass_a, mass_b, mass_sum;
  Vector v_ab, vb_1, vp_1, vb_2, vp_2, new_vel_a, new_vel_b, new_pos_a, new_pos_b, *new_pos, *new_vel;
  double dot_v_r_1, dot_v_r_2;
  double vb_cm, dr_a, dr_b;
  // end data for hard wall boundary between drude and its host atom

  // start calculation of hard wall boundary between drude and its host atom
  if (simParams->drudeHardWallOn) {
    if (ppreduction) {
      // Pressure-profile slab geometry along the cell's c axis.
      nslabs = simParams->pressureProfileSlabs;
      idz = nslabs/lattice.c().z;
      zmin = lattice.origin().z - 0.5*lattice.c().z;
    }

    r_wall = simParams->drudeBondLen;
    r_wall_SQ = r_wall*r_wall;
    // Count++;
    for (i=1; i<numAtoms; i++) {
      if ( (0.05 < atom[i].mass) && ((atom[i].mass < 1.0)) ) {  // drude particle
        // A Drude particle immediately follows its host heavy atom.
        ia = i-1;
        ib = i;

        v_ab = atom[ib].position - atom[ia].position;
        rab_SQ = v_ab.x*v_ab.x + v_ab.y*v_ab.y + v_ab.z*v_ab.z;

        if (rab_SQ > r_wall_SQ) {  // to impose the hard wall constraint
          rab = sqrt(rab_SQ);
          if ( (rab > (2.0*r_wall)) && dieOnError ) {  // unexpected situation
            iout << iERROR << "HardWallDrude> "
              << "The drude is too far away from atom "
              << (atom[ia].id + 1) << " d = " << rab << "!\n" << endi;
            return -1;  // triggers early exit
          }

          // Normalize to the unit host->Drude bond vector.
          v_ab.x /= rab;
          v_ab.y /= rab;
          v_ab.z /= rab;

          if ( fixedAtomsOn && atom[ia].atomFixed ) {  // the heavy atom is fixed
            if (atom[ib].atomFixed) {  // the drude is fixed too
              continue;
            }
            else {  // only the heavy atom is fixed
              // Split the Drude velocity into bond-parallel (vb_2) and
              // perpendicular (vp_2) components.
              dot_v_r_2 = atom[ib].velocity.x*v_ab.x
                + atom[ib].velocity.y*v_ab.y + atom[ib].velocity.z*v_ab.z;
              vb_2 = dot_v_r_2 * v_ab;
              vp_2 = atom[ib].velocity - vb_2;

              dr = rab - r_wall;
              if(dot_v_r_2 == 0.0) {
                delta_T = maxtime;
              }
              else {
                delta_T = dr/fabs(dot_v_r_2);  // the time since the collision occurs
                if(delta_T > maxtime ) delta_T = maxtime;  // make sure it is not crazy
              }

              // Flip the parallel component and rescale its magnitude to the
              // Drude thermal speed sqrt(kT/m).
              dot_v_r_2 = -dot_v_r_2*sqrt(kbt/atom[ib].mass)/fabs(dot_v_r_2);

              vb_2 = dot_v_r_2 * v_ab;

              new_vel_a = atom[ia].velocity;
              new_vel_b = vp_2 + vb_2;

              dr_b = -dr + delta_T*dot_v_r_2;  // L = L_0 + dT *v_new, v was flipped

              new_pos_a = atom[ia].position;
              new_pos_b = atom[ib].position + dr_b*v_ab;  // correct the position
            }
          }
          else {
            // General case: both atoms move.  Work with bond-parallel
            // velocity components relative to the pair's center of mass.
            mass_a = atom[ia].mass;
            mass_b = atom[ib].mass;
            mass_sum = mass_a+mass_b;

            dot_v_r_1 = atom[ia].velocity.x*v_ab.x
              + atom[ia].velocity.y*v_ab.y + atom[ia].velocity.z*v_ab.z;
            vb_1 = dot_v_r_1 * v_ab;
            vp_1 = atom[ia].velocity - vb_1;

            dot_v_r_2 = atom[ib].velocity.x*v_ab.x
              + atom[ib].velocity.y*v_ab.y + atom[ib].velocity.z*v_ab.z;
            vb_2 = dot_v_r_2 * v_ab;
            vp_2 = atom[ib].velocity - vb_2;

            vb_cm = (mass_a*dot_v_r_1 + mass_b*dot_v_r_2)/mass_sum;

            dot_v_r_1 -= vb_cm;
            dot_v_r_2 -= vb_cm;

            dr = rab - r_wall;

            if(dot_v_r_2 == dot_v_r_1) {
              delta_T = maxtime;
            }
            else {
              delta_T = dr/fabs(dot_v_r_2 - dot_v_r_1);  // the time since the collision occurs
              if(delta_T > maxtime ) delta_T = maxtime;  // make sure it is not crazy
            }

            // the relative velocity between ia and ib. Drawn according to T_Drude
            v_Bond = sqrt(kbt/mass_b);

            // reflect the velocity along bond vector and scale down
            dot_v_r_1 = -dot_v_r_1*v_Bond*mass_b/(fabs(dot_v_r_1)*mass_sum);
            dot_v_r_2 = -dot_v_r_2*v_Bond*mass_a/(fabs(dot_v_r_2)*mass_sum);

            dr_a = dr*mass_b/mass_sum + delta_T*dot_v_r_1;
            dr_b = -dr*mass_a/mass_sum + delta_T*dot_v_r_2;

            new_pos_a = atom[ia].position + dr_a*v_ab;  // correct the position
            new_pos_b = atom[ib].position + dr_b*v_ab;
            // atom[ia].position += (dr_a*v_ab);  // correct the position
            // atom[ib].position += (dr_b*v_ab);

            dot_v_r_1 += vb_cm;
            dot_v_r_2 += vb_cm;

            vb_1 = dot_v_r_1 * v_ab;
            vb_2 = dot_v_r_2 * v_ab;

            new_vel_a = vp_1 + vb_1;
            new_vel_b = vp_2 + vb_2;
          }

          int ppoffset, partition;
          if ( invdt == 0 ) {
            // Zero timestep: commit corrected positions only.
            atom[ia].position = new_pos_a;
            atom[ib].position = new_pos_b;
          }
          else if ( virial == 0 ) {
            // No virial requested: commit corrected velocities only.
            atom[ia].velocity = new_vel_a;
            atom[ib].velocity = new_vel_b;
          }
          else {
            // Commit both corrections and accumulate the constraint virial
            // (plus optional pressure-profile terms) for each of the pair.
            for ( j = 0; j < 2; j++ ) {
              if (j==0) {  // atom ia, heavy atom
                Idx = ia;
                new_pos = &new_pos_a;
                new_vel = &new_vel_a;
              }
              else if (j==1) {  // atom ib, drude
                Idx = ib;
                new_pos = &new_pos_b;
                new_vel = &new_vel_b;
              }
              // Effective constraint force: df = m * dv / dt.
              Force df = (*new_vel - atom[Idx].velocity) *
                ( atom[Idx].mass * invdt );
              Tensor vir = outer(df, atom[Idx].position);
              wc += vir;
              atom[Idx].velocity = *new_vel;
              atom[Idx].position = *new_pos;

              if (ppreduction) {
                // Slab offset is computed once (for the heavy atom, j==0)
                // and reused for the Drude of the same pair.
                if (!j) {
                  BigReal z = new_pos->z;
                  int partition = atom[Idx].partition;
                  int slab = (int)floor((z-zmin)*idz);
                  if (slab < 0) slab += nslabs;
                  else if (slab >= nslabs) slab -= nslabs;
                  ppoffset = 3*(slab + nslabs*partition);
                }
                ppreduction->item(ppoffset ) += vir.xx;
                ppreduction->item(ppoffset+1) += vir.yy;
                ppreduction->item(ppoffset+2) += vir.zz;
              }

            }
          }
        }
      }
    }

    // if ( (Count>10000) && (Count%10==0) ) {
    //   v_ab = atom[1].position - atom[0].position;
    //   rab_SQ = v_ab.x*v_ab.x + v_ab.y*v_ab.y + v_ab.z*v_ab.z;
    //   iout << "DBG_R: " << Count << " " << sqrt(rab_SQ) << "\n" << endi;
    // }

  }

  // end calculation of hard wall boundary between drude and its host atom

  if ( dt && virial ) *virial += wc;

  return 0;
}
3615 
// NOTE(review): the signature line for this method is missing from this
// view (extraction artifact); the body below builds the per-patch
// constraint work lists -- settleList (waters handled by SETTLE),
// rattleList/rattleParam (groups handled by RATTLE-family solvers) and
// noconstList (unconstrained groups) -- confirm the name (buildRattleList)
// against HomePatch.h.
#ifdef DEBUG_MINIMIZE
  if (patchID == 0) {
    printf("Step %d, patch %d: buildRattleList()\n",
        flags.step, (int)patchID);
  }
#endif
  const int fixedAtomsOn = simParams->fixedAtomsOn;
  const int useSettle = simParams->useSettle;

  // Re-size to contain numAtoms elements
  velNew.resize(numAtoms);
  posNew.resize(numAtoms);

  // Size of a hydrogen group for water
  const WaterModel watmodel = simParams->watmodel;
  const int wathgsize = getWaterModelGroupSize(watmodel);

  // Initialize the settle algorithm with water parameters
  // settle1() assumes all waters are identical,
  // and will generate bad results if they are not.
  // XXX this will move to Molecule::build_atom_status when that
  // version is debugged
  if ( ! settle_initialized ) {
    for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
      // find a water
      if (atom[ig].rigidBondLength > 0) {
        int oatm;
        if (watmodel == WaterModel::SWM4) {
          oatm = ig+3;  // skip over Drude and Lonepair
          //printf("ig=%d mass_ig=%g oatm=%d mass_oatm=%g\n",
          //    ig, atom[ig].mass, oatm, atom[oatm].mass);
        }
        else {
          oatm = ig+1;
          // Avoid using the Om site to set this by mistake
          if (atom[ig].mass < 0.5 || atom[ig+1].mass < 0.5) {
            oatm += 1;
          }
        }

        // initialize settle water parameters
        settle1init(atom[ig].mass, atom[oatm].mass,
            atom[ig].rigidBondLength,
            atom[oatm].rigidBondLength,
            settle_mO, settle_mH,
            settle_mOrmT, settle_mHrmT, settle_ra,
            settle_rb, settle_rc, settle_rra);
        settle_initialized = 1;
        break;  // done with init
      }
    }
  }

  // Scratch arrays sized for the largest supported hydrogen group.
  Vector ref[10];
  BigReal rmass[10];
  BigReal dsq[10];
  int fixed[10];
  int ial[10];
  int ibl[10];

  int numSettle = 0;
  int numRattle = 0;
  int posRattleParam = 0;

  settleList.clear();
  rattleList.clear();
  noconstList.clear();
  rattleParam.clear();

  // Classify every hydrogen group into settle/rattle/no-constraint lists.
  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
    int hgs = atom[ig].hydrogenGroupSize;
    if ( hgs == 1 ) {
      // only one atom in group
      noconstList.push_back(ig);
      continue;
    }
    int anyfixed = 0;
    for (int i = 0; i < hgs; ++i ) {
      ref[i] = atom[ig+i].position;
      rmass[i] = (atom[ig+i].mass > 0. ? 1. / atom[ig+i].mass : 0.);
      fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
      if ( fixed[i] ) {
        anyfixed = 1;
        rmass[i] = 0.;  // fixed atoms are treated as infinitely heavy
      }
    }
    int icnt = 0;
    BigReal tmp = atom[ig].rigidBondLength;
    if (tmp > 0.0) {  // for water
      if (hgs != wathgsize) {
        char errmsg[256];
        sprintf(errmsg, "Water molecule starting with atom %d contains %d atoms "
            "but the specified water model requires %d atoms.\n",
            atom[ig].id+1, hgs, wathgsize);
        NAMD_die(errmsg);
      }
      // Use SETTLE for water unless some of the water atoms are fixed,
      if (useSettle && !anyfixed) {
        // Store to Settle -list
        settleList.push_back(ig);
        continue;
      }
      // Fixed-atom waters fall through to RATTLE: add the H1-H2 constraint.
      if ( !(fixed[1] && fixed[2]) ) {
        dsq[icnt] = tmp * tmp;
        ial[icnt] = 1;
        ibl[icnt] = 2;
        ++icnt;
      }
    }  // if (tmp > 0.0)
    for (int i = 1; i < hgs; ++i ) {  // normal bonds to mother atom
      if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
        if ( !(fixed[0] && fixed[i]) ) {
          dsq[icnt] = tmp * tmp;
          ial[icnt] = 0;
          ibl[icnt] = i;
          ++icnt;
        }
      }
    }
    if ( icnt == 0 ) {
      // no constraints
      noconstList.push_back(ig);
      continue;
    }
    // Store to Rattle -list
    RattleList rattleListElem;
    rattleListElem.ig = ig;
    rattleListElem.icnt = icnt;
    rattleList.push_back(rattleListElem);
    for (int i = 0; i < icnt; ++i ) {
      int a = ial[i];
      int b = ibl[i];
      RattleParam rattleParamElem;
      rattleParamElem.ia = a;
      rattleParamElem.ib = b;
      rattleParamElem.dsq = dsq[i];
      rattleParamElem.rma = rmass[a];
      rattleParamElem.rmb = rmass[b];
      rattleParam.push_back(rattleParamElem);
    }
    // adding dummy atom in the hydrogen group: pad rattleParam to a fixed
    // stride of 4 entries per group (consumers step by 4 -- see rattle1()).
    for (int i = icnt; i < 4; ++i ) {
      RattleParam rattleParamElem;
      rattleParamElem.ia = 0;
      rattleParamElem.ib = 0;
      rattleParamElem.dsq = 0;
      rattleParamElem.rma = 0;
      rattleParamElem.rmb = 0;
      rattleParam.push_back(rattleParamElem);
    }
#if 0
    for(int i = 0; i < 4; ++i) {
      std::cout << rattleParam[i].ia << " " << rattleParam[i].ib << std::endl;
    }
    std::cout << std::endl;
#endif
  }

}
3777 
3778 void HomePatch::addRattleForce(const BigReal invdt, Tensor& wc) {
3779  for (int ig = 0; ig < numAtoms; ++ig ) {
3780  Force df = (velNew[ig] - atom[ig].velocity) * ( atom[ig].mass * invdt );
3781  Tensor vir = outer(df, atom[ig].position);
3782  wc += vir;
3783  f[Results::normal][ig] += df;
3784  atom[ig].velocity = velNew[ig];
3785  }
3786 }
3787 
// Apply rigid-bond constraints (SETTLE for waters, RATTLE-family solvers
// for other hydrogen groups) for one timestep.  Results are staged in
// posNew/velNew and committed at the end depending on invdt/virial.
// Returns 0 on success, -1 (early exit) on constraint failure or
// iteration-limit overrun when rigidDie is set.
int HomePatch::rattle1(const BigReal timestep, Tensor *virial,
    SubmitReduction *ppreduction) {

  if (simParams->watmodel != WaterModel::TIP3 || ppreduction) {
    // Call old rattle1 -method instead
    return rattle1old(timestep, virial, ppreduction);
  }

  if (!rattleListValid) {
    buildRattleList();
    rattleListValid = true;
  }

  const int fixedAtomsOn = simParams->fixedAtomsOn;
  const int useSettle = simParams->useSettle;
  const BigReal dt = timestep / TIMEFACTOR;
  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt;  // precalc 1/dt
  const BigReal tol2 = 2.0 * simParams->rigidTol;
  int maxiter = simParams->rigidIter;
  int dieOnError = simParams->rigidDie;

  Vector ref[10];  // reference position
  Vector pos[10];  // new position
  Vector vel[10];  // new velocity

  // Manual un-roll: feed settle1_SIMD two waters per iteration.
  int n = (settleList.size()/2)*2;
  for (int j=0;j < n;j+=2) {
    int ig;
    // Load reference and trial (drifted) positions for the first water
    // into slots 0-2 and the second water into slots 3-5.
    ig = settleList[j];
    for (int i = 0; i < 3; ++i ) {
      ref[i] = atom[ig+i].position;
      pos[i] = atom[ig+i].position + atom[ig+i].velocity * dt;
    }
    ig = settleList[j+1];
    for (int i = 0; i < 3; ++i ) {
      ref[i+3] = atom[ig+i].position;
      pos[i+3] = atom[ig+i].position + atom[ig+i].velocity * dt;
    }
    settle1_SIMD<2>(ref, pos,
        settle_mOrmT, settle_mHrmT, settle_ra,
        settle_rb, settle_rc, settle_rra);

    // Recover constrained velocities from the corrected positions.
    ig = settleList[j];
    for (int i = 0; i < 3; ++i ) {
      velNew[ig+i] = (pos[i] - ref[i])*invdt;
      posNew[ig+i] = pos[i];
    }
    ig = settleList[j+1];
    for (int i = 0; i < 3; ++i ) {
      velNew[ig+i] = (pos[i+3] - ref[i+3])*invdt;
      posNew[ig+i] = pos[i+3];
    }

  }

  // Odd water left over after the unrolled loop.
  if (settleList.size() % 2) {
    int ig = settleList[settleList.size()-1];
    for (int i = 0; i < 3; ++i ) {
      ref[i] = atom[ig+i].position;
      pos[i] = atom[ig+i].position + atom[ig+i].velocity * dt;
    }
    settle1_SIMD<1>(ref, pos,
        settle_mOrmT, settle_mHrmT, settle_ra,
        settle_rb, settle_rc, settle_rra);
    for (int i = 0; i < 3; ++i ) {
      velNew[ig+i] = (pos[i] - ref[i])*invdt;
      posNew[ig+i] = pos[i];
    }
  }

  // Non-water constrained hydrogen groups.
  int posParam = 0;
  for (int j=0;j < rattleList.size();++j) {

    BigReal refx[10];
    BigReal refy[10];
    BigReal refz[10];

    BigReal posx[10];
    BigReal posy[10];
    BigReal posz[10];

    int ig = rattleList[j].ig;
    int icnt = rattleList[j].icnt;
    int hgs = atom[ig].hydrogenGroupSize;
    for (int i = 0; i < hgs; ++i ) {
      ref[i] = atom[ig+i].position;
      pos[i] = atom[ig+i].position;
      // Fixed atoms keep their reference position (no drift).
      if (!(fixedAtomsOn && atom[ig+i].atomFixed)) {
        pos[i] += atom[ig+i].velocity * dt;
      }
      refx[i] = ref[i].x;
      refy[i] = ref[i].y;
      refz[i] = ref[i].z;
      posx[i] = pos[i].x;
      posy[i] = pos[i].y;
      posz[i] = pos[i].z;
    }

    bool done;
    bool consFailure;
    if (icnt == 1) {
      // Single constraint: closed-form pair solve, no iteration needed.
      rattlePair<1>(&rattleParam[posParam],
          refx, refy, refz,
          posx, posy, posz,
          consFailure);
      done = true;
    } else {
      if (simParams->mshakeOn) {
        MSHAKEIterate(icnt, &rattleParam[posParam],
            refx, refy, refz,
            posx, posy, posz,
            tol2, maxiter,
            done, consFailure);
      }
      else if(simParams->lincsOn) {
        LINCS(icnt, &rattleParam[posParam],
            refx, refy, refz,
            posx, posy, posz,
            tol2, maxiter, done, consFailure);
      }
      else
        rattleN(icnt, &rattleParam[posParam],
            refx, refy, refz,
            posx, posy, posz,
            tol2, maxiter,
            done, consFailure);
    }

    // Advance position in rattleParam
    //posParam += icnt;
    posParam += 4;  // rattleParam is padded to a stride of 4 per group
    for (int i = 0; i < hgs; ++i ) {
      pos[i].x = posx[i];
      pos[i].y = posy[i];
      pos[i].z = posz[i];
    }

    for (int i = 0; i < hgs; ++i ) {
      velNew[ig+i] = (pos[i] - ref[i])*invdt;
      posNew[ig+i] = pos[i];
    }

    if ( consFailure ) {
      if ( dieOnError ) {
        iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
          << (atom[ig].id + 1) << "!\n" << endi;
        return -1;  // triggers early exit
      } else {
        iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
          << (atom[ig].id + 1) << "!\n" << endi;
      }
    } else if ( ! done ) {
      if ( dieOnError ) {
        iout << iERROR << "Exceeded RATTLE iteration limit for atom "
          << (atom[ig].id + 1) << "!\n" << endi;
        return -1;  // triggers early exit
      } else {
        iout << iWARN << "Exceeded RATTLE iteration limit for atom "
          << (atom[ig].id + 1) << "!\n" << endi;
      }
    }
  }
  // Finally, we have to go through atoms that are not involved in rattle just so that we have
  // their positions and velocities up-to-date in posNew and velNew
  for (int j=0;j < noconstList.size();++j) {
    int ig = noconstList[j];
    int hgs = atom[ig].hydrogenGroupSize;
    for (int i = 0; i < hgs; ++i ) {
      velNew[ig+i] = atom[ig+i].velocity;
      posNew[ig+i] = atom[ig+i].position;
    }
  }

  if ( invdt == 0 ) {
    // Zero timestep: commit constrained positions only.
    for (int ig = 0; ig < numAtoms; ++ig )
      atom[ig].position = posNew[ig];
  } else if ( virial == 0 ) {
    // No virial requested: commit constrained velocities only.
    for (int ig = 0; ig < numAtoms; ++ig )
      atom[ig].velocity = velNew[ig];
  } else {
    // Commit velocities as explicit constraint forces and add their virial.
    Tensor wc;  // constraint virial
    addRattleForce(invdt, wc);
    *virial += wc;
  }

  return 0;
}
3977 
3978 // RATTLE algorithm from Allen & Tildesley
3979 int HomePatch::rattle1old(const BigReal timestep, Tensor *virial,
3980  SubmitReduction *ppreduction)
3981 {
3982  // CkPrintf("Call HomePatch::rattle1old\n");
3983  Molecule *mol = Node::Object()->molecule;
3985  const int fixedAtomsOn = simParams->fixedAtomsOn;
3986  const int useSettle = simParams->useSettle;
3987  const BigReal dt = timestep / TIMEFACTOR;
3988  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
3989  BigReal tol2 = 2.0 * simParams->rigidTol;
3990  int maxiter = simParams->rigidIter;
3991  int dieOnError = simParams->rigidDie;
3992  int i, iter;
3993  BigReal dsq[10], tmp;
3994  int ial[10], ibl[10];
3995  Vector ref[10]; // reference position
3996  Vector refab[10]; // reference vector
3997  Vector pos[10]; // new position
3998  Vector vel[10]; // new velocity
3999  Vector netdp[10]; // total momentum change from constraint
4000  BigReal rmass[10]; // 1 / mass
4001  int fixed[10]; // is atom fixed?
4002  Tensor wc; // constraint virial
4003  BigReal idz, zmin;
4004  int nslabs;
4005 
4006  // Size of a hydrogen group for water
4007  const WaterModel watmodel = simParams->watmodel;
4008  const int wathgsize = getWaterModelGroupSize(watmodel);
4009 
4010  // Initialize the settle algorithm with water parameters
4011  // settle1() assumes all waters are identical,
4012  // and will generate bad results if they are not.
4013  // XXX this will move to Molecule::build_atom_status when that
4014  // version is debugged
4015  if ( ! settle_initialized ) {
4016  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4017  // find a water
4018  if (atom[ig].rigidBondLength > 0) {
4019  int oatm;
4020  if (watmodel == WaterModel::SWM4) {
4021  oatm = ig+3; // skip over Drude and Lonepair
4022  //printf("ig=%d mass_ig=%g oatm=%d mass_oatm=%g\n",
4023  // ig, atom[ig].mass, oatm, atom[oatm].mass);
4024  }
4025  else {
4026  oatm = ig+1;
4027  // Avoid using the Om site to set this by mistake
4028  if (atom[ig].mass < 0.5 || atom[ig+1].mass < 0.5) {
4029  oatm += 1;
4030  }
4031  }
4032 
4033  // initialize settle water parameters
4034  settle1init(atom[ig].mass, atom[oatm].mass,
4035  atom[ig].rigidBondLength,
4036  atom[oatm].rigidBondLength,
4037  settle_mO, settle_mH,
4038  settle_mOrmT, settle_mHrmT, settle_ra,
4039  settle_rb, settle_rc, settle_rra);
4040  settle_initialized = 1;
4041  break; // done with init
4042  }
4043  }
4044  }
4045 
4046  if (ppreduction) {
4047  nslabs = simParams->pressureProfileSlabs;
4048  idz = nslabs/lattice.c().z;
4049  zmin = lattice.origin().z - 0.5*lattice.c().z;
4050  }
4051 
4052  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4053  int hgs = atom[ig].hydrogenGroupSize;
4054  if ( hgs == 1 ) continue; // only one atom in group
4055  // cache data in local arrays and integrate positions normally
4056  int anyfixed = 0;
4057  for ( i = 0; i < hgs; ++i ) {
4058  ref[i] = atom[ig+i].position;
4059  pos[i] = atom[ig+i].position;
4060  vel[i] = atom[ig+i].velocity;
4061  rmass[i] = (atom[ig+i].mass > 0. ? 1. / atom[ig+i].mass : 0.);
4062  //printf("rmass of %i is %f\n", ig+i, rmass[i]);
4063  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
4064  //printf("fixed status of %i is %i\n", i, fixed[i]);
4065  // undo addVelocityToPosition to get proper reference coordinates
4066  if ( fixed[i] ) { anyfixed = 1; rmass[i] = 0.; } else pos[i] += vel[i] * dt;
4067  }
4068  int icnt = 0;
4069  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
4070  if (hgs != wathgsize) {
4071  char errmsg[256];
4072  sprintf(errmsg, "Water molecule starting with atom %d contains %d atoms "
4073  "but the specified water model requires %d atoms.\n",
4074  atom[ig].id+1, hgs, wathgsize);
4075  NAMD_die(errmsg);
4076  }
4077  // Use SETTLE for water unless some of the water atoms are fixed,
4078  if (useSettle && !anyfixed) {
4079  if (watmodel == WaterModel::SWM4) {
4080  // SWM4 ordering: O D LP H1 H2
4081  // do swap(O,LP) and call settle with subarray O H1 H2
4082  // swap back after we return
4083  Vector lp_ref = ref[2];
4084  Vector lp_pos = pos[2];
4085  Vector lp_vel = vel[2];
4086  ref[2] = ref[0];
4087  pos[2] = pos[0];
4088  vel[2] = vel[0];
4089  settle1(ref+2, pos+2, vel+2, invdt,
4090  settle_mOrmT, settle_mHrmT, settle_ra,
4091  settle_rb, settle_rc, settle_rra);
4092  ref[0] = ref[2];
4093  pos[0] = pos[2];
4094  vel[0] = vel[2];
4095  ref[2] = lp_ref;
4096  pos[2] = lp_pos;
4097  vel[2] = lp_vel;
4098  // determine for LP updated pos and vel
4099  swm4_omrepos(ref, pos, vel, invdt);
4100  }
4101  else {
4102  settle1(ref, pos, vel, invdt,
4103  settle_mOrmT, settle_mHrmT, settle_ra,
4104  settle_rb, settle_rc, settle_rra);
4105  if (watmodel == WaterModel::TIP4) {
4106  tip4_omrepos(ref, pos, vel, invdt);
4107  }
4108  }
4109 
4110  // which slab the hydrogen group will belong to
4111  // for pprofile calculations.
4112  int ppoffset, partition;
4113  if ( invdt == 0 ) for ( i = 0; i < wathgsize; ++i ) {
4114  atom[ig+i].position = pos[i];
4115  } else if ( virial == 0 ) for ( i = 0; i < wathgsize; ++i ) {
4116  atom[ig+i].velocity = vel[i];
4117  } else for ( i = 0; i < wathgsize; ++i ) {
4118  Force df = (vel[i] - atom[ig+i].velocity) * ( atom[ig+i].mass * invdt );
4119  Tensor vir = outer(df, ref[i]);
4120  wc += vir;
4121  f[Results::normal][ig+i] += df;
4122  atom[ig+i].velocity = vel[i];
4123  if (ppreduction) {
4124  // put all the atoms from a water in the same slab. Atom 0
4125  // should be the parent atom.
4126  if (!i) {
4127  BigReal z = pos[i].z;
4128  partition = atom[ig].partition;
4129  int slab = (int)floor((z-zmin)*idz);
4130  if (slab < 0) slab += nslabs;
4131  else if (slab >= nslabs) slab -= nslabs;
4132  ppoffset = 3*(slab + nslabs*partition);
4133  }
4134  ppreduction->item(ppoffset ) += vir.xx;
4135  ppreduction->item(ppoffset+1) += vir.yy;
4136  ppreduction->item(ppoffset+2) += vir.zz;
4137  }
4138  }
4139  continue;
4140  }
4141  if ( !(fixed[1] && fixed[2]) ) {
4142  dsq[icnt] = tmp * tmp; ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
4143  }
4144  }
4145  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4146  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
4147  if ( !(fixed[0] && fixed[i]) ) {
4148  dsq[icnt] = tmp * tmp; ial[icnt] = 0; ibl[icnt] = i; ++icnt;
4149  }
4150  }
4151  }
4152  if ( icnt == 0 ) continue; // no constraints
4153  for ( i = 0; i < icnt; ++i ) {
4154  refab[i] = ref[ial[i]] - ref[ibl[i]];
4155  }
4156  for ( i = 0; i < hgs; ++i ) {
4157  netdp[i] = 0.;
4158  }
4159  int done;
4160  int consFailure;
4161  for ( iter = 0; iter < maxiter; ++iter ) {
4162 //if (iter > 0) CkPrintf("iteration %d\n", iter);
4163  done = 1;
4164  consFailure = 0;
4165  for ( i = 0; i < icnt; ++i ) {
4166  int a = ial[i]; int b = ibl[i];
4167  Vector pab = pos[a] - pos[b];
4168  BigReal pabsq = pab.x*pab.x + pab.y*pab.y + pab.z*pab.z;
4169  BigReal rabsq = dsq[i];
4170  BigReal diffsq = rabsq - pabsq;
4171  if ( fabs(diffsq) > (rabsq * tol2) ) {
4172  Vector &rab = refab[i];
4173  BigReal rpab = rab.x*pab.x + rab.y*pab.y + rab.z*pab.z;
4174  if ( rpab < ( rabsq * 1.0e-6 ) ) {
4175  done = 0;
4176  consFailure = 1;
4177  continue;
4178  }
4179  BigReal rma = rmass[a];
4180  BigReal rmb = rmass[b];
4181  BigReal gab = diffsq / ( 2.0 * ( rma + rmb ) * rpab );
4182  Vector dp = rab * gab;
4183  pos[a] += rma * dp;
4184  pos[b] -= rmb * dp;
4185  if ( invdt != 0. ) {
4186  dp *= invdt;
4187  netdp[a] += dp;
4188  netdp[b] -= dp;
4189  }
4190  done = 0;
4191  }
4192  }
4193  if ( done ) break;
4194  }
4195 
4196  if ( consFailure ) {
4197  if ( dieOnError ) {
4198  iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
4199  << (atom[ig].id + 1) << "!\n" << endi;
4200  return -1; // triggers early exit
4201  } else {
4202  iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
4203  << (atom[ig].id + 1) << "!\n" << endi;
4204  }
4205  } else if ( ! done ) {
4206  if ( dieOnError ) {
4207  iout << iERROR << "Exceeded RATTLE iteration limit for atom "
4208  << (atom[ig].id + 1) << "!\n" << endi;
4209  return -1; // triggers early exit
4210  } else {
4211  iout << iWARN << "Exceeded RATTLE iteration limit for atom "
4212  << (atom[ig].id + 1) << "!\n" << endi;
4213  }
4214  }
4215 
4216  // store data back to patch
4217  int ppoffset, partition;
4218  if ( invdt == 0 ) for ( i = 0; i < hgs; ++i ) {
4219  atom[ig+i].position = pos[i];
4220  } else if ( virial == 0 ) for ( i = 0; i < hgs; ++i ) {
4221  atom[ig+i].velocity = vel[i] + rmass[i] * netdp[i];
4222  } else for ( i = 0; i < hgs; ++i ) {
4223  Force df = netdp[i] * invdt;
4224  Tensor vir = outer(df, ref[i]);
4225  wc += vir;
4226  f[Results::normal][ig+i] += df;
4227  atom[ig+i].velocity = vel[i] + rmass[i] * netdp[i];
4228  if (ppreduction) {
4229  if (!i) {
4230  BigReal z = pos[i].z;
4231  int partition = atom[ig].partition;
4232  int slab = (int)floor((z-zmin)*idz);
4233  if (slab < 0) slab += nslabs;
4234  else if (slab >= nslabs) slab -= nslabs;
4235  ppoffset = 3*(slab + nslabs*partition);
4236  }
4237  ppreduction->item(ppoffset ) += vir.xx;
4238  ppreduction->item(ppoffset+1) += vir.yy;
4239  ppreduction->item(ppoffset+2) += vir.zz;
4240  }
4241  }
4242  }
4243  if ( dt && virial ) *virial += wc;
4244 
4245  return 0;
4246 }
4247 
4248 // RATTLE algorithm from Allen & Tildesley
// Velocity-constraint half of RATTLE: iteratively removes the component of
// relative velocity along each rigid bond so bond lengths are preserved
// through the next step, accumulating the constraint virial as it goes.
// Waters are handled analytically by settle2() unless any water atom is
// fixed or SETTLE is disabled; then they fall through to the iterative path.
// timestep: step in fs (converted to internal units via TIMEFACTOR below).
// *virial: incremented by the accumulated constraint virial at the end.
4249 void HomePatch::rattle2(const BigReal timestep, Tensor *virial)
4250 {
4251  Molecule *mol = Node::Object()->molecule;  // NOTE(review): appears unused in this function
     // NOTE(review): simParams is read below but its declaration is on a line
     // elided from this listing -- presumably a SimParameters pointer.
4253  const int fixedAtomsOn = simParams->fixedAtomsOn;
4254  const int useSettle = simParams->useSettle;
4255  const BigReal dt = timestep / TIMEFACTOR;
4256  Tensor wc; // constraint virial
4257  BigReal tol = simParams->rigidTol;
4258  int maxiter = simParams->rigidIter;
4259  int dieOnError = simParams->rigidDie;
4260  int i, iter;
     // Scratch arrays sized for the maximum hydrogen group (MAXHGS == 10).
4261  BigReal dsqi[10], tmp;
4262  int ial[10], ibl[10];
4263  Vector ref[10]; // reference position
4264  Vector refab[10]; // reference vector
4265  Vector vel[10]; // new velocity
4266  BigReal rmass[10]; // 1 / mass
4267  BigReal redmass[10]; // reduced mass
4268  int fixed[10]; // is atom fixed?
4269 
4270  // Size of a hydrogen group for water
4271  const WaterModel watmodel = simParams->watmodel;
4272  const int wathgsize = getWaterModelGroupSize(watmodel);
4273 
4274  // CkPrintf("In rattle2!\n");
     // Walk the patch one hydrogen group (parent atom + attached H's) at a time.
4275  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4276  // CkPrintf("ig=%d\n",ig);
4277  int hgs = atom[ig].hydrogenGroupSize;
4278  if ( hgs == 1 ) continue; // only one atom in group
4279  // cache data in local arrays and integrate positions normally
4280  int anyfixed = 0;
4281  for ( i = 0; i < hgs; ++i ) {
4282  ref[i] = atom[ig+i].position;
4283  vel[i] = atom[ig+i].velocity;
4284  rmass[i] = atom[ig+i].mass > 0. ? 1. / atom[ig+i].mass : 0.;
4285  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
     // fixed atoms get zero inverse mass so constraints never move them
4286  if ( fixed[i] ) { anyfixed = 1; rmass[i] = 0.; }
4287  }
4288  int icnt = 0;
     // A positive rigidBondLength on the parent atom marks a rigid water
     // (the value is the H1-H2 distance).
4289  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
4290  if (hgs != wathgsize) {
4291  NAMD_bug("Hydrogen group error caught in rattle2().");
4292  }
4293  // Use SETTLE for water unless some of the water atoms are fixed,
4294  if (useSettle && !anyfixed) {
4295  if (watmodel == WaterModel::SWM4) {
4296  // SWM4 ordering: O D LP H1 H2
4297  // do swap(O,LP) and call settle with subarray O H1 H2
4298  // swap back after we return
4299  Vector lp_ref = ref[2];
4300  // Vector lp_vel = vel[2];
4301  ref[2] = ref[0];
4302  vel[2] = vel[0];
4303  settle2(atom[ig].mass, atom[ig+3].mass, ref+2, vel+2, dt, virial);
4304  ref[0] = ref[2];
4305  vel[0] = vel[2];
4306  ref[2] = lp_ref;
4307  // vel[2] = vel[0]; // set LP vel to O vel
4308  }
4309  else {
4310  settle2(atom[ig].mass, atom[ig+1].mass, ref, vel, dt, virial);
4311  if (watmodel == WaterModel::TIP4) {
     // TIP4P lone pair rides on the oxygen: copy its velocity
4312  vel[3] = vel[0];
4313  }
4314  }
4315  for (i=0; i<hgs; i++) {
4316  atom[ig+i].velocity = vel[i];
4317  }
4318  continue;
4319  }
     // Fallback for fixed atoms / SETTLE off: queue the H1-H2 water
     // constraint for the iterative solver (skip if both ends are fixed).
4320  if ( !(fixed[1] && fixed[2]) ) {
4321  redmass[icnt] = 1. / (rmass[1] + rmass[2]);
4322  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
4323  }
4324  }
4325  // CkPrintf("Loop 2\n");
4326  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4327  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
4328  if ( !(fixed[0] && fixed[i]) ) {
4329  redmass[icnt] = 1. / (rmass[0] + rmass[i]);
4330  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 0;
4331  ibl[icnt] = i; ++icnt;
4332  }
4333  }
4334  }
4335  if ( icnt == 0 ) continue; // no constraints
4336  // CkPrintf("Loop 3\n");
4337  for ( i = 0; i < icnt; ++i ) {
4338  refab[i] = ref[ial[i]] - ref[ibl[i]];
4339  }
4340  // CkPrintf("Loop 4\n");
4341  int done;
     // Gauss-Seidel style sweep: repeatedly project the along-bond velocity
     // out of each constrained pair until every constraint is within tol.
4342  for ( iter = 0; iter < maxiter; ++iter ) {
4343  done = 1;
4344  for ( i = 0; i < icnt; ++i ) {
4345  int a = ial[i]; int b = ibl[i];
4346  Vector vab = vel[a] - vel[b];
4347  Vector &rab = refab[i];
4348  BigReal rabsqi = dsqi[i];
4349  BigReal rvab = rab.x*vab.x + rab.y*vab.y + rab.z*vab.z;
     // |r.v| * dt / r^2 estimates fractional bond-length drift per step
4350  if ( (fabs(rvab) * dt * rabsqi) > tol ) {
4351  Vector dp = rab * (-rvab * redmass[i] * rabsqi);
4352  wc += outer(dp,rab);
4353  vel[a] += rmass[a] * dp;
4354  vel[b] -= rmass[b] * dp;
4355  done = 0;
4356  }
4357  }
4358  if ( done ) break;
4359  //if (done) { if (iter > 0) CkPrintf("iter=%d\n", iter); break; }
4360  }
4361  if ( ! done ) {
4362  if ( dieOnError ) {
4363  NAMD_die("Exceeded maximum number of iterations in rattle2().");
4364  } else {
4365  iout << iWARN <<
4366  "Exceeded maximum number of iterations in rattle2().\n" << endi;
4367  }
4368  }
4369  // store data back to patch
4370  for ( i = 0; i < hgs; ++i ) {
4371  atom[ig+i].velocity = vel[i];
4372  }
4373  }
4374  // CkPrintf("Leaving rattle2!\n");
4375  // check that there isn't a constant needed here!
4376  *virial += wc / ( 0.5 * dt );
4377 
4378 }
4379 
4380 
4381 // Adjust gradients for minimizer
// Variant of rattle2 used during minimization: with all inverse masses set
// to 1.0 the constraint projection is purely geometric, removing the
// along-bond component from either the forces (forces == true) or the
// velocities (forces == false).  Interface and flow mirror rattle2().
4382 void HomePatch::minimize_rattle2(const BigReal timestep, Tensor *virial, bool forces)
4383 {
4384  Molecule *mol = Node::Object()->molecule;  // NOTE(review): appears unused in this function
     // NOTE(review): simParams declaration is on a line elided from this listing.
4386  Force *f1 = f[Results::normal].begin();  // raw pointer into the normal-force array
4387  const int fixedAtomsOn = simParams->fixedAtomsOn;
4388  const int useSettle = simParams->useSettle;
4389  const BigReal dt = timestep / TIMEFACTOR;
4390  Tensor wc; // constraint virial
4391  BigReal tol = simParams->rigidTol;
4392  int maxiter = simParams->rigidIter;
4393  int dieOnError = simParams->rigidDie;
4394  int i, iter;
4395  BigReal dsqi[10], tmp;
4396  int ial[10], ibl[10];
4397  Vector ref[10]; // reference position
4398  Vector refab[10]; // reference vector
4399  Vector vel[10]; // new velocity
4400  BigReal rmass[10]; // 1 / mass
4401  BigReal redmass[10]; // reduced mass
4402  int fixed[10]; // is atom fixed?
4403 
4404  // Size of a hydrogen group for water
4405  const WaterModel watmodel = simParams->watmodel;
4406  const int wathgsize = getWaterModelGroupSize(watmodel);
4407 
4408  // CkPrintf("In rattle2!\n");
4409  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
4410  // CkPrintf("ig=%d\n",ig);
4411  int hgs = atom[ig].hydrogenGroupSize;
4412  if ( hgs == 1 ) continue; // only one atom in group
4413  // cache data in local arrays and integrate positions normally
4414  int anyfixed = 0;
4415  for ( i = 0; i < hgs; ++i ) {
4416  ref[i] = atom[ig+i].position;
     // "vel" holds forces when adjusting gradients, velocities otherwise
4417  vel[i] = ( forces ? f1[ig+i] : atom[ig+i].velocity );
     // unit inverse mass: projection is geometric, not mass-weighted
4418  rmass[i] = 1.0;
4419  fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
4420  if ( fixed[i] ) { anyfixed = 1; rmass[i] = 0.; }
4421  }
4422  int icnt = 0;
4423  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
4424  if (hgs != wathgsize) {
4425  NAMD_bug("Hydrogen group error caught in rattle2().");
4426  }
4427  // Use SETTLE for water unless some of the water atoms are fixed,
4428  if (useSettle && !anyfixed) {
4429  if (watmodel == WaterModel::SWM4) {
4430  // SWM4 ordering: O D LP H1 H2
4431  // do swap(O,LP) and call settle with subarray O H1 H2
4432  // swap back after we return
4433  Vector lp_ref = ref[2];
4434  // Vector lp_vel = vel[2];
4435  ref[2] = ref[0];
4436  vel[2] = vel[0];
     // unit masses here to match the geometric (unweighted) projection
4437  settle2(1.0, 1.0, ref+2, vel+2, dt, virial);
4438  ref[0] = ref[2];
4439  vel[0] = vel[2];
4440  ref[2] = lp_ref;
4441  // vel[2] = vel[0]; // set LP vel to O vel
4442  }
4443  else {
4444  settle2(1.0, 1.0, ref, vel, dt, virial);
4445  if (watmodel == WaterModel::TIP4) {
4446  vel[3] = vel[0];
4447  }
4448  }
4449  for (i=0; i<hgs; i++) {
4450  ( forces ? f1[ig+i] : atom[ig+i].velocity ) = vel[i];
4451  }
4452  continue;
4453  }
4454  if ( !(fixed[1] && fixed[2]) ) {
4455  redmass[icnt] = 1. / (rmass[1] + rmass[2]);
4456  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
4457  }
4458  }
4459  // CkPrintf("Loop 2\n");
4460  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4461  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
4462  if ( !(fixed[0] && fixed[i]) ) {
4463  redmass[icnt] = 1. / (rmass[0] + rmass[i]);
4464  dsqi[icnt] = 1. / (tmp * tmp); ial[icnt] = 0;
4465  ibl[icnt] = i; ++icnt;
4466  }
4467  }
4468  }
4469  if ( icnt == 0 ) continue; // no constraints
4470  // CkPrintf("Loop 3\n");
4471  for ( i = 0; i < icnt; ++i ) {
4472  refab[i] = ref[ial[i]] - ref[ibl[i]];
4473  }
4474  // CkPrintf("Loop 4\n");
4475  int done;
     // Iterative projection sweep, same scheme as rattle2().
4476  for ( iter = 0; iter < maxiter; ++iter ) {
4477  done = 1;
4478  for ( i = 0; i < icnt; ++i ) {
4479  int a = ial[i]; int b = ibl[i];
4480  Vector vab = vel[a] - vel[b];
4481  Vector &rab = refab[i];
4482  BigReal rabsqi = dsqi[i];
4483  BigReal rvab = rab.x*vab.x + rab.y*vab.y + rab.z*vab.z;
4484  if ( (fabs(rvab) * dt * rabsqi) > tol ) {
4485  Vector dp = rab * (-rvab * redmass[i] * rabsqi);
4486  wc += outer(dp,rab);
4487  vel[a] += rmass[a] * dp;
4488  vel[b] -= rmass[b] * dp;
4489  done = 0;
4490  }
4491  }
4492  if ( done ) break;
4493  //if (done) { if (iter > 0) CkPrintf("iter=%d\n", iter); break; }
4494  }
4495  if ( ! done ) {
     // NOTE(review): messages say "rattle2()" though this is minimize_rattle2
4496  if ( dieOnError ) {
4497  NAMD_die("Exceeded maximum number of iterations in rattle2().");
4498  } else {
4499  iout << iWARN <<
4500  "Exceeded maximum number of iterations in rattle2().\n" << endi;
4501  }
4502  }
4503  // store data back to patch
4504  for ( i = 0; i < hgs; ++i ) {
4505  ( forces ? f1[ig+i] : atom[ig+i].velocity ) = vel[i];
4506  }
4507  }
4508  // CkPrintf("Leaving rattle2!\n");
4509  // check that there isn't a constant needed here!
4510  *virial += wc / ( 0.5 * dt );
4511 
4512 }
4513 
4514 
4516 //
4517 // begin SOA rattle
4518 //
4519 
// Body of the SOA rattle-list builder (the signature line was elided from
// this listing -- presumably HomePatch::buildRattleList_SOA()).
// Partitions solute hydrogen groups into rattleList (groups with rigid-bond
// constraints, plus per-constraint RattleParam entries) and noconstList
// (groups needing no constraint work), and lazily initializes SETTLE
// parameters by scanning the solvent atoms for the first water.
4521 #ifdef DEBUG_MINIMIZE
4522  if (patchID == 0) {
4523  printf("Step %d, patch %d: buildRattleList_SOA()\n",
4524  flags.step, (int)patchID);
4525  }
4526 #endif
4527 
4528  // Called when rattleListValid_SOA is false.
4529  // List will stay valid until atom migration or some other event,
4530  // such as exchanging replicas, SCRIPT_REVERT from Tcl, reinit atoms.
4531 
4532  const double * __restrict pos_x = patchDataSOA.pos_x;
4533  const double * __restrict pos_y = patchDataSOA.pos_y;
4534  const double * __restrict pos_z = patchDataSOA.pos_z;
4535  const float * __restrict mass = patchDataSOA.mass;
4536  const double * __restrict recipMass = patchDataSOA.recipMass;
4537  const float * __restrict rigidBondLength = patchDataSOA.rigidBondLength;
4538  const int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
4539 
     // NOTE(review): simParams declaration is on a line elided from this listing.
4541  const int fixedAtomsOn = simParams->fixedAtomsOn;
4542  const int useSettle = simParams->useSettle;
4543 
4544  // Size of a hydrogen group for water
4545  const WaterModel watmodel = simParams->watmodel;
4546  const int wathgsize = getWaterModelGroupSize(watmodel);
4547 
4548  // Initialize the settle algorithm with water parameters
4549  // settle1() assumes all waters are identical,
4550  // and will generate bad results if they are not.
4551  // XXX this will move to Molecule::build_atom_status when that
4552  // version is debugged
4553  if ( ! settle_initialized ) {
     // solvent (waters) is assumed sorted after the solute atoms
4554  for (int ig = numSoluteAtoms;
4555  ig < numAtoms;
4556  ig += hydrogenGroupSize[ig]) {
4557  // find a water
4558  if (rigidBondLength[ig] > 0) {
4559  int oatm;
4560  if (watmodel == WaterModel::SWM4) {
4561  oatm = ig+3; // skip over Drude and Lonepair
4562  //printf("ig=%d mass_ig=%g oatm=%d mass_oatm=%g\n",
4563  // ig, atom[ig].mass, oatm, atom[oatm].mass);
4564  }
4565  else {
4566  oatm = ig+1;
4567  // Avoid using the Om site to set this by mistake
4568  if (mass[ig] < 0.5 || mass[ig+1] < 0.5) {
4569  oatm += 1;
4570  }
4571  }
4572 
4573  // initialize settle water parameters
4574  settle1init(mass[ig], mass[oatm],
4575  rigidBondLength[ig],
4576  rigidBondLength[oatm],
4577  settle_mO, settle_mH,
4578  settle_mOrmT, settle_mHrmT, settle_ra,
4579  settle_rb, settle_rc, settle_rra);
4580  settle_initialized = 1;
4581  break; // done with init
4582  }
4583  }
4584  }
4585 
     // per-group scratch (MAXHGS == 10 atoms max per hydrogen group)
4586  BigReal dsq[10];
4587  int ial[10];
4588  int ibl[10];
4589 
4590  rattleList.clear();
4591  noconstList.clear();
4592  rattleParam.clear();
4593 
4594  for (int ig = 0; ig < numSoluteAtoms; ig += hydrogenGroupSize[ig]) {
4595  int hgs = hydrogenGroupSize[ig];
4596  if (hgs == 0) {
     // NOTE(review): "\n." at the end of this message looks like a typo for ".\n"
4597  NAMD_bug("Hydrogen group size 0. Exiting to avoid infinite loop\n.");
4598  } else if ( hgs == 1 ) {
4599  // only one atom in group
4600  noconstList.push_back(ig);
4601  continue;
4602  }
4603  int icnt = 0;
4604  // XXX convert rigid bond length to double to square it?
4605  BigReal tmp = rigidBondLength[ig];
     // parent-atom rigid bond marks the H1-H2 constraint of a water
4606  if (tmp > 0.0) { // for water
4607  dsq[icnt] = tmp * tmp;
4608  ial[icnt] = 1;
4609  ibl[icnt] = 2;
4610  ++icnt;
4611  }
4612  for (int i = 1; i < hgs; ++i ) { // normal bonds to mother atom
4613  if ( ( tmp = rigidBondLength[ig+i] ) > 0 ) {
4614  dsq[icnt] = tmp * tmp;
4615  ial[icnt] = 0;
4616  ibl[icnt] = i;
4617  ++icnt;
4618  }
4619  }
4620  if ( icnt == 0 ) {
4621  // no constraints
4622  noconstList.push_back(ig);
4623  continue;
4624  }
4625  // Store to Rattle -list
4626  RattleList rattleListElem;
4627  rattleListElem.ig = ig;
4628  rattleListElem.icnt = icnt;
4629  rattleList.push_back(rattleListElem);
4630  for (int i = 0; i < icnt; ++i ) {
4631  int a = ial[i];
4632  int b = ibl[i];
4633  RattleParam rattleParamElem;
4634  rattleParamElem.ia = a;
4635  rattleParamElem.ib = b;
4636  rattleParamElem.dsq = dsq[i];
4637  rattleParamElem.rma = recipMass[ig+a];
4638  rattleParamElem.rmb = recipMass[ig+b];
4639  rattleParam.push_back(rattleParamElem);
4640  }
4641  //adding dummy atom in the hydrogen group
     // pad every group to exactly 4 RattleParam entries so consumers can
     // advance through rattleParam with a fixed stride of 4
4642  for (int i = icnt; i < 4; ++i )
4643  {
4644  RattleParam rattleParamElem;
4645  rattleParamElem.ia = 0;
4646  rattleParamElem.ib = 0;
4647  rattleParamElem.dsq = 0;
4648  rattleParamElem.rma = 0;
4649  rattleParamElem.rmb = 0;
4650  rattleParam.push_back(rattleParamElem);
4651  }
4652 
4653  }
4654 }
4655 
4656 // dt scaled by 1/TIMEFACTOR
4657 // Removed code handling fixed atoms.
4658 // XXX ppreduction == NULL
// Position-constraint (SHAKE) half of RATTLE operating on the patch's
// structure-of-arrays data.  Takes a provisional full step for all atoms,
// fixes the waters in one vectorized settle1_SOA() call, iterates each
// constrained solute group (rattlePair / MSHAKE / LINCS / rattleN), then
// recovers the constrained velocities from the corrected positions and
// accumulates the constraint virial.  Returns -1 on constraint failure
// when rigidDie is set, 0 otherwise.
4659 int HomePatch::rattle1_SOA(const BigReal dt, Tensor *virial,
4660  SubmitReduction *ppreduction) {
4661 
4663 
4664 #if 1
4665  if (!rattleListValid_SOA) {
     // NOTE(review): a line is elided from this listing here -- presumably
     // the call that rebuilds the rattle list (buildRattleList_SOA).
4667  rattleListValid_SOA = true;
4668  }
4669 #endif
4670 
4671  double * __restrict pos_x = patchDataSOA.pos_x;
4672  double * __restrict pos_y = patchDataSOA.pos_y;
4673  double * __restrict pos_z = patchDataSOA.pos_z;
4674  double * __restrict vel_x = patchDataSOA.vel_x;
4675  double * __restrict vel_y = patchDataSOA.vel_y;
4676  double * __restrict vel_z = patchDataSOA.vel_z;
4677  double * __restrict posNew_x = patchDataSOA.posNew_x;
4678  double * __restrict posNew_y = patchDataSOA.posNew_y;
4679  double * __restrict posNew_z = patchDataSOA.posNew_z;
4680  double * __restrict velNew_x = patchDataSOA.velNew_x;
4681  double * __restrict velNew_y = patchDataSOA.velNew_y;
4682  double * __restrict velNew_z = patchDataSOA.velNew_z;
4683  const int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
4684 #ifdef __INTEL_COMPILER
4685  __assume_aligned(pos_x,64);
4686  __assume_aligned(pos_y,64);
4687  __assume_aligned(pos_z,64);
4688  __assume_aligned(vel_x,64);
4689  __assume_aligned(vel_y,64);
4690  __assume_aligned(vel_z,64);
4691  __assume_aligned(posNew_x,64);
4692  __assume_aligned(posNew_y,64);
4693  __assume_aligned(posNew_z,64);
4694  __assume_aligned(velNew_x,64);
4695  __assume_aligned(velNew_y,64);
4696  __assume_aligned(velNew_z,64);
4697  __assume_aligned(hydrogenGroupSize,64);
4698 #endif
4699 
4700  const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
4701  const BigReal tol2 = 2.0 * simParams->rigidTol;
4702  int maxiter = simParams->rigidIter;
4703  int dieOnError = simParams->rigidDie;
4704 
4705  // calculate full step update to all positions
4706  for (int i=0; i < numAtoms; i++) {
4707  posNew_x[i] = pos_x[i] + vel_x[i] * dt;
4708  posNew_y[i] = pos_y[i] + vel_y[i] * dt;
4709  posNew_z[i] = pos_z[i] + vel_z[i] * dt;
4710  }
4711 
4712  // call settle to process all waters at once
4713  // XXX this assumes sorting waters into consecutive part of list
4714  //int numWaters = settleList.size();
4715  if (numSolventAtoms > 0) {
4716  int n = numSoluteAtoms; // index of first water in list is past solute
4717  settle1_SOA(&pos_x[n], &pos_y[n], &pos_z[n],
4718  &posNew_x[n], &posNew_y[n], &posNew_z[n],
4719  numWaters,
4720  settle_mOrmT, settle_mHrmT, settle_ra,
4721  settle_rb, settle_rc, settle_rra);
4722  }
     // Iterate the constrained (non-water) hydrogen groups.
4723  int posParam = 0;
4724  for (int j=0;j < rattleList.size();++j) {
4725  int ig = rattleList[j].ig;
4726  int icnt = rattleList[j].icnt;
4727  bool done;
4728  bool consFailure;
4729  if (icnt == 1) {
     // single constraint has a closed-form solution
4730  rattlePair<1>(&rattleParam[posParam],
4731  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4732  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4733  consFailure
4734  );
4735  done = true;
4736  } else {
4737  if (simParams->mshakeOn) {
4738  //buildConstantMatrix();
4739  MSHAKEIterate(icnt, &rattleParam[posParam],
4740  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4741  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4742  tol2, maxiter,
4743  done, consFailure);
4744  }
4745  else if(simParams->lincsOn) {
4746  LINCS(icnt, &rattleParam[posParam],
4747  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4748  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4749  tol2, maxiter, done, consFailure);
4750  }
4751 
4752  else
4753  rattleN(icnt, &rattleParam[posParam],
4754  &pos_x[ig], &pos_y[ig], &pos_z[ig],
4755  &posNew_x[ig], &posNew_y[ig], &posNew_z[ig],
4756  tol2, maxiter,
4757  done, consFailure);
4758  }
4759 
4760  // Advance position in rattleParam
4761 // posParam += icnt;
     // rattleParam is padded to a fixed stride of 4 entries per group
4762  posParam += 4;
4763  if ( consFailure ) {
4764  if ( dieOnError ) {
4765  iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
4766  << (atom[ig].id + 1) << "!\n" << endi;
4767  return -1; // triggers early exit
4768  } else {
4769  iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
4770  << (atom[ig].id + 1) << "!\n" << endi;
4771  }
4772  } else if ( ! done ) {
4773  if ( dieOnError ) {
4774  iout << iERROR << "Exceeded RATTLE iteration limit for atom "
4775  << (atom[ig].id + 1) << "!\n" << endi;
4776  return -1; // triggers early exit
4777  } else {
4778  iout << iWARN << "Exceeded RATTLE iteration limit for atom "
4779  << (atom[ig].id + 1) << "!\n" << endi;
4780  }
4781  }
4782 
4783  } // end rattle
4784  // Now that all new positions are known, determine new velocities
4785  // needed to reach new position.
4786  for (int i=0; i < numAtoms; i++) {
4787  velNew_x[i] = (posNew_x[i] - pos_x[i]) * invdt;
4788  velNew_y[i] = (posNew_y[i] - pos_y[i]) * invdt;
4789  velNew_z[i] = (posNew_z[i] - pos_z[i]) * invdt;
4790  }
4791 
4792  // Bring new positions and velocities back to reference for noconstList.
4793  // No need to check hydrogen group size, since no fixed atoms.
4794  int numNoconst = noconstList.size();
4795  for (int j=0; j < numNoconst; j++) {
4796  int ig = noconstList[j];
4797  posNew_x[ig] = pos_x[ig];
4798  posNew_y[ig] = pos_y[ig];
4799  posNew_z[ig] = pos_z[ig];
4800  velNew_x[ig] = vel_x[ig];
4801  velNew_y[ig] = vel_y[ig];
4802  velNew_z[ig] = vel_z[ig];
4803  }
4804 
     // Commit results.  invdt == 0 means dt was zero: update positions only.
4805  if ( invdt == 0 ) {
4806  for (int ig = 0; ig < numAtoms; ++ig ) {
4807  pos_x[ig] = posNew_x[ig];
4808  pos_y[ig] = posNew_y[ig];
4809  pos_z[ig] = posNew_z[ig];
4810  }
4811  }
4812  else if ( virial == 0 ) {
     // no virial requested: just adopt the constrained velocities
4813  for (int ig = 0; ig < numAtoms; ++ig ) {
4814  vel_x[ig] = velNew_x[ig];
4815  vel_y[ig] = velNew_y[ig];
4816  vel_z[ig] = velNew_z[ig];
4817  }
4818  }
4819  else {
     // accumulate constraint forces df = m * dv / dt and their virial
4820  const float * __restrict mass = patchDataSOA.mass;
4821  double * __restrict f_normal_x = patchDataSOA.f_normal_x;
4822  double * __restrict f_normal_y = patchDataSOA.f_normal_y;
4823  double * __restrict f_normal_z = patchDataSOA.f_normal_z;
4824 #ifdef __INTEL_COMPILER
4825  __assume_aligned(mass,64);
4826  __assume_aligned(f_normal_x,64);
4827  __assume_aligned(f_normal_y,64);
4828  __assume_aligned(f_normal_z,64);
4829 #endif
4830  Tensor vir; // = 0
4831  for (int ig = 0; ig < numAtoms; ig++) {
4832  BigReal df_x = (velNew_x[ig] - vel_x[ig]) * ( mass[ig] * invdt );
4833  BigReal df_y = (velNew_y[ig] - vel_y[ig]) * ( mass[ig] * invdt );
4834  BigReal df_z = (velNew_z[ig] - vel_z[ig]) * ( mass[ig] * invdt );
4835  f_normal_x[ig] += df_x;
4836  f_normal_y[ig] += df_y;
4837  f_normal_z[ig] += df_z;
4838  vir.xx += df_x * pos_x[ig];
4839  vir.xy += df_x * pos_y[ig];
4840  vir.xz += df_x * pos_z[ig];
4841  vir.yx += df_y * pos_x[ig];
4842  vir.yy += df_y * pos_y[ig];
4843  vir.yz += df_y * pos_z[ig];
4844  vir.zx += df_z * pos_x[ig];
4845  vir.zy += df_z * pos_y[ig];
4846  vir.zz += df_z * pos_z[ig];
4847  }
4848  *virial += vir;
4849  for (int ig = 0; ig < numAtoms; ig++) {
4850  vel_x[ig] = velNew_x[ig];
4851  vel_y[ig] = velNew_y[ig];
4852  vel_z[ig] = velNew_z[ig];
4853  }
4854  }
4855 
4856  return 0;
4857 }
4858 
4859 //
4860 // end SOA rattle
4861 //
4863 
4864 
4865 // BEGIN LA
// Body of the Lowe-Andersen velocity packer (signature line elided from this
// listing).  Copies each atom's velocity and mass into the CompAtom array v,
// reusing the position/charge fields as carriers for velocity/mass.
4867 {
4868  DebugM(2, "loweAndersenVelocities\n");
4869  Molecule *mol = Node::Object()->molecule;  // NOTE(review): appears unused here
4871  v.resize(numAtoms);
4872  for (int i = 0; i < numAtoms; ++i) {
4873  //v[i] = p[i];
4874  // co-opt CompAtom structure to pass velocity and mass information
4875  v[i].position = atom[i].velocity;
4876  v[i].charge = atom[i].mass;
4877  }
4878  DebugM(2, "loweAndersenVelocities\n");
4879 }
4880 
// Body of the Lowe-Andersen cleanup routine (signature line elided from this
// listing): releases the temporary velocity/mass CompAtom array.
4882 {
4883  DebugM(2, "loweAndersenFinish\n");
4884  v.resize(0);
4885 }
4886 // END LA
4887 
4888 //LCPO
// Body of the LCPO type setter (signature line elided from this listing):
// fills lcpoType[] for each local atom by looking up the molecule-wide
// LCPO parameter type via the atom's global id.
4890  Molecule *mol = Node::Object()->molecule;
4891  const int *lcpoParamType = mol->getLcpoParamType();
4892 
4894  for (int i = 0; i < numAtoms; i++) {
4895  lcpoType[i] = lcpoParamType[pExt[i].id];
4896  }
4897 }
4898 
4899 //set intrinsic radii of atom when doMigration
// Body of the GBIS intrinsic-radii setter (signature line elided from this
// listing).  For each atom stores the offset radius r0 and screened radius
// s0 = screen*r0, both derived from the atom's mass.
4901  intRad.resize(numAtoms*2);
4902  intRad.setall(0);
4903  Molecule *mol = Node::Object()->molecule;  // NOTE(review): appears unused here
     // NOTE(review): simParams declaration is on a line elided from this listing.
4905  Real offset = simParams->coulomb_radius_offset;
4906  for (int i = 0; i < numAtoms; i++) {
4907  Real rad = MassToRadius(atom[i].mass);//in ComputeGBIS.inl
4908  Real screen = MassToScreen(atom[i].mass);//same
4909  intRad[2*i+0] = rad - offset;//r0
4910  intRad[2*i+1] = screen*(rad - offset);//s0
4911  }
4912 }
4913 
4914 //compute born radius after phase 1, before phase 2
// Body of the post-phase-1 GBIS step (signature line elided from this
// listing).  Folds the locally computed psiSum into psiFin, evaluates the
// effective Born radius for each atom via the tanh rescaling formula,
// clamps it to alphaMax, then kicks off phase 2.
4916 
     // NOTE(review): simParams declaration is on a line elided from this listing.
4918  BigReal alphaMax = simParams->alpha_max;
4919  BigReal delta = simParams->gbis_delta;
4920  BigReal beta = simParams->gbis_beta;
4921  BigReal gamma = simParams->gbis_gamma;
4922  BigReal coulomb_radius_offset = simParams->coulomb_radius_offset;
4923 
4924  BigReal rhoi;
4925  BigReal rhoi0;
4926  //calculate bornRad from psiSum
4927  for (int i = 0; i < numAtoms; i++) {
4928  rhoi0 = intRad[2*i];  // offset intrinsic radius r0
4929  rhoi = rhoi0+coulomb_radius_offset;
4930  psiFin[i] += psiSum[i];
4931  psiFin[i] *= rhoi0;
     // Born radius: 1/alpha = 1/rho0 - tanh(psi*(delta - beta*psi + gamma*psi^2))/rho
4932  bornRad[i]=1/(1/rhoi0-1/rhoi*tanh(psiFin[i]*(delta+psiFin[i]*(-beta+gamma*psiFin[i]))));
4933  bornRad[i] = (bornRad[i] > alphaMax) ? alphaMax : bornRad[i];
4934 #ifdef PRINT_COMP
4935  CkPrintf("BORNRAD(%04i)[%04i] = % .4e\n",flags.sequence,pExt[i].id,bornRad[i]);
4936 #endif
4937  }
4938 
4939  gbisP2Ready();
4940 }
4941 
4942 //compute dHdrPrefix after phase 2, before phase 3
// Body of the post-phase-2 GBIS step (signature line elided from this
// listing).  Combines proxy-accumulated dEda sums with each atom's diagonal
// self term, multiplies by d(alpha)/dr (the Born-radius derivative factor),
// leaving dHdrPrefix ready for the phase-3 force evaluation, then triggers
// phase 3.
4944 
     // NOTE(review): simParams declaration is on a line elided from this listing.
4946  BigReal delta = simParams->gbis_delta;
4947  BigReal beta = simParams->gbis_beta;
4948  BigReal gamma = simParams->gbis_gamma;
4949  BigReal epsilon_s = simParams->solvent_dielectric;  // NOTE(review): unused below
4950  BigReal epsilon_p = simParams->dielectric;  // NOTE(review): unused below
4951  BigReal epsilon_s_i = 1/simParams->solvent_dielectric;
4952  BigReal epsilon_p_i = 1/simParams->dielectric;
4953  BigReal coulomb_radius_offset = simParams->coulomb_radius_offset;
4954  BigReal kappa = simParams->kappa;
4955  BigReal fij, expkappa, Dij, dEdai, dedasum;
4956  BigReal rhoi, rhoi0, psii, nbetapsi;
4957  BigReal gammapsi2, tanhi, daidr;
4958  for (int i = 0; i < numAtoms; i++) {
4959  //add diagonal dEda term
4960  dHdrPrefix[i] += dEdaSum[i];//accumulated from proxies
4961  fij = bornRad[i];//inf
4962  expkappa = exp(-kappa*fij);//0
4963  Dij = epsilon_p_i - expkappa*epsilon_s_i;//dielectric term
4964  //calculate dHij prefix
     // diagonal (i==j) self-energy derivative contribution
4965  dEdai = -0.5*COULOMB*atom[i].charge*atom[i].charge
4966  *(kappa*epsilon_s_i*expkappa-Dij/fij)/bornRad[i];
4967  dHdrPrefix[i] += dEdai;
4968  dedasum = dHdrPrefix[i];  // NOTE(review): stored but not used afterwards
4969 
4970  rhoi0 = intRad[2*i];
4971  rhoi = rhoi0+coulomb_radius_offset;
4972  psii = psiFin[i];
4973  nbetapsi = -beta*psii;
4974  gammapsi2 = gamma*psii*psii;
4975  tanhi = tanh(psii*(delta+nbetapsi+gammapsi2));
     // daidr = d(alpha_i)/d(psi_i) chain-rule factor; the repeated terms
     // form the derivative delta + 2*(-beta)*psi + 3*gamma*psi^2
4976  daidr = bornRad[i]*bornRad[i]*rhoi0/rhoi*(1-tanhi*tanhi)
4977  * (delta+nbetapsi+nbetapsi+gammapsi2+gammapsi2+gammapsi2);
4978  dHdrPrefix[i] *= daidr;//dHdrPrefix previously equaled dEda
4979 #ifdef PRINT_COMP
4980  CkPrintf("DHDR(%04i)[%04i] = % .4e\n",flags.sequence,pExt[i].id,dHdrPrefix[i]);
4981 #endif
4982  }
4983  gbisP3Ready();
4984 }
4985 
4986 //send born radius to proxies to begin phase 2
// Body of the phase-2 kickoff (signature line elided from this listing):
// broadcasts this patch's Born radii to every proxy via ProxyMgr messages.
4988  if (proxy.size() > 0) {
4989  CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
4990  for (int i = 0; i < proxy.size(); i++) {
4991  int node = proxy[i];
     // NOTE(review): the message allocation line is elided from this listing;
     // msg presumably is a new Born-radius proxy message sized for numAtoms.
4993  msg->patch = patchID;
4994  msg->origPe = CkMyPe();
4995  memcpy(msg->bornRad,bornRad.begin(),numAtoms*sizeof(Real));
4996  msg->destPe = node;
4997  int seq = flags.sequence;
     // NOTE(review): the priority computation line is elided from this listing.
4999  SET_PRIORITY(msg,seq,priority);
5000  cp[node].recvData(msg);
5001  }
5002  }
5004 }
5005 
5006 //send dHdrPrefix to proxies to begin phase 3
// Body of the phase-3 kickoff (signature line elided from this listing):
// broadcasts dHdrPrefix to every proxy; payload length is zero unless full
// electrostatics are being computed this step.
5008  if (proxy.size() > 0) {
5009  CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
5010  //only nonzero message should be sent for doFullElec
5011  int msgAtoms = (flags.doFullElectrostatics) ? numAtoms : 0;
5012  for (int i = 0; i < proxy.size(); i++) {
5013  int node = proxy[i];
     // NOTE(review): the message allocation line is elided from this listing.
5015  msg->patch = patchID;
5016  msg->dHdrPrefixLen = msgAtoms;
5017  msg->origPe = CkMyPe();
5018  memcpy(msg->dHdrPrefix, dHdrPrefix.begin(), msgAtoms*sizeof(Real));
5019  msg->destPe = node;
5020  int seq = flags.sequence;
     // NOTE(review): the priority computation line is elided from this listing.
5022  SET_PRIORITY(msg,seq,priority);
5023  cp[node].recvData(msg);
5024  }
5025  }
5027 }
5028 
5029 //receive proxy results from phase 1
// NOTE(review): the signature line (embedded number 5030) was elided by
// extraction; this is the GBIS phase-1 receive handler -- the message
// carries psiSum contributions from one proxy.
 5031  ++numGBISP1Arrived;
// Accumulate the proxy's partial psi sums into this patch's psiFin array.
 5032  for ( int i = 0; i < msg->psiSumLen; ++i ) {
 5033  psiFin[i] += msg->psiSum[i];
 5034  }
 5035  delete msg;
 5036 
 5037  if (flags.doNonbonded) {
 5038  //awaken if phase 1 done
 5039  if (phase1BoxClosedCalled == true &&
 5040  numGBISP1Arrived==proxy.size() ) {
 5041  // fprintf(stderr, "Calling awaken() on patch %d: 4\n", this->patchID);
 5042  sequencer->awaken();
 5043  }
 5044  } else {
 5045  //awaken if all phases done on noWork step
 5046  if (boxesOpen == 0 &&
 5047  numGBISP1Arrived == proxy.size() &&
 5048  numGBISP2Arrived == proxy.size() &&
 5049  numGBISP3Arrived == proxy.size()) {
 5050  // fprintf(stderr, "Calling awaken() on patch %d: 5\n", this->patchID);
 5051  sequencer->awaken();
 5052  }
 5053  }
 5054 }
5055 
5056 //receive proxy results from phase 2
// NOTE(review): the signature line (embedded number 5057) was elided by
// extraction; this is the GBIS phase-2 receive handler -- the message
// carries dEdaSum contributions from one proxy.
 5058  ++numGBISP2Arrived;
 5059  //accumulate dEda
// dHdrPrefix is used as the accumulator for dEda sums at this stage.
 5060  for ( int i = 0; i < msg->dEdaSumLen; ++i ) {
 5061  dHdrPrefix[i] += msg->dEdaSum[i];
 5062  }
 5063  delete msg;
 5064 
 5065  if (flags.doNonbonded) {
 5066  //awaken if phase 2 done
 5067  if (phase2BoxClosedCalled == true &&
 5068  numGBISP2Arrived==proxy.size() ) {
 5069  // fprintf(stderr, "Calling awaken() on patch %d: 6\n", this->patchID);
 5070  sequencer->awaken();
 5071  }
 5072  } else {
 5073  //awaken if all phases done on noWork step
 5074  if (boxesOpen == 0 &&
 5075  numGBISP1Arrived == proxy.size() &&
 5076  numGBISP2Arrived == proxy.size() &&
 5077  numGBISP3Arrived == proxy.size()) {
 5078  // fprintf(stderr, "Calling awaken() on patch %d: 7\n", this->patchID);
 5079  sequencer->awaken();
 5080  }
 5081  }
 5082 }
5083 
5084 // MOLLY algorithm part 1
// Computes averaged (mollified) positions p_avg for rigid hydrogen groups
// and stores the Lagrange multipliers in molly_lambda for use by part 2
// (mollyMollify below).
// NOTE(review): extraction elided the signature (embedded number 5085), the
// simParams declaration (5088), and one line before the p_avg copy loop
// (5104) -- confirm against the full source.
 5086 {
 5087  Molecule *mol = Node::Object()->molecule;
 5089  BigReal tol = simParams->mollyTol;
 5090  int maxiter = simParams->mollyIter;
 5091  int i, iter;
 5092  HGArrayBigReal dsq;
 5093  BigReal tmp;
 5094  HGArrayInt ial, ibl;
 5095  HGArrayVector ref; // reference position
 5096  HGArrayVector refab; // reference vector
 5097  HGArrayBigReal rmass; // 1 / mass
 5098  BigReal *lambda; // Lagrange multipliers
 5099  CompAtom *avg; // averaged position
 5100  int numLambdas = 0;
 5101  HGArrayInt fixed; // is atom fixed?
 5102 
 5103  // iout<<iINFO << "mollyAverage: "<<std::endl<<endi;
 5105  for ( i=0; i<numAtoms; ++i ) p_avg[i] = p[i];
 5106 
// Walk the atom list one hydrogen group at a time; the parent atom's
// hydrogenGroupSize gives the stride.
 5107  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
 5108  int hgs = atom[ig].hydrogenGroupSize;
 5109  if ( hgs == 1 ) continue; // only one atom in group
 5110  for ( i = 0; i < hgs; ++i ) {
 5111  ref[i] = atom[ig+i].position;
 5112  rmass[i] = 1. / atom[ig+i].mass;
 5113  fixed[i] = ( simParams->fixedAtomsOn && atom[ig+i].atomFixed );
// Fixed atoms get zero inverse mass so the constraints cannot move them.
 5114  if ( fixed[i] ) rmass[i] = 0.;
 5115  }
 5116  avg = &(p_avg[ig]);
 5117  int icnt = 0;
 5118 
// Collect rigid-bond constraints: the H-H bond stored on the parent atom
// (water only), then each child's bond to the parent.
 5119  if ( ( tmp = atom[ig].rigidBondLength ) > 0 ) { // for water
 5120  if ( hgs != 3 ) {
 5121  NAMD_die("Hydrogen group error caught in mollyAverage(). It's a bug!\n");
 5122  }
 5123  if ( !(fixed[1] && fixed[2]) ) {
 5124  dsq[icnt] = tmp * tmp; ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
 5125  }
 5126  }
 5127  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
 5128  if ( ( tmp = atom[ig+i].rigidBondLength ) > 0 ) {
 5129  if ( !(fixed[0] && fixed[i]) ) {
 5130  dsq[icnt] = tmp * tmp; ial[icnt] = 0; ibl[icnt] = i; ++icnt;
 5131  }
 5132  }
 5133  }
 5134  if ( icnt == 0 ) continue; // no constraints
// Multipliers for this group are appended to molly_lambda; part 2 consumes
// them in the same order.
 5135  numLambdas += icnt;
 5136  molly_lambda.resize(numLambdas);
 5137  lambda = &(molly_lambda[numLambdas - icnt]);
 5138  for ( i = 0; i < icnt; ++i ) {
 5139  refab[i] = ref[ial[i]] - ref[ibl[i]];
 5140  }
 5141  // iout<<iINFO<<"hgs="<<hgs<<" m="<<icnt<<std::endl<<endi;
// Iteratively solve for averaged positions/multipliers via the file-scope
// average() declared near the top of this file.
 5142  iter=average(avg,ref,lambda,hgs,icnt,rmass,dsq,ial,ibl,refab,tol,maxiter);
 5143  if ( iter == maxiter ) {
 5144  iout << iWARN << "Exceeded maximum number of iterations in mollyAverage().\n"<<endi;
 5145  }
 5146  }
 5147 
 5148  // for ( i=0; i<numAtoms; ++i ) {
 5149  // if ( ( p_avg[i].position - p[i].position ).length2() > 1.0 ) {
 5150  // iout << iERROR << "MOLLY moved atom " << (p[i].id + 1) << " from "
 5151  // << p[i].position << " to " << p_avg[i].position << "\n" << endi;
 5152  // }
 5153  // }
 5154 
 5155 }
5156 
5157 
5158 // MOLLY algorithm part 2
// Applies the mollification (using the multipliers stored by mollyAverage)
// to the slow forces f[Results::slow], accumulates the constraint virial
// into *virial, and releases p_avg.
// NOTE(review): the signature (embedded number 5159) and the simParams
// declaration (5162) were elided by extraction -- confirm against the full
// source.
 5160 {
 5161  Molecule *mol = Node::Object()->molecule;
 5163  Tensor wc; // constraint virial
 5164  int i;
 5165  HGArrayInt ial, ibl;
 5166  HGArrayVector ref; // reference position
 5167  CompAtom *avg; // averaged position
 5168  HGArrayVector refab; // reference vector
 5169  HGArrayVector force; // new force
 5170  HGArrayBigReal rmass; // 1 / mass
 5171  BigReal *lambda; // Lagrange multipliers
 5172  int numLambdas = 0;
 5173  HGArrayInt fixed; // is atom fixed?
 5174 
 5175  for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
 5176  int hgs = atom[ig].hydrogenGroupSize;
 5177  if (hgs == 1 ) continue; // only one atom in group
 5178  for ( i = 0; i < hgs; ++i ) {
 5179  ref[i] = atom[ig+i].position;
 5180  force[i] = f[Results::slow][ig+i];
 5181  rmass[i] = 1. / atom[ig+i].mass;
 5182  fixed[i] = ( simParams->fixedAtomsOn && atom[ig+i].atomFixed );
 5183  if ( fixed[i] ) rmass[i] = 0.;
 5184  }
 5185  int icnt = 0;
 5186  // c-ji I'm only going to mollify water for now
// Constraint enumeration mirrors mollyAverage(); multipliers are consumed
// from molly_lambda in the same order they were stored there.
 5187  if ( atom[ig].rigidBondLength > 0 ) { // for water
 5188  if ( hgs != 3 ) {
 5189  NAMD_die("Hydrogen group error caught in mollyMollify(). It's a bug!\n");
 5190  }
 5191  if ( !(fixed[1] && fixed[2]) ) {
 5192  ial[icnt] = 1; ibl[icnt] = 2; ++icnt;
 5193  }
 5194  }
 5195  for ( i = 1; i < hgs; ++i ) { // normal bonds to mother atom
 5196  if ( atom[ig+i].rigidBondLength > 0 ) {
 5197  if ( !(fixed[0] && fixed[i]) ) {
 5198  ial[icnt] = 0; ibl[icnt] = i; ++icnt;
 5199  }
 5200  }
 5201  }
 5202 
 5203  if ( icnt == 0 ) continue; // no constraints
 5204  lambda = &(molly_lambda[numLambdas]);
 5205  numLambdas += icnt;
 5206  for ( i = 0; i < icnt; ++i ) {
 5207  refab[i] = ref[ial[i]] - ref[ibl[i]];
 5208  }
 5209  avg = &(p_avg[ig]);
 5210  mollify(avg,ref,lambda,force,hgs,icnt,rmass,ial,ibl,refab);
 5211  // store data back to patch
 5212  for ( i = 0; i < hgs; ++i ) {
// Accumulate the virial contribution of the force correction before
// overwriting the stored slow force.
 5213  wc += outer(force[i]-f[Results::slow][ig+i],ref[i]);
 5214  f[Results::slow][ig+i] = force[i];
 5215  }
 5216  }
 5217  // check that there isn't a constant needed here!
 5218  *virial += wc;
 5219  p_avg.resize(0);
 5220 }
5221 
// Save a restart snapshot of this patch: atoms and lattice (plus the water
// count when water/non-water separation is compiled in).
// NOTE(review): the signature line (embedded number 5222) was elided by
// extraction; presumably HomePatch::checkpoint() -- confirm against the
// full source.
5223  checkpoint_atom.copy(atom);
 5224  checkpoint_lattice = lattice;
 5225 
 5226  // DMK - Atom Separation (water vs. non-water)
 5227  #if NAMD_SeparateWaters != 0
 5228  checkpoint_numWaterAtoms = numWaterAtoms;
 5229  #endif
 5230 }
5231 
// Restore this patch's atoms and lattice from the snapshot taken by
// checkpoint(), and invalidate all state derived from atom order/position
// (atom map registration, rattle lists, SOA arrays).
// NOTE(review): extraction elided the simParams declaration (embedded
// number 5245) and one line inside the disabled #if 0 block (5251) --
// confirm against the full source.
5232 void HomePatch::revert(void) {
 5233  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
 5234 
 5235  atom.copy(checkpoint_atom);
 5236  numAtoms = atom.size();
 5237  lattice = checkpoint_lattice;
 5238 
 5239  doAtomUpdate = true;
 5240  rattleListValid = false;
 5241  rattleListValid_SOA = false;
 5242 
// Re-register atom IDs immediately only when this patch has no neighbors.
 5243  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
 5244 
 5246  if (simParams->SOAintegrateOn) {
 5247  sort_solvent_atoms();
 5248  copy_atoms_to_SOA();
 5249 #if 0
 5250  if (simParams->rigidBonds != RIGID_NONE) {
 5252  rattleListValid_SOA = true;
 5253  }
 5254 #endif
 5255  }
 5256 
 5257  // DMK - Atom Separation (water vs. non-water)
 5258  #if NAMD_SeparateWaters != 0
 5259  numWaterAtoms = checkpoint_numWaterAtoms;
 5260  #endif
 5261 }
5262 
// Initiating replica: record the script task and ask the remote replica
// (simParams->scriptIntArg1) for the checkpoint stored under `key`.
// NOTE(review): the simParams declaration (embedded number 5264) was elided
// by extraction -- confirm against the full source.
5263 void HomePatch::exchangeCheckpoint(int scriptTask, int &bpc) { // initiating replica
 5265  checkpoint_task = scriptTask;
 5266  const int remote = simParams->scriptIntArg1;
 5267  const char *key = simParams->scriptStringArg1;
 5268  PatchMgr::Object()->sendCheckpointReq(patchID, remote, key, scriptTask);
 5269 }
5270 
// Responding replica: service a checkpoint request.  FREE deletes the
// stored checkpoint and acks; LOAD/SWAP reply with the stored atoms;
// otherwise an empty message is returned.
// NOTE(review): one line (embedded number 5289) was elided between the
// lattice and numAtoms assignments -- confirm against the full source.
5271 void HomePatch::recvCheckpointReq(int task, const char *key, int replica, int pe) { // responding replica
 5272  if ( task == SCRIPT_CHECKPOINT_FREE ) {
 5273  if ( ! checkpoints.count(key) ) {
 5274  NAMD_die("Unable to free checkpoint, requested key was never stored.");
 5275  }
 5276  delete checkpoints[key];
 5277  checkpoints.erase(key);
 5278  PatchMgr::Object()->sendCheckpointAck(patchID, replica, pe);
 5279  return;
 5280  }
 5281  CheckpointAtomsMsg *msg;
 5282  if ( task == SCRIPT_CHECKPOINT_LOAD || task == SCRIPT_CHECKPOINT_SWAP ) {
 5283  if ( ! checkpoints.count(key) ) {
 5284  NAMD_die("Unable to load checkpoint, requested key was never stored.");
 5285  }
 5286  checkpoint_t &cp = *checkpoints[key];
// Message sized for cp.numAtoms atoms plus a 1-char key slot.
 5287  msg = new (cp.numAtoms,1,0) CheckpointAtomsMsg;
 5288  msg->lattice = cp.lattice;
 5290  msg->numAtoms = cp.numAtoms;
 5291  memcpy(msg->atoms,cp.atoms.begin(),cp.numAtoms*sizeof(FullAtom));
 5292  } else {
 5293  msg = new (0,1,0) CheckpointAtomsMsg;
 5294  }
 5295  msg->pid = patchID;
 5296  msg->replica = CmiMyPartition();
 5297  msg->pe = CkMyPe();
 5298  PatchMgr::Object()->sendCheckpointLoad(msg, replica, pe);
 5299 }
5300 
// Initiating replica: handle the responding replica's checkpoint reply.
// One branch sends this patch's own atoms back for storage; another
// replaces this patch's atoms/lattice with the message contents and
// invalidates derived state.
// NOTE(review): several lines were elided by extraction (embedded numbers
// 5302, 5305, 5311, 5323, 5326, 5339, 5345-5346), including the simParams
// declaration and the task checks that select the branches, so the exact
// control flow here is incomplete -- confirm against the full source.
5301 void HomePatch::recvCheckpointLoad(CheckpointAtomsMsg *msg) { // initiating replica
 5303  const int remote = simParams->scriptIntArg1;
 5304  const char *key = simParams->scriptStringArg1;
 5306  NAMD_bug("HomePatch::recvCheckpointLoad called during checkpointFree.");
 5307  }
 5308  if ( msg->replica != remote ) {
 5309  NAMD_bug("HomePatch::recvCheckpointLoad message from wrong replica.");
 5310  }
// Build a message holding this patch's current atoms and send it to the
// remote replica for storage under `key`.
 5312  CheckpointAtomsMsg *newmsg = new (numAtoms,1+strlen(key),0) CheckpointAtomsMsg;
 5313  strcpy(newmsg->key,key);
 5314  newmsg->lattice = lattice;
 5315  newmsg->berendsenPressure_count = sequencer->berendsenPressure_count;
 5316  newmsg->pid = patchID;
 5317  newmsg->pe = CkMyPe();
 5318  newmsg->replica = CmiMyPartition();
 5319  newmsg->numAtoms = numAtoms;
 5320  memcpy(newmsg->atoms,atom.begin(),numAtoms*sizeof(FullAtom));
 5321  PatchMgr::Object()->sendCheckpointStore(newmsg, remote, msg->pe);
 5322  }
// Load path: replace local atoms/lattice with the message contents.
 5324  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
 5325  lattice = msg->lattice;
 5327  numAtoms = msg->numAtoms;
 5328  atom.resize(numAtoms);
 5329  memcpy(atom.begin(),msg->atoms,numAtoms*sizeof(FullAtom));
 5330  doAtomUpdate = true;
 5331  rattleListValid = false;
 5332  rattleListValid_SOA = false;
 5333  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
 5334  if (simParams->SOAintegrateOn) {
 5335  sort_solvent_atoms();
 5336  copy_atoms_to_SOA();
 5337 #if 0
 5338  if (simParams->rigidBonds != RIGID_NONE) {
 5340  rattleListValid_SOA = true;
 5341  }
 5342 #endif
 5343  }
 5344  }
 5347  }
 5348  delete msg;
 5349 }
5350 
// Responding replica: store (or overwrite) the checkpoint sent by the
// initiating replica under msg->key.
// NOTE(review): two lines were elided by extraction (embedded numbers 5357
// and 5361) -- likely a berendsenPressure_count copy and the checkpoint
// ack, mirroring recvCheckpointLoad/recvCheckpointReq -- confirm against
// the full source.
5351 void HomePatch::recvCheckpointStore(CheckpointAtomsMsg *msg) { // responding replica
 5352  if ( ! checkpoints.count(msg->key) ) {
 5353  checkpoints[msg->key] = new checkpoint_t;
 5354  }
 5355  checkpoint_t &cp = *checkpoints[msg->key];
 5356  cp.lattice = msg->lattice;
 5358  cp.numAtoms = msg->numAtoms;
 5359  cp.atoms.resize(cp.numAtoms);
 5360  memcpy(cp.atoms.begin(),msg->atoms,cp.numAtoms*sizeof(FullAtom));
 5362  delete msg;
 5363 }
5364 
// Initiating replica: a checkpoint operation completed on the remote side;
// advance the Charm++ quiescence-detection counter.
5365 void HomePatch::recvCheckpointAck() { // initiating replica
 5366  CkpvAccess(_qd)->process();
 5367 }
5368 
5369 
// Replica atom-exchange driver.  On ATOMSEND(RECV) it builds and stashes an
// outgoing message holding this patch's atoms (sent once the partner's
// request arrives, see recvExchangeReq); on ATOMRECV(SEND) it records the
// source replica.
// NOTE(review): extraction elided several lines (embedded numbers 5371,
// 5376-5379, 5382, 5387), including the simParams declaration, the
// exchange_msg allocation, and the request/send calls -- confirm against
// the full source.
5370 void HomePatch::exchangeAtoms(int scriptTask) {
 5372  // CkPrintf("exchangeAtoms %d %d %d %d\n", CmiMyPartition(), scriptTask, (int)(simParams->scriptArg1), (int)(simParams->scriptArg2));
 5373  if ( scriptTask == SCRIPT_ATOMSEND || scriptTask == SCRIPT_ATOMSENDRECV ) {
 5374  exchange_dst = (int) simParams->scriptArg1;
 5375  // create and save outgoing message
 5380  memcpy(exchange_msg->atoms,atom.begin(),numAtoms*sizeof(FullAtom));
 5381  if ( exchange_req >= 0 ) {
 5383  }
 5384  }
 5385  if ( scriptTask == SCRIPT_ATOMRECV || scriptTask == SCRIPT_ATOMSENDRECV ) {
 5386  exchange_src = (int) simParams->scriptArg2;
 5388  }
 5389 }
5390 
// Partner replica is ready to receive: record the request and, if our
// outgoing atom message has already been built by exchangeAtoms(), send it
// and reset the exchange state.
// NOTE(review): the signature (embedded number 5391) and the send call
// (5395) were elided by extraction; presumably
// HomePatch::recvExchangeReq(int req) -- confirm against the full source.
5392  exchange_req = req;
 5393  if ( exchange_msg ) {
 5394  // CkPrintf("recvExchangeReq %d %d\n", CmiMyPartition(), exchange_dst);
 5396  exchange_msg = 0;
 5397  exchange_req = -1;
 5398  CkpvAccess(_qd)->process();
 5399  }
 5400 }
5401 
// Replace this patch's atoms and lattice with those received from the
// partner replica, then invalidate derived state (atom map, rattle lists)
// and rebuild SOA arrays when the SOA integrator is active.
// NOTE(review): the signature (embedded number 5402) and the simParams
// declaration (5415) were elided by extraction; presumably
// HomePatch::recvExchangeMsg(ExchangeAtomsMsg *msg) -- confirm against the
// full source.
5403  // CkPrintf("recvExchangeMsg %d %d\n", CmiMyPartition(), exchange_src);
 5404  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
 5405  lattice = msg->lattice;
 5406  numAtoms = msg->numAtoms;
 5407  atom.resize(numAtoms);
 5408  memcpy(atom.begin(),msg->atoms,numAtoms*sizeof(FullAtom));
 5409  delete msg;
 5410  CkpvAccess(_qd)->process();
 5411  doAtomUpdate = true;
 5412  rattleListValid = false;
 5413  rattleListValid_SOA = false;
 5414  if ( ! numNeighbors ) atomMapper->registerIDsFullAtom(atom.begin(),atom.end());
 5416  if (simParams->SOAintegrateOn) {
 5417  sort_solvent_atoms();
 5418  copy_atoms_to_SOA();
 5419 #if 0
 5420  if (simParams->rigidBonds != RIGID_NONE) {
 5422  rattleListValid_SOA = true;
 5423  }
 5424 #endif
 5425  }
 5426 }
5427 
// Report this patch's load for the given timestep to the load balancer.
// NOTE(review): the single body line (embedded number 5430) was elided by
// extraction -- presumably a call into LdbCoordinator; confirm against the
// full source.
5428 void HomePatch::submitLoadStats(int timestep)
 5429 {
 5431 }
5432 
5433 
5434 //
 5435 // XXX operates on CompAtom, not FullAtom
 5436 //
 5437 // XXX TODO: This operation could be moved to the gpu (?)
// Pairlist validity check.  On a save step it snapshots the lattice and
// positions and shrinks the tolerance; otherwise it measures the maximum
// drift of patch corners and atoms since the last save (stored in
// flags.maxAtomMovement) and adapts doPairlistCheck_newTolerance so
// pairlists are regenerated soon enough.
// NOTE(review): the signature (embedded number 5438) and the simParams
// declaration (5448) were elided by extraction; presumably
// HomePatch::doPairlistCheck() -- confirm against the full source.
 5439 {
 5440 #if 0
 5441 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
 5442  char dpcbuf[32];
 5443  sprintf(dpcbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::DO_PAIRLIST_CHECK], this->getPatchID());
 5444  NAMD_EVENT_START_EX(1, NamdProfileEvent::DO_PAIRLIST_CHECK, dpcbuf);
 5445 #endif
 5446 #endif
 5447 
 5449 
// Empty patch or pairlists disabled: clear the flags and bail out.
 5450  if ( numAtoms == 0 || ! flags.usePairlists ) {
 5451  flags.pairlistTolerance = 0.;
 5452  flags.maxAtomMovement = 99999.;
 5453 #if 0
 5454  NAMD_EVENT_STOP(1, NamdProfileEvent::DO_PAIRLIST_CHECK);
 5455 #endif
 5456  return;
 5457  }
 5458 
 5459  int i; int n = numAtoms;
 5460  CompAtom *p_i = p.begin();
 5461 
// Save step: snapshot lattice and positions, pre-shrink the tolerance.
 5462  if ( flags.savePairlists ) {
 5463  flags.pairlistTolerance = doPairlistCheck_newTolerance;
 5464  flags.maxAtomMovement = 0.;
 5465  doPairlistCheck_newTolerance *= (1. - simParams->pairlistShrink);
 5466  doPairlistCheck_lattice = lattice;
 5467  doPairlistCheck_positions.resize(numAtoms);
 5468  CompAtom *psave_i = doPairlistCheck_positions.begin();
 5469  for ( i=0; i<n; ++i ) { psave_i[i] = p_i[i]; }
 5470 #if 0
 5471  NAMD_EVENT_STOP(1, NamdProfileEvent::DO_PAIRLIST_CHECK);
 5472 #endif
 5473  return;
 5474  }
 5475 
 5476  Lattice &lattice_old = doPairlistCheck_lattice;
 5477  Position center_cur = lattice.unscale(center);
 5478  Position center_old = lattice_old.unscale(center);
 5479  Vector center_delta = center_cur - center_old;
 5480 
 5481  // find max deviation to corner (any neighbor shares a corner)
 5482  BigReal max_cd = 0.;
 5483  for ( i=0; i<2; ++i ) {
 5484  for ( int j=0; j<2; ++j ) {
 5485  for ( int k=0; k<2; ++k ) {
 5486  ScaledPosition corner( i ? min.x : max.x , j ? min.y : max.y , k ? min.z : max.z );
 5487  Vector corner_delta = lattice.unscale(corner) - lattice_old.unscale(corner);
 5488  corner_delta -= center_delta;
 5489  BigReal cd = corner_delta.length2();
 5490  if ( cd > max_cd ) max_cd = cd;
 5491  }
 5492  }
 5493  }
 5494  max_cd = sqrt(max_cd);
 5495 
 5496  // find max deviation of atoms relative to center
 5497  BigReal max_pd = 0.;
 5498  CompAtom *p_old_i = doPairlistCheck_positions.begin();
 5499  for ( i=0; i<n; ++i ) {
 5500  // JM: Calculating position difference and making it patch-centered
 5501  Vector p_delta = p_i[i].position - p_old_i[i].position;
 5502  p_delta -= center_delta;
 5503  BigReal pd = p_delta.length2();
 5504  if ( pd > max_pd ) max_pd = pd;
 5505  }
 5506  max_pd = sqrt(max_pd);
 5507 
 5508  BigReal max_tol = max_pd + max_cd;
 5509 
 5510  flags.maxAtomMovement = max_tol;
 5511 
 5512  // if ( max_tol > flags.pairlistTolerance ) iout << "tolerance " << max_tol << " > " << flags.pairlistTolerance << "\n" << endi;
 5513 
// Adapt the tolerance: grow it when drift approaches the trigger fraction,
// and raise it outright when drift has already exceeded it.
 5514  if ( max_tol > ( (1. - simParams->pairlistTrigger) *
 5515  doPairlistCheck_newTolerance ) ) {
 5516  //if(this->getPatchID() == 0) fprintf(stderr, "CPU: Increasing pairList tolerance(%lf %lf)\n",
 5517  // max_tol, doPairlistCheck_newTolerance);
 5518  doPairlistCheck_newTolerance *= (1. + simParams->pairlistGrow);
 5519  }
 5520 
 5521  if ( max_tol > doPairlistCheck_newTolerance ) {
 5522  //if(this->getPatchID() == 0) fprintf(stderr, "CPU: Decreasing pairList tolerance(%lf %lf)\n",
 5523  // max_tol, doPairlistCheck_newTolerance);
 5524  doPairlistCheck_newTolerance = max_tol / (1. - simParams->pairlistTrigger);
 5525  }
 5526 
 5527  //if(this->getPatchID() == 0) fprintf(stderr, "CPU: New patchTolerance: %lf\n", doPairlistCheck_newTolerance);
 5528 
 5529 // NAMD_EVENT_STOP(1, NamdProfileEvent::DO_PAIRLIST_CHECK);
 5530 }
5531 
5532 
5534 //
 5535 // begin SOA
 5536 //
// SOA variant of doGroupSizeCheck(): recompute nonbondedGroupSize for each
// hydrogen group, cutting a group short once a member strays beyond half of
// hgroupCutoff from the parent; remaining members become single-atom
// groups.  Records the largest in-group radius in flags.maxGroupRadius.
// NOTE(review): the signature (embedded number 5537) and the simParams
// declaration (5541) were elided by extraction -- confirm against the full
// source.
 5538 {
 5539  if ( ! flags.doNonbonded ) return;
 5540 
// Compare squared distances against (hgroupCutoff/2)^2.
 5542  BigReal hgcut = 0.5 * simParams->hgroupCutoff; hgcut *= hgcut;
 5543  BigReal maxrad2 = 0.;
 5544 
 5545  double * __restrict pos_x = patchDataSOA.pos_x;
 5546  double * __restrict pos_y = patchDataSOA.pos_y;
 5547  double * __restrict pos_z = patchDataSOA.pos_z;
 5548  int * __restrict hydrogenGroupSize = patchDataSOA.hydrogenGroupSize;
 5549  int * __restrict nonbondedGroupSize = patchDataSOA.nonbondedGroupSize;
 5550 
 5551  int j=0;
 5552  while (j < numAtoms) {
 5553  const int hgs = hydrogenGroupSize[j];
 5554  if ( ! hgs ) break; // avoid infinite loop on bug
 5555  int ngs = hgs;
 5556  if ( ngs > 5 ) ngs = 5; // XXX why? limit to at most 5 atoms per group
 5557  BigReal x = pos_x[j];
 5558  BigReal y = pos_y[j];
 5559  BigReal z = pos_z[j];
 5560  int i;
 5561  for ( i = 1; i < ngs; ++i ) { // limit spatial extent
 5562  nonbondedGroupSize[j+i] = 0;
 5563  BigReal dx = pos_x[j+i] - x;
 5564  BigReal dy = pos_y[j+i] - y;
 5565  BigReal dz = pos_z[j+i] - z;
 5566  BigReal r2 = dx * dx + dy * dy + dz * dz;
 5567  if ( r2 > hgcut ) break;
 5568  else if ( r2 > maxrad2 ) maxrad2 = r2;
 5569  }
// The parent's entry holds the count of close members; any members beyond
// the cutoff become their own single-atom nonbonded groups.
 5570  nonbondedGroupSize[j] = i;
 5571  for ( ; i < hgs; ++i ) {
 5572  nonbondedGroupSize[j+i] = 1;
 5573  }
 5574  j += hgs;
 5575  }
 5576 
 5577  if (j != numAtoms) {
 5578  NAMD_bug("hydrogenGroupSize is zero in HomePatch::doGroupSizeCheck");
 5579  }
 5580 
 5581  flags.maxGroupRadius = sqrt(maxrad2);
 5582 
 5583 }
5584 //
5585 // end SOA
5586 //
5588 
5589 
// AOS version of the group-size check: same algorithm as the SOA variant
// above, operating on the FullAtom list directly.
// NOTE(review): the signature (embedded number 5590) and the simParams
// declaration (5594) were elided by extraction; presumably
// HomePatch::doGroupSizeCheck() -- confirm against the full source.
5591 {
 5592  if ( ! flags.doNonbonded ) return;
 5593 
 5595  BigReal hgcut = 0.5 * simParams->hgroupCutoff; hgcut *= hgcut;
 5596  BigReal maxrad2 = 0.;
 5597 
 5598  FullAtomList::iterator p_i = atom.begin();
 5599  FullAtomList::iterator p_e = atom.end();
 5600 
 5601  while ( p_i != p_e ) {
 5602  const int hgs = p_i->hydrogenGroupSize;
 5603  if ( ! hgs ) break; // avoid infinite loop on bug
 5604  int ngs = hgs;
 5605  if ( ngs > 5 ) ngs = 5; // XXX why? limit to at most 5 atoms per group
 5606  BigReal x = p_i->position.x;
 5607  BigReal y = p_i->position.y;
 5608  BigReal z = p_i->position.z;
 5609  int i;
 5610  for ( i = 1; i < ngs; ++i ) { // limit spatial extent
 5611  p_i[i].nonbondedGroupSize = 0;
 5612  BigReal dx = p_i[i].position.x - x;
 5613  BigReal dy = p_i[i].position.y - y;
 5614  BigReal dz = p_i[i].position.z - z;
 5615  BigReal r2 = dx * dx + dy * dy + dz * dz;
 5616  if ( r2 > hgcut ) break;
 5617  else if ( r2 > maxrad2 ) maxrad2 = r2;
 5618  }
 5619  p_i->nonbondedGroupSize = i;
 5620  for ( ; i < hgs; ++i ) {
 5621  p_i[i].nonbondedGroupSize = 1;
 5622  }
 5623  p_i += hgs;
 5624  }
 5625 
 5626  if ( p_i != p_e ) {
 5627  NAMD_bug("hydrogenGroupSize is zero in HomePatch::doGroupSizeCheck");
 5628  }
 5629 
 5630  flags.maxGroupRadius = sqrt(maxrad2);
 5631 
 5632 }
5633 
5634 
5636 //
 5637 // begin SOA
 5638 //
// SOA variant of doMarginCheck(): die if the periodic cell has shrunk
// below what the original patch grid requires, then count atoms whose
// scaled coordinates have drifted past the patch bounds plus margin into
// marginViolations.
// NOTE(review): the signature (embedded number 5639) and the simParams
// declaration (5641) were elided by extraction -- confirm against the full
// source.
 5640 {
 5642 
// Physical extent of the cell along each (possibly non-orthogonal) axis.
 5643  BigReal sysdima = lattice.a_r().unit() * lattice.a();
 5644  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
 5645  BigReal sysdimc = lattice.c_r().unit() * lattice.c();
 5646 
 5647  BigReal minSize = simParams->patchDimension - simParams->margin;
 5648 
 5649  if ( ( aAwayDist*sysdima < minSize*0.9999 ) ||
 5650  ( bAwayDist*sysdimb < minSize*0.9999 ) ||
 5651  ( cAwayDist*sysdimc < minSize*0.9999 ) ) {
 5652 
 5653  NAMD_die("Periodic cell has become too small for original patch grid!\n"
 5654  "Possible solutions are to restart from a recent checkpoint,\n"
 5655  "increase margin, or disable useFlexibleCell for liquid simulation.");
 5656  }
 5657 
 5658  BigReal cutoff = simParams->cutoff;
 5659 
 5660  BigReal margina = 0.5 * ( aAwayDist - cutoff / sysdima );
 5661  BigReal marginb = 0.5 * ( bAwayDist - cutoff / sysdimb );
 5662  BigReal marginc = 0.5 * ( cAwayDist - cutoff / sysdimc );
 5663 
 5664  if ( (margina < -0.0001) || (marginb < -0.0001) || (marginc < -0.0001) ) {
 5665  NAMD_die("Periodic cell has become too small for original patch grid!\n"
 5666  "There are probably many margin violations already reported.\n"
 5667  "Possible solutions are to restart from a recent checkpoint,\n"
 5668  "increase margin, or disable useFlexibleCell for liquid simulation.");
 5669  }
 5670 
 5671  BigReal minx = min.x - margina;
 5672  BigReal miny = min.y - marginb;
 5673  BigReal minz = min.z - marginc;
 5674  BigReal maxx = max.x + margina;
 5675  BigReal maxy = max.y + marginb;
 5676  BigReal maxz = max.z + marginc;
 5677 
 5678  int xdev, ydev, zdev;
 5679  int problemCount = 0;
 5680 
 5681  double * __restrict pos_x = patchDataSOA.pos_x;
 5682  double * __restrict pos_y = patchDataSOA.pos_y;
 5683  double * __restrict pos_z = patchDataSOA.pos_z;
 5684  for (int i=0; i < numAtoms; i++) {
 5685  Vector pos(pos_x[i],pos_y[i],pos_z[i]);
 5686 
 5687  ScaledPosition s = lattice.scale(pos);
 5688 
 5689  // check if atom is within bounds
 5690  if (s.x < minx) xdev = 0;
 5691  else if (maxx <= s.x) xdev = 2;
 5692  else xdev = 1;
 5693 
 5694  if (s.y < miny) ydev = 0;
 5695  else if (maxy <= s.y) ydev = 2;
 5696  else ydev = 1;
 5697 
 5698  if (s.z < minz) zdev = 0;
 5699  else if (maxz <= s.z) zdev = 2;
 5700  else zdev = 1;
 5701 
// A non-null mInfo entry means the atom now belongs to a neighbor patch.
 5702  if (mInfo[xdev][ydev][zdev]) { // somewhere else to be
 5703  ++problemCount;
 5704  }
 5705 
 5706  }
 5707 
 5708  marginViolations = problemCount;
 5709  // if ( problemCount ) {
 5710  // iout << iERROR <<
 5711  // "Found " << problemCount << " margin violations!\n" << endi;
 5712  // }
 5713 
 5714 }
5715 //
5716 // end SOA
5717 //
5719 
5720 
// AOS version of the margin check (see the SOA variant above): die if the
// periodic cell shrank below the patch grid, then count atoms outside the
// patch bounds plus margin into marginViolations.
// NOTE(review): the signature (embedded number 5721), the simParams
// declaration (5723), and the scaled-position line inside the loop (5767)
// were elided by extraction; presumably HomePatch::doMarginCheck() with
// `s = lattice.scale(p_i->position)` -- confirm against the full source.
5722 {
 5724 
 5725  BigReal sysdima = lattice.a_r().unit() * lattice.a();
 5726  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
 5727  BigReal sysdimc = lattice.c_r().unit() * lattice.c();
 5728 
 5729  BigReal minSize = simParams->patchDimension - simParams->margin;
 5730 
 5731  if ( ( aAwayDist*sysdima < minSize*0.9999 ) ||
 5732  ( bAwayDist*sysdimb < minSize*0.9999 ) ||
 5733  ( cAwayDist*sysdimc < minSize*0.9999 ) ) {
 5734 
 5735  NAMD_die("Periodic cell has become too small for original patch grid!\n"
 5736  "Possible solutions are to restart from a recent checkpoint,\n"
 5737  "increase margin, or disable useFlexibleCell for liquid simulation.");
 5738  }
 5739 
 5740  BigReal cutoff = simParams->cutoff;
 5741 
 5742  BigReal margina = 0.5 * ( aAwayDist - cutoff / sysdima );
 5743  BigReal marginb = 0.5 * ( bAwayDist - cutoff / sysdimb );
 5744  BigReal marginc = 0.5 * ( cAwayDist - cutoff / sysdimc );
 5745 
 5746  if ( (margina < -0.0001) || (marginb < -0.0001) || (marginc < -0.0001) ) {
 5747  NAMD_die("Periodic cell has become too small for original patch grid!\n"
 5748  "There are probably many margin violations already reported.\n"
 5749  "Possible solutions are to restart from a recent checkpoint,\n"
 5750  "increase margin, or disable useFlexibleCell for liquid simulation.");
 5751  }
 5752 
 5753  BigReal minx = min.x - margina;
 5754  BigReal miny = min.y - marginb;
 5755  BigReal minz = min.z - marginc;
 5756  BigReal maxx = max.x + margina;
 5757  BigReal maxy = max.y + marginb;
 5758  BigReal maxz = max.z + marginc;
 5759 
 5760  int xdev, ydev, zdev;
 5761  int problemCount = 0;
 5762 
 5763  FullAtomList::iterator p_i = atom.begin();
 5764  FullAtomList::iterator p_e = atom.end();
 5765  for ( ; p_i != p_e; ++p_i ) {
 5766 
 5768 
 5769  // check if atom is within bounds
 5770  if (s.x < minx) xdev = 0;
 5771  else if (maxx <= s.x) xdev = 2;
 5772  else xdev = 1;
 5773 
 5774  if (s.y < miny) ydev = 0;
 5775  else if (maxy <= s.y) ydev = 2;
 5776  else ydev = 1;
 5777 
 5778  if (s.z < minz) zdev = 0;
 5779  else if (maxz <= s.z) zdev = 2;
 5780  else zdev = 1;
 5781 
 5782  if (mInfo[xdev][ydev][zdev]) { // somewhere else to be
 5783  ++problemCount;
 5784  }
 5785 
 5786  }
 5787 
 5788  marginViolations = problemCount;
 5789  // if ( problemCount ) {
 5790  // iout << iERROR <<
 5791  // "Found " << problemCount << " margin violations!\n" << endi;
 5792  // }
 5793 
 5794 }
5795 
5796 
5797 void
// Move atoms that have left this patch's bounds to neighboring patches.
// Clears per-neighbor migration lists, unregisters the atom map, scans
// migration groups (the parent atom decides for the whole group), compacts
// the local atom array in place, sends migration messages via PatchMgr,
// drains buffered incoming migrations, and suspends the sequencer until
// all neighbors' migrations have arrived.
// NOTE(review): the line carrying the method name (embedded number 5798)
// and the buffered-message deposit call (5943) were elided by extraction;
// presumably HomePatch::doAtomMigration() calling
// depositMigration(msgbuf[i]) -- confirm against the full source.
 5799 {
 5800 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
 5801  char ambuf[32];
 5802  sprintf(ambuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::ATOM_MIGRATIONS], this->getPatchID());
 5803  NAMD_EVENT_START_EX(1, NamdProfileEvent::ATOM_MIGRATIONS, ambuf);
 5804 #endif
 5805 
 5806  // every patch needs to call this once per migration step
 5807  // XXX TODO: check if the cpu version also calls it once per tstep
 5808  int i;
 5809  for (i=0; i<numNeighbors; i++) {
 5810  realInfo[i].mList.resize(0);
 5811  }
 5812 
 5813  // Purge the AtomMap
 5814  atomMapper->unregisterIDsFullAtom(atom.begin(),atom.end());
 5815 
 5816  // Determine atoms that need to migrate
 5817 
 5818  BigReal minx = min.x;
 5819  BigReal miny = min.y;
 5820  BigReal minz = min.z;
 5821  BigReal maxx = max.x;
 5822  BigReal maxy = max.y;
 5823  BigReal maxz = max.z;
 5824 
 5825  int xdev, ydev, zdev;
 5826  int delnum = 0;
 5827 
 5828  FullAtomList::iterator atom_i = atom.begin();
 5829  FullAtomList::iterator atom_e = atom.end();
 5830 
 5831  // DMK - Atom Separation (water vs. non-water)
 5832  #if NAMD_SeparateWaters != 0
 5833  FullAtomList::iterator atom_first = atom_i;
 5834  int numLostWaterAtoms = 0;
 5835  #endif
 5836 
 5837  while ( atom_i != atom_e ) {
 5838  // Even though this code iterates through all atoms successively
 5839  // it moves entire hydrogen/migration groups as follows:
 5840  // Only the parent atom of the hydrogen/migration group has
 5841  // nonzero migrationGroupSize. Values determined for xdev,ydev,zdev
 5842  // will persist through the remaining group members so that each
 5843  // following atom will again be added to the same mList.
 5844  if ( atom_i->migrationGroupSize ) {
 5845  Position pos = atom_i->position;
 5846  if ( atom_i->migrationGroupSize != atom_i->hydrogenGroupSize ) {
 5847  // If there are multiple hydrogen groups in a migration group
 5848  // (e.g. for supporting lone pairs)
 5849  // the following code takes the average position (midpoint)
 5850  // of their parents.
 5851  int mgs = atom_i->migrationGroupSize;
 5852  int c = 1;
 5853  for ( int j=atom_i->hydrogenGroupSize; j<mgs;
 5854  j+=(atom_i+j)->hydrogenGroupSize ) {
 5855  pos += (atom_i+j)->position;
 5856  ++c;
 5857  }
 5858  pos *= 1./c;
 5859  // iout << "mgroup " << atom_i->id << " at " << pos << "\n" << endi;
 5860  }
 5861 
 5862  // Scaling the position below transforms space within patch from
 5863  // what could have been a rotated parallelepiped into
 5864  // orthogonal coordinates, where we can use minmax comparison
 5865  // to detect which of our nearest neighbors this
 5866  // parent atom might have entered.
 5867  ScaledPosition s = lattice.scale(pos);
 5868 
 5869  // check if atom is within bounds
 5870  if (s.x < minx) xdev = 0;
 5871  else if (maxx <= s.x) xdev = 2;
 5872  else xdev = 1;
 5873 
 5874  if (s.y < miny) ydev = 0;
 5875  else if (maxy <= s.y) ydev = 2;
 5876  else ydev = 1;
 5877 
 5878  if (s.z < minz) zdev = 0;
 5879  else if (maxz <= s.z) zdev = 2;
 5880  else zdev = 1;
 5881 
 5882  }
 5883 
 5884  if (mInfo[xdev][ydev][zdev]) { // process atom for migration
 5885  // Don't migrate if destination is myself
 5886 
 5887  // See if we have a migration list already
 5888  MigrationList &mCur = mInfo[xdev][ydev][zdev]->mList;
 5889  DebugM(3,"Migrating atom " << atom_i->id << " from patch "
 5890  << patchID << " with position " << atom_i->position << "\n");
 5891  mCur.add(*atom_i);
 5892  ++delnum;
 5893 
 5894 
 5895  // DMK - Atom Separation (water vs. non-water)
 5896  #if NAMD_SeparateWaters != 0
 5897  // Check to see if this atom is part of a water molecule. If
 5898  // so, numWaterAtoms needs to be adjusted to reflect the loss of
 5899  // this atom.
 5900  // NOTE: The atom separation code assumes that if the oxygen
 5901  // atom of the hydrogen group making up the water molecule is
 5902  // migrated to another HomePatch, the hydrogens will also
 5903  // move!!!
 5904  int atomIndex = atom_i - atom_first;
 5905  if (atomIndex < numWaterAtoms)
 5906  numLostWaterAtoms++;
 5907  #endif
 5908 
 5909 
 5910  } else {
 5911  // By keeping track of delnum total being deleted from FullAtomList
 5912  // the else clause allows us to fill holes as we visit each atom.
 5913 
 5914  if ( delnum ) { *(atom_i-delnum) = *atom_i; }
 5915 
 5916  }
 5917 
 5918  ++atom_i;
 5919  }
 5920 
 5921  // DMK - Atom Separation (water vs. non-water)
 5922  #if NAMD_SeparateWaters != 0
 5923  numWaterAtoms -= numLostWaterAtoms;
 5924  #endif
 5925 
 5926 
 5927  int delpos = numAtoms - delnum;
 5928  DebugM(4,"numAtoms " << numAtoms << " deleted " << delnum << "\n");
 5929  atom.del(delpos,delnum);
 5930 
 5931  numAtoms = atom.size();
 5932  // Hand all per-neighbor migration lists to PatchMgr, which forwards
 5933  // them to the destination home patches.
 5934  // NOTE(review): an earlier comment here read "wait, does this work??????"
 5935  PatchMgr::Object()->sendMigrationMsgs(patchID, realInfo, numNeighbors);
 5936 
 5937  // signal depositMigration() that we are inMigration mode
 5938  inMigration = true;
 5939 
 5940  // Drain the migration message buffer
 5941  for (i=0; i<numMlBuf; i++) {
 5942  DebugM(1, "Draining migration buffer ("<<i<<","<<numMlBuf<<")\n");
 5944  }
 5945  numMlBuf = 0;
 5946 
 5947  NAMD_EVENT_STOP(1, NamdProfileEvent::ATOM_MIGRATIONS);
 5948 
 5949  if (!allMigrationIn) {
 5950  DebugM(3,"All Migrations NOT in, we are suspending patch "<<patchID<<"\n");
 5951  migrationSuspended = true;
 5952  sequencer->suspend();
 5953  migrationSuspended = false;
 5954  }
 5955  allMigrationIn = false;
 5956 
 5957  inMigration = false;
 5958  marginViolations = 0;
 5959 }
5960 
5961 void
// Incorporate a migration message from a neighboring patch.  Messages that
// arrive before this patch enters migration mode are buffered; otherwise
// incoming atoms are wrapped into this patch's periodic image and
// appended.  When the last neighbor's message arrives, derived data is
// rebuilt and the sequencer is awakened if it was suspended.
// NOTE(review): several lines were elided by extraction (embedded numbers
// 5962, 5989, 6008, 6016, 6036, 6042), including the line carrying the
// method name (presumably HomePatch::depositMigration(MigrateAtomsMsg*)),
// the MigrationList iterator declaration, and the simParams declaration --
// confirm against the full source.
 5963 {
 5964 
 5965  if (!inMigration) { // We have to buffer changes due to migration
 5966  // until our patch is in migration mode
 5967  msgbuf[numMlBuf++] = msg;
 5968  return;
 5969  }
 5970 
 5971  // DMK - Atom Separation (water vs. non-water)
 5972  #if NAMD_SeparateWaters != 0
 5973 
 5974 
 5975  // Merge the incoming list of atoms with the current list of
 5976  // atoms. Note that mergeSeparatedAtomList() will apply any
 5977  // required transformations to the incoming atoms as it is
 5978  // separating them.
 5979  mergeAtomList(msg->migrationList);
 5980 
 5981 
 5982  #else
 5983 
 5984  // Merge the incoming list of atoms with the current list of
 5985  // atoms. Apply transformations to the incoming atoms as they are
 5986  // added to this patch's list.
 5987  {
 5988  MigrationList &migrationList = msg->migrationList;
 5990  Transform mother_transform;
 5991  for (mi = migrationList.begin(); mi != migrationList.end(); mi++) {
 5992  DebugM(1,"Migrating atom " << mi->id << " to patch "
 5993  << patchID << " with position " << mi->position << "\n");
 5994  if ( mi->migrationGroupSize ) {
 5995  if ( mi->migrationGroupSize != mi->hydrogenGroupSize ) {
// Multiple hydrogen groups in one migration group: wrap based on the
// midpoint of the parents, then apply that transform to this parent.
 5996  Position pos = mi->position;
 5997  int mgs = mi->migrationGroupSize;
 5998  int c = 1;
 5999  for ( int j=mi->hydrogenGroupSize; j<mgs;
 6000  j+=(mi+j)->hydrogenGroupSize ) {
 6001  pos += (mi+j)->position;
 6002  ++c;
 6003  }
 6004  pos *= 1./c;
 6005  // iout << "mgroup " << mi->id << " at " << pos << "\n" << endi;
 6006  mother_transform = mi->transform;
 6007  pos = lattice.nearest(pos,center,&mother_transform);
 6009  mi->position = lattice.apply_transform(mi->position,mother_transform);
 6010  mi->transform = mother_transform;
 6011  } else {
 6012  mi->position = lattice.nearest(mi->position,center,&(mi->transform));
 6013  mother_transform = mi->transform;
 6014  }
 6015  } else {
// Non-parent group member: reuse the parent's transform.
 6017  mi->position = lattice.apply_transform(mi->position,mother_transform);
 6018  mi->transform = mother_transform;
 6019  }
 6020  atom.add(*mi);
 6021  }
 6022  }
 6023 
 6024 
 6025  #endif // if (NAMD_SeparateWaters != 0)
 6026 
 6027 
 6028  numAtoms = atom.size();
 6029  delete msg;
 6030 
 6031  DebugM(3,"Counter on " << patchID << " = " << patchMigrationCounter << "\n");
// Count down one message per neighbor; the last arrival finalizes the step.
 6032  if (!--patchMigrationCounter) {
 6033  // DH - All atoms are now incorporated from migration.
 6034  // This is where we can separate waters from non-waters and
 6035  // perhaps sort non-waters by hydrogen group size.
 6037  if (simParams->SOAintegrateOn) {
 6038  sort_solvent_atoms();
 6039  copy_atoms_to_SOA();
 6040 #if 0
 6041  if (simParams->rigidBonds != RIGID_NONE) {
 6043  rattleListValid_SOA = true;
 6044  }
 6045 #endif
 6046  }
 6047  DebugM(3,"All Migrations are in for patch "<<patchID<<"\n");
 6048  allMigrationIn = true;
 6049  patchMigrationCounter = numNeighbors;
 6050  if (migrationSuspended) {
 6051  DebugM(3,"patch "<<patchID<<" is being awakened\n");
 6052  migrationSuspended = false;
 6053  // fprintf(stderr, "Calling awaken() on patch %d: 8\n", this->patchID);
 6054  sequencer->awaken();
 6055  return;
 6056  }
 6057  else {
 6058  DebugM(3,"patch "<<patchID<<" was not suspended\n");
 6059  }
 6060  }
 6061 }
6062 
6063 
6064 
6065 // DMK - Atom Separation (water vs. non-water)
6066 #if NAMD_SeparateWaters != 0
6067 
6068 // This function will separate waters from non-waters in the current
6069 // atom list (regardless of whether or not the atom list is has been
6070 // sorted yet or not).
6071 void HomePatch::separateAtoms() {
6073 
6074  // Basic Idea: Iterate through all the atoms in the current list
6075  // of atoms. Pack the waters in the current atoms list and move
6076  // the non-waters to the scratch list. Once the atoms have all
6077  // been separated, copy the non-waters to the end of the waters.
6078  // NOTE: This code does not assume that the atoms list has been
6079  // separated in any manner.
6080 
6081  // NOTE: Sanity check - Doesn't look like the default constructor actually
6082  // adds any atoms but does set numAtoms. ???
6083  if (atom.size() < 0) return; // Nothing to do.
6084 
6085  // Resize the scratch FullAtomList (tempAtom)
6086  tempAtom.resize(numAtoms); // NOTE: Worst case: all non-water
6087 
6088  // Define size of a water hydrogen group
6089  int wathgsize = 3;
6090  if (simParams->watmodel == WaterModel::TIP4) wathgsize = 4;
6091  else if (simParams->watmodel == WaterModel::SWM4) wathgsize = 5;
6092 
6093  // Iterate through all the atoms
6094  int i = 0;
6095  int waterIndex = 0;
6096  int nonWaterIndex = 0;
6097  while (i < numAtoms) {
6098 
6099  FullAtom &atom_i = atom[i];
6100  Mass mass = atom_i.mass;
6101  int hgs = atom_i.hydrogenGroupSize;
6102  // Check to see if this hydrogen group is a water molecule
6103  if (IS_HYDROGEN_GROUP_WATER(hgs, mass)) {
6104 
6105  // Move this hydrogen group up in the current atom list
6106  if (waterIndex != i) {
6107  atom[waterIndex ] = atom[i ]; // Oxygen
6108  atom[waterIndex + 1] = atom[i + 1]; // Hydrogen
6109  atom[waterIndex + 2] = atom[i + 2]; // Hydrogen
6110  if (wathgsize > 3) atom[waterIndex + 3] = atom[i + 3]; // lonepair
6111  if (wathgsize > 4) atom[waterIndex + 4] = atom[i + 4]; // drude
6112  // actual Drude water is arranged: O D LP H H
6113  }
6114 
6115  waterIndex += wathgsize;
6116  i += wathgsize;
6117 
6118  } else {
6119 
6120  // Move this hydrogen group into non-water (scratch) atom list
6121  for (int j = 0; j < hgs; j++)
6122  tempAtom[nonWaterIndex + j] = atom[i + j];
6123 
6124  nonWaterIndex += hgs;
6125  i += hgs;
6126  }
6127 
6128  } // end iterating through atoms
6129 
6130  // Iterate through the non-water (scratch) atom list, adding the
6131  // atoms to the end of the atom list.
6132  // NOTE: This could be done with a straight memcpy if the internal
6133  // data structures of ResizeArray could be accessed directly.
6134  // Or, perhaps add a member to ResizeArray that can add a consecutive
6135  // list of elements starting at a particular index (would be cleaner).
6136  for (i = 0; i < nonWaterIndex; i++)
6137  atom[waterIndex + i] = tempAtom[i];
6138 
6139  // Set numWaterAtoms
6140  numWaterAtoms = waterIndex;
6141 }
6142 
6143 
6144 // This function will merge the given list of atoms (not assumed to
6145 // be separated) with the current list of atoms (already assumed
6146 // to be separated).
6147 // NOTE: This function applies the transformations to the incoming
6148 // atoms as it is separating them.
6149 void HomePatch::mergeAtomList(FullAtomList &al) {
6151 
6152  // Sanity check
6153  if (al.size() <= 0) return; // Nothing to do
6154 
6155  const int orig_atomSize = atom.size();
6156  const int orig_alSize = al.size();
6157 
6158  // Resize the atom list (will eventually hold contents of both lists)
6159  atom.resize(orig_atomSize + orig_alSize); // NOTE: Will have contents of both
6160 
6161 
6162  #if 0 // version where non-waters are moved to scratch first
6163 
6164 
6165  // Basic Idea: The current list is separated already so copy the
6166  // non-water atoms out of it into the scratch atom array. Then
6167  // separate the incoming/given list (al), adding the waters to the
6168  // end of the waters in atom list and non-waters to the end of the
6169  // scratch list. At this point, all waters are in atom list and all
6170  // non-waters are in the scratch list so just copy the scratch list
6171  // to the end of the atom list.
6172  // NOTE: If al is already separated and the number of waters in it
6173  // is know, could simply move the non-waters in atoms back by that
6174  // amount and directly copy the waters in al into the created gap
6175  // and the non-waters in al to the end. Leave this as an
6176  // optimization for later since I'm not sure if this would actually
6177  // do better as the combining code (for combining migration
6178  // messages) would also have to merge the contents of the atom lists
6179  // they carry. Generally speaking, there is probably a faster way
6180  // to do this, but this will get it working.
6181 
6182  // Copy all the non-waters in the current atom list into the
6183  // scratch atom list.
6184  const int orig_atom_numNonWaters = orig_atomSize - numWaterAtoms;
6185  tempAtom.resize(orig_atom_numNonWaters + al.size()); // NOTE: Worst case
6186  for (int i = 0; i < orig_atom_numNonWaters; i++)
6187  tempAtom[i] = atom[numWaterAtoms + i];
6188 
6189  // Separate the contents of the given atom list (applying the
6190  // transforms as needed)
6191  int atom_waterIndex = numWaterAtoms;
6192  int atom_nonWaterIndex = orig_atom_numNonWaters;
6193  int i = 0;
6194  while (i < orig_alSize) {
6195 
6196  FullAtom &atom_i = al[i];
6197  int hgs = atom_i.hydrogenGroupSize;
6198  if ( hgs != atom_i.migrationGroupSize ) {
6199  NAMD_bug("HomePatch::mergeAtomList() not updated for migration groups!");
6200  }
6201  Mass mass = atom_i.mass;
6202 
6203  if (IS_HYDROGEN_GROUP_WATER(hgs, mass)) {
6204 
6205  // Apply the transforms
6206 
6207  // Oxygen (@ +0)
6208  al[i].position = lattice.nearest(al[i].position, center, &(al[i].transform));
6209  Transform mother_transform = al[i].transform;
6210 
6211  // Hydrogen (@ +1)
6212  al[i+1].position = lattice.reverse_transform(al[i+1].position, al[i+1].transform);
6213  al[i+1].position = lattice.apply_transform(al[i+1].position, mother_transform);
6214  al[i+1].transform = mother_transform;
6215 
6216  // Hydrogen (@ +2)
6217  al[i+2].position = lattice.reverse_transform(al[i+2].position, al[i+2].transform);
6218  al[i+2].position = lattice.apply_transform(al[i+2].position, mother_transform);
6219  al[i+2].transform = mother_transform;
6220 
6221  // Add to the end of the waters in the current list of atoms
6222  atom[atom_waterIndex ] = al[i ];
6223  atom[atom_waterIndex + 1] = al[i + 1];
6224  atom[atom_waterIndex + 2] = al[i + 2];
6225 
6226  atom_waterIndex += 3;
6227  i += 3;
6228 
6229  } else {
6230 
6231  // Apply the transforms
6232 
6233  // Non-Hydrogen (@ +0)
6234  al[i].position = lattice.nearest(al[i].position, center, &(al[i].transform));
6235  Transform mother_transform = al[i].transform;
6236 
6237  // Hydrogens (@ +1 -> +(hgs-1))
6238  for (int j = 1; j < hgs; j++) {
6239  al[i+j].position = lattice.reverse_transform(al[i+j].position, al[i+j].transform);
6240  al[i+j].position = lattice.apply_transform(al[i+j].position, mother_transform);
6241  al[i+j].transform = mother_transform;
6242  }
6243 
6244  // Add to the end of the non-waters (scratch) atom list
6245  for (int j = 0; j < hgs; j++)
6246  tempAtom[atom_nonWaterIndex + j] = al[i + j];
6247 
6248  atom_nonWaterIndex += hgs;
6249  i += hgs;
6250  }
6251 
6252  } // end while iterating through given atom list
6253 
6254  // Copy all the non-waters to the end of the current atom list
6255  for (int i = 0; i < atom_nonWaterIndex; i++)
6256  atom[atom_waterIndex + i] = tempAtom[i];
6257 
6258  // Set numWaterAtoms and numAtoms
6259  numWaterAtoms = atom_waterIndex;
6260  numAtoms = atom.size();
6261 
6262 
6263  #else
6264 
6265 
6266  // Basic Idea: Count the number of water atoms in the incoming atom
6267  // list then move the non-waters back in the current atom list to
6268  // make room for the incoming waters. Once there is room in the
6269  // current list, separate the incoming list as the atoms are being
6270  // added to the current list.
6271  // NOTE: Since the incoming atom list is likely to be small,
6272  // iterating over its hydrogen groups twice should not be too bad.
6273  // NOTE: This code assumes the current list is already separated,
6274  // the incoming list may not be separated, and the transforms are
6275  // applied to the incoming atoms as the separation occurs.
6276 
6277  // size of a water hydrogen group
6278  int wathgsize = 3;
6279  if (simParams->watmodel == WaterModel::TIP4) wathgsize = 4;
6280  else if (simParams->watmodel == WaterModel::SWM4) wathgsize = 5;
6281 
6282  // Count the number of waters in the given atom list
6283  int al_numWaterAtoms = 0;
6284  int i = 0;
6285  while (i < orig_alSize) {
6286 
6287  FullAtom &atom_i = al[i];
6288  int hgs = atom_i.hydrogenGroupSize;
6289  Mass mass = atom_i.mass;
6290 
6291  if (IS_HYDROGEN_GROUP_WATER(hgs, mass)) {
6292  al_numWaterAtoms += wathgsize;
6293  }
6294 
6295  i += hgs;
6296  }
6297 
6298  // Move all of the non-waters in the current atom list back (to a
6299  // higher index) by the number of waters in the given list.
6300  if (al_numWaterAtoms > 0) {
6301  for (i = orig_atomSize - 1; i >= numWaterAtoms; i--) {
6302  atom[i + al_numWaterAtoms] = atom[i];
6303  }
6304  }
6305 
6306  // Separte the atoms in the given atom list. Perform the
6307  // transformations on them and then add them to the appropriate
6308  // location in the current atom list.
6309  int atom_waterIndex = numWaterAtoms;
6310  int atom_nonWaterIndex = orig_atomSize + al_numWaterAtoms;
6311  i = 0;
6312  while (i < orig_alSize) {
6313 
6314  FullAtom &atom_i = al[i];
6315  int hgs = atom_i.hydrogenGroupSize;
6316  if ( hgs != atom_i.migrationGroupSize ) {
6317  NAMD_bug("HomePatch::mergeAtomList() not updated for migration groups!");
6318  }
6319  Mass mass = atom_i.mass;
6320 
6321  if (IS_HYDROGEN_GROUP_WATER(hgs, mass)) {
6322 
6323  // Apply the transforms
6324 
6325  // Oxygen (@ +0)
6326  al[i].position = lattice.nearest(al[i].position, center, &(al[i].transform));
6327  Transform mother_transform = al[i].transform;
6328 
6329  // Hydrogen (@ +1)
6330  al[i+1].position = lattice.reverse_transform(al[i+1].position, al[i+1].transform);
6331  al[i+1].position = lattice.apply_transform(al[i+1].position, mother_transform);
6332  al[i+1].transform = mother_transform;
6333 
6334  // Hydrogen (@ +2)
6335  al[i+2].position = lattice.reverse_transform(al[i+2].position, al[i+2].transform);
6336  al[i+2].position = lattice.apply_transform(al[i+2].position, mother_transform);
6337  al[i+2].transform = mother_transform;
6338 
6339  // Add to the end of the waters in the current list of atoms
6340  atom[atom_waterIndex ] = al[i ];
6341  atom[atom_waterIndex + 1] = al[i + 1];
6342  atom[atom_waterIndex + 2] = al[i + 2];
6343 
6344  if (wathgsize > 3) atom[atom_waterIndex + 3] = al[i + 3];
6345 
6346  atom_waterIndex += wathgsize;
6347  i += wathgsize;
6348 
6349  } else {
6350 
6351  // Apply the transforms
6352 
6353  // Non-Hydrogen (@ +0)
6354  al[i].position = lattice.nearest(al[i].position, center, &(al[i].transform));
6355  Transform mother_transform = al[i].transform;
6356 
6357  // Hydrogens (@ +1 -> +(hgs-1))
6358  for (int j = 1; j < hgs; j++) {
6359  al[i+j].position = lattice.reverse_transform(al[i+j].position, al[i+j].transform);
6360  al[i+j].position = lattice.apply_transform(al[i+j].position, mother_transform);
6361  al[i+j].transform = mother_transform;
6362  }
6363 
6364  // Add to the end of the non-waters (scratch) atom list
6365  for (int j = 0; j < hgs; j++)
6366  atom[atom_nonWaterIndex + j] = al[i + j];
6367 
6368  atom_nonWaterIndex += hgs;
6369  i += hgs;
6370  }
6371 
6372  } // end while iterating through given atom list
6373 
6374  // Set numWaterAtoms and numAtoms
6375  numWaterAtoms = atom_waterIndex;
6376  numAtoms = atom_nonWaterIndex;
6377 
6378  #endif
6379 }
6380 
6381 #endif
6382 
6383 
6384 
// Solve the linear system A*x = b by forward/back substitution, given
// the LU decomposition of A produced by ludcmp() below (adapted from
// the classic Numerical Recipes "lubksb" routine).
//   a    - the LU-decomposed matrix returned by ludcmp()
//   n    - dimension of the (n x n) system
//   indx - row-permutation record filled in by ludcmp()
//   b    - on input the right-hand side; on output, overwritten with
//          the solution vector x
inline void lubksb(HGMatrixBigReal &a, int n, HGArrayInt &indx,
                   HGArrayBigReal &b)
{
  int i,ii=-1,ip,j;
  double sum;

  // Forward substitution with the lower-triangular factor,
  // unscrambling the row permutation as we go.  ii records the index
  // of the first nonzero element of b so leading zeros are skipped.
  for (i=0;i<n;i++) {
    ip=indx[i];
    sum=b[ip];
    b[ip]=b[i];
    if (ii >= 0)
      for (j=ii;j<i;j++) sum -= a[i][j]*b[j];
    else if (sum) ii=i;  // first nonzero rhs element encountered
    b[i]=sum;
  }
  // Back substitution with the upper-triangular factor.
  for (i=n-1;i>=0;i--) {
    sum=b[i];
    for (j=i+1;j<n;j++) sum -= a[i][j]*b[j];
    b[i]=sum/a[i][i];
  }
}
6406 
6407 
// LU-decompose the (n x n) matrix a in place using Crout's method with
// implicit-pivoting row scaling (adapted from the classic Numerical
// Recipes "ludcmp" routine).  On return, a holds the combined L and U
// factors, indx records the row permutation, and *d is +1 or -1
// according to whether the number of row interchanges was even or odd.
// Terminates the run via NAMD_die() if the matrix is singular.
// Use with lubksb() above to solve linear systems.
inline void ludcmp(HGMatrixBigReal &a, int n, HGArrayInt &indx, BigReal *d)
{

  int i,imax,j,k;
  double big,dum,sum,temp;
  HGArrayBigReal vv;  // implicit scaling factor for each row
  *d=1.0;
  // Find the largest element in each row to establish the scaling.
  for (i=0;i<n;i++) {
    big=0.0;
    for (j=0;j<n;j++)
      if ((temp=fabs(a[i][j])) > big) big=temp;
    if (big == 0.0) NAMD_die("Singular matrix in routine ludcmp\n");
    vv[i]=1.0/big;
  }
  // Crout's method: loop over columns, computing the U entries above
  // the diagonal, then the scaled L candidates below it while
  // searching for the best pivot row.
  for (j=0;j<n;j++) {
    for (i=0;i<j;i++) {
      sum=a[i][j];
      for (k=0;k<i;k++) sum -= a[i][k]*a[k][j];
      a[i][j]=sum;
    }
    big=0.0;
    for (i=j;i<n;i++) {
      sum=a[i][j];
      for (k=0;k<j;k++)
        sum -= a[i][k]*a[k][j];
      a[i][j]=sum;
      if ( (dum=vv[i]*fabs(sum)) >= big) {
        big=dum;
        imax=i;
      }
    }
    // Interchange rows if a better pivot was found lower down.
    if (j != imax) {
      for (k=0;k<n;k++) {
        dum=a[imax][k];
        a[imax][k]=a[j][k];
        a[j][k]=dum;
      }
      *d = -(*d);  // row swap flips the permutation parity
      vv[imax]=vv[j];
    }
    indx[j]=imax;
    // Guard against a zero pivot.  NOTE: the TINY macro at the top of
    // this file is defined with a trailing semicolon, which happens to
    // be harmless in this statement position only.
    if (a[j][j] == 0.0) a[j][j]=TINY;
    // Divide the sub-diagonal column entries by the pivot.
    if (j != n-1) {
      dum=1.0/(a[j][j]);
      for (i=j+1;i<n;i++) a[i][j] *= dum;
    }
  }
}
6456 
6457 
6458 inline void G_q(const HGArrayVector &refab, HGMatrixVector &gqij,
6459  const int n, const int m, const HGArrayInt &ial, const HGArrayInt &ibl) {
6460  int i;
6461  // step through the rows of the matrix
6462  for(i=0;i<m;i++) {
6463  gqij[i][ial[i]]=2.0*refab[i];
6464  gqij[i][ibl[i]]=-gqij[i][ial[i]];
6465  }
6466 };
6467 
6468 
6469 // c-ji code for MOLLY 7-31-99
// c-ji code for MOLLY 7-31-99
// Compute the "averaged" (SHAKE-projected) positions qtilde of a
// hydrogen group by solving the bond-length constraint equations with
// Newton's method; each Newton step solves a dense m x m linear
// system via ludcmp()/lubksb() above.  The converged Lagrange
// multipliers are returned in lambda for later use by mollify().
int average(CompAtom *qtilde,const HGArrayVector &q,BigReal *lambda,const int n,const int m, const HGArrayBigReal &imass, const HGArrayBigReal &length2, const HGArrayInt &ial, const HGArrayInt &ibl, const HGArrayVector &refab, const BigReal tolf, const int ntrial) {
  // input:  n = length of hydrogen group to be averaged (shaked)
  //         q[n] = vector array of original positions
  //         m = number of constraints
  //         imass[n] = inverse mass for each atom
  //         length2[m] = square of reference bond length for each constraint
  //         ial[m] = atom a in each constraint
  //         ibl[m] = atom b in each constraint
  //         refab[m] = vector of q_ial(i) - q_ibl(i) for each constraint
  //         tolf = function error tolerance for Newton's iteration
  //         ntrial = max number of Newton's iterations
  // output: lambda[m] = double array of lagrange multipliers (used by mollify)
  //         qtilde[n] = vector array of averaged (shaked) positions
  // returns: the number of Newton iterations performed (ntrial+1 if
  //          the loop ran to completion without meeting either
  //          tolerance -- callers should treat that as non-convergence)

  int k,k1,i,j;
  BigReal errx,errf,d,tolx;

  HGArrayInt indx;          // pivot record for ludcmp/lubksb
  HGArrayBigReal p;         // Newton step in lambda
  HGArrayBigReal fvec;      // constraint residuals
  HGMatrixBigReal fjac;     // Jacobian of the constraints w.r.t. lambda
  HGArrayVector avgab;      // current bond vectors of qtilde
  HGMatrixVector grhs;      // G_q at the reference positions
  HGMatrixVector auxrhs;    // M^{-1} G_q^T scaled by lambda
  HGMatrixVector glhs;      // G_q at the current qtilde

  // iout <<iINFO << "average: n="<<n<<" m="<<m<<std::endl<<endi;
  // use the same tolerance for the step size as for the residual
  tolx=tolf;

  // initialize lambda, globalGrhs

  for (i=0;i<m;i++) {
    lambda[i]=0.0;
  }

  // define grhs, auxrhs for all iterations
  // grhs= g_x(q)
  //
  G_q(refab,grhs,n,m,ial,ibl);
  // Newton iteration: at most ntrial steps
  for (k=1;k<=ntrial;k++) {
    // usrfun(qtilde,q0,lambda,fvec,fjac,n,water);
    HGArrayBigReal gij;
    // this used to be main loop of usrfun
    // compute qtilde given q0, lambda, IMASSes:
    //   qtilde = q + M^{-1} G_q(q)^T lambda
    {
      BigReal multiplier;
      // NOTE(review): tmp is accumulated below without an explicit
      // zeroing (the "tmp[j]=0.0;" line is commented out) -- this
      // relies on the Vector default constructor producing zeros;
      // confirm against the Vector class definition.
      HGArrayVector tmp;
      for (i=0;i<m;i++) {
        multiplier = lambda[i];
        // auxrhs = M^{-1}grhs^{T}
        for (j=0;j<n;j++) {
          auxrhs[i][j]=multiplier*imass[j]*grhs[i][j];
        }
      }
      for (j=0;j<n;j++) {
        // tmp[j]=0.0;
        for (i=0;i<m;i++) {
          tmp[j]+=auxrhs[i][j];
        }
      }

      for (j=0;j<n;j++) {
        qtilde[j].position=q[j]+tmp[j];
      }
      // delete [] tmp;
    }

    // current bond vectors of the trial positions
    for ( i = 0; i < m; i++ ) {
      avgab[i] = qtilde[ial[i]].position - qtilde[ibl[i]].position;
    }

    // iout<<iINFO << "Calling Jac" << std::endl<<endi;
    // Jac(qtilde, q0, fjac,n,water);
    // Build the Newton Jacobian fjac = G_q(qtilde) M^{-1} G_q(q0)^T
    {
      // Vector glhs[3*n+3];

      HGMatrixVector grhs2;

      G_q(avgab,glhs,n,m,ial,ibl);
#ifdef DEBUG0
      iout<<iINFO << "G_q:" << std::endl<<endi;
      for (i=0;i<m;i++) {
        iout<<iINFO << glhs[i*n+0] << " " << glhs[i*n+1] << " " << glhs[i*n+2] << std::endl<<endi;
      }
#endif
      // G_q(refab,grhs2,m,ial,ibl);
      // update with the masses
      for (j=0; j<n; j++) { // number of atoms
        for (i=0; i<m;i++) { // number of constraints
          grhs2[i][j] = grhs[i][j]*imass[j];
        }
      }

      // G_q(qtilde) * M^-1 G_q'(q0) =
      // G_q(qtilde) * grhs'
      for (i=0;i<m;i++) { // number of constraints
        for (j=0;j<m;j++) { // number of constraints
          fjac[i][j] = 0;
          for (k1=0;k1<n;k1++) {
            fjac[i][j] += glhs[i][k1]*grhs2[j][k1];
          }
        }
      }
#ifdef DEBUG0
      iout<<iINFO << "glhs" <<endi;
      for(i=0;i<9;i++) {
        iout<<iINFO << glhs[i] << ","<<endi;
      }
      iout<<iINFO << std::endl<<endi;
      for(i=0;i<9;i++) {
        iout<<iINFO << grhs2[i] << ","<<endi;
      }
      iout<<iINFO << std::endl<<endi;
#endif
      // delete[] grhs2;
    }
    // end of Jac calculation
#ifdef DEBUG0
    iout<<iINFO << "Jac" << std::endl<<endi;
    for (i=0;i<m;i++)
      for (j=0;j<m;j++)
        iout<<iINFO << fjac[i][j] << " "<<endi;
    iout<< std::endl<<endi;
#endif
    // calculate constraints in gij for n constraints this being a water
    // G(qtilde, gij, n, water);
    // residual of each constraint: |r_ab|^2 - length^2
    for (i=0;i<m;i++) {
      gij[i]=avgab[i]*avgab[i]-length2[i];
    }
#ifdef DEBUG0
    iout<<iINFO << "G" << std::endl<<endi;
    iout<<iINFO << "( "<<endi;
    for(i=0;i<m-1;i++) {
      iout<<iINFO << gij[i] << ", "<<endi;
    }
    iout<<iINFO << gij[m-1] << ")" << std::endl<<endi;
#endif
    // fill the return vector
    for(i=0;i<m;i++) {
      fvec[i] = gij[i];
    }
    // free up the constraints
    // delete[] gij;
    // continue Newton's iteration
    // convergence test on the residual (L1 norm)
    errf=0.0;
    for (i=0;i<m;i++) errf += fabs(fvec[i]);
#ifdef DEBUG0
    iout<<iINFO << "errf: " << errf << std::endl<<endi;
#endif
    if (errf <= tolf) {
      break;
    }
    // solve fjac * p = -fvec for the Newton step
    for (i=0;i<m;i++) p[i] = -fvec[i];
    // iout<<iINFO << "Doing dcmp in average " << std::endl<<endi;
    ludcmp(fjac,m,indx,&d);
    lubksb(fjac,m,indx,p);

    // convergence test on the step size (L1 norm), then update lambda
    errx=0.0;
    for (i=0;i<m;i++) {
      errx += fabs(p[i]);
    }
    for (i=0;i<m;i++)
      lambda[i] += p[i];

#ifdef DEBUG0
    iout<<iINFO << "lambda:" << lambda[0]
        << " " << lambda[1] << " " << lambda[2] << std::endl<<endi;
    iout<<iINFO << "errx: " << errx << std::endl<<endi;
#endif
    if (errx <= tolx) break;
#ifdef DEBUG0
    iout<<iINFO << "Qtilde:" << std::endl<<endi;
    iout<<iINFO << qtilde[0].position << " " << qtilde[1].position << " " << qtilde[2].position << std::endl<<endi;
#endif
  }
#ifdef DEBUG
  iout<<iINFO << "LAMBDA:" << lambda[0] << " " << lambda[1] << " " << lambda[2] << std::endl<<endi;
#endif

  return k; //
}
6651 
// Apply the MOLLY "mollification" to a hydrogen group's forces:
// project the raw forces through the transpose of the Jacobian of the
// averaging (SHAKE) map computed by average() above, so that the
// mollified forces are consistent with the averaged positions.
//   qtilde - averaged positions from average()
//   q0     - original positions (unused here beyond refab, kept for
//            interface symmetry)
//   lambda - Lagrange multipliers computed by average()
//   force  - in/out: raw forces on input, mollified forces on output
//   n, m   - number of atoms and number of constraints
//   imass  - inverse mass per atom
//   ial/ibl- constraint atom index pairs
//   refab  - reference bond vectors q0_a - q0_b per constraint
void mollify(CompAtom *qtilde,const HGArrayVector &q0,const BigReal *lambda, HGArrayVector &force,const int n, const int m, const HGArrayBigReal &imass,const HGArrayInt &ial,const HGArrayInt &ibl,const HGArrayVector &refab) {
  int i,j,k;
  BigReal d;
  HGMatrixBigReal fjac;
  Vector zero(0.0,0.0,0.0);

  HGArrayVector tmpforce;   // mass-weighted forces, later reused as accumulator
  HGArrayVector tmpforce2;  // mass-weighted projected forces
  HGArrayVector y;          // forces with the constraint component removed
  HGMatrixVector grhs;      // G_q at the reference positions
  HGMatrixVector glhs;      // G_q at the averaged positions
  HGArrayBigReal aux;       // rhs/solution of the m x m system
  HGArrayInt indx;          // pivot record for ludcmp/lubksb

  // tmpforce = M^{-1} force
  for(i=0;i<n;i++) {
    tmpforce[i]=imass[i]*force[i];
  }

  HGMatrixVector grhs2;
  HGArrayVector avgab;

  // bond vectors of the averaged positions
  for ( i = 0; i < m; i++ ) {
    avgab[i] = qtilde[ial[i]].position - qtilde[ibl[i]].position;
  }

  G_q(avgab,glhs,n,m,ial,ibl);
  G_q(refab,grhs,n,m,ial,ibl);
  // update with the masses
  for (j=0; j<n; j++) { // number of atoms
    for (i=0; i<m;i++) { // number of constraints
      grhs2[i][j] = grhs[i][j]*imass[j];
    }
  }

  // G_q(qtilde) * M^-1 G_q'(q0) =
  // G_q(qtilde) * grhs'
  // NOTE(review): fjac is filled as [j][i] here, i.e. the transpose
  // of the ordering used in average() -- presumably intentional since
  // this routine applies the transposed Jacobian; confirm against the
  // MOLLY derivation.
  for (i=0;i<m;i++) { // number of constraints
    for (j=0;j<m;j++) { // number of constraints
      fjac[j][i] = 0;
      for (k=0;k<n;k++) {
        fjac[j][i] += glhs[i][k]*grhs2[j][k];
      }
    }
  }

  // aux=gqij*tmpforce
  // globalGrhs::computeGlobalGrhs(q0,n,water);
  // G_q(refab,grhs,m,ial,ibl);
  for(i=0;i<m;i++) {
    aux[i]=0.0;
    for(j=0;j<n;j++) {
      aux[i]+=grhs[i][j]*tmpforce[j];
    }
  }

  // solve fjac * aux = G_q(q0) M^{-1} force for the multiplier terms
  ludcmp(fjac,m,indx,&d);
  lubksb(fjac,m,indx,aux);

  // y = force minus the component along the constraint gradients
  for(j=0;j<n;j++) {
    y[j] = zero;
    for(i=0;i<m;i++) {
      y[j] += aux[i]*glhs[i][j];
    }
  }
  for(i=0;i<n;i++) {
    y[i]=force[i]-y[i];
  }

  // gqq12*y
  for(i=0;i<n;i++) {
    tmpforce2[i]=imass[i]*y[i];
  }

  // here we assume that tmpforce is initialized to zero.
  for (i=0;i<n;i++) {
    tmpforce[i]=zero;
  }

  // accumulate the second-derivative (curvature) contribution of each
  // constraint, weighted by its Lagrange multiplier
  for (j=0;j<m;j++) {
    Vector tmpf = 2.0*lambda[j]*(tmpforce2[ial[j]]-tmpforce2[ibl[j]]);
    tmpforce[ial[j]] += tmpf;
    tmpforce[ibl[j]] -= tmpf;
  }
  // c-ji the other bug for 2 constraint water was this line (2-4-99)
  // for(i=0;i<m;i++) {
  for(i=0;i<n;i++) {
    force[i]=tmpforce[i]+y[i];
  }

}
6742 
static Node * Object()
Definition: Node.h:86
void doMarginCheck_SOA()
Definition: HomePatch.C:5639
void PatchDataSOA_set_buffer(PatchDataSOA *p, void *mybuffer)
Definition: HomePatch.C:2397
double * vel_y
Definition: NamdTypes.h:397
void depositMigration(MigrateAtomsMsg *)
Definition: HomePatch.C:5962
void copy(ResizeArray< Elem > &ra)
Definition: ResizeArray.h:59
void recvCheckpointLoad(CheckpointAtomsMsg *msg)
Definition: HomePatch.C:5301
BigReal zy
Definition: Tensor.h:19
template void settle1_SIMD< 1 >(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
void sendProxies()
Definition: HomePatch.C:509
#define NAMD_EVENT_STOP(eon, id)
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
Data * clientOpen(int count=1)
Definition: OwnerBox.h:58
int ib
Definition: Settle.h:58
int * lcpoTypeList
Definition: ProxyMgr.h:112
float q
Definition: CudaRecord.h:59
int numNodesWithPatches(void)
Definition: PatchMap.h:61
void runSequencer(void)
Definition: HomePatch.C:305
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
Definition: HomePatch.C:4382
void settle1_SOA(const double *__restrict ref_x, const double *__restrict ref_y, const double *__restrict ref_z, double *__restrict pos_x, double *__restrict pos_y, double *__restrict pos_z, int numWaters, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
Definition: Settle.C:1487
int size(void) const
Definition: ResizeArray.h:131
RealList intRad
Definition: Patch.h:162
void positionsReady_SOA(int doMigration=0)
Definition: HomePatch.C:971
int32 * isWater
Definition: NamdTypes.h:386
NAMD_HOST_DEVICE Vector c() const
Definition: Lattice.h:270
BigReal xz
Definition: Tensor.h:17
void registerProxy(RegisterProxyMsg *)
Definition: HomePatch.C:443
void updateAtomBuffers()
OwnerBox< Patch, Results > forceBox
Definition: Patch.h:246
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
int marginViolations
Definition: HomePatch.h:401
NAMD_HOST_DEVICE Position reverse_transform(Position data, const Transform &t) const
Definition: Lattice.h:143
void reserve(int i)
Definition: ResizeArray.h:88
#define BOLTZMANN
Definition: common.h:54
int32 atom2
Definition: structures.h:123
SortedArray< LSSSubsDat > & lssSubs(ComputeQMMgr *mgr)
Definition: ComputeQM.C:596
#define TINY
Definition: HomePatch.C:54
double * f_normal_z
Definition: NamdTypes.h:430
int flLen[Results::maxNumForces]
Definition: ProxyMgr.h:179
int32 * groupFixed
Definition: NamdTypes.h:394
BigReal max_a(int pid) const
Definition: PatchMap.h:92
const int * get_qmAtmIndx()
Definition: Molecule.h:863
double * f_normal_y
Definition: NamdTypes.h:429
Lattice & lattice
Definition: Patch.h:127
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
double * posNew_z
Definition: NamdTypes.h:453
static PatchMap * Object()
Definition: PatchMap.h:27
void clientRemove()
Definition: OwnerBox.h:91
void sendProxies(int pid, int *list, int n)
Definition: ProxyMgr.C:599
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
Definition: AtomMap.C:50
int32 * exclId
Definition: NamdTypes.h:391
int32 * unsortOrder
Definition: NamdTypes.h:389
double * f_saved_slow_z
Definition: NamdTypes.h:445
NAMD_HOST_DEVICE Tensor outer(const Vector &v1, const Vector &v2)
Definition: Tensor.h:241
double * f_slow_y
Definition: NamdTypes.h:435
CompAtom * velocityPtrEnd
Definition: Patch.h:209
int exchange_dst
Definition: HomePatch.h:514
Definition: Vector.h:72
int32 numhosts
Either 2 or 3 host atoms, depending on LP type.
Definition: structures.h:126
#define NAMD_SeparateWaters
DMK - Atom Separation (water vs. non-water)
Definition: common.h:204
SimParameters * simParameters
Definition: Node.h:181
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3319
int get_numQMAtoms()
Definition: Molecule.h:865
void sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
Definition: ProxyMgr.C:1159
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3788
float z
Definition: CudaRecord.h:59
MigrationList migrationList
void rattle2(const BigReal, Tensor *virial)
Definition: HomePatch.C:4249
int savePairlists
Definition: PatchTypes.h:41
double * f_global_z
Definition: NamdTypes.h:439
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg)
Definition: HomePatch.C:671
NAMD_HOST_DEVICE Position unscale(ScaledPosition s) const
Definition: Lattice.h:77
float Real
Definition: common.h:118
#define COULOMB
Definition: common.h:53
BigReal & item(int i)
Definition: ReductionMgr.h:336
void gbisComputeAfterP2()
Definition: HomePatch.C:4943
#define DebugM(x, y)
Definition: Debug.h:75
void MSHAKEIterate(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:830
void sendExchangeReq(int pid, int src)
Definition: PatchMgr.C:388
Real dihedral
Definition: structures.h:129
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
BigReal z
Definition: Vector.h:74
Real distance
Definition: structures.h:127
int32 atom3
Definition: structures.h:124
float x
Definition: CudaRecord.h:59
CompAtom * avgPositionPtrEnd
Definition: Patch.h:205
void receiveResults(ProxyResultVarsizeMsg *msg)
Definition: HomePatch.C:837
RealList dHdrPrefix
Definition: Patch.h:166
char const *const NamdProfileEventStr[]
int usePairlists
Definition: PatchTypes.h:40
Position position
Definition: NamdTypes.h:78
BigReal dsq
Definition: Settle.h:59
ExchangeAtomsMsg * exchange_msg
Definition: HomePatch.h:517
BigReal yz
Definition: Tensor.h:18
int32 maxAtoms
max number of atoms available, multiple of MAXFACTOR
Definition: NamdTypes.h:457
#define PROXY_DATA_PRIORITY
Definition: Priorities.h:40
CompAtomList v
Definition: Patch.h:156
std::ostream & iWARN(std::ostream &s)
Definition: InfoStream.C:82
virtual void boxClosed(int)
Definition: HomePatch.C:370
int inMigration
Definition: HomePatch.h:569
GBRealList psiFin
Definition: Patch.h:164
void doGroupSizeCheck()
Definition: HomePatch.C:5590
BigReal HGMatrixBigReal[MAXHGS][MAXHGS]
Definition: HomePatch.C:68
CompAtom * avgPositionList
Definition: ProxyMgr.h:104
void loweAndersenVelocities()
Definition: HomePatch.C:4866
#define iout
Definition: InfoStream.h:51
int doLoweAndersen
Definition: PatchTypes.h:28
double * f_saved_slow_y
Definition: NamdTypes.h:444
CompAtom * velocityList
Definition: ProxyMgr.h:107
float * langScalVelBBK2
derived from langevinParam
Definition: NamdTypes.h:419
void sortAtomsForCUDA(int *order, const FullAtom *atoms, int nfree, int n)
Definition: SortAtoms.C:123
int add(const Elem &elem)
Definition: ResizeArray.h:101
void recvCheckpointReq(int task, const char *key, int replica, int pe)
Definition: HomePatch.C:5271
CudaAtom * cudaAtomPtr
Definition: Patch.h:212
void exchangeCheckpoint(int scriptTask, int &bpc)
Definition: HomePatch.C:5263
AtomMapper * atomMapper
Definition: Patch.h:159
double * f_saved_slow_x
Definition: NamdTypes.h:443
template void settle1_SIMD< 2 >(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
float * gaussrand_y
Definition: NamdTypes.h:424
FullAtom * atoms
Definition: PatchMgr.h:89
GBRealList dEdaSum
Definition: Patch.h:167
Molecule stores the structural information for the system.
Definition: Molecule.h:174
double * pos_y
Definition: NamdTypes.h:378
void gbisP3Ready()
Definition: HomePatch.C:5007
double * velNew_z
Definition: NamdTypes.h:450
bool rattleListValid
Definition: HomePatch.h:453
double * fixedPosition_y
Definition: NamdTypes.h:401
void positionsReady(int doMigration=0)
Definition: HomePatch.C:1895
int32 * migrationGroupSize
Definition: NamdTypes.h:414
void recvCheckpointAck()
Definition: HomePatch.C:5365
int settle2(BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial)
Definition: Settle.C:1473
std::vector< RattleList > rattleList
Definition: HomePatch.h:449
bool gridForceIdxChecked
Definition: HomePatch.h:402
Definition: Patch.h:35
void patchLoad(PatchID id, int nAtoms, int timestep)
float * mass
Definition: NamdTypes.h:405
Flags flags
Definition: Patch.h:128
double * f_global_y
Definition: NamdTypes.h:438
void resize(int i)
Definition: ResizeArray.h:84
double * f_nbond_y
Definition: NamdTypes.h:432
int32 index
Definition: NamdTypes.h:300
void setall(const Elem &elem)
Definition: ResizeArray.h:94
uint32 id
Definition: NamdTypes.h:160
void revert(void)
Definition: HomePatch.C:5232
Charge charge
Definition: NamdTypes.h:79
int32 atom4
Definition: structures.h:125
void sendCheckpointAck(int pid, int dst, int dstpe)
Definition: PatchMgr.C:360
static ProxyNodeAwareSpanningTreeMsg * getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size)
Definition: ProxyMgr.C:196
void reorder(Elem *a, int n)
Definition: Random.h:234
void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist, BigReal &mO, BigReal &mH, BigReal &mOrmT, BigReal &mHrmT, BigReal &ra, BigReal &rb, BigReal &rc, BigReal &rra)
initialize cached water properties
Definition: Settle.C:46
NodeIDList tree
Definition: ProxyMgr.h:265
void doGroupSizeCheck_SOA()
Definition: HomePatch.C:5537
double * f_nbond_z
Definition: NamdTypes.h:433
void positionsReady_GPU(int doMigration=0, int startup=0)
int newVdWType
Definition: ComputeQM.h:33
#define PRIORITY_SIZE
Definition: Priorities.h:13
int32 * hydrogenGroupSize
Definition: NamdTypes.h:385
size_t numBytes
number of bytes allocated for soa_buffer
Definition: NamdTypes.h:455
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
Definition: common.h:228
Real r_ohc
Definition: Molecule.h:496
Lattice lattice
Definition: PatchMgr.h:109
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:4659
void updateAtomCount(const int n, const int reallocate)
double * f_normal_x
Definition: NamdTypes.h:428
void qmSwapAtoms()
Definition: HomePatch.C:907
BigReal HGArrayBigReal[MAXHGS]
Definition: HomePatch.C:66
float * langevinParam
Definition: NamdTypes.h:406
Definition: Random.h:37
float * gaussrand_x
fill with Gaussian distributed random numbers
Definition: NamdTypes.h:423
int const * getLcpoParamType()
Definition: Molecule.h:508
void awaken(void)
Definition: Sequencer.h:55
#define NAMD_EVENT_START(eon, id)
uint32_t uint32
Definition: common.h:42
int32 * vdwType
Definition: NamdTypes.h:382
int boxesOpen
Definition: Patch.h:250
#define NAMD_GROUP_FIXED_MASK
Definition: NamdTypes.h:146
IntList lcpoType
Definition: Patch.h:171
int32 * sortOrder
Definition: NamdTypes.h:388
void LINCS(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:936
static int compDistance(const void *a, const void *b)
Definition: HomePatch.C:497
void unregisterProxy(UnregisterProxyMsg *)
Definition: HomePatch.C:457
NAMD_HOST_DEVICE BigReal length(void) const
Definition: Vector.h:202
GBRealList psiSum
Definition: Patch.h:163
GBReal * dEdaSum
Definition: ProxyMgr.h:51
CudaAtom * cudaAtomList
Definition: ProxyMgr.h:123
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137
void NAMD_bug(const char *err_msg)
Definition: common.C:195
void gbisP2Ready()
Definition: Patch.C:598
ResizeArray< FullAtom > atoms
Definition: HomePatch.h:506
template void rattlePair< 1 >(const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, bool &consFailure)
int oneAwayNeighbors(int pid, PatchID *neighbor_ids=0)
Definition: PatchMap.C:532
void mollify(CompAtom *qtilde, const HGArrayVector &q0, const BigReal *lambda, HGArrayVector &force, const int n, const int m, const HGArrayBigReal &imass, const HGArrayInt &ial, const HGArrayInt &ibl, const HGArrayVector &refab)
Definition: HomePatch.C:6652
float * gaussrand_z
Definition: NamdTypes.h:425
zVector HGMatrixVector[MAXHGS][MAXHGS]
Definition: HomePatch.C:69
BigReal min_c(int pid) const
Definition: PatchMap.h:95
int doFullElectrostatics
Definition: PatchTypes.h:23
BigReal yx
Definition: Tensor.h:18
int32 migrationGroupSize
Definition: NamdTypes.h:230
std::map< std::string, checkpoint_t * > checkpoints
Definition: HomePatch.h:508
int numaway_c(void) const
Definition: PatchMap.h:70
double * vel_x
Jim recommends double precision velocity.
Definition: NamdTypes.h:396
int32 * id
Definition: NamdTypes.h:390
void submitLoadStats(int timestep)
Definition: HomePatch.C:5428
CompAtomList p_avg
Definition: Patch.h:154
void mollyMollify(Tensor *virial)
Definition: HomePatch.C:5159
int16 vdwType
Definition: NamdTypes.h:80
int numFepInitial
Definition: Molecule.h:643
float * charge
Definition: NamdTypes.h:381
int rattle1old(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3979
int plExtLen
Definition: ProxyMgr.h:121
NAMD_HOST_DEVICE ScaledPosition scale(Position p) const
Definition: Lattice.h:83
~HomePatch()
Definition: HomePatch.C:357
Force * f[maxNumForces]
Definition: PatchTypes.h:150
void buildRattleList()
Definition: HomePatch.C:3616
NodeID destNodeID
Definition: Migration.h:21
void clientClose(int count=1)
Definition: OwnerBox.h:62
CompAtomList p
Definition: Patch.h:153
void recvSpanningTree(int *t, int n)
Definition: HomePatch.C:677
LocalID localID(AtomID id)
Definition: AtomMap.h:78
Real * get_qmAtmChrg()
Definition: Molecule.h:862
NAMD_HOST_DEVICE BigReal length2(void) const
Definition: Vector.h:206
static Sync * Object()
Definition: Sync.h:52
int numAtoms
Definition: Patch.h:151
void run(void)
Definition: Sequencer.C:269
void replaceForces(ExtForce *f)
Definition: HomePatch.C:2310
std::vector< int > settleList
Definition: HomePatch.h:448
#define NAMD_ATOM_FIXED_MASK
Definition: NamdTypes.h:145
int32 * transform_i
Definition: NamdTypes.h:410
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89
const_iterator const_begin(void) const
Definition: ResizeArray.h:39
PatchID getPatchID() const
Definition: Patch.h:114
void sendMigrationMsgs(PatchID, MigrationInfo *, int)
Definition: PatchMgr.C:175
int numaway_a(void) const
Definition: PatchMap.h:68
const Real * get_qmAtomGroup() const
Definition: Molecule.h:859
int maxForceUsed
Definition: PatchTypes.h:33
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
double * f_saved_nbond_x
Definition: NamdTypes.h:440
int sequence
Definition: PatchTypes.h:18
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
int32 * partition
Definition: NamdTypes.h:383
void gbisComputeAfterP1()
Definition: HomePatch.C:4915
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
Definition: Lattice.h:95
int doNonbonded
Definition: PatchTypes.h:22
void NAMD_die(const char *err_msg)
Definition: common.C:147
void sendNodeAwareSpanningTree()
Definition: HomePatch.C:672
Force force
Definition: NamdTypes.h:307
void lubksb(HGMatrixBigReal &a, int n, HGArrayInt &indx, HGArrayBigReal &b)
Definition: HomePatch.C:6385
double * f_saved_nbond_z
Definition: NamdTypes.h:442
static LdbCoordinator * Object()
BigReal min_a(int pid) const
Definition: PatchMap.h:91
void setLcpoType()
Definition: HomePatch.C:4889
CompAtom * avgPositionPtrBegin
Definition: Patch.h:204
double * posNew_y
Definition: NamdTypes.h:452
PatchID pid
Definition: NamdTypes.h:299
int exchange_req
Definition: HomePatch.h:516
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
void doPairlistCheck()
Definition: HomePatch.C:5438
int32 NodeID
Definition: NamdTypes.h:289
int berendsenPressure_count
Definition: Sequencer.h:294
int32 nodeID
Definition: NamdTypes.h:327
static AtomMap * Object()
Definition: AtomMap.h:37
void gbisP3Ready()
Definition: Patch.C:614
void sortAtomsForCUDA_SOA(int *__restrict order, int *__restrict unorder, const double *__restrict ax, const double *__restrict ay, const double *__restrict az, int nfree, int n)
Definition: SortAtoms.C:317
void doAtomMigration()
Definition: HomePatch.C:5798
NAMD_HOST_DEVICE Vector b() const
Definition: Lattice.h:269
MigrateAtomsMsg * msgbuf[PatchMap::MaxOneAway]
Definition: HomePatch.h:571
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3410
float * rigidBondLength
Definition: NamdTypes.h:416
BigReal maxAtomMovement
Definition: PatchTypes.h:43
void PatchReady(void)
Definition: Sync.C:150
void saveForce(const int ftag=Results::normal)
Definition: HomePatch.C:2315
void setGBISIntrinsicRadii()
Definition: HomePatch.C:4900
std::vector< Vector > posNew
Definition: HomePatch.h:458
BigReal xx
Definition: Tensor.h:17
PatchID patch
Definition: ProxyMgr.h:97
void buildRattleList_SOA()
Definition: HomePatch.C:4520
int berendsenPressure_count
Definition: PatchMgr.h:87
int exchange_src
Definition: HomePatch.h:515
Flags flags
Definition: ProxyMgr.h:98
void gbisP2Ready()
Definition: HomePatch.C:4987
Real angle
Definition: structures.h:128
void sendSpanningTree()
Definition: HomePatch.C:700
void checkpoint(void)
Definition: HomePatch.C:5222
BigReal zz
Definition: Tensor.h:19
std::vector< RattleParam > rattleParam
Definition: HomePatch.h:450
void sendProxyData(ProxyDataMsg *, int, int *)
Definition: ProxyMgr.C:1562
void sendSpanningTree(ProxySpanningTreeMsg *)
Definition: ProxyMgr.C:1154
Real * intRadList
Definition: ProxyMgr.h:110
float y
Definition: CudaRecord.h:59
int32 * transform_k
Definition: NamdTypes.h:412
#define ASSERT(expr)
Definition: Debug.h:23
void suspend(void)
Definition: Sequencer.C:279
void sendCheckpointLoad(CheckpointAtomsMsg *msg, int dst, int dstpe)
Definition: PatchMgr.C:310
int32 * peIDs
Definition: NamdTypes.h:328
uint8 nonbondedGroupSize
Definition: NamdTypes.h:82
double * recipMass
derived from mass
Definition: NamdTypes.h:404
CompAtom * positionList
Definition: ProxyMgr.h:102
int HGArrayInt[MAXHGS]
Definition: HomePatch.C:65
Real newCharge
Definition: ComputeQM.h:34
#define simParams
Definition: Output.C:131
int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
optimized settle1 algorithm, reuses water properties as much as possible
Definition: Settle.C:63
RealList bornRad
Definition: Patch.h:165
void printOut(char *tag)
Definition: ProxyMgr.C:217
double * velNew_y
Definition: NamdTypes.h:449
int32 sortOrder
Definition: NamdTypes.h:153
double * fixedPosition_x
Definition: NamdTypes.h:400
void ludcmp(HGMatrixBigReal &a, int n, HGArrayInt &indx, BigReal *d)
Definition: HomePatch.C:6408
void sendProxyList(int pid, int *plist, int size)
Definition: ProxyMgr.C:1978
#define NAMD_EVENT_START_EX(eon, id, str)
iterator begin(void)
Definition: ResizeArray.h:36
BigReal max_b(int pid) const
Definition: PatchMap.h:94
double * pos_z
Definition: NamdTypes.h:379
void sendProxyAll(ProxyDataMsg *, int, int *)
Definition: ProxyMgr.C:1676
double * f_slow_x
Definition: NamdTypes.h:434
MigrationList mList
Definition: Migration.h:22
void clientAdd()
Definition: OwnerBox.h:77
ForceList * forceList[Results::maxNumForces]
Definition: ProxyMgr.h:168
void G_q(const HGArrayVector &refab, HGMatrixVector &gqij, const int n, const int m, const HGArrayInt &ial, const HGArrayInt &ibl)
Definition: HomePatch.C:6458
BigReal rmb
Definition: Settle.h:61
NodeID node
Definition: ProxyMgr.h:166
const PatchID patchID
Definition: Patch.h:150
Definition: Tensor.h:15
Elem * find(const Elem &elem)
Definition: SortedArray.h:94
void mollyAverage()
Definition: HomePatch.C:5085
int32 AtomID
Definition: NamdTypes.h:35
OwnerBox< Patch, GBReal > dEdaSumBox
Definition: Patch.h:236
BigReal xy
Definition: Tensor.h:17
iterator end(void)
Definition: ResizeArray.h:37
bool rattleListValid_SOA
Definition: HomePatch.h:454
double * pos_x
Definition: NamdTypes.h:377
int find(const Elem &e) const
Definition: ResizeArray.h:141
BigReal y
Definition: Vector.h:74
static float MassToRadius(Mass mi)
Definition: ComputeGBIS.inl:55
int flLen[Results::maxNumForces]
Definition: ProxyMgr.h:233
#define MAXHGS
Definition: HomePatch.C:55
Real * dHdrPrefix
Definition: ProxyMgr.h:59
int doLCPO
Definition: PatchTypes.h:31
void del(int index, int num=1)
Definition: ResizeArray.h:108
int numaway_b(void) const
Definition: PatchMap.h:69
int32 * transform_j
Definition: NamdTypes.h:411
BigReal rma
Definition: Settle.h:60
double * vel_z
Definition: NamdTypes.h:398
Mass mass
Definition: NamdTypes.h:218
static float MassToScreen(Mass mi)
BigReal yy
Definition: Tensor.h:18
int32 numPes
Definition: NamdTypes.h:329
void recvExchangeReq(int req)
Definition: HomePatch.C:5391
int ia
Definition: Settle.h:57
CompAtomExt * positionExtList
Definition: ProxyMgr.h:122
void buildSpanningTree(void)
Definition: HomePatch.C:715
#define TIMEFACTOR
Definition: common.h:55
BigReal max_c(int pid) const
Definition: PatchMap.h:96
int32 * sigId
Definition: NamdTypes.h:392
int checkpoint_task
Definition: HomePatch.h:501
double * f_saved_nbond_y
Definition: NamdTypes.h:441
float Mass
Definition: ComputeGBIS.inl:20
BigReal patchDimension
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
double * f_nbond_x
Definition: NamdTypes.h:431
unsigned char * buffer
Definition: NamdTypes.h:375
void PatchDataSOA_initialize(PatchDataSOA *p)
Definition: HomePatch.C:2366
BigReal pairlistTolerance
Definition: PatchTypes.h:42
std::vector< Vector > velNew
Definition: HomePatch.h:457
void receiveResult(ProxyGBISP1ResultMsg *msg)
Definition: HomePatch.C:5030
double * f_global_x
Definition: NamdTypes.h:437
Lphost * get_lphost(int atomid) const
Definition: Molecule.h:1157
void rattleN(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:1359
Lattice lattice
Definition: PatchMgr.h:82
int doGBIS
Definition: PatchTypes.h:30
void MSHAKE_CUDA(int *, const int size, const RattleParam *rattleParam, BigReal *refx, BigReal *refy, BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
double * fixedPosition_z
Definition: NamdTypes.h:402
std::ostream & iERROR(std::ostream &s)
Definition: InfoStream.C:83
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
void unregisterIDsFullAtom(const FullAtom *begin, const FullAtom *end)
Definition: AtomMap.C:100
zVector HGArrayVector[MAXHGS]
Definition: HomePatch.C:67
int newID
Definition: ComputeQM.h:32
int avgPlLen
Definition: ProxyMgr.h:103
int32 * status
Definition: NamdTypes.h:408
void registerPatch(int patchID, int numPes, int *pes)
Definition: ProxyMgr.C:1839
size_t PatchDataSOA_set_size(PatchDataSOA *p, int natoms, int padding)
Definition: HomePatch.C:2373
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3348
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3387
std::vector< int > noconstList
Definition: HomePatch.h:451
ForceList f[Results::maxNumForces]
Definition: Patch.h:214
float * langScalRandBBK2
from langevinParam and recipMass
Definition: NamdTypes.h:420
NAMD_HOST_DEVICE BigReal rlength(void) const
Definition: Vector.h:210
#define GB2_PROXY_DATA_PRIORITY
Definition: Priorities.h:58
void swap(ResizeArray< Elem > &ra)
Definition: ResizeArray.h:64
NAMD_HOST_DEVICE Vector a() const
Definition: Lattice.h:268
int proxySpanDim
Definition: ProxyMgr.C:47
#define RIGID_NONE
Definition: SimParameters.h:80
FullAtom * atoms
Definition: PatchMgr.h:112
void loweAndersenFinish()
Definition: HomePatch.C:4881
double * posNew_x
Definition: NamdTypes.h:451
int32 * atomFixed
Definition: NamdTypes.h:393
int32 * nonbondedGroupSize
Definition: NamdTypes.h:384
int isOpen()
Definition: OwnerBox.h:51
BigReal min_b(int pid) const
Definition: PatchMap.h:93
void addRattleForce(const BigReal invdt, Tensor &wc)
Definition: HomePatch.C:3778
int32 PatchID
Definition: NamdTypes.h:287
NAMD_HOST_DEVICE Vector unit(void) const
Definition: Vector.h:215
BigReal zx
Definition: Tensor.h:19
CompAtomExtList pExt
Definition: Patch.h:181
WaterModel
Definition: common.h:221
void sendCheckpointStore(CheckpointAtomsMsg *msg, int dst, int dstpe)
Definition: PatchMgr.C:335
Molecule * molecule
Definition: Node.h:179
BigReal maxGroupRadius
Definition: PatchTypes.h:44
void positionsReady(int n=0, int startup=1)
Definition: Patch.C:403
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
int numMlBuf
Definition: HomePatch.h:570
void sendCheckpointReq(int pid, int remote, const char *key, int task)
Definition: PatchMgr.C:272
void useSequencer(Sequencer *sequencerPtr)
Definition: HomePatch.C:301
void doMarginCheck()
Definition: HomePatch.C:5721
static PatchMgr * Object()
Definition: PatchMgr.h:152
OwnerBox< Patch, GBReal > psiSumBox
Definition: Patch.h:232
#define GB3_PROXY_DATA_PRIORITY
Definition: Priorities.h:66
double * f_slow_z
Definition: NamdTypes.h:436
Real r_om
Definition: Molecule.h:495
void sendExchangeMsg(ExchangeAtomsMsg *msg, int dst, int dstpe)
Definition: PatchMgr.C:419
int doMolly
Definition: PatchTypes.h:25
void recvCheckpointStore(CheckpointAtomsMsg *msg)
Definition: HomePatch.C:5351
void recvExchangeMsg(ExchangeAtomsMsg *msg)
Definition: HomePatch.C:5402
double * velNew_x
temp storage for rigid bond constraints
Definition: NamdTypes.h:448
double BigReal
Definition: common.h:123
int average(CompAtom *qtilde, const HGArrayVector &q, BigReal *lambda, const int n, const int m, const HGArrayBigReal &imass, const HGArrayBigReal &length2, const HGArrayInt &ial, const HGArrayInt &ibl, const HGArrayVector &refab, const BigReal tolf, const int ntrial)
Definition: HomePatch.C:6470
Transform transform
Definition: NamdTypes.h:229
int step
Definition: PatchTypes.h:16
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
CompAtom * velocityPtrBegin
Definition: Patch.h:208
int32 numAtoms
number of atoms
Definition: NamdTypes.h:456
int proxySendSpanning
Definition: ProxyMgr.C:44
void exchangeAtoms(int scriptTask)
Definition: HomePatch.C:5370