NAMD
Settle.C
Go to the documentation of this file.
1 
7 //#include "InfoStream.h"
8 #include "Settle.h"
9 #include <string.h>
10 #include <math.h>
11 #ifdef NAMD_MSHAKE
12 #include <Eigen/Dense>
13 #endif
14 #include "InfoStream.h"
15 #include <cmath>
16 //#include <charm++.h> // for CkPrintf
17 #include <iostream>
18 
19 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
20 #include <emmintrin.h> // SSE2
21 #if defined(__INTEL_COMPILER)
22 #define __align(X) __declspec(align(X) )
23 #elif defined(__PGI)
24 #define __align(X) __attribute__((aligned(X) ))
25 #define MISSING_mm_cvtsd_f64
26 #elif defined(__GNUC__)
27 #define __align(X) __attribute__((aligned(X) ))
28 #if (__GNUC__ < 4)
29 #define MISSING_mm_cvtsd_f64
30 #endif
31 #else
32 #define __align(X) __declspec(align(X) )
33 #endif
34 #endif
35 
36 //
37 // XXX static and global variables are unsafe for shared memory builds.
38 // The global and static vars should be eliminated.
39 // Unfortunately, the routines that use these below are actually
40 // in use in NAMD.
41 //
42 
43 // Initialize various properties of the waters
44 // settle1() assumes all waters are identical,
45 // and will generate bad results if they are not.
46 void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist,
47  BigReal &mO, BigReal &mH,
48  BigReal &mOrmT, BigReal &mHrmT, BigReal &ra,
49  BigReal &rb, BigReal &rc, BigReal &rra) {
50  BigReal rmT = 1.0 / (pmO+pmH+pmH);
51  mO = pmO;
52  mH = pmH;
53  mOrmT = pmO * rmT;
54  mHrmT = pmH * rmT;
55  BigReal t1 = 0.5*pmO/pmH;
56  rc = 0.5*hhdist;
57  ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
58  rb = t1*ra;
59  rra = 1.0 / ra;
60 }
61 
62 
63 int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt,
64  BigReal mOrmT, BigReal mHrmT, BigReal ra,
65  BigReal rb, BigReal rc, BigReal rra) {
66 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
67  // SSE acceleration of some of the costly parts of settle using
68  // the Intel C/C++ classes. This implementation uses the SSE units
69  // less efficiency than is potentially possible, but in order to do
70  // better, the settle algorithm will have to be vectorized and operate
71  // on multiple waters at a time. Doing so could give us the ability to
72  // do two (double precison) or four (single precision) waters at a time.
73  // This code achieves a modest speedup without the need to reorganize
74  // the NAMD structure. Once we have water molecules sorted in a single
75  // block we can do far better.
76 
77  // vectors in the plane of the original positions
78  Vector b0, c0;
79 
80  __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x); // ref0.y and ref0.x
81  __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x); // ref1.y and ref1.x
82 
83  __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
84  _mm_storeu_pd((double *) &b0.x, B0xy);
85  b0.z = ref[1].z - ref[0].z;
86 
87  __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x); // ref2.y and ref2.x
88 
89  __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
90  _mm_storeu_pd((double *) &c0.x, C0xy);
91  c0.z = ref[2].z - ref[0].z;
92 
93  // new center of mass
94  // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
95  __align(16) Vector a1;
96  __align(16) Vector b1;
97  __align(16) Vector c1;
98  __align(16) Vector d0;
99 
100  __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
101  __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
102  __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
103 
104  __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
105  __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
106  __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
107 
108  d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
109  a1.z = pos[0].z - d0.z;
110  b1.z = pos[1].z - d0.z;
111  c1.z = pos[2].z - d0.z;
112 
113  __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
114  _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
115 
116  __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
117  _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
118 
119  __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
120  _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
121 
122  _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
123 
124  // Vectors describing transformation from original coordinate system to
125  // the 'primed' coordinate system as in the diagram.
126  Vector n0 = cross(b0, c0);
127  Vector n1 = cross(a1, n0);
128  Vector n2 = cross(n0, n1);
129 #else
130  // vectors in the plane of the original positions
131  Vector b0 = ref[1]-ref[0];
132  Vector c0 = ref[2]-ref[0];
133 
134  // new center of mass
135  Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
136 
137  Vector a1 = pos[0] - d0;
138  Vector b1 = pos[1] - d0;
139  Vector c1 = pos[2] - d0;
140 
141  // Vectors describing transformation from original coordinate system to
142  // the 'primed' coordinate system as in the diagram.
143  Vector n0 = cross(b0, c0);
144  Vector n1 = cross(a1, n0);
145  Vector n2 = cross(n0, n1);
146 #endif
147 
148 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
149  __m128d l1 = _mm_set_pd(n0.x, n0.y);
150  l1 = _mm_mul_pd(l1, l1);
151  // n0.x^2 + n0.y^2
152  double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
153 
154  __m128d l3 = _mm_set_pd(n1.y, n1.z);
155  l3 = _mm_mul_pd(l3, l3);
156  // n1.y^2 + n1.z^2
157  double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
158 
159  __m128d l2 = _mm_set_pd(n1.x, n0.z);
160  // len(n1)^2 and len(n0)^2
161  __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
162 
163  __m128d l4 = _mm_set_pd(n2.x, n2.y);
164  l4 = _mm_mul_pd(l4, l4);
165  // n2.x^2 + n2.y^2
166  double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
167  double ts2 = l4xy2 + (n2.z * n2.z); // len(n2)^2
168 
169  double invlens[4];
170  // since rsqrt_nr() doesn't work with current compiler
171  // this is the next best option
172  static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
173 
174  // 1/len(n1) and 1/len(n0)
175  __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
176 
177  // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
178  _mm_storeu_pd(invlens, invlen12);
179 
180  n0 = n0 * invlens[0];
181 
182  // shuffle the order of operations around from the normal algorithm so
183  // that we can double pump sqrt() with n2 and cosphi at the same time
184  // these components are usually computed down in the canonical water block
185  BigReal A1Z = n0 * a1;
186  BigReal sinphi = A1Z * rra;
187  BigReal tmp = 1.0-sinphi*sinphi;
188 
189  __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
190  // invlens[2] = 1/len(n2), invlens[3] = cosphi
191  _mm_storeu_pd(invlens+2, n2cosphi);
192 
193  n1 = n1 * invlens[1];
194  n2 = n2 * (1.0 / invlens[2]);
195  BigReal cosphi = invlens[3];
196 
197  b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
198  c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
199 
200  b1 = Vector(n1*b1, n2*b1, n0*b1);
201  c1 = Vector(n1*c1, n2*c1, n0*c1);
202 
203  // now we can compute positions of canonical water
204  BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
205  tmp = 1.0-sinpsi*sinpsi;
206  BigReal cospsi = sqrt(tmp);
207 #else
208  n0 = n0.unit();
209  n1 = n1.unit();
210  n2 = n2.unit();
211 
212  b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
213  c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
214 
215  BigReal A1Z = n0 * a1;
216  b1 = Vector(n1*b1, n2*b1, n0*b1);
217  c1 = Vector(n1*c1, n2*c1, n0*c1);
218 
219  // now we can compute positions of canonical water
220  BigReal sinphi = A1Z * rra;
221  BigReal tmp = 1.0-sinphi*sinphi;
222  BigReal cosphi = sqrt(tmp);
223  BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
224  tmp = 1.0-sinpsi*sinpsi;
225  BigReal cospsi = sqrt(tmp);
226 #endif
227 
228  BigReal rbphi = -rb*cosphi;
229  BigReal tmp1 = rc*sinpsi*sinphi;
230  BigReal tmp2 = rc*sinpsi*cosphi;
231 
232  Vector a2(0, ra*cosphi, ra*sinphi);
233  Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
234  Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
235 
236  // there are no a0 terms because we've already subtracted the term off
237  // when we first defined b0 and c0.
238  BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
239  BigReal beta = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
240  BigReal gama = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
241 
242  BigReal a2b2 = alpha*alpha + beta*beta;
243  BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
244  BigReal costheta = sqrt(1.0 - sintheta*sintheta);
245 
246 #if 0
247  Vector a3( -a2.y*sintheta,
248  a2.y*costheta,
249  a2.z);
250  Vector b3(b2.x*costheta - b2.y*sintheta,
251  b2.x*sintheta + b2.y*costheta,
252  b2.z);
253  Vector c3(c2.x*costheta - c2.y*sintheta,
254  c2.x*sintheta + c2.y*costheta,
255  c2.z);
256 
257 #else
258  Vector a3( -a2.y*sintheta,
259  a2.y*costheta,
260  A1Z);
261  Vector b3(b2.x*costheta - b2.y*sintheta,
262  b2.x*sintheta + b2.y*costheta,
263  b1.z);
264  Vector c3(-b2.x*costheta - c2.y*sintheta,
265  -b2.x*sintheta + c2.y*costheta,
266  c1.z);
267 
268 #endif
269 
270  // undo the transformation; generate new normal vectors from the transpose.
271  Vector m1(n1.x, n2.x, n0.x);
272  Vector m2(n1.y, n2.y, n0.y);
273  Vector m0(n1.z, n2.z, n0.z);
274 
275  pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
276  pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
277  pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
278 
279  // dt can be negative during startup!
280  if (invdt != 0) {
281  vel[0] = (pos[0]-ref[0])*invdt;
282  vel[1] = (pos[1]-ref[1])*invdt;
283  vel[2] = (pos[2]-ref[2])*invdt;
284  }
285 
286  return 0;
287 }
288 
289 //
290 // Settle multiple waters using SIMD
291 //
292 template <int veclen>
293 void settle1_SIMD(const Vector *ref, Vector *pos,
294  BigReal mOrmT, BigReal mHrmT, BigReal ra,
295  BigReal rb, BigReal rc, BigReal rra) {
296 
297  BigReal ref0xt[veclen];
298  BigReal ref0yt[veclen];
299  BigReal ref0zt[veclen];
300  BigReal ref1xt[veclen];
301  BigReal ref1yt[veclen];
302  BigReal ref1zt[veclen];
303  BigReal ref2xt[veclen];
304  BigReal ref2yt[veclen];
305  BigReal ref2zt[veclen];
306 
307  BigReal pos0xt[veclen];
308  BigReal pos0yt[veclen];
309  BigReal pos0zt[veclen];
310  BigReal pos1xt[veclen];
311  BigReal pos1yt[veclen];
312  BigReal pos1zt[veclen];
313  BigReal pos2xt[veclen];
314  BigReal pos2yt[veclen];
315  BigReal pos2zt[veclen];
316 
317  for (int i=0;i < veclen;i++) {
318  ref0xt[i] = ref[i*3+0].x;
319  ref0yt[i] = ref[i*3+0].y;
320  ref0zt[i] = ref[i*3+0].z;
321  ref1xt[i] = ref[i*3+1].x;
322  ref1yt[i] = ref[i*3+1].y;
323  ref1zt[i] = ref[i*3+1].z;
324  ref2xt[i] = ref[i*3+2].x;
325  ref2yt[i] = ref[i*3+2].y;
326  ref2zt[i] = ref[i*3+2].z;
327 
328  pos0xt[i] = pos[i*3+0].x;
329  pos0yt[i] = pos[i*3+0].y;
330  pos0zt[i] = pos[i*3+0].z;
331  pos1xt[i] = pos[i*3+1].x;
332  pos1yt[i] = pos[i*3+1].y;
333  pos1zt[i] = pos[i*3+1].z;
334  pos2xt[i] = pos[i*3+2].x;
335  pos2yt[i] = pos[i*3+2].y;
336  pos2zt[i] = pos[i*3+2].z;
337  }
338 
339 #pragma omp simd
340  for (int i=0;i < veclen;i++) {
341 
342  BigReal ref0x = ref0xt[i];
343  BigReal ref0y = ref0yt[i];
344  BigReal ref0z = ref0zt[i];
345  BigReal ref1x = ref1xt[i];
346  BigReal ref1y = ref1yt[i];
347  BigReal ref1z = ref1zt[i];
348  BigReal ref2x = ref2xt[i];
349  BigReal ref2y = ref2yt[i];
350  BigReal ref2z = ref2zt[i];
351 
352  BigReal pos0x = pos0xt[i];
353  BigReal pos0y = pos0yt[i];
354  BigReal pos0z = pos0zt[i];
355  BigReal pos1x = pos1xt[i];
356  BigReal pos1y = pos1yt[i];
357  BigReal pos1z = pos1zt[i];
358  BigReal pos2x = pos2xt[i];
359  BigReal pos2y = pos2yt[i];
360  BigReal pos2z = pos2zt[i];
361 
362  // vectors in the plane of the original positions
363  BigReal b0x = ref1x - ref0x;
364  BigReal b0y = ref1y - ref0y;
365  BigReal b0z = ref1z - ref0z;
366 
367  BigReal c0x = ref2x - ref0x;
368  BigReal c0y = ref2y - ref0y;
369  BigReal c0z = ref2z - ref0z;
370 
371  // new center of mass
372  BigReal d0x = pos0x*mOrmT + ((pos1x + pos2x)*mHrmT);
373  BigReal d0y = pos0y*mOrmT + ((pos1y + pos2y)*mHrmT);
374  BigReal d0z = pos0z*mOrmT + ((pos1z + pos2z)*mHrmT);
375 
376  BigReal a1x = pos0x - d0x;
377  BigReal a1y = pos0y - d0y;
378  BigReal a1z = pos0z - d0z;
379 
380  BigReal b1x = pos1x - d0x;
381  BigReal b1y = pos1y - d0y;
382  BigReal b1z = pos1z - d0z;
383 
384  BigReal c1x = pos2x - d0x;
385  BigReal c1y = pos2y - d0y;
386  BigReal c1z = pos2z - d0z;
387 
388  // Vectors describing transformation from original coordinate system to
389  // the 'primed' coordinate system as in the diagram.
390  // n0 = b0 x c0
391  BigReal n0x = b0y*c0z-c0y*b0z;
392  BigReal n0y = c0x*b0z-b0x*c0z;
393  BigReal n0z = b0x*c0y-c0x*b0y;
394 
395  // n1 = a1 x n0
396  BigReal n1x = a1y*n0z-n0y*a1z;
397  BigReal n1y = n0x*a1z-a1x*n0z;
398  BigReal n1z = a1x*n0y-n0x*a1y;
399 
400  // n2 = n0 x n1
401  BigReal n2x = n0y*n1z-n1y*n0z;
402  BigReal n2y = n1x*n0z-n0x*n1z;
403  BigReal n2z = n0x*n1y-n1x*n0y;
404 
405  // Normalize n0
406  BigReal n0inv = 1.0/sqrt(n0x*n0x + n0y*n0y + n0z*n0z);
407  n0x *= n0inv;
408  n0y *= n0inv;
409  n0z *= n0inv;
410 
411  BigReal n1inv = 1.0/sqrt(n1x*n1x + n1y*n1y + n1z*n1z);
412  n1x *= n1inv;
413  n1y *= n1inv;
414  n1z *= n1inv;
415 
416  BigReal n2inv = 1.0/sqrt(n2x*n2x + n2y*n2y + n2z*n2z);
417  n2x *= n2inv;
418  n2y *= n2inv;
419  n2z *= n2inv;
420 
421  //b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
422  BigReal n1b0 = n1x*b0x + n1y*b0y + n1z*b0z;
423  BigReal n2b0 = n2x*b0x + n2y*b0y + n2z*b0z;
424 
425  //c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
426  BigReal n1c0 = n1x*c0x + n1y*c0y + n1z*c0z;
427  BigReal n2c0 = n2x*c0x + n2y*c0y + n2z*c0z;
428 
429  BigReal A1Z = n0x*a1x + n0y*a1y + n0z*a1z;
430 
431  //b1 = Vector(n1*b1, n2*b1, n0*b1);
432  BigReal n1b1 = n1x*b1x + n1y*b1y + n1z*b1z;
433  BigReal n2b1 = n2x*b1x + n2y*b1y + n2z*b1z;
434  BigReal n0b1 = n0x*b1x + n0y*b1y + n0z*b1z;
435 
436  //c1 = Vector(n1*c1, n2*c1, n0*c1);
437  BigReal n1c1 = n1x*c1x + n1y*c1y + n1z*c1z;
438  BigReal n2c1 = n2x*c1x + n2y*c1y + n2z*c1z;
439  BigReal n0c1 = n0x*c1x + n0y*c1y + n0z*c1z;
440 
441  // now we can compute positions of canonical water
442  BigReal sinphi = A1Z * rra;
443  BigReal tmp = 1.0-sinphi*sinphi;
444  BigReal cosphi = sqrt(tmp);
445  BigReal sinpsi = (n0b1 - n0c1)/(2.0*rc*cosphi);
446  tmp = 1.0-sinpsi*sinpsi;
447  BigReal cospsi = sqrt(tmp);
448 
449  BigReal rbphi = -rb*cosphi;
450  BigReal tmp1 = rc*sinpsi*sinphi;
451  BigReal tmp2 = rc*sinpsi*cosphi;
452 
453  //Vector a2(0, ra*cosphi, ra*sinphi);
454  BigReal a2y = ra*cosphi;
455 
456  //Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
457  BigReal b2x = -rc*cospsi;
458  BigReal b2y = rbphi - tmp1;
459 
460  //Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
461  BigReal c2y = rbphi + tmp1;
462 
463  // there are no a0 terms because we've already subtracted the term off
464  // when we first defined b0 and c0.
465  BigReal alpha = b2x*(n1b0 - n1c0) + n2b0*b2y + n2c0*c2y;
466  BigReal beta = b2x*(n2c0 - n2b0) + n1b0*b2y + n1c0*c2y;
467  BigReal gama = n1b0*n2b1 - n1b1*n2b0 + n1c0*n2c1 - n1c1*n2c0;
468 
469  BigReal a2b2 = alpha*alpha + beta*beta;
470  BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
471  BigReal costheta = sqrt(1.0 - sintheta*sintheta);
472 
473  //Vector a3( -a2y*sintheta,
474  // a2y*costheta,
475  // A1Z);
476  BigReal a3x = -a2y*sintheta;
477  BigReal a3y = a2y*costheta;
478  BigReal a3z = A1Z;
479 
480  // Vector b3(b2x*costheta - b2y*sintheta,
481  // b2x*sintheta + b2y*costheta,
482  // n0b1);
483  BigReal b3x = b2x*costheta - b2y*sintheta;
484  BigReal b3y = b2x*sintheta + b2y*costheta;
485  BigReal b3z = n0b1;
486 
487  // Vector c3(-b2x*costheta - c2y*sintheta,
488  // -b2x*sintheta + c2y*costheta,
489  // n0c1);
490  BigReal c3x = -b2x*costheta - c2y*sintheta;
491  BigReal c3y = -b2x*sintheta + c2y*costheta;
492  BigReal c3z = n0c1;
493 
494  // undo the transformation; generate new normal vectors from the transpose.
495  // Vector m1(n1.x, n2.x, n0.x);
496  BigReal m1x = n1x;
497  BigReal m1y = n2x;
498  BigReal m1z = n0x;
499 
500  // Vector m2(n1.y, n2.y, n0.y);
501  BigReal m2x = n1y;
502  BigReal m2y = n2y;
503  BigReal m2z = n0y;
504 
505  // Vector m0(n1.z, n2.z, n0.z);
506  BigReal m0x = n1z;
507  BigReal m0y = n2z;
508  BigReal m0z = n0z;
509 
510  //pos[i*3+0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
511  pos0x = a3x*m1x + a3y*m1y + a3z*m1z + d0x;
512  pos0y = a3x*m2x + a3y*m2y + a3z*m2z + d0y;
513  pos0z = a3x*m0x + a3y*m0y + a3z*m0z + d0z;
514 
515  // pos[i*3+1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
516  pos1x = b3x*m1x + b3y*m1y + b3z*m1z + d0x;
517  pos1y = b3x*m2x + b3y*m2y + b3z*m2z + d0y;
518  pos1z = b3x*m0x + b3y*m0y + b3z*m0z + d0z;
519 
520  // pos[i*3+2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
521  pos2x = c3x*m1x + c3y*m1y + c3z*m1z + d0x;
522  pos2y = c3x*m2x + c3y*m2y + c3z*m2z + d0y;
523  pos2z = c3x*m0x + c3y*m0y + c3z*m0z + d0z;
524 
525  pos0xt[i] = pos0x;
526  pos0yt[i] = pos0y;
527  pos0zt[i] = pos0z;
528  pos1xt[i] = pos1x;
529  pos1yt[i] = pos1y;
530  pos1zt[i] = pos1z;
531  pos2xt[i] = pos2x;
532  pos2yt[i] = pos2y;
533  pos2zt[i] = pos2z;
534  }
535 
536  for (int i=0;i < veclen;i++) {
537  pos[i*3+0].x = pos0xt[i];
538  pos[i*3+0].y = pos0yt[i];
539  pos[i*3+0].z = pos0zt[i];
540  pos[i*3+1].x = pos1xt[i];
541  pos[i*3+1].y = pos1yt[i];
542  pos[i*3+1].z = pos1zt[i];
543  pos[i*3+2].x = pos2xt[i];
544  pos[i*3+2].y = pos2yt[i];
545  pos[i*3+2].z = pos2zt[i];
546  }
547 
548 }
549 
550 //
551 // Rattle pair of atoms
552 //
553 template <int veclen>
554 void rattlePair(const RattleParam* rattleParam,
555  const BigReal *refx, const BigReal *refy, const BigReal *refz,
556  BigReal *posx, BigReal *posy, BigReal *posz, bool& consFailure) {
557 
558  int a = rattleParam[0].ia;
559  int b = rattleParam[0].ib;
560  BigReal pabx = posx[a] - posx[b];
561  BigReal paby = posy[a] - posy[b];
562  BigReal pabz = posz[a] - posz[b];
563  BigReal pabsq = pabx*pabx + paby*paby + pabz*pabz;
564  BigReal rabsq = rattleParam[0].dsq;
565  BigReal diffsq = rabsq - pabsq;
566  BigReal rabx = refx[a] - refx[b];
567  BigReal raby = refy[a] - refy[b];
568  BigReal rabz = refz[a] - refz[b];
569 
570  BigReal refsq = rabx*rabx + raby*raby + rabz*rabz;
571 
572  BigReal rpab = rabx*pabx + raby*paby + rabz*pabz;
573 
574  BigReal rma = rattleParam[0].rma;
575  BigReal rmb = rattleParam[0].rmb;
576 
577  BigReal gab;
578  BigReal sqrtarg = rpab*rpab + refsq*diffsq;
579  if ( sqrtarg < 0. ) {
580  consFailure = true;
581  gab = 0.;
582  } else {
583  consFailure = false;
584  gab = (-rpab + sqrt(sqrtarg))/(refsq*(rma + rmb));
585  }
586 
587  BigReal dpx = rabx * gab;
588  BigReal dpy = raby * gab;
589  BigReal dpz = rabz * gab;
590  posx[a] += rma * dpx;
591  posy[a] += rma * dpy;
592  posz[a] += rma * dpz;
593  posx[b] -= rmb * dpx;
594  posy[b] -= rmb * dpy;
595  posz[b] -= rmb * dpz;
596 
597 }
598 #if 0
599 void iterate(const int icnt, const RattleParam* rattleParam,
600  const BigReal *refx, const BigReal *refy, const BigReal *refz,
601  BigReal *posx, BigReal *posy, BigReal *posz,
602  const BigReal tol2, const int maxiter,
603  bool& done, bool& consFailure)
604 {
605 
606  using namespace Eigen;
607  #ifdef SHORTREALS
608  typedef VectorXf VectorXr;
609  typedef MatrixXf MatrixXr;
610  #else
611  typedef VectorXd VectorXr;
612  typedef MatrixXd MatrixXr;
613  #endif
614  VectorXr sigma(icnt), lambda(icnt), ptmp(3*icnt), rtmp(3*icnt);
615  MatrixXr A(icnt, icnt);
616  register int loop;
617  consFailure = false;
618  PartialPivLU<MatrixXr> lu;
619  bool constructMatrix = true;
620  for(loop = 0; loop < maxiter; ++loop)
621  {
622  done = true;
623  for(int i = 0; i < icnt; ++i)
624  {
625  int a = rattleParam[i].ia;
626  int b = rattleParam[i].ib;
627  BigReal pabx = posx[a] - posx[b];
628  BigReal paby = posy[a] - posy[b];
629  BigReal pabz = posz[a] - posz[b];
630  ptmp(3*i) = pabx;
631  ptmp(3*i+1) = paby;
632  ptmp(3*i+2) = pabz;
633  BigReal pabsq = pabx*pabx + paby*paby + pabz*pabz;
634  BigReal rabsq = rattleParam[i].dsq;
635  BigReal diffsq = pabsq - rabsq;
636  sigma(i) = diffsq;
637  if ( fabs(diffsq) > (rabsq * tol2) )
638  done = false;
639  }
640  if(!done)
641  {
642  if(constructMatrix)
643  {
644  for(int i = 0; i < icnt; ++i)
645  {
646  int a = rattleParam[i].ia;
647  int b = rattleParam[i].ib;
648  rtmp[3*i] = refx[a] - refx[b];
649  rtmp[3*i+1] = refy[a] - refy[b];
650  rtmp[3*i+2] = refz[a] - refz[b];
651  }
652  for(int i = 0; i < icnt; ++i)
653  {
654  //int a = rattleParam[i].ia;
655  //int b = rattleParam[i].ib;
656  BigReal pabx = ptmp[3*i];
657  BigReal paby = ptmp[3*i+1];
658  BigReal pabz = ptmp[3*i+2];
659  BigReal rma = rattleParam[i].rma;
660  BigReal rmb = rattleParam[i].rmb;
661 
662  for(int j = 0; j < icnt; ++j)
663  {
664  //int c = rattleParam[j].ia;
665  //int d = rattleParam[j].ib;
666  BigReal rabx = rtmp[3*j];
667  BigReal raby = rtmp[3*j+1];
668  BigReal rabz = rtmp[3*j+2];
669  if(i==j)
670  A(i,i) = 2.*(pabx*rabx+paby*raby+pabz*rabz)*(rma+rmb);
671  else
672  A(i,j) = 2.*(pabx*rabx+paby*raby+pabz*rabz)*rma;
673  }
674  }
675  //sigma = A.transpose()*sigma;
676  //A = A.transpose() * A;
677  //LLT<MatrixXr> llt;
678  lu.compute(A);
679  constructMatrix = false;
680  }
681  //sigma = A.transpose()*sigma;
682  //lambda = A.llt().solve(sigma);
683  lambda = lu.solve(sigma);
684  //lambda = A.lu().solve(sigma);
685  for(int i = 0; i < icnt; ++i)
686  {
687  int a = rattleParam[i].ia;
688  int b = rattleParam[i].ib;
689  BigReal rma = rattleParam[i].rma * lambda(i);
690  BigReal rmb = rattleParam[i].rmb * lambda(i);
691 
692  BigReal rabx = rtmp[3*i];
693  BigReal raby = rtmp[3*i+1];
694  BigReal rabz = rtmp[3*i+2];
695  posx[a] -= rma * rabx;
696  posy[a] -= rma * raby;
697  posz[a] -= rma * rabz;
698  posx[b] += rmb * rabx;
699  posy[b] += rmb * raby;
700  posz[b] += rmb * rabz;
701  }
702  }
703  else
704  break;
705  }
706  if(loop >= maxiter)
707  consFailure = true;
708 }
709 #endif
710 #if !(defined(__CUDACC__) || defined(__HIPCC__))
711 static inline BigReal det_3by3(BigReal A[4][4])
712 {
713  return
714  A[0][0]*(A[1][1]*A[2][2]-A[1][2]*A[2][1])-
715  A[0][1]*(A[1][0]*A[2][2]-A[1][2]*A[2][0])+
716  A[0][2]*(A[1][0]*A[2][1]-A[1][1]*A[2][0]);
717 }
718 
719 static void swap_row(BigReal A[4][4], BigReal b[4], int r1, int r2)
720 {
721  #pragma unroll
722  for (int i = 0; i < 4; i++)
723  {
724  BigReal* p1 = &A[r1][i];
725  BigReal* p2 = &A[r2][i];
726  BigReal tmp = *p1;
727  *p1 = *p2;
728  *p2 = tmp;
729  }
730  BigReal tmp;
731  tmp = b[r1];
732  b[r1] = b[r2];
733  b[r2] = tmp;
734 }
735 
736 static void solve_4by4(BigReal lambda [4], BigReal A[4][4], BigReal sigma[4])
737 {
738 
739  #pragma unroll
740  for (int k = 0; k < 4; ++k)
741  {
742  #ifdef PIVOT
743  int piv_row = k;
744  BigReal Max = A[k][k];
745 
746  for (int row = k + 1; row < 4; ++row)
747  {
748  if ((tmp = fabs(A(row, k))) > Max)
749  {
750  piv_row = row;
751  Max = tmp;
752  }
753  }
754  if(k != piv_row)
755  swap_row(A, sigma, k, piv_row);
756  #endif
757  for (int row = k + 1; row < 4; ++row)
758  {
759  BigReal tmp = A[row][k]/ A[k][k];
760  for (int col = k+1; col < 4; col++)
761  A[row][col] -= tmp * A[k][col];
762  A[row][k] = 0.;
763  sigma[row]-= tmp * sigma[k];
764  }
765  }
766  for (int row = 3; row >= 0; --row)
767  {
768  BigReal tmp = sigma[row];
769  for (int j = 3; j > row; --j)
770  tmp -= lambda[j] * A[row][j];
771  lambda[row] = tmp / A[row][row];
772  }
773 }
774 
775 static void solveMatrix(BigReal lambda [4], BigReal A[4][4], BigReal sigma[4], int icnt)
776 {
777  switch(icnt)
778  {
779  case 1:
780  {
781  lambda[0] = sigma[0]/A[0][0];
782  break;
783  }
784  case 2:
785  {
786 
787  BigReal det=1./(A[0][0]*A[1][1]-A[0][1]*A[1][0]);
788  lambda[0] = ( A[1][1]*sigma[0]-A[0][1]*sigma[1])*det;
789  lambda[1] = (-A[1][0]*sigma[0]+A[0][0]*sigma[1])*det;
790  break;
791  }
792  case 3:
793  {
794  BigReal det = 1./det_3by3(A);
795  lambda[0] = det*((A[1][1]*A[2][2]-A[1][2]*A[2][1])*sigma[0]-
796  (A[0][1]*A[2][2]-A[0][2]*A[2][1])*sigma[1]+
797  (A[0][1]*A[1][2]-A[0][2]*A[1][1])*sigma[2]);
798 
799  lambda[1] = det*((A[1][2]*A[2][0]-A[1][0]*A[2][2])*sigma[0]+
800  (A[0][0]*A[2][2]-A[0][2]*A[2][0])*sigma[1]-
801  (A[0][0]*A[1][2]-A[0][2]*A[1][0])*sigma[2]);
802 
803  lambda[2] = det*((A[1][0]*A[2][1]-A[1][1]*A[2][0])*sigma[0]-
804  (A[0][0]*A[2][1]-A[0][1]*A[2][0])*sigma[1]+
805  (A[0][0]*A[1][1]-A[0][1]*A[1][0])*sigma[2]);
806  break;
807  }
808  case 4:
809  {
810  solve_4by4(lambda, A, sigma);
811  break;
812  }
813  }
814 }
815 
816 static void solveFullInverse(BigReal A[4][4], BigReal S[4][4], int icnt)
817 {
818  BigReal x[4];
819  for(int i = 0; i < icnt; ++i)
820  {
821  BigReal b[4] = {0, 0, 0, 0};
822  b[i] = 1.;
823  solveMatrix(x, S, b, icnt);
824  #pragma unroll
825  for(int j = 0; j < 4; ++j)
826  A[j][i] = x[j];
827  }
828 }
829 
830 void MSHAKEIterate(const int icnt, const RattleParam* rattleParam,
831  const BigReal *refx, const BigReal *refy, const BigReal *refz,
832  BigReal *posx, BigReal *posy, BigReal *posz,
833  const BigReal tol2, const int maxiter,
834  bool& done, bool& consFailure)
835 {
836  BigReal sigma[4], lambda[4];
837  BigReal A[4][4];
838 
839  BigReal pabx[4];
840  BigReal rabx[4];
841  BigReal paby[4];
842  BigReal raby[4];
843  BigReal pabz[4];
844  BigReal rabz[4];
845  register int loop;
846  consFailure = false;
847  done = true;
848  #pragma unroll
849  for(int i = 0; i < 4; ++i)
850  {
851  int a = rattleParam[i].ia;
852  int b = rattleParam[i].ib;
853  pabx[i] = posx[a] - posx[b];
854  paby[i] = posy[a] - posy[b];
855  pabz[i] = posz[a] - posz[b];
856  rabx[i] = refx[a] - refx[b];
857  raby[i] = refy[a] - refy[b];
858  rabz[i] = refz[a] - refz[b];
859  }
860  #pragma unroll
861  for(int i = 0; i < 4; ++i)
862  {
863  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
864  BigReal rabsq = rattleParam[i].dsq;
865  BigReal diffsq = pabsq - rabsq;
866  sigma[i] = diffsq;
867  if ( fabs(diffsq) > (rabsq * tol2) && i < icnt)
868  done = false;
869  }
870  for(loop = 0; loop < maxiter; ++loop)
871  {
872  if(!done)
873  {
874  //construct A
875  #pragma unroll
876  for(int j = 0; j < 4; ++j)
877  {
878  BigReal rma = rattleParam[j].rma;
879  #pragma unroll
880  for(int i = 0; i < 4; ++i)
881  {
882  A[j][i] = 2.*(pabx[j]*rabx[i]+paby[j]*raby[i]+pabz[j]*rabz[i])*rma;
883  }
884  BigReal rmb = rattleParam[j].rmb;
885  A[j][j] += 2.*(pabx[j]*rabx[j]+paby[j]*raby[j]+pabz[j]*rabz[j])*rmb;
886  lambda[j] = 0.;
887  }
888  lambda[0] = 0;
889  lambda[1] = 0;
890  lambda[2] = 0;
891  lambda[3] = 0;
892  solveMatrix(lambda, A, sigma, icnt);
893  #pragma unroll
894  for(int i = 0; i < 4; ++i)
895  {
896  int a = rattleParam[i].ia;
897  int b = rattleParam[i].ib;
898  BigReal rma = rattleParam[i].rma * lambda[i];
899  BigReal rmb = rattleParam[i].rmb * lambda[i];
900 
901  posx[a] -= rma * rabx[i];
902  posy[a] -= rma * raby[i];
903  posz[a] -= rma * rabz[i];
904  posx[b] += rmb * rabx[i];
905  posy[b] += rmb * raby[i];
906  posz[b] += rmb * rabz[i];
907 
908  }
909  }
910  else
911  break;
912  done = true;
913  #pragma unroll
914  for(int i = 0; i < 4; ++i)
915  {
916  int a = rattleParam[i].ia;
917  int b = rattleParam[i].ib;
918  pabx[i] = posx[a] - posx[b];
919  paby[i] = posy[a] - posy[b];
920  pabz[i] = posz[a] - posz[b];
921  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
922  BigReal rabsq = rattleParam[i].dsq;
923  BigReal diffsq = pabsq - rabsq;
924  sigma[i] = diffsq;
925  if ( fabs(diffsq) > (rabsq * tol2) && i < icnt)
926  done = false;
927  }
928  }
929  if(loop == maxiter)
930  {
931  consFailure = true;
932  }
933 }
934 
935 #endif
936 void LINCS(const int icnt, const RattleParam* rattleParam,
937  const BigReal *refx, const BigReal *refy, const BigReal *refz,
938  BigReal *posx, BigReal *posy, BigReal *posz,
939  const BigReal tol2, const int maxiter,
940  bool& done, bool& consFailure)
941 {
942  BigReal pabx[4];
943  BigReal rabx[4];
944  BigReal paby[4];
945  BigReal raby[4];
946  BigReal pabz[4];
947  BigReal rabz[4];
948  BigReal drab[4];
949 
950  //check each constraint
951  consFailure = false;
952  done = true;
953  int iter = 0;
954  #pragma unroll
955  for(int i = 0; i < 4; ++i)
956  {
957  int a = rattleParam[i].ia;
958  int b = rattleParam[i].ib;
959  pabx[i] = posx[a] - posx[b];
960  paby[i] = posy[a] - posy[b];
961  pabz[i] = posz[a] - posz[b];
962  rabx[i] = refx[a] - refx[b];
963  raby[i] = refy[a] - refy[b];
964  rabz[i] = refz[a] - refz[b];
965  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
966  BigReal rabsq = rattleParam[i].dsq;
967  if(i < icnt)
968  drab[i] = 1./sqrt(rabx[i]*rabx[i]+raby[i]*raby[i]+rabz[i]*rabz[i]);
969  else
970  drab[i] = 0;
971  BigReal diffsq = pabsq - rabsq;
972  if ( fabs(diffsq) > (rabsq * tol2) && i < icnt)
973  done = false;
974  }
975  if(!done)
976  {
977  BigReal S[4][4];
978  BigReal A[4][4];
979  //construct S
980  #pragma unroll
981  for(int i = 0; i < 16; ++i)
982  {
983  int idy = i >> 2;
984  int idx = i - (idy << 2);
985  S[idy][idx] = (rabx[idx]*rabx[idy]+raby[idx]*raby[idy]+rabz[idx]*rabz[idy])*rattleParam[idx].rma
986  *drab[idx]*drab[idy];
987  }
988  BigReal r_unc[4];
989  #pragma unroll
990  for(int i = 0; i < 4; ++i)
991  {
992  if(i < icnt)
993  S[i][i] += rattleParam[i].rmb/rattleParam[i].rma*S[i][i];
994  r_unc[i] = (pabx[i]*rabx[i]+paby[i]*raby[i]+pabz[i]*rabz[i])*drab[i] -sqrt(rattleParam[i].dsq);//- 1./drab[i];
995  }
996  BigReal lambda[4] = {0,0,0,0};
997  #pragma unroll
998  for(int i = 0; i < 4; ++i)
999  #pragma unroll
1000  for(int j = 0; j < 4; ++j)
1001  A[i][j]=0;
1002  solveFullInverse(A, S, icnt);
1003  #pragma unroll
1004  for(int i = 0; i < 4; ++i)
1005  #pragma unroll
1006  for(int j = 0; j < 4; ++j)
1007  lambda[i] += A[i][j] * r_unc[j];
1008  #pragma unroll
1009  for(int i = 0; i < 4; ++i)
1010  {
1011  int a = rattleParam[i].ia;
1012  int b = rattleParam[i].ib;
1013  BigReal k = lambda[i];
1014  BigReal rma = rattleParam[i].rma*k*drab[i];
1015  BigReal rmb = rattleParam[i].rmb*k*drab[i];
1016  posx[a] = posx[a] - rabx[i]*rma;
1017  posy[a] = posy[a] - raby[i]*rma;
1018  posz[a] = posz[a] - rabz[i]*rma;
1019  posx[b] = posx[b] + rabx[i]*rmb;
1020  posy[b] = posy[b] + raby[i]*rmb;
1021  posz[b] = posz[b] + rabz[i]*rmb;
1022  }
1023  for(iter = 1; iter < maxiter; ++iter)
1024  {
1025  done = true;
1026  #pragma unroll
1027  for(int i = 0; i < 4; ++i)
1028  {
1029  int a = rattleParam[i].ia;
1030  int b = rattleParam[i].ib;
1031  pabx[i] = posx[a] - posx[b];
1032  paby[i] = posy[a] - posy[b];
1033  pabz[i] = posz[a] - posz[b];
1034  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
1035  BigReal rabsq = rattleParam[i].dsq;
1036  BigReal diffsq = pabsq - rabsq;
1037  if ( fabs(diffsq) > (rabsq * tol2) && i < icnt)
1038  done = false;
1039  r_unc[i] = (pabx[i]*rabx[i]+paby[i]*raby[i]+pabz[i]*rabz[i])*drab[i] -sqrt(2.*rabsq-pabsq);
1040  }
1041  if(done)
1042  break;
1043 
1044  //solveMatrix(lambda, S, r_unc);
1045  lambda[0] = 0;
1046  lambda[1] = 0;
1047  lambda[2] = 0;
1048  lambda[3] = 0;
1049  #pragma unroll
1050  for(int i = 0; i < 4; ++i)
1051  #pragma unroll
1052  for(int j = 0; j < 4; ++j)
1053  lambda[i] += A[i][j] * r_unc[j];
1054 
1055  #pragma unroll
1056  for(int i = 0; i < 4; ++i)
1057  {
1058  int a = rattleParam[i].ia;
1059  int b = rattleParam[i].ib;
1060  BigReal k = lambda[i];
1061  BigReal rma = rattleParam[i].rma*k*drab[i];
1062  BigReal rmb = rattleParam[i].rmb*k*drab[i];
1063  posx[a] = posx[a] - rabx[i]*rma;
1064  posy[a] = posy[a] - raby[i]*rma;
1065  posz[a] = posz[a] - rabz[i]*rma;
1066  posx[b] = posx[b] + rabx[i]*rmb;
1067  posy[b] = posy[b] + raby[i]*rmb;
1068  posz[b] = posz[b] + rabz[i]*rmb;
1069  }
1070  }
1071  }
1072 
1073  if(iter >= maxiter)
1074  consFailure = true;
1075 }
1076 
1077 #if 0
1078 #ifdef NAMD_MSHAKE
1079 void MSHAKEIterate(const int icnt, const RattleParam* rattleParam,
1080  const BigReal *refx, const BigReal *refy, const BigReal *refz,
1081  BigReal *posx, BigReal *posy, BigReal *posz,
1082  const BigReal tol2, const int maxiter,
1083  bool& done, bool& consFailure)
1084 {
1085  using namespace Eigen;
1086  #ifdef SHORTREALS
1087  typedef VectorXf VectorXr;
1088  typedef MatrixXf MatrixXr;
1089  #else
1090  typedef VectorXd VectorXr;
1091  typedef MatrixXd MatrixXr;
1092  #endif
1093  VectorXr sigma(icnt), lambda(icnt);
1094  MatrixXr A(icnt, icnt);
1095 
1096  //VectorXr pabx(icnt), rabx(icnt);
1097  //VectorXr paby(icnt), raby(icnt);
1098  //VectorXr pabz(icnt), rabz(icnt);
1099  BigReal* pabx = new BigReal[icnt];
1100  BigReal* rabx = new BigReal[icnt];
1101  BigReal* paby = new BigReal[icnt];
1102  BigReal* raby = new BigReal[icnt];
1103  BigReal* pabz = new BigReal[icnt];
1104  BigReal* rabz = new BigReal[icnt];
1105 
1106  //check each constraint
1107  register int loop;
1108  consFailure = false;
1109  done = true;
1110  for(int i = 0; i < icnt; ++i)
1111  {
1112  int a = rattleParam[i].ia;
1113  int b = rattleParam[i].ib;
1114  pabx[i] = posx[a] - posx[b];
1115  paby[i] = posy[a] - posy[b];
1116  pabz[i] = posz[a] - posz[b];
1117  rabx[i] = refx[a] - refx[b];
1118  raby[i] = refy[a] - refy[b];
1119  rabz[i] = refz[a] - refz[b];
1120 
1121  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
1122  BigReal rabsq = rattleParam[i].dsq;
1123  BigReal diffsq = pabsq - rabsq;
1124  sigma(i) = diffsq;
1125  if ( fabs(diffsq) > (rabsq * tol2) )
1126  done = false;
1127  }
1128  for(loop = 0; loop < maxiter; ++loop)
1129  {
1130  if(!done)
1131  {
1132  //construct A
1133  for(int i = 0; i < icnt*icnt; ++i)
1134  {
1135  int idx2 = i / icnt;
1136  int idx1 = i - icnt * idx2;
1137  BigReal rma = rattleParam[idx1].rma;
1138  A(idx1,idx2) = 2.*(pabx[idx1]*rabx[idx2]+paby[idx1]*raby[idx2]+pabz[idx1]*rabz[idx2])*rma;
1139  }
1140  for(int i = 0; i < icnt; ++i)
1141  {
1142  //BigReal rmb = rattleParam[i].rmb;
1143  //A(i,i) += 2.*(pabx[i]*rabx[i]+paby[i]*raby[i]+pabz[i]*rabz[i])*rmb;
1144  A(i,i) += A(i,i)*rattleParam[i].rmb/rattleParam[i].rma;
1145  }
1146  //sigma = A.transpose()*sigma;
1147  //A = A.transpose() * A;
1148  //PartialPivLU<MatrixXr> lu;
1149  //lu.compute(A);
1150  lambda = A.partialPivLu().solve(sigma);
1151  for(int i = 0; i < icnt; ++i)
1152  {
1153  int a = rattleParam[i].ia;
1154  int b = rattleParam[i].ib;
1155  BigReal rma = rattleParam[i].rma * lambda(i);
1156  BigReal rmb = rattleParam[i].rmb * lambda(i);
1157 
1158  posx[a] -= rma * rabx[i];
1159  posy[a] -= rma * raby[i];
1160  posz[a] -= rma * rabz[i];
1161  posx[b] += rmb * rabx[i];
1162  posy[b] += rmb * raby[i];
1163  posz[b] += rmb * rabz[i];
1164  }
1165  }
1166  else
1167  break;
1168  done = true;
1169  for(int i = 0; i < icnt; ++i)
1170  {
1171  int a = rattleParam[i].ia;
1172  int b = rattleParam[i].ib;
1173  pabx[i] = posx[a] - posx[b];
1174  paby[i] = posy[a] - posy[b];
1175  pabz[i] = posz[a] - posz[b];
1176  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
1177  BigReal rabsq = rattleParam[i].dsq;
1178  BigReal diffsq = pabsq - rabsq;
1179  sigma(i) = diffsq;
1180  if ( fabs(diffsq) > (rabsq * tol2) )
1181  done = false;
1182  }
1183  }
1184 
1185  if(loop == maxiter)
1186  consFailure = true;
1187  //write out the total number of iteration number
1188  //std::fstream ofile;
1189  //ofile.open("iteration_loops_mshake", std::fstream::app);
1190  //ofile << loop << std::endl;
1191  //ofile.close();
1192 
1193  delete [] pabx;
1194  delete [] rabx;
1195  delete [] paby;
1196  delete [] raby;
1197  delete [] pabz;
1198  delete [] rabz;
1199  //return loop;
1200 }
1201 void LINCS(const int icnt, const RattleParam* rattleParam,
1202  const BigReal *refx, const BigReal *refy, const BigReal *refz,
1203  BigReal *posx, BigReal *posy, BigReal *posz,
1204  const BigReal tol2, const int maxiter,
1205  bool& done, bool& consFailure)
1206 {
1207  using namespace Eigen;
1208  #ifdef SHORTREALS
1209  typedef VectorXf VectorXr;
1210  typedef MatrixXf MatrixXr;
1211  #else
1212  typedef VectorXd VectorXr;
1213  typedef MatrixXd MatrixXr;
1214  #endif
1215  BigReal* pabx = new BigReal[icnt];
1216  BigReal* rabx = new BigReal[icnt];
1217  BigReal* paby = new BigReal[icnt];
1218  BigReal* raby = new BigReal[icnt];
1219  BigReal* pabz = new BigReal[icnt];
1220  BigReal* rabz = new BigReal[icnt];
1221  BigReal* drab = new BigReal[icnt];
1222 
1223  //check each constraint
1224  consFailure = false;
1225  done = true;
1226  int iter = 0;
1227  for(int i = 0; i < icnt; ++i)
1228  {
1229  int a = rattleParam[i].ia;
1230  int b = rattleParam[i].ib;
1231  pabx[i] = posx[a] - posx[b];
1232  paby[i] = posy[a] - posy[b];
1233  pabz[i] = posz[a] - posz[b];
1234  rabx[i] = refx[a] - refx[b];
1235  raby[i] = refy[a] - refy[b];
1236  rabz[i] = refz[a] - refz[b];
1237 
1238  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
1239 
1240  BigReal rabsq = rattleParam[i].dsq;
1241  //drab[i] = 1./sqrt(rabsq);
1242  drab[i] = 1./sqrt(rabx[i]*rabx[i]+raby[i]*raby[i]+rabz[i]*rabz[i]);
1243  BigReal diffsq = pabsq - rabsq;
1244  if ( fabs(diffsq) > (rabsq * tol2) )
1245  done = false;
1246  }
1247  if(!done)
1248  {
1249  MatrixXr S(icnt, icnt);
1250  //construct S
1251  for(int i = 0; i < icnt*icnt; ++i)
1252  {
1253  int idy = (int)(i / icnt);
1254  int idx = i - idy * icnt;
1255  S(idy,idx) = (rabx[idx]*rabx[idy]+raby[idx]*raby[idy]+rabz[idx]*rabz[idy])*rattleParam[idx].rma
1256  *drab[idx]*drab[idy];
1257  }
1258  VectorXr r_unc(icnt);
1259  for(int i = 0; i < icnt; ++i)
1260  {
1261  S(i,i) += rattleParam[i].rmb/rattleParam[i].rma*S(i,i);
1262  r_unc(i) = (pabx[i]*rabx[i]+paby[i]*raby[i]+pabz[i]*rabz[i])*drab[i] -sqrt(rattleParam[i].dsq);//- 1./drab[i];
1263  }
1264  LLT<MatrixXr> lltOfS(S);
1265  VectorXr lambda = lltOfS.solve(r_unc);
1266 
1267  for(int i = 0; i < icnt; ++i)
1268  {
1269  int a = rattleParam[i].ia;
1270  int b = rattleParam[i].ib;
1271  BigReal k = lambda(i);
1272  BigReal rma = rattleParam[i].rma*k*drab[i];
1273  BigReal rmb = rattleParam[i].rmb*k*drab[i];
1274  posx[a] = posx[a] - rabx[i]*rma;
1275  posy[a] = posy[a] - raby[i]*rma;
1276  posz[a] = posz[a] - rabz[i]*rma;
1277  posx[b] = posx[b] + rabx[i]*rmb;
1278  posy[b] = posy[b] + raby[i]*rmb;
1279  posz[b] = posz[b] + rabz[i]*rmb;
1280  }
1281  for(iter = 1; iter < maxiter; ++iter)
1282  {
1283  done = true;
1284  for(int i = 0; i < icnt; ++i)
1285  {
1286  int a = rattleParam[i].ia;
1287  int b = rattleParam[i].ib;
1288  pabx[i] = posx[a] - posx[b];
1289  paby[i] = posy[a] - posy[b];
1290  pabz[i] = posz[a] - posz[b];
1291  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
1292  BigReal rabsq = rattleParam[i].dsq;
1293  BigReal diffsq = pabsq - rabsq;
1294  //std::cout << iter << " " << diffsq << std::endl;
1295  if ( fabs(diffsq) > (rabsq * tol2) )
1296  done = false;
1297  r_unc(i) = (pabx[i]*rabx[i]+paby[i]*raby[i]+pabz[i]*rabz[i])*drab[i] -sqrt(2.*rabsq-pabsq);
1298  }
1299  if(done)
1300  break;
1301 
1302  lambda = lltOfS.solve(r_unc);
1303  for(int i = 0; i < icnt; ++i)
1304  {
1305  int a = rattleParam[i].ia;
1306  int b = rattleParam[i].ib;
1307  BigReal k = lambda(i);
1308  BigReal rma = rattleParam[i].rma*k*drab[i];
1309  BigReal rmb = rattleParam[i].rmb*k*drab[i];
1310  posx[a] = posx[a] - rabx[i]*rma;
1311  posy[a] = posy[a] - raby[i]*rma;
1312  posz[a] = posz[a] - rabz[i]*rma;
1313  posx[b] = posx[b] + rabx[i]*rmb;
1314  posy[b] = posy[b] + raby[i]*rmb;
1315  posz[b] = posz[b] + rabz[i]*rmb;
1316  }
1317  //}
1318  //done = true;
1319  /*
1320  for(int i = 0; i < icnt; ++i)
1321  {
1322  int a = rattleParam[i].ia;
1323  int b = rattleParam[i].ib;
1324  pabx[i] = posx[a] - posx[b];
1325  paby[i] = posy[a] - posy[b];
1326  pabz[i] = posz[a] - posz[b];
1327 
1328  BigReal pabsq = pabx[i]*pabx[i] + paby[i]*paby[i] + pabz[i]*pabz[i];
1329  BigReal rabsq = rattleParam[i].dsq;
1330  BigReal diffsq = pabsq - rabsq;
1331  //std::cout << iter << " " << diffsq << std::endl;
1332  if ( fabs(diffsq) > (rabsq * tol2) )
1333  done = false;
1334  }
1335  if(done)
1336  break;*/
1337  }
1338  //write out the total number of iteration number
1339  //std::fstream ofile;
1340  //ofile.open("iteration_loops_lincs", std::fstream::app);
1341  //ofile << iter << std::endl;
1342  //ofile.close();
1343  }
1344 
1345  if(iter >= maxiter)
1346  consFailure = true;
1347 
1348  delete [] pabx;
1349  delete [] rabx;
1350  delete [] paby;
1351  delete [] raby;
1352  delete [] pabz;
1353  delete [] rabz;
1354  delete [] drab;
1355  //return iter;
1356 }
1357 #endif
1358 #endif
1359 void rattleN(const int icnt, const RattleParam* rattleParam,
1360  const BigReal *refx, const BigReal *refy, const BigReal *refz,
1361  BigReal *posx, BigReal *posy, BigReal *posz,
1362  const BigReal tol2, const int maxiter,
1363  bool& done, bool& consFailure) {
1364 
1365  for (int iter = 0; iter < maxiter; ++iter ) {
1366  done = true;
1367  consFailure = false;
1368  for (int i = 0; i < icnt; ++i ) {
1369  int a = rattleParam[i].ia;
1370  int b = rattleParam[i].ib;
1371  BigReal pabx = posx[a] - posx[b];
1372  BigReal paby = posy[a] - posy[b];
1373  BigReal pabz = posz[a] - posz[b];
1374  BigReal pabsq = pabx*pabx + paby*paby + pabz*pabz;
1375  BigReal rabsq = rattleParam[i].dsq;
1376  BigReal diffsq = rabsq - pabsq;
1377  if ( fabs(diffsq) > (rabsq * tol2) ) {
1378  BigReal rabx = refx[a] - refx[b];
1379  BigReal raby = refy[a] - refy[b];
1380  BigReal rabz = refz[a] - refz[b];
1381  BigReal rpab = rabx*pabx + raby*paby + rabz*pabz;
1382  if ( rpab < ( rabsq * 1.0e-6 ) ) {
1383  done = false;
1384  consFailure = true;
1385  continue;
1386  }
1387  BigReal rma = rattleParam[i].rma;
1388  BigReal rmb = rattleParam[i].rmb;
1389  BigReal gab = diffsq / ( 2.0 * ( rma + rmb ) * rpab );
1390  BigReal dpx = rabx * gab;
1391  BigReal dpy = raby * gab;
1392  BigReal dpz = rabz * gab;
1393  posx[a] += rma * dpx;
1394  posy[a] += rma * dpy;
1395  posz[a] += rma * dpz;
1396  posx[b] -= rmb * dpx;
1397  posy[b] -= rmb * dpy;
1398  posz[b] -= rmb * dpz;
1399  done = false;
1400  }
1401  }
1402  if ( done ) break;
1403  }
1404 
1405 }
1406 
1407 //
1408 // Explicit instances of templated methods
1409 //
1410 template void rattlePair<1>(const RattleParam* rattleParam,
1411  const BigReal *refx, const BigReal *refy, const BigReal *refz,
1412  BigReal *posx, BigReal *posy, BigReal *posz, bool& consFailure);
1413 template void settle1_SIMD<2>(const Vector *ref, Vector *pos,
1414  BigReal mOrmT, BigReal mHrmT, BigReal ra,
1415  BigReal rb, BigReal rc, BigReal rra);
1416 template void settle1_SIMD<1>(const Vector *ref, Vector *pos,
1417  BigReal mOrmT, BigReal mHrmT, BigReal ra,
1418  BigReal rb, BigReal rc, BigReal rra);
1419 
1420 static int settlev(const Vector *pos, BigReal ma, BigReal mb, Vector *vel,
1421  BigReal dt, Tensor *virial) {
1422 
1423  Vector rAB = pos[1]-pos[0];
1424  Vector rBC = pos[2]-pos[1];
1425  Vector rCA = pos[0]-pos[2];
1426 
1427  Vector AB = rAB.unit();
1428  Vector BC = rBC.unit();
1429  Vector CA = rCA.unit();
1430 
1431  BigReal cosA = -AB * CA;
1432  BigReal cosB = -BC * AB;
1433  BigReal cosC = -CA * BC;
1434 
1435  BigReal vab = (vel[1]-vel[0])*AB;
1436  BigReal vbc = (vel[2]-vel[1])*BC;
1437  BigReal vca = (vel[0]-vel[2])*CA;
1438 
1439  BigReal mab = ma+mb;
1440 
1441  BigReal d = (2*mab*mab + 2*ma*mb*cosA*cosB*cosC - 2*mb*mb*cosA*cosA
1442  - ma*mab*(cosB*cosB + cosC*cosC))*0.5/mb;
1443 
1444  BigReal tab = (vab*(2*mab - ma*cosC*cosC) +
1445  vbc*(mb*cosC*cosA - mab*cosB) +
1446  vca*(ma*cosB*cosC - 2*mb*cosA))*ma/d;
1447 
1448  BigReal tbc = (vbc*(mab*mab - mb*mb*cosA*cosA) +
1449  vca*ma*(mb*cosA*cosB - mab*cosC) +
1450  vab*ma*(mb*cosC*cosA - mab*cosB))/d;
1451 
1452  BigReal tca = (vca*(2*mab - ma*cosB*cosB) +
1453  vab*(ma*cosB*cosC - 2*mb*cosA) +
1454  vbc*(mb*cosA*cosB - mab*cosC))*ma/d;
1455 
1456  Vector ga = tab*AB - tca*CA;
1457  Vector gb = tbc*BC - tab*AB;
1458  Vector gc = tca*CA - tbc*BC;
1459 #if 0
1460  if (virial) {
1461  *virial += 0.5*outer(tab, rAB)/dt;
1462  *virial += 0.5*outer(tbc, rBC)/dt;
1463  *virial += 0.5*outer(tca, rCA)/dt;
1464  }
1465 #endif
1466  vel[0] += (0.5/ma)*ga;
1467  vel[1] += (0.5/mb)*gb;
1468  vel[2] += (0.5/mb)*gc;
1469 
1470  return 0;
1471 }
1472 
1473 int settle2(BigReal mO, BigReal mH, const Vector *pos,
1474  Vector *vel, BigReal dt, Tensor *virial) {
1475 
1476  settlev(pos, mO, mH, vel, dt, virial);
1477  return 0;
1478 }
1479 
1480 
1482 //
1483 // begin SOA code
1484 //
1485 
1486 // Unlike previous versions, this routine loops over all waters in patch.
1488  const double * __restrict ref_x,
1489  const double * __restrict ref_y,
1490  const double * __restrict ref_z,
1491  double * __restrict pos_x,
1492  double * __restrict pos_y,
1493  double * __restrict pos_z,
1494  int numWaters,
1495  BigReal mOrmT,
1496  BigReal mHrmT,
1497  BigReal ra,
1498  BigReal rb,
1499  BigReal rc,
1500  BigReal rra
1501  ) {
1502  for (int i=0; i < numWaters; i++) {
1503  BigReal ref0x = ref_x[3*i];
1504  BigReal ref0y = ref_y[3*i];
1505  BigReal ref0z = ref_z[3*i];
1506  BigReal ref1x = ref_x[3*i+1];
1507  BigReal ref1y = ref_y[3*i+1];
1508  BigReal ref1z = ref_z[3*i+1];
1509  BigReal ref2x = ref_x[3*i+2];
1510  BigReal ref2y = ref_y[3*i+2];
1511  BigReal ref2z = ref_z[3*i+2];
1512 
1513  BigReal pos0x = pos_x[3*i];
1514  BigReal pos0y = pos_y[3*i];
1515  BigReal pos0z = pos_z[3*i];
1516  BigReal pos1x = pos_x[3*i+1];
1517  BigReal pos1y = pos_y[3*i+1];
1518  BigReal pos1z = pos_z[3*i+1];
1519  BigReal pos2x = pos_x[3*i+2];
1520  BigReal pos2y = pos_y[3*i+2];
1521  BigReal pos2z = pos_z[3*i+2];
1522 
1523  // vectors in the plane of the original positions
1524  BigReal b0x = ref1x - ref0x;
1525  BigReal b0y = ref1y - ref0y;
1526  BigReal b0z = ref1z - ref0z;
1527 
1528  BigReal c0x = ref2x - ref0x;
1529  BigReal c0y = ref2y - ref0y;
1530  BigReal c0z = ref2z - ref0z;
1531 
1532  // new center of mass
1533  BigReal d0x = pos0x*mOrmT + ((pos1x + pos2x)*mHrmT);
1534  BigReal d0y = pos0y*mOrmT + ((pos1y + pos2y)*mHrmT);
1535  BigReal d0z = pos0z*mOrmT + ((pos1z + pos2z)*mHrmT);
1536 
1537  BigReal a1x = pos0x - d0x;
1538  BigReal a1y = pos0y - d0y;
1539  BigReal a1z = pos0z - d0z;
1540 
1541  BigReal b1x = pos1x - d0x;
1542  BigReal b1y = pos1y - d0y;
1543  BigReal b1z = pos1z - d0z;
1544 
1545  BigReal c1x = pos2x - d0x;
1546  BigReal c1y = pos2y - d0y;
1547  BigReal c1z = pos2z - d0z;
1548 
1549  // Vectors describing transformation from original coordinate system to
1550  // the 'primed' coordinate system as in the diagram.
1551  // n0 = b0 x c0
1552  BigReal n0x = b0y*c0z-c0y*b0z;
1553  BigReal n0y = c0x*b0z-b0x*c0z;
1554  BigReal n0z = b0x*c0y-c0x*b0y;
1555 
1556  // n1 = a1 x n0
1557  BigReal n1x = a1y*n0z-n0y*a1z;
1558  BigReal n1y = n0x*a1z-a1x*n0z;
1559  BigReal n1z = a1x*n0y-n0x*a1y;
1560 
1561  // n2 = n0 x n1
1562  BigReal n2x = n0y*n1z-n1y*n0z;
1563  BigReal n2y = n1x*n0z-n0x*n1z;
1564  BigReal n2z = n0x*n1y-n1x*n0y;
1565 
1566  // Normalize n0
1567  BigReal n0inv = 1.0/sqrt(n0x*n0x + n0y*n0y + n0z*n0z);
1568  n0x *= n0inv;
1569  n0y *= n0inv;
1570  n0z *= n0inv;
1571 
1572  BigReal n1inv = 1.0/sqrt(n1x*n1x + n1y*n1y + n1z*n1z);
1573  n1x *= n1inv;
1574  n1y *= n1inv;
1575  n1z *= n1inv;
1576 
1577  BigReal n2inv = 1.0/sqrt(n2x*n2x + n2y*n2y + n2z*n2z);
1578  n2x *= n2inv;
1579  n2y *= n2inv;
1580  n2z *= n2inv;
1581 
1582  //b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
1583  BigReal n1b0 = n1x*b0x + n1y*b0y + n1z*b0z;
1584  BigReal n2b0 = n2x*b0x + n2y*b0y + n2z*b0z;
1585 
1586  //c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
1587  BigReal n1c0 = n1x*c0x + n1y*c0y + n1z*c0z;
1588  BigReal n2c0 = n2x*c0x + n2y*c0y + n2z*c0z;
1589 
1590  BigReal A1Z = n0x*a1x + n0y*a1y + n0z*a1z;
1591 
1592  //b1 = Vector(n1*b1, n2*b1, n0*b1);
1593  BigReal n1b1 = n1x*b1x + n1y*b1y + n1z*b1z;
1594  BigReal n2b1 = n2x*b1x + n2y*b1y + n2z*b1z;
1595  BigReal n0b1 = n0x*b1x + n0y*b1y + n0z*b1z;
1596 
1597  //c1 = Vector(n1*c1, n2*c1, n0*c1);
1598  BigReal n1c1 = n1x*c1x + n1y*c1y + n1z*c1z;
1599  BigReal n2c1 = n2x*c1x + n2y*c1y + n2z*c1z;
1600  BigReal n0c1 = n0x*c1x + n0y*c1y + n0z*c1z;
1601 
1602  // now we can compute positions of canonical water
1603  BigReal sinphi = A1Z * rra;
1604  BigReal tmp = 1.0-sinphi*sinphi;
1605  BigReal cosphi = sqrt(tmp);
1606  BigReal sinpsi = (n0b1 - n0c1)/(2.0*rc*cosphi);
1607  tmp = 1.0-sinpsi*sinpsi;
1608  BigReal cospsi = sqrt(tmp);
1609 
1610  BigReal rbphi = -rb*cosphi;
1611  BigReal tmp1 = rc*sinpsi*sinphi;
1612  BigReal tmp2 = rc*sinpsi*cosphi;
1613 
1614  //Vector a2(0, ra*cosphi, ra*sinphi);
1615  BigReal a2y = ra*cosphi;
1616 
1617  //Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
1618  BigReal b2x = -rc*cospsi;
1619  BigReal b2y = rbphi - tmp1;
1620 
1621  //Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
1622  BigReal c2y = rbphi + tmp1;
1623 
1624  // there are no a0 terms because we've already subtracted the term off
1625  // when we first defined b0 and c0.
1626  BigReal alpha = b2x*(n1b0 - n1c0) + n2b0*b2y + n2c0*c2y;
1627  BigReal beta = b2x*(n2c0 - n2b0) + n1b0*b2y + n1c0*c2y;
1628  BigReal gama = n1b0*n2b1 - n1b1*n2b0 + n1c0*n2c1 - n1c1*n2c0;
1629 
1630  BigReal a2b2 = alpha*alpha + beta*beta;
1631  BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
1632  BigReal costheta = sqrt(1.0 - sintheta*sintheta);
1633 
1634  //Vector a3( -a2y*sintheta,
1635  // a2y*costheta,
1636  // A1Z);
1637  BigReal a3x = -a2y*sintheta;
1638  BigReal a3y = a2y*costheta;
1639  BigReal a3z = A1Z;
1640 
1641  // Vector b3(b2x*costheta - b2y*sintheta,
1642  // b2x*sintheta + b2y*costheta,
1643  // n0b1);
1644  BigReal b3x = b2x*costheta - b2y*sintheta;
1645  BigReal b3y = b2x*sintheta + b2y*costheta;
1646  BigReal b3z = n0b1;
1647 
1648  // Vector c3(-b2x*costheta - c2y*sintheta,
1649  // -b2x*sintheta + c2y*costheta,
1650  // n0c1);
1651  BigReal c3x = -b2x*costheta - c2y*sintheta;
1652  BigReal c3y = -b2x*sintheta + c2y*costheta;
1653  BigReal c3z = n0c1;
1654 
1655  // undo the transformation; generate new normal vectors from the transpose.
1656  // Vector m1(n1.x, n2.x, n0.x);
1657  BigReal m1x = n1x;
1658  BigReal m1y = n2x;
1659  BigReal m1z = n0x;
1660 
1661  // Vector m2(n1.y, n2.y, n0.y);
1662  BigReal m2x = n1y;
1663  BigReal m2y = n2y;
1664  BigReal m2z = n0y;
1665 
1666  // Vector m0(n1.z, n2.z, n0.z);
1667  BigReal m0x = n1z;
1668  BigReal m0y = n2z;
1669  BigReal m0z = n0z;
1670 
1671  //pos[i*3+0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
1672  pos0x = a3x*m1x + a3y*m1y + a3z*m1z + d0x;
1673  pos0y = a3x*m2x + a3y*m2y + a3z*m2z + d0y;
1674  pos0z = a3x*m0x + a3y*m0y + a3z*m0z + d0z;
1675 
1676  // pos[i*3+1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
1677  pos1x = b3x*m1x + b3y*m1y + b3z*m1z + d0x;
1678  pos1y = b3x*m2x + b3y*m2y + b3z*m2z + d0y;
1679  pos1z = b3x*m0x + b3y*m0y + b3z*m0z + d0z;
1680 
1681  // pos[i*3+2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
1682  pos2x = c3x*m1x + c3y*m1y + c3z*m1z + d0x;
1683  pos2y = c3x*m2x + c3y*m2y + c3z*m2z + d0y;
1684  pos2z = c3x*m0x + c3y*m0y + c3z*m0z + d0z;
1685 
1686  pos_x[3*i] = pos0x;
1687  pos_y[3*i] = pos0y;
1688  pos_z[3*i] = pos0z;
1689  pos_x[3*i+1] = pos1x;
1690  pos_y[3*i+1] = pos1y;
1691  pos_z[3*i+1] = pos1z;
1692  pos_x[3*i+2] = pos2x;
1693  pos_y[3*i+2] = pos2y;
1694  pos_z[3*i+2] = pos2z;
1695  }
1696 
1697 } // settle1_SOA()
1698 
1699 
template void settle1_SIMD< 1 >(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
static int settlev(const Vector *pos, BigReal ma, BigReal mb, Vector *vel, BigReal dt, Tensor *virial)
Definition: Settle.C:1420
int ib
Definition: Settle.h:58
void rattlePair(const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, bool &consFailure)
Definition: Settle.C:554
void settle1_SOA(const double *__restrict ref_x, const double *__restrict ref_y, const double *__restrict ref_z, double *__restrict pos_x, double *__restrict pos_y, double *__restrict pos_z, int numWaters, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
Definition: Settle.C:1487
NAMD_HOST_DEVICE Tensor outer(const Vector &v1, const Vector &v2)
Definition: Tensor.h:241
Definition: Vector.h:72
void MSHAKEIterate(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:830
BigReal z
Definition: Vector.h:74
BigReal dsq
Definition: Settle.h:59
template void settle1_SIMD< 2 >(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
int settle2(BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial)
Definition: Settle.C:1473
static void swap_row(BigReal A[4][4], BigReal b[4], int r1, int r2)
Definition: Settle.C:719
void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist, BigReal &mO, BigReal &mH, BigReal &mOrmT, BigReal &mHrmT, BigReal &ra, BigReal &rb, BigReal &rc, BigReal &rra)
initialize cached water properties
Definition: Settle.C:46
static void solve_4by4(BigReal lambda [4], BigReal A[4][4], BigReal sigma[4])
Definition: Settle.C:736
void LINCS(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:936
template void rattlePair< 1 >(const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, bool &consFailure)
static BigReal det_3by3(BigReal A[4][4])
Definition: Settle.C:711
BigReal x
Definition: Vector.h:74
int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
optimized settle1 algorithm, reuses water properties as much as possible
Definition: Settle.C:63
BigReal rmb
Definition: Settle.h:61
Definition: Tensor.h:15
BigReal y
Definition: Vector.h:74
BigReal rma
Definition: Settle.h:60
int ia
Definition: Settle.h:57
void settle1_SIMD(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
Definition: Settle.C:293
static void solveMatrix(BigReal lambda [4], BigReal A[4][4], BigReal sigma[4], int icnt)
Definition: Settle.C:775
void rattleN(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:1359
NAMD_HOST_DEVICE Vector unit(void) const
Definition: Vector.h:215
double BigReal
Definition: common.h:123
static void solveFullInverse(BigReal A[4][4], BigReal S[4][4], int icnt)
Definition: Settle.C:816