NAMD
Settle.C
Go to the documentation of this file.
1 
7 //#include "InfoStream.h"
8 #include "Settle.h"
9 #include <string.h>
10 #include <math.h>
11 //#include <charm++.h> // for CkPrintf
12 
13 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
14 #include <emmintrin.h> // SSE2
15 #if defined(__INTEL_COMPILER)
16 #define __align(X) __declspec(align(X) )
17 #elif defined(__PGI)
18 #define __align(X) __attribute__((aligned(X) ))
19 #define MISSING_mm_cvtsd_f64
20 #elif defined(__GNUC__)
21 #define __align(X) __attribute__((aligned(X) ))
22 #if (__GNUC__ < 4)
23 #define MISSING_mm_cvtsd_f64
24 #endif
25 #else
26 #define __align(X) __declspec(align(X) )
27 #endif
28 #endif
29 
30 //
31 // XXX static and global variables are unsafe for shared memory builds.
32 // The global and static vars should be eliminated.
33 // Unfortunately, the routines that use these below are actually
34 // in use in NAMD.
35 //
36 
37 // Initialize various properties of the waters
38 // settle1() assumes all waters are identical,
39 // and will generate bad results if they are not.
40 void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist,
41  BigReal &mOrmT, BigReal &mHrmT, BigReal &ra,
42  BigReal &rb, BigReal &rc, BigReal &rra) {
43 
44  BigReal rmT = 1.0 / (pmO+pmH+pmH);
45  mOrmT = pmO * rmT;
46  mHrmT = pmH * rmT;
47  BigReal t1 = 0.5*pmO/pmH;
48  rc = 0.5*hhdist;
49  ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
50  rb = t1*ra;
51  rra = 1.0 / ra;
52 }
53 
54 
55 int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt,
56  BigReal mOrmT, BigReal mHrmT, BigReal ra,
57  BigReal rb, BigReal rc, BigReal rra) {
58 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
59  // SSE acceleration of some of the costly parts of settle using
60  // the Intel C/C++ classes. This implementation uses the SSE units
61  // less efficiency than is potentially possible, but in order to do
62  // better, the settle algorithm will have to be vectorized and operate
63  // on multiple waters at a time. Doing so could give us the ability to
64  // do two (double precison) or four (single precision) waters at a time.
65  // This code achieves a modest speedup without the need to reorganize
66  // the NAMD structure. Once we have water molecules sorted in a single
67  // block we can do far better.
68 
69  // vectors in the plane of the original positions
70  Vector b0, c0;
71 
72  __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x); // ref0.y and ref0.x
73  __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x); // ref1.y and ref1.x
74 
75  __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
76  _mm_storeu_pd((double *) &b0.x, B0xy);
77  b0.z = ref[1].z - ref[0].z;
78 
79  __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x); // ref2.y and ref2.x
80 
81  __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
82  _mm_storeu_pd((double *) &c0.x, C0xy);
83  c0.z = ref[2].z - ref[0].z;
84 
85  // new center of mass
86  // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
87  __align(16) Vector a1;
88  __align(16) Vector b1;
89  __align(16) Vector c1;
90  __align(16) Vector d0;
91 
92  __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
93  __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
94  __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
95 
96  __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
97  __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
98  __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
99 
100  d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
101  a1.z = pos[0].z - d0.z;
102  b1.z = pos[1].z - d0.z;
103  c1.z = pos[2].z - d0.z;
104 
105  __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
106  _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
107 
108  __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
109  _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
110 
111  __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
112  _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
113 
114  _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
115 
116  // Vectors describing transformation from original coordinate system to
117  // the 'primed' coordinate system as in the diagram.
118  Vector n0 = cross(b0, c0);
119  Vector n1 = cross(a1, n0);
120  Vector n2 = cross(n0, n1);
121 #else
122  // vectors in the plane of the original positions
123  Vector b0 = ref[1]-ref[0];
124  Vector c0 = ref[2]-ref[0];
125 
126  // new center of mass
127  Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
128 
129  Vector a1 = pos[0] - d0;
130  Vector b1 = pos[1] - d0;
131  Vector c1 = pos[2] - d0;
132 
133  // Vectors describing transformation from original coordinate system to
134  // the 'primed' coordinate system as in the diagram.
135  Vector n0 = cross(b0, c0);
136  Vector n1 = cross(a1, n0);
137  Vector n2 = cross(n0, n1);
138 #endif
139 
140 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
141  __m128d l1 = _mm_set_pd(n0.x, n0.y);
142  l1 = _mm_mul_pd(l1, l1);
143  // n0.x^2 + n0.y^2
144  double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
145 
146  __m128d l3 = _mm_set_pd(n1.y, n1.z);
147  l3 = _mm_mul_pd(l3, l3);
148  // n1.y^2 + n1.z^2
149  double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
150 
151  __m128d l2 = _mm_set_pd(n1.x, n0.z);
152  // len(n1)^2 and len(n0)^2
153  __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
154 
155  __m128d l4 = _mm_set_pd(n2.x, n2.y);
156  l4 = _mm_mul_pd(l4, l4);
157  // n2.x^2 + n2.y^2
158  double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
159  double ts2 = l4xy2 + (n2.z * n2.z); // len(n2)^2
160 
161  double invlens[4];
162  // since rsqrt_nr() doesn't work with current compiler
163  // this is the next best option
164  static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
165 
166  // 1/len(n1) and 1/len(n0)
167  __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
168 
169  // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
170  _mm_storeu_pd(invlens, invlen12);
171 
172  n0 = n0 * invlens[0];
173 
174  // shuffle the order of operations around from the normal algorithm so
175  // that we can double pump sqrt() with n2 and cosphi at the same time
176  // these components are usually computed down in the canonical water block
177  BigReal A1Z = n0 * a1;
178  BigReal sinphi = A1Z * rra;
179  BigReal tmp = 1.0-sinphi*sinphi;
180 
181  __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
182  // invlens[2] = 1/len(n2), invlens[3] = cosphi
183  _mm_storeu_pd(invlens+2, n2cosphi);
184 
185  n1 = n1 * invlens[1];
186  n2 = n2 * (1.0 / invlens[2]);
187  BigReal cosphi = invlens[3];
188 
189  b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
190  c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
191 
192  b1 = Vector(n1*b1, n2*b1, n0*b1);
193  c1 = Vector(n1*c1, n2*c1, n0*c1);
194 
195  // now we can compute positions of canonical water
196  BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
197  tmp = 1.0-sinpsi*sinpsi;
198  BigReal cospsi = sqrt(tmp);
199 #else
200  n0 = n0.unit();
201  n1 = n1.unit();
202  n2 = n2.unit();
203 
204  b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
205  c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
206 
207  BigReal A1Z = n0 * a1;
208  b1 = Vector(n1*b1, n2*b1, n0*b1);
209  c1 = Vector(n1*c1, n2*c1, n0*c1);
210 
211  // now we can compute positions of canonical water
212  BigReal sinphi = A1Z * rra;
213  BigReal tmp = 1.0-sinphi*sinphi;
214  BigReal cosphi = sqrt(tmp);
215  BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
216  tmp = 1.0-sinpsi*sinpsi;
217  BigReal cospsi = sqrt(tmp);
218 #endif
219 
220  BigReal rbphi = -rb*cosphi;
221  BigReal tmp1 = rc*sinpsi*sinphi;
222  BigReal tmp2 = rc*sinpsi*cosphi;
223 
224  Vector a2(0, ra*cosphi, ra*sinphi);
225  Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
226  Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
227 
228  // there are no a0 terms because we've already subtracted the term off
229  // when we first defined b0 and c0.
230  BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
231  BigReal beta = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
232  BigReal gama = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
233 
234  BigReal a2b2 = alpha*alpha + beta*beta;
235  BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
236  BigReal costheta = sqrt(1.0 - sintheta*sintheta);
237 
238 #if 0
239  Vector a3( -a2.y*sintheta,
240  a2.y*costheta,
241  a2.z);
242  Vector b3(b2.x*costheta - b2.y*sintheta,
243  b2.x*sintheta + b2.y*costheta,
244  b2.z);
245  Vector c3(c2.x*costheta - c2.y*sintheta,
246  c2.x*sintheta + c2.y*costheta,
247  c2.z);
248 
249 #else
250  Vector a3( -a2.y*sintheta,
251  a2.y*costheta,
252  A1Z);
253  Vector b3(b2.x*costheta - b2.y*sintheta,
254  b2.x*sintheta + b2.y*costheta,
255  b1.z);
256  Vector c3(-b2.x*costheta - c2.y*sintheta,
257  -b2.x*sintheta + c2.y*costheta,
258  c1.z);
259 
260 #endif
261 
262  // undo the transformation; generate new normal vectors from the transpose.
263  Vector m1(n1.x, n2.x, n0.x);
264  Vector m2(n1.y, n2.y, n0.y);
265  Vector m0(n1.z, n2.z, n0.z);
266 
267  pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
268  pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
269  pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
270 
271  // dt can be negative during startup!
272  if (invdt != 0) {
273  vel[0] = (pos[0]-ref[0])*invdt;
274  vel[1] = (pos[1]-ref[1])*invdt;
275  vel[2] = (pos[2]-ref[2])*invdt;
276  }
277 
278  return 0;
279 }
280 
281 //
282 // Settle multiple waters using SIMD
283 //
284 template <int veclen>
285 void settle1_SIMD(const Vector *ref, Vector *pos,
286  BigReal mOrmT, BigReal mHrmT, BigReal ra,
287  BigReal rb, BigReal rc, BigReal rra) {
288 
289  BigReal ref0xt[veclen];
290  BigReal ref0yt[veclen];
291  BigReal ref0zt[veclen];
292  BigReal ref1xt[veclen];
293  BigReal ref1yt[veclen];
294  BigReal ref1zt[veclen];
295  BigReal ref2xt[veclen];
296  BigReal ref2yt[veclen];
297  BigReal ref2zt[veclen];
298 
299  BigReal pos0xt[veclen];
300  BigReal pos0yt[veclen];
301  BigReal pos0zt[veclen];
302  BigReal pos1xt[veclen];
303  BigReal pos1yt[veclen];
304  BigReal pos1zt[veclen];
305  BigReal pos2xt[veclen];
306  BigReal pos2yt[veclen];
307  BigReal pos2zt[veclen];
308 
309  for (int i=0;i < veclen;i++) {
310  ref0xt[i] = ref[i*3+0].x;
311  ref0yt[i] = ref[i*3+0].y;
312  ref0zt[i] = ref[i*3+0].z;
313  ref1xt[i] = ref[i*3+1].x;
314  ref1yt[i] = ref[i*3+1].y;
315  ref1zt[i] = ref[i*3+1].z;
316  ref2xt[i] = ref[i*3+2].x;
317  ref2yt[i] = ref[i*3+2].y;
318  ref2zt[i] = ref[i*3+2].z;
319 
320  pos0xt[i] = pos[i*3+0].x;
321  pos0yt[i] = pos[i*3+0].y;
322  pos0zt[i] = pos[i*3+0].z;
323  pos1xt[i] = pos[i*3+1].x;
324  pos1yt[i] = pos[i*3+1].y;
325  pos1zt[i] = pos[i*3+1].z;
326  pos2xt[i] = pos[i*3+2].x;
327  pos2yt[i] = pos[i*3+2].y;
328  pos2zt[i] = pos[i*3+2].z;
329  }
330 
331 #pragma omp simd
332  for (int i=0;i < veclen;i++) {
333 
334  BigReal ref0x = ref0xt[i];
335  BigReal ref0y = ref0yt[i];
336  BigReal ref0z = ref0zt[i];
337  BigReal ref1x = ref1xt[i];
338  BigReal ref1y = ref1yt[i];
339  BigReal ref1z = ref1zt[i];
340  BigReal ref2x = ref2xt[i];
341  BigReal ref2y = ref2yt[i];
342  BigReal ref2z = ref2zt[i];
343 
344  BigReal pos0x = pos0xt[i];
345  BigReal pos0y = pos0yt[i];
346  BigReal pos0z = pos0zt[i];
347  BigReal pos1x = pos1xt[i];
348  BigReal pos1y = pos1yt[i];
349  BigReal pos1z = pos1zt[i];
350  BigReal pos2x = pos2xt[i];
351  BigReal pos2y = pos2yt[i];
352  BigReal pos2z = pos2zt[i];
353 
354  // vectors in the plane of the original positions
355  BigReal b0x = ref1x - ref0x;
356  BigReal b0y = ref1y - ref0y;
357  BigReal b0z = ref1z - ref0z;
358 
359  BigReal c0x = ref2x - ref0x;
360  BigReal c0y = ref2y - ref0y;
361  BigReal c0z = ref2z - ref0z;
362 
363  // new center of mass
364  BigReal d0x = pos0x*mOrmT + ((pos1x + pos2x)*mHrmT);
365  BigReal d0y = pos0y*mOrmT + ((pos1y + pos2y)*mHrmT);
366  BigReal d0z = pos0z*mOrmT + ((pos1z + pos2z)*mHrmT);
367 
368  BigReal a1x = pos0x - d0x;
369  BigReal a1y = pos0y - d0y;
370  BigReal a1z = pos0z - d0z;
371 
372  BigReal b1x = pos1x - d0x;
373  BigReal b1y = pos1y - d0y;
374  BigReal b1z = pos1z - d0z;
375 
376  BigReal c1x = pos2x - d0x;
377  BigReal c1y = pos2y - d0y;
378  BigReal c1z = pos2z - d0z;
379 
380  // Vectors describing transformation from original coordinate system to
381  // the 'primed' coordinate system as in the diagram.
382  // n0 = b0 x c0
383  BigReal n0x = b0y*c0z-c0y*b0z;
384  BigReal n0y = c0x*b0z-b0x*c0z;
385  BigReal n0z = b0x*c0y-c0x*b0y;
386 
387  // n1 = a1 x n0
388  BigReal n1x = a1y*n0z-n0y*a1z;
389  BigReal n1y = n0x*a1z-a1x*n0z;
390  BigReal n1z = a1x*n0y-n0x*a1y;
391 
392  // n2 = n0 x n1
393  BigReal n2x = n0y*n1z-n1y*n0z;
394  BigReal n2y = n1x*n0z-n0x*n1z;
395  BigReal n2z = n0x*n1y-n1x*n0y;
396 
397  // Normalize n0
398  BigReal n0inv = 1.0/sqrt(n0x*n0x + n0y*n0y + n0z*n0z);
399  n0x *= n0inv;
400  n0y *= n0inv;
401  n0z *= n0inv;
402 
403  BigReal n1inv = 1.0/sqrt(n1x*n1x + n1y*n1y + n1z*n1z);
404  n1x *= n1inv;
405  n1y *= n1inv;
406  n1z *= n1inv;
407 
408  BigReal n2inv = 1.0/sqrt(n2x*n2x + n2y*n2y + n2z*n2z);
409  n2x *= n2inv;
410  n2y *= n2inv;
411  n2z *= n2inv;
412 
413  //b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
414  BigReal n1b0 = n1x*b0x + n1y*b0y + n1z*b0z;
415  BigReal n2b0 = n2x*b0x + n2y*b0y + n2z*b0z;
416 
417  //c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
418  BigReal n1c0 = n1x*c0x + n1y*c0y + n1z*c0z;
419  BigReal n2c0 = n2x*c0x + n2y*c0y + n2z*c0z;
420 
421  BigReal A1Z = n0x*a1x + n0y*a1y + n0z*a1z;
422 
423  //b1 = Vector(n1*b1, n2*b1, n0*b1);
424  BigReal n1b1 = n1x*b1x + n1y*b1y + n1z*b1z;
425  BigReal n2b1 = n2x*b1x + n2y*b1y + n2z*b1z;
426  BigReal n0b1 = n0x*b1x + n0y*b1y + n0z*b1z;
427 
428  //c1 = Vector(n1*c1, n2*c1, n0*c1);
429  BigReal n1c1 = n1x*c1x + n1y*c1y + n1z*c1z;
430  BigReal n2c1 = n2x*c1x + n2y*c1y + n2z*c1z;
431  BigReal n0c1 = n0x*c1x + n0y*c1y + n0z*c1z;
432 
433  // now we can compute positions of canonical water
434  BigReal sinphi = A1Z * rra;
435  BigReal tmp = 1.0-sinphi*sinphi;
436  BigReal cosphi = sqrt(tmp);
437  BigReal sinpsi = (n0b1 - n0c1)/(2.0*rc*cosphi);
438  tmp = 1.0-sinpsi*sinpsi;
439  BigReal cospsi = sqrt(tmp);
440 
441  BigReal rbphi = -rb*cosphi;
442  BigReal tmp1 = rc*sinpsi*sinphi;
443  BigReal tmp2 = rc*sinpsi*cosphi;
444 
445  //Vector a2(0, ra*cosphi, ra*sinphi);
446  BigReal a2y = ra*cosphi;
447 
448  //Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
449  BigReal b2x = -rc*cospsi;
450  BigReal b2y = rbphi - tmp1;
451 
452  //Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
453  BigReal c2y = rbphi + tmp1;
454 
455  // there are no a0 terms because we've already subtracted the term off
456  // when we first defined b0 and c0.
457  BigReal alpha = b2x*(n1b0 - n1c0) + n2b0*b2y + n2c0*c2y;
458  BigReal beta = b2x*(n2c0 - n2b0) + n1b0*b2y + n1c0*c2y;
459  BigReal gama = n1b0*n2b1 - n1b1*n2b0 + n1c0*n2c1 - n1c1*n2c0;
460 
461  BigReal a2b2 = alpha*alpha + beta*beta;
462  BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
463  BigReal costheta = sqrt(1.0 - sintheta*sintheta);
464 
465  //Vector a3( -a2y*sintheta,
466  // a2y*costheta,
467  // A1Z);
468  BigReal a3x = -a2y*sintheta;
469  BigReal a3y = a2y*costheta;
470  BigReal a3z = A1Z;
471 
472  // Vector b3(b2x*costheta - b2y*sintheta,
473  // b2x*sintheta + b2y*costheta,
474  // n0b1);
475  BigReal b3x = b2x*costheta - b2y*sintheta;
476  BigReal b3y = b2x*sintheta + b2y*costheta;
477  BigReal b3z = n0b1;
478 
479  // Vector c3(-b2x*costheta - c2y*sintheta,
480  // -b2x*sintheta + c2y*costheta,
481  // n0c1);
482  BigReal c3x = -b2x*costheta - c2y*sintheta;
483  BigReal c3y = -b2x*sintheta + c2y*costheta;
484  BigReal c3z = n0c1;
485 
486  // undo the transformation; generate new normal vectors from the transpose.
487  // Vector m1(n1.x, n2.x, n0.x);
488  BigReal m1x = n1x;
489  BigReal m1y = n2x;
490  BigReal m1z = n0x;
491 
492  // Vector m2(n1.y, n2.y, n0.y);
493  BigReal m2x = n1y;
494  BigReal m2y = n2y;
495  BigReal m2z = n0y;
496 
497  // Vector m0(n1.z, n2.z, n0.z);
498  BigReal m0x = n1z;
499  BigReal m0y = n2z;
500  BigReal m0z = n0z;
501 
502  //pos[i*3+0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
503  pos0x = a3x*m1x + a3y*m1y + a3z*m1z + d0x;
504  pos0y = a3x*m2x + a3y*m2y + a3z*m2z + d0y;
505  pos0z = a3x*m0x + a3y*m0y + a3z*m0z + d0z;
506 
507  // pos[i*3+1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
508  pos1x = b3x*m1x + b3y*m1y + b3z*m1z + d0x;
509  pos1y = b3x*m2x + b3y*m2y + b3z*m2z + d0y;
510  pos1z = b3x*m0x + b3y*m0y + b3z*m0z + d0z;
511 
512  // pos[i*3+2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
513  pos2x = c3x*m1x + c3y*m1y + c3z*m1z + d0x;
514  pos2y = c3x*m2x + c3y*m2y + c3z*m2z + d0y;
515  pos2z = c3x*m0x + c3y*m0y + c3z*m0z + d0z;
516 
517  pos0xt[i] = pos0x;
518  pos0yt[i] = pos0y;
519  pos0zt[i] = pos0z;
520  pos1xt[i] = pos1x;
521  pos1yt[i] = pos1y;
522  pos1zt[i] = pos1z;
523  pos2xt[i] = pos2x;
524  pos2yt[i] = pos2y;
525  pos2zt[i] = pos2z;
526  }
527 
528  for (int i=0;i < veclen;i++) {
529  pos[i*3+0].x = pos0xt[i];
530  pos[i*3+0].y = pos0yt[i];
531  pos[i*3+0].z = pos0zt[i];
532  pos[i*3+1].x = pos1xt[i];
533  pos[i*3+1].y = pos1yt[i];
534  pos[i*3+1].z = pos1zt[i];
535  pos[i*3+2].x = pos2xt[i];
536  pos[i*3+2].y = pos2yt[i];
537  pos[i*3+2].z = pos2zt[i];
538  }
539 
540 }
541 
542 //
543 // Rattle pair of atoms
544 //
545 template <int veclen>
546 void rattlePair(const RattleParam* rattleParam,
547  const BigReal *refx, const BigReal *refy, const BigReal *refz,
548  BigReal *posx, BigReal *posy, BigReal *posz, bool& consFailure) {
549 
550  int a = rattleParam[0].ia;
551  int b = rattleParam[0].ib;
552  BigReal pabx = posx[a] - posx[b];
553  BigReal paby = posy[a] - posy[b];
554  BigReal pabz = posz[a] - posz[b];
555  BigReal pabsq = pabx*pabx + paby*paby + pabz*pabz;
556  BigReal rabsq = rattleParam[0].dsq;
557  BigReal diffsq = rabsq - pabsq;
558  BigReal rabx = refx[a] - refx[b];
559  BigReal raby = refy[a] - refy[b];
560  BigReal rabz = refz[a] - refz[b];
561 
562  BigReal refsq = rabx*rabx + raby*raby + rabz*rabz;
563 
564  BigReal rpab = rabx*pabx + raby*paby + rabz*pabz;
565 
566  BigReal rma = rattleParam[0].rma;
567  BigReal rmb = rattleParam[0].rmb;
568 
569  BigReal gab;
570  BigReal sqrtarg = rpab*rpab + refsq*diffsq;
571  if ( sqrtarg < 0. ) {
572  consFailure = true;
573  gab = 0.;
574  } else {
575  consFailure = false;
576  gab = (-rpab + sqrt(sqrtarg))/(refsq*(rma + rmb));
577  }
578 
579  BigReal dpx = rabx * gab;
580  BigReal dpy = raby * gab;
581  BigReal dpz = rabz * gab;
582  posx[a] += rma * dpx;
583  posy[a] += rma * dpy;
584  posz[a] += rma * dpz;
585  posx[b] -= rmb * dpx;
586  posy[b] -= rmb * dpy;
587  posz[b] -= rmb * dpz;
588 
589 }
590 
591 void rattleN(const int icnt, const RattleParam* rattleParam,
592  const BigReal *refx, const BigReal *refy, const BigReal *refz,
593  BigReal *posx, BigReal *posy, BigReal *posz,
594  const BigReal tol2, const int maxiter,
595  bool& done, bool& consFailure) {
596 
597  for (int iter = 0; iter < maxiter; ++iter ) {
598  done = true;
599  consFailure = false;
600  for (int i = 0; i < icnt; ++i ) {
601  int a = rattleParam[i].ia;
602  int b = rattleParam[i].ib;
603  BigReal pabx = posx[a] - posx[b];
604  BigReal paby = posy[a] - posy[b];
605  BigReal pabz = posz[a] - posz[b];
606  BigReal pabsq = pabx*pabx + paby*paby + pabz*pabz;
607  BigReal rabsq = rattleParam[i].dsq;
608  BigReal diffsq = rabsq - pabsq;
609  if ( fabs(diffsq) > (rabsq * tol2) ) {
610  BigReal rabx = refx[a] - refx[b];
611  BigReal raby = refy[a] - refy[b];
612  BigReal rabz = refz[a] - refz[b];
613  BigReal rpab = rabx*pabx + raby*paby + rabz*pabz;
614  if ( rpab < ( rabsq * 1.0e-6 ) ) {
615  done = false;
616  consFailure = true;
617  continue;
618  }
619  BigReal rma = rattleParam[i].rma;
620  BigReal rmb = rattleParam[i].rmb;
621  BigReal gab = diffsq / ( 2.0 * ( rma + rmb ) * rpab );
622  BigReal dpx = rabx * gab;
623  BigReal dpy = raby * gab;
624  BigReal dpz = rabz * gab;
625  posx[a] += rma * dpx;
626  posy[a] += rma * dpy;
627  posz[a] += rma * dpz;
628  posx[b] -= rmb * dpx;
629  posy[b] -= rmb * dpy;
630  posz[b] -= rmb * dpz;
631  done = false;
632  }
633  }
634  if ( done ) break;
635  }
636 
637 }
638 
639 //
640 // Explicit instances of templated methods
641 //
642 template void rattlePair<1>(const RattleParam* rattleParam,
643  const BigReal *refx, const BigReal *refy, const BigReal *refz,
644  BigReal *posx, BigReal *posy, BigReal *posz, bool& consFailure);
645 template void settle1_SIMD<2>(const Vector *ref, Vector *pos,
646  BigReal mOrmT, BigReal mHrmT, BigReal ra,
647  BigReal rb, BigReal rc, BigReal rra);
648 template void settle1_SIMD<1>(const Vector *ref, Vector *pos,
649  BigReal mOrmT, BigReal mHrmT, BigReal ra,
650  BigReal rb, BigReal rc, BigReal rra);
651 
652 static int settlev(const Vector *pos, BigReal ma, BigReal mb, Vector *vel,
653  BigReal dt, Tensor *virial) {
654 
655  Vector rAB = pos[1]-pos[0];
656  Vector rBC = pos[2]-pos[1];
657  Vector rCA = pos[0]-pos[2];
658 
659  Vector AB = rAB.unit();
660  Vector BC = rBC.unit();
661  Vector CA = rCA.unit();
662 
663  BigReal cosA = -AB * CA;
664  BigReal cosB = -BC * AB;
665  BigReal cosC = -CA * BC;
666 
667  BigReal vab = (vel[1]-vel[0])*AB;
668  BigReal vbc = (vel[2]-vel[1])*BC;
669  BigReal vca = (vel[0]-vel[2])*CA;
670 
671  BigReal mab = ma+mb;
672 
673  BigReal d = (2*mab*mab + 2*ma*mb*cosA*cosB*cosC - 2*mb*mb*cosA*cosA
674  - ma*mab*(cosB*cosB + cosC*cosC))*0.5/mb;
675 
676  BigReal tab = (vab*(2*mab - ma*cosC*cosC) +
677  vbc*(mb*cosC*cosA - mab*cosB) +
678  vca*(ma*cosB*cosC - 2*mb*cosA))*ma/d;
679 
680  BigReal tbc = (vbc*(mab*mab - mb*mb*cosA*cosA) +
681  vca*ma*(mb*cosA*cosB - mab*cosC) +
682  vab*ma*(mb*cosC*cosA - mab*cosB))/d;
683 
684  BigReal tca = (vca*(2*mab - ma*cosB*cosB) +
685  vab*(ma*cosB*cosC - 2*mb*cosA) +
686  vbc*(mb*cosA*cosB - mab*cosC))*ma/d;
687 
688  Vector ga = tab*AB - tca*CA;
689  Vector gb = tbc*BC - tab*AB;
690  Vector gc = tca*CA - tbc*BC;
691 #if 0
692  if (virial) {
693  *virial += 0.5*outer(tab, rAB)/dt;
694  *virial += 0.5*outer(tbc, rBC)/dt;
695  *virial += 0.5*outer(tca, rCA)/dt;
696  }
697 #endif
698  vel[0] += (0.5/ma)*ga;
699  vel[1] += (0.5/mb)*gb;
700  vel[2] += (0.5/mb)*gc;
701 
702  return 0;
703 }
704 
705 int settle2(BigReal mO, BigReal mH, const Vector *pos,
706  Vector *vel, BigReal dt, Tensor *virial) {
707 
708  settlev(pos, mO, mH, vel, dt, virial);
709  return 0;
710 }
711 
template void settle1_SIMD< 1 >(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
static int settlev(const Vector *pos, BigReal ma, BigReal mb, Vector *vel, BigReal dt, Tensor *virial)
Definition: Settle.C:652
int ib
Definition: Settle.h:57
void rattlePair(const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, bool &consFailure)
Definition: Settle.C:546
Definition: Vector.h:64
__device__ __forceinline__ float3 cross(const float3 v1, const float3 v2)
BigReal z
Definition: Vector.h:66
BigReal dsq
Definition: Settle.h:58
template void settle1_SIMD< 2 >(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
int settle2(BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial)
Definition: Settle.C:705
template void rattlePair< 1 >(const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, bool &consFailure)
BigReal x
Definition: Vector.h:66
Tensor outer(const Vector &v1, const Vector &v2)
Definition: Tensor.h:241
int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
optimized settle1 algorithm, reuses water properties as much as possible
Definition: Settle.C:55
BigReal rmb
Definition: Settle.h:60
Definition: Tensor.h:15
BigReal y
Definition: Vector.h:66
BigReal rma
Definition: Settle.h:59
void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist, BigReal &mOrmT, BigReal &mHrmT, BigReal &ra, BigReal &rb, BigReal &rc, BigReal &rra)
initialize cached water properties
Definition: Settle.C:40
int ia
Definition: Settle.h:56
void settle1_SIMD(const Vector *ref, Vector *pos, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
Definition: Settle.C:285
void rattleN(const int icnt, const RattleParam *rattleParam, const BigReal *refx, const BigReal *refy, const BigReal *refz, BigReal *posx, BigReal *posy, BigReal *posz, const BigReal tol2, const int maxiter, bool &done, bool &consFailure)
Definition: Settle.C:591
gridSize x
Vector unit(void) const
Definition: Vector.h:182
double BigReal
Definition: common.h:114