#include "Vector.h"#include "Tensor.h"Go to the source code of this file.
Functions | |
| void | settle1init (BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist, BigReal &mOrmT, BigReal &mHrmT, BigReal &ra, BigReal &rb, BigReal &rc, BigReal &rra) |
| initialize cached water properties | |
| int | settle1 (const Vector *ref, Vector *pos, Vector *vel, BigReal invdt, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra) |
| optimized settle1 algorithm, reuses water properties as much as possible | |
| int | settle2 (BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial) |
|
||||||||||||||||||||||||||||||||||||||||||||
|
optimized settle1 algorithm, reuses water properties as much as possible
Definition at line 55 of file Settle.C. References BigReal, Vector::unit(), Vector::x, Vector::y, and Vector::z. Referenced by HomePatch::rattle1(). 00057 {
00058 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00059 // SSE acceleration of some of the costly parts of settle using
00060 // the Intel C/C++ classes. This implementation uses the SSE units
00061 // less efficiency than is potentially possible, but in order to do
00062 // better, the settle algorithm will have to be vectorized and operate
00063 // on multiple waters at a time. Doing so could give us the ability to
00064 // do two (double precison) or four (single precision) waters at a time.
00065 // This code achieves a modest speedup without the need to reorganize
00066 // the NAMD structure. Once we have water molecules sorted in a single
00067 // block we can do far better.
00068
00069 // vectors in the plane of the original positions
00070 Vector b0, c0;
00071
00072 __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x); // ref0.y and ref0.x
00073 __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x); // ref1.y and ref1.x
00074
00075 __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
00076 _mm_storeu_pd((double *) &b0.x, B0xy);
00077 b0.z = ref[1].z - ref[0].z;
00078
00079 __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x); // ref2.y and ref2.x
00080
00081 __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
00082 _mm_storeu_pd((double *) &c0.x, C0xy);
00083 c0.z = ref[2].z - ref[0].z;
00084
00085 // new center of mass
00086 // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
00087 __align(16) Vector a1;
00088 __align(16) Vector b1;
00089 __align(16) Vector c1;
00090 __align(16) Vector d0;
00091
00092 __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
00093 __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
00094 __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
00095
00096 __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
00097 __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
00098 __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
00099
00100 d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
00101 a1.z = pos[0].z - d0.z;
00102 b1.z = pos[1].z - d0.z;
00103 c1.z = pos[2].z - d0.z;
00104
00105 __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
00106 _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
00107
00108 __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
00109 _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
00110
00111 __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
00112 _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
00113
00114 _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
00115
00116 // Vectors describing transformation from original coordinate system to
00117 // the 'primed' coordinate system as in the diagram.
00118 Vector n0 = cross(b0, c0);
00119 Vector n1 = cross(a1, n0);
00120 Vector n2 = cross(n0, n1);
00121 #else
00122 // vectors in the plane of the original positions
00123 Vector b0 = ref[1]-ref[0];
00124 Vector c0 = ref[2]-ref[0];
00125
00126 // new center of mass
00127 Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
00128
00129 Vector a1 = pos[0] - d0;
00130 Vector b1 = pos[1] - d0;
00131 Vector c1 = pos[2] - d0;
00132
00133 // Vectors describing transformation from original coordinate system to
00134 // the 'primed' coordinate system as in the diagram.
00135 Vector n0 = cross(b0, c0);
00136 Vector n1 = cross(a1, n0);
00137 Vector n2 = cross(n0, n1);
00138 #endif
00139
00140 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
00141 __m128d l1 = _mm_set_pd(n0.x, n0.y);
00142 l1 = _mm_mul_pd(l1, l1);
00143 // n0.x^2 + n0.y^2
00144 double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
00145
00146 __m128d l3 = _mm_set_pd(n1.y, n1.z);
00147 l3 = _mm_mul_pd(l3, l3);
00148 // n1.y^2 + n1.z^2
00149 double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
00150
00151 __m128d l2 = _mm_set_pd(n1.x, n0.z);
00152 // len(n1)^2 and len(n0)^2
00153 __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
00154
00155 __m128d l4 = _mm_set_pd(n2.x, n2.y);
00156 l4 = _mm_mul_pd(l4, l4);
00157 // n2.x^2 + n2.y^2
00158 double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
00159 double ts2 = l4xy2 + (n2.z * n2.z); // len(n2)^2
00160
00161 double invlens[4];
00162 // since rsqrt_nr() doesn't work with current compiler
00163 // this is the next best option
00164 static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
00165
00166 // 1/len(n1) and 1/len(n0)
00167 __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
00168
00169 // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
00170 _mm_storeu_pd(invlens, invlen12);
00171
00172 n0 = n0 * invlens[0];
00173
00174 // shuffle the order of operations around from the normal algorithm so
00175 // that we can double pump sqrt() with n2 and cosphi at the same time
00176 // these components are usually computed down in the canonical water block
00177 BigReal A1Z = n0 * a1;
00178 BigReal sinphi = A1Z * rra;
00179 BigReal tmp = 1.0-sinphi*sinphi;
00180
00181 __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
00182 // invlens[2] = 1/len(n2), invlens[3] = cosphi
00183 _mm_storeu_pd(invlens+2, n2cosphi);
00184
00185 n1 = n1 * invlens[1];
00186 n2 = n2 * (1.0 / invlens[2]);
00187 BigReal cosphi = invlens[3];
00188
00189 b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00190 c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00191
00192 b1 = Vector(n1*b1, n2*b1, n0*b1);
00193 c1 = Vector(n1*c1, n2*c1, n0*c1);
00194
00195 // now we can compute positions of canonical water
00196 BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00197 tmp = 1.0-sinpsi*sinpsi;
00198 BigReal cospsi = sqrt(tmp);
00199 #else
00200 n0 = n0.unit();
00201 n1 = n1.unit();
00202 n2 = n2.unit();
00203
00204 b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00205 c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00206
00207 BigReal A1Z = n0 * a1;
00208 b1 = Vector(n1*b1, n2*b1, n0*b1);
00209 c1 = Vector(n1*c1, n2*c1, n0*c1);
00210
00211 // now we can compute positions of canonical water
00212 BigReal sinphi = A1Z * rra;
00213 BigReal tmp = 1.0-sinphi*sinphi;
00214 BigReal cosphi = sqrt(tmp);
00215 BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00216 tmp = 1.0-sinpsi*sinpsi;
00217 BigReal cospsi = sqrt(tmp);
00218 #endif
00219
00220 BigReal rbphi = -rb*cosphi;
00221 BigReal tmp1 = rc*sinpsi*sinphi;
00222 BigReal tmp2 = rc*sinpsi*cosphi;
00223
00224 Vector a2(0, ra*cosphi, ra*sinphi);
00225 Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
00226 Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
00227
00228 // there are no a0 terms because we've already subtracted the term off
00229 // when we first defined b0 and c0.
00230 BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
00231 BigReal beta = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
00232 BigReal gama = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
00233
00234 BigReal a2b2 = alpha*alpha + beta*beta;
00235 BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
00236 BigReal costheta = sqrt(1.0 - sintheta*sintheta);
00237
00238 #if 0
00239 Vector a3( -a2.y*sintheta,
00240 a2.y*costheta,
00241 a2.z);
00242 Vector b3(b2.x*costheta - b2.y*sintheta,
00243 b2.x*sintheta + b2.y*costheta,
00244 b2.z);
00245 Vector c3(c2.x*costheta - c2.y*sintheta,
00246 c2.x*sintheta + c2.y*costheta,
00247 c2.z);
00248
00249 #else
00250 Vector a3( -a2.y*sintheta,
00251 a2.y*costheta,
00252 A1Z);
00253 Vector b3(b2.x*costheta - b2.y*sintheta,
00254 b2.x*sintheta + b2.y*costheta,
00255 b1.z);
00256 Vector c3(-b2.x*costheta - c2.y*sintheta,
00257 -b2.x*sintheta + c2.y*costheta,
00258 c1.z);
00259
00260 #endif
00261
00262 // undo the transformation; generate new normal vectors from the transpose.
00263 Vector m1(n1.x, n2.x, n0.x);
00264 Vector m2(n1.y, n2.y, n0.y);
00265 Vector m0(n1.z, n2.z, n0.z);
00266
00267 pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
00268 pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
00269 pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
00270
00271 // dt can be negative during startup!
00272 if (invdt != 0) {
00273 vel[0] = (pos[0]-ref[0])*invdt;
00274 vel[1] = (pos[1]-ref[1])*invdt;
00275 vel[2] = (pos[2]-ref[2])*invdt;
00276 }
00277
00278 return 0;
00279 }
|
|
||||||||||||||||||||||||||||||||||||||||||||
|
initialize cached water properties Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 40 of file Settle.C. References BigReal. Referenced by HomePatch::rattle1(). 00042 {
00043
00044 BigReal rmT = 1.0 / (pmO+pmH+pmH);
00045 mOrmT = pmO * rmT;
00046 mHrmT = pmH * rmT;
00047 BigReal t1 = 0.5*pmO/pmH;
00048 rc = 0.5*hhdist;
00049 ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
00050 rb = t1*ra;
00051 rra = 1.0 / ra;
00052 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 337 of file Settle.C. References settlev(). Referenced by HomePatch::rattle2(). 00338 {
00339
00340 settlev(pos, mO, mH, vel, dt, virial);
00341 return 0;
00342 }
|
1.3.9.1