#include "Settle.h"#include <string.h>#include <math.h>Go to the source code of this file.
Functions | |
| int | settle1isinitted (void) |
| return whether or not the settle algorithm is ready to use | |
| int | settle1init (BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist) |
| initialize cached water properties | |
| int | settle1 (const Vector *ref, Vector *pos, Vector *vel, BigReal invdt) |
| optimized settle1 algorithm, reuses water properties as much as possible | |
| int | settlev (const Vector *pos, BigReal ma, BigReal mb, Vector *vel, BigReal dt, Tensor *virial) |
| int | settle2 (BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial) |
Variables | |
| BigReal | mOrmT |
| BigReal | mHrmT |
| BigReal | ra |
| BigReal | rb |
| BigReal | rc |
| BigReal | rra |
| int | settle_first_time = 1 |
|
||||||||||||||||||||
|
optimized settle1 algorithm, reuses water properties as much as possible
Definition at line 66 of file Settle.C. References BigReal, mHrmT, mOrmT, ra, rb, rc, Vector::unit(), Vector::x, Vector::y, and Vector::z. Referenced by HomePatch::rattle1(). 00066 {
00067 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00068 // SSE acceleration of some of the costly parts of settle using
00069 // the Intel C/C++ classes. This implementation uses the SSE units
00070 // less efficiency than is potentially possible, but in order to do
00071 // better, the settle algorithm will have to be vectorized and operate
00072 // on multiple waters at a time. Doing so could give us the ability to
00073 // do two (double precison) or four (single precision) waters at a time.
00074 // This code achieves a modest speedup without the need to reorganize
00075 // the NAMD structure. Once we have water molecules sorted in a single
00076 // block we can do far better.
00077
00078 // vectors in the plane of the original positions
00079 Vector b0, c0;
00080
00081 __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x); // ref0.y and ref0.x
00082 __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x); // ref1.y and ref1.x
00083
00084 __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
00085 _mm_storeu_pd((double *) &b0.x, B0xy);
00086 b0.z = ref[1].z - ref[0].z;
00087
00088 __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x); // ref2.y and ref2.x
00089
00090 __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
00091 _mm_storeu_pd((double *) &c0.x, C0xy);
00092 c0.z = ref[2].z - ref[0].z;
00093
00094 // new center of mass
00095 // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
00096 __align(16) Vector a1;
00097 __align(16) Vector b1;
00098 __align(16) Vector c1;
00099 __align(16) Vector d0;
00100
00101 __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
00102 __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
00103 __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
00104
00105 __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
00106 __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
00107 __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
00108
00109 d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
00110 a1.z = pos[0].z - d0.z;
00111 b1.z = pos[1].z - d0.z;
00112 c1.z = pos[2].z - d0.z;
00113
00114 __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
00115 _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
00116
00117 __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
00118 _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
00119
00120 __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
00121 _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
00122
00123 _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
00124
00125 // Vectors describing transformation from original coordinate system to
00126 // the 'primed' coordinate system as in the diagram.
00127 Vector n0 = cross(b0, c0);
00128 Vector n1 = cross(a1, n0);
00129 Vector n2 = cross(n0, n1);
00130 #else
00131 // vectors in the plane of the original positions
00132 Vector b0 = ref[1]-ref[0];
00133 Vector c0 = ref[2]-ref[0];
00134
00135 // new center of mass
00136 Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
00137
00138 Vector a1 = pos[0] - d0;
00139 Vector b1 = pos[1] - d0;
00140 Vector c1 = pos[2] - d0;
00141
00142 // Vectors describing transformation from original coordinate system to
00143 // the 'primed' coordinate system as in the diagram.
00144 Vector n0 = cross(b0, c0);
00145 Vector n1 = cross(a1, n0);
00146 Vector n2 = cross(n0, n1);
00147 #endif
00148
00149 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
00150 __m128d l1 = _mm_set_pd(n0.x, n0.y);
00151 l1 = _mm_mul_pd(l1, l1);
00152 // n0.x^2 + n0.y^2
00153 double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
00154
00155 __m128d l3 = _mm_set_pd(n1.y, n1.z);
00156 l3 = _mm_mul_pd(l3, l3);
00157 // n1.y^2 + n1.z^2
00158 double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
00159
00160 __m128d l2 = _mm_set_pd(n1.x, n0.z);
00161 // len(n1)^2 and len(n0)^2
00162 __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
00163
00164 __m128d l4 = _mm_set_pd(n2.x, n2.y);
00165 l4 = _mm_mul_pd(l4, l4);
00166 // n2.x^2 + n2.y^2
00167 double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
00168 double ts2 = l4xy2 + (n2.z * n2.z); // len(n2)^2
00169
00170 double invlens[4];
00171 // since rsqrt_nr() doesn't work with current compiler
00172 // this is the next best option
00173 static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
00174
00175 // 1/len(n1) and 1/len(n0)
00176 __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
00177
00178 // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
00179 _mm_storeu_pd(invlens, invlen12);
00180
00181 n0 = n0 * invlens[0];
00182
00183 // shuffle the order of operations around from the normal algorithm so
00184 // that we can double pump sqrt() with n2 and cosphi at the same time
00185 // these components are usually computed down in the canonical water block
00186 BigReal A1Z = n0 * a1;
00187 BigReal sinphi = A1Z * rra;
00188 BigReal tmp = 1.0-sinphi*sinphi;
00189
00190 __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
00191 // invlens[2] = 1/len(n2), invlens[3] = cosphi
00192 _mm_storeu_pd(invlens+2, n2cosphi);
00193
00194 n1 = n1 * invlens[1];
00195 n2 = n2 * (1.0 / invlens[2]);
00196 BigReal cosphi = invlens[3];
00197
00198 b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00199 c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00200
00201 b1 = Vector(n1*b1, n2*b1, n0*b1);
00202 c1 = Vector(n1*c1, n2*c1, n0*c1);
00203
00204 // now we can compute positions of canonical water
00205 BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00206 tmp = 1.0-sinpsi*sinpsi;
00207 BigReal cospsi = sqrt(tmp);
00208 #else
00209 n0 = n0.unit();
00210 n1 = n1.unit();
00211 n2 = n2.unit();
00212
00213 b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00214 c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00215
00216 BigReal A1Z = n0 * a1;
00217 b1 = Vector(n1*b1, n2*b1, n0*b1);
00218 c1 = Vector(n1*c1, n2*c1, n0*c1);
00219
00220 // now we can compute positions of canonical water
00221 BigReal sinphi = A1Z * rra;
00222 BigReal tmp = 1.0-sinphi*sinphi;
00223 BigReal cosphi = sqrt(tmp);
00224 BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00225 tmp = 1.0-sinpsi*sinpsi;
00226 BigReal cospsi = sqrt(tmp);
00227 #endif
00228
00229 BigReal rbphi = -rb*cosphi;
00230 BigReal tmp1 = rc*sinpsi*sinphi;
00231 BigReal tmp2 = rc*sinpsi*cosphi;
00232
00233 Vector a2(0, ra*cosphi, ra*sinphi);
00234 Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
00235 Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
00236
00237 // there are no a0 terms because we've already subtracted the term off
00238 // when we first defined b0 and c0.
00239 BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
00240 BigReal beta = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
00241 BigReal gama = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
00242
00243 BigReal a2b2 = alpha*alpha + beta*beta;
00244 BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
00245 BigReal costheta = sqrt(1.0 - sintheta*sintheta);
00246
00247 #if 0
00248 Vector a3( -a2.y*sintheta,
00249 a2.y*costheta,
00250 a2.z);
00251 Vector b3(b2.x*costheta - b2.y*sintheta,
00252 b2.x*sintheta + b2.y*costheta,
00253 b2.z);
00254 Vector c3(c2.x*costheta - c2.y*sintheta,
00255 c2.x*sintheta + c2.y*costheta,
00256 c2.z);
00257
00258 #else
00259 Vector a3( -a2.y*sintheta,
00260 a2.y*costheta,
00261 A1Z);
00262 Vector b3(b2.x*costheta - b2.y*sintheta,
00263 b2.x*sintheta + b2.y*costheta,
00264 b1.z);
00265 Vector c3(-b2.x*costheta - c2.y*sintheta,
00266 -b2.x*sintheta + c2.y*costheta,
00267 c1.z);
00268
00269 #endif
00270
00271 // undo the transformation; generate new normal vectors from the transpose.
00272 Vector m1(n1.x, n2.x, n0.x);
00273 Vector m2(n1.y, n2.y, n0.y);
00274 Vector m0(n1.z, n2.z, n0.z);
00275
00276 pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
00277 pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
00278 pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
00279
00280 // dt can be negative during startup!
00281 if (invdt != 0) {
00282 vel[0] = (pos[0]-ref[0])*invdt;
00283 vel[1] = (pos[1]-ref[1])*invdt;
00284 vel[2] = (pos[2]-ref[2])*invdt;
00285 }
00286
00287 return 0;
00288 }
|
|
||||||||||||||||||||
|
initialize cached water properties
Definition at line 49 of file Settle.C. References BigReal, mHrmT, mOrmT, ra, rb, rc, rra, and settle_first_time. Referenced by HomePatch::rattle1(). 00049 {
00050 if (settle_first_time) {
00051 settle_first_time = 0;
00052 BigReal rmT = 1.0 / (pmO+pmH+pmH);
00053 mOrmT = pmO * rmT;
00054 mHrmT = pmH * rmT;
00055 BigReal t1 = 0.5*pmO/pmH;
00056 rc = 0.5*hhdist;
00057 ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
00058 rb = t1*ra;
00059 rra = 1.0 / ra;
00060 }
00061
00062 return 0;
00063 }
|
|
|
return whether or not the settle algorithm is ready to use Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 42 of file Settle.C. Referenced by HomePatch::rattle1(). 00042 {
00043 return !settle_first_time;
00044 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 346 of file Settle.C. References settlev(). Referenced by HomePatch::rattle2(). 00347 {
00348
00349 settlev(pos, mO, mH, vel, dt, virial);
00350 return 0;
00351 }
|
|
||||||||||||||||||||||||||||
|
Definition at line 292 of file Settle.C. References BigReal, outer(), and Vector::unit(). Referenced by settle2(). 00293 {
00294
00295 Vector rAB = pos[1]-pos[0];
00296 Vector rBC = pos[2]-pos[1];
00297 Vector rCA = pos[0]-pos[2];
00298
00299 Vector AB = rAB.unit();
00300 Vector BC = rBC.unit();
00301 Vector CA = rCA.unit();
00302
00303 BigReal cosA = -AB * CA;
00304 BigReal cosB = -BC * AB;
00305 BigReal cosC = -CA * BC;
00306
00307 BigReal vab = (vel[1]-vel[0])*AB;
00308 BigReal vbc = (vel[2]-vel[1])*BC;
00309 BigReal vca = (vel[0]-vel[2])*CA;
00310
00311 BigReal mab = ma+mb;
00312
00313 BigReal d = (2*mab*mab + 2*ma*mb*cosA*cosB*cosC - 2*mb*mb*cosA*cosA
00314 - ma*mab*(cosB*cosB + cosC*cosC))*0.5/mb;
00315
00316 BigReal tab = (vab*(2*mab - ma*cosC*cosC) +
00317 vbc*(mb*cosC*cosA - mab*cosB) +
00318 vca*(ma*cosB*cosC - 2*mb*cosA))*ma/d;
00319
00320 BigReal tbc = (vbc*(mab*mab - mb*mb*cosA*cosA) +
00321 vca*ma*(mb*cosA*cosB - mab*cosC) +
00322 vab*ma*(mb*cosC*cosA - mab*cosB))/d;
00323
00324 BigReal tca = (vca*(2*mab - ma*cosB*cosB) +
00325 vab*(ma*cosB*cosC - 2*mb*cosA) +
00326 vbc*(mb*cosA*cosB - mab*cosC))*ma/d;
00327
00328 Vector ga = tab*AB - tca*CA;
00329 Vector gb = tbc*BC - tab*AB;
00330 Vector gc = tca*CA - tbc*BC;
00331 #if 0
00332 if (virial) {
00333 *virial += 0.5*outer(tab, rAB)/dt;
00334 *virial += 0.5*outer(tbc, rBC)/dt;
00335 *virial += 0.5*outer(tca, rCA)/dt;
00336 }
00337 #endif
00338 vel[0] += (0.5/ma)*ga;
00339 vel[1] += (0.5/mb)*gb;
00340 vel[2] += (0.5/mb)*gc;
00341
00342 return 0;
00343 }
|
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 37 of file Settle.C. Referenced by settle1(), and settle1init(). |
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 37 of file Settle.C. Referenced by settle1(), and settle1init(). |
|
|
Definition at line 38 of file Settle.C. Referenced by settle1(), and settle1init(). |
|
|
Definition at line 38 of file Settle.C. Referenced by settle1(), and settle1init(). |
|
|
Definition at line 38 of file Settle.C. Referenced by settle1(), and settle1init(). |
|
|
Definition at line 39 of file Settle.C. Referenced by settle1init(). |
|
|
Definition at line 40 of file Settle.C. Referenced by settle1init(). |
1.3.9.1