00001
00007
00008 #include "Settle.h"
00009 #include <string.h>
00010 #include <math.h>
00011
00012
00013 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00014 #include <emmintrin.h>
00015 #if defined(__INTEL_COMPILER)
00016 #define __align(X) __declspec(align(X) )
00017 #elif defined(__PGI)
00018 #define __align(X) __attribute__((aligned(X) ))
00019 #define MISSING_mm_cvtsd_f64
00020 #elif defined(__GNUC__)
00021 #define __align(X) __attribute__((aligned(X) ))
00022 #if (__GNUC__ < 4)
00023 #define MISSING_mm_cvtsd_f64
00024 #endif
00025 #else
00026 #define __align(X) __declspec(align(X) )
00027 #endif
00028 #endif
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist,
00041 BigReal &mOrmT, BigReal &mHrmT, BigReal &ra,
00042 BigReal &rb, BigReal &rc, BigReal &rra) {
00043
00044 BigReal rmT = 1.0 / (pmO+pmH+pmH);
00045 mOrmT = pmO * rmT;
00046 mHrmT = pmH * rmT;
00047 BigReal t1 = 0.5*pmO/pmH;
00048 rc = 0.5*hhdist;
00049 ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
00050 rb = t1*ra;
00051 rra = 1.0 / ra;
00052 }
00053
00054
00055 int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt,
00056 BigReal mOrmT, BigReal mHrmT, BigReal ra,
00057 BigReal rb, BigReal rc, BigReal rra) {
00058 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070 Vector b0, c0;
00071
00072 __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x);
00073 __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x);
00074
00075 __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
00076 _mm_storeu_pd((double *) &b0.x, B0xy);
00077 b0.z = ref[1].z - ref[0].z;
00078
00079 __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x);
00080
00081 __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
00082 _mm_storeu_pd((double *) &c0.x, C0xy);
00083 c0.z = ref[2].z - ref[0].z;
00084
00085
00086
00087 __align(16) Vector a1;
00088 __align(16) Vector b1;
00089 __align(16) Vector c1;
00090 __align(16) Vector d0;
00091
00092 __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
00093 __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
00094 __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
00095
00096 __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
00097 __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
00098 __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
00099
00100 d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
00101 a1.z = pos[0].z - d0.z;
00102 b1.z = pos[1].z - d0.z;
00103 c1.z = pos[2].z - d0.z;
00104
00105 __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
00106 _mm_store_pd((double *) &a1.x, A1xy);
00107
00108 __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
00109 _mm_store_pd((double *) &b1.x, B1xy);
00110
00111 __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
00112 _mm_store_pd((double *) &c1.x, C1xy);
00113
00114 _mm_store_pd((double *) &d0.x, D0xy);
00115
00116
00117
00118 Vector n0 = cross(b0, c0);
00119 Vector n1 = cross(a1, n0);
00120 Vector n2 = cross(n0, n1);
00121 #else
00122
00123 Vector b0 = ref[1]-ref[0];
00124 Vector c0 = ref[2]-ref[0];
00125
00126
00127 Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
00128
00129 Vector a1 = pos[0] - d0;
00130 Vector b1 = pos[1] - d0;
00131 Vector c1 = pos[2] - d0;
00132
00133
00134
00135 Vector n0 = cross(b0, c0);
00136 Vector n1 = cross(a1, n0);
00137 Vector n2 = cross(n0, n1);
00138 #endif
00139
00140 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
00141 __m128d l1 = _mm_set_pd(n0.x, n0.y);
00142 l1 = _mm_mul_pd(l1, l1);
00143
00144 double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
00145
00146 __m128d l3 = _mm_set_pd(n1.y, n1.z);
00147 l3 = _mm_mul_pd(l3, l3);
00148
00149 double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
00150
00151 __m128d l2 = _mm_set_pd(n1.x, n0.z);
00152
00153 __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
00154
00155 __m128d l4 = _mm_set_pd(n2.x, n2.y);
00156 l4 = _mm_mul_pd(l4, l4);
00157
00158 double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
00159 double ts2 = l4xy2 + (n2.z * n2.z);
00160
00161 double invlens[4];
00162
00163
00164 static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
00165
00166
00167 __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
00168
00169
00170 _mm_storeu_pd(invlens, invlen12);
00171
00172 n0 = n0 * invlens[0];
00173
00174
00175
00176
00177 BigReal A1Z = n0 * a1;
00178 BigReal sinphi = A1Z * rra;
00179 BigReal tmp = 1.0-sinphi*sinphi;
00180
00181 __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
00182
00183 _mm_storeu_pd(invlens+2, n2cosphi);
00184
00185 n1 = n1 * invlens[1];
00186 n2 = n2 * (1.0 / invlens[2]);
00187 BigReal cosphi = invlens[3];
00188
00189 b0 = Vector(n1*b0, n2*b0, n0*b0);
00190 c0 = Vector(n1*c0, n2*c0, n0*c0);
00191
00192 b1 = Vector(n1*b1, n2*b1, n0*b1);
00193 c1 = Vector(n1*c1, n2*c1, n0*c1);
00194
00195
00196 BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00197 tmp = 1.0-sinpsi*sinpsi;
00198 BigReal cospsi = sqrt(tmp);
00199 #else
00200 n0 = n0.unit();
00201 n1 = n1.unit();
00202 n2 = n2.unit();
00203
00204 b0 = Vector(n1*b0, n2*b0, n0*b0);
00205 c0 = Vector(n1*c0, n2*c0, n0*c0);
00206
00207 BigReal A1Z = n0 * a1;
00208 b1 = Vector(n1*b1, n2*b1, n0*b1);
00209 c1 = Vector(n1*c1, n2*c1, n0*c1);
00210
00211
00212 BigReal sinphi = A1Z * rra;
00213 BigReal tmp = 1.0-sinphi*sinphi;
00214 BigReal cosphi = sqrt(tmp);
00215 BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00216 tmp = 1.0-sinpsi*sinpsi;
00217 BigReal cospsi = sqrt(tmp);
00218 #endif
00219
00220 BigReal rbphi = -rb*cosphi;
00221 BigReal tmp1 = rc*sinpsi*sinphi;
00222 BigReal tmp2 = rc*sinpsi*cosphi;
00223
00224 Vector a2(0, ra*cosphi, ra*sinphi);
00225 Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
00226 Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
00227
00228
00229
00230 BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
00231 BigReal beta = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
00232 BigReal gama = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
00233
00234 BigReal a2b2 = alpha*alpha + beta*beta;
00235 BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
00236 BigReal costheta = sqrt(1.0 - sintheta*sintheta);
00237
00238 #if 0
00239 Vector a3( -a2.y*sintheta,
00240 a2.y*costheta,
00241 a2.z);
00242 Vector b3(b2.x*costheta - b2.y*sintheta,
00243 b2.x*sintheta + b2.y*costheta,
00244 b2.z);
00245 Vector c3(c2.x*costheta - c2.y*sintheta,
00246 c2.x*sintheta + c2.y*costheta,
00247 c2.z);
00248
00249 #else
00250 Vector a3( -a2.y*sintheta,
00251 a2.y*costheta,
00252 A1Z);
00253 Vector b3(b2.x*costheta - b2.y*sintheta,
00254 b2.x*sintheta + b2.y*costheta,
00255 b1.z);
00256 Vector c3(-b2.x*costheta - c2.y*sintheta,
00257 -b2.x*sintheta + c2.y*costheta,
00258 c1.z);
00259
00260 #endif
00261
00262
00263 Vector m1(n1.x, n2.x, n0.x);
00264 Vector m2(n1.y, n2.y, n0.y);
00265 Vector m0(n1.z, n2.z, n0.z);
00266
00267 pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
00268 pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
00269 pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
00270
00271
00272 if (invdt != 0) {
00273 vel[0] = (pos[0]-ref[0])*invdt;
00274 vel[1] = (pos[1]-ref[1])*invdt;
00275 vel[2] = (pos[2]-ref[2])*invdt;
00276 }
00277
00278 return 0;
00279 }
00280
00281
00282
00283 static int settlev(const Vector *pos, BigReal ma, BigReal mb, Vector *vel,
00284 BigReal dt, Tensor *virial) {
00285
00286 Vector rAB = pos[1]-pos[0];
00287 Vector rBC = pos[2]-pos[1];
00288 Vector rCA = pos[0]-pos[2];
00289
00290 Vector AB = rAB.unit();
00291 Vector BC = rBC.unit();
00292 Vector CA = rCA.unit();
00293
00294 BigReal cosA = -AB * CA;
00295 BigReal cosB = -BC * AB;
00296 BigReal cosC = -CA * BC;
00297
00298 BigReal vab = (vel[1]-vel[0])*AB;
00299 BigReal vbc = (vel[2]-vel[1])*BC;
00300 BigReal vca = (vel[0]-vel[2])*CA;
00301
00302 BigReal mab = ma+mb;
00303
00304 BigReal d = (2*mab*mab + 2*ma*mb*cosA*cosB*cosC - 2*mb*mb*cosA*cosA
00305 - ma*mab*(cosB*cosB + cosC*cosC))*0.5/mb;
00306
00307 BigReal tab = (vab*(2*mab - ma*cosC*cosC) +
00308 vbc*(mb*cosC*cosA - mab*cosB) +
00309 vca*(ma*cosB*cosC - 2*mb*cosA))*ma/d;
00310
00311 BigReal tbc = (vbc*(mab*mab - mb*mb*cosA*cosA) +
00312 vca*ma*(mb*cosA*cosB - mab*cosC) +
00313 vab*ma*(mb*cosC*cosA - mab*cosB))/d;
00314
00315 BigReal tca = (vca*(2*mab - ma*cosB*cosB) +
00316 vab*(ma*cosB*cosC - 2*mb*cosA) +
00317 vbc*(mb*cosA*cosB - mab*cosC))*ma/d;
00318
00319 Vector ga = tab*AB - tca*CA;
00320 Vector gb = tbc*BC - tab*AB;
00321 Vector gc = tca*CA - tbc*BC;
00322 #if 0
00323 if (virial) {
00324 *virial += 0.5*outer(tab, rAB)/dt;
00325 *virial += 0.5*outer(tbc, rBC)/dt;
00326 *virial += 0.5*outer(tca, rCA)/dt;
00327 }
00328 #endif
00329 vel[0] += (0.5/ma)*ga;
00330 vel[1] += (0.5/mb)*gb;
00331 vel[2] += (0.5/mb)*gc;
00332
00333 return 0;
00334 }
00335
00336
00337 int settle2(BigReal mO, BigReal mH, const Vector *pos,
00338 Vector *vel, BigReal dt, Tensor *virial) {
00339
00340 settlev(pos, mO, mH, vel, dt, virial);
00341 return 0;
00342 }
00343