Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

Settle.C File Reference

#include "Settle.h"
#include <string.h>
#include <math.h>

Go to the source code of this file.

Functions

int settle1isinitted (void)
 return whether or not the settle algorithm is ready to use
int settle1init (BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist)
 initialize cached water properties
int settle1 (const Vector *ref, Vector *pos, Vector *vel, BigReal invdt)
 optimized settle1 algorithm, reuses water properties as much as possible
int settlev (const Vector *pos, BigReal ma, BigReal mb, Vector *vel, BigReal dt, Tensor *virial)
int settle2 (BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial)

Variables

BigReal mOrmT
BigReal mHrmT
BigReal ra
BigReal rb
BigReal rc
BigReal rra
int settle_first_time = 1


Function Documentation

int settle1 const Vector ref,
Vector pos,
Vector vel,
BigReal  invdt
 

optimized settle1 algorithm, reuses water properties as much as possible

Definition at line 53 of file Settle.C.

References BigReal, mHrmT, mOrmT, ra, rb, rc, Vector::unit(), Vector::x, Vector::y, and Vector::z.

Referenced by HomePatch::rattle1().

00053                                                                         {
00054 #if defined(NAMD_SSE) && defined(__INTEL_COMPILER) && defined(__SSE2__)
00055   // SSE acceleration of some of the costly parts of settle using
00056   // the Intel C/C++ classes.  This implementation uses the SSE units
00057   // less efficiency than is potentially possible, but in order to do
00058   // better, the settle algorithm will have to be vectorized and operate
00059   // on multiple waters at a time.  Doing so could give us the ability to
00060   // do two (double precison) or four (single precision) waters at a time.
00061   // This code achieves a modest speedup without the need to reorganize
00062   // the NAMD structure.  Once we have water molecules sorted in a single
00063   // block we can do far better.
00064 
00065   // vectors in the plane of the original positions
00066   Vector b0, c0;
00067 
00068   __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x);  // ref0.y and ref0.x
00069   __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x);  // ref1.y and ref1.x
00070 
00071   __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
00072   _mm_storeu_pd((double *) &b0.x, B0xy);
00073   b0.z = ref[1].z - ref[0].z;
00074 
00075   __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x);  // ref2.y and ref2.x
00076 
00077   __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
00078   _mm_storeu_pd((double *) &c0.x, C0xy);
00079   c0.z = ref[2].z - ref[0].z;
00080 
00081   // new center of mass
00082   // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
00083   __declspec(align(16)) Vector a1;
00084   __declspec(align(16)) Vector b1;
00085   __declspec(align(16)) Vector c1;
00086   __declspec(align(16)) Vector d0;
00087 
00088   __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
00089   __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
00090   __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
00091 
00092   __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
00093   __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
00094   __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
00095 
00096   d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
00097   a1.z = pos[0].z - d0.z;
00098   b1.z = pos[1].z - d0.z;
00099   c1.z = pos[2].z - d0.z;
00100 
00101   __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
00102   _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
00103 
00104   __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
00105   _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
00106 
00107   __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
00108   _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
00109 
00110   _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
00111   
00112   // Vectors describing transformation from original coordinate system to
00113   // the 'primed' coordinate system as in the diagram.  
00114   Vector n0 = cross(b0, c0);
00115   Vector n1 = cross(a1, n0); 
00116   Vector n2 = cross(n0, n1); 
00117 #else
00118   // vectors in the plane of the original positions
00119   Vector b0 = ref[1]-ref[0];
00120   Vector c0 = ref[2]-ref[0];
00121   
00122   // new center of mass
00123   Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
00124  
00125   Vector a1 = pos[0] - d0;
00126   Vector b1 = pos[1] - d0;
00127   Vector c1 = pos[2] - d0;
00128   
00129   // Vectors describing transformation from original coordinate system to
00130   // the 'primed' coordinate system as in the diagram.  
00131   Vector n0 = cross(b0, c0);
00132   Vector n1 = cross(a1, n0); 
00133   Vector n2 = cross(n0, n1); 
00134 #endif
00135 
00136 #if defined(NAMD_SSE) && defined(__INTEL_COMPILER) && defined(__SSE2__)
00137   __m128d l1 = _mm_set_pd(n0.x, n0.y);
00138   l1 = _mm_mul_pd(l1, l1);
00139   // n0.x^2 + n0.y^2
00140   double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
00141 
00142   __m128d l3 = _mm_set_pd(n1.y, n1.z);
00143   l3 = _mm_mul_pd(l3, l3);
00144   // n1.y^2 + n1.z^2
00145   double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
00146 
00147   __m128d l2 = _mm_set_pd(n1.x, n0.z);
00148   // len(n1)^2 and len(n0)^2 
00149   __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
00150 
00151   __m128d l4 = _mm_set_pd(n2.x, n2.y);
00152   l4 = _mm_mul_pd(l4, l4);
00153   // n2.x^2 + n2.y^2
00154   double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
00155   double ts2 = l4xy2 + (n2.z * n2.z);              // len(n2)^2
00156 
00157   double  invlens[4];
00158   // since rsqrt_nr() doesn't work with current compiler
00159   // this is the next best option 
00160   static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
00161 
00162   // 1/len(n1) and 1/len(n0)
00163   __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
00164 
00165   // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
00166   _mm_storeu_pd(invlens, invlen12);
00167 
00168   n0 = n0 * invlens[0];
00169 
00170   // shuffle the order of operations around from the normal algorithm so
00171   // that we can double pump sqrt() with n2 and cosphi at the same time
00172   // these components are usually computed down in the canonical water block
00173   BigReal A1Z = n0 * a1;
00174   BigReal sinphi = A1Z * rra;
00175   BigReal tmp = 1.0-sinphi*sinphi;
00176 
00177   __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
00178   // invlens[2] = 1/len(n2), invlens[3] = cosphi
00179   _mm_storeu_pd(invlens+2, n2cosphi);
00180 
00181   n1 = n1 * invlens[1];
00182   n2 = n2 * (1.0 / invlens[2]);
00183   BigReal cosphi = invlens[3];
00184 
00185   b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00186   c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00187  
00188   b1 = Vector(n1*b1, n2*b1, n0*b1);
00189   c1 = Vector(n1*c1, n2*c1, n0*c1);
00190 
00191   // now we can compute positions of canonical water 
00192   BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00193   tmp = 1.0-sinpsi*sinpsi;
00194   BigReal cospsi = sqrt(tmp);
00195 #else
00196   n0 = n0.unit();
00197   n1 = n1.unit();
00198   n2 = n2.unit();
00199 
00200   b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00201   c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00202  
00203   BigReal A1Z = n0 * a1;
00204   b1 = Vector(n1*b1, n2*b1, n0*b1);
00205   c1 = Vector(n1*c1, n2*c1, n0*c1);
00206 
00207   // now we can compute positions of canonical water 
00208   BigReal sinphi = A1Z * rra;
00209   BigReal tmp = 1.0-sinphi*sinphi;
00210   BigReal cosphi = sqrt(tmp);
00211   BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00212   tmp = 1.0-sinpsi*sinpsi;
00213   BigReal cospsi = sqrt(tmp);
00214 #endif
00215 
00216   BigReal rbphi = -rb*cosphi;
00217   BigReal tmp1 = rc*sinpsi*sinphi;
00218   BigReal tmp2 = rc*sinpsi*cosphi;
00219  
00220   Vector a2(0, ra*cosphi, ra*sinphi);
00221   Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
00222   Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
00223 
00224   // there are no a0 terms because we've already subtracted the term off 
00225   // when we first defined b0 and c0.
00226   BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
00227   BigReal beta  = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
00228   BigReal gama  = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
00229  
00230   BigReal a2b2 = alpha*alpha + beta*beta;
00231   BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
00232   BigReal costheta = sqrt(1.0 - sintheta*sintheta);
00233   
00234 #if 0
00235   Vector a3( -a2.y*sintheta, 
00236               a2.y*costheta,
00237               a2.z);
00238   Vector b3(b2.x*costheta - b2.y*sintheta,
00239               b2.x*sintheta + b2.y*costheta,
00240               b2.z);
00241   Vector c3(c2.x*costheta - c2.y*sintheta,
00242               c2.x*sintheta + c2.y*costheta,
00243               c2.z);
00244   
00245 #else
00246   Vector a3( -a2.y*sintheta, 
00247               a2.y*costheta,
00248               A1Z);
00249   Vector b3(b2.x*costheta - b2.y*sintheta,
00250               b2.x*sintheta + b2.y*costheta,
00251               b1.z);
00252   Vector c3(-b2.x*costheta - c2.y*sintheta,
00253             -b2.x*sintheta + c2.y*costheta,
00254               c1.z);
00255 
00256 #endif
00257 
00258   // undo the transformation; generate new normal vectors from the transpose.
00259   Vector m1(n1.x, n2.x, n0.x);
00260   Vector m2(n1.y, n2.y, n0.y);
00261   Vector m0(n1.z, n2.z, n0.z);
00262 
00263   pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
00264   pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
00265   pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
00266 
00267   // dt can be negative during startup!
00268   if (invdt != 0) {
00269     vel[0] = (pos[0]-ref[0])*invdt;
00270     vel[1] = (pos[1]-ref[1])*invdt;
00271     vel[2] = (pos[2]-ref[2])*invdt;
00272   }
00273 
00274   return 0;
00275 }

int settle1init BigReal  pmO,
BigReal  pmH,
BigReal  hhdist,
BigReal  ohdist
 

initialize cached water properties

Definition at line 36 of file Settle.C.

References BigReal, mHrmT, mOrmT, ra, rb, rc, rra, and settle_first_time.

Referenced by HomePatch::rattle1().

00036                                                                           {
00037   if (settle_first_time) {
00038     settle_first_time = 0;
00039     BigReal rmT = 1.0 / (pmO+pmH+pmH);
00040     mOrmT = pmO * rmT;
00041     mHrmT = pmH * rmT;
00042     BigReal t1 = 0.5*pmO/pmH;
00043     rc = 0.5*hhdist;
00044     ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
00045     rb = t1*ra;
00046     rra = 1.0 / ra;
00047   }
00048 
00049   return 0;
00050 }

int settle1isinitted void   ) 
 

return whether or not the settle algorithm is ready to use

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 29 of file Settle.C.

Referenced by HomePatch::rattle1().

00029                            {
00030   return !settle_first_time;
00031 }

int settle2 BigReal  mO,
BigReal  mH,
const Vector pos,
Vector vel,
BigReal  dt,
Tensor virial
 

Definition at line 333 of file Settle.C.

References settlev().

Referenced by HomePatch::rattle2().

00334                                                            {
00335 
00336   settlev(pos, mO, mH, vel, dt, virial);
00337   return 0;
00338 }

int settlev const Vector pos,
BigReal  ma,
BigReal  mb,
Vector vel,
BigReal  dt,
Tensor virial
[static]
 

Definition at line 279 of file Settle.C.

References BigReal, outer(), and Vector::unit().

Referenced by settle2().

00280                                                                {
00281   
00282   Vector rAB = pos[1]-pos[0];
00283   Vector rBC = pos[2]-pos[1];
00284   Vector rCA = pos[0]-pos[2];
00285  
00286   Vector AB = rAB.unit();
00287   Vector BC = rBC.unit();
00288   Vector CA = rCA.unit();
00289   
00290   BigReal cosA = -AB * CA;
00291   BigReal cosB = -BC * AB;
00292   BigReal cosC = -CA * BC;
00293 
00294   BigReal vab = (vel[1]-vel[0])*AB;
00295   BigReal vbc = (vel[2]-vel[1])*BC;
00296   BigReal vca = (vel[0]-vel[2])*CA;
00297 
00298   BigReal mab = ma+mb;
00299   
00300   BigReal d = (2*mab*mab + 2*ma*mb*cosA*cosB*cosC - 2*mb*mb*cosA*cosA
00301                - ma*mab*(cosB*cosB + cosC*cosC))*0.5/mb;
00302 
00303   BigReal tab = (vab*(2*mab - ma*cosC*cosC) +
00304                 vbc*(mb*cosC*cosA - mab*cosB) +
00305                 vca*(ma*cosB*cosC - 2*mb*cosA))*ma/d;
00306             
00307   BigReal tbc = (vbc*(mab*mab - mb*mb*cosA*cosA) +
00308                 vca*ma*(mb*cosA*cosB - mab*cosC) +
00309                 vab*ma*(mb*cosC*cosA - mab*cosB))/d;
00310   
00311   BigReal tca = (vca*(2*mab - ma*cosB*cosB) +
00312                 vab*(ma*cosB*cosC - 2*mb*cosA) +
00313                 vbc*(mb*cosA*cosB - mab*cosC))*ma/d;
00314  
00315   Vector ga = tab*AB - tca*CA;
00316   Vector gb = tbc*BC - tab*AB;
00317   Vector gc = tca*CA - tbc*BC;
00318 #if 0
00319   if (virial) {
00320     *virial += 0.5*outer(tab, rAB)/dt;
00321     *virial += 0.5*outer(tbc, rBC)/dt;
00322     *virial += 0.5*outer(tca, rCA)/dt;
00323   }
00324 #endif
00325   vel[0] += (0.5/ma)*ga;
00326   vel[1] += (0.5/mb)*gb;
00327   vel[2] += (0.5/mb)*gc;
00328 
00329   return 0;
00330 }


Variable Documentation

BigReal mHrmT [static]
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 24 of file Settle.C.

Referenced by settle1(), and settle1init().

BigReal mOrmT [static]
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 24 of file Settle.C.

Referenced by settle1(), and settle1init().

BigReal ra [static]
 

Definition at line 25 of file Settle.C.

Referenced by settle1(), and settle1init().

BigReal rb [static]
 

Definition at line 25 of file Settle.C.

Referenced by settle1(), and settle1init().

BigReal rc [static]
 

Definition at line 25 of file Settle.C.

Referenced by settle1(), and settle1init().

BigReal rra [static]
 

Definition at line 26 of file Settle.C.

Referenced by settle1init().

int settle_first_time = 1 [static]
 

Definition at line 27 of file Settle.C.

Referenced by settle1init().


Generated on Sat Aug 30 04:07:42 2008 for NAMD by  doxygen 1.3.9.1