Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

Settle.h File Reference

#include "Vector.h"
#include "Tensor.h"

Go to the source code of this file.

Functions

void settle1init (BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist, BigReal &mOrmT, BigReal &mHrmT, BigReal &ra, BigReal &rb, BigReal &rc, BigReal &rra)
 initialize cached water properties
int settle1 (const Vector *ref, Vector *pos, Vector *vel, BigReal invdt, BigReal mOrmT, BigReal mHrmT, BigReal ra, BigReal rb, BigReal rc, BigReal rra)
 optimized settle1 algorithm, reuses water properties as much as possible
int settle2 (BigReal mO, BigReal mH, const Vector *pos, Vector *vel, BigReal dt, Tensor *virial)


Function Documentation

int settle1 const Vector ref,
Vector pos,
Vector vel,
BigReal  invdt,
BigReal  mOrmT,
BigReal  mHrmT,
BigReal  ra,
BigReal  rb,
BigReal  rc,
BigReal  rra
 

optimized settle1 algorithm, reuses water properties as much as possible

Definition at line 55 of file Settle.C.

References BigReal, Vector::unit(), Vector::x, Vector::y, and Vector::z.

Referenced by HomePatch::rattle1().

00057                                                       {
00058 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00059   // SSE acceleration of some of the costly parts of settle using
00060   // the Intel C/C++ classes.  This implementation uses the SSE units
00061   // less efficiency than is potentially possible, but in order to do
00062   // better, the settle algorithm will have to be vectorized and operate
00063   // on multiple waters at a time.  Doing so could give us the ability to
00064   // do two (double precison) or four (single precision) waters at a time.
00065   // This code achieves a modest speedup without the need to reorganize
00066   // the NAMD structure.  Once we have water molecules sorted in a single
00067   // block we can do far better.
00068 
00069   // vectors in the plane of the original positions
00070   Vector b0, c0;
00071 
00072   __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x);  // ref0.y and ref0.x
00073   __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x);  // ref1.y and ref1.x
00074 
00075   __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
00076   _mm_storeu_pd((double *) &b0.x, B0xy);
00077   b0.z = ref[1].z - ref[0].z;
00078 
00079   __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x);  // ref2.y and ref2.x
00080 
00081   __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
00082   _mm_storeu_pd((double *) &c0.x, C0xy);
00083   c0.z = ref[2].z - ref[0].z;
00084 
00085   // new center of mass
00086   // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
00087   __align(16) Vector a1;
00088   __align(16) Vector b1;
00089   __align(16) Vector c1;
00090   __align(16) Vector d0;
00091 
00092   __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
00093   __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
00094   __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
00095 
00096   __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
00097   __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
00098   __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
00099 
00100   d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
00101   a1.z = pos[0].z - d0.z;
00102   b1.z = pos[1].z - d0.z;
00103   c1.z = pos[2].z - d0.z;
00104 
00105   __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
00106   _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
00107 
00108   __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
00109   _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
00110 
00111   __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
00112   _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
00113 
00114   _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
00115   
00116   // Vectors describing transformation from original coordinate system to
00117   // the 'primed' coordinate system as in the diagram.  
00118   Vector n0 = cross(b0, c0);
00119   Vector n1 = cross(a1, n0); 
00120   Vector n2 = cross(n0, n1); 
00121 #else
00122   // vectors in the plane of the original positions
00123   Vector b0 = ref[1]-ref[0];
00124   Vector c0 = ref[2]-ref[0];
00125   
00126   // new center of mass
00127   Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
00128  
00129   Vector a1 = pos[0] - d0;
00130   Vector b1 = pos[1] - d0;
00131   Vector c1 = pos[2] - d0;
00132   
00133   // Vectors describing transformation from original coordinate system to
00134   // the 'primed' coordinate system as in the diagram.  
00135   Vector n0 = cross(b0, c0);
00136   Vector n1 = cross(a1, n0); 
00137   Vector n2 = cross(n0, n1); 
00138 #endif
00139 
00140 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
00141   __m128d l1 = _mm_set_pd(n0.x, n0.y);
00142   l1 = _mm_mul_pd(l1, l1);
00143   // n0.x^2 + n0.y^2
00144   double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
00145 
00146   __m128d l3 = _mm_set_pd(n1.y, n1.z);
00147   l3 = _mm_mul_pd(l3, l3);
00148   // n1.y^2 + n1.z^2
00149   double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
00150 
00151   __m128d l2 = _mm_set_pd(n1.x, n0.z);
00152   // len(n1)^2 and len(n0)^2 
00153   __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
00154 
00155   __m128d l4 = _mm_set_pd(n2.x, n2.y);
00156   l4 = _mm_mul_pd(l4, l4);
00157   // n2.x^2 + n2.y^2
00158   double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
00159   double ts2 = l4xy2 + (n2.z * n2.z);              // len(n2)^2
00160 
00161   double  invlens[4];
00162   // since rsqrt_nr() doesn't work with current compiler
00163   // this is the next best option 
00164   static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
00165 
00166   // 1/len(n1) and 1/len(n0)
00167   __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
00168 
00169   // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
00170   _mm_storeu_pd(invlens, invlen12);
00171 
00172   n0 = n0 * invlens[0];
00173 
00174   // shuffle the order of operations around from the normal algorithm so
00175   // that we can double pump sqrt() with n2 and cosphi at the same time
00176   // these components are usually computed down in the canonical water block
00177   BigReal A1Z = n0 * a1;
00178   BigReal sinphi = A1Z * rra;
00179   BigReal tmp = 1.0-sinphi*sinphi;
00180 
00181   __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
00182   // invlens[2] = 1/len(n2), invlens[3] = cosphi
00183   _mm_storeu_pd(invlens+2, n2cosphi);
00184 
00185   n1 = n1 * invlens[1];
00186   n2 = n2 * (1.0 / invlens[2]);
00187   BigReal cosphi = invlens[3];
00188 
00189   b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00190   c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00191  
00192   b1 = Vector(n1*b1, n2*b1, n0*b1);
00193   c1 = Vector(n1*c1, n2*c1, n0*c1);
00194 
00195   // now we can compute positions of canonical water 
00196   BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00197   tmp = 1.0-sinpsi*sinpsi;
00198   BigReal cospsi = sqrt(tmp);
00199 #else
00200   n0 = n0.unit();
00201   n1 = n1.unit();
00202   n2 = n2.unit();
00203 
00204   b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00205   c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00206  
00207   BigReal A1Z = n0 * a1;
00208   b1 = Vector(n1*b1, n2*b1, n0*b1);
00209   c1 = Vector(n1*c1, n2*c1, n0*c1);
00210 
00211   // now we can compute positions of canonical water 
00212   BigReal sinphi = A1Z * rra;
00213   BigReal tmp = 1.0-sinphi*sinphi;
00214   BigReal cosphi = sqrt(tmp);
00215   BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00216   tmp = 1.0-sinpsi*sinpsi;
00217   BigReal cospsi = sqrt(tmp);
00218 #endif
00219 
00220   BigReal rbphi = -rb*cosphi;
00221   BigReal tmp1 = rc*sinpsi*sinphi;
00222   BigReal tmp2 = rc*sinpsi*cosphi;
00223  
00224   Vector a2(0, ra*cosphi, ra*sinphi);
00225   Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
00226   Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
00227 
00228   // there are no a0 terms because we've already subtracted the term off 
00229   // when we first defined b0 and c0.
00230   BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
00231   BigReal beta  = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
00232   BigReal gama  = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
00233  
00234   BigReal a2b2 = alpha*alpha + beta*beta;
00235   BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
00236   BigReal costheta = sqrt(1.0 - sintheta*sintheta);
00237   
00238 #if 0
00239   Vector a3( -a2.y*sintheta, 
00240               a2.y*costheta,
00241               a2.z);
00242   Vector b3(b2.x*costheta - b2.y*sintheta,
00243               b2.x*sintheta + b2.y*costheta,
00244               b2.z);
00245   Vector c3(c2.x*costheta - c2.y*sintheta,
00246               c2.x*sintheta + c2.y*costheta,
00247               c2.z);
00248   
00249 #else
00250   Vector a3( -a2.y*sintheta, 
00251               a2.y*costheta,
00252               A1Z);
00253   Vector b3(b2.x*costheta - b2.y*sintheta,
00254               b2.x*sintheta + b2.y*costheta,
00255               b1.z);
00256   Vector c3(-b2.x*costheta - c2.y*sintheta,
00257             -b2.x*sintheta + c2.y*costheta,
00258               c1.z);
00259 
00260 #endif
00261 
00262   // undo the transformation; generate new normal vectors from the transpose.
00263   Vector m1(n1.x, n2.x, n0.x);
00264   Vector m2(n1.y, n2.y, n0.y);
00265   Vector m0(n1.z, n2.z, n0.z);
00266 
00267   pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
00268   pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
00269   pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
00270 
00271   // dt can be negative during startup!
00272   if (invdt != 0) {
00273     vel[0] = (pos[0]-ref[0])*invdt;
00274     vel[1] = (pos[1]-ref[1])*invdt;
00275     vel[2] = (pos[2]-ref[2])*invdt;
00276   }
00277 
00278   return 0;
00279 }

void settle1init BigReal  pmO,
BigReal  pmH,
BigReal  hhdist,
BigReal  ohdist,
BigReal mOrmT,
BigReal mHrmT,
BigReal ra,
BigReal rb,
BigReal rc,
BigReal rra
 

initialize cached water properties

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 40 of file Settle.C.

References BigReal.

Referenced by HomePatch::rattle1().

00042                                                          {
00043 
00044     BigReal rmT = 1.0 / (pmO+pmH+pmH);
00045     mOrmT = pmO * rmT;
00046     mHrmT = pmH * rmT;
00047     BigReal t1 = 0.5*pmO/pmH;
00048     rc = 0.5*hhdist;
00049     ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
00050     rb = t1*ra;
00051     rra = 1.0 / ra;
00052 }

int settle2 BigReal  mO,
BigReal  mH,
const Vector pos,
Vector vel,
BigReal  dt,
Tensor virial
 

Definition at line 337 of file Settle.C.

References settlev().

Referenced by HomePatch::rattle2().

00338                                                            {
00339 
00340   settlev(pos, mO, mH, vel, dt, virial);
00341   return 0;
00342 }


Generated on Tue Jun 18 04:07:51 2013 for NAMD by  doxygen 1.3.9.1