Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

Settle.C

Go to the documentation of this file.
00001 
00007 //#include "InfoStream.h"
00008 #include "Settle.h"
00009 #include <string.h>
00010 #include <math.h>
00011 //#include <charm++.h> // for CkPrintf
00012 
00013 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00014 #include <emmintrin.h>  // SSE2
00015 #if defined(__INTEL_COMPILER)
00016 #define __align(X) __declspec(align(X) )
00017 #elif defined(__PGI)
00018 #define __align(X)  __attribute__((aligned(X) ))
00019 #define MISSING_mm_cvtsd_f64
00020 #elif defined(__GNUC__)
00021 #define __align(X)  __attribute__((aligned(X) ))
00022 #if (__GNUC__ < 4)
00023 #define MISSING_mm_cvtsd_f64
00024 #endif
00025 #else
00026 #define __align(X) __declspec(align(X) )
00027 #endif
00028 #endif
00029 
00030 //
00031 // XXX static and global variables are unsafe for shared memory builds.
00032 // The global and static vars should be eliminated.
00033 // Unfortunately, the routines that use these below are actually
00034 // in use in NAMD.
00035 //
00036 
00037 // Initialize various properties of the waters
00038 // settle1() assumes all waters are identical, 
00039 // and will generate bad results if they are not.
00040 void settle1init(BigReal pmO, BigReal pmH, BigReal hhdist, BigReal ohdist,
00041                  BigReal &mOrmT, BigReal &mHrmT, BigReal &ra,
00042                  BigReal &rb, BigReal &rc, BigReal &rra) {
00043 
00044     BigReal rmT = 1.0 / (pmO+pmH+pmH);
00045     mOrmT = pmO * rmT;
00046     mHrmT = pmH * rmT;
00047     BigReal t1 = 0.5*pmO/pmH;
00048     rc = 0.5*hhdist;
00049     ra = sqrt(ohdist*ohdist-rc*rc)/(1.0+t1);
00050     rb = t1*ra;
00051     rra = 1.0 / ra;
00052 }
00053 
00054 
00055 int settle1(const Vector *ref, Vector *pos, Vector *vel, BigReal invdt,
00056                  BigReal mOrmT, BigReal mHrmT, BigReal ra,
00057                  BigReal rb, BigReal rc, BigReal rra) {
00058 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00059   // SSE acceleration of some of the costly parts of settle using
00060   // the Intel C/C++ classes.  This implementation uses the SSE units
00061   // less efficiency than is potentially possible, but in order to do
00062   // better, the settle algorithm will have to be vectorized and operate
00063   // on multiple waters at a time.  Doing so could give us the ability to
00064   // do two (double precison) or four (single precision) waters at a time.
00065   // This code achieves a modest speedup without the need to reorganize
00066   // the NAMD structure.  Once we have water molecules sorted in a single
00067   // block we can do far better.
00068 
00069   // vectors in the plane of the original positions
00070   Vector b0, c0;
00071 
00072   __m128d REF0xy = _mm_loadu_pd((double *) &ref[0].x);  // ref0.y and ref0.x
00073   __m128d REF1xy = _mm_loadu_pd((double *) &ref[1].x);  // ref1.y and ref1.x
00074 
00075   __m128d B0xy = _mm_sub_pd(REF1xy, REF0xy);
00076   _mm_storeu_pd((double *) &b0.x, B0xy);
00077   b0.z = ref[1].z - ref[0].z;
00078 
00079   __m128d REF2xy = _mm_loadu_pd((double *) &ref[2].x);  // ref2.y and ref2.x
00080 
00081   __m128d C0xy = _mm_sub_pd(REF2xy, REF0xy);
00082   _mm_storeu_pd((double *) &c0.x, C0xy);
00083   c0.z = ref[2].z - ref[0].z;
00084 
00085   // new center of mass
00086   // Vector d0 = pos[0] * mOrmT + ((pos[1] + pos[2]) * mHrmT);
00087   __align(16) Vector a1;
00088   __align(16) Vector b1;
00089   __align(16) Vector c1;
00090   __align(16) Vector d0;
00091 
00092   __m128d POS1xy = _mm_loadu_pd((double *) &pos[1].x);
00093   __m128d POS2xy = _mm_loadu_pd((double *) &pos[2].x);
00094   __m128d PMHrmTxy = _mm_mul_pd(_mm_add_pd(POS1xy, POS2xy), _mm_set1_pd(mHrmT));
00095 
00096   __m128d POS0xy = _mm_loadu_pd((double *) &pos[0].x);
00097   __m128d PMOrmTxy = _mm_mul_pd(POS0xy, _mm_set1_pd(mOrmT));
00098   __m128d D0xy = _mm_add_pd(PMOrmTxy, PMHrmTxy);
00099 
00100   d0.z = pos[0].z * mOrmT + ((pos[1].z + pos[2].z) * mHrmT);
00101   a1.z = pos[0].z - d0.z;
00102   b1.z = pos[1].z - d0.z;
00103   c1.z = pos[2].z - d0.z;
00104 
00105   __m128d A1xy = _mm_sub_pd(POS0xy, D0xy);
00106   _mm_store_pd((double *) &a1.x, A1xy); // must be aligned
00107 
00108   __m128d B1xy = _mm_sub_pd(POS1xy, D0xy);
00109   _mm_store_pd((double *) &b1.x, B1xy); // must be aligned
00110 
00111   __m128d C1xy = _mm_sub_pd(POS2xy, D0xy);
00112   _mm_store_pd((double *) &c1.x, C1xy); // must be aligned
00113 
00114   _mm_store_pd((double *) &d0.x, D0xy); // must be aligned
00115   
00116   // Vectors describing transformation from original coordinate system to
00117   // the 'primed' coordinate system as in the diagram.  
00118   Vector n0 = cross(b0, c0);
00119   Vector n1 = cross(a1, n0); 
00120   Vector n2 = cross(n0, n1); 
00121 #else
00122   // vectors in the plane of the original positions
00123   Vector b0 = ref[1]-ref[0];
00124   Vector c0 = ref[2]-ref[0];
00125   
00126   // new center of mass
00127   Vector d0 = pos[0]*mOrmT + ((pos[1] + pos[2])*mHrmT);
00128  
00129   Vector a1 = pos[0] - d0;
00130   Vector b1 = pos[1] - d0;
00131   Vector c1 = pos[2] - d0;
00132   
00133   // Vectors describing transformation from original coordinate system to
00134   // the 'primed' coordinate system as in the diagram.  
00135   Vector n0 = cross(b0, c0);
00136   Vector n1 = cross(a1, n0); 
00137   Vector n2 = cross(n0, n1); 
00138 #endif
00139 
00140 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) && ! defined(MISSING_mm_cvtsd_f64)
00141   __m128d l1 = _mm_set_pd(n0.x, n0.y);
00142   l1 = _mm_mul_pd(l1, l1);
00143   // n0.x^2 + n0.y^2
00144   double l1xy0 = _mm_cvtsd_f64(_mm_add_sd(l1, _mm_shuffle_pd(l1, l1, 1)));
00145 
00146   __m128d l3 = _mm_set_pd(n1.y, n1.z);
00147   l3 = _mm_mul_pd(l3, l3);
00148   // n1.y^2 + n1.z^2
00149   double l3yz1 = _mm_cvtsd_f64(_mm_add_sd(l3, _mm_shuffle_pd(l3, l3, 1)));
00150 
00151   __m128d l2 = _mm_set_pd(n1.x, n0.z);
00152   // len(n1)^2 and len(n0)^2 
00153   __m128d ts01 = _mm_add_pd(_mm_set_pd(l3yz1, l1xy0), _mm_mul_pd(l2, l2));
00154 
00155   __m128d l4 = _mm_set_pd(n2.x, n2.y);
00156   l4 = _mm_mul_pd(l4, l4);
00157   // n2.x^2 + n2.y^2
00158   double l4xy2 = _mm_cvtsd_f64(_mm_add_sd(l4, _mm_shuffle_pd(l4, l4, 1)));
00159   double ts2 = l4xy2 + (n2.z * n2.z);              // len(n2)^2
00160 
00161   double  invlens[4];
00162   // since rsqrt_nr() doesn't work with current compiler
00163   // this is the next best option 
00164   static const __m128d fvecd1p0 = _mm_set1_pd(1.0);
00165 
00166   // 1/len(n1) and 1/len(n0)
00167   __m128d invlen12 = _mm_div_pd(fvecd1p0, _mm_sqrt_pd(ts01));
00168 
00169   // invlens[0]=1/len(n0), invlens[1]=1/len(n1)
00170   _mm_storeu_pd(invlens, invlen12);
00171 
00172   n0 = n0 * invlens[0];
00173 
00174   // shuffle the order of operations around from the normal algorithm so
00175   // that we can double pump sqrt() with n2 and cosphi at the same time
00176   // these components are usually computed down in the canonical water block
00177   BigReal A1Z = n0 * a1;
00178   BigReal sinphi = A1Z * rra;
00179   BigReal tmp = 1.0-sinphi*sinphi;
00180 
00181   __m128d n2cosphi = _mm_sqrt_pd(_mm_set_pd(tmp, ts2));
00182   // invlens[2] = 1/len(n2), invlens[3] = cosphi
00183   _mm_storeu_pd(invlens+2, n2cosphi);
00184 
00185   n1 = n1 * invlens[1];
00186   n2 = n2 * (1.0 / invlens[2]);
00187   BigReal cosphi = invlens[3];
00188 
00189   b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00190   c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00191  
00192   b1 = Vector(n1*b1, n2*b1, n0*b1);
00193   c1 = Vector(n1*c1, n2*c1, n0*c1);
00194 
00195   // now we can compute positions of canonical water 
00196   BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00197   tmp = 1.0-sinpsi*sinpsi;
00198   BigReal cospsi = sqrt(tmp);
00199 #else
00200   n0 = n0.unit();
00201   n1 = n1.unit();
00202   n2 = n2.unit();
00203 
00204   b0 = Vector(n1*b0, n2*b0, n0*b0); // note: b0.z is never referenced again
00205   c0 = Vector(n1*c0, n2*c0, n0*c0); // note: c0.z is never referenced again
00206  
00207   BigReal A1Z = n0 * a1;
00208   b1 = Vector(n1*b1, n2*b1, n0*b1);
00209   c1 = Vector(n1*c1, n2*c1, n0*c1);
00210 
00211   // now we can compute positions of canonical water 
00212   BigReal sinphi = A1Z * rra;
00213   BigReal tmp = 1.0-sinphi*sinphi;
00214   BigReal cosphi = sqrt(tmp);
00215   BigReal sinpsi = (b1.z - c1.z)/(2.0*rc*cosphi);
00216   tmp = 1.0-sinpsi*sinpsi;
00217   BigReal cospsi = sqrt(tmp);
00218 #endif
00219 
00220   BigReal rbphi = -rb*cosphi;
00221   BigReal tmp1 = rc*sinpsi*sinphi;
00222   BigReal tmp2 = rc*sinpsi*cosphi;
00223  
00224   Vector a2(0, ra*cosphi, ra*sinphi);
00225   Vector b2(-rc*cospsi, rbphi - tmp1, -rb*sinphi + tmp2);
00226   Vector c2( rc*cosphi, rbphi + tmp1, -rb*sinphi - tmp2);
00227 
00228   // there are no a0 terms because we've already subtracted the term off 
00229   // when we first defined b0 and c0.
00230   BigReal alpha = b2.x*(b0.x - c0.x) + b0.y*b2.y + c0.y*c2.y;
00231   BigReal beta  = b2.x*(c0.y - b0.y) + b0.x*b2.y + c0.x*c2.y;
00232   BigReal gama  = b0.x*b1.y - b1.x*b0.y + c0.x*c1.y - c1.x*c0.y;
00233  
00234   BigReal a2b2 = alpha*alpha + beta*beta;
00235   BigReal sintheta = (alpha*gama - beta*sqrt(a2b2 - gama*gama))/a2b2;
00236   BigReal costheta = sqrt(1.0 - sintheta*sintheta);
00237   
00238 #if 0
00239   Vector a3( -a2.y*sintheta, 
00240               a2.y*costheta,
00241               a2.z);
00242   Vector b3(b2.x*costheta - b2.y*sintheta,
00243               b2.x*sintheta + b2.y*costheta,
00244               b2.z);
00245   Vector c3(c2.x*costheta - c2.y*sintheta,
00246               c2.x*sintheta + c2.y*costheta,
00247               c2.z);
00248   
00249 #else
00250   Vector a3( -a2.y*sintheta, 
00251               a2.y*costheta,
00252               A1Z);
00253   Vector b3(b2.x*costheta - b2.y*sintheta,
00254               b2.x*sintheta + b2.y*costheta,
00255               b1.z);
00256   Vector c3(-b2.x*costheta - c2.y*sintheta,
00257             -b2.x*sintheta + c2.y*costheta,
00258               c1.z);
00259 
00260 #endif
00261 
00262   // undo the transformation; generate new normal vectors from the transpose.
00263   Vector m1(n1.x, n2.x, n0.x);
00264   Vector m2(n1.y, n2.y, n0.y);
00265   Vector m0(n1.z, n2.z, n0.z);
00266 
00267   pos[0] = Vector(a3*m1, a3*m2, a3*m0) + d0;
00268   pos[1] = Vector(b3*m1, b3*m2, b3*m0) + d0;
00269   pos[2] = Vector(c3*m1, c3*m2, c3*m0) + d0;
00270 
00271   // dt can be negative during startup!
00272   if (invdt != 0) {
00273     vel[0] = (pos[0]-ref[0])*invdt;
00274     vel[1] = (pos[1]-ref[1])*invdt;
00275     vel[2] = (pos[2]-ref[2])*invdt;
00276   }
00277 
00278   return 0;
00279 }
00280 
00281 
00282 
00283 static int settlev(const Vector *pos, BigReal ma, BigReal mb, Vector *vel,
00284                                    BigReal dt, Tensor *virial) {
00285   
00286   Vector rAB = pos[1]-pos[0];
00287   Vector rBC = pos[2]-pos[1];
00288   Vector rCA = pos[0]-pos[2];
00289  
00290   Vector AB = rAB.unit();
00291   Vector BC = rBC.unit();
00292   Vector CA = rCA.unit();
00293   
00294   BigReal cosA = -AB * CA;
00295   BigReal cosB = -BC * AB;
00296   BigReal cosC = -CA * BC;
00297 
00298   BigReal vab = (vel[1]-vel[0])*AB;
00299   BigReal vbc = (vel[2]-vel[1])*BC;
00300   BigReal vca = (vel[0]-vel[2])*CA;
00301 
00302   BigReal mab = ma+mb;
00303   
00304   BigReal d = (2*mab*mab + 2*ma*mb*cosA*cosB*cosC - 2*mb*mb*cosA*cosA
00305                - ma*mab*(cosB*cosB + cosC*cosC))*0.5/mb;
00306 
00307   BigReal tab = (vab*(2*mab - ma*cosC*cosC) +
00308                 vbc*(mb*cosC*cosA - mab*cosB) +
00309                 vca*(ma*cosB*cosC - 2*mb*cosA))*ma/d;
00310             
00311   BigReal tbc = (vbc*(mab*mab - mb*mb*cosA*cosA) +
00312                 vca*ma*(mb*cosA*cosB - mab*cosC) +
00313                 vab*ma*(mb*cosC*cosA - mab*cosB))/d;
00314   
00315   BigReal tca = (vca*(2*mab - ma*cosB*cosB) +
00316                 vab*(ma*cosB*cosC - 2*mb*cosA) +
00317                 vbc*(mb*cosA*cosB - mab*cosC))*ma/d;
00318  
00319   Vector ga = tab*AB - tca*CA;
00320   Vector gb = tbc*BC - tab*AB;
00321   Vector gc = tca*CA - tbc*BC;
00322 #if 0
00323   if (virial) {
00324     *virial += 0.5*outer(tab, rAB)/dt;
00325     *virial += 0.5*outer(tbc, rBC)/dt;
00326     *virial += 0.5*outer(tca, rCA)/dt;
00327   }
00328 #endif
00329   vel[0] += (0.5/ma)*ga;
00330   vel[1] += (0.5/mb)*gb;
00331   vel[2] += (0.5/mb)*gc;
00332 
00333   return 0;
00334 }
00335 
00336 
00337 int settle2(BigReal mO, BigReal mH, const Vector *pos,
00338                   Vector *vel, BigReal dt, Tensor *virial) {
00339 
00340   settlev(pos, mO, mH, vel, dt, virial);
00341   return 0;
00342 }
00343 

Generated on Mon May 20 04:07:18 2013 for NAMD by  doxygen 1.3.9.1