PmeKSpace.C

Go to the documentation of this file.
00001 
00007 #include "PmeKSpace.h"
00008 #include <math.h>
00009 #include <stdlib.h>
00010 
00011 #ifndef _NO_ALLOCA_H
00012 #include <alloca.h>
00013 #endif
00014 
00015 #include "PmeBase.inl"
00016 #include "SimParameters.h" 
00017 #include "Node.h"  
00018 #include "ComputeMoaMgr.decl.h"  
00019 
00020 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00021 
00022 static void dftmod(double *bsp_mod, double *bsp_arr, int nfft) {
00023   int j, k;
00024   double twopi, arg, sum1, sum2;
00025   double infft = 1.0/nfft;
00026 /* Computes the modulus of the discrete fourier transform of bsp_arr, */
00027 /*  storing it into bsp_mod */
00028   twopi =  2.0 * M_PI;
00029 
00030   for (k = 0; k <nfft; ++k) {
00031     sum1 = 0.;
00032     sum2 = 0.;
00033     for (j = 0; j < nfft; ++j) {
00034       arg = twopi * k * j * infft;
00035       sum1 += bsp_arr[j] * cos(arg);
00036       sum2 += bsp_arr[j] * sin(arg);
00037     }
00038     bsp_mod[k] = sum1*sum1 + sum2*sum2;
00039   }
00040 }
00041 
00042 void compute_b_moduli(double *bm, int K, int order) {
00043   int i;
00044   double fr[3];
00045 
00046   double *M = new double[3*order];
00047   double *dM = new double[3*order];
00048   double *scratch = new double[K];
00049 
00050   fr[0]=fr[1]=fr[2]=0.0;
00051   compute_b_spline(fr,M,dM,order);  
00052   for (i=0; i<order; i++) bm[i] = M[i];
00053   for (i=order; i<K; i++) bm[i] = 0.0;
00054   dftmod(scratch, bm, K);
00055   for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
00056 
00057 
00058   delete [] scratch;
00059   delete [] dM;
00060   delete [] M;
00061 }
00062 
00063 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end) 
00064   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00065         k3_start(K3_start), k3_end(K3_end) {
00066   int K1, K2, K3, order;
00067   K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00068 
00069   bm1 = new double[K1];
00070   bm2 = new double[K2];
00071   bm3 = new double[K3];
00072 
00073   exp1 = new double[K1/2 + 1];
00074   exp2 = new double[K2/2 + 1];
00075   exp3 = new double[K3/2 + 1];
00076 
00077   compute_b_moduli(bm1, K1, order);
00078   compute_b_moduli(bm2, K2, order);
00079   compute_b_moduli(bm3, K3, order);
00080 
00081 }
00082 
00083 #ifdef OPENATOM_VERSION
00084 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end, CProxy_ComputeMoaMgr moaProxy) 
00085   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00086         k3_start(K3_start), k3_end(K3_end) {
00087   int K1, K2, K3, order;
00088   K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00089 
00090   SimParameters *simParams = Node::Object()->simParameters;
00091 
00092   bm1 = new double[K1];
00093   bm2 = new double[K2];
00094   bm3 = new double[K3];
00095 
00096   exp1 = new double[K1/2 + 1];
00097   exp2 = new double[K2/2 + 1];
00098   exp3 = new double[K3/2 + 1];
00099 
00100   compute_b_moduli(bm1, K1, order);
00101   compute_b_moduli(bm2, K2, order);
00102   compute_b_moduli(bm3, K3, order);
00103 
00104   if ( simParams->openatomOn ) {
00105     CkPrintf("######################################################\n");
00106     CkPrintf("Entering recvBmX loop on processor %d \n", CkMyPe() );
00107     int i;
00108     for ( i=0; i<=K1; ++i) {
00109     CkPrintf("bm1 value pre-send %d = %d \n", i, bm1[i] );
00110     }
00111     for ( i=0; i<=K1; ++i) {
00112     CkPrintf("bm1 reference pre-send %d = %d \n", i, &bm1[i] );
00113     }
00114     CkPrintf("bm1: %d \n", *bm1);    
00115     moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3, order);
00116 //    qmmmcp->recvBm1(bm1, K1, order);
00117 //    qmmmcp->recvBm2(bm2, K2, order);
00118 //    qmmmcp->recvBm3(bm3, K3, order);
00119   }
00120     
00121 
00122 }
00123 #endif // OPENATOM_VERSION
00124 
00125 PmeKSpace::~PmeKSpace() {
00126   delete [] bm1;
00127   delete [] bm2;
00128   delete [] bm3;
00129   
00130   delete [] exp1;
00131   delete [] exp2;
00132   delete [] exp3;
00133 }
00134 
00135 void PmeKSpace::compute_energy_orthogonal_subset(float q_arr[], double *recips, double *partialVirial, double *partialEnergy, int k1from, int k1to){
00136 
00137     double energy = 0.0;
00138     double v0 = 0.;
00139     double v1 = 0.;
00140     double v2 = 0.;
00141     double v3 = 0.;
00142     double v4 = 0.;
00143     double v5 = 0.;
00144     
00145     int k1, k2, k3;
00146     int K1, K2, K3;
00147     K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00148     
00149     double recipx = recips[0];
00150     double recipy = recips[1];
00151     double recipz = recips[2];
00152 
00153     int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
00154         
00155     for ( k1=k1from; k1<=k1to; ++k1 ) {        
00156         double m1, m11, b1, xp1;
00157         b1 = bm1[k1];
00158         int k1_s = k1<=K1/2 ? k1 : k1-K1;
00159         m1 = k1_s*recipx;
00160         m11 = m1*m1;
00161         xp1 = i_pi_volume*exp1[abs(k1_s)];
00162         for ( k2=k2_start; k2<k2_end; ++k2 ) {
00163             double m2, m22, b1b2, xp2;
00164             b1b2 = b1*bm2[k2];
00165             int k2_s = k2<=K2/2 ? k2 : k2-K2;
00166             m2 = k2_s*recipy;
00167             m22 = m2*m2;
00168             xp2 = exp2[abs(k2_s)]*xp1;
00169 
00170             k3 = k3_start;
00171             if (k1==0 && k2==0 && k3==0) {
00172               q_arr[ind++] = 0.0;
00173               q_arr[ind++] = 0.0;
00174               ++k3;
00175             }
00176             for (; k3<k3_end; ++k3 ) {
00177               double m3, m33, xp3, msq, imsq, vir, fac;
00178               double theta3, theta, q2, qr, qc, C;
00179               theta3 = bm3[k3] *b1b2;
00180               m3 = k3*recipz;
00181               m33 = m3*m3;
00182               xp3 = exp3[k3];
00183               qr = q_arr[ind]; qc=q_arr[ind+1];
00184               q2 = 2*(qr*qr + qc*qc)*theta3;
00185               if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00186               msq = m11 + m22 + m33;
00187               imsq = 1.0/msq;
00188               C = xp2*xp3*imsq;
00189               theta = theta3*C;
00190               q_arr[ind] *= theta;
00191               q_arr[ind+1] *= theta;
00192               vir = -2*(piob+imsq);
00193               fac = q2*C;
00194               energy += fac;
00195               v0 += fac*(1.0+vir*m11);
00196               v1 += fac*vir*m1*m2;
00197               v2 += fac*vir*m1*m3;
00198               v3 += fac*(1.0+vir*m22);
00199               v4 += fac*vir*m2*m3;
00200               v5 += fac*(1.0+vir*m33);
00201               ind += 2;
00202             }
00203         }
00204     }
00205     
00206     *partialEnergy = 0.5*energy;
00207     partialVirial[0] = 0.5*v0;
00208     partialVirial[1] = 0.5*v1;
00209     partialVirial[2] = 0.5*v2;
00210     partialVirial[3] = 0.5*v3;
00211     partialVirial[4] = 0.5*v4;
00212     partialVirial[5] = 0.5*v5;
00213 }
00214 static inline void compute_energy_orthogonal_ckloop(int first, int last, void *result, int paraNum, void *param){
00215   for ( int i = first; i <= last; ++i ) {
00216     void **params = (void **)param;
00217     PmeKSpace *kspace = (PmeKSpace *)params[0];
00218     float *q_arr = (float *)params[1];
00219     double *recips = (double *)params[2];
00220     double *partialEnergy = (double *)params[3];
00221     double *partialVirial = (double *)params[4];
00222     int *unitDist = (int *)params[5];
00223     
00224     int unit = unitDist[0];
00225     int remains = unitDist[1];
00226     int k1from, k1to;
00227     if(i<remains){
00228         k1from = i*(unit+1);
00229         k1to = k1from+unit;
00230     }else{
00231         k1from = remains*(unit+1)+(i-remains)*unit;
00232         k1to = k1from+unit-1;
00233     }
00234     double *pEnergy = partialEnergy+i;
00235     double *pVirial = partialVirial+i*6;
00236     kspace->compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00237   }
00238 }
00239 
00240 double PmeKSpace::compute_energy_orthogonal_helper(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00241   double energy = 0.0;
00242   double v0 = 0.;
00243   double v1 = 0.;
00244   double v2 = 0.;
00245   double v3 = 0.;
00246   double v4 = 0.;
00247   double v5 = 0.;
00248 
00249   int n;
00250   int k1, k2, k3, ind;
00251   int K1, K2, K3;
00252 
00253   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00254 
00255   i_pi_volume = 1.0/(M_PI * lattice.volume());
00256   piob = M_PI/ewald;
00257   piob *= piob;
00258 
00259 
00260     double recipx = lattice.a_r().x;
00261     double recipy = lattice.b_r().y;
00262     double recipz = lattice.c_r().z;
00263         
00264     init_exp(exp1, K1, 0, K1, recipx);
00265     init_exp(exp2, K2, k2_start, k2_end, recipy);
00266     init_exp(exp3, K3, k3_start, k3_end, recipz);
00267 
00268     double recips[] = {recipx, recipy, recipz};
00269     int NPARTS=CmiMyNodeSize(); //this controls the granularity of loop parallelism
00270     int maxParts = ( K1 * ( k2_end - k2_start ) * ( k3_end - k3_start ) + 127 ) / 128;
00271     if ( NPARTS >  maxParts ) NPARTS = maxParts;
00272     if ( NPARTS >  K1 ) NPARTS = K1; 
00273     ALLOCA(double, partialEnergy, NPARTS);
00274     ALLOCA(double, partialVirial, 6*NPARTS);
00275     int unitDist[] = {K1/NPARTS, K1%NPARTS};
00276     
00277     //parallelize the following loop using CkLoop
00278     void *params[] = {this, q_arr, recips, partialEnergy, partialVirial, unitDist};
00279 
00280 #if     CMK_SMP && USE_CKLOOP
00281     CkLoop_Parallelize(compute_energy_orthogonal_ckloop, 6, (void *)params, NPARTS, 0, NPARTS-1);
00282 #endif
00283 /*  
00284     //The transformed loop used to compute energy
00285     int unit = K1/NPARTS;
00286     int remains = K1%NPARTS;  
00287     for(int i=0; i<NPARTS; i++){
00288         int k1from, k1to;
00289         if(i<remains){
00290             k1from = i*(unit+1);
00291             k1to = k1from+unit;
00292         }else{
00293             k1from = remains*(unit+1)+(i-remains)*unit;
00294             k1to = k1from+unit-1;
00295         }
00296         double *pEnergy = partialEnergy+i;
00297         double *pVirial = partialVirial+i*6;
00298         compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00299     }
00300 */    
00301     
00302     for(int i=0; i<NPARTS; i++){
00303         v0 += partialVirial[i*6+0];
00304         v1 += partialVirial[i*6+1];
00305         v2 += partialVirial[i*6+2];
00306         v3 += partialVirial[i*6+3];
00307         v4 += partialVirial[i*6+4];
00308         v5 += partialVirial[i*6+5];
00309         energy += partialEnergy[i];
00310     }
00311     
00312     virial[0] = v0;
00313     virial[1] = v1;
00314     virial[2] = v2;
00315     virial[3] = v3;
00316     virial[4] = v4;
00317     virial[5] = v5;
00318     return energy;
00319 }
00320 
00321 double PmeKSpace::compute_energy(float *q_arr, const Lattice &lattice, double ewald, double *virial, int useCkLoop) {
00322   double energy = 0.0;
00323   double v0 = 0.;
00324   double v1 = 0.;
00325   double v2 = 0.;
00326   double v3 = 0.;
00327   double v4 = 0.;
00328   double v5 = 0.;
00329 
00330   int n;
00331   int k1, k2, k3, ind;
00332   int K1, K2, K3;
00333 
00334   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00335 
00336   i_pi_volume = 1.0/(M_PI * lattice.volume());
00337   piob = M_PI/ewald;
00338   piob *= piob;
00339 
00340   if ( lattice.orthogonal() ) {
00341   // if ( 0 ) { // JCP FOR TESTING
00342     //This branch is the usual call path.
00343 #if     CMK_SMP && USE_CKLOOP
00344     if ( useCkLoop ) {
00345         return compute_energy_orthogonal_helper(q_arr, lattice, ewald, virial);
00346     }
00347 #endif    
00348     double recipx = lattice.a_r().x;
00349     double recipy = lattice.b_r().y;
00350     double recipz = lattice.c_r().z;
00351         
00352     init_exp(exp1, K1, 0, K1, recipx);
00353     init_exp(exp2, K2, k2_start, k2_end, recipy);
00354     init_exp(exp3, K3, k3_start, k3_end, recipz);
00355 
00356     ind = 0;
00357     for ( k1=0; k1<K1; ++k1 ) {
00358       double m1, m11, b1, xp1;
00359       b1 = bm1[k1];
00360       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00361       m1 = k1_s*recipx;
00362       m11 = m1*m1;
00363       xp1 = i_pi_volume*exp1[abs(k1_s)];
00364       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00365         double m2, m22, b1b2, xp2;
00366         b1b2 = b1*bm2[k2];
00367         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00368         m2 = k2_s*recipy;
00369         m22 = m2*m2;
00370         xp2 = exp2[abs(k2_s)]*xp1;
00371         k3 = k3_start;
00372         if ( k1==0 && k2==0 && k3==0 ) {
00373           q_arr[ind++] = 0.0;
00374           q_arr[ind++] = 0.0;
00375           ++k3;
00376         }
00377         for ( ; k3<k3_end; ++k3 ) {
00378           double m3, m33, xp3, msq, imsq, vir, fac;
00379           double theta3, theta, q2, qr, qc, C;
00380           theta3 = bm3[k3] *b1b2;
00381           m3 = k3*recipz;
00382           m33 = m3*m3;
00383           xp3 = exp3[k3];
00384           qr = q_arr[ind]; qc=q_arr[ind+1];
00385           q2 = 2*(qr*qr + qc*qc)*theta3;
00386           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00387           msq = m11 + m22 + m33;
00388           imsq = 1.0/msq;
00389           C = xp2*xp3*imsq;
00390           theta = theta3*C;
00391           q_arr[ind] *= theta;
00392           q_arr[ind+1] *= theta;
00393           vir = -2*(piob+imsq);
00394           fac = q2*C;
00395           energy += fac;
00396           v0 += fac*(1.0+vir*m11);
00397           v1 += fac*vir*m1*m2;
00398           v2 += fac*vir*m1*m3;
00399           v3 += fac*(1.0+vir*m22);
00400           v4 += fac*vir*m2*m3;
00401           v5 += fac*(1.0+vir*m33);
00402           ind += 2;
00403         }
00404       }
00405     }
00406     
00407   } else if ( cross(lattice.a(),lattice.b()).unit() == lattice.c().unit() ) {
00408   // } else if ( 0 ) { // JCP FOR TESTING
00409     Vector recip1 = lattice.a_r();
00410     Vector recip2 = lattice.b_r();
00411     Vector recip3 = lattice.c_r();
00412     double recip3_x = recip3.x;
00413     double recip3_y = recip3.y;
00414     double recip3_z = recip3.z;
00415     init_exp(exp3, K3, k3_start, k3_end, recip3.length());
00416 
00417     ind = 0;
00418     for ( k1=0; k1<K1; ++k1 ) {
00419       double b1; Vector m1;
00420       b1 = bm1[k1];
00421       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00422       m1 = k1_s*recip1;
00423       // xp1 = i_pi_volume*exp1[abs(k1_s)];
00424       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00425         double xp2, b1b2, m2_x, m2_y, m2_z;
00426         b1b2 = b1*bm2[k2];
00427         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00428         m2_x = m1.x + k2_s*recip2.x;
00429         m2_y = m1.y + k2_s*recip2.y;
00430         m2_z = m1.z + k2_s*recip2.z;
00431         // xp2 = exp2[abs(k2_s)]*xp1;
00432         xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
00433         k3 = k3_start;
00434         if ( k1==0 && k2==0 && k3==0 ) {
00435           q_arr[ind++] = 0.0;
00436           q_arr[ind++] = 0.0;
00437           ++k3;
00438         }
00439         for ( ; k3<k3_end; ++k3 ) {
00440           double xp3, msq, imsq, vir, fac;
00441           double theta3, theta, q2, qr, qc, C;
00442           double m_x, m_y, m_z;
00443           theta3 = bm3[k3] *b1b2;
00444           m_x = m2_x + k3*recip3_x;
00445           m_y = m2_y + k3*recip3_y;
00446           m_z = m2_z + k3*recip3_z;
00447           msq = m_x*m_x + m_y*m_y + m_z*m_z;
00448           xp3 = exp3[k3];
00449           qr = q_arr[ind]; qc=q_arr[ind+1];
00450           q2 = 2*(qr*qr + qc*qc)*theta3;
00451           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00452           imsq = 1.0/msq;
00453           C = xp2*xp3*imsq;
00454           theta = theta3*C;
00455           q_arr[ind] *= theta;
00456           q_arr[ind+1] *= theta;
00457           vir = -2*(piob+imsq);
00458           fac = q2*C;
00459           energy += fac;
00460           v0 += fac*(1.0+vir*m_x*m_x);
00461           v1 += fac*vir*m_x*m_y;
00462           v2 += fac*vir*m_x*m_z;
00463           v3 += fac*(1.0+vir*m_y*m_y);
00464           v4 += fac*vir*m_y*m_z;
00465           v5 += fac*(1.0+vir*m_z*m_z);
00466           ind += 2;
00467         }
00468       }
00469     }
00470 
00471   } else {
00472     Vector recip1 = lattice.a_r();
00473     Vector recip2 = lattice.b_r();
00474     Vector recip3 = lattice.c_r();
00475     double recip3_x = recip3.x;
00476     double recip3_y = recip3.y;
00477     double recip3_z = recip3.z;
00478 
00479     ind = 0;
00480     for ( k1=0; k1<K1; ++k1 ) {
00481       double b1; Vector m1;
00482       b1 = bm1[k1];
00483       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00484       m1 = k1_s*recip1;
00485       // xp1 = i_pi_volume*exp1[abs(k1_s)];
00486       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00487         double b1b2, m2_x, m2_y, m2_z;
00488         b1b2 = b1*bm2[k2];
00489         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00490         m2_x = m1.x + k2_s*recip2.x;
00491         m2_y = m1.y + k2_s*recip2.y;
00492         m2_z = m1.z + k2_s*recip2.z;
00493         // xp2 = exp2[abs(k2_s)]*xp1;
00494         k3 = k3_start;
00495         if ( k1==0 && k2==0 && k3==0 ) {
00496           q_arr[ind++] = 0.0;
00497           q_arr[ind++] = 0.0;
00498           ++k3;
00499         }
00500         for ( ; k3<k3_end; ++k3 ) {
00501           double xp3, msq, imsq, vir, fac;
00502           double theta3, theta, q2, qr, qc, C;
00503           double m_x, m_y, m_z;
00504           theta3 = bm3[k3] *b1b2;
00505           m_x = m2_x + k3*recip3_x;
00506           m_y = m2_y + k3*recip3_y;
00507           m_z = m2_z + k3*recip3_z;
00508           msq = m_x*m_x + m_y*m_y + m_z*m_z;
00509           // xp3 = exp3[k3];
00510           xp3 = i_pi_volume*exp(-piob*msq);
00511           qr = q_arr[ind]; qc=q_arr[ind+1];
00512           q2 = 2*(qr*qr + qc*qc)*theta3;
00513           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00514           imsq = 1.0/msq;
00515           C = xp3*imsq;
00516           theta = theta3*C;
00517           q_arr[ind] *= theta;
00518           q_arr[ind+1] *= theta;
00519           vir = -2*(piob+imsq);
00520           fac = q2*C;
00521           energy += fac;
00522           v0 += fac*(1.0+vir*m_x*m_x);
00523           v1 += fac*vir*m_x*m_y;
00524           v2 += fac*vir*m_x*m_z;
00525           v3 += fac*(1.0+vir*m_y*m_y);
00526           v4 += fac*vir*m_y*m_z;
00527           v5 += fac*(1.0+vir*m_z*m_z);
00528           ind += 2;
00529         }
00530       }
00531     }
00532 
00533   }
00534 
00535   virial[0] = 0.5 * v0;
00536   virial[1] = 0.5 * v1;
00537   virial[2] = 0.5 * v2;
00538   virial[3] = 0.5 * v3;
00539   virial[4] = 0.5 * v4;
00540   virial[5] = 0.5 * v5;
00541   return 0.5*energy;
00542 }
00543 
00544 
00545 void PmeKSpace::init_exp(double *xp, int K, int k_start, int k_end, double recip) {
00546   int i;
00547   double fac;
00548   fac = -piob*recip*recip;
00549   int i_start = k_start;
00550   int i_end = k_end;
00551   if ( k_start > K/2 ) {
00552     i_start = K - k_end + 1;
00553     i_end = K - k_start + 1;
00554   } else if ( k_end > K/2 ) {
00555     i_end = K/2 + 1;
00556     i_start = K - k_end + 1;
00557     if ( k_start < i_start ) i_start = k_start;
00558   }
00559 
00560   for (i=i_start; i < i_end; i++)
00561     xp[i] = exp(fac*i*i);
00562 } 

Generated on Tue Sep 19 01:17:14 2017 for NAMD by  doxygen 1.4.7