Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

PmeKSpace.C

Go to the documentation of this file.
00001 
00007 #include "PmeKSpace.h"
00008 #include <math.h>
00009 #include <stdlib.h>
00010 
00011 #ifndef _NO_ALLOCA_H
00012 #include <alloca.h>
00013 #endif
00014 
00015 #include "PmeBase.inl"
00016 #include "SimParameters.h" 
00017 #include "Node.h"  
00018 #include "ComputeMoaMgr.decl.h"  
00019 
00020 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00021 
00022 static void dftmod(double *bsp_mod, double *bsp_arr, int nfft) {
00023   int j, k;
00024   double twopi, arg, sum1, sum2;
00025   double infft = 1.0/nfft;
00026 /* Computes the modulus of the discrete fourier transform of bsp_arr, */
00027 /*  storing it into bsp_mod */
00028   twopi =  2.0 * M_PI;
00029 
00030   for (k = 0; k <nfft; ++k) {
00031     sum1 = 0.;
00032     sum2 = 0.;
00033     for (j = 0; j < nfft; ++j) {
00034       arg = twopi * k * j * infft;
00035       sum1 += bsp_arr[j] * cos(arg);
00036       sum2 += bsp_arr[j] * sin(arg);
00037     }
00038     bsp_mod[k] = sum1*sum1 + sum2*sum2;
00039   }
00040 }
00041 
00042 static void compute_b_moduli(double *bm, int K, int order) {
00043   int i;
00044   double fr[3];
00045 
00046   double *M = new double[3*order];
00047   double *dM = new double[3*order];
00048   double *scratch = new double[K];
00049 
00050   fr[0]=fr[1]=fr[2]=0.0;
00051   compute_b_spline(fr,M,dM,order);  
00052   for (i=0; i<order; i++) bm[i] = M[i];
00053   for (i=order; i<K; i++) bm[i] = 0.0;
00054   dftmod(scratch, bm, K);
00055   for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
00056 
00057 
00058   delete [] scratch;
00059   delete [] dM;
00060   delete [] M;
00061 }
00062 
00063 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end) 
00064   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00065         k3_start(K3_start), k3_end(K3_end) {
00066   int K1, K2, K3, order;
00067   K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00068 
00069   bm1 = new double[K1];
00070   bm2 = new double[K2];
00071   bm3 = new double[K3];
00072 
00073   exp1 = new double[K1/2 + 1];
00074   exp2 = new double[K2/2 + 1];
00075   exp3 = new double[K3/2 + 1];
00076 
00077   compute_b_moduli(bm1, K1, order);
00078   compute_b_moduli(bm2, K2, order);
00079   compute_b_moduli(bm3, K3, order);
00080 
00081 }
00082 
00083 #ifdef OPENATOM_VERSION
00084 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end, CProxy_ComputeMoaMgr moaProxy) 
00085   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00086         k3_start(K3_start), k3_end(K3_end) {
00087   int K1, K2, K3, order;
00088   K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00089 
00090   SimParameters *simParams = Node::Object()->simParameters;
00091 
00092   bm1 = new double[K1];
00093   bm2 = new double[K2];
00094   bm3 = new double[K3];
00095 
00096   exp1 = new double[K1/2 + 1];
00097   exp2 = new double[K2/2 + 1];
00098   exp3 = new double[K3/2 + 1];
00099 
00100   compute_b_moduli(bm1, K1, order);
00101   compute_b_moduli(bm2, K2, order);
00102   compute_b_moduli(bm3, K3, order);
00103 
00104   if ( simParams->openatomOn ) {
00105     CkPrintf("######################################################\n");
00106     CkPrintf("Entering recvBmX loop on processor %d \n", CkMyPe() );
00107     int i;
00108     for ( i=0; i<=K1; ++i) {
00109     CkPrintf("bm1 value pre-send %d = %d \n", i, bm1[i] );
00110     }
00111     for ( i=0; i<=K1; ++i) {
00112     CkPrintf("bm1 reference pre-send %d = %d \n", i, &bm1[i] );
00113     }
00114     CkPrintf("bm1: %d \n", *bm1);    
00115     moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3, order);
00116 //    qmmmcp->recvBm1(bm1, K1, order);
00117 //    qmmmcp->recvBm2(bm2, K2, order);
00118 //    qmmmcp->recvBm3(bm3, K3, order);
00119   }
00120     
00121 
00122 }
00123 #endif // OPENATOM_VERSION
00124 
00125 PmeKSpace::~PmeKSpace() {
00126   delete [] bm1;
00127   delete [] bm2;
00128   delete [] bm3;
00129   
00130   delete [] exp1;
00131   delete [] exp2;
00132   delete [] exp3;
00133 }
00134 
00135 void PmeKSpace::compute_energy_orthogonal_subset(float q_arr[], double *recips, double *partialVirial, double *partialEnergy, int k1from, int k1to){
00136 
00137     double energy = 0.0;
00138     double v0 = 0.;
00139     double v1 = 0.;
00140     double v2 = 0.;
00141     double v3 = 0.;
00142     double v4 = 0.;
00143     double v5 = 0.;
00144     
00145     int k1, k2, k3;
00146     int K1, K2, K3;
00147     K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00148     
00149     double recipx = recips[0];
00150     double recipy = recips[1];
00151     double recipz = recips[2];
00152 
00153     int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
00154         
00155     for ( k1=k1from; k1<=k1to; ++k1 ) {        
00156         double m1, m11, b1, xp1;
00157         b1 = bm1[k1];
00158         int k1_s = k1<=K1/2 ? k1 : k1-K1;
00159         m1 = k1_s*recipx;
00160         m11 = m1*m1;
00161         xp1 = i_pi_volume*exp1[abs(k1_s)];
00162         for ( k2=k2_start; k2<k2_end; ++k2 ) {
00163             double m2, m22, b1b2, xp2;
00164             b1b2 = b1*bm2[k2];
00165             int k2_s = k2<=K2/2 ? k2 : k2-K2;
00166             m2 = k2_s*recipy;
00167             m22 = m2*m2;
00168             xp2 = exp2[abs(k2_s)]*xp1;
00169 
00170             k3 = k3_start;
00171             if (k1==0 && k2==0 && k3==0) {
00172               q_arr[ind++] = 0.0;
00173               q_arr[ind++] = 0.0;
00174               ++k3;
00175             }
00176             for (; k3<k3_end; ++k3 ) {
00177               double m3, m33, xp3, msq, imsq, vir, fac;
00178               double theta3, theta, q2, qr, qc, C;
00179               theta3 = bm3[k3] *b1b2;
00180               m3 = k3*recipz;
00181               m33 = m3*m3;
00182               xp3 = exp3[k3];
00183               qr = q_arr[ind]; qc=q_arr[ind+1];
00184               q2 = 2*(qr*qr + qc*qc)*theta3;
00185               if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00186               msq = m11 + m22 + m33;
00187               imsq = 1.0/msq;
00188               C = xp2*xp3*imsq;
00189               theta = theta3*C;
00190               q_arr[ind] *= theta;
00191               q_arr[ind+1] *= theta;
00192               vir = -2*(piob+imsq);
00193               fac = q2*C;
00194               energy += fac;
00195               v0 += fac*(1.0+vir*m11);
00196               v1 += fac*vir*m1*m2;
00197               v2 += fac*vir*m1*m3;
00198               v3 += fac*(1.0+vir*m22);
00199               v4 += fac*vir*m2*m3;
00200               v5 += fac*(1.0+vir*m33);
00201               ind += 2;
00202             }
00203         }
00204     }
00205     
00206     *partialEnergy = 0.5*energy;
00207     partialVirial[0] = 0.5*v0;
00208     partialVirial[1] = 0.5*v1;
00209     partialVirial[2] = 0.5*v2;
00210     partialVirial[3] = 0.5*v3;
00211     partialVirial[4] = 0.5*v4;
00212     partialVirial[5] = 0.5*v5;
00213 }
00214 static inline void compute_energy_orthogonal_ckloop(int first, int last, void *result, int paraNum, void *param){
00215     CmiAssert(first==last);
00216     void **params = (void **)param;
00217     PmeKSpace *kspace = (PmeKSpace *)params[0];
00218     float *q_arr = (float *)params[1];
00219     double *recips = (double *)params[2];
00220     double *partialEnergy = (double *)params[3];
00221     double *partialVirial = (double *)params[4];
00222     int *unitDist = (int *)params[5];
00223     
00224     int i = first;
00225     int unit = unitDist[0];
00226     int remains = unitDist[1];
00227     int k1from, k1to;
00228     if(i<remains){
00229         k1from = i*(unit+1);
00230         k1to = k1from+unit;
00231     }else{
00232         k1from = remains*(unit+1)+(i-remains)*unit;
00233         k1to = k1from+unit-1;
00234     }
00235     double *pEnergy = partialEnergy+i;
00236     double *pVirial = partialVirial+i*6;
00237     kspace->compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00238 }
00239 
00240 double PmeKSpace::compute_energy_orthogonal_helper(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00241   double energy = 0.0;
00242   double v0 = 0.;
00243   double v1 = 0.;
00244   double v2 = 0.;
00245   double v3 = 0.;
00246   double v4 = 0.;
00247   double v5 = 0.;
00248 
00249   int n;
00250   int k1, k2, k3, ind;
00251   int K1, K2, K3;
00252 
00253   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00254 
00255   i_pi_volume = 1.0/(M_PI * lattice.volume());
00256   piob = M_PI/ewald;
00257   piob *= piob;
00258 
00259 
00260     double recipx = lattice.a_r().x;
00261     double recipy = lattice.b_r().y;
00262     double recipz = lattice.c_r().z;
00263         
00264     init_exp(exp1, K1, 0, K1, recipx);
00265     init_exp(exp2, K2, k2_start, k2_end, recipy);
00266     init_exp(exp3, K3, k3_start, k3_end, recipz);
00267 
00268     double recips[] = {recipx, recipy, recipz};
00269     const int NPARTS=CmiMyNodeSize(); //this controls the granularity of loop parallelism
00270     ALLOCA(double, partialEnergy, NPARTS);
00271     ALLOCA(double, partialVirial, 6*NPARTS);
00272     int unitDist[] = {K1/NPARTS, K1%NPARTS};
00273     
00274     //parallelize the following loop using CkLoop
00275     void *params[] = {this, q_arr, recips, partialEnergy, partialVirial, unitDist};
00276 
00277 #if     USE_CKLOOP
00278     CkLoop_Parallelize(compute_energy_orthogonal_ckloop, 6, (void *)params, NPARTS, 0, NPARTS-1);
00279 #endif
00280 /*  
00281     //The transformed loop used to compute energy
00282     int unit = K1/NPARTS;
00283     int remains = K1%NPARTS;  
00284     for(int i=0; i<NPARTS; i++){
00285         int k1from, k1to;
00286         if(i<remains){
00287             k1from = i*(unit+1);
00288             k1to = k1from+unit;
00289         }else{
00290             k1from = remains*(unit+1)+(i-remains)*unit;
00291             k1to = k1from+unit-1;
00292         }
00293         double *pEnergy = partialEnergy+i;
00294         double *pVirial = partialVirial+i*6;
00295         compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00296     }
00297 */    
00298     
00299     for(int i=0; i<NPARTS; i++){
00300         v0 += partialVirial[i*6+0];
00301         v1 += partialVirial[i*6+1];
00302         v2 += partialVirial[i*6+2];
00303         v3 += partialVirial[i*6+3];
00304         v4 += partialVirial[i*6+4];
00305         v5 += partialVirial[i*6+5];
00306         energy += partialEnergy[i];
00307     }
00308     
00309     virial[0] = v0;
00310     virial[1] = v1;
00311     virial[2] = v2;
00312     virial[3] = v3;
00313     virial[4] = v4;
00314     virial[5] = v5;
00315     return energy;
00316 }
00317 
00318 double PmeKSpace::compute_energy(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00319   double energy = 0.0;
00320   double v0 = 0.;
00321   double v1 = 0.;
00322   double v2 = 0.;
00323   double v3 = 0.;
00324   double v4 = 0.;
00325   double v5 = 0.;
00326 
00327   int n;
00328   int k1, k2, k3, ind;
00329   int K1, K2, K3;
00330 
00331   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00332 
00333   i_pi_volume = 1.0/(M_PI * lattice.volume());
00334   piob = M_PI/ewald;
00335   piob *= piob;
00336 
00337   if ( lattice.orthogonal() ) {
00338   // if ( 0 ) { // JCP FOR TESTING
00339     //This branch is the usual call path.
00340 #if     USE_CKLOOP
00341     int useCkLoop = Node::Object()->simParameters->useCkLoop;
00342     if(useCkLoop>=CKLOOP_CTRL_PME_KSPACE){
00343         return compute_energy_orthogonal_helper(q_arr, lattice, ewald, virial);
00344     }
00345 #endif    
00346     double recipx = lattice.a_r().x;
00347     double recipy = lattice.b_r().y;
00348     double recipz = lattice.c_r().z;
00349         
00350     init_exp(exp1, K1, 0, K1, recipx);
00351     init_exp(exp2, K2, k2_start, k2_end, recipy);
00352     init_exp(exp3, K3, k3_start, k3_end, recipz);
00353 
00354     ind = 0;
00355     for ( k1=0; k1<K1; ++k1 ) {
00356       double m1, m11, b1, xp1;
00357       b1 = bm1[k1];
00358       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00359       m1 = k1_s*recipx;
00360       m11 = m1*m1;
00361       xp1 = i_pi_volume*exp1[abs(k1_s)];
00362       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00363         double m2, m22, b1b2, xp2;
00364         b1b2 = b1*bm2[k2];
00365         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00366         m2 = k2_s*recipy;
00367         m22 = m2*m2;
00368         xp2 = exp2[abs(k2_s)]*xp1;
00369         k3 = k3_start;
00370         if ( k1==0 && k2==0 && k3==0 ) {
00371           q_arr[ind++] = 0.0;
00372           q_arr[ind++] = 0.0;
00373           ++k3;
00374         }
00375         for ( ; k3<k3_end; ++k3 ) {
00376           double m3, m33, xp3, msq, imsq, vir, fac;
00377           double theta3, theta, q2, qr, qc, C;
00378           theta3 = bm3[k3] *b1b2;
00379           m3 = k3*recipz;
00380           m33 = m3*m3;
00381           xp3 = exp3[k3];
00382           qr = q_arr[ind]; qc=q_arr[ind+1];
00383           q2 = 2*(qr*qr + qc*qc)*theta3;
00384           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00385           msq = m11 + m22 + m33;
00386           imsq = 1.0/msq;
00387           C = xp2*xp3*imsq;
00388           theta = theta3*C;
00389           q_arr[ind] *= theta;
00390           q_arr[ind+1] *= theta;
00391           vir = -2*(piob+imsq);
00392           fac = q2*C;
00393           energy += fac;
00394           v0 += fac*(1.0+vir*m11);
00395           v1 += fac*vir*m1*m2;
00396           v2 += fac*vir*m1*m3;
00397           v3 += fac*(1.0+vir*m22);
00398           v4 += fac*vir*m2*m3;
00399           v5 += fac*(1.0+vir*m33);
00400           ind += 2;
00401         }
00402       }
00403     }
00404     
00405   } else if ( cross(lattice.a(),lattice.b()).unit() == lattice.c().unit() ) {
00406   // } else if ( 0 ) { // JCP FOR TESTING
00407     Vector recip1 = lattice.a_r();
00408     Vector recip2 = lattice.b_r();
00409     Vector recip3 = lattice.c_r();
00410     double recip3_x = recip3.x;
00411     double recip3_y = recip3.y;
00412     double recip3_z = recip3.z;
00413     init_exp(exp3, K3, k3_start, k3_end, recip3.length());
00414 
00415     ind = 0;
00416     for ( k1=0; k1<K1; ++k1 ) {
00417       double b1; Vector m1;
00418       b1 = bm1[k1];
00419       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00420       m1 = k1_s*recip1;
00421       // xp1 = i_pi_volume*exp1[abs(k1_s)];
00422       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00423         double xp2, b1b2, m2_x, m2_y, m2_z;
00424         b1b2 = b1*bm2[k2];
00425         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00426         m2_x = m1.x + k2_s*recip2.x;
00427         m2_y = m1.y + k2_s*recip2.y;
00428         m2_z = m1.z + k2_s*recip2.z;
00429         // xp2 = exp2[abs(k2_s)]*xp1;
00430         xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
00431         k3 = k3_start;
00432         if ( k1==0 && k2==0 && k3==0 ) {
00433           q_arr[ind++] = 0.0;
00434           q_arr[ind++] = 0.0;
00435           ++k3;
00436         }
00437         for ( ; k3<k3_end; ++k3 ) {
00438           double xp3, msq, imsq, vir, fac;
00439           double theta3, theta, q2, qr, qc, C;
00440           double m_x, m_y, m_z;
00441           theta3 = bm3[k3] *b1b2;
00442           m_x = m2_x + k3*recip3_x;
00443           m_y = m2_y + k3*recip3_y;
00444           m_z = m2_z + k3*recip3_z;
00445           msq = m_x*m_x + m_y*m_y + m_z*m_z;
00446           xp3 = exp3[k3];
00447           qr = q_arr[ind]; qc=q_arr[ind+1];
00448           q2 = 2*(qr*qr + qc*qc)*theta3;
00449           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00450           imsq = 1.0/msq;
00451           C = xp2*xp3*imsq;
00452           theta = theta3*C;
00453           q_arr[ind] *= theta;
00454           q_arr[ind+1] *= theta;
00455           vir = -2*(piob+imsq);
00456           fac = q2*C;
00457           energy += fac;
00458           v0 += fac*(1.0+vir*m_x*m_x);
00459           v1 += fac*vir*m_x*m_y;
00460           v2 += fac*vir*m_x*m_z;
00461           v3 += fac*(1.0+vir*m_y*m_y);
00462           v4 += fac*vir*m_y*m_z;
00463           v5 += fac*(1.0+vir*m_z*m_z);
00464           ind += 2;
00465         }
00466       }
00467     }
00468 
00469   } else {
00470     Vector recip1 = lattice.a_r();
00471     Vector recip2 = lattice.b_r();
00472     Vector recip3 = lattice.c_r();
00473     double recip3_x = recip3.x;
00474     double recip3_y = recip3.y;
00475     double recip3_z = recip3.z;
00476 
00477     ind = 0;
00478     for ( k1=0; k1<K1; ++k1 ) {
00479       double b1; Vector m1;
00480       b1 = bm1[k1];
00481       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00482       m1 = k1_s*recip1;
00483       // xp1 = i_pi_volume*exp1[abs(k1_s)];
00484       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00485         double b1b2, m2_x, m2_y, m2_z;
00486         b1b2 = b1*bm2[k2];
00487         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00488         m2_x = m1.x + k2_s*recip2.x;
00489         m2_y = m1.y + k2_s*recip2.y;
00490         m2_z = m1.z + k2_s*recip2.z;
00491         // xp2 = exp2[abs(k2_s)]*xp1;
00492         k3 = k3_start;
00493         if ( k1==0 && k2==0 && k3==0 ) {
00494           q_arr[ind++] = 0.0;
00495           q_arr[ind++] = 0.0;
00496           ++k3;
00497         }
00498         for ( ; k3<k3_end; ++k3 ) {
00499           double xp3, msq, imsq, vir, fac;
00500           double theta3, theta, q2, qr, qc, C;
00501           double m_x, m_y, m_z;
00502           theta3 = bm3[k3] *b1b2;
00503           m_x = m2_x + k3*recip3_x;
00504           m_y = m2_y + k3*recip3_y;
00505           m_z = m2_z + k3*recip3_z;
00506           msq = m_x*m_x + m_y*m_y + m_z*m_z;
00507           // xp3 = exp3[k3];
00508           xp3 = i_pi_volume*exp(-piob*msq);
00509           qr = q_arr[ind]; qc=q_arr[ind+1];
00510           q2 = 2*(qr*qr + qc*qc)*theta3;
00511           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00512           imsq = 1.0/msq;
00513           C = xp3*imsq;
00514           theta = theta3*C;
00515           q_arr[ind] *= theta;
00516           q_arr[ind+1] *= theta;
00517           vir = -2*(piob+imsq);
00518           fac = q2*C;
00519           energy += fac;
00520           v0 += fac*(1.0+vir*m_x*m_x);
00521           v1 += fac*vir*m_x*m_y;
00522           v2 += fac*vir*m_x*m_z;
00523           v3 += fac*(1.0+vir*m_y*m_y);
00524           v4 += fac*vir*m_y*m_z;
00525           v5 += fac*(1.0+vir*m_z*m_z);
00526           ind += 2;
00527         }
00528       }
00529     }
00530 
00531   }
00532 
00533   virial[0] = 0.5 * v0;
00534   virial[1] = 0.5 * v1;
00535   virial[2] = 0.5 * v2;
00536   virial[3] = 0.5 * v3;
00537   virial[4] = 0.5 * v4;
00538   virial[5] = 0.5 * v5;
00539   return 0.5*energy;
00540 }
00541 
00542 
00543 void PmeKSpace::init_exp(double *xp, int K, int k_start, int k_end, double recip) {
00544   int i;
00545   double fac;
00546   fac = -piob*recip*recip;
00547   int i_start = k_start;
00548   int i_end = k_end;
00549   if ( k_start > K/2 ) {
00550     i_start = K - k_end + 1;
00551     i_end = K - k_start + 1;
00552   } else if ( k_end > K/2 ) {
00553     i_end = K/2 + 1;
00554     i_start = K - k_end + 1;
00555     if ( k_start < i_start ) i_start = k_start;
00556   }
00557 
00558   for (i=i_start; i < i_end; i++)
00559     xp[i] = exp(fac*i*i);
00560 } 

Generated on Thu May 23 04:07:18 2013 for NAMD by  doxygen 1.3.9.1