Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

PmeKSpace.C

Go to the documentation of this file.
00001 
00007 #include "PmeKSpace.h"
00008 #include <math.h>
00009 #include <stdlib.h>
00010 
00011 #ifndef _NO_ALLOCA_H
00012 #include <alloca.h>
00013 #endif
00014 
00015 #include "SimParameters.h" 
00016 #include "Node.h"  
00017 #include "ComputeMoaMgr.decl.h"  
00018 
00019 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00020 
00021 static void dftmod(double *bsp_mod, double *bsp_arr, int nfft) {
00022   int j, k;
00023   double twopi, arg, sum1, sum2;
00024   double infft = 1.0/nfft;
00025 /* Computes the modulus of the discrete fourier transform of bsp_arr, */
00026 /*  storing it into bsp_mod */
00027   twopi =  2.0 * M_PI;
00028 
00029   for (k = 0; k <nfft; ++k) {
00030     sum1 = 0.;
00031     sum2 = 0.;
00032     for (j = 0; j < nfft; ++j) {
00033       arg = twopi * k * j * infft;
00034       sum1 += bsp_arr[j] * cos(arg);
00035       sum2 += bsp_arr[j] * sin(arg);
00036     }
00037     bsp_mod[k] = sum1*sum1 + sum2*sum2;
00038   }
00039 }
00040 
00041 static void compute_b_moduli(double *bm, int K, int order) {
00042   int i;
00043   double fr[3];
00044 
00045   double *M = new double[3*order];
00046   double *dM = new double[3*order];
00047   double *scratch = new double[K];
00048 
00049   fr[0]=fr[1]=fr[2]=0.0;
00050   compute_b_spline(fr,M,dM,order);  
00051   for (i=0; i<order; i++) bm[i] = M[i];
00052   for (i=order; i<K; i++) bm[i] = 0.0;
00053   dftmod(scratch, bm, K);
00054   for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
00055 
00056 
00057   delete [] scratch;
00058   delete [] dM;
00059   delete [] M;
00060 }
00061 
00062 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end) 
00063   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00064         k3_start(K3_start), k3_end(K3_end) {
00065   int K1, K2, K3, order;
00066   K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00067 
00068   bm1 = new double[K1];
00069   bm2 = new double[K2];
00070   bm3 = new double[K3];
00071 
00072   exp1 = new double[K1/2 + 1];
00073   exp2 = new double[K2/2 + 1];
00074   exp3 = new double[K3/2 + 1];
00075 
00076   compute_b_moduli(bm1, K1, order);
00077   compute_b_moduli(bm2, K2, order);
00078   compute_b_moduli(bm3, K3, order);
00079 
00080 }
00081 
00082 #ifdef OPENATOM_VERSION
00083 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end, CProxy_ComputeMoaMgr moaProxy) 
00084   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00085         k3_start(K3_start), k3_end(K3_end) {
00086   int K1, K2, K3, order;
00087   K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00088 
00089   SimParameters *simParams = Node::Object()->simParameters;
00090 
00091   bm1 = new double[K1];
00092   bm2 = new double[K2];
00093   bm3 = new double[K3];
00094 
00095   exp1 = new double[K1/2 + 1];
00096   exp2 = new double[K2/2 + 1];
00097   exp3 = new double[K3/2 + 1];
00098 
00099   compute_b_moduli(bm1, K1, order);
00100   compute_b_moduli(bm2, K2, order);
00101   compute_b_moduli(bm3, K3, order);
00102 
00103   if ( simParams->openatomOn ) {
00104     CkPrintf("######################################################\n");
00105     CkPrintf("Entering recvBmX loop on processor %d \n", CkMyPe() );
00106     int i;
00107     for ( i=0; i<=K1; ++i) {
00108     CkPrintf("bm1 value pre-send %d = %d \n", i, bm1[i] );
00109     }
00110     for ( i=0; i<=K1; ++i) {
00111     CkPrintf("bm1 reference pre-send %d = %d \n", i, &bm1[i] );
00112     }
00113     CkPrintf("bm1: %d \n", *bm1);    
00114     moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3, order);
00115 //    qmmmcp->recvBm1(bm1, K1, order);
00116 //    qmmmcp->recvBm2(bm2, K2, order);
00117 //    qmmmcp->recvBm3(bm3, K3, order);
00118   }
00119     
00120 
00121 }
00122 #endif // OPENATOM_VERSION
00123 
00124 PmeKSpace::~PmeKSpace() {
00125   delete [] bm1;
00126   delete [] bm2;
00127   delete [] bm3;
00128   
00129   delete [] exp1;
00130   delete [] exp2;
00131   delete [] exp3;
00132 }
00133 
00134 void PmeKSpace::compute_energy_orthogonal_subset(float q_arr[], double *recips, double *partialVirial, double *partialEnergy, int k1from, int k1to){
00135 
00136     double energy = 0.0;
00137     double v0 = 0.;
00138     double v1 = 0.;
00139     double v2 = 0.;
00140     double v3 = 0.;
00141     double v4 = 0.;
00142     double v5 = 0.;
00143     
00144     int k1, k2, k3;
00145     int K1, K2, K3;
00146     K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00147     
00148     double recipx = recips[0];
00149     double recipy = recips[1];
00150     double recipz = recips[2];
00151 
00152     int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
00153         
00154     for ( k1=k1from; k1<=k1to; ++k1 ) {        
00155         double m1, m11, b1, xp1;
00156         b1 = bm1[k1];
00157         int k1_s = k1<=K1/2 ? k1 : k1-K1;
00158         m1 = k1_s*recipx;
00159         m11 = m1*m1;
00160         xp1 = i_pi_volume*exp1[abs(k1_s)];
00161         for ( k2=k2_start; k2<k2_end; ++k2 ) {
00162             double m2, m22, b1b2, xp2;
00163             b1b2 = b1*bm2[k2];
00164             int k2_s = k2<=K2/2 ? k2 : k2-K2;
00165             m2 = k2_s*recipy;
00166             m22 = m2*m2;
00167             xp2 = exp2[abs(k2_s)]*xp1;
00168 
00169             k3 = k3_start;
00170             if (k1==0 && k2==0 && k3==0) {
00171               q_arr[ind++] = 0.0;
00172               q_arr[ind++] = 0.0;
00173               ++k3;
00174             }
00175             for (; k3<k3_end; ++k3 ) {
00176               double m3, m33, xp3, msq, imsq, vir, fac;
00177               double theta3, theta, q2, qr, qc, C;
00178               theta3 = bm3[k3] *b1b2;
00179               m3 = k3*recipz;
00180               m33 = m3*m3;
00181               xp3 = exp3[k3];
00182               qr = q_arr[ind]; qc=q_arr[ind+1];
00183               q2 = 2*(qr*qr + qc*qc)*theta3;
00184               if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00185               msq = m11 + m22 + m33;
00186               imsq = 1.0/msq;
00187               C = xp2*xp3*imsq;
00188               theta = theta3*C;
00189               q_arr[ind] *= theta;
00190               q_arr[ind+1] *= theta;
00191               vir = -2*(piob+imsq);
00192               fac = q2*C;
00193               energy += fac;
00194               v0 += fac*(1.0+vir*m11);
00195               v1 += fac*vir*m1*m2;
00196               v2 += fac*vir*m1*m3;
00197               v3 += fac*(1.0+vir*m22);
00198               v4 += fac*vir*m2*m3;
00199               v5 += fac*(1.0+vir*m33);
00200               ind += 2;
00201             }
00202         }
00203     }
00204     
00205     *partialEnergy = 0.5*energy;
00206     partialVirial[0] = 0.5*v0;
00207     partialVirial[1] = 0.5*v1;
00208     partialVirial[2] = 0.5*v2;
00209     partialVirial[3] = 0.5*v3;
00210     partialVirial[4] = 0.5*v4;
00211     partialVirial[5] = 0.5*v5;
00212 }
00213 static inline void compute_energy_orthogonal_nodehelper(int first, int last, void *result, int paraNum, void *param){
00214     CmiAssert(first==last);
00215     void **params = (void **)param;
00216     PmeKSpace *kspace = (PmeKSpace *)params[0];
00217     float *q_arr = (float *)params[1];
00218     double *recips = (double *)params[2];
00219     double *partialEnergy = (double *)params[3];
00220     double *partialVirial = (double *)params[4];
00221     int *unitDist = (int *)params[5];
00222     
00223     int i = first;
00224     int unit = unitDist[0];
00225     int remains = unitDist[1];
00226     int k1from, k1to;
00227     if(i<remains){
00228         k1from = i*(unit+1);
00229         k1to = k1from+unit;
00230     }else{
00231         k1from = remains*(unit+1)+(i-remains)*unit;
00232         k1to = k1from+unit-1;
00233     }
00234     double *pEnergy = partialEnergy+i;
00235     double *pVirial = partialVirial+i*6;
00236     kspace->compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00237 }
00238 
00239 double PmeKSpace::compute_energy_orthogonal_helper(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00240   double energy = 0.0;
00241   double v0 = 0.;
00242   double v1 = 0.;
00243   double v2 = 0.;
00244   double v3 = 0.;
00245   double v4 = 0.;
00246   double v5 = 0.;
00247 
00248   int n;
00249   int k1, k2, k3, ind;
00250   int K1, K2, K3;
00251 
00252   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00253 
00254   i_pi_volume = 1.0/(M_PI * lattice.volume());
00255   piob = M_PI/ewald;
00256   piob *= piob;
00257 
00258 
00259     double recipx = lattice.a_r().x;
00260     double recipy = lattice.b_r().y;
00261     double recipz = lattice.c_r().z;
00262         
00263     init_exp(exp1, K1, 0, K1, recipx);
00264     init_exp(exp2, K2, k2_start, k2_end, recipy);
00265     init_exp(exp3, K3, k3_start, k3_end, recipz);
00266 
00267     double recips[] = {recipx, recipy, recipz};
00268     const int NPARTS=CmiMyNodeSize(); //this controls the granularity of loop parallelism
00269     ALLOCA(double, partialEnergy, NPARTS);
00270     ALLOCA(double, partialVirial, 6*NPARTS);
00271     int unitDist[] = {K1/NPARTS, K1%NPARTS};
00272     
00273     //parallelize the following loop using NodeHelper
00274     void *params[] = {this, q_arr, recips, partialEnergy, partialVirial, unitDist};
00275 
00276 #if     USE_NODEHELPER
00277     CProxy_FuncNodeHelper nodeHelper = CkpvAccess(BOCclass_group).nodeHelper;
00278     NodeHelper_Parallelize(nodeHelper, compute_energy_orthogonal_nodehelper, 6, (void *)params, NPARTS, 0, NPARTS-1);
00279 #endif
00280 /*  
00281     //The transformed loop used to compute energy
00282     int unit = K1/NPARTS;
00283     int remains = K1%NPARTS;  
00284     for(int i=0; i<NPARTS; i++){
00285         int k1from, k1to;
00286         if(i<remains){
00287             k1from = i*(unit+1);
00288             k1to = k1from+unit;
00289         }else{
00290             k1from = remains*(unit+1)+(i-remains)*unit;
00291             k1to = k1from+unit-1;
00292         }
00293         double *pEnergy = partialEnergy+i;
00294         double *pVirial = partialVirial+i*6;
00295         compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00296     }
00297 */    
00298     
00299     for(int i=0; i<NPARTS; i++){
00300         v0 += partialVirial[i*6+0];
00301         v1 += partialVirial[i*6+1];
00302         v2 += partialVirial[i*6+2];
00303         v3 += partialVirial[i*6+3];
00304         v4 += partialVirial[i*6+4];
00305         v5 += partialVirial[i*6+5];
00306         energy += partialEnergy[i];
00307     }
00308     
00309     virial[0] = v0;
00310     virial[1] = v1;
00311     virial[2] = v2;
00312     virial[3] = v3;
00313     virial[4] = v4;
00314     virial[5] = v5;
00315     return energy;
00316 }
00317 
00318 double PmeKSpace::compute_energy(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00319   double energy = 0.0;
00320   double v0 = 0.;
00321   double v1 = 0.;
00322   double v2 = 0.;
00323   double v3 = 0.;
00324   double v4 = 0.;
00325   double v5 = 0.;
00326 
00327   int n;
00328   int k1, k2, k3, ind;
00329   int K1, K2, K3;
00330 
00331   K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00332 
00333   i_pi_volume = 1.0/(M_PI * lattice.volume());
00334   piob = M_PI/ewald;
00335   piob *= piob;
00336 
00337   if ( lattice.orthogonal() ) {
00338   // if ( 0 ) { // JCP FOR TESTING
00339     //This branch is the usual call path.
00340 #if     USE_NODEHELPER
00341     int useNodeHelper = Node::Object()->simParameters->useNodeHelper;
00342     if(useNodeHelper>=NDH_CTRL_PME_KSPACE){
00343         return compute_energy_orthogonal_helper(q_arr, lattice, ewald, virial);
00344     }
00345 #endif    
00346     double recipx = lattice.a_r().x;
00347     double recipy = lattice.b_r().y;
00348     double recipz = lattice.c_r().z;
00349         
00350     init_exp(exp1, K1, 0, K1, recipx);
00351     init_exp(exp2, K2, k2_start, k2_end, recipy);
00352     init_exp(exp3, K3, k3_start, k3_end, recipz);
00353 
00354     ind = 0;
00355     for ( k1=0; k1<K1; ++k1 ) {
00356       double m1, m11, b1, xp1;
00357       b1 = bm1[k1];
00358       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00359       m1 = k1_s*recipx;
00360       m11 = m1*m1;
00361       xp1 = i_pi_volume*exp1[abs(k1_s)];
00362       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00363         double m2, m22, b1b2, xp2;
00364         b1b2 = b1*bm2[k2];
00365         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00366         m2 = k2_s*recipy;
00367         m22 = m2*m2;
00368         xp2 = exp2[abs(k2_s)]*xp1;
00369         k3 = k3_start;
00370         if ( k1==0 && k2==0 && k3==0 ) {
00371           q_arr[ind++] = 0.0;
00372           q_arr[ind++] = 0.0;
00373           ++k3;
00374         }
00375         for ( ; k3<k3_end; ++k3 ) {
00376           double m3, m33, xp3, msq, imsq, vir, fac;
00377           double theta3, theta, q2, qr, qc, C;
00378           theta3 = bm3[k3] *b1b2;
00379           m3 = k3*recipz;
00380           m33 = m3*m3;
00381           xp3 = exp3[k3];
00382           qr = q_arr[ind]; qc=q_arr[ind+1];
00383           q2 = 2*(qr*qr + qc*qc)*theta3;
00384           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00385           msq = m11 + m22 + m33;
00386           imsq = 1.0/msq;
00387           C = xp2*xp3*imsq;
00388           theta = theta3*C;
00389           q_arr[ind] *= theta;
00390           q_arr[ind+1] *= theta;
00391           vir = -2*(piob+imsq);
00392           fac = q2*C;
00393           energy += fac;
00394           v0 += fac*(1.0+vir*m11);
00395           v1 += fac*vir*m1*m2;
00396           v2 += fac*vir*m1*m3;
00397           v3 += fac*(1.0+vir*m22);
00398           v4 += fac*vir*m2*m3;
00399           v5 += fac*(1.0+vir*m33);
00400           ind += 2;
00401         }
00402       }
00403     }
00404     
00405   } else if ( cross(lattice.a(),lattice.b()).unit() == lattice.c().unit() ) {
00406   // } else if ( 0 ) { // JCP FOR TESTING
00407     Vector recip1 = lattice.a_r();
00408     Vector recip2 = lattice.b_r();
00409     Vector recip3 = lattice.c_r();
00410     double recip3_x = recip3.x;
00411     double recip3_y = recip3.y;
00412     double recip3_z = recip3.z;
00413     init_exp(exp3, K3, k3_start, k3_end, recip3.length());
00414 
00415     ind = 0;
00416     for ( k1=0; k1<K1; ++k1 ) {
00417       double b1; Vector m1;
00418       b1 = bm1[k1];
00419       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00420       m1 = k1_s*recip1;
00421       // xp1 = i_pi_volume*exp1[abs(k1_s)];
00422       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00423         double xp2, b1b2, m2_x, m2_y, m2_z;
00424         b1b2 = b1*bm2[k2];
00425         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00426         m2_x = m1.x + k2_s*recip2.x;
00427         m2_y = m1.y + k2_s*recip2.y;
00428         m2_z = m1.z + k2_s*recip2.z;
00429         // xp2 = exp2[abs(k2_s)]*xp1;
00430         xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
00431         k3 = k3_start;
00432         if ( k1==0 && k2==0 && k3==0 ) {
00433           q_arr[ind++] = 0.0;
00434           q_arr[ind++] = 0.0;
00435           ++k3;
00436         }
00437         for ( ; k3<k3_end; ++k3 ) {
00438           double xp3, msq, imsq, vir, fac;
00439           double theta3, theta, q2, qr, qc, C;
00440           double m_x, m_y, m_z;
00441           theta3 = bm3[k3] *b1b2;
00442           m_x = m2_x + k3*recip3_x;
00443           m_y = m2_y + k3*recip3_y;
00444           m_z = m2_z + k3*recip3_z;
00445           msq = m_x*m_x + m_y*m_y + m_z*m_z;
00446           xp3 = exp3[k3];
00447           qr = q_arr[ind]; qc=q_arr[ind+1];
00448           q2 = 2*(qr*qr + qc*qc)*theta3;
00449           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00450           imsq = 1.0/msq;
00451           C = xp2*xp3*imsq;
00452           theta = theta3*C;
00453           q_arr[ind] *= theta;
00454           q_arr[ind+1] *= theta;
00455           vir = -2*(piob+imsq);
00456           fac = q2*C;
00457           energy += fac;
00458           v0 += fac*(1.0+vir*m_x*m_x);
00459           v1 += fac*vir*m_x*m_y;
00460           v2 += fac*vir*m_x*m_z;
00461           v3 += fac*(1.0+vir*m_y*m_y);
00462           v4 += fac*vir*m_y*m_z;
00463           v5 += fac*(1.0+vir*m_z*m_z);
00464           ind += 2;
00465         }
00466       }
00467     }
00468 
00469   } else {
00470     Vector recip1 = lattice.a_r();
00471     Vector recip2 = lattice.b_r();
00472     Vector recip3 = lattice.c_r();
00473     double recip3_x = recip3.x;
00474     double recip3_y = recip3.y;
00475     double recip3_z = recip3.z;
00476 
00477     ind = 0;
00478     for ( k1=0; k1<K1; ++k1 ) {
00479       double b1; Vector m1;
00480       b1 = bm1[k1];
00481       int k1_s = k1<=K1/2 ? k1 : k1-K1;
00482       m1 = k1_s*recip1;
00483       // xp1 = i_pi_volume*exp1[abs(k1_s)];
00484       for ( k2=k2_start; k2<k2_end; ++k2 ) {
00485         double b1b2, m2_x, m2_y, m2_z;
00486         b1b2 = b1*bm2[k2];
00487         int k2_s = k2<=K2/2 ? k2 : k2-K2;
00488         m2_x = m1.x + k2_s*recip2.x;
00489         m2_y = m1.y + k2_s*recip2.y;
00490         m2_z = m1.z + k2_s*recip2.z;
00491         // xp2 = exp2[abs(k2_s)]*xp1;
00492         k3 = k3_start;
00493         if ( k1==0 && k2==0 && k3==0 ) {
00494           q_arr[ind++] = 0.0;
00495           q_arr[ind++] = 0.0;
00496           ++k3;
00497         }
00498         for ( ; k3<k3_end; ++k3 ) {
00499           double xp3, msq, imsq, vir, fac;
00500           double theta3, theta, q2, qr, qc, C;
00501           double m_x, m_y, m_z;
00502           theta3 = bm3[k3] *b1b2;
00503           m_x = m2_x + k3*recip3_x;
00504           m_y = m2_y + k3*recip3_y;
00505           m_z = m2_z + k3*recip3_z;
00506           msq = m_x*m_x + m_y*m_y + m_z*m_z;
00507           // xp3 = exp3[k3];
00508           xp3 = i_pi_volume*exp(-piob*msq);
00509           qr = q_arr[ind]; qc=q_arr[ind+1];
00510           q2 = 2*(qr*qr + qc*qc)*theta3;
00511           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00512           imsq = 1.0/msq;
00513           C = xp3*imsq;
00514           theta = theta3*C;
00515           q_arr[ind] *= theta;
00516           q_arr[ind+1] *= theta;
00517           vir = -2*(piob+imsq);
00518           fac = q2*C;
00519           energy += fac;
00520           v0 += fac*(1.0+vir*m_x*m_x);
00521           v1 += fac*vir*m_x*m_y;
00522           v2 += fac*vir*m_x*m_z;
00523           v3 += fac*(1.0+vir*m_y*m_y);
00524           v4 += fac*vir*m_y*m_z;
00525           v5 += fac*(1.0+vir*m_z*m_z);
00526           ind += 2;
00527         }
00528       }
00529     }
00530 
00531   }
00532 
00533   virial[0] = 0.5 * v0;
00534   virial[1] = 0.5 * v1;
00535   virial[2] = 0.5 * v2;
00536   virial[3] = 0.5 * v3;
00537   virial[4] = 0.5 * v4;
00538   virial[5] = 0.5 * v5;
00539   return 0.5*energy;
00540 }
00541 
00542 
00543 void PmeKSpace::init_exp(double *xp, int K, int k_start, int k_end, double recip) {
00544   int i;
00545   double fac;
00546   fac = -piob*recip*recip;
00547   int i_start = k_start;
00548   int i_end = k_end;
00549   if ( k_start > K/2 ) {
00550     i_start = K - k_end + 1;
00551     i_end = K - k_start + 1;
00552   } else if ( k_end > K/2 ) {
00553     i_end = K/2 + 1;
00554     i_start = K - k_end + 1;
00555     if ( k_start < i_start ) i_start = k_start;
00556   }
00557 
00558   for (i=i_start; i < i_end; i++)
00559     xp[i] = exp(fac*i*i);
00560 } 

Generated on Fri May 25 04:07:16 2012 for NAMD by  doxygen 1.3.9.1