00001
00007 #include "PmeKSpace.h"
00008 #include <math.h>
00009 #include <stdlib.h>
00010
00011 #ifndef _NO_ALLOCA_H
00012 #include <alloca.h>
00013 #endif
00014
00015 #include "PmeBase.inl"
00016 #include "SimParameters.h"
00017 #include "Node.h"
00018 #include "ComputeMoaMgr.decl.h"
00019
00020 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00021
00022 static void dftmod(double *bsp_mod, double *bsp_arr, int nfft) {
00023 int j, k;
00024 double twopi, arg, sum1, sum2;
00025 double infft = 1.0/nfft;
00026
00027
00028 twopi = 2.0 * M_PI;
00029
00030 for (k = 0; k <nfft; ++k) {
00031 sum1 = 0.;
00032 sum2 = 0.;
00033 for (j = 0; j < nfft; ++j) {
00034 arg = twopi * k * j * infft;
00035 sum1 += bsp_arr[j] * cos(arg);
00036 sum2 += bsp_arr[j] * sin(arg);
00037 }
00038 bsp_mod[k] = sum1*sum1 + sum2*sum2;
00039 }
00040 }
00041
00042 static void compute_b_moduli(double *bm, int K, int order) {
00043 int i;
00044 double fr[3];
00045
00046 double *M = new double[3*order];
00047 double *dM = new double[3*order];
00048 double *scratch = new double[K];
00049
00050 fr[0]=fr[1]=fr[2]=0.0;
00051 compute_b_spline(fr,M,dM,order);
00052 for (i=0; i<order; i++) bm[i] = M[i];
00053 for (i=order; i<K; i++) bm[i] = 0.0;
00054 dftmod(scratch, bm, K);
00055 for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
00056
00057
00058 delete [] scratch;
00059 delete [] dM;
00060 delete [] M;
00061 }
00062
00063 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end)
00064 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00065 k3_start(K3_start), k3_end(K3_end) {
00066 int K1, K2, K3, order;
00067 K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00068
00069 bm1 = new double[K1];
00070 bm2 = new double[K2];
00071 bm3 = new double[K3];
00072
00073 exp1 = new double[K1/2 + 1];
00074 exp2 = new double[K2/2 + 1];
00075 exp3 = new double[K3/2 + 1];
00076
00077 compute_b_moduli(bm1, K1, order);
00078 compute_b_moduli(bm2, K2, order);
00079 compute_b_moduli(bm3, K3, order);
00080
00081 }
00082
00083 #ifdef OPENATOM_VERSION
00084 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end, CProxy_ComputeMoaMgr moaProxy)
00085 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00086 k3_start(K3_start), k3_end(K3_end) {
00087 int K1, K2, K3, order;
00088 K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00089
00090 SimParameters *simParams = Node::Object()->simParameters;
00091
00092 bm1 = new double[K1];
00093 bm2 = new double[K2];
00094 bm3 = new double[K3];
00095
00096 exp1 = new double[K1/2 + 1];
00097 exp2 = new double[K2/2 + 1];
00098 exp3 = new double[K3/2 + 1];
00099
00100 compute_b_moduli(bm1, K1, order);
00101 compute_b_moduli(bm2, K2, order);
00102 compute_b_moduli(bm3, K3, order);
00103
00104 if ( simParams->openatomOn ) {
00105 CkPrintf("######################################################\n");
00106 CkPrintf("Entering recvBmX loop on processor %d \n", CkMyPe() );
00107 int i;
00108 for ( i=0; i<=K1; ++i) {
00109 CkPrintf("bm1 value pre-send %d = %d \n", i, bm1[i] );
00110 }
00111 for ( i=0; i<=K1; ++i) {
00112 CkPrintf("bm1 reference pre-send %d = %d \n", i, &bm1[i] );
00113 }
00114 CkPrintf("bm1: %d \n", *bm1);
00115 moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3, order);
00116
00117
00118
00119 }
00120
00121
00122 }
00123 #endif // OPENATOM_VERSION
00124
00125 PmeKSpace::~PmeKSpace() {
00126 delete [] bm1;
00127 delete [] bm2;
00128 delete [] bm3;
00129
00130 delete [] exp1;
00131 delete [] exp2;
00132 delete [] exp3;
00133 }
00134
00135 void PmeKSpace::compute_energy_orthogonal_subset(float q_arr[], double *recips, double *partialVirial, double *partialEnergy, int k1from, int k1to){
00136
00137 double energy = 0.0;
00138 double v0 = 0.;
00139 double v1 = 0.;
00140 double v2 = 0.;
00141 double v3 = 0.;
00142 double v4 = 0.;
00143 double v5 = 0.;
00144
00145 int k1, k2, k3;
00146 int K1, K2, K3;
00147 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00148
00149 double recipx = recips[0];
00150 double recipy = recips[1];
00151 double recipz = recips[2];
00152
00153 int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
00154
00155 for ( k1=k1from; k1<=k1to; ++k1 ) {
00156 double m1, m11, b1, xp1;
00157 b1 = bm1[k1];
00158 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00159 m1 = k1_s*recipx;
00160 m11 = m1*m1;
00161 xp1 = i_pi_volume*exp1[abs(k1_s)];
00162 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00163 double m2, m22, b1b2, xp2;
00164 b1b2 = b1*bm2[k2];
00165 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00166 m2 = k2_s*recipy;
00167 m22 = m2*m2;
00168 xp2 = exp2[abs(k2_s)]*xp1;
00169
00170 k3 = k3_start;
00171 if (k1==0 && k2==0 && k3==0) {
00172 q_arr[ind++] = 0.0;
00173 q_arr[ind++] = 0.0;
00174 ++k3;
00175 }
00176 for (; k3<k3_end; ++k3 ) {
00177 double m3, m33, xp3, msq, imsq, vir, fac;
00178 double theta3, theta, q2, qr, qc, C;
00179 theta3 = bm3[k3] *b1b2;
00180 m3 = k3*recipz;
00181 m33 = m3*m3;
00182 xp3 = exp3[k3];
00183 qr = q_arr[ind]; qc=q_arr[ind+1];
00184 q2 = 2*(qr*qr + qc*qc)*theta3;
00185 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00186 msq = m11 + m22 + m33;
00187 imsq = 1.0/msq;
00188 C = xp2*xp3*imsq;
00189 theta = theta3*C;
00190 q_arr[ind] *= theta;
00191 q_arr[ind+1] *= theta;
00192 vir = -2*(piob+imsq);
00193 fac = q2*C;
00194 energy += fac;
00195 v0 += fac*(1.0+vir*m11);
00196 v1 += fac*vir*m1*m2;
00197 v2 += fac*vir*m1*m3;
00198 v3 += fac*(1.0+vir*m22);
00199 v4 += fac*vir*m2*m3;
00200 v5 += fac*(1.0+vir*m33);
00201 ind += 2;
00202 }
00203 }
00204 }
00205
00206 *partialEnergy = 0.5*energy;
00207 partialVirial[0] = 0.5*v0;
00208 partialVirial[1] = 0.5*v1;
00209 partialVirial[2] = 0.5*v2;
00210 partialVirial[3] = 0.5*v3;
00211 partialVirial[4] = 0.5*v4;
00212 partialVirial[5] = 0.5*v5;
00213 }
00214 static inline void compute_energy_orthogonal_ckloop(int first, int last, void *result, int paraNum, void *param){
00215 CmiAssert(first==last);
00216 void **params = (void **)param;
00217 PmeKSpace *kspace = (PmeKSpace *)params[0];
00218 float *q_arr = (float *)params[1];
00219 double *recips = (double *)params[2];
00220 double *partialEnergy = (double *)params[3];
00221 double *partialVirial = (double *)params[4];
00222 int *unitDist = (int *)params[5];
00223
00224 int i = first;
00225 int unit = unitDist[0];
00226 int remains = unitDist[1];
00227 int k1from, k1to;
00228 if(i<remains){
00229 k1from = i*(unit+1);
00230 k1to = k1from+unit;
00231 }else{
00232 k1from = remains*(unit+1)+(i-remains)*unit;
00233 k1to = k1from+unit-1;
00234 }
00235 double *pEnergy = partialEnergy+i;
00236 double *pVirial = partialVirial+i*6;
00237 kspace->compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00238 }
00239
00240 double PmeKSpace::compute_energy_orthogonal_helper(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00241 double energy = 0.0;
00242 double v0 = 0.;
00243 double v1 = 0.;
00244 double v2 = 0.;
00245 double v3 = 0.;
00246 double v4 = 0.;
00247 double v5 = 0.;
00248
00249 int n;
00250 int k1, k2, k3, ind;
00251 int K1, K2, K3;
00252
00253 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00254
00255 i_pi_volume = 1.0/(M_PI * lattice.volume());
00256 piob = M_PI/ewald;
00257 piob *= piob;
00258
00259
00260 double recipx = lattice.a_r().x;
00261 double recipy = lattice.b_r().y;
00262 double recipz = lattice.c_r().z;
00263
00264 init_exp(exp1, K1, 0, K1, recipx);
00265 init_exp(exp2, K2, k2_start, k2_end, recipy);
00266 init_exp(exp3, K3, k3_start, k3_end, recipz);
00267
00268 double recips[] = {recipx, recipy, recipz};
00269 const int NPARTS=CmiMyNodeSize();
00270 ALLOCA(double, partialEnergy, NPARTS);
00271 ALLOCA(double, partialVirial, 6*NPARTS);
00272 int unitDist[] = {K1/NPARTS, K1%NPARTS};
00273
00274
00275 void *params[] = {this, q_arr, recips, partialEnergy, partialVirial, unitDist};
00276
00277 #if USE_CKLOOP
00278 CkLoop_Parallelize(compute_energy_orthogonal_ckloop, 6, (void *)params, NPARTS, 0, NPARTS-1);
00279 #endif
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299 for(int i=0; i<NPARTS; i++){
00300 v0 += partialVirial[i*6+0];
00301 v1 += partialVirial[i*6+1];
00302 v2 += partialVirial[i*6+2];
00303 v3 += partialVirial[i*6+3];
00304 v4 += partialVirial[i*6+4];
00305 v5 += partialVirial[i*6+5];
00306 energy += partialEnergy[i];
00307 }
00308
00309 virial[0] = v0;
00310 virial[1] = v1;
00311 virial[2] = v2;
00312 virial[3] = v3;
00313 virial[4] = v4;
00314 virial[5] = v5;
00315 return energy;
00316 }
00317
00318 double PmeKSpace::compute_energy(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00319 double energy = 0.0;
00320 double v0 = 0.;
00321 double v1 = 0.;
00322 double v2 = 0.;
00323 double v3 = 0.;
00324 double v4 = 0.;
00325 double v5 = 0.;
00326
00327 int n;
00328 int k1, k2, k3, ind;
00329 int K1, K2, K3;
00330
00331 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00332
00333 i_pi_volume = 1.0/(M_PI * lattice.volume());
00334 piob = M_PI/ewald;
00335 piob *= piob;
00336
00337 if ( lattice.orthogonal() ) {
00338
00339
00340 #if USE_CKLOOP
00341 int useCkLoop = Node::Object()->simParameters->useCkLoop;
00342 if(useCkLoop>=CKLOOP_CTRL_PME_KSPACE){
00343 return compute_energy_orthogonal_helper(q_arr, lattice, ewald, virial);
00344 }
00345 #endif
00346 double recipx = lattice.a_r().x;
00347 double recipy = lattice.b_r().y;
00348 double recipz = lattice.c_r().z;
00349
00350 init_exp(exp1, K1, 0, K1, recipx);
00351 init_exp(exp2, K2, k2_start, k2_end, recipy);
00352 init_exp(exp3, K3, k3_start, k3_end, recipz);
00353
00354 ind = 0;
00355 for ( k1=0; k1<K1; ++k1 ) {
00356 double m1, m11, b1, xp1;
00357 b1 = bm1[k1];
00358 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00359 m1 = k1_s*recipx;
00360 m11 = m1*m1;
00361 xp1 = i_pi_volume*exp1[abs(k1_s)];
00362 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00363 double m2, m22, b1b2, xp2;
00364 b1b2 = b1*bm2[k2];
00365 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00366 m2 = k2_s*recipy;
00367 m22 = m2*m2;
00368 xp2 = exp2[abs(k2_s)]*xp1;
00369 k3 = k3_start;
00370 if ( k1==0 && k2==0 && k3==0 ) {
00371 q_arr[ind++] = 0.0;
00372 q_arr[ind++] = 0.0;
00373 ++k3;
00374 }
00375 for ( ; k3<k3_end; ++k3 ) {
00376 double m3, m33, xp3, msq, imsq, vir, fac;
00377 double theta3, theta, q2, qr, qc, C;
00378 theta3 = bm3[k3] *b1b2;
00379 m3 = k3*recipz;
00380 m33 = m3*m3;
00381 xp3 = exp3[k3];
00382 qr = q_arr[ind]; qc=q_arr[ind+1];
00383 q2 = 2*(qr*qr + qc*qc)*theta3;
00384 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00385 msq = m11 + m22 + m33;
00386 imsq = 1.0/msq;
00387 C = xp2*xp3*imsq;
00388 theta = theta3*C;
00389 q_arr[ind] *= theta;
00390 q_arr[ind+1] *= theta;
00391 vir = -2*(piob+imsq);
00392 fac = q2*C;
00393 energy += fac;
00394 v0 += fac*(1.0+vir*m11);
00395 v1 += fac*vir*m1*m2;
00396 v2 += fac*vir*m1*m3;
00397 v3 += fac*(1.0+vir*m22);
00398 v4 += fac*vir*m2*m3;
00399 v5 += fac*(1.0+vir*m33);
00400 ind += 2;
00401 }
00402 }
00403 }
00404
00405 } else if ( cross(lattice.a(),lattice.b()).unit() == lattice.c().unit() ) {
00406
00407 Vector recip1 = lattice.a_r();
00408 Vector recip2 = lattice.b_r();
00409 Vector recip3 = lattice.c_r();
00410 double recip3_x = recip3.x;
00411 double recip3_y = recip3.y;
00412 double recip3_z = recip3.z;
00413 init_exp(exp3, K3, k3_start, k3_end, recip3.length());
00414
00415 ind = 0;
00416 for ( k1=0; k1<K1; ++k1 ) {
00417 double b1; Vector m1;
00418 b1 = bm1[k1];
00419 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00420 m1 = k1_s*recip1;
00421
00422 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00423 double xp2, b1b2, m2_x, m2_y, m2_z;
00424 b1b2 = b1*bm2[k2];
00425 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00426 m2_x = m1.x + k2_s*recip2.x;
00427 m2_y = m1.y + k2_s*recip2.y;
00428 m2_z = m1.z + k2_s*recip2.z;
00429
00430 xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
00431 k3 = k3_start;
00432 if ( k1==0 && k2==0 && k3==0 ) {
00433 q_arr[ind++] = 0.0;
00434 q_arr[ind++] = 0.0;
00435 ++k3;
00436 }
00437 for ( ; k3<k3_end; ++k3 ) {
00438 double xp3, msq, imsq, vir, fac;
00439 double theta3, theta, q2, qr, qc, C;
00440 double m_x, m_y, m_z;
00441 theta3 = bm3[k3] *b1b2;
00442 m_x = m2_x + k3*recip3_x;
00443 m_y = m2_y + k3*recip3_y;
00444 m_z = m2_z + k3*recip3_z;
00445 msq = m_x*m_x + m_y*m_y + m_z*m_z;
00446 xp3 = exp3[k3];
00447 qr = q_arr[ind]; qc=q_arr[ind+1];
00448 q2 = 2*(qr*qr + qc*qc)*theta3;
00449 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00450 imsq = 1.0/msq;
00451 C = xp2*xp3*imsq;
00452 theta = theta3*C;
00453 q_arr[ind] *= theta;
00454 q_arr[ind+1] *= theta;
00455 vir = -2*(piob+imsq);
00456 fac = q2*C;
00457 energy += fac;
00458 v0 += fac*(1.0+vir*m_x*m_x);
00459 v1 += fac*vir*m_x*m_y;
00460 v2 += fac*vir*m_x*m_z;
00461 v3 += fac*(1.0+vir*m_y*m_y);
00462 v4 += fac*vir*m_y*m_z;
00463 v5 += fac*(1.0+vir*m_z*m_z);
00464 ind += 2;
00465 }
00466 }
00467 }
00468
00469 } else {
00470 Vector recip1 = lattice.a_r();
00471 Vector recip2 = lattice.b_r();
00472 Vector recip3 = lattice.c_r();
00473 double recip3_x = recip3.x;
00474 double recip3_y = recip3.y;
00475 double recip3_z = recip3.z;
00476
00477 ind = 0;
00478 for ( k1=0; k1<K1; ++k1 ) {
00479 double b1; Vector m1;
00480 b1 = bm1[k1];
00481 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00482 m1 = k1_s*recip1;
00483
00484 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00485 double b1b2, m2_x, m2_y, m2_z;
00486 b1b2 = b1*bm2[k2];
00487 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00488 m2_x = m1.x + k2_s*recip2.x;
00489 m2_y = m1.y + k2_s*recip2.y;
00490 m2_z = m1.z + k2_s*recip2.z;
00491
00492 k3 = k3_start;
00493 if ( k1==0 && k2==0 && k3==0 ) {
00494 q_arr[ind++] = 0.0;
00495 q_arr[ind++] = 0.0;
00496 ++k3;
00497 }
00498 for ( ; k3<k3_end; ++k3 ) {
00499 double xp3, msq, imsq, vir, fac;
00500 double theta3, theta, q2, qr, qc, C;
00501 double m_x, m_y, m_z;
00502 theta3 = bm3[k3] *b1b2;
00503 m_x = m2_x + k3*recip3_x;
00504 m_y = m2_y + k3*recip3_y;
00505 m_z = m2_z + k3*recip3_z;
00506 msq = m_x*m_x + m_y*m_y + m_z*m_z;
00507
00508 xp3 = i_pi_volume*exp(-piob*msq);
00509 qr = q_arr[ind]; qc=q_arr[ind+1];
00510 q2 = 2*(qr*qr + qc*qc)*theta3;
00511 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00512 imsq = 1.0/msq;
00513 C = xp3*imsq;
00514 theta = theta3*C;
00515 q_arr[ind] *= theta;
00516 q_arr[ind+1] *= theta;
00517 vir = -2*(piob+imsq);
00518 fac = q2*C;
00519 energy += fac;
00520 v0 += fac*(1.0+vir*m_x*m_x);
00521 v1 += fac*vir*m_x*m_y;
00522 v2 += fac*vir*m_x*m_z;
00523 v3 += fac*(1.0+vir*m_y*m_y);
00524 v4 += fac*vir*m_y*m_z;
00525 v5 += fac*(1.0+vir*m_z*m_z);
00526 ind += 2;
00527 }
00528 }
00529 }
00530
00531 }
00532
00533 virial[0] = 0.5 * v0;
00534 virial[1] = 0.5 * v1;
00535 virial[2] = 0.5 * v2;
00536 virial[3] = 0.5 * v3;
00537 virial[4] = 0.5 * v4;
00538 virial[5] = 0.5 * v5;
00539 return 0.5*energy;
00540 }
00541
00542
00543 void PmeKSpace::init_exp(double *xp, int K, int k_start, int k_end, double recip) {
00544 int i;
00545 double fac;
00546 fac = -piob*recip*recip;
00547 int i_start = k_start;
00548 int i_end = k_end;
00549 if ( k_start > K/2 ) {
00550 i_start = K - k_end + 1;
00551 i_end = K - k_start + 1;
00552 } else if ( k_end > K/2 ) {
00553 i_end = K/2 + 1;
00554 i_start = K - k_end + 1;
00555 if ( k_start < i_start ) i_start = k_start;
00556 }
00557
00558 for (i=i_start; i < i_end; i++)
00559 xp[i] = exp(fac*i*i);
00560 }