00001
00007 #include "PmeKSpace.h"
00008 #include <math.h>
00009 #include <stdlib.h>
00010
00011 #ifndef _NO_ALLOCA_H
00012 #include <alloca.h>
00013 #endif
00014
00015 #include "SimParameters.h"
00016 #include "Node.h"
00017 #include "ComputeMoaMgr.decl.h"
00018
00019 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))
00020
00021 static void dftmod(double *bsp_mod, double *bsp_arr, int nfft) {
00022 int j, k;
00023 double twopi, arg, sum1, sum2;
00024 double infft = 1.0/nfft;
00025
00026
00027 twopi = 2.0 * M_PI;
00028
00029 for (k = 0; k <nfft; ++k) {
00030 sum1 = 0.;
00031 sum2 = 0.;
00032 for (j = 0; j < nfft; ++j) {
00033 arg = twopi * k * j * infft;
00034 sum1 += bsp_arr[j] * cos(arg);
00035 sum2 += bsp_arr[j] * sin(arg);
00036 }
00037 bsp_mod[k] = sum1*sum1 + sum2*sum2;
00038 }
00039 }
00040
00041 static void compute_b_moduli(double *bm, int K, int order) {
00042 int i;
00043 double fr[3];
00044
00045 double *M = new double[3*order];
00046 double *dM = new double[3*order];
00047 double *scratch = new double[K];
00048
00049 fr[0]=fr[1]=fr[2]=0.0;
00050 compute_b_spline(fr,M,dM,order);
00051 for (i=0; i<order; i++) bm[i] = M[i];
00052 for (i=order; i<K; i++) bm[i] = 0.0;
00053 dftmod(scratch, bm, K);
00054 for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
00055
00056
00057 delete [] scratch;
00058 delete [] dM;
00059 delete [] M;
00060 }
00061
00062 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end)
00063 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00064 k3_start(K3_start), k3_end(K3_end) {
00065 int K1, K2, K3, order;
00066 K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00067
00068 bm1 = new double[K1];
00069 bm2 = new double[K2];
00070 bm3 = new double[K3];
00071
00072 exp1 = new double[K1/2 + 1];
00073 exp2 = new double[K2/2 + 1];
00074 exp3 = new double[K3/2 + 1];
00075
00076 compute_b_moduli(bm1, K1, order);
00077 compute_b_moduli(bm2, K2, order);
00078 compute_b_moduli(bm3, K3, order);
00079
00080 }
00081
00082 #ifdef OPENATOM_VERSION
00083 PmeKSpace::PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end, CProxy_ComputeMoaMgr moaProxy)
00084 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
00085 k3_start(K3_start), k3_end(K3_end) {
00086 int K1, K2, K3, order;
00087 K1=myGrid.K1; K2=myGrid.K2, K3=myGrid.K3; order=myGrid.order;
00088
00089 SimParameters *simParams = Node::Object()->simParameters;
00090
00091 bm1 = new double[K1];
00092 bm2 = new double[K2];
00093 bm3 = new double[K3];
00094
00095 exp1 = new double[K1/2 + 1];
00096 exp2 = new double[K2/2 + 1];
00097 exp3 = new double[K3/2 + 1];
00098
00099 compute_b_moduli(bm1, K1, order);
00100 compute_b_moduli(bm2, K2, order);
00101 compute_b_moduli(bm3, K3, order);
00102
00103 if ( simParams->openatomOn ) {
00104 CkPrintf("######################################################\n");
00105 CkPrintf("Entering recvBmX loop on processor %d \n", CkMyPe() );
00106 int i;
00107 for ( i=0; i<=K1; ++i) {
00108 CkPrintf("bm1 value pre-send %d = %d \n", i, bm1[i] );
00109 }
00110 for ( i=0; i<=K1; ++i) {
00111 CkPrintf("bm1 reference pre-send %d = %d \n", i, &bm1[i] );
00112 }
00113 CkPrintf("bm1: %d \n", *bm1);
00114 moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3, order);
00115
00116
00117
00118 }
00119
00120
00121 }
00122 #endif // OPENATOM_VERSION
00123
00124 PmeKSpace::~PmeKSpace() {
00125 delete [] bm1;
00126 delete [] bm2;
00127 delete [] bm3;
00128
00129 delete [] exp1;
00130 delete [] exp2;
00131 delete [] exp3;
00132 }
00133
00134 void PmeKSpace::compute_energy_orthogonal_subset(float q_arr[], double *recips, double *partialVirial, double *partialEnergy, int k1from, int k1to){
00135
00136 double energy = 0.0;
00137 double v0 = 0.;
00138 double v1 = 0.;
00139 double v2 = 0.;
00140 double v3 = 0.;
00141 double v4 = 0.;
00142 double v5 = 0.;
00143
00144 int k1, k2, k3;
00145 int K1, K2, K3;
00146 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00147
00148 double recipx = recips[0];
00149 double recipy = recips[1];
00150 double recipz = recips[2];
00151
00152 int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
00153
00154 for ( k1=k1from; k1<=k1to; ++k1 ) {
00155 double m1, m11, b1, xp1;
00156 b1 = bm1[k1];
00157 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00158 m1 = k1_s*recipx;
00159 m11 = m1*m1;
00160 xp1 = i_pi_volume*exp1[abs(k1_s)];
00161 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00162 double m2, m22, b1b2, xp2;
00163 b1b2 = b1*bm2[k2];
00164 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00165 m2 = k2_s*recipy;
00166 m22 = m2*m2;
00167 xp2 = exp2[abs(k2_s)]*xp1;
00168
00169 k3 = k3_start;
00170 if (k1==0 && k2==0 && k3==0) {
00171 q_arr[ind++] = 0.0;
00172 q_arr[ind++] = 0.0;
00173 ++k3;
00174 }
00175 for (; k3<k3_end; ++k3 ) {
00176 double m3, m33, xp3, msq, imsq, vir, fac;
00177 double theta3, theta, q2, qr, qc, C;
00178 theta3 = bm3[k3] *b1b2;
00179 m3 = k3*recipz;
00180 m33 = m3*m3;
00181 xp3 = exp3[k3];
00182 qr = q_arr[ind]; qc=q_arr[ind+1];
00183 q2 = 2*(qr*qr + qc*qc)*theta3;
00184 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00185 msq = m11 + m22 + m33;
00186 imsq = 1.0/msq;
00187 C = xp2*xp3*imsq;
00188 theta = theta3*C;
00189 q_arr[ind] *= theta;
00190 q_arr[ind+1] *= theta;
00191 vir = -2*(piob+imsq);
00192 fac = q2*C;
00193 energy += fac;
00194 v0 += fac*(1.0+vir*m11);
00195 v1 += fac*vir*m1*m2;
00196 v2 += fac*vir*m1*m3;
00197 v3 += fac*(1.0+vir*m22);
00198 v4 += fac*vir*m2*m3;
00199 v5 += fac*(1.0+vir*m33);
00200 ind += 2;
00201 }
00202 }
00203 }
00204
00205 *partialEnergy = 0.5*energy;
00206 partialVirial[0] = 0.5*v0;
00207 partialVirial[1] = 0.5*v1;
00208 partialVirial[2] = 0.5*v2;
00209 partialVirial[3] = 0.5*v3;
00210 partialVirial[4] = 0.5*v4;
00211 partialVirial[5] = 0.5*v5;
00212 }
00213 static inline void compute_energy_orthogonal_nodehelper(int first, int last, void *result, int paraNum, void *param){
00214 CmiAssert(first==last);
00215 void **params = (void **)param;
00216 PmeKSpace *kspace = (PmeKSpace *)params[0];
00217 float *q_arr = (float *)params[1];
00218 double *recips = (double *)params[2];
00219 double *partialEnergy = (double *)params[3];
00220 double *partialVirial = (double *)params[4];
00221 int *unitDist = (int *)params[5];
00222
00223 int i = first;
00224 int unit = unitDist[0];
00225 int remains = unitDist[1];
00226 int k1from, k1to;
00227 if(i<remains){
00228 k1from = i*(unit+1);
00229 k1to = k1from+unit;
00230 }else{
00231 k1from = remains*(unit+1)+(i-remains)*unit;
00232 k1to = k1from+unit-1;
00233 }
00234 double *pEnergy = partialEnergy+i;
00235 double *pVirial = partialVirial+i*6;
00236 kspace->compute_energy_orthogonal_subset(q_arr, recips, pVirial, pEnergy, k1from, k1to);
00237 }
00238
00239 double PmeKSpace::compute_energy_orthogonal_helper(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00240 double energy = 0.0;
00241 double v0 = 0.;
00242 double v1 = 0.;
00243 double v2 = 0.;
00244 double v3 = 0.;
00245 double v4 = 0.;
00246 double v5 = 0.;
00247
00248 int n;
00249 int k1, k2, k3, ind;
00250 int K1, K2, K3;
00251
00252 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00253
00254 i_pi_volume = 1.0/(M_PI * lattice.volume());
00255 piob = M_PI/ewald;
00256 piob *= piob;
00257
00258
00259 double recipx = lattice.a_r().x;
00260 double recipy = lattice.b_r().y;
00261 double recipz = lattice.c_r().z;
00262
00263 init_exp(exp1, K1, 0, K1, recipx);
00264 init_exp(exp2, K2, k2_start, k2_end, recipy);
00265 init_exp(exp3, K3, k3_start, k3_end, recipz);
00266
00267 double recips[] = {recipx, recipy, recipz};
00268 const int NPARTS=CmiMyNodeSize();
00269 ALLOCA(double, partialEnergy, NPARTS);
00270 ALLOCA(double, partialVirial, 6*NPARTS);
00271 int unitDist[] = {K1/NPARTS, K1%NPARTS};
00272
00273
00274 void *params[] = {this, q_arr, recips, partialEnergy, partialVirial, unitDist};
00275
00276 #if USE_NODEHELPER
00277 CProxy_FuncNodeHelper nodeHelper = CkpvAccess(BOCclass_group).nodeHelper;
00278 NodeHelper_Parallelize(nodeHelper, compute_energy_orthogonal_nodehelper, 6, (void *)params, NPARTS, 0, NPARTS-1);
00279 #endif
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299 for(int i=0; i<NPARTS; i++){
00300 v0 += partialVirial[i*6+0];
00301 v1 += partialVirial[i*6+1];
00302 v2 += partialVirial[i*6+2];
00303 v3 += partialVirial[i*6+3];
00304 v4 += partialVirial[i*6+4];
00305 v5 += partialVirial[i*6+5];
00306 energy += partialEnergy[i];
00307 }
00308
00309 virial[0] = v0;
00310 virial[1] = v1;
00311 virial[2] = v2;
00312 virial[3] = v3;
00313 virial[4] = v4;
00314 virial[5] = v5;
00315 return energy;
00316 }
00317
00318 double PmeKSpace::compute_energy(float *q_arr, const Lattice &lattice, double ewald, double *virial) {
00319 double energy = 0.0;
00320 double v0 = 0.;
00321 double v1 = 0.;
00322 double v2 = 0.;
00323 double v3 = 0.;
00324 double v4 = 0.;
00325 double v5 = 0.;
00326
00327 int n;
00328 int k1, k2, k3, ind;
00329 int K1, K2, K3;
00330
00331 K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3;
00332
00333 i_pi_volume = 1.0/(M_PI * lattice.volume());
00334 piob = M_PI/ewald;
00335 piob *= piob;
00336
00337 if ( lattice.orthogonal() ) {
00338
00339
00340 #if USE_NODEHELPER
00341 int useNodeHelper = Node::Object()->simParameters->useNodeHelper;
00342 if(useNodeHelper>=NDH_CTRL_PME_KSPACE){
00343 return compute_energy_orthogonal_helper(q_arr, lattice, ewald, virial);
00344 }
00345 #endif
00346 double recipx = lattice.a_r().x;
00347 double recipy = lattice.b_r().y;
00348 double recipz = lattice.c_r().z;
00349
00350 init_exp(exp1, K1, 0, K1, recipx);
00351 init_exp(exp2, K2, k2_start, k2_end, recipy);
00352 init_exp(exp3, K3, k3_start, k3_end, recipz);
00353
00354 ind = 0;
00355 for ( k1=0; k1<K1; ++k1 ) {
00356 double m1, m11, b1, xp1;
00357 b1 = bm1[k1];
00358 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00359 m1 = k1_s*recipx;
00360 m11 = m1*m1;
00361 xp1 = i_pi_volume*exp1[abs(k1_s)];
00362 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00363 double m2, m22, b1b2, xp2;
00364 b1b2 = b1*bm2[k2];
00365 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00366 m2 = k2_s*recipy;
00367 m22 = m2*m2;
00368 xp2 = exp2[abs(k2_s)]*xp1;
00369 k3 = k3_start;
00370 if ( k1==0 && k2==0 && k3==0 ) {
00371 q_arr[ind++] = 0.0;
00372 q_arr[ind++] = 0.0;
00373 ++k3;
00374 }
00375 for ( ; k3<k3_end; ++k3 ) {
00376 double m3, m33, xp3, msq, imsq, vir, fac;
00377 double theta3, theta, q2, qr, qc, C;
00378 theta3 = bm3[k3] *b1b2;
00379 m3 = k3*recipz;
00380 m33 = m3*m3;
00381 xp3 = exp3[k3];
00382 qr = q_arr[ind]; qc=q_arr[ind+1];
00383 q2 = 2*(qr*qr + qc*qc)*theta3;
00384 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00385 msq = m11 + m22 + m33;
00386 imsq = 1.0/msq;
00387 C = xp2*xp3*imsq;
00388 theta = theta3*C;
00389 q_arr[ind] *= theta;
00390 q_arr[ind+1] *= theta;
00391 vir = -2*(piob+imsq);
00392 fac = q2*C;
00393 energy += fac;
00394 v0 += fac*(1.0+vir*m11);
00395 v1 += fac*vir*m1*m2;
00396 v2 += fac*vir*m1*m3;
00397 v3 += fac*(1.0+vir*m22);
00398 v4 += fac*vir*m2*m3;
00399 v5 += fac*(1.0+vir*m33);
00400 ind += 2;
00401 }
00402 }
00403 }
00404
00405 } else if ( cross(lattice.a(),lattice.b()).unit() == lattice.c().unit() ) {
00406
00407 Vector recip1 = lattice.a_r();
00408 Vector recip2 = lattice.b_r();
00409 Vector recip3 = lattice.c_r();
00410 double recip3_x = recip3.x;
00411 double recip3_y = recip3.y;
00412 double recip3_z = recip3.z;
00413 init_exp(exp3, K3, k3_start, k3_end, recip3.length());
00414
00415 ind = 0;
00416 for ( k1=0; k1<K1; ++k1 ) {
00417 double b1; Vector m1;
00418 b1 = bm1[k1];
00419 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00420 m1 = k1_s*recip1;
00421
00422 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00423 double xp2, b1b2, m2_x, m2_y, m2_z;
00424 b1b2 = b1*bm2[k2];
00425 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00426 m2_x = m1.x + k2_s*recip2.x;
00427 m2_y = m1.y + k2_s*recip2.y;
00428 m2_z = m1.z + k2_s*recip2.z;
00429
00430 xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
00431 k3 = k3_start;
00432 if ( k1==0 && k2==0 && k3==0 ) {
00433 q_arr[ind++] = 0.0;
00434 q_arr[ind++] = 0.0;
00435 ++k3;
00436 }
00437 for ( ; k3<k3_end; ++k3 ) {
00438 double xp3, msq, imsq, vir, fac;
00439 double theta3, theta, q2, qr, qc, C;
00440 double m_x, m_y, m_z;
00441 theta3 = bm3[k3] *b1b2;
00442 m_x = m2_x + k3*recip3_x;
00443 m_y = m2_y + k3*recip3_y;
00444 m_z = m2_z + k3*recip3_z;
00445 msq = m_x*m_x + m_y*m_y + m_z*m_z;
00446 xp3 = exp3[k3];
00447 qr = q_arr[ind]; qc=q_arr[ind+1];
00448 q2 = 2*(qr*qr + qc*qc)*theta3;
00449 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00450 imsq = 1.0/msq;
00451 C = xp2*xp3*imsq;
00452 theta = theta3*C;
00453 q_arr[ind] *= theta;
00454 q_arr[ind+1] *= theta;
00455 vir = -2*(piob+imsq);
00456 fac = q2*C;
00457 energy += fac;
00458 v0 += fac*(1.0+vir*m_x*m_x);
00459 v1 += fac*vir*m_x*m_y;
00460 v2 += fac*vir*m_x*m_z;
00461 v3 += fac*(1.0+vir*m_y*m_y);
00462 v4 += fac*vir*m_y*m_z;
00463 v5 += fac*(1.0+vir*m_z*m_z);
00464 ind += 2;
00465 }
00466 }
00467 }
00468
00469 } else {
00470 Vector recip1 = lattice.a_r();
00471 Vector recip2 = lattice.b_r();
00472 Vector recip3 = lattice.c_r();
00473 double recip3_x = recip3.x;
00474 double recip3_y = recip3.y;
00475 double recip3_z = recip3.z;
00476
00477 ind = 0;
00478 for ( k1=0; k1<K1; ++k1 ) {
00479 double b1; Vector m1;
00480 b1 = bm1[k1];
00481 int k1_s = k1<=K1/2 ? k1 : k1-K1;
00482 m1 = k1_s*recip1;
00483
00484 for ( k2=k2_start; k2<k2_end; ++k2 ) {
00485 double b1b2, m2_x, m2_y, m2_z;
00486 b1b2 = b1*bm2[k2];
00487 int k2_s = k2<=K2/2 ? k2 : k2-K2;
00488 m2_x = m1.x + k2_s*recip2.x;
00489 m2_y = m1.y + k2_s*recip2.y;
00490 m2_z = m1.z + k2_s*recip2.z;
00491
00492 k3 = k3_start;
00493 if ( k1==0 && k2==0 && k3==0 ) {
00494 q_arr[ind++] = 0.0;
00495 q_arr[ind++] = 0.0;
00496 ++k3;
00497 }
00498 for ( ; k3<k3_end; ++k3 ) {
00499 double xp3, msq, imsq, vir, fac;
00500 double theta3, theta, q2, qr, qc, C;
00501 double m_x, m_y, m_z;
00502 theta3 = bm3[k3] *b1b2;
00503 m_x = m2_x + k3*recip3_x;
00504 m_y = m2_y + k3*recip3_y;
00505 m_z = m2_z + k3*recip3_z;
00506 msq = m_x*m_x + m_y*m_y + m_z*m_z;
00507
00508 xp3 = i_pi_volume*exp(-piob*msq);
00509 qr = q_arr[ind]; qc=q_arr[ind+1];
00510 q2 = 2*(qr*qr + qc*qc)*theta3;
00511 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
00512 imsq = 1.0/msq;
00513 C = xp3*imsq;
00514 theta = theta3*C;
00515 q_arr[ind] *= theta;
00516 q_arr[ind+1] *= theta;
00517 vir = -2*(piob+imsq);
00518 fac = q2*C;
00519 energy += fac;
00520 v0 += fac*(1.0+vir*m_x*m_x);
00521 v1 += fac*vir*m_x*m_y;
00522 v2 += fac*vir*m_x*m_z;
00523 v3 += fac*(1.0+vir*m_y*m_y);
00524 v4 += fac*vir*m_y*m_z;
00525 v5 += fac*(1.0+vir*m_z*m_z);
00526 ind += 2;
00527 }
00528 }
00529 }
00530
00531 }
00532
00533 virial[0] = 0.5 * v0;
00534 virial[1] = 0.5 * v1;
00535 virial[2] = 0.5 * v2;
00536 virial[3] = 0.5 * v3;
00537 virial[4] = 0.5 * v4;
00538 virial[5] = 0.5 * v5;
00539 return 0.5*energy;
00540 }
00541
00542
00543 void PmeKSpace::init_exp(double *xp, int K, int k_start, int k_end, double recip) {
00544 int i;
00545 double fac;
00546 fac = -piob*recip*recip;
00547 int i_start = k_start;
00548 int i_end = k_end;
00549 if ( k_start > K/2 ) {
00550 i_start = K - k_end + 1;
00551 i_end = K - k_start + 1;
00552 } else if ( k_end > K/2 ) {
00553 i_end = K/2 + 1;
00554 i_start = K - k_end + 1;
00555 if ( k_start < i_start ) i_start = k_start;
00556 }
00557
00558 for (i=i_start; i < i_end; i++)
00559 xp[i] = exp(fac*i*i);
00560 }