18 #include "ComputeMoaMgr.decl.h" 20 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE)) 22 static void dftmod(
double *bsp_mod,
double *bsp_arr,
int nfft) {
24 double twopi, arg, sum1, sum2;
25 double infft = 1.0/nfft;
30 for (k = 0; k <nfft; ++k) {
33 for (j = 0; j < nfft; ++j) {
34 arg = twopi * k * j * infft;
35 sum1 += bsp_arr[j] * cos(arg);
36 sum2 += bsp_arr[j] * sin(arg);
38 bsp_mod[k] = sum1*sum1 + sum2*sum2;
46 double *M =
new double[3*
order];
47 double *dM =
new double[3*
order];
48 double *scratch =
new double[K];
50 fr[0]=fr[1]=fr[2]=0.0;
52 for (i=0; i<
order; i++) bm[i] = M[i];
53 for (i=
order; i<K; i++) bm[i] = 0.0;
55 for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
64 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
65 k3_start(K3_start), k3_end(K3_end) {
66 int K1, K2, K3,
order;
73 exp1 =
new double[K1/2 + 1];
74 exp2 =
new double[K2/2 + 1];
75 exp3 =
new double[K3/2 + 1];
83 #ifdef OPENATOM_VERSION 85 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
86 k3_start(K3_start), k3_end(K3_end) {
87 int K1, K2, K3,
order;
96 exp1 =
new double[K1/2 + 1];
97 exp2 =
new double[K2/2 + 1];
98 exp3 =
new double[K3/2 + 1];
105 CkPrintf(
"######################################################\n");
106 CkPrintf(
"Entering recvBmX loop on processor %d \n", CkMyPe() );
108 for ( i=0; i<=K1; ++i) {
109 CkPrintf(
"bm1 value pre-send %d = %d \n", i, bm1[i] );
111 for ( i=0; i<=K1; ++i) {
112 CkPrintf(
"bm1 reference pre-send %d = %d \n", i, &bm1[i] );
114 CkPrintf(
"bm1: %d \n", *bm1);
115 moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3,
order);
123 #endif // OPENATOM_VERSION 147 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
149 double recipx = recips[0];
150 double recipy = recips[1];
151 double recipz = recips[2];
153 int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
155 for ( k1=k1from; k1<=k1to; ++k1 ) {
156 double m1, m11, b1, xp1;
158 int k1_s = k1<=K1/2 ? k1 : k1-K1;
161 xp1 = i_pi_volume*exp1[abs(k1_s)];
162 for ( k2=k2_start; k2<k2_end; ++k2 ) {
163 double m2, m22, b1b2, xp2;
165 int k2_s = k2<=K2/2 ? k2 : k2-K2;
168 xp2 = exp2[abs(k2_s)]*xp1;
171 if (k1==0 && k2==0 && k3==0) {
176 for (; k3<k3_end; ++k3 ) {
177 double m3, m33, xp3, msq, imsq, vir, fac;
178 double theta3, theta, q2, qr, qc, C;
179 theta3 = bm3[k3] *b1b2;
183 qr = q_arr[ind]; qc=q_arr[ind+1];
184 q2 = 2*(qr*qr + qc*qc)*theta3;
185 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
186 msq = m11 + m22 + m33;
191 q_arr[ind+1] *= theta;
192 vir = -2*(piob+imsq);
195 v0 += fac*(1.0+vir*m11);
198 v3 += fac*(1.0+vir*m22);
200 v5 += fac*(1.0+vir*m33);
206 *partialEnergy = 0.5*energy;
207 partialVirial[0] = 0.5*v0;
208 partialVirial[1] = 0.5*v1;
209 partialVirial[2] = 0.5*v2;
210 partialVirial[3] = 0.5*v3;
211 partialVirial[4] = 0.5*v4;
212 partialVirial[5] = 0.5*v5;
215 for (
int i = first; i <= last; ++i ) {
216 void **params = (
void **)param;
218 float *q_arr = (
float *)params[1];
219 double *recips = (
double *)params[2];
220 double *partialEnergy = (
double *)params[3];
221 double *partialVirial = (
double *)params[4];
222 int *unitDist = (
int *)params[5];
224 int unit = unitDist[0];
225 int remains = unitDist[1];
231 k1from = remains*(unit+1)+(i-remains)*unit;
232 k1to = k1from+unit-1;
234 double *pEnergy = partialEnergy+i;
235 double *pVirial = partialVirial+i*6;
253 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
260 double recipx = lattice.
a_r().
x;
261 double recipy = lattice.
b_r().
y;
262 double recipz = lattice.
c_r().
z;
264 init_exp(exp1, K1, 0, K1, recipx);
265 init_exp(exp2, K2, k2_start, k2_end, recipy);
266 init_exp(exp3, K3, k3_start, k3_end, recipz);
268 double recips[] = {recipx, recipy, recipz};
269 int NPARTS=CmiMyNodeSize();
270 int maxParts = ( K1 * ( k2_end - k2_start ) * ( k3_end - k3_start ) + 127 ) / 128;
271 if ( NPARTS > maxParts ) NPARTS = maxParts;
272 if ( NPARTS > K1 ) NPARTS = K1;
273 ALLOCA(
double, partialEnergy, NPARTS);
274 ALLOCA(
double, partialVirial, 6*NPARTS);
275 int unitDist[] = {K1/NPARTS, K1%NPARTS};
278 void *params[] = {
this, q_arr, recips, partialEnergy, partialVirial, unitDist};
280 #if CMK_SMP && USE_CKLOOP 302 for(
int i=0; i<NPARTS; i++){
303 v0 += partialVirial[i*6+0];
304 v1 += partialVirial[i*6+1];
305 v2 += partialVirial[i*6+2];
306 v3 += partialVirial[i*6+3];
307 v4 += partialVirial[i*6+4];
308 v5 += partialVirial[i*6+5];
309 energy += partialEnergy[i];
334 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
343 #if CMK_SMP && USE_CKLOOP 348 double recipx = lattice.
a_r().
x;
349 double recipy = lattice.
b_r().
y;
350 double recipz = lattice.
c_r().
z;
352 init_exp(exp1, K1, 0, K1, recipx);
353 init_exp(exp2, K2, k2_start, k2_end, recipy);
354 init_exp(exp3, K3, k3_start, k3_end, recipz);
357 for ( k1=0; k1<K1; ++k1 ) {
358 double m1, m11, b1, xp1;
360 int k1_s = k1<=K1/2 ? k1 : k1-K1;
363 xp1 = i_pi_volume*exp1[abs(k1_s)];
364 for ( k2=k2_start; k2<k2_end; ++k2 ) {
365 double m2, m22, b1b2, xp2;
367 int k2_s = k2<=K2/2 ? k2 : k2-K2;
370 xp2 = exp2[abs(k2_s)]*xp1;
372 if ( k1==0 && k2==0 && k3==0 ) {
377 for ( ; k3<k3_end; ++k3 ) {
378 double m3, m33, xp3, msq, imsq, vir, fac;
379 double theta3, theta, q2, qr, qc, C;
380 theta3 = bm3[k3] *b1b2;
384 qr = q_arr[ind]; qc=q_arr[ind+1];
385 q2 = 2*(qr*qr + qc*qc)*theta3;
386 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
387 msq = m11 + m22 + m33;
392 q_arr[ind+1] *= theta;
393 vir = -2*(piob+imsq);
396 v0 += fac*(1.0+vir*m11);
399 v3 += fac*(1.0+vir*m22);
401 v5 += fac*(1.0+vir*m33);
407 }
else if ( cross(lattice.
a(),lattice.
b()).unit() == lattice.
c().
unit() ) {
412 double recip3_x = recip3.
x;
413 double recip3_y = recip3.
y;
414 double recip3_z = recip3.
z;
415 init_exp(exp3, K3, k3_start, k3_end, recip3.
length());
418 for ( k1=0; k1<K1; ++k1 ) {
421 int k1_s = k1<=K1/2 ? k1 : k1-K1;
424 for ( k2=k2_start; k2<k2_end; ++k2 ) {
425 double xp2, b1b2, m2_x, m2_y, m2_z;
427 int k2_s = k2<=K2/2 ? k2 : k2-K2;
428 m2_x = m1.
x + k2_s*recip2.
x;
429 m2_y = m1.
y + k2_s*recip2.
y;
430 m2_z = m1.
z + k2_s*recip2.
z;
432 xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
434 if ( k1==0 && k2==0 && k3==0 ) {
439 for ( ; k3<k3_end; ++k3 ) {
440 double xp3, msq, imsq, vir, fac;
441 double theta3, theta, q2, qr, qc, C;
442 double m_x, m_y, m_z;
443 theta3 = bm3[k3] *b1b2;
444 m_x = m2_x + k3*recip3_x;
445 m_y = m2_y + k3*recip3_y;
446 m_z = m2_z + k3*recip3_z;
447 msq = m_x*m_x + m_y*m_y + m_z*m_z;
449 qr = q_arr[ind]; qc=q_arr[ind+1];
450 q2 = 2*(qr*qr + qc*qc)*theta3;
451 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
456 q_arr[ind+1] *= theta;
457 vir = -2*(piob+imsq);
460 v0 += fac*(1.0+vir*m_x*m_x);
461 v1 += fac*vir*m_x*m_y;
462 v2 += fac*vir*m_x*m_z;
463 v3 += fac*(1.0+vir*m_y*m_y);
464 v4 += fac*vir*m_y*m_z;
465 v5 += fac*(1.0+vir*m_z*m_z);
475 double recip3_x = recip3.
x;
476 double recip3_y = recip3.
y;
477 double recip3_z = recip3.
z;
480 for ( k1=0; k1<K1; ++k1 ) {
483 int k1_s = k1<=K1/2 ? k1 : k1-K1;
486 for ( k2=k2_start; k2<k2_end; ++k2 ) {
487 double b1b2, m2_x, m2_y, m2_z;
489 int k2_s = k2<=K2/2 ? k2 : k2-K2;
490 m2_x = m1.
x + k2_s*recip2.
x;
491 m2_y = m1.
y + k2_s*recip2.
y;
492 m2_z = m1.
z + k2_s*recip2.
z;
495 if ( k1==0 && k2==0 && k3==0 ) {
500 for ( ; k3<k3_end; ++k3 ) {
501 double xp3, msq, imsq, vir, fac;
502 double theta3, theta, q2, qr, qc, C;
503 double m_x, m_y, m_z;
504 theta3 = bm3[k3] *b1b2;
505 m_x = m2_x + k3*recip3_x;
506 m_y = m2_y + k3*recip3_y;
507 m_z = m2_z + k3*recip3_z;
508 msq = m_x*m_x + m_y*m_y + m_z*m_z;
510 xp3 = i_pi_volume*exp(-piob*msq);
511 qr = q_arr[ind]; qc=q_arr[ind+1];
512 q2 = 2*(qr*qr + qc*qc)*theta3;
513 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
518 q_arr[ind+1] *= theta;
519 vir = -2*(piob+imsq);
522 v0 += fac*(1.0+vir*m_x*m_x);
523 v1 += fac*vir*m_x*m_y;
524 v2 += fac*vir*m_x*m_z;
525 v3 += fac*(1.0+vir*m_y*m_y);
526 v4 += fac*vir*m_y*m_z;
527 v5 += fac*(1.0+vir*m_z*m_z);
535 virial[0] = 0.5 * v0;
536 virial[1] = 0.5 * v1;
537 virial[2] = 0.5 * v2;
538 virial[3] = 0.5 * v3;
539 virial[4] = 0.5 * v4;
540 virial[5] = 0.5 * v5;
545 void PmeKSpace::init_exp(
double *xp,
int K,
int k_start,
int k_end,
double recip) {
548 fac = -piob*recip*recip;
549 int i_start = k_start;
551 if ( k_start > K/2 ) {
552 i_start = K - k_end + 1;
553 i_end = K - k_start + 1;
554 }
else if ( k_end > K/2 ) {
556 i_start = K - k_end + 1;
557 if ( k_start < i_start ) i_start = k_start;
560 for (i=i_start; i < i_end; i++)
561 xp[i] = exp(fac*i*i);
void compute_energy_orthogonal_subset(float q_arr[], double *recips, double partialVirial[], double *partialEnergy, int k1from, int k1to)
void compute_b_moduli(double *bm, int K, int order)
NAMD_HOST_DEVICE Vector c() const
SimParameters * simParameters
NAMD_HOST_DEVICE int orthogonal() const
static void dftmod(double *bsp_mod, double *bsp_arr, int nfft)
double compute_energy(float q_arr[], const Lattice &lattice, double ewald, double virial[], int useCkLoop)
NAMD_HOST_DEVICE BigReal length(void) const
NAMD_HOST_DEVICE BigReal volume(void) const
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
NAMD_HOST_DEVICE Vector c_r() const
NAMD_HOST_DEVICE Vector b() const
double compute_energy_orthogonal_helper(float q_arr[], const Lattice &lattice, double ewald, double virial[])
#define ALLOCA(TYPE, NAME, SIZE)
NAMD_HOST_DEVICE Vector a() const
static void compute_b_spline(REAL *__restrict frac, REAL *M, REAL *dM, int order)
static void compute_energy_orthogonal_ckloop(int first, int last, void *result, int paraNum, void *param)
NAMD_HOST_DEVICE Vector unit(void) const
PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end)