NAMD
PmeRealSpace.C
Go to the documentation of this file.
1 
7 #include <string.h>
8 #include "PmeBase.inl"
9 #include "PmeRealSpace.h"
10 #include "Node.h"
11 #include "SimParameters.h"
12 
14  : myGrid(grid) {
15 }
16 
18 }
19 
20 void PmeRealSpace::set_num_atoms(int natoms) {
21  N = natoms;
22  int order = myGrid.order;
23  M_alloc.resize(3*N*order);
24  M = M_alloc.begin();
25  dM_alloc.resize(3*N*order);
26  dM = dM_alloc.begin();
27 }
28 
29 template <int order>
30 void PmeRealSpace::fill_b_spline(PmeParticle p[]) {
31  float fr[3];
32  float *Mi, *dMi;
33  int i, stride;
34 
35  stride = 3*order;
36  Mi = M; dMi = dM;
37  for (i=0; i<N; i++) {
38  fr[0] = (float)(p[i].x - (double)(int)(p[i].x)); // subtract in double precision
39  fr[1] = (float)(p[i].y - (double)(int)(p[i].y));
40  fr[2] = (float)(p[i].z - (double)(int)(p[i].z));
41  compute_b_spline(fr, Mi, dMi, order);
42  Mi += stride;
43  dMi += stride;
44  }
45 }
46 
47 void PmeRealSpace::fill_charges(float **q_arr, float **q_arr_list, int &q_arr_count,
48  int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
49 
50  switch (myGrid.order) {
51  case 4:
52  fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
53  break;
54  case 6:
55  fill_charges_order<6>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
56  break;
57  case 8:
58  fill_charges_order<8>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
59  break;
60  case 10:
61  fill_charges_order<10>(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
62  break;
63  default: NAMD_die("unsupported PMEInterpOrder");
64  }
65 
66 }
67 
68 template <int order>
69 void PmeRealSpace::fill_charges_order(float **q_arr, float **q_arr_list, int &q_arr_count,
70  int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
71 
72  int i, j, k, l;
73  int stride;
74  int K1, K2, K3, dim2, dim3;
75 
76  if ( order != myGrid.order ) NAMD_bug("fill_charges_order template mismatch");
77 
78  if ( order == 4 ) {
79  fill_charges_order4(q_arr, q_arr_list, q_arr_count, stray_count, f_arr, fz_arr, p);
80  return;
81  }
82 
83  float * __restrict Mi;
84  Mi = M;
85  K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2; dim3=myGrid.dim3;
86  stride = 3*order;
87 
88  fill_b_spline<order>(p);
89 
90  for (i=0; i<N; i++) {
91  float q;
92  int u1, u2, u2i, u3i;
93  q = p[i].cg;
94  u1 = (int)(p[i].x);
95  u2i = (int)(p[i].y);
96  u3i = (int)(p[i].z);
97  u1 -= order;
98  u2i -= order;
99  u3i -= order;
100  u3i += 1;
101  if ( u3i < 0 ) u3i += K3;
102  for (j=0; j<order; j++) {
103  float m1;
104  int ind1;
105  m1 = Mi[j]*q;
106  u1++;
107  ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
108  u2 = u2i;
109  for (k=0; k<order; k++) {
110  float m1m2;
111  int ind2;
112  m1m2 = m1*Mi[order+k];
113  u2++;
114  ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
115  float * __restrict qline = q_arr[ind2];
116  if ( ! qline ) {
117  if ( f_arr[ind2] ) {
118  f_arr[ind2] = 3;
119  ++stray_count;
120  continue;
121  }
122  qline = q_arr[ind2] = q_arr_list[q_arr_count++]
123  = new float[K3+order-1];
124  memset( (void*) qline, 0, (K3+order-1) * sizeof(float) );
125  }
126  f_arr[ind2] = 1;
127  for (l=0; l<order; l++) {
128  qline[u3i+l] += m1m2 * Mi[2*order + l];
129  }
130  }
131  }
132  Mi += stride;
133  for (l=0; l<order; l++) {
134  int u3 = u3i + l;
135  int ind = u3 + (u3 < 0 ? K3 : 0);
136  fz_arr[ind] = 1;
137  }
138  }
139 }
140 
141 void PmeRealSpace::compute_forces(const float * const *q_arr,
142  const PmeParticle p[], Vector f[]) {
143 
144  switch (myGrid.order) {
145  case 4:
146  compute_forces_order4(q_arr, p, f);
147  break;
148  case 6:
149  compute_forces_order<6>(q_arr, p, f);
150  break;
151  case 8:
152  compute_forces_order<8>(q_arr, p, f);
153  break;
154  case 10:
155  compute_forces_order<10>(q_arr, p, f);
156  break;
157  default: NAMD_die("unsupported PMEInterpOrder");
158  }
159 
160 }
161 
162 template <int order>
163 void PmeRealSpace::compute_forces_order(const float * const *q_arr,
164  const PmeParticle p[], Vector f[]) {
165 
166  int i, j, k, l, stride;
167  float f1, f2, f3;
168  float *Mi, *dMi;
169  int K1, K2, K3, dim2;
170 
171  if ( order != myGrid.order ) NAMD_bug("compute_forces_order template mismatch");
172 
173  if ( order == 4 ) {
174  compute_forces_order4(q_arr, p, f);
175  return;
176  }
177 
178  K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
179  stride=3*order;
180  Mi = M; dMi = dM;
181 
182  for (i=0; i<N; i++) {
183  float q;
184  int u1, u2, u2i, u3i;
185  q = p[i].cg;
186  f1=f2=f3=0.0;
187  u1 = (int)(p[i].x);
188  u2i = (int)(p[i].y);
189  u3i = (int)(p[i].z);
190  u1 -= order;
191  u2i -= order;
192  u3i -= order;
193  u3i += 1;
194  if ( u3i < 0 ) u3i += K3;
195  for (j=0; j<order; j++) {
196  float m1, d1;
197  int ind1;
198  m1=Mi[j]*q;
199  d1=K1*dMi[j]*q;
200  u1++;
201  ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
202  u2 = u2i;
203  for (k=0; k<order; k++) {
204  float m2, d2, m1m2, m1d2, d1m2;
205  int ind2;
206  m2=Mi[order+k];
207  d2=K2*dMi[order+k];
208  m1m2=m1*m2;
209  m1d2=m1*d2;
210  d1m2=d1*m2;
211  u2++;
212  ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
213  const float *qline = q_arr[ind2];
214  if ( ! qline ) continue;
215  for (l=0; l<order; l++) {
216  float term, m3, d3;
217  m3=Mi[2*order+l];
218  d3=K3*dMi[2*order+l];
219  term = qline[u3i+l];
220  f1 -= d1m2 * m3 * term;
221  f2 -= m1d2 * m3 * term;
222  f3 -= m1m2 * d3 * term;
223  }
224  }
225  }
226  Mi += stride;
227  dMi += stride;
228  f[i].x = f1;
229  f[i].y = f2;
230  f[i].z = f3;
231  }
232 }
233 
234 // this should definitely help the compiler
235 #define order 4
236 
237 void PmeRealSpace::fill_charges_order4(float **q_arr, float **q_arr_list, int &q_arr_count,
238  int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[]) {
239 
240  int i, j, k, l;
241  int stride;
242  int K1, K2, K3, dim2, dim3;
243  float * __restrict Mi, * __restrict dMi;
244  Mi = M; dMi = dM;
245  K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2; dim3=myGrid.dim3;
246  // order = myGrid.order;
247  stride = 3*order;
248 
249  // fill_b_spline(p); leave this off! replaced with code below
250 
251 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
252  for ( int istart = 0; istart < N; istart += 1000 ) {
253  int iend = istart + 1000;
254  if ( iend > N ) iend = N;
255  CmiNetworkProgress();
256  for (i=istart; i<iend; i++) {
257 #else
258  {
259  for (i=0; i<N; i++) {
260 #endif
261  float q;
262  int u1, u2, u2i, u3i;
263  u1 = (int)p[i].x;
264  u2i = (int)p[i].y;
265  u3i = (int)p[i].z;
266  float fr1 = (float)(p[i].x - (double)u1); // subtract in double precision
267  float fr2 = (float)(p[i].y - (double)u2i);
268  float fr3 = (float)(p[i].z - (double)u3i);
269  q = p[i].cg;
270 
271  // calculate b_spline for order = 4
272  Mi[0] = ( ( (-1./6.) * fr1 + 0.5 ) * fr1 - 0.5 ) * fr1 + (1./6.);
273  Mi[1] = ( ( 0.5 * fr1 - 1.0 ) * fr1 ) * fr1 + (2./3.);
274  Mi[2] = ( ( -0.5 * fr1 + 0.5 ) * fr1 + 0.5 ) * fr1 + (1./6.);
275  Mi[3] = (1./6.) * fr1 * fr1 * fr1;
276  dMi[0] = ( -0.5 * fr1 + 1.0 )* fr1 - 0.5;
277  dMi[1] = ( 1.5 * fr1 - 2.0 ) * fr1;
278  dMi[2] = ( -1.5 * fr1 + 1.0 ) * fr1 + 0.5;
279  dMi[3] = 0.5 * fr1 * fr1;
280  Mi[4] = ( ( (-1./6.) * fr2 + 0.5 ) * fr2 - 0.5 ) * fr2 + (1./6.);
281  Mi[5] = ( ( 0.5 * fr2 - 1.0 ) * fr2 ) * fr2 + (2./3.);
282  Mi[6] = ( ( -0.5 * fr2 + 0.5 ) * fr2 + 0.5 ) * fr2 + (1./6.);
283  Mi[7] = (1./6.) * fr2 * fr2 * fr2;
284  dMi[4] = ( -0.5 * fr2 + 1.0 )* fr2 - 0.5;
285  dMi[5] = ( 1.5 * fr2 - 2.0 ) * fr2;
286  dMi[6] = ( -1.5 * fr2 + 1.0 ) * fr2 + 0.5;
287  dMi[7] = 0.5 * fr2 * fr2;
288  Mi[8] = ( ( (-1./6.) * fr3 + 0.5 ) * fr3 - 0.5 ) * fr3 + (1./6.);
289  Mi[9] = ( ( 0.5 * fr3 - 1.0 ) * fr3 ) * fr3 + (2./3.);
290  Mi[10] = ( ( -0.5 * fr3 + 0.5 ) * fr3 + 0.5 ) * fr3 + (1./6.);
291  Mi[11] = (1./6.) * fr3 * fr3 * fr3;
292  dMi[8] = ( -0.5 * fr3 + 1.0 )* fr3 - 0.5;
293  dMi[9] = ( 1.5 * fr3 - 2.0 ) * fr3;
294  dMi[10] = ( -1.5 * fr3 + 1.0 ) * fr3 + 0.5;
295  dMi[11] = 0.5 * fr3 * fr3;
296 
297  u1 -= order;
298  u2i -= order;
299  u3i -= order;
300  u3i += 1;
301  if ( u3i < 0 ) u3i += K3;
302  for (j=0; j<order; j++) {
303  float m1;
304  int ind1;
305  m1 = Mi[j]*q;
306  u1++;
307  ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
308  u2 = u2i;
309  for (k=0; k<order; k++) {
310  float m1m2;
311  int ind2;
312  m1m2 = m1*Mi[order+k];
313  u2++;
314  ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
315  float * __restrict qline = q_arr[ind2];
316  if ( ! qline ) {
317  if ( f_arr[ind2] ) {
318  f_arr[ind2] = 3;
319  ++stray_count;
320  continue;
321  }
322  qline = q_arr[ind2] = q_arr_list[q_arr_count++]
323  = new float[K3+order-1];
324  memset( (void*) qline, 0, (K3+order-1) * sizeof(float) );
325  }
326  f_arr[ind2] = 1;
327  for (l=0; l<order; l++) {
328  qline[u3i+l] += m1m2 * Mi[2*order + l];
329  }
330  }
331  }
332  Mi += stride;
333  dMi += stride;
334  for (l=0; l<order; l++) {
335  int u3 = u3i + l;
336  int ind = u3 + (u3 < 0 ? K3 : 0);
337  fz_arr[ind] = 1;
338  }
339  }
340  }
341 }
342 
343 static inline void compute_forces_order4_helper(int first, int last, void *result, int paraNum, void *param){
344  void **params = (void **)param;
345  PmeRealSpace *rs = (PmeRealSpace *)params[0];
346  const float * const *q_arr = (const float * const *)params[1];
347  const PmeParticle *p = (const PmeParticle *)params[2];
348  Vector *f = (Vector *)params[3];
349  rs->compute_forces_order4_partial(first, last, q_arr, p, f);
350 }
351 
352 void PmeRealSpace::compute_forces_order4(const float * const *q_arr,
353  const PmeParticle p[], Vector f[]) {
354 
355  int i, j, k, l, stride;
356  float f1, f2, f3;
357  float *Mi, *dMi;
358  int K1, K2, K3, dim2;
359 
360 #if CMK_SMP && USE_CKLOOP
361  int useCkLoop = Node::Object()->simParameters->useCkLoop;
362  if(useCkLoop>=CKLOOP_CTRL_PME_UNGRIDCALC){
363  //compute_forces_order4_partial(0, N-1, q_arr, p, f);
364  void *params[] = {(void *)this, (void *)q_arr, (void *)p, (void *)f};
365  CkLoop_Parallelize(compute_forces_order4_helper, 4, (void *)params, CkMyNodeSize(), 0, N-1);
366  return;
367  }
368 #endif
369  K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
370  // order = myGrid.order;
371  stride=3*order;
372  Mi = M; dMi = dM;
373 
374  for (i=0; i<N; i++) {
375  float q;
376  int u1, u2, u2i, u3i;
377  q = p[i].cg;
378  f1=f2=f3=0.0;
379  u1 = (int)(p[i].x);
380  u2i = (int)(p[i].y);
381  u3i = (int)(p[i].z);
382  u1 -= order;
383  u2i -= order;
384  u3i -= order;
385  u3i += 1;
386  if ( u3i < 0 ) u3i += K3;
387  for (j=0; j<order; j++) {
388  float m1, d1;
389  int ind1;
390  m1=Mi[j]*q;
391  d1=K1*dMi[j]*q;
392  u1++;
393  ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
394  u2 = u2i;
395  for (k=0; k<order; k++) {
396  float m2, d2, m1m2, m1d2, d1m2;
397  int ind2;
398  m2=Mi[order+k];
399  d2=K2*dMi[order+k];
400  m1m2=m1*m2;
401  m1d2=m1*d2;
402  d1m2=d1*m2;
403  u2++;
404  ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
405  const float *qline = q_arr[ind2];
406  if ( ! qline ) continue;
407  for (l=0; l<order; l++) {
408  float term, m3, d3;
409  m3=Mi[2*order+l];
410  d3=K3*dMi[2*order+l];
411  term = qline[u3i+l];
412  f1 -= d1m2 * m3 * term;
413  f2 -= m1d2 * m3 * term;
414  f3 -= m1m2 * d3 * term;
415  }
416  }
417  }
418  Mi += stride;
419  dMi += stride;
420  f[i].x = f1;
421  f[i].y = f2;
422  f[i].z = f3;
423  }
424 }
425 
427  const float * const *q_arr,
428  const PmeParticle p[], Vector f[]) {
429 
430  int i, j, k, l, stride;
431  float f1, f2, f3;
432  float *Mi, *dMi;
433  int K1, K2, K3, dim2;
434 
435  K1=myGrid.K1; K2=myGrid.K2; K3=myGrid.K3; dim2=myGrid.dim2;
436  // order = myGrid.order;
437  stride=3*order;
438  Mi = M; dMi = dM;
439 
440  for (i=first; i<=last; i++) {
441  Mi = M + i*stride;
442  dMi = dM + i*stride;
443  float q;
444  int u1, u2, u2i, u3i;
445  q = p[i].cg;
446  f1=f2=f3=0.0;
447  u1 = (int)(p[i].x);
448  u2i = (int)(p[i].y);
449  u3i = (int)(p[i].z);
450  u1 -= order;
451  u2i -= order;
452  u3i -= order;
453  u3i += 1;
454  if ( u3i < 0 ) u3i += K3;
455  for (j=0; j<order; j++) {
456  float m1, d1;
457  int ind1;
458  m1=Mi[j]*q;
459  d1=K1*dMi[j]*q;
460  u1++;
461  ind1 = (u1 + (u1 < 0 ? K1 : 0))*dim2;
462  u2 = u2i;
463  for (k=0; k<order; k++) {
464  float m2, d2, m1m2, m1d2, d1m2;
465  int ind2;
466  m2=Mi[order+k];
467  d2=K2*dMi[order+k];
468  m1m2=m1*m2;
469  m1d2=m1*d2;
470  d1m2=d1*m2;
471  u2++;
472  ind2 = ind1 + (u2 + (u2 < 0 ? K2 : 0));
473  const float *qline = q_arr[ind2];
474  if ( ! qline ) continue;
475  for (l=0; l<order; l++) {
476  float term, m3, d3;
477  m3=Mi[2*order+l];
478  d3=K3*dMi[2*order+l];
479  term = qline[u3i+l];
480  f1 -= d1m2 * m3 * term;
481  f2 -= m1d2 * m3 * term;
482  f3 -= m1m2 * d3 * term;
483  }
484  }
485  }
486  f[i].x = f1;
487  f[i].y = f2;
488  f[i].z = f3;
489  }
490 }
static Node * Object()
Definition: Node.h:86
int dim2
Definition: PmeBase.h:19
void compute_forces(const float *const *q_arr, const PmeParticle p[], Vector f[])
Definition: PmeRealSpace.C:141
int dim3
Definition: PmeBase.h:19
Definition: Vector.h:64
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
BigReal z
Definition: Vector.h:66
double cg
Definition: PmeBase.h:27
#define order
Definition: PmeRealSpace.C:235
int order
Definition: PmeBase.h:20
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void fill_charges(float **q_arr, float **q_arr_list, int &q_arr_count, int &stray_count, char *f_arr, char *fz_arr, PmeParticle p[])
Definition: PmeRealSpace.C:47
gridSize z
BigReal x
Definition: Vector.h:66
void NAMD_die(const char *err_msg)
Definition: common.C:85
static void compute_forces_order4_helper(int first, int last, void *result, int paraNum, void *param)
Definition: PmeRealSpace.C:343
int K3
Definition: PmeBase.h:18
void resize(int i)
Definition: ResizeArray.h:84
PmeRealSpace(PmeGrid grid)
Definition: PmeRealSpace.C:13
#define CKLOOP_CTRL_PME_UNGRIDCALC
Definition: SimParameters.h:93
BigReal y
Definition: Vector.h:66
void set_num_atoms(int natoms)
Definition: PmeRealSpace.C:20
gridSize y
gridSize x
static void compute_b_spline(REAL *__restrict frac, REAL *M, REAL *dM, int order)
Definition: PmeBase.inl:86
void compute_forces_order4_partial(int first, int last, const float *const *q_arr, const PmeParticle p[], Vector f[])
Definition: PmeRealSpace.C:426
iterator begin(void)
Definition: ResizeArray.h:36