NAMD
msm_longrng.c
Go to the documentation of this file.
1 
41 #include "msm_defn.h"
42 
43 static int anterpolation(NL_Msm *);
44 static int interpolation(NL_Msm *);
45 static int restriction(NL_Msm *, int level);
46 static int prolongation(NL_Msm *, int level);
47 static int restriction_factored(NL_Msm *, int level);
48 static int prolongation_factored(NL_Msm *, int level);
49 static int gridcutoff(NL_Msm *, int level);
50 
51 
53  double time_delta;
54  int rc = 0;
55  int level;
56  int nlevels = msm->nlevels;
57  int use_cuda_gridcutoff = (msm->msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF);
58  int fallback_cpu = (msm->msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK);
59  int use_nonfactored = (msm->msmflags & NL_MSM_COMPUTE_NONFACTORED);
60 
62  rc = anterpolation(msm);
63  if (rc) return rc;
65  time_delta = wkf_timer_time(msm->timer_longrng);
66  if (msm->report_timings) {
67  printf("MSM anterpolation: %6.3f sec\n", time_delta);
68  }
69 
71  if (use_nonfactored) {
72  for (level = 0; level < nlevels - 1; level++) {
73  rc = restriction(msm, level);
74  if (rc) return rc;
75  }
76  }
77  else {
78  for (level = 0; level < nlevels - 1; level++) {
79  rc = restriction_factored(msm, level);
80  if (rc) return rc;
81  }
82  }
84  time_delta = wkf_timer_time(msm->timer_longrng);
85  if (msm->report_timings) {
86  printf("MSM restriction: %6.3f sec\n", time_delta);
87  }
88 
90  if (use_cuda_gridcutoff && nlevels > 1) {
91 #ifdef NL_USE_CUDA
92  if ((rc = NL_msm_cuda_condense_qgrids(msm)) != NL_MSM_SUCCESS ||
93  (rc = NL_msm_cuda_compute_gridcutoff(msm)) != NL_MSM_SUCCESS ||
94  (rc = NL_msm_cuda_expand_egrids(msm)) != NL_MSM_SUCCESS) {
95  if (fallback_cpu) {
96  printf("Falling back on CPU for grid cutoff computation\n");
97  use_cuda_gridcutoff = 0;
98  }
99  else return rc;
100  }
101 #else
102  if (fallback_cpu) {
103  printf("Falling back on CPU for grid cutoff computation\n");
104  use_cuda_gridcutoff = 0;
105  }
106  else return NL_MSM_ERROR_SUPPORT;
107 #endif
108  }
109 
110  if ( ! use_cuda_gridcutoff ) {
111  for (level = 0; level < nlevels - 1; level++) {
112  rc = gridcutoff(msm, level);
113  if (rc) return rc;
114  }
115  }
116 
117  rc = gridcutoff(msm, level); /* top level */
118  if (rc) return rc;
119 
121  time_delta = wkf_timer_time(msm->timer_longrng);
122  if (msm->report_timings) {
123  printf("MSM grid cutoff: %6.3f sec\n", time_delta);
124  }
125 
127  if (use_nonfactored) {
128  for (level--; level >= 0; level--) {
129  rc = prolongation(msm, level);
130  if (rc) return rc;
131  }
132  }
133  else {
134  for (level--; level >= 0; level--) {
135  rc = prolongation_factored(msm, level);
136  if (rc) return rc;
137  }
138  }
140  time_delta = wkf_timer_time(msm->timer_longrng);
141  if (msm->report_timings) {
142  printf("MSM prolongation: %6.3f sec\n", time_delta);
143  }
144 
146  rc = interpolation(msm);
147  if (rc) return rc;
149  time_delta = wkf_timer_time(msm->timer_longrng);
150  if (msm->report_timings) {
151  printf("MSM interpolation: %6.3f sec\n", time_delta);
152  }
153 
154  return 0;
155 }
156 
157 
159 enum { MAX_POLY_DEGREE = 9 };
160 
163 static const int PolyDegree[NL_MSM_APPROX_END] = {
164  3, 5, 5, 7, 7, 9, 9, 3,
165 };
166 
169 static const double
171  /* cubic */
172  {-1./16, 0, 9./16, 1, 9./16, 0, -1./16},
173 
174  /* quintic C1 */
175  {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
176 
177  /* quintic C2 (same as quintic C1) */
178  {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
179 
180  /* septic C1 */
181  { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
182  0, -245./2048, 0, 49./2048, 0, -5./2048 },
183 
184  /* septic C3 (same as septic C3) */
185  { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
186  0, -245./2048, 0, 49./2048, 0, -5./2048 },
187 
188  /* nonic C1 */
189  { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
190  19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
191  -405./65536, 0, 35./65536 },
192 
193  /* nonic C4 (same as nonic C1) */
194  { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
195  19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
196  -405./65536, 0, 35./65536 },
197 
198  /* bspline */
199  { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
200 };
201 
202 
205 enum { MAX_NSTENCIL = 11 };
206 
208 static const int Nstencil[NL_MSM_APPROX_END] = {
209  5, 7, 7, 9, 9, 11, 11, 7
210 };
211 
215  /* cubic */
216  {-3, -1, 0, 1, 3},
217 
218  /* quintic C1 */
219  {-5, -3, -1, 0, 1, 3, 5},
220 
221  /* quintic C2 (same as quintic C1) */
222  {-5, -3, -1, 0, 1, 3, 5},
223 
224  /* septic C1 */
225  {-7, -5, -3, -1, 0, 1, 3, 5, 7},
226 
227  /* septic C3 (same as septic C3) */
228  {-7, -5, -3, -1, 0, 1, 3, 5, 7},
229 
230  /* nonic C1 */
231  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
232 
233  /* nonic C4 (same as nonic C1) */
234  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
235 
236  /* bspline */
237  {-3, -2, -1, 0, 1, 2, 3},
238 };
239 
242 static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL] = {
243  /* cubic */
244  {-1./16, 9./16, 1, 9./16, -1./16},
245 
246  /* quintic C1 */
247  {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
248 
249  /* quintic C2 (same as quintic C1) */
250  {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
251 
252  /* septic C1 */
253  { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
254  -245./2048, 49./2048, -5./2048 },
255 
256  /* septic C3 (same as septic C3) */
257  { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
258  -245./2048, 49./2048, -5./2048 },
259 
260  /* nonic C1 */
261  { 35./65536, -405./65536, 567./16384, -2205./16384,
262  19845./32768, 1, 19845./32768, -2205./16384, 567./16384,
263  -405./65536, 35./65536 },
264 
265  /* nonic C4 (same as nonic C1) */
266  { 35./65536, -405./65536, 567./16384, -2205./16384,
267  19845./32768, 1, 19845./32768, -2205./16384, 567./16384,
268  -405./65536, 35./65536 },
269 
270  /* bspline */
271  { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
272 };
273 
274 
280 #define STENCIL_1D(_phi, _delta, _approx) \
281  do { \
282  double *phi = _phi; \
283  double t = _delta; \
284  switch (_approx) { \
285  case NL_MSM_APPROX_CUBIC: \
286  phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
287  t--; \
288  phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
289  t--; \
290  phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
291  t--; \
292  phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
293  break; \
294  case NL_MSM_APPROX_QUINTIC: \
295  phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
296  t--; \
297  phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \
298  t--; \
299  phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \
300  t--; \
301  phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \
302  t--; \
303  phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \
304  t--; \
305  phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
306  break; \
307  case NL_MSM_APPROX_QUINTIC2: \
308  phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
309  t--; \
310  phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
311  t--; \
312  phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
313  t--; \
314  phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
315  t--; \
316  phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
317  t--; \
318  phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
319  break; \
320  case NL_MSM_APPROX_SEPTIC: \
321  phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
322  t--; \
323  phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
324  t--; \
325  phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
326  t--; \
327  phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
328  t--; \
329  phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
330  t--; \
331  phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
332  t--; \
333  phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
334  t--; \
335  phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
336  break; \
337  case NL_MSM_APPROX_SEPTIC3: \
338  phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \
339  + t*((26383./144) + t*((-22807./720) + t*((727./240) \
340  + t*(-89./720))))))); \
341  t--; \
342  phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \
343  + t*((-66437./144) + t*((81109./720) + t*((-727./48) \
344  + t*(623./720))))))); \
345  t--; \
346  phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \
347  + t*((4557./16) + t*((-9583./80) + t*((2181./80) \
348  + t*(-623./240))))))); \
349  t--; \
350  phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \
351  + t*((-727./48) + t*(623./144))))); \
352  t--; \
353  phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \
354  + t*((-727./48) + t*(-623./144))))); \
355  t--; \
356  phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \
357  + t*((4557./16) + t*((9583./80) + t*((2181./80) \
358  + t*(623./240))))))); \
359  t--; \
360  phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \
361  + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \
362  + t*(-623./720))))))); \
363  t--; \
364  phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \
365  + t*((26383./144) + t*((22807./720) + t*((727./240) \
366  + t*(89./720))))))); \
367  break; \
368  case NL_MSM_APPROX_NONIC: \
369  phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
370  (t-2)*(t-1); \
371  t--; \
372  phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
373  (-8+t*(-35+9*t)); \
374  t--; \
375  phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
376  (-14+t*(-25+9*t)); \
377  t--; \
378  phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
379  (-6+t*(-5+3*t)); \
380  t--; \
381  phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
382  (-20+t*(-5+9*t)); \
383  t--; \
384  phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
385  (-20+t*(5+9*t)); \
386  t--; \
387  phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
388  (-6+t*(5+3*t)); \
389  t--; \
390  phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
391  (-14+t*(25+9*t)); \
392  t--; \
393  phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
394  (-8+t*(35+9*t)); \
395  t--; \
396  phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
397  (t+7)*(t+8); \
398  break; \
399  case NL_MSM_APPROX_NONIC4: \
400  phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \
401  +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \
402  +t*(129391./192+t*(-259715./4032+t*(28909./8064 \
403  +t*(-3569./40320))))))))); \
404  t--; \
405  phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \
406  +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \
407  +t*(156927./448+t*(-28909./1152 \
408  +t*(3569./4480))))))))); \
409  t--; \
410  phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \
411  +t*(3548009./96+t*(-2422263./160+t*(197255./48 \
412  +t*(-19959./28+t*(144545./2016 \
413  +t*(-3569./1120))))))))); \
414  t--; \
415  phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \
416  +t*(-576499./96+t*(686147./160+t*(-96277./48 \
417  +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \
418  t--; \
419  phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \
420  +t*(6337./96+t*(-2745./32+t*(28909./576 \
421  +t*(-3569./320))))))); \
422  t--; \
423  phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \
424  +t*(6337./96+t*(2745./32+t*(28909./576 \
425  +t*(3569./320))))))); \
426  t--; \
427  phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \
428  +t*(-576499./96+t*(-686147./160+t*(-96277./48 \
429  +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \
430  t--; \
431  phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \
432  +t*(3548009./96+t*(2422263./160+t*(197255./48 \
433  +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \
434  t--; \
435  phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \
436  +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \
437  +t*(-156927./448+t*(-28909./1152 \
438  +t*(-3569./4480))))))))); \
439  t--; \
440  phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \
441  +t*(17306975./288+t*(7761805./384+t*(2895587./640 \
442  +t*(129391./192+t*(259715./4032+t*(28909./8064 \
443  +t*(3569./40320))))))))); \
444  break; \
445  case NL_MSM_APPROX_BSPLINE: \
446  phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \
447  t--; \
448  phi[1] = (2./3) + t*t*(-1 + 0.5*t); \
449  t--; \
450  phi[2] = (2./3) + t*t*(-1 - 0.5*t); \
451  t--; \
452  phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \
453  break; \
454  default: \
455  return NL_MSM_ERROR_SUPPORT; \
456  } \
457  } while (0)
458  /* closing ';' from use as function call */
459 
460 
468 #define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) \
469  do { \
470  double *dphi = _dphi; \
471  double *phi = _phi; \
472  double h_1 = _h_1; \
473  double t = _delta; \
474  switch (_approx) { \
475  case NL_MSM_APPROX_CUBIC: \
476  phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
477  dphi[0] = (1.5 * t - 2) * (2 - t) * h_1; \
478  t--; \
479  phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
480  dphi[1] = (-5 + 4.5 * t) * t * h_1; \
481  t--; \
482  phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
483  dphi[2] = (-5 - 4.5 * t) * t * h_1; \
484  t--; \
485  phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
486  dphi[3] = (1.5 * t + 2) * (2 + t) * h_1; \
487  break; \
488  case NL_MSM_APPROX_QUINTIC: \
489  phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
490  dphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t)) \
491  + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1; \
492  t--; \
493  phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \
494  dphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))\
495  + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1; \
496  t--; \
497  phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \
498  dphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t)) \
499  + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1; \
500  t--; \
501  phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \
502  dphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t)) \
503  - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1; \
504  t--; \
505  phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \
506  dphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t)) \
507  - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1; \
508  t--; \
509  phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
510  dphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t)) \
511  + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1; \
512  break; \
513  case NL_MSM_APPROX_QUINTIC2: \
514  phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
515  dphi[0] = ((1./24) * (3-t) * (3-t) * ((3-t)*(5*t-8) - 3*(t-2)*(5*t-8) \
516  + 5*(t-2)*(3-t))) * h_1; \
517  t--; \
518  phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
519  dphi[1] = ((-1./24) * ((2-t)*(-48+t*(153+t*(-114+t*25))) - (t-1)* \
520  (-48+t*(153+t*(-114+t*25))) \
521  + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1; \
522  t--; \
523  phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
524  dphi[2] = ((1./12) * (-(12+t*(12+t*(-3+t*(-38+t*25)))) \
525  + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1; \
526  t--; \
527  phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
528  dphi[3] = ((1./12) * ((12+t*(-12+t*(-3+t*(38+t*25)))) \
529  + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1; \
530  t--; \
531  phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
532  dphi[4] = ((-1./24) * ((2+t)*(48+t*(153+t*(114+t*25))) + (t+1)* \
533  (48+t*(153+t*(114+t*25))) \
534  + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1; \
535  t--; \
536  phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
537  dphi[5] = ((1./24) * (3+t) * (3+t) * ((3+t)*(5*t+8) + 3*(t+2)*(5*t+8) \
538  + 5*(t+2)*(3+t))) * h_1; \
539  break; \
540  case NL_MSM_APPROX_SEPTIC: \
541  phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
542  dphi[0] = (-1./720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807 \
543  +t*(-122+t*7))))) * h_1; \
544  t--; \
545  phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
546  dphi[1] = (1./720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445 \
547  +t*(-750+t*49)))))) * h_1; \
548  t--; \
549  phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
550  dphi[2] = (-1./240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365 \
551  +t*(-450+t*49)))))) * h_1; \
552  t--; \
553  phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
554  dphi[3] = (1./144)*t*(-560+t*(84+t*(644+t*(-175+t*(-150+t*49))))) * \
555  h_1; \
556  t--; \
557  phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
558  dphi[4] = (-1./144)*t*(560+t*(84+t*(-644+t*(-175+t*(150+t*49))))) * \
559  h_1; \
560  t--; \
561  phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
562  dphi[5] = (1./240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365 \
563  +t*(450+t*49)))))) * h_1; \
564  t--; \
565  phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
566  dphi[6] = (-1./720)*(756+t*(9940+t*(17724+t*(12740+t*(4445 \
567  +t*(750+t*49)))))) * h_1; \
568  t--; \
569  phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
570  dphi[7] = (1./720)*(t+4)*(1944+t*(3644+t*(2512+t*(807 \
571  +t*(122+t*7))))) * h_1; \
572  break; \
573  case NL_MSM_APPROX_SEPTIC3: \
574  phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \
575  + t*((26383./144) + t*((-22807./720) + t*((727./240) \
576  + t*(-89./720))))))); \
577  dphi[0] = ((-7456./5) + t*((117572./45) + t*(-1899 + t*((26383./36) \
578  + t*((-22807./144) + t*((727./40) + t*(-623./720)))))))*h_1; \
579  t--; \
580  phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \
581  + t*((-66437./144) + t*((81109./720) + t*((-727./48) \
582  + t*(623./720))))))); \
583  dphi[1] = ((25949./20) + t*((-117131./36) + t*((6741./2) \
584  + t*((-66437./36) + t*((81109./144) + t*((-727./8) \
585  + t*(4361./720))))))) * h_1; \
586  t--; \
587  phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \
588  + t*((4557./16) + t*((-9583./80) + t*((2181./80) \
589  + t*(-623./240))))))); \
590  dphi[2] = ((-8617./60) + t*((12873./20) + t*((-2373./2) + t*((4557./4) \
591  + t*((-9583./16) + t*((6543./40) + t*(-4361./240)))))))*h_1; \
592  t--; \
593  phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \
594  + t*((-727./48) + t*(623./144))))); \
595  dphi[3] = (t*((-49./18) + t*t*((-959./36) + t*((12845./144) \
596  + t*((-727./8) + t*(4361./144)))))) * h_1; \
597  t--; \
598  phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \
599  + t*((-727./48) + t*(-623./144))))); \
600  dphi[4] = (t*((-49./18) + t*t*((-959./36) + t*((-12845./144) \
601  + t*((-727./8) + t*(-4361./144)))))) * h_1; \
602  t--; \
603  phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \
604  + t*((4557./16) + t*((9583./80) + t*((2181./80) \
605  + t*(623./240))))))); \
606  dphi[5] = ((8617./60) + t*((12873./20) + t*((2373./2) + t*((4557./4) \
607  + t*((9583./16) + t*((6543./40) + t*(4361./240)))))))*h_1; \
608  t--; \
609  phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \
610  + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \
611  + t*(-623./720))))))); \
612  dphi[6] = ((-25949./20) + t*((-117131./36) + t*((-6741./2) \
613  + t*((-66437./36) + t*((-81109./144) + t*((-727./8) \
614  + t*(-4361./720))))))) * h_1; \
615  t--; \
616  phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \
617  + t*((26383./144) + t*((22807./720) + t*((727./240) \
618  + t*(89./720))))))); \
619  dphi[7] = ((7456./5) + t*((117572./45) + t*(1899 + t*((26383./36) \
620  + t*((22807./144) + t*((727./40) + t*(623./720)))))))*h_1; \
621  break; \
622  case NL_MSM_APPROX_NONIC: \
623  phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
624  (t-2)*(t-1); \
625  dphi[0] = (-1./40320)*(t-5)*(-117648+t*(256552+t*(-221416 \
626  +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1; \
627  t--; \
628  phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
629  (-8+t*(-35+9*t)); \
630  dphi[1] = (1./40320)*(71856+t*(-795368+t*(1569240+t*(-1357692 \
631  +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1; \
632  t--; \
633  phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
634  (-14+t*(-25+9*t)); \
635  dphi[2] = (1./10080)*(3384+t*(-69080+t*(55026 \
636  +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640 \
637  +t*(-81)))))))))*h_1; \
638  t--; \
639  phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
640  (-6+t*(-5+3*t)); \
641  dphi[3] = (1./1440)*(72+t*(-6344+t*(2070 \
642  +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1; \
643  t--; \
644  phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
645  (-20+t*(-5+9*t)); \
646  dphi[4] = (-1./2880)*t*(10792+t*(-972+t*(-12516 \
647  +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1; \
648  t--; \
649  phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
650  (-20+t*(5+9*t)); \
651  dphi[5] = (1./2880)*t*(-10792+t*(-972+t*(12516 \
652  +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1; \
653  t--; \
654  phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
655  (-6+t*(5+3*t)); \
656  dphi[6] = (1./1440)*(-72+t*(-6344+t*(-2070 \
657  +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1; \
658  t--; \
659  phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
660  (-14+t*(25+9*t)); \
661  dphi[7] = (1./10080)*(-3384+t*(-69080+t*(-55026 \
662  +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1; \
663  t--; \
664  phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
665  (-8+t*(35+9*t)); \
666  dphi[8] = (-1./40320)*(71856+t*(795368+t*(1569240 \
667  +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296 \
668  +t*81))))))))*h_1; \
669  t--; \
670  phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
671  (t+7)*(t+8); \
672  dphi[9] = (1./40320)*(t+5)*(117648+t*(256552+t*(221416 \
673  +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1; \
674  break; \
675  case NL_MSM_APPROX_NONIC4: \
676  phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \
677  +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \
678  +t*(129391./192+t*(-259715./4032+t*(28909./8064 \
679  +t*(-3569./40320))))))))); \
680  dphi[0] = (-64188125./504+t*(231125375./1008 \
681  +t*(-17306975./96+t*(7761805./96+t*(-2895587./128 \
682  +t*(129391./32+t*(-259715./576+t*(28909./1008 \
683  +t*(-3569./4480)))))))))*h_1; \
684  t--; \
685  phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \
686  +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \
687  +t*(156927./448+t*(-28909./1152 \
688  +t*(3569./4480))))))))); \
689  dphi[1] = (8314091./56+t*(-49901303./144+t*(11290587./32 \
690  +t*(-19648027./96+t*(9469163./128+t*(-545977./32 \
691  +t*(156927./64+t*(-28909./144 \
692  +t*(32121./4480)))))))))*h_1; \
693  t--; \
694  phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \
695  +t*(3548009./96+t*(-2422263./160+t*(197255./48 \
696  +t*(-19959./28+t*(144545./2016 \
697  +t*(-3569./1120))))))))); \
698  dphi[2] = (-1038011./28+t*(31157515./252+t*(-2870007./16 \
699  +t*(3548009./24+t*(-2422263./32+t*(197255./8 \
700  +t*(-19959./4+t*(144545./252 \
701  +t*(-32121./1120)))))))))*h_1; \
702  t--; \
703  phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \
704  +t*(-576499./96+t*(686147./160+t*(-96277./48 \
705  +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \
706  dphi[3] = (12757./12+t*(-230123./36+t*(264481./16 \
707  +t*(-576499./24+t*(686147./32+t*(-96277./8 \
708  +t*(99547./24+t*(-28909./36 \
709  +t*(10707./160)))))))))*h_1; \
710  t--; \
711  phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \
712  +t*(6337./96+t*(-2745./32+t*(28909./576 \
713  +t*(-3569./320))))))); \
714  dphi[4] = t*(-205./72+t*t*(91./48+t*(-6181./64 \
715  +t*(6337./16+t*(-19215./32+t*(28909./72 \
716  +t*(-32121./320)))))))*h_1; \
717  t--; \
718  phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \
719  +t*(6337./96+t*(2745./32+t*(28909./576 \
720  +t*(3569./320))))))); \
721  dphi[5] = t*(-205./72+t*t*(91./48+t*(6181./64 \
722  +t*(6337./16+t*(19215./32+t*(28909./72 \
723  +t*(32121./320)))))))*h_1; \
724  t--; \
725  phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \
726  +t*(-576499./96+t*(-686147./160+t*(-96277./48 \
727  +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \
728  dphi[6] = (-12757./12+t*(-230123./36+t*(-264481./16 \
729  +t*(-576499./24+t*(-686147./32+t*(-96277./8 \
730  +t*(-99547./24+t*(-28909./36 \
731  +t*(-10707./160)))))))))*h_1; \
732  t--; \
733  phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \
734  +t*(3548009./96+t*(2422263./160+t*(197255./48 \
735  +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \
736  dphi[7] = (1038011./28+t*(31157515./252+t*(2870007./16 \
737  +t*(3548009./24+t*(2422263./32+t*(197255./8 \
738  +t*(19959./4+t*(144545./252 \
739  +t*(32121./1120)))))))))*h_1; \
740  t--; \
741  phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \
742  +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \
743  +t*(-156927./448+t*(-28909./1152 \
744  +t*(-3569./4480))))))))); \
745  dphi[8] = (-8314091./56+t*(-49901303./144+t*(-11290587./32 \
746  +t*(-19648027./96+t*(-9469163./128+t*(-545977./32 \
747  +t*(-156927./64+t*(-28909./144 \
748  +t*(-32121./4480)))))))))*h_1; \
749  t--; \
750  phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \
751  +t*(17306975./288+t*(7761805./384+t*(2895587./640 \
752  +t*(129391./192+t*(259715./4032+t*(28909./8064 \
753  +t*(3569./40320))))))))); \
754  dphi[9] = (64188125./504+t*(231125375./1008 \
755  +t*(17306975./96+t*(7761805./96+t*(2895587./128 \
756  +t*(129391./32+t*(259715./576+t*(28909./1008 \
757  +t*(3569./4480)))))))))*h_1; \
758  break; \
759  case NL_MSM_APPROX_BSPLINE: \
760  phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \
761  dphi[0] = -0.5 * (2-t) * (2-t) * h_1; \
762  t--; \
763  phi[1] = (2./3) + t*t*(-1 + 0.5*t); \
764  dphi[1] = t*(-2 + 1.5*t) * h_1; \
765  t--; \
766  phi[2] = (2./3) + t*t*(-1 - 0.5*t); \
767  dphi[2] = t*(-2 - 1.5*t) * h_1; \
768  t--; \
769  phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \
770  dphi[3] = 0.5 * (2+t) * (2+t) * h_1; \
771  break; \
772  default: \
773  return NL_MSM_ERROR_SUPPORT; \
774  } \
775  } while (0)
776  /* closing ';' from use as function call */
777 
778 
780 {
781  const double *atom = msm->atom;
782  const int natoms = msm->numatoms;
783 
784  double xphi[MAX_POLY_DEGREE+1]; /* Phi stencil along x-dimension */
785  double yphi[MAX_POLY_DEGREE+1]; /* Phi stencil along y-dimension */
786  double zphi[MAX_POLY_DEGREE+1]; /* Phi stencil along z-dimension */
787  double rx_hx, ry_hy, rz_hz; /* normalized distance from atom to origin */
788 #ifndef TEST_INLINING
789  double delta; /* normalized distance to stencil point */
790 #else
791  double t; /* normalized distance to stencil point */
792 #endif
793  double ck, cjk;
794  const double hx_1 = 1/msm->hx;
795  const double hy_1 = 1/msm->hy;
796  const double hz_1 = 1/msm->hz;
797  const double xm0 = msm->gx; /* grid origin x */
798  const double ym0 = msm->gy; /* grid origin y */
799  const double zm0 = msm->gz; /* grid origin z */
800  double q;
801 
802  NL_Msmgrid_double *qhgrid = &(msm->qh[0]);
803  double *qh = qhgrid->data;
804  const int ni = qhgrid->ni;
805  const int nj = qhgrid->nj;
806  const int nk = qhgrid->nk;
807  const int ia = qhgrid->i0;
808  const int ib = ia + ni - 1;
809  const int ja = qhgrid->j0;
810  const int jb = ja + nj - 1;
811  const int ka = qhgrid->k0;
812  const int kb = ka + nk - 1;
813 
814  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
815  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
816  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
817  int iswithin;
818 
819  int n, i, j, k, ilo, jlo, klo, koff;
820  int jkoff, index;
821 
822  const int approx = msm->approx;
823  const int s_size = PolyDegree[approx] + 1; /* stencil size */
824  const int s_edge = (PolyDegree[approx] - 1) >> 1; /* stencil "edge" size */
825 
826  GRID_ZERO(qhgrid);
827 
828  for (n = 0; n < natoms; n++) {
829 
830  /* atomic charge */
831  q = atom[4*n + 3];
832  if (0==q) continue;
833 
834  /* distance between atom and origin measured in grid points */
835  rx_hx = (atom[4*n ] - xm0) * hx_1;
836  ry_hy = (atom[4*n + 1] - ym0) * hy_1;
837  rz_hz = (atom[4*n + 2] - zm0) * hz_1;
838 
839  /* find smallest numbered grid point in stencil */
840  ilo = (int) floor(rx_hx) - s_edge;
841  jlo = (int) floor(ry_hy) - s_edge;
842  klo = (int) floor(rz_hz) - s_edge;
843 
844  /* calculate Phi stencils along each dimension */
845 #ifndef TEST_INLINING
846  delta = rx_hx - (double) ilo;
847  STENCIL_1D(xphi, delta, approx);
848  delta = ry_hy - (double) jlo;
849  STENCIL_1D(yphi, delta, approx);
850  delta = rz_hz - (double) klo;
851  STENCIL_1D(zphi, delta, approx);
852 #else
853  t = rx_hx - (double) ilo;
854  xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
855  t--; \
856  xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
857  t--; \
858  xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
859  t--; \
860  xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
861 
862  t = ry_hy - (double) jlo;
863  yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
864  t--; \
865  yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
866  t--; \
867  yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
868  t--; \
869  yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
870 
871  t = rz_hz - (double) klo;
872  zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
873  t--; \
874  zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
875  t--; \
876  zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
877  t--; \
878  zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
879 
880 #endif
881 
882  /* test to see if stencil is within edges of grid */
883  iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
884  ja <= jlo && (jlo+(s_size-1)) <= jb &&
885  ka <= klo && (klo+(s_size-1)) <= kb );
886 
887  if ( iswithin ) { /* no wrapping needed */
888 
889  /* determine charge on cube of grid points around atom */
890  for (k = 0; k < s_size; k++) {
891  koff = (k + klo) * nj;
892  ck = zphi[k] * q;
893  for (j = 0; j < s_size; j++) {
894  jkoff = (koff + (j + jlo)) * ni;
895  cjk = yphi[j] * ck;
896  for (i = 0; i < s_size; i++) {
897  index = jkoff + (i + ilo);
898  GRID_INDEX_CHECK(qhgrid, i+ilo, j+jlo, k+klo);
899  ASSERT(GRID_INDEX(qhgrid, i+ilo, j+jlo, k+klo) == index);
900  qh[index] += xphi[i] * cjk;
901  }
902  }
903  }
904  } /* if */
905 
906  else { /* requires wrapping around grid */
907  int ip, jp, kp;
908 
909  /* adjust ilo, jlo, klo so they are within grid indexing */
910  if (ispx) {
911  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
912  else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
913  }
914  else if (ilo < ia || (ilo+(s_size-1)) > ib) {
915  return NL_MSM_ERROR_RANGE;
916  }
917 
918  if (ispy) {
919  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
920  else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
921  }
922  else if (jlo < ja || (jlo+(s_size-1)) > jb) {
923  return NL_MSM_ERROR_RANGE;
924  }
925 
926  if (ispz) {
927  if (klo < ka) do { klo += nk; } while (klo < ka);
928  else if (klo > kb) do { klo -= nk; } while (klo > kb);
929  }
930  else if (klo < ka || (klo+(s_size-1)) > kb) {
931  return NL_MSM_ERROR_RANGE;
932  }
933 
934  /* determine charge on cube of grid points around atom, with wrapping */
935  for (k = 0, kp = klo; k < s_size; k++, kp++) {
936  if (kp > kb) kp = ka; /* wrap stencil around grid */
937  koff = kp * nj;
938  ck = zphi[k] * q;
939  for (j = 0, jp = jlo; j < s_size; j++, jp++) {
940  if (jp > jb) jp = ja; /* wrap stencil around grid */
941  jkoff = (koff + jp) * ni;
942  cjk = yphi[j] * ck;
943  for (i = 0, ip = ilo; i < s_size; i++, ip++) {
944  if (ip > ib) ip = ia; /* wrap stencil around grid */
945  index = jkoff + ip;
946  GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
947  ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == index);
948  qh[index] += xphi[i] * cjk;
949  }
950  }
951  }
952  } /* else */
953 
954  } /* end loop over atoms */
955 
956  return NL_MSM_SUCCESS;
957 } /* anterpolation */
958 
959 
961 {
962  double *f = msm->felec;
963  const double *atom = msm->atom;
964  const int natoms = msm->numatoms;
965 
966  double xphi[MAX_POLY_DEGREE+1]; /* Phi stencil along x-dimension */
967  double yphi[MAX_POLY_DEGREE+1]; /* Phi stencil along y-dimension */
968  double zphi[MAX_POLY_DEGREE+1]; /* Phi stencil along z-dimension */
969  double dxphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along x-dimension */
970  double dyphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along y-dimension */
971  double dzphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along z-dimension */
972  double rx_hx, ry_hy, rz_hz; /* normalized distance from atom to origin */
973 #ifndef TEST_INLINING
974  double delta; /* normalized distance to stencil point */
975 #else
976  double t; /* normalized distance to stencil point */
977 #endif
978  const double hx_1 = 1/msm->hx;
979  const double hy_1 = 1/msm->hy;
980  const double hz_1 = 1/msm->hz;
981  const double xm0 = msm->gx; /* grid origin x */
982  const double ym0 = msm->gy; /* grid origin y */
983  const double zm0 = msm->gz; /* grid origin z */
984  double q;
985  double u = 0;
986 
987  const NL_Msmgrid_double *ehgrid = &(msm->eh[0]);
988  const double *eh = ehgrid->data;
989  const double *ebuf = ehgrid->buffer;
990  const NL_Msmgrid_double *qhgrid = &(msm->qh[0]);
991  const double *qbuf = qhgrid->buffer;
992  const int ni = ehgrid->ni;
993  const int nj = ehgrid->nj;
994  const int nk = ehgrid->nk;
995  const int ia = ehgrid->i0;
996  const int ib = ia + ni - 1;
997  const int ja = ehgrid->j0;
998  const int jb = ja + nj - 1;
999  const int ka = ehgrid->k0;
1000  const int kb = ka + nk - 1;
1001 
1002  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1003  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1004  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1005  int iswithin;
1006 
1007  double fx, fy, fz, cx, cy, cz;
1008 
1009  int n, i, j, k, ilo, jlo, klo, koff;
1010  long jkoff, index;
1011  const int nn = (ni*nj) * nk;
1012 
1013  const int approx = msm->approx;
1014  const int s_size = PolyDegree[approx] + 1; /* stencil size */
1015  const int s_edge = (PolyDegree[approx] - 1) >> 1; /* stencil "edge" size */
1016 
1017  for (n = 0; n < natoms; n++) {
1018 
1019  /* atomic charge */
1020  q = atom[4*n + 3];
1021  if (0==q) continue;
1022 
1023  /* distance between atom and origin measured in grid points */
1024  rx_hx = (atom[4*n ] - xm0) * hx_1;
1025  ry_hy = (atom[4*n + 1] - ym0) * hy_1;
1026  rz_hz = (atom[4*n + 2] - zm0) * hz_1;
1027 
1028  /* find smallest numbered grid point in stencil */
1029  ilo = (int) floor(rx_hx) - s_edge;
1030  jlo = (int) floor(ry_hy) - s_edge;
1031  klo = (int) floor(rz_hz) - s_edge;
1032 
1033  /* calculate Phi stencils and derivatives along each dimension */
1034 #ifndef TEST_INLINING
1035  delta = rx_hx - (double) ilo;
1036  //STENCIL_1D(xphi, delta, approx);
1037  D_STENCIL_1D(dxphi, xphi, hx_1, delta, approx);
1038  delta = ry_hy - (double) jlo;
1039  //STENCIL_1D(yphi, delta, approx);
1040  D_STENCIL_1D(dyphi, yphi, hy_1, delta, approx);
1041  delta = rz_hz - (double) klo;
1042  //STENCIL_1D(zphi, delta, approx);
1043  D_STENCIL_1D(dzphi, zphi, hz_1, delta, approx);
1044 #else
1045  t = rx_hx - (double) ilo;
1046  xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
1047  dxphi[0] = (1.5 * t - 2) * (2 - t) * hx_1; \
1048  t--; \
1049  xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
1050  dxphi[1] = (-5 + 4.5 * t) * t * hx_1; \
1051  t--; \
1052  xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
1053  dxphi[2] = (-5 - 4.5 * t) * t * hx_1; \
1054  t--; \
1055  xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
1056  dxphi[3] = (1.5 * t + 2) * (2 + t) * hx_1; \
1057 
1058  t = ry_hy - (double) jlo;
1059  yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
1060  dyphi[0] = (1.5 * t - 2) * (2 - t) * hy_1; \
1061  t--; \
1062  yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
1063  dyphi[1] = (-5 + 4.5 * t) * t * hy_1; \
1064  t--; \
1065  yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
1066  dyphi[2] = (-5 - 4.5 * t) * t * hy_1; \
1067  t--; \
1068  yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
1069  dyphi[3] = (1.5 * t + 2) * (2 + t) * hy_1; \
1070 
1071  t = rz_hz - (double) klo;
1072  zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
1073  dzphi[0] = (1.5 * t - 2) * (2 - t) * hz_1; \
1074  t--; \
1075  zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
1076  dzphi[1] = (-5 + 4.5 * t) * t * hz_1; \
1077  t--; \
1078  zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
1079  dzphi[2] = (-5 - 4.5 * t) * t * hz_1; \
1080  t--; \
1081  zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
1082  dzphi[3] = (1.5 * t + 2) * (2 + t) * hz_1; \
1083 
1084 #endif
1085 
1086  /* test to see if stencil is within edges of grid */
1087  iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1088  ja <= jlo && (jlo+(s_size-1)) <= jb &&
1089  ka <= klo && (klo+(s_size-1)) <= kb );
1090 
1091  if ( iswithin ) { /* no wrapping needed */
1092 
1093  /* determine force on atom from cube of grid point potentials */
1094  fx = fy = fz = 0;
1095  for (k = 0; k < s_size; k++) {
1096  koff = (k + klo) * nj;
1097  for (j = 0; j < s_size; j++) {
1098  jkoff = (koff + (j + jlo)) * ni;
1099  cx = yphi[j] * zphi[k];
1100  cy = dyphi[j] * zphi[k];
1101  cz = yphi[j] * dzphi[k];
1102  for (i = 0; i < s_size; i++) {
1103  index = jkoff + (i + ilo);
1104  GRID_INDEX_CHECK(ehgrid, i+ilo, j+jlo, k+klo);
1105  ASSERT(GRID_INDEX(ehgrid, i+ilo, j+jlo, k+klo) == index);
1106  fx += eh[index] * dxphi[i] * cx;
1107  fy += eh[index] * xphi[i] * cy;
1108  fz += eh[index] * xphi[i] * cz;
1109  }
1110  }
1111  }
1112  } /* if */
1113 
1114  else { /* requires wrapping around grid */
1115  int ip, jp, kp;
1116 
1117  /* adjust ilo, jlo, klo so they are within grid indexing */
1118  if (ispx) {
1119  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1120  else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
1121  }
1122  else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1123  return NL_MSM_ERROR_RANGE;
1124  }
1125 
1126  if (ispy) {
1127  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1128  else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
1129  }
1130  else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1131  return NL_MSM_ERROR_RANGE;
1132  }
1133 
1134  if (ispz) {
1135  if (klo < ka) do { klo += nk; } while (klo < ka);
1136  else if (klo > kb) do { klo -= nk; } while (klo > kb);
1137  }
1138  else if (klo < ka || (klo+(s_size-1)) > kb) {
1139  return NL_MSM_ERROR_RANGE;
1140  }
1141 
1142  /* determine force on atom from cube of grid point potentials, wrapping */
1143  fx = fy = fz = 0;
1144  for (k = 0, kp = klo; k < s_size; k++, kp++) {
1145  if (kp > kb) kp = ka; /* wrap stencil around grid */
1146  koff = kp * nj;
1147  for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1148  if (jp > jb) jp = ja; /* wrap stencil around grid */
1149  jkoff = (koff + jp) * ni;
1150  cx = yphi[j] * zphi[k];
1151  cy = dyphi[j] * zphi[k];
1152  cz = yphi[j] * dzphi[k];
1153  for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1154  if (ip > ib) ip = ia; /* wrap stencil around grid */
1155  index = jkoff + ip;
1156  GRID_INDEX_CHECK(ehgrid, ip, jp, kp);
1157  ASSERT(GRID_INDEX(ehgrid, ip, jp, kp) == index);
1158  fx += eh[index] * dxphi[i] * cx;
1159  fy += eh[index] * xphi[i] * cy;
1160  fz += eh[index] * xphi[i] * cz;
1161  }
1162  }
1163  }
1164  } /* else */
1165 
1166  /* update force */
1167  f[3*n ] -= q * fx;
1168  f[3*n+1] -= q * fy;
1169  f[3*n+2] -= q * fz;
1170 // printf("force[%d] = %g %g %g\n", n, -q*fx, -q*fy, -q*fz);
1171 
1172  } /* end loop over atoms */
1173 
1174  /* compute potential, subtract self potential */
1175  u = 0;
1176  if (1) {
1177  for (n = 0; n < natoms; n++) {
1178  double q = atom[4*n + 3];
1179  u += q * q;
1180  }
1181  u *= -msm->gzero;
1182  }
1183  for (index = 0; index < nn; index++) {
1184  u += qbuf[index] * ebuf[index];
1185  }
1186  msm->uelec += 0.5 * u;
1187 // printf("MSM long-range potential: %g\n", 0.5 * u);
1188 
1189  return NL_MSM_SUCCESS;
1190 } /* interpolation() */
1191 
1192 
1193 int restriction_factored(NL_Msm *msm, int level) {
1194  /* grids of charge, finer grid and coarser grid */
1195  const NL_Msmgrid_double *qhgrid = &(msm->qh[level]);
1196  const double *qh = qhgrid->data;
1197  NL_Msmgrid_double *q2hgrid = &(msm->qh[level+1]);
1198  double *q2h = q2hgrid->data;
1199 
1200  /* finer grid index ranges and dimensions */
1201  const int ia1 = qhgrid->i0; /* lowest x-index */
1202  const int ib1 = ia1 + qhgrid->ni - 1; /* highest x-index */
1203  const int ja1 = qhgrid->j0; /* lowest y-index */
1204  const int jb1 = ja1 + qhgrid->nj - 1; /* highest y-index */
1205  const int ka1 = qhgrid->k0; /* lowest z-index */
1206  const int kb1 = ka1 + qhgrid->nk - 1; /* highest z-index */
1207  const int ni1 = qhgrid->ni; /* length along x-dim */
1208  const int nj1 = qhgrid->nj; /* length along y-dim */
1209  const int nk1 = qhgrid->nk; /* length along z-dim */
1210 
1211  /* coarser grid index ranges and dimensions */
1212  const int ia2 = q2hgrid->i0; /* lowest x-index */
1213  const int ib2 = ia2 + q2hgrid->ni - 1; /* highest x-index */
1214  const int ja2 = q2hgrid->j0; /* lowest y-index */
1215  const int jb2 = ja2 + q2hgrid->nj - 1; /* highest y-index */
1216  const int ka2 = q2hgrid->k0; /* lowest z-index */
1217  const int kb2 = ka2 + q2hgrid->nk - 1; /* highest z-index */
1218  const int nrow_q2 = q2hgrid->ni;
1219  const int nstride_q2 = nrow_q2 * q2hgrid->nj;
1220 
1221  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1222  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1223  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1224 
1225  /* set buffer using indexing offset, so that indexing matches qh grid */
1226  double *qzd = msm->lzd + (-ka1);
1227  double *qyzd = msm->lyzd + (-ka1*nj1 + -ja1);
1228  double qsum;
1229 
1230  const double *phi = NULL;
1231 
1232  int i2, j2, k2;
1233  int im, jm, km;
1234  int i, j, k;
1235  int index_plane_q2, index_q2;
1236  int index_jk, offset_k;
1237  int offset;
1238 
1239  const double *phi_factored = PhiStencilFactored[msm->approx];
1240  const int r_stencil = PolyDegree[msm->approx]; /* "radius" of stencil */
1241  const int diam_stencil = 2*r_stencil + 1; /* "diameter" of stencil */
1242 
1243  for (i2 = ia2; i2 <= ib2; i2++) {
1244 
1245  for (k = ka1; k <= kb1; k++) {
1246  offset_k = k * nj1;
1247 
1248  for (j = ja1; j <= jb1; j++) {
1249  index_jk = offset_k + j;
1250  offset = index_jk * ni1;
1251  im = (i2 << 1); /* = 2*i2 */
1252  qsum = 0;
1253  if ( ! ispx ) { /* nonperiodic */
1254  int lower = im - r_stencil;
1255  int upper = im + r_stencil;
1256  if (lower < ia1) lower = ia1;
1257  if (upper > ib1) upper = ib1; /* clip edges */
1258  phi = phi_factored + r_stencil; /* center of stencil */
1259  for (i = lower; i <= upper; i++) {
1260  qsum += phi[i-im] * qh[offset + i];
1261  }
1262  }
1263  else { /* periodic */
1264  int ip = im - r_stencil; /* index at left end of stencil */
1265  if (ip < ia1) do { ip += ni1; } while (ip < ia1); /* start inside */
1266  phi = phi_factored; /* left end of stencil */
1267  for (i = 0; i < diam_stencil; i++, ip++) {
1268  if (ip > ib1) ip = ia1; /* wrap around edge of grid */
1269  qsum += phi[i] * qh[offset + ip];
1270  }
1271  }
1272  qyzd[index_jk] = qsum;
1273  } /* for j */
1274 
1275  } /* for k */
1276 
1277  for (j2 = ja2; j2 <= jb2; j2++) {
1278  index_plane_q2 = j2 * nrow_q2 + i2;
1279 
1280  for (k = ka1; k <= kb1; k++) {
1281  offset = k * nj1;
1282  jm = (j2 << 1); /* = 2*j2 */
1283  qsum = 0;
1284  if ( ! ispy ) { /* nonperiodic */
1285  int lower = jm - r_stencil;
1286  int upper = jm + r_stencil;
1287  if (lower < ja1) lower = ja1;
1288  if (upper > jb1) upper = jb1; /* clip edges */
1289  phi = phi_factored + r_stencil; /* center of stencil */
1290  for (j = lower; j <= upper; j++) {
1291  qsum += phi[j-jm] * qyzd[offset + j];
1292  }
1293  }
1294  else { /* periodic */
1295  int jp = jm - r_stencil; /* index at left end of stencil */
1296  if (jp < ja1) do { jp += nj1; } while (jp < ja1); /* start inside */
1297  phi = phi_factored; /* left end of stencil */
1298  for (j = 0; j < diam_stencil; j++, jp++) {
1299  if (jp > jb1) jp = ja1; /* wrap around edge of grid */
1300  qsum += phi[j] * qyzd[offset + jp];
1301  }
1302  }
1303  qzd[k] = qsum;
1304  } /* for k */
1305 
1306  for (k2 = ka2; k2 <= kb2; k2++) {
1307  index_q2 = k2 * nstride_q2 + index_plane_q2;
1308  km = (k2 << 1); /* = 2*k2 */
1309  qsum = 0;
1310  if ( ! ispz ) { /* nonperiodic */
1311  int lower = km - r_stencil;
1312  int upper = km + r_stencil;
1313  if (lower < ka1) lower = ka1;
1314  if (upper > kb1) upper = kb1; /* clip edges */
1315  phi = phi_factored + r_stencil; /* center of stencil */
1316  for (k = lower; k <= upper; k++) {
1317  qsum += phi[k-km] * qzd[k];
1318  }
1319  }
1320  else { /* periodic */
1321  int kp = km - r_stencil; /* index at left end of stencil */
1322  if (kp < ka1) do { kp += nk1; } while (kp < ka1); /* start inside */
1323  phi = phi_factored; /* left end of stencil */
1324  for (k = 0; k < diam_stencil; k++, kp++) {
1325  if (kp > kb1) kp = ka1; /* wrap around edge of grid */
1326  qsum += phi[k] * qzd[kp];
1327  }
1328  }
1329  q2h[index_q2] = qsum;
1330  } /* for k2 */
1331 
1332  } /* for j2 */
1333 
1334  } /* for i2 */
1335 
1336  return NL_MSM_SUCCESS;
1337 } /* restriction_factored */
1338 
1339 
1340 int prolongation_factored(NL_Msm *msm, int level) {
1341  /* grids of potential, finer grid and coarser grid */
1342  NL_Msmgrid_double *ehgrid = &(msm->eh[level]);
1343  double *eh = ehgrid->data;
1344  const NL_Msmgrid_double *e2hgrid = &(msm->eh[level+1]);
1345  const double *e2h = e2hgrid->data;
1346 
1347  /* finer grid index ranges and dimensions */
1348  const int ia1 = ehgrid->i0; /* lowest x-index */
1349  const int ib1 = ia1 + ehgrid->ni - 1; /* highest x-index */
1350  const int ja1 = ehgrid->j0; /* lowest y-index */
1351  const int jb1 = ja1 + ehgrid->nj - 1; /* highest y-index */
1352  const int ka1 = ehgrid->k0; /* lowest z-index */
1353  const int kb1 = ka1 + ehgrid->nk - 1; /* highest z-index */
1354  const int ni1 = ehgrid->ni; /* length along x-dim */
1355  const int nj1 = ehgrid->nj; /* length along y-dim */
1356  const int nk1 = ehgrid->nk; /* length along z-dim */
1357 
1358  /* coarser grid index ranges and dimensions */
1359  const int ia2 = e2hgrid->i0; /* lowest x-index */
1360  const int ib2 = ia2 + e2hgrid->ni - 1; /* highest x-index */
1361  const int ja2 = e2hgrid->j0; /* lowest y-index */
1362  const int jb2 = ja2 + e2hgrid->nj - 1; /* highest y-index */
1363  const int ka2 = e2hgrid->k0; /* lowest z-index */
1364  const int kb2 = ka2 + e2hgrid->nk - 1; /* highest z-index */
1365  const int nrow_e2 = e2hgrid->ni;
1366  const int nstride_e2 = nrow_e2 * e2hgrid->nj;
1367 
1368  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1369  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1370  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1371 
1372  /* set buffer using indexing offset, so that indexing matches eh grid */
1373  double *ezd = msm->lzd + (-ka1);
1374  double *eyzd = msm->lyzd + (-ka1*nj1 + -ja1);
1375 
1376  const size_t size_lzd = nk1 * sizeof(double);
1377  const size_t size_lyzd = nj1 * nk1 * sizeof(double);
1378 
1379  const double *phi = NULL;
1380 
1381  int i2, j2, k2;
1382  int im, jm, km;
1383  int i, j, k;
1384  int index_plane_e2, index_e2;
1385  int index_jk, offset_k;
1386  int offset;
1387 
1388  const double *phi_factored = PhiStencilFactored[msm->approx];
1389  const int r_stencil = PolyDegree[msm->approx]; /* "radius" of stencil */
1390  const int diam_stencil = 2*r_stencil + 1; /* "diameter" of stencil */
1391 
1392  for (i2 = ia2; i2 <= ib2; i2++) {
1393  memset(msm->lyzd, 0, size_lyzd);
1394 
1395  for (j2 = ja2; j2 <= jb2; j2++) {
1396  memset(msm->lzd, 0, size_lzd);
1397  index_plane_e2 = j2 * nrow_e2 + i2;
1398 
1399  for (k2 = ka2; k2 <= kb2; k2++) {
1400  index_e2 = k2 * nstride_e2 + index_plane_e2;
1401  km = (k2 << 1); /* = 2*k2 */
1402  if ( ! ispz ) { /* nonperiodic */
1403  int lower = km - r_stencil;
1404  int upper = km + r_stencil;
1405  if (lower < ka1) lower = ka1;
1406  if (upper > kb1) upper = kb1; /* clip edges */
1407  phi = phi_factored + r_stencil; /* center of stencil */
1408  for (k = lower; k <= upper; k++) {
1409  ezd[k] += phi[k-km] * e2h[index_e2];
1410  }
1411  }
1412  else { /* periodic */
1413  int kp = km - r_stencil; /* index at left end of stencil */
1414  if (kp < ka1) do { kp += nk1; } while (kp < ka1); /* start inside */
1415  phi = phi_factored; /* left end of stencil */
1416  for (k = 0; k < diam_stencil; k++, kp++) {
1417  if (kp > kb1) kp = ka1; /* wrap around edge of grid */
1418  ezd[kp] += phi[k] * e2h[index_e2];
1419  }
1420  }
1421  } /* for k2 */
1422 
1423  for (k = ka1; k <= kb1; k++) {
1424  offset = k * nj1;
1425  jm = (j2 << 1); /* = 2*j2 */
1426  if ( ! ispy ) { /* nonperiodic */
1427  int lower = jm - r_stencil;
1428  int upper = jm + r_stencil;
1429  if (lower < ja1) lower = ja1;
1430  if (upper > jb1) upper = jb1; /* clip edges */
1431  phi = phi_factored + r_stencil; /* center of stencil */
1432  for (j = lower; j <= upper; j++) {
1433  eyzd[offset + j] += phi[j-jm] * ezd[k];
1434  }
1435  }
1436  else { /* periodic */
1437  int jp = jm - r_stencil; /* index at left end of stencil */
1438  if (jp < ja1) do { jp += nj1; } while (jp < ja1); /* start inside */
1439  phi = phi_factored; /* left end of stencil */
1440  for (j = 0; j < diam_stencil; j++, jp++) {
1441  if (jp > jb1) jp = ja1; /* wrap around edge of grid */
1442  eyzd[offset + jp] += phi[j] * ezd[k];
1443  }
1444  }
1445  } /* for k */
1446 
1447  } /* for j2 */
1448 
1449  for (k = ka1; k <= kb1; k++) {
1450  offset_k = k * nj1;
1451 
1452  for (j = ja1; j <= jb1; j++) {
1453  index_jk = offset_k + j;
1454  offset = index_jk * ni1;
1455  im = (i2 << 1); /* = 2*i2 */
1456  if ( ! ispx ) { /* nonperiodic */
1457  int lower = im - r_stencil;
1458  int upper = im + r_stencil;
1459  if (lower < ia1) lower = ia1;
1460  if (upper > ib1) upper = ib1; /* clip edges */
1461  phi = phi_factored + r_stencil; /* center of stencil */
1462  for (i = lower; i <= upper; i++) {
1463  eh[offset + i] += phi[i-im] * eyzd[index_jk];
1464  }
1465  }
1466  else { /* periodic */
1467  int ip = im - r_stencil; /* index at left end of stencil */
1468  if (ip < ia1) do { ip += ni1; } while (ip < ia1); /* start inside */
1469  phi = phi_factored; /* left end of stencil */
1470  for (i = 0; i < diam_stencil; i++, ip++) {
1471  if (ip > ib1) ip = ia1; /* wrap around edge of grid */
1472  eh[offset + ip] += phi[i] * eyzd[index_jk];
1473  }
1474  }
1475  } /* for j */
1476 
1477  } /* for k */
1478 
1479  } /* for i2 */
1480 
1481  return NL_MSM_SUCCESS;
1482 } /* prolongation_factored */
1483 
1484 
1485 int restriction(NL_Msm *msm, int level) {
1486  /* grids of charge, finer grid and coarser grid */
1487  const NL_Msmgrid_double *qhgrid = &(msm->qh[level]);
1488  const double *qh = qhgrid->data; /* index the offset data buffer */
1489  NL_Msmgrid_double *q2hgrid = &(msm->qh[level+1]);
1490  double *q2h_buffer = q2hgrid->buffer; /* index the raw buffer */
1491 
1492  /* finer grid index ranges and dimensions */
1493  const int ia1 = qhgrid->i0; /* lowest x-index */
1494  const int ib1 = ia1 + qhgrid->ni - 1; /* highest x-index */
1495  const int ja1 = qhgrid->j0; /* lowest y-index */
1496  const int jb1 = ja1 + qhgrid->nj - 1; /* highest y-index */
1497  const int ka1 = qhgrid->k0; /* lowest z-index */
1498  const int kb1 = ka1 + qhgrid->nk - 1; /* highest z-index */
1499  const int ni1 = qhgrid->ni; /* length along x-dim */
1500  const int nj1 = qhgrid->nj; /* length along y-dim */
1501  const int nk1 = qhgrid->nk; /* length along z-dim */
1502 
1503  /* coarser grid index ranges and dimensions */
1504  const int ia2 = q2hgrid->i0; /* lowest x-index */
1505  const int ib2 = ia2 + q2hgrid->ni - 1; /* highest x-index */
1506  const int ja2 = q2hgrid->j0; /* lowest y-index */
1507  const int jb2 = ja2 + q2hgrid->nj - 1; /* highest y-index */
1508  const int ka2 = q2hgrid->k0; /* lowest z-index */
1509  const int kb2 = ka2 + q2hgrid->nk - 1; /* highest z-index */
1510 
1511  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1512  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1513  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1514 
1515  const int nstencil = Nstencil[msm->approx];
1516  const int *offset = IndexOffset[msm->approx];
1517  const double *phi = PhiStencil[msm->approx];
1518 
1519  double q2h_sum, cjk;
1520 
1521  int i1, j1, k1, index1, jk1off, k1off;
1522  int i2, j2, k2;
1523  int index2;
1524  int i, j, k;
1525 
1526  for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
1527  k1 = k2 * 2;
1528  for (j2 = ja2; j2 <= jb2; j2++) {
1529  j1 = j2 * 2;
1530  for (i2 = ia2; i2 <= ib2; i2++, index2++) {
1531  i1 = i2 * 2;
1532 
1533  q2h_sum = 0;
1534  for (k = 0; k < nstencil; k++) {
1535  k1off = k1 + offset[k];
1536  if (k1off < ka1) {
1537  if (ispz) do { k1off += nk1; } while (k1off < ka1);
1538  else continue;
1539  }
1540  else if (k1off > kb1) {
1541  if (ispz) do { k1off -= nk1; } while (k1off > kb1);
1542  else break;
1543  }
1544  k1off *= nj1;
1545  for (j = 0; j < nstencil; j++) {
1546  jk1off = j1 + offset[j];
1547  if (jk1off < ja1) {
1548  if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
1549  else continue;
1550  }
1551  else if (jk1off > jb1) {
1552  if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
1553  else break;
1554  }
1555  jk1off = (k1off + jk1off) * ni1;
1556  cjk = phi[j] * phi[k];
1557  for (i = 0; i < nstencil; i++) {
1558  index1 = i1 + offset[i];
1559  if (index1 < ia1) {
1560  if (ispx) do { index1 += ni1; } while (index1 < ia1);
1561  else continue;
1562  }
1563  else if (index1 > ib1) {
1564  if (ispx) do { index1 -= ni1; } while (index1 > ib1);
1565  else break;
1566  }
1567  index1 += jk1off;
1568  q2h_sum += qh[index1] * phi[i] * cjk;
1569  }
1570  }
1571  } /* end loop over finer grid stencil */
1572 
1573  q2h_buffer[index2] = q2h_sum; /* store charge to coarser grid */
1574  }
1575  }
1576  } /* end loop over each coarser grid point */
1577 
1578  return NL_MSM_SUCCESS;
1579 }
1580 
1581 
1582 int prolongation(NL_Msm *msm, int level) {
1583  /* grids of charge, finer grid and coarser grid */
1584  NL_Msmgrid_double *ehgrid = &(msm->eh[level]);
1585  double *eh = ehgrid->data; /* index the offset data buffer */
1586  const NL_Msmgrid_double *e2hgrid = &(msm->eh[level+1]);
1587  const double *e2h_buffer = e2hgrid->buffer; /* index the raw buffer */
1588 
1589  /* finer grid index ranges and dimensions */
1590  const int ia1 = ehgrid->i0; /* lowest x-index */
1591  const int ib1 = ia1 + ehgrid->ni - 1; /* highest x-index */
1592  const int ja1 = ehgrid->j0; /* lowest y-index */
1593  const int jb1 = ja1 + ehgrid->nj - 1; /* highest y-index */
1594  const int ka1 = ehgrid->k0; /* lowest z-index */
1595  const int kb1 = ka1 + ehgrid->nk - 1; /* highest z-index */
1596  const int ni1 = ehgrid->ni; /* length along x-dim */
1597  const int nj1 = ehgrid->nj; /* length along y-dim */
1598  const int nk1 = ehgrid->nk; /* length along z-dim */
1599 
1600  /* coarser grid index ranges and dimensions */
1601  const int ia2 = e2hgrid->i0; /* lowest x-index */
1602  const int ib2 = ia2 + e2hgrid->ni - 1; /* highest x-index */
1603  const int ja2 = e2hgrid->j0; /* lowest y-index */
1604  const int jb2 = ja2 + e2hgrid->nj - 1; /* highest y-index */
1605  const int ka2 = e2hgrid->k0; /* lowest z-index */
1606  const int kb2 = ka2 + e2hgrid->nk - 1; /* highest z-index */
1607 
1608  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1609  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1610  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1611 
1612  const int nstencil = Nstencil[msm->approx];
1613  const int *offset = IndexOffset[msm->approx];
1614  const double *phi = PhiStencil[msm->approx];
1615 
1616  double cjk;
1617 
1618  int i1, j1, k1, index1, jk1off, k1off;
1619  int i2, j2, k2;
1620  int index2;
1621  int i, j, k;
1622 
1623  for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
1624  k1 = k2 * 2;
1625  for (j2 = ja2; j2 <= jb2; j2++) {
1626  j1 = j2 * 2;
1627  for (i2 = ia2; i2 <= ib2; i2++, index2++) {
1628  i1 = i2 * 2;
1629 
1630  for (k = 0; k < nstencil; k++) {
1631  k1off = k1 + offset[k];
1632  if (k1off < ka1) {
1633  if (ispz) do { k1off += nk1; } while (k1off < ka1);
1634  else continue;
1635  }
1636  else if (k1off > kb1) {
1637  if (ispz) do { k1off -= nk1; } while (k1off > kb1);
1638  else break;
1639  }
1640  k1off *= nj1;
1641  for (j = 0; j < nstencil; j++) {
1642  jk1off = j1 + offset[j];
1643  if (jk1off < ja1) {
1644  if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
1645  else continue;
1646  }
1647  else if (jk1off > jb1) {
1648  if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
1649  else break;
1650  }
1651  jk1off = (k1off + jk1off) * ni1;
1652  cjk = phi[j] * phi[k];
1653  for (i = 0; i < nstencil; i++) {
1654  index1 = i1 + offset[i];
1655  if (index1 < ia1) {
1656  if (ispx) do { index1 += ni1; } while (index1 < ia1);
1657  else continue;
1658  }
1659  else if (index1 > ib1) {
1660  if (ispx) do { index1 -= ni1; } while (index1 > ib1);
1661  else break;
1662  }
1663  index1 += jk1off;
1664  eh[index1] += e2h_buffer[index2] * phi[i] * cjk;
1665  }
1666  }
1667  } /* end loop over finer grid stencil */
1668 
1669  }
1670  }
1671  } /* end loop over each coarser grid point */
1672 
1673  return NL_MSM_SUCCESS;
1674 }
1675 
1676 
1677 int gridcutoff(NL_Msm *msm, int level)
1678 {
1679  double eh_sum;
1680 
1681  /* grids of charge and potential */
1682  const NL_Msmgrid_double *qhgrid = &(msm->qh[level]);
1683  const double *qh = qhgrid->data;
1684  NL_Msmgrid_double *ehgrid = &(msm->eh[level]);
1685  double *eh = ehgrid->data;
1686  const int ia = qhgrid->i0; /* lowest x-index */
1687  const int ib = ia + qhgrid->ni - 1; /* highest x-index */
1688  const int ja = qhgrid->j0; /* lowest y-index */
1689  const int jb = ja + qhgrid->nj - 1; /* highest y-index */
1690  const int ka = qhgrid->k0; /* lowest z-index */
1691  const int kb = ka + qhgrid->nk - 1; /* highest z-index */
1692  const int ni = qhgrid->ni; /* length along x-dim */
1693  const int nj = qhgrid->nj; /* length along y-dim */
1694  const int nk = qhgrid->nk; /* length along z-dim */
1695 
1696  /* grid of weights for pairwise grid point interactions within cutoff */
1697  const NL_Msmgrid_double *gcgrid = &(msm->gc[level]);
1698  const double *gc = gcgrid->data;
1699  const int gia = gcgrid->i0; /* lowest x-index */
1700  const int gib = gia + gcgrid->ni - 1; /* highest x-index */
1701  const int gja = gcgrid->j0; /* lowest y-index */
1702  const int gjb = gja + gcgrid->nj - 1; /* highest y-index */
1703  const int gka = gcgrid->k0; /* lowest z-index */
1704  const int gkb = gka + gcgrid->nk - 1; /* highest z-index */
1705  const int gni = gcgrid->ni; /* length along x-dim */
1706  const int gnj = gcgrid->nj; /* length along y-dim */
1707 
1708  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1709  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1710  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1711 
1712  const int ispnone = !(ispx || ispy || ispz);
1713 
1714  int i, j, k;
1715  int gia_clip, gib_clip;
1716  int gja_clip, gjb_clip;
1717  int gka_clip, gkb_clip;
1718  int koff;
1719  long jkoff, index;
1720  int id, jd, kd;
1721  int knoff;
1722  long jknoff, nindex;
1723  int kgoff, jkgoff, ngindex;
1724 
1725  if ( ispnone ) { /* non-periodic boundaries */
1726 
1727  /* loop over all grid points */
1728  for (k = ka; k <= kb; k++) {
1729 
1730  /* clip gc ranges to keep offset for k index within grid */
1731  gka_clip = (k + gka < ka ? ka - k : gka);
1732  gkb_clip = (k + gkb > kb ? kb - k : gkb);
1733 
1734  koff = k * nj; /* find eh flat index */
1735 
1736  for (j = ja; j <= jb; j++) {
1737 
1738  /* clip gc ranges to keep offset for j index within grid */
1739  gja_clip = (j + gja < ja ? ja - j : gja);
1740  gjb_clip = (j + gjb > jb ? jb - j : gjb);
1741 
1742  jkoff = (koff + j) * ni; /* find eh flat index */
1743 
1744  for (i = ia; i <= ib; i++) {
1745 
1746  /* clip gc ranges to keep offset for i index within grid */
1747  gia_clip = (i + gia < ia ? ia - i : gia);
1748  gib_clip = (i + gib > ib ? ib - i : gib);
1749 
1750  index = jkoff + i; /* eh flat index */
1751 
1752  /* sum over "sphere" of weighted charge */
1753  eh_sum = 0;
1754  for (kd = gka_clip; kd <= gkb_clip; kd++) {
1755  knoff = (k + kd) * nj; /* find qh flat index */
1756  kgoff = kd * gnj; /* find gc flat index */
1757 
1758  for (jd = gja_clip; jd <= gjb_clip; jd++) {
1759  jknoff = (knoff + (j + jd)) * ni; /* find qh flat index */
1760  jkgoff = (kgoff + jd) * gni; /* find gc flat index */
1761 
1762  for (id = gia_clip; id <= gib_clip; id++) {
1763  nindex = jknoff + (i + id); /* qh flat index */
1764  ngindex = jkgoff + id; /* gc flat index */
1765 
1766  GRID_INDEX_CHECK(qhgrid, i+id, j+jd, k+kd);
1767  ASSERT(GRID_INDEX(qhgrid, i+id, j+jd, k+kd) == nindex);
1768 
1769  GRID_INDEX_CHECK(gcgrid, id, jd, kd);
1770  ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
1771 
1772  eh_sum += qh[nindex] * gc[ngindex]; /* sum weighted charge */
1773  }
1774  }
1775  } /* end loop over "sphere" of charge */
1776 
1777  GRID_INDEX_CHECK(ehgrid, i, j, k);
1778  ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
1779  eh[index] = eh_sum; /* store potential */
1780  }
1781  }
1782  } /* end loop over all grid points */
1783 
1784  } /* if nonperiodic boundaries */
1785  else {
1786  /* some boundary is periodic */
1787  int ilo, jlo, klo;
1788  int ip, jp, kp;
1789 
1790  /* loop over all grid points */
1791  for (k = ka; k <= kb; k++) {
1792  klo = k + gka;
1793  if ( ! ispz ) { /* nonperiodic z */
1794  /* clip gc ranges to keep offset for k index within grid */
1795  gka_clip = (k + gka < ka ? ka - k : gka);
1796  gkb_clip = (k + gkb > kb ? kb - k : gkb);
1797  if (klo < ka) klo = ka; /* keep lowest qh index within grid */
1798  }
1799  else { /* periodic z */
1800  gka_clip = gka;
1801  gkb_clip = gkb;
1802  if (klo < ka) do { klo += nk; } while (klo < ka);
1803  }
1804  ASSERT(klo <= kb);
1805 
1806  koff = k * nj; /* find eh flat index */
1807 
1808  for (j = ja; j <= jb; j++) {
1809  jlo = j + gja;
1810  if ( ! ispy ) { /* nonperiodic y */
1811  /* clip gc ranges to keep offset for j index within grid */
1812  gja_clip = (j + gja < ja ? ja - j : gja);
1813  gjb_clip = (j + gjb > jb ? jb - j : gjb);
1814  if (jlo < ja) jlo = ja; /* keep lowest qh index within grid */
1815  }
1816  else { /* periodic y */
1817  gja_clip = gja;
1818  gjb_clip = gjb;
1819  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1820  }
1821  ASSERT(jlo <= jb);
1822 
1823  jkoff = (koff + j) * ni; /* find eh flat index */
1824 
1825  for (i = ia; i <= ib; i++) {
1826  ilo = i + gia;
1827  if ( ! ispx ) { /* nonperiodic x */
1828  /* clip gc ranges to keep offset for i index within grid */
1829  gia_clip = (i + gia < ia ? ia - i : gia);
1830  gib_clip = (i + gib > ib ? ib - i : gib);
1831  if (ilo < ia) ilo = ia; /* keep lowest qh index within grid */
1832  }
1833  else { /* periodic x */
1834  gia_clip = gia;
1835  gib_clip = gib;
1836  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1837  }
1838  ASSERT(ilo <= ib);
1839 
1840  index = jkoff + i; /* eh flat index */
1841 
1842  /* sum over "sphere" of weighted charge */
1843  eh_sum = 0;
1844  for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
1845  /* clipping makes conditional always fail for nonperiodic */
1846  if (kp > kb) kp = ka; /* wrap z direction */
1847  knoff = kp * nj; /* find qh flat index */
1848  kgoff = kd * gnj; /* find gc flat index */
1849 
1850  for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
1851  /* clipping makes conditional always fail for nonperiodic */
1852  if (jp > jb) jp = ja; /* wrap y direction */
1853  jknoff = (knoff + jp) * ni; /* find qh flat index */
1854  jkgoff = (kgoff + jd) * gni; /* find gc flat index */
1855 
1856  for (id = gia_clip, ip = ilo; id <= gib_clip; id++, ip++) {
1857  /* clipping makes conditional always fail for nonperiodic */
1858  if (ip > ib) ip = ia; /* wrap x direction */
1859  nindex = jknoff + ip; /* qh flat index */
1860  ngindex = jkgoff + id; /* gc flat index */
1861 
1862  GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
1863  ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == nindex);
1864 
1865  GRID_INDEX_CHECK(gcgrid, id, jd, kd);
1866  ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
1867 
1868  eh_sum += qh[nindex] * gc[ngindex]; /* sum weighted charge */
1869 
1870  }
1871  }
1872  } /* end loop over "sphere" of charge */
1873 
1874  GRID_INDEX_CHECK(ehgrid, i, j, k);
1875  ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
1876  eh[index] = eh_sum; /* store potential */
1877  }
1878  }
1879  } /* end loop over all grid points */
1880 
1881  } /* else some boundary is periodic */
1882 
1883  return NL_MSM_SUCCESS;
1884 }
double gz
Definition: msm_defn.h:647
double wkf_timer_time(wkf_timerhandle v)
Definition: wkfutils.c:134
static int prolongation_factored(NL_Msm *, int level)
Definition: msm_longrng.c:1340
double gzero
Definition: msm_defn.h:650
int msmflags
Definition: msm_defn.h:590
NL_Msmgrid_double * eh
Definition: msm_defn.h:667
NL_Msmgrid_double * gc
Definition: msm_defn.h:668
double * felec
Definition: msm_defn.h:568
static int restriction_factored(NL_Msm *, int level)
Definition: msm_longrng.c:1193
double * lzd
Definition: msm_defn.h:677
static const int Nstencil[NL_MSM_APPROX_END]
Definition: msm_longrng.c:208
static int anterpolation(NL_Msm *)
Definition: msm_longrng.c:779
int approx
Definition: msm_defn.h:642
#define GRID_INDEX_CHECK(a, _i, _j, _k)
Definition: msm_defn.h:114
int numatoms
Definition: msm_defn.h:599
static const double PhiStencilFactored[NL_MSM_APPROX_END][2 *MAX_POLY_DEGREE+1]
Definition: msm_longrng.c:170
double hy
Definition: msm_defn.h:634
static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL]
Definition: msm_longrng.c:214
void wkf_timer_stop(wkf_timerhandle v)
Definition: wkfutils.c:129
static int restriction(NL_Msm *, int level)
Definition: msm_longrng.c:1485
double uelec
Definition: msm_defn.h:566
#define GRID_ZERO(_p)
Definition: msm_defn.h:98
#define GRID_INDEX(_p, _i, _j, _k)
Definition: msm_defn.h:66
double * lyzd
Definition: msm_defn.h:678
#define ASSERT(E)
wkf_timerhandle timer_longrng
Definition: msm_defn.h:685
NL_Msmgrid_double * qh
Definition: msm_defn.h:666
double gy
Definition: msm_defn.h:647
int NL_msm_compute_long_range(NL_Msm *pm)
Definition: msm_longrng.c:52
#define STENCIL_1D(_phi, _delta, _approx)
Definition: msm_longrng.c:280
const double * atom
Definition: msm_defn.h:569
double gx
Definition: msm_defn.h:647
static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL]
Definition: msm_longrng.c:242
double hz
Definition: msm_defn.h:634
static int interpolation(NL_Msm *)
Definition: msm_longrng.c:960
double hx
Definition: msm_defn.h:634
static int gridcutoff(NL_Msm *, int level)
Definition: msm_longrng.c:1677
void wkf_timer_start(wkf_timerhandle v)
Definition: wkfutils.c:124
static const int PolyDegree[NL_MSM_APPROX_END]
Definition: msm_longrng.c:163
int nlevels
Definition: msm_defn.h:645
#define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx)
Definition: msm_longrng.c:468
static int prolongation(NL_Msm *, int level)
Definition: msm_longrng.c:1582
int report_timings
Definition: msm_defn.h:683