NAMD
msm_longrng_sprec.c
Go to the documentation of this file.
1 
41 #include "msm_defn.h"
42 
43 static int anterpolation(NL_Msm *);
44 static int interpolation(NL_Msm *);
45 static int restriction(NL_Msm *, int level);
46 static int prolongation(NL_Msm *, int level);
47 static int restriction_factored(NL_Msm *, int level);
48 static int prolongation_factored(NL_Msm *, int level);
49 static int gridcutoff(NL_Msm *, int level);
50 
51 
53  double time_delta;
54  int rc = 0;
55  int level;
56  int nlevels = msm->nlevels;
57  int use_cuda_gridcutoff = (msm->msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF);
58  int fallback_cpu = (msm->msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK);
59  int use_nonfactored = (msm->msmflags & NL_MSM_COMPUTE_NONFACTORED);
60 
62  rc = anterpolation(msm);
63  if (rc) return rc;
65  time_delta = wkf_timer_time(msm->timer_longrng);
66  if (msm->report_timings) {
67  printf("MSM anterpolation: %6.3f sec\n", time_delta);
68  }
69 
71  if (use_nonfactored) {
72  for (level = 0; level < nlevels - 1; level++) {
73  rc = restriction(msm, level);
74  if (rc) return rc;
75  }
76  }
77  else {
78  for (level = 0; level < nlevels - 1; level++) {
79  rc = restriction_factored(msm, level);
80  if (rc) return rc;
81  }
82  }
84  time_delta = wkf_timer_time(msm->timer_longrng);
85  if (msm->report_timings) {
86  printf("MSM restriction: %6.3f sec\n", time_delta);
87  }
88 
90  if (use_cuda_gridcutoff && nlevels > 1) {
91 #ifdef NL_USE_CUDA
92  if ((rc = NL_msm_cuda_condense_qgrids(msm)) != NL_MSM_SUCCESS ||
93  (rc = NL_msm_cuda_compute_gridcutoff(msm)) != NL_MSM_SUCCESS ||
94  (rc = NL_msm_cuda_expand_egrids(msm)) != NL_MSM_SUCCESS) {
95  if (fallback_cpu) {
96  printf("Falling back on CPU for grid cutoff computation\n");
97  use_cuda_gridcutoff = 0;
98  }
99  else return rc;
100  }
101 #else
102  if (fallback_cpu) {
103  printf("Falling back on CPU for grid cutoff computation\n");
104  use_cuda_gridcutoff = 0;
105  }
106  else return NL_MSM_ERROR_SUPPORT;
107 #endif
108  }
109 
110  if ( ! use_cuda_gridcutoff ) {
111  for (level = 0; level < nlevels - 1; level++) {
112  rc = gridcutoff(msm, level);
113  if (rc) return rc;
114  }
115  }
116 
117  rc = gridcutoff(msm, level); /* top level */
118  if (rc) return rc;
119 
121  time_delta = wkf_timer_time(msm->timer_longrng);
122  if (msm->report_timings) {
123  printf("MSM grid cutoff: %6.3f sec\n", time_delta);
124  }
125 
127  if (use_nonfactored) {
128  for (level--; level >= 0; level--) {
129  rc = prolongation(msm, level);
130  if (rc) return rc;
131  }
132  }
133  else {
134  for (level--; level >= 0; level--) {
135  rc = prolongation_factored(msm, level);
136  if (rc) return rc;
137  }
138  }
140  time_delta = wkf_timer_time(msm->timer_longrng);
141  if (msm->report_timings) {
142  printf("MSM prolongation: %6.3f sec\n", time_delta);
143  }
144 
146  rc = interpolation(msm);
147  if (rc) return rc;
149  time_delta = wkf_timer_time(msm->timer_longrng);
150  if (msm->report_timings) {
151  printf("MSM interpolation: %6.3f sec\n", time_delta);
152  }
153 
154  return 0;
155 }
156 
157 
159 enum { MAX_POLY_DEGREE = 9 };
160 
163 static const int PolyDegree[NL_MSM_APPROX_END] = {
164  3, 5, 5, 7, 7, 9, 9, 3,
165 };
166 
169 static const float
171  /* cubic */
172  {-1.f/16, 0, 9.f/16, 1, 9.f/16, 0, -1.f/16},
173 
174  /* quintic C1 */
175  {3.f/256, 0, -25.f/256, 0, 75.f/128, 1, 75.f/128, 0, -25.f/256, 0, 3.f/256},
176 
177  /* quintic C2 (same as quintic C1) */
178  {3.f/256, 0, -25.f/256, 0, 75.f/128, 1, 75.f/128, 0, -25.f/256, 0, 3.f/256},
179 
180  /* septic C1 */
181  { -5.f/2048, 0, 49.f/2048, 0, -245.f/2048, 0, 1225.f/2048, 1, 1225.f/2048,
182  0, -245.f/2048, 0, 49.f/2048, 0, -5.f/2048 },
183 
184  /* septic C3 (same as septic C3) */
185  { -5.f/2048, 0, 49.f/2048, 0, -245.f/2048, 0, 1225.f/2048, 1, 1225.f/2048,
186  0, -245.f/2048, 0, 49.f/2048, 0, -5.f/2048 },
187 
188  /* nonic C1 */
189  { 35.f/65536, 0, -405.f/65536, 0, 567.f/16384, 0, -2205.f/16384, 0,
190  19845.f/32768, 1, 19845.f/32768, 0, -2205.f/16384, 0, 567.f/16384, 0,
191  -405.f/65536, 0, 35.f/65536 },
192 
193  /* nonic C4 (same as nonic C1) */
194  { 35.f/65536, 0, -405.f/65536, 0, 567.f/16384, 0, -2205.f/16384, 0,
195  19845.f/32768, 1, 19845.f/32768, 0, -2205.f/16384, 0, 567.f/16384, 0,
196  -405.f/65536, 0, 35.f/65536 },
197 
198  /* bspline */
199  { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
200 };
201 
202 
205 enum { MAX_NSTENCIL = 11 };
206 
208 static const int Nstencil[NL_MSM_APPROX_END] = {
209  5, 7, 7, 9, 9, 11, 11, 7
210 };
211 
215  /* cubic */
216  {-3, -1, 0, 1, 3},
217 
218  /* quintic C1 */
219  {-5, -3, -1, 0, 1, 3, 5},
220 
221  /* quintic C2 (same as quintic C1) */
222  {-5, -3, -1, 0, 1, 3, 5},
223 
224  /* septic C1 */
225  {-7, -5, -3, -1, 0, 1, 3, 5, 7},
226 
227  /* septic C3 (same as septic C3) */
228  {-7, -5, -3, -1, 0, 1, 3, 5, 7},
229 
230  /* nonic C1 */
231  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
232 
233  /* nonic C4 (same as nonic C1) */
234  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
235 
236  /* bspline */
237  {-3, -2, -1, 0, 1, 2, 3},
238 };
239 
242 static const float PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL] = {
243  /* cubic */
244  {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
245 
246  /* quintic C1 */
247  {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
248 
249  /* quintic C2 (same as quintic C1) */
250  {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
251 
252  /* septic C1 */
253  { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
254  -245.f/2048, 49.f/2048, -5.f/2048 },
255 
256  /* septic C3 (same as septic C3) */
257  { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
258  -245.f/2048, 49.f/2048, -5.f/2048 },
259 
260  /* nonic C1 */
261  { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
262  19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
263  -405.f/65536, 35.f/65536 },
264 
265  /* nonic C4 (same as nonic C1) */
266  { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
267  19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
268  -405.f/65536, 35.f/65536 },
269 
270  /* bspline */
271  { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
272 };
273 
274 
280 #define STENCIL_1D(_phi, _delta, _approx) \
281  do { \
282  float *phi = _phi; \
283  float t = _delta; \
284  switch (_approx) { \
285  case NL_MSM_APPROX_CUBIC: \
286  phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
287  t--; \
288  phi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
289  t--; \
290  phi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
291  t--; \
292  phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
293  break; \
294  case NL_MSM_APPROX_QUINTIC: \
295  phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
296  t--; \
297  phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6) + t * (0.375f - (5.f/24)*t));\
298  t--; \
299  phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t)); \
300  t--; \
301  phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t)); \
302  t--; \
303  phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6) - t * (0.375f + (5.f/24)*t));\
304  t--; \
305  phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
306  break; \
307  case NL_MSM_APPROX_QUINTIC2: \
308  phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
309  t--; \
310  phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
311  t--; \
312  phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
313  t--; \
314  phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
315  t--; \
316  phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
317  t--; \
318  phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
319  break; \
320  case NL_MSM_APPROX_SEPTIC: \
321  phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
322  t--; \
323  phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
324  t--; \
325  phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
326  t--; \
327  phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
328  t--; \
329  phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
330  t--; \
331  phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
332  t--; \
333  phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
334  t--; \
335  phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
336  break; \
337  case NL_MSM_APPROX_SEPTIC3: \
338  phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633 \
339  + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240) \
340  + t*(-89.f/720))))))); \
341  t--; \
342  phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2) \
343  + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48) \
344  + t*(623.f/720))))))); \
345  t--; \
346  phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2) \
347  + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80) \
348  + t*(-623.f/240))))))); \
349  t--; \
350  phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144) \
351  + t*((-727.f/48) + t*(623.f/144))))); \
352  t--; \
353  phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144) \
354  + t*((-727.f/48) + t*(-623.f/144))))); \
355  t--; \
356  phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2) \
357  + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80) \
358  + t*(623.f/240))))))); \
359  t--; \
360  phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2) \
361  + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48) \
362  + t*(-623.f/720))))))); \
363  t--; \
364  phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633 \
365  + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240) \
366  + t*(89.f/720))))))); \
367  break; \
368  case NL_MSM_APPROX_NONIC: \
369  phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
370  (t-2)*(t-1); \
371  t--; \
372  phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
373  (-8+t*(-35+9*t)); \
374  t--; \
375  phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
376  (-14+t*(-25+9*t)); \
377  t--; \
378  phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
379  (-6+t*(-5+3*t)); \
380  t--; \
381  phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
382  (-20+t*(-5+9*t)); \
383  t--; \
384  phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
385  (-20+t*(5+9*t)); \
386  t--; \
387  phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
388  (-6+t*(5+3*t)); \
389  t--; \
390  phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
391  (-14+t*(25+9*t)); \
392  t--; \
393  phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
394  (-8+t*(35+9*t)); \
395  t--; \
396  phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
397  (t+7)*(t+8); \
398  break; \
399  case NL_MSM_APPROX_NONIC4: \
400  phi[0] = 439375.f/7+t*(-64188125.f/504+t*(231125375.f/2016 \
401  +t*(-17306975.f/288+t*(7761805.f/384+t*(-2895587.f/640 \
402  +t*(129391.f/192+t*(-259715.f/4032+t*(28909.f/8064 \
403  +t*(-3569.f/40320))))))))); \
404  t--; \
405  phi[1] = -56375+t*(8314091.f/56+t*(-49901303.f/288+t*(3763529.f/32 \
406  +t*(-19648027.f/384+t*(9469163.f/640+t*(-545977.f/192 \
407  +t*(156927.f/448+t*(-28909.f/1152 \
408  +t*(3569.f/4480))))))))); \
409  t--; \
410  phi[2] = 68776.f/7+t*(-1038011.f/28+t*(31157515.f/504+t*(-956669.f/16 \
411  +t*(3548009.f/96+t*(-2422263.f/160+t*(197255.f/48 \
412  +t*(-19959.f/28+t*(144545.f/2016 \
413  +t*(-3569.f/1120))))))))); \
414  t--; \
415  phi[3] = -154+t*(12757.f/12+t*(-230123.f/72+t*(264481.f/48 \
416  +t*(-576499.f/96+t*(686147.f/160+t*(-96277.f/48 \
417  +t*(14221.f/24+t*(-28909.f/288+t*(3569.f/480))))))))); \
418  t--; \
419  phi[4] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(-6181.f/320 \
420  +t*(6337.f/96+t*(-2745.f/32+t*(28909.f/576 \
421  +t*(-3569.f/320))))))); \
422  t--; \
423  phi[5] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(6181.f/320 \
424  +t*(6337.f/96+t*(2745.f/32+t*(28909.f/576 \
425  +t*(3569.f/320))))))); \
426  t--; \
427  phi[6] = -154+t*(-12757.f/12+t*(-230123.f/72+t*(-264481.f/48 \
428  +t*(-576499.f/96+t*(-686147.f/160+t*(-96277.f/48 \
429  +t*(-14221.f/24+t*(-28909.f/288+t*(-3569.f/480))))))))); \
430  t--; \
431  phi[7] = 68776.f/7+t*(1038011.f/28+t*(31157515.f/504+t*(956669.f/16 \
432  +t*(3548009.f/96+t*(2422263.f/160+t*(197255.f/48 \
433  +t*(19959.f/28+t*(144545.f/2016+t*(3569.f/1120))))))))); \
434  t--; \
435  phi[8] = -56375+t*(-8314091.f/56+t*(-49901303.f/288+t*(-3763529.f/32 \
436  +t*(-19648027.f/384+t*(-9469163.f/640+t*(-545977.f/192 \
437  +t*(-156927.f/448+t*(-28909.f/1152 \
438  +t*(-3569.f/4480))))))))); \
439  t--; \
440  phi[9] = 439375.f/7+t*(64188125.f/504+t*(231125375.f/2016 \
441  +t*(17306975.f/288+t*(7761805.f/384+t*(2895587.f/640 \
442  +t*(129391.f/192+t*(259715.f/4032+t*(28909.f/8064 \
443  +t*(3569.f/40320))))))))); \
444  break; \
445  case NL_MSM_APPROX_BSPLINE: \
446  phi[0] = (1.f/6) * (2-t) * (2-t) * (2-t); \
447  t--; \
448  phi[1] = (2.f/3) + t*t*(-1 + 0.5f*t); \
449  t--; \
450  phi[2] = (2.f/3) + t*t*(-1 - 0.5f*t); \
451  t--; \
452  phi[3] = (1.f/6) * (2+t) * (2+t) * (2+t); \
453  break; \
454  default: \
455  return NL_MSM_ERROR_SUPPORT; \
456  } \
457  } while (0)
458  /* closing ';' from use as function call */
459 
460 
468 #define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) \
469  do { \
470  float *dphi = _dphi; \
471  float *phi = _phi; \
472  float h_1 = _h_1; \
473  float t = _delta; \
474  switch (_approx) { \
475  case NL_MSM_APPROX_CUBIC: \
476  phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
477  dphi[0] = (1.5f * t - 2) * (2 - t) * h_1; \
478  t--; \
479  phi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
480  dphi[1] = (-5 + 4.5f * t) * t * h_1; \
481  t--; \
482  phi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
483  dphi[2] = (-5 - 4.5f * t) * t * h_1; \
484  t--; \
485  phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
486  dphi[3] = (1.5f * t + 2) * (2 + t) * h_1; \
487  break; \
488  case NL_MSM_APPROX_QUINTIC: \
489  phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
490  dphi[0] = ((-1.f/24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t)) \
491  + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1; \
492  t--; \
493  phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6) + t * (0.375f - (5.f/24)*t));\
494  dphi[1] = (-((1.f/6) + t * (0.375f - (5.f/24)*t)) * (11 + t * (-12 + 3*t))\
495  + (1-t) * (2-t) * (3-t) * (0.375f - (5.f/12)*t)) * h_1; \
496  t--; \
497  phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t)); \
498  dphi[2] = (-(0.5f + t * (0.25f - (5.f/12)*t)) * (1 + t * (4 - 3*t)) \
499  + (1-t*t) * (2-t) * (0.25f - (5.f/6)*t)) * h_1; \
500  t--; \
501  phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t)); \
502  dphi[3] = ((0.5f + t * (-0.25f - (5.f/12)*t)) * (1 + t * (-4 - 3*t)) \
503  - (1-t*t) * (2+t) * (0.25f + (5.f/6)*t)) * h_1; \
504  t--; \
505  phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6) - t * (0.375f + (5.f/24)*t));\
506  dphi[4] = (((1.f/6) + t * (-0.375f - (5.f/24)*t)) * (11 + t * (12 + 3*t)) \
507  - (1+t) * (2+t) * (3+t) * (0.375f + (5.f/12)*t)) * h_1; \
508  t--; \
509  phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
510  dphi[5] = ((1.f/24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t)) \
511  + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1; \
512  break; \
513  case NL_MSM_APPROX_QUINTIC2: \
514  phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
515  dphi[0] = ((1.f/24) * (3-t) * (3-t) * ((3-t)*(5*t-8) - 3*(t-2)*(5*t-8) \
516  + 5*(t-2)*(3-t))) * h_1; \
517  t--; \
518  phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
519  dphi[1] = ((-1.f/24) * ((2-t)*(-48+t*(153+t*(-114+t*25))) - (t-1)* \
520  (-48+t*(153+t*(-114+t*25))) \
521  + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1; \
522  t--; \
523  phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
524  dphi[2] = ((1.f/12) * (-(12+t*(12+t*(-3+t*(-38+t*25)))) \
525  + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1; \
526  t--; \
527  phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
528  dphi[3] = ((1.f/12) * ((12+t*(-12+t*(-3+t*(38+t*25)))) \
529  + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1; \
530  t--; \
531  phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
532  dphi[4] = ((-1.f/24) * ((2+t)*(48+t*(153+t*(114+t*25))) + (t+1)* \
533  (48+t*(153+t*(114+t*25))) \
534  + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1; \
535  t--; \
536  phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
537  dphi[5] = ((1.f/24) * (3+t) * (3+t) * ((3+t)*(5*t+8) + 3*(t+2)*(5*t+8) \
538  + 5*(t+2)*(3+t))) * h_1; \
539  break; \
540  case NL_MSM_APPROX_SEPTIC: \
541  phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
542  dphi[0] = (-1.f/720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807 \
543  +t*(-122+t*7))))) * h_1; \
544  t--; \
545  phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
546  dphi[1] = (1.f/720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445 \
547  +t*(-750+t*49)))))) * h_1; \
548  t--; \
549  phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
550  dphi[2] = (-1.f/240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365 \
551  +t*(-450+t*49)))))) * h_1; \
552  t--; \
553  phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
554  dphi[3] = (1.f/144)*t*(-560+t*(84+t*(644+t*(-175+t*(-150+t*49))))) * \
555  h_1; \
556  t--; \
557  phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
558  dphi[4] = (-1.f/144)*t*(560+t*(84+t*(-644+t*(-175+t*(150+t*49))))) * \
559  h_1; \
560  t--; \
561  phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
562  dphi[5] = (1.f/240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365 \
563  +t*(450+t*49)))))) * h_1; \
564  t--; \
565  phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
566  dphi[6] = (-1.f/720)*(756+t*(9940+t*(17724+t*(12740+t*(4445 \
567  +t*(750+t*49)))))) * h_1; \
568  t--; \
569  phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
570  dphi[7] = (1.f/720)*(t+4)*(1944+t*(3644+t*(2512+t*(807 \
571  +t*(122+t*7))))) * h_1; \
572  break; \
573  case NL_MSM_APPROX_SEPTIC3: \
574  phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633 \
575  + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240) \
576  + t*(-89.f/720))))))); \
577  dphi[0] = ((-7456.f/5) + t*((117572.f/45) + t*(-1899 + t*((26383.f/36) \
578  + t*((-22807.f/144) + t*((727.f/40) + t*(-623.f/720)))))))*h_1; \
579  t--; \
580  phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2) \
581  + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48) \
582  + t*(623.f/720))))))); \
583  dphi[1] = ((25949.f/20) + t*((-117131.f/36) + t*((6741.f/2) \
584  + t*((-66437.f/36) + t*((81109.f/144) + t*((-727.f/8) \
585  + t*(4361.f/720))))))) * h_1; \
586  t--; \
587  phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2) \
588  + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80) \
589  + t*(-623.f/240))))))); \
590  dphi[2] = ((-8617.f/60) + t*((12873.f/20) + t*((-2373.f/2) + t*((4557.f/4) \
591  + t*((-9583.f/16) + t*((6543.f/40) + t*(-4361.f/240)))))))*h_1; \
592  t--; \
593  phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144) \
594  + t*((-727.f/48) + t*(623.f/144))))); \
595  dphi[3] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((12845.f/144) \
596  + t*((-727.f/8) + t*(4361.f/144)))))) * h_1; \
597  t--; \
598  phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144) \
599  + t*((-727.f/48) + t*(-623.f/144))))); \
600  dphi[4] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((-12845.f/144) \
601  + t*((-727.f/8) + t*(-4361.f/144)))))) * h_1; \
602  t--; \
603  phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2) \
604  + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80) \
605  + t*(623.f/240))))))); \
606  dphi[5] = ((8617.f/60) + t*((12873.f/20) + t*((2373.f/2) + t*((4557.f/4) \
607  + t*((9583.f/16) + t*((6543.f/40) + t*(4361.f/240)))))))*h_1; \
608  t--; \
609  phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2) \
610  + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48) \
611  + t*(-623.f/720))))))); \
612  dphi[6] = ((-25949.f/20) + t*((-117131.f/36) + t*((-6741.f/2) \
613  + t*((-66437.f/36) + t*((-81109.f/144) + t*((-727.f/8) \
614  + t*(-4361.f/720))))))) * h_1; \
615  t--; \
616  phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633 \
617  + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240) \
618  + t*(89.f/720))))))); \
619  dphi[7] = ((7456.f/5) + t*((117572.f/45) + t*(1899 + t*((26383.f/36) \
620  + t*((22807.f/144) + t*((727.f/40) + t*(623.f/720)))))))*h_1; \
621  break; \
622  case NL_MSM_APPROX_NONIC: \
623  phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
624  (t-2)*(t-1); \
625  dphi[0] = (-1.f/40320)*(t-5)*(-117648+t*(256552+t*(-221416 \
626  +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1; \
627  t--; \
628  phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
629  (-8+t*(-35+9*t)); \
630  dphi[1] = (1.f/40320)*(71856+t*(-795368+t*(1569240+t*(-1357692 \
631  +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1; \
632  t--; \
633  phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
634  (-14+t*(-25+9*t)); \
635  dphi[2] = (1.f/10080)*(3384+t*(-69080+t*(55026 \
636  +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640 \
637  +t*(-81)))))))))*h_1; \
638  t--; \
639  phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
640  (-6+t*(-5+3*t)); \
641  dphi[3] = (1.f/1440)*(72+t*(-6344+t*(2070 \
642  +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1; \
643  t--; \
644  phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
645  (-20+t*(-5+9*t)); \
646  dphi[4] = (-1.f/2880)*t*(10792+t*(-972+t*(-12516 \
647  +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1; \
648  t--; \
649  phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
650  (-20+t*(5+9*t)); \
651  dphi[5] = (1.f/2880)*t*(-10792+t*(-972+t*(12516 \
652  +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1; \
653  t--; \
654  phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
655  (-6+t*(5+3*t)); \
656  dphi[6] = (1.f/1440)*(-72+t*(-6344+t*(-2070 \
657  +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1; \
658  t--; \
659  phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
660  (-14+t*(25+9*t)); \
661  dphi[7] = (1.f/10080)*(-3384+t*(-69080+t*(-55026 \
662  +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1; \
663  t--; \
664  phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
665  (-8+t*(35+9*t)); \
666  dphi[8] = (-1.f/40320)*(71856+t*(795368+t*(1569240 \
667  +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296 \
668  +t*81))))))))*h_1; \
669  t--; \
670  phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
671  (t+7)*(t+8); \
672  dphi[9] = (1.f/40320)*(t+5)*(117648+t*(256552+t*(221416 \
673  +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1; \
674  break; \
675  case NL_MSM_APPROX_NONIC4: \
676  phi[0] = 439375.f/7+t*(-64188125.f/504+t*(231125375.f/2016 \
677  +t*(-17306975.f/288+t*(7761805.f/384+t*(-2895587.f/640 \
678  +t*(129391.f/192+t*(-259715.f/4032+t*(28909.f/8064 \
679  +t*(-3569.f/40320))))))))); \
680  dphi[0] = (-64188125.f/504+t*(231125375.f/1008 \
681  +t*(-17306975.f/96+t*(7761805.f/96+t*(-2895587.f/128 \
682  +t*(129391.f/32+t*(-259715.f/576+t*(28909.f/1008 \
683  +t*(-3569.f/4480)))))))))*h_1; \
684  t--; \
685  phi[1] = -56375+t*(8314091.f/56+t*(-49901303.f/288+t*(3763529.f/32 \
686  +t*(-19648027.f/384+t*(9469163.f/640+t*(-545977.f/192 \
687  +t*(156927.f/448+t*(-28909.f/1152 \
688  +t*(3569.f/4480))))))))); \
689  dphi[1] = (8314091.f/56+t*(-49901303.f/144+t*(11290587.f/32 \
690  +t*(-19648027.f/96+t*(9469163.f/128+t*(-545977.f/32 \
691  +t*(156927.f/64+t*(-28909.f/144 \
692  +t*(32121.f/4480)))))))))*h_1; \
693  t--; \
694  phi[2] = 68776.f/7+t*(-1038011.f/28+t*(31157515.f/504+t*(-956669.f/16 \
695  +t*(3548009.f/96+t*(-2422263.f/160+t*(197255.f/48 \
696  +t*(-19959.f/28+t*(144545.f/2016 \
697  +t*(-3569.f/1120))))))))); \
698  dphi[2] = (-1038011.f/28+t*(31157515.f/252+t*(-2870007.f/16 \
699  +t*(3548009.f/24+t*(-2422263.f/32+t*(197255.f/8 \
700  +t*(-19959.f/4+t*(144545.f/252 \
701  +t*(-32121.f/1120)))))))))*h_1; \
702  t--; \
703  phi[3] = -154+t*(12757.f/12+t*(-230123.f/72+t*(264481.f/48 \
704  +t*(-576499.f/96+t*(686147.f/160+t*(-96277.f/48 \
705  +t*(14221.f/24+t*(-28909.f/288+t*(3569.f/480))))))))); \
706  dphi[3] = (12757.f/12+t*(-230123.f/36+t*(264481.f/16 \
707  +t*(-576499.f/24+t*(686147.f/32+t*(-96277.f/8 \
708  +t*(99547.f/24+t*(-28909.f/36 \
709  +t*(10707.f/160)))))))))*h_1; \
710  t--; \
711  phi[4] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(-6181.f/320 \
712  +t*(6337.f/96+t*(-2745.f/32+t*(28909.f/576 \
713  +t*(-3569.f/320))))))); \
714  dphi[4] = t*(-205.f/72+t*t*(91.f/48+t*(-6181.f/64 \
715  +t*(6337.f/16+t*(-19215.f/32+t*(28909.f/72 \
716  +t*(-32121.f/320)))))))*h_1; \
717  t--; \
718  phi[5] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(6181.f/320 \
719  +t*(6337.f/96+t*(2745.f/32+t*(28909.f/576 \
720  +t*(3569.f/320))))))); \
721  dphi[5] = t*(-205.f/72+t*t*(91.f/48+t*(6181.f/64 \
722  +t*(6337.f/16+t*(19215.f/32+t*(28909.f/72 \
723  +t*(32121.f/320)))))))*h_1; \
724  t--; \
725  phi[6] = -154+t*(-12757.f/12+t*(-230123.f/72+t*(-264481.f/48 \
726  +t*(-576499.f/96+t*(-686147.f/160+t*(-96277.f/48 \
727  +t*(-14221.f/24+t*(-28909.f/288+t*(-3569.f/480))))))))); \
728  dphi[6] = (-12757.f/12+t*(-230123.f/36+t*(-264481.f/16 \
729  +t*(-576499.f/24+t*(-686147.f/32+t*(-96277.f/8 \
730  +t*(-99547.f/24+t*(-28909.f/36 \
731  +t*(-10707.f/160)))))))))*h_1; \
732  t--; \
733  phi[7] = 68776.f/7+t*(1038011.f/28+t*(31157515.f/504+t*(956669.f/16 \
734  +t*(3548009.f/96+t*(2422263.f/160+t*(197255.f/48 \
735  +t*(19959.f/28+t*(144545.f/2016+t*(3569.f/1120))))))))); \
736  dphi[7] = (1038011.f/28+t*(31157515.f/252+t*(2870007.f/16 \
737  +t*(3548009.f/24+t*(2422263.f/32+t*(197255.f/8 \
738  +t*(19959.f/4+t*(144545.f/252 \
739  +t*(32121.f/1120)))))))))*h_1; \
740  t--; \
741  phi[8] = -56375+t*(-8314091.f/56+t*(-49901303.f/288+t*(-3763529.f/32 \
742  +t*(-19648027.f/384+t*(-9469163.f/640+t*(-545977.f/192 \
743  +t*(-156927.f/448+t*(-28909.f/1152 \
744  +t*(-3569.f/4480))))))))); \
745  dphi[8] = (-8314091.f/56+t*(-49901303.f/144+t*(-11290587.f/32 \
746  +t*(-19648027.f/96+t*(-9469163.f/128+t*(-545977.f/32 \
747  +t*(-156927.f/64+t*(-28909.f/144 \
748  +t*(-32121.f/4480)))))))))*h_1; \
749  t--; \
750  phi[9] = 439375.f/7+t*(64188125.f/504+t*(231125375.f/2016 \
751  +t*(17306975.f/288+t*(7761805.f/384+t*(2895587.f/640 \
752  +t*(129391.f/192+t*(259715.f/4032+t*(28909.f/8064 \
753  +t*(3569.f/40320))))))))); \
754  dphi[9] = (64188125.f/504+t*(231125375.f/1008 \
755  +t*(17306975.f/96+t*(7761805.f/96+t*(2895587.f/128 \
756  +t*(129391.f/32+t*(259715.f/576+t*(28909.f/1008 \
757  +t*(3569.f/4480)))))))))*h_1; \
758  break; \
759  case NL_MSM_APPROX_BSPLINE: \
760  phi[0] = (1.f/6) * (2-t) * (2-t) * (2-t); \
761  dphi[0] = -0.5f * (2-t) * (2-t) * h_1; \
762  t--; \
763  phi[1] = (2.f/3) + t*t*(-1 + 0.5f*t); \
764  dphi[1] = t*(-2 + 1.5f*t) * h_1; \
765  t--; \
766  phi[2] = (2.f/3) + t*t*(-1 - 0.5f*t); \
767  dphi[2] = t*(-2 - 1.5f*t) * h_1; \
768  t--; \
769  phi[3] = (1.f/6) * (2+t) * (2+t) * (2+t); \
770  dphi[3] = 0.5f * (2+t) * (2+t) * h_1; \
771  break; \
772  default: \
773  return NL_MSM_ERROR_SUPPORT; \
774  } \
775  } while (0)
776  /* closing ';' from use as function call */
777 
778 
780 {
781  const float *atom = msm->atom_f;
782  const int natoms = msm->numatoms;
783 
784  float xphi[MAX_POLY_DEGREE+1]; /* Phi stencil along x-dimension */
785  float yphi[MAX_POLY_DEGREE+1]; /* Phi stencil along y-dimension */
786  float zphi[MAX_POLY_DEGREE+1]; /* Phi stencil along z-dimension */
787  float rx_hx, ry_hy, rz_hz; /* normalized distance from atom to origin */
788 #ifndef TEST_INLINING
789  float delta; /* normalized distance to stencil point */
790 #else
791  float t; /* normalized distance to stencil point */
792 #endif
793  float ck, cjk;
794  const float hx_1 = 1/msm->hx_f;
795  const float hy_1 = 1/msm->hy_f;
796  const float hz_1 = 1/msm->hz_f;
797  const float xm0 = msm->gx_f; /* grid origin x */
798  const float ym0 = msm->gy_f; /* grid origin y */
799  const float zm0 = msm->gz_f; /* grid origin z */
800  float q;
801 
802  NL_Msmgrid_float *qhgrid = &(msm->qh_f[0]);
803  float *qh = qhgrid->data;
804  const int ni = qhgrid->ni;
805  const int nj = qhgrid->nj;
806  const int nk = qhgrid->nk;
807  const int ia = qhgrid->i0;
808  const int ib = ia + ni - 1;
809  const int ja = qhgrid->j0;
810  const int jb = ja + nj - 1;
811  const int ka = qhgrid->k0;
812  const int kb = ka + nk - 1;
813 
814  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
815  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
816  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
817  int iswithin;
818 
819  int n, i, j, k, ilo, jlo, klo, koff;
820  int jkoff, index;
821 
822  const int approx = msm->approx;
823  const int s_size = PolyDegree[approx] + 1; /* stencil size */
824  const int s_edge = (PolyDegree[approx] - 1) >> 1; /* stencil "edge" size */
825 
826  GRID_ZERO(qhgrid);
827 
828  for (n = 0; n < natoms; n++) {
829 
830  /* atomic charge */
831  q = atom[4*n + 3];
832  if (0==q) continue;
833 
834  /* distance between atom and origin measured in grid points */
835  rx_hx = (atom[4*n ] - xm0) * hx_1;
836  ry_hy = (atom[4*n + 1] - ym0) * hy_1;
837  rz_hz = (atom[4*n + 2] - zm0) * hz_1;
838 
839  /* find smallest numbered grid point in stencil */
840  ilo = (int) floorf(rx_hx) - s_edge;
841  jlo = (int) floorf(ry_hy) - s_edge;
842  klo = (int) floorf(rz_hz) - s_edge;
843 
844  /* calculate Phi stencils along each dimension */
845 #ifndef TEST_INLINING
846  delta = rx_hx - (float) ilo;
847  STENCIL_1D(xphi, delta, approx);
848  delta = ry_hy - (float) jlo;
849  STENCIL_1D(yphi, delta, approx);
850  delta = rz_hz - (float) klo;
851  STENCIL_1D(zphi, delta, approx);
852 #else
853  t = rx_hx - (float) ilo;
854  xphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
855  t--; \
856  xphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
857  t--; \
858  xphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
859  t--; \
860  xphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
861 
862  t = ry_hy - (double) jlo;
863  yphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
864  t--; \
865  yphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
866  t--; \
867  yphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
868  t--; \
869  yphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
870 
871  t = rz_hz - (double) klo;
872  zphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
873  t--; \
874  zphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
875  t--; \
876  zphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
877  t--; \
878  zphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
879 
880 #endif
881 
882  /* test to see if stencil is within edges of grid */
883  iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
884  ja <= jlo && (jlo+(s_size-1)) <= jb &&
885  ka <= klo && (klo+(s_size-1)) <= kb );
886 
887  if ( iswithin ) { /* no wrapping needed */
888 
889  /* determine charge on cube of grid points around atom */
890  for (k = 0; k < s_size; k++) {
891  koff = (k + klo) * nj;
892  ck = zphi[k] * q;
893  for (j = 0; j < s_size; j++) {
894  jkoff = (koff + (j + jlo)) * ni;
895  cjk = yphi[j] * ck;
896  for (i = 0; i < s_size; i++) {
897  index = jkoff + (i + ilo);
898  GRID_INDEX_CHECK(qhgrid, i+ilo, j+jlo, k+klo);
899  ASSERT(GRID_INDEX(qhgrid, i+ilo, j+jlo, k+klo) == index);
900  qh[index] += xphi[i] * cjk;
901  }
902  }
903  }
904  } /* if */
905 
906  else { /* requires wrapping around grid */
907  int ip, jp, kp;
908 
909  /* adjust ilo, jlo, klo so they are within grid indexing */
910  if (ispx) {
911  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
912  else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
913  }
914  else if (ilo < ia || (ilo+(s_size-1)) > ib) {
915  return NL_MSM_ERROR_RANGE;
916  }
917 
918  if (ispy) {
919  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
920  else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
921  }
922  else if (jlo < ja || (jlo+(s_size-1)) > jb) {
923  return NL_MSM_ERROR_RANGE;
924  }
925 
926  if (ispz) {
927  if (klo < ka) do { klo += nk; } while (klo < ka);
928  else if (klo > kb) do { klo -= nk; } while (klo > kb);
929  }
930  else if (klo < ka || (klo+(s_size-1)) > kb) {
931  return NL_MSM_ERROR_RANGE;
932  }
933 
934  /* determine charge on cube of grid points around atom, with wrapping */
935  for (k = 0, kp = klo; k < s_size; k++, kp++) {
936  if (kp > kb) kp = ka; /* wrap stencil around grid */
937  koff = kp * nj;
938  ck = zphi[k] * q;
939  for (j = 0, jp = jlo; j < s_size; j++, jp++) {
940  if (jp > jb) jp = ja; /* wrap stencil around grid */
941  jkoff = (koff + jp) * ni;
942  cjk = yphi[j] * ck;
943  for (i = 0, ip = ilo; i < s_size; i++, ip++) {
944  if (ip > ib) ip = ia; /* wrap stencil around grid */
945  index = jkoff + ip;
946  GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
947  ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == index);
948  qh[index] += xphi[i] * cjk;
949  }
950  }
951  }
952  } /* else */
953 
954  } /* end loop over atoms */
955 
956  return NL_MSM_SUCCESS;
957 } /* anterpolation */
958 
959 
961 {
962  float *f = msm->felec_f;
963  const float *atom = msm->atom_f;
964  const int natoms = msm->numatoms;
965 
966  float xphi[MAX_POLY_DEGREE+1]; /* Phi stencil along x-dimension */
967  float yphi[MAX_POLY_DEGREE+1]; /* Phi stencil along y-dimension */
968  float zphi[MAX_POLY_DEGREE+1]; /* Phi stencil along z-dimension */
969  float dxphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along x-dimension */
970  float dyphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along y-dimension */
971  float dzphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along z-dimension */
972  float rx_hx, ry_hy, rz_hz; /* normalized distance from atom to origin */
973 #ifndef TEST_INLINING
974  float delta; /* normalized distance to stencil point */
975 #else
976  float t; /* normalized distance to stencil point */
977 #endif
978  const float hx_1 = 1/msm->hx_f;
979  const float hy_1 = 1/msm->hy_f;
980  const float hz_1 = 1/msm->hz_f;
981  const float xm0 = msm->gx_f; /* grid origin x */
982  const float ym0 = msm->gy_f; /* grid origin y */
983  const float zm0 = msm->gz_f; /* grid origin z */
984  float q;
985  double u = 0; /* need double precision for accumulating potential */
986 
987  const NL_Msmgrid_float *ehgrid = &(msm->eh_f[0]);
988  const float *eh = ehgrid->data;
989  const float *ebuf = ehgrid->buffer;
990  const NL_Msmgrid_float *qhgrid = &(msm->qh_f[0]);
991  const float *qbuf = qhgrid->buffer;
992  const int ni = ehgrid->ni;
993  const int nj = ehgrid->nj;
994  const int nk = ehgrid->nk;
995  const int ia = ehgrid->i0;
996  const int ib = ia + ni - 1;
997  const int ja = ehgrid->j0;
998  const int jb = ja + nj - 1;
999  const int ka = ehgrid->k0;
1000  const int kb = ka + nk - 1;
1001 
1002  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1003  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1004  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1005  int iswithin;
1006 
1007  float fx, fy, fz, cx, cy, cz;
1008 
1009  int n, i, j, k, ilo, jlo, klo, koff;
1010  long jkoff, index;
1011  const int nn = (ni*nj) * nk;
1012 
1013  const int approx = msm->approx;
1014  const int s_size = PolyDegree[approx] + 1; /* stencil size */
1015  const int s_edge = (PolyDegree[approx] - 1) >> 1; /* stencil "edge" size */
1016 
1017 #ifdef NL_DEBUG
1018 #if 1
1019  printf("+++ eh[0,0,0] = %g\n", eh[0]);
1020 #else
1021  n = 0;
1022  for (k = ka; k <= kb; k++) {
1023  for (j = ja; j <= jb; j++) {
1024  for (i = ia; i <= ib; i++, n++) {
1025  printf("=== eh[%d,%d,%d] = %g\n", i, j, k, ebuf[n]);
1026  }
1027  }
1028  }
1029 #endif
1030 #endif
1031 
1032  for (n = 0; n < natoms; n++) {
1033 
1034  /* atomic charge */
1035  q = atom[4*n + 3];
1036  if (0==q) continue;
1037 
1038  /* distance between atom and origin measured in grid points */
1039  rx_hx = (atom[4*n ] - xm0) * hx_1;
1040  ry_hy = (atom[4*n + 1] - ym0) * hy_1;
1041  rz_hz = (atom[4*n + 2] - zm0) * hz_1;
1042 
1043  /* find smallest numbered grid point in stencil */
1044  ilo = (int) floorf(rx_hx) - s_edge;
1045  jlo = (int) floorf(ry_hy) - s_edge;
1046  klo = (int) floorf(rz_hz) - s_edge;
1047 
1048  /* calculate Phi stencils and derivatives along each dimension */
1049 #ifndef TEST_INLINING
1050  delta = rx_hx - (float) ilo;
1051  //STENCIL_1D(xphi, delta, approx);
1052  D_STENCIL_1D(dxphi, xphi, hx_1, delta, approx);
1053  delta = ry_hy - (float) jlo;
1054  //STENCIL_1D(yphi, delta, approx);
1055  D_STENCIL_1D(dyphi, yphi, hy_1, delta, approx);
1056  delta = rz_hz - (float) klo;
1057  //STENCIL_1D(zphi, delta, approx);
1058  D_STENCIL_1D(dzphi, zphi, hz_1, delta, approx);
1059 #else
1060  t = rx_hx - (float) ilo;
1061  xphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
1062  dxphi[0] = (1.5f * t - 2) * (2 - t) * hx_1; \
1063  t--; \
1064  xphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
1065  dxphi[1] = (-5 + 4.5f * t) * t * hx_1; \
1066  t--; \
1067  xphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
1068  dxphi[2] = (-5 - 4.5f * t) * t * hx_1; \
1069  t--; \
1070  xphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
1071  dxphi[3] = (1.5f * t + 2) * (2 + t) * hx_1; \
1072 
1073  t = ry_hy - (double) jlo;
1074  yphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
1075  dyphi[0] = (1.5f * t - 2) * (2 - t) * hy_1; \
1076  t--; \
1077  yphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
1078  dyphi[1] = (-5 + 4.5f * t) * t * hy_1; \
1079  t--; \
1080  yphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
1081  dyphi[2] = (-5 - 4.5f * t) * t * hy_1; \
1082  t--; \
1083  yphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
1084  dyphi[3] = (1.5f * t + 2) * (2 + t) * hy_1; \
1085 
1086  t = rz_hz - (double) klo;
1087  zphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
1088  dzphi[0] = (1.5f * t - 2) * (2 - t) * hz_1; \
1089  t--; \
1090  zphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
1091  dzphi[1] = (-5 + 4.5f * t) * t * hz_1; \
1092  t--; \
1093  zphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
1094  dzphi[2] = (-5 - 4.5f * t) * t * hz_1; \
1095  t--; \
1096  zphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
1097  dzphi[3] = (1.5f * t + 2) * (2 + t) * hz_1; \
1098 
1099 #endif
1100 
1101  /* test to see if stencil is within edges of grid */
1102  iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1103  ja <= jlo && (jlo+(s_size-1)) <= jb &&
1104  ka <= klo && (klo+(s_size-1)) <= kb );
1105 
1106  if ( iswithin ) { /* no wrapping needed */
1107 
1108  /* determine force on atom from cube of grid point potentials */
1109  fx = fy = fz = 0;
1110  for (k = 0; k < s_size; k++) {
1111  koff = (k + klo) * nj;
1112  for (j = 0; j < s_size; j++) {
1113  jkoff = (koff + (j + jlo)) * ni;
1114  cx = yphi[j] * zphi[k];
1115  cy = dyphi[j] * zphi[k];
1116  cz = yphi[j] * dzphi[k];
1117  for (i = 0; i < s_size; i++) {
1118  index = jkoff + (i + ilo);
1119  GRID_INDEX_CHECK(ehgrid, i+ilo, j+jlo, k+klo);
1120  ASSERT(GRID_INDEX(ehgrid, i+ilo, j+jlo, k+klo) == index);
1121  fx += eh[index] * dxphi[i] * cx;
1122  fy += eh[index] * xphi[i] * cy;
1123  fz += eh[index] * xphi[i] * cz;
1124  }
1125  }
1126  }
1127  } /* if */
1128 
1129  else { /* requires wrapping around grid */
1130  int ip, jp, kp;
1131 
1132  /* adjust ilo, jlo, klo so they are within grid indexing */
1133  if (ispx) {
1134  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1135  else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
1136  }
1137  else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1138  return NL_MSM_ERROR_RANGE;
1139  }
1140 
1141  if (ispy) {
1142  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1143  else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
1144  }
1145  else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1146  return NL_MSM_ERROR_RANGE;
1147  }
1148 
1149  if (ispz) {
1150  if (klo < ka) do { klo += nk; } while (klo < ka);
1151  else if (klo > kb) do { klo -= nk; } while (klo > kb);
1152  }
1153  else if (klo < ka || (klo+(s_size-1)) > kb) {
1154  return NL_MSM_ERROR_RANGE;
1155  }
1156 
1157  /* determine force on atom from cube of grid point potentials, wrapping */
1158  fx = fy = fz = 0;
1159  for (k = 0, kp = klo; k < s_size; k++, kp++) {
1160  if (kp > kb) kp = ka; /* wrap stencil around grid */
1161  koff = kp * nj;
1162  for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1163  if (jp > jb) jp = ja; /* wrap stencil around grid */
1164  jkoff = (koff + jp) * ni;
1165  cx = yphi[j] * zphi[k];
1166  cy = dyphi[j] * zphi[k];
1167  cz = yphi[j] * dzphi[k];
1168  for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1169  if (ip > ib) ip = ia; /* wrap stencil around grid */
1170  index = jkoff + ip;
1171  GRID_INDEX_CHECK(ehgrid, ip, jp, kp);
1172  ASSERT(GRID_INDEX(ehgrid, ip, jp, kp) == index);
1173  fx += eh[index] * dxphi[i] * cx;
1174  fy += eh[index] * xphi[i] * cy;
1175  fz += eh[index] * xphi[i] * cz;
1176  }
1177  }
1178  }
1179  } /* else */
1180 
1181  /* update force */
1182  f[3*n ] -= q * fx;
1183  f[3*n+1] -= q * fy;
1184  f[3*n+2] -= q * fz;
1185 
1186  } /* end loop over atoms */
1187 
1188  /* compute potential, subtract self potential */
1189  u = 0;
1190  if (1) {
1191  for (n = 0; n < natoms; n++) {
1192  float q = atom[4*n + 3];
1193  u += q * q;
1194  }
1195  u *= -msm->gzero_f;
1196  }
1197  for (index = 0; index < nn; index++) {
1198  u += qbuf[index] * ebuf[index];
1199  }
1200  msm->uelec += 0.5 * u;
1201 
1202  return NL_MSM_SUCCESS;
1203 } /* interpolation() */
1204 
1205 
1206 int restriction_factored(NL_Msm *msm, int level) {
1207  /* grids of charge, finer grid and coarser grid */
1208  const NL_Msmgrid_float *qhgrid = &(msm->qh_f[level]);
1209  const float *qh = qhgrid->data;
1210  NL_Msmgrid_float *q2hgrid = &(msm->qh_f[level+1]);
1211  float *q2h = q2hgrid->data;
1212 
1213  /* finer grid index ranges and dimensions */
1214  const int ia1 = qhgrid->i0; /* lowest x-index */
1215  const int ib1 = ia1 + qhgrid->ni - 1; /* highest x-index */
1216  const int ja1 = qhgrid->j0; /* lowest y-index */
1217  const int jb1 = ja1 + qhgrid->nj - 1; /* highest y-index */
1218  const int ka1 = qhgrid->k0; /* lowest z-index */
1219  const int kb1 = ka1 + qhgrid->nk - 1; /* highest z-index */
1220  const int ni1 = qhgrid->ni; /* length along x-dim */
1221  const int nj1 = qhgrid->nj; /* length along y-dim */
1222  const int nk1 = qhgrid->nk; /* length along z-dim */
1223 
1224  /* coarser grid index ranges and dimensions */
1225  const int ia2 = q2hgrid->i0; /* lowest x-index */
1226  const int ib2 = ia2 + q2hgrid->ni - 1; /* highest x-index */
1227  const int ja2 = q2hgrid->j0; /* lowest y-index */
1228  const int jb2 = ja2 + q2hgrid->nj - 1; /* highest y-index */
1229  const int ka2 = q2hgrid->k0; /* lowest z-index */
1230  const int kb2 = ka2 + q2hgrid->nk - 1; /* highest z-index */
1231  const int nrow_q2 = q2hgrid->ni;
1232  const int nstride_q2 = nrow_q2 * q2hgrid->nj;
1233 
1234  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1235  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1236  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1237 
1238  /* set buffer using indexing offset, so that indexing matches qh grid */
1239  float *qzd = msm->lzd_f + (-ka1);
1240  float *qyzd = msm->lyzd_f + (-ka1*nj1 + -ja1);
1241  float qsum;
1242 
1243  const float *phi = NULL;
1244 
1245  int i2, j2, k2;
1246  int im, jm, km;
1247  int i, j, k;
1248  int index_plane_q2, index_q2;
1249  int index_jk, offset_k;
1250  int offset;
1251 
1252  const float *phi_factored = PhiStencilFactored[msm->approx];
1253  const int r_stencil = PolyDegree[msm->approx]; /* "radius" of stencil */
1254  const int diam_stencil = 2*r_stencil + 1; /* "diameter" of stencil */
1255 
1256  for (i2 = ia2; i2 <= ib2; i2++) {
1257 
1258  for (k = ka1; k <= kb1; k++) {
1259  offset_k = k * nj1;
1260 
1261  for (j = ja1; j <= jb1; j++) {
1262  index_jk = offset_k + j;
1263  offset = index_jk * ni1;
1264  im = (i2 << 1); /* = 2*i2 */
1265  qsum = 0;
1266  if ( ! ispx ) { /* nonperiodic */
1267  int lower = im - r_stencil;
1268  int upper = im + r_stencil;
1269  if (lower < ia1) lower = ia1;
1270  if (upper > ib1) upper = ib1; /* clip edges */
1271  phi = phi_factored + r_stencil; /* center of stencil */
1272  for (i = lower; i <= upper; i++) {
1273  qsum += phi[i-im] * qh[offset + i];
1274  }
1275  }
1276  else { /* periodic */
1277  int ip = im - r_stencil; /* index at left end of stencil */
1278  if (ip < ia1) do { ip += ni1; } while (ip < ia1); /* start inside */
1279  phi = phi_factored; /* left end of stencil */
1280  for (i = 0; i < diam_stencil; i++, ip++) {
1281  if (ip > ib1) ip = ia1; /* wrap around edge of grid */
1282  qsum += phi[i] * qh[offset + ip];
1283  }
1284  }
1285  qyzd[index_jk] = qsum;
1286  } /* for j */
1287 
1288  } /* for k */
1289 
1290  for (j2 = ja2; j2 <= jb2; j2++) {
1291  index_plane_q2 = j2 * nrow_q2 + i2;
1292 
1293  for (k = ka1; k <= kb1; k++) {
1294  offset = k * nj1;
1295  jm = (j2 << 1); /* = 2*j2 */
1296  qsum = 0;
1297  if ( ! ispy ) { /* nonperiodic */
1298  int lower = jm - r_stencil;
1299  int upper = jm + r_stencil;
1300  if (lower < ja1) lower = ja1;
1301  if (upper > jb1) upper = jb1; /* clip edges */
1302  phi = phi_factored + r_stencil; /* center of stencil */
1303  for (j = lower; j <= upper; j++) {
1304  qsum += phi[j-jm] * qyzd[offset + j];
1305  }
1306  }
1307  else { /* periodic */
1308  int jp = jm - r_stencil; /* index at left end of stencil */
1309  if (jp < ja1) do { jp += nj1; } while (jp < ja1); /* start inside */
1310  phi = phi_factored; /* left end of stencil */
1311  for (j = 0; j < diam_stencil; j++, jp++) {
1312  if (jp > jb1) jp = ja1; /* wrap around edge of grid */
1313  qsum += phi[j] * qyzd[offset + jp];
1314  }
1315  }
1316  qzd[k] = qsum;
1317  } /* for k */
1318 
1319  for (k2 = ka2; k2 <= kb2; k2++) {
1320  index_q2 = k2 * nstride_q2 + index_plane_q2;
1321  km = (k2 << 1); /* = 2*k2 */
1322  qsum = 0;
1323  if ( ! ispz ) { /* nonperiodic */
1324  int lower = km - r_stencil;
1325  int upper = km + r_stencil;
1326  if (lower < ka1) lower = ka1;
1327  if (upper > kb1) upper = kb1; /* clip edges */
1328  phi = phi_factored + r_stencil; /* center of stencil */
1329  for (k = lower; k <= upper; k++) {
1330  qsum += phi[k-km] * qzd[k];
1331  }
1332  }
1333  else { /* periodic */
1334  int kp = km - r_stencil; /* index at left end of stencil */
1335  if (kp < ka1) do { kp += nk1; } while (kp < ka1); /* start inside */
1336  phi = phi_factored; /* left end of stencil */
1337  for (k = 0; k < diam_stencil; k++, kp++) {
1338  if (kp > kb1) kp = ka1; /* wrap around edge of grid */
1339  qsum += phi[k] * qzd[kp];
1340  }
1341  }
1342  q2h[index_q2] = qsum;
1343  } /* for k2 */
1344 
1345  } /* for j2 */
1346 
1347  } /* for i2 */
1348 
1349  return NL_MSM_SUCCESS;
1350 } /* restriction_factored */
1351 
1352 
1353 int prolongation_factored(NL_Msm *msm, int level) {
1354  /* grids of potential, finer grid and coarser grid */
1355  NL_Msmgrid_float *ehgrid = &(msm->eh_f[level]);
1356  float *eh = ehgrid->data;
1357  const NL_Msmgrid_float *e2hgrid = &(msm->eh_f[level+1]);
1358  const float *e2h = e2hgrid->data;
1359 
1360  /* finer grid index ranges and dimensions */
1361  const int ia1 = ehgrid->i0; /* lowest x-index */
1362  const int ib1 = ia1 + ehgrid->ni - 1; /* highest x-index */
1363  const int ja1 = ehgrid->j0; /* lowest y-index */
1364  const int jb1 = ja1 + ehgrid->nj - 1; /* highest y-index */
1365  const int ka1 = ehgrid->k0; /* lowest z-index */
1366  const int kb1 = ka1 + ehgrid->nk - 1; /* highest z-index */
1367  const int ni1 = ehgrid->ni; /* length along x-dim */
1368  const int nj1 = ehgrid->nj; /* length along y-dim */
1369  const int nk1 = ehgrid->nk; /* length along z-dim */
1370 
1371  /* coarser grid index ranges and dimensions */
1372  const int ia2 = e2hgrid->i0; /* lowest x-index */
1373  const int ib2 = ia2 + e2hgrid->ni - 1; /* highest x-index */
1374  const int ja2 = e2hgrid->j0; /* lowest y-index */
1375  const int jb2 = ja2 + e2hgrid->nj - 1; /* highest y-index */
1376  const int ka2 = e2hgrid->k0; /* lowest z-index */
1377  const int kb2 = ka2 + e2hgrid->nk - 1; /* highest z-index */
1378  const int nrow_e2 = e2hgrid->ni;
1379  const int nstride_e2 = nrow_e2 * e2hgrid->nj;
1380 
1381  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1382  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1383  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1384 
1385  /* set buffer using indexing offset, so that indexing matches eh grid */
1386  float *ezd = msm->lzd_f + (-ka1);
1387  float *eyzd = msm->lyzd_f + (-ka1*nj1 + -ja1);
1388 
1389  const size_t size_lzd = nk1 * sizeof(float);
1390  const size_t size_lyzd = nj1 * nk1 * sizeof(float);
1391 
1392  const float *phi = NULL;
1393 
1394  int i2, j2, k2;
1395  int im, jm, km;
1396  int i, j, k;
1397  int index_plane_e2, index_e2;
1398  int index_jk, offset_k;
1399  int offset;
1400 
1401  const float *phi_factored = PhiStencilFactored[msm->approx];
1402  const int r_stencil = PolyDegree[msm->approx]; /* "radius" of stencil */
1403  const int diam_stencil = 2*r_stencil + 1; /* "diameter" of stencil */
1404 
1405  for (i2 = ia2; i2 <= ib2; i2++) {
1406  memset(msm->lyzd_f, 0, size_lyzd);
1407 
1408  for (j2 = ja2; j2 <= jb2; j2++) {
1409  memset(msm->lzd_f, 0, size_lzd);
1410  index_plane_e2 = j2 * nrow_e2 + i2;
1411 
1412  for (k2 = ka2; k2 <= kb2; k2++) {
1413  index_e2 = k2 * nstride_e2 + index_plane_e2;
1414  km = (k2 << 1); /* = 2*k2 */
1415  if ( ! ispz ) { /* nonperiodic */
1416  int lower = km - r_stencil;
1417  int upper = km + r_stencil;
1418  if (lower < ka1) lower = ka1;
1419  if (upper > kb1) upper = kb1; /* clip edges */
1420  phi = phi_factored + r_stencil; /* center of stencil */
1421  for (k = lower; k <= upper; k++) {
1422  ezd[k] += phi[k-km] * e2h[index_e2];
1423  }
1424  }
1425  else { /* periodic */
1426  int kp = km - r_stencil; /* index at left end of stencil */
1427  if (kp < ka1) do { kp += nk1; } while (kp < ka1); /* start inside */
1428  phi = phi_factored; /* left end of stencil */
1429  for (k = 0; k < diam_stencil; k++, kp++) {
1430  if (kp > kb1) kp = ka1; /* wrap around edge of grid */
1431  ezd[kp] += phi[k] * e2h[index_e2];
1432  }
1433  }
1434  } /* for k2 */
1435 
1436  for (k = ka1; k <= kb1; k++) {
1437  offset = k * nj1;
1438  jm = (j2 << 1); /* = 2*j2 */
1439  if ( ! ispy ) { /* nonperiodic */
1440  int lower = jm - r_stencil;
1441  int upper = jm + r_stencil;
1442  if (lower < ja1) lower = ja1;
1443  if (upper > jb1) upper = jb1; /* clip edges */
1444  phi = phi_factored + r_stencil; /* center of stencil */
1445  for (j = lower; j <= upper; j++) {
1446  eyzd[offset + j] += phi[j-jm] * ezd[k];
1447  }
1448  }
1449  else { /* periodic */
1450  int jp = jm - r_stencil; /* index at left end of stencil */
1451  if (jp < ja1) do { jp += nj1; } while (jp < ja1); /* start inside */
1452  phi = phi_factored; /* left end of stencil */
1453  for (j = 0; j < diam_stencil; j++, jp++) {
1454  if (jp > jb1) jp = ja1; /* wrap around edge of grid */
1455  eyzd[offset + jp] += phi[j] * ezd[k];
1456  }
1457  }
1458  } /* for k */
1459 
1460  } /* for j2 */
1461 
1462  for (k = ka1; k <= kb1; k++) {
1463  offset_k = k * nj1;
1464 
1465  for (j = ja1; j <= jb1; j++) {
1466  index_jk = offset_k + j;
1467  offset = index_jk * ni1;
1468  im = (i2 << 1); /* = 2*i2 */
1469  if ( ! ispx ) { /* nonperiodic */
1470  int lower = im - r_stencil;
1471  int upper = im + r_stencil;
1472  if (lower < ia1) lower = ia1;
1473  if (upper > ib1) upper = ib1; /* clip edges */
1474  phi = phi_factored + r_stencil; /* center of stencil */
1475  for (i = lower; i <= upper; i++) {
1476  eh[offset + i] += phi[i-im] * eyzd[index_jk];
1477  }
1478  }
1479  else { /* periodic */
1480  int ip = im - r_stencil; /* index at left end of stencil */
1481  if (ip < ia1) do { ip += ni1; } while (ip < ia1); /* start inside */
1482  phi = phi_factored; /* left end of stencil */
1483  for (i = 0; i < diam_stencil; i++, ip++) {
1484  if (ip > ib1) ip = ia1; /* wrap around edge of grid */
1485  eh[offset + ip] += phi[i] * eyzd[index_jk];
1486  }
1487  }
1488  } /* for j */
1489 
1490  } /* for k */
1491 
1492  } /* for i2 */
1493 
1494  return NL_MSM_SUCCESS;
1495 } /* prolongation_factored */
1496 
1497 
1498 int restriction(NL_Msm *msm, int level) {
1499  /* grids of charge, finer grid and coarser grid */
1500  const NL_Msmgrid_float *qhgrid = &(msm->qh_f[level]);
1501  const float *qh = qhgrid->data; /* index the offset data buffer */
1502  NL_Msmgrid_float *q2hgrid = &(msm->qh_f[level+1]);
1503  float *q2h_buffer = q2hgrid->buffer; /* index the raw buffer */
1504 
1505  /* finer grid index ranges and dimensions */
1506  const int ia1 = qhgrid->i0; /* lowest x-index */
1507  const int ib1 = ia1 + qhgrid->ni - 1; /* highest x-index */
1508  const int ja1 = qhgrid->j0; /* lowest y-index */
1509  const int jb1 = ja1 + qhgrid->nj - 1; /* highest y-index */
1510  const int ka1 = qhgrid->k0; /* lowest z-index */
1511  const int kb1 = ka1 + qhgrid->nk - 1; /* highest z-index */
1512  const int ni1 = qhgrid->ni; /* length along x-dim */
1513  const int nj1 = qhgrid->nj; /* length along y-dim */
1514  const int nk1 = qhgrid->nk; /* length along z-dim */
1515 
1516  /* coarser grid index ranges and dimensions */
1517  const int ia2 = q2hgrid->i0; /* lowest x-index */
1518  const int ib2 = ia2 + q2hgrid->ni - 1; /* highest x-index */
1519  const int ja2 = q2hgrid->j0; /* lowest y-index */
1520  const int jb2 = ja2 + q2hgrid->nj - 1; /* highest y-index */
1521  const int ka2 = q2hgrid->k0; /* lowest z-index */
1522  const int kb2 = ka2 + q2hgrid->nk - 1; /* highest z-index */
1523 
1524  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1525  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1526  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1527 
1528  const int nstencil = Nstencil[msm->approx];
1529  const int *offset = IndexOffset[msm->approx];
1530  const float *phi = PhiStencil[msm->approx];
1531 
1532  float q2h_sum, cjk;
1533 
1534  int i1, j1, k1, index1, jk1off, k1off;
1535  int i2, j2, k2;
1536  int index2;
1537  int i, j, k;
1538 
1539  for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
1540  k1 = k2 * 2;
1541  for (j2 = ja2; j2 <= jb2; j2++) {
1542  j1 = j2 * 2;
1543  for (i2 = ia2; i2 <= ib2; i2++, index2++) {
1544  i1 = i2 * 2;
1545 
1546  q2h_sum = 0;
1547  for (k = 0; k < nstencil; k++) {
1548  k1off = k1 + offset[k];
1549  if (k1off < ka1) {
1550  if (ispz) do { k1off += nk1; } while (k1off < ka1);
1551  else continue;
1552  }
1553  else if (k1off > kb1) {
1554  if (ispz) do { k1off -= nk1; } while (k1off > kb1);
1555  else break;
1556  }
1557  k1off *= nj1;
1558  for (j = 0; j < nstencil; j++) {
1559  jk1off = j1 + offset[j];
1560  if (jk1off < ja1) {
1561  if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
1562  else continue;
1563  }
1564  else if (jk1off > jb1) {
1565  if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
1566  else break;
1567  }
1568  jk1off = (k1off + jk1off) * ni1;
1569  cjk = phi[j] * phi[k];
1570  for (i = 0; i < nstencil; i++) {
1571  index1 = i1 + offset[i];
1572  if (index1 < ia1) {
1573  if (ispx) do { index1 += ni1; } while (index1 < ia1);
1574  else continue;
1575  }
1576  else if (index1 > ib1) {
1577  if (ispx) do { index1 -= ni1; } while (index1 > ib1);
1578  else break;
1579  }
1580  index1 += jk1off;
1581  q2h_sum += qh[index1] * phi[i] * cjk;
1582  }
1583  }
1584  } /* end loop over finer grid stencil */
1585 
1586  q2h_buffer[index2] = q2h_sum; /* store charge to coarser grid */
1587  }
1588  }
1589  } /* end loop over each coarser grid point */
1590 
1591  return NL_MSM_SUCCESS;
1592 }
1593 
1594 
1595 int prolongation(NL_Msm *msm, int level) {
1596  /* grids of charge, finer grid and coarser grid */
1597  NL_Msmgrid_float *ehgrid = &(msm->eh_f[level]);
1598  float *eh = ehgrid->data; /* index the offset data buffer */
1599  const NL_Msmgrid_float *e2hgrid = &(msm->eh_f[level+1]);
1600  const float *e2h_buffer = e2hgrid->buffer; /* index the raw buffer */
1601 
1602  /* finer grid index ranges and dimensions */
1603  const int ia1 = ehgrid->i0; /* lowest x-index */
1604  const int ib1 = ia1 + ehgrid->ni - 1; /* highest x-index */
1605  const int ja1 = ehgrid->j0; /* lowest y-index */
1606  const int jb1 = ja1 + ehgrid->nj - 1; /* highest y-index */
1607  const int ka1 = ehgrid->k0; /* lowest z-index */
1608  const int kb1 = ka1 + ehgrid->nk - 1; /* highest z-index */
1609  const int ni1 = ehgrid->ni; /* length along x-dim */
1610  const int nj1 = ehgrid->nj; /* length along y-dim */
1611  const int nk1 = ehgrid->nk; /* length along z-dim */
1612 
1613  /* coarser grid index ranges and dimensions */
1614  const int ia2 = e2hgrid->i0; /* lowest x-index */
1615  const int ib2 = ia2 + e2hgrid->ni - 1; /* highest x-index */
1616  const int ja2 = e2hgrid->j0; /* lowest y-index */
1617  const int jb2 = ja2 + e2hgrid->nj - 1; /* highest y-index */
1618  const int ka2 = e2hgrid->k0; /* lowest z-index */
1619  const int kb2 = ka2 + e2hgrid->nk - 1; /* highest z-index */
1620 
1621  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1622  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1623  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1624 
1625  const int nstencil = Nstencil[msm->approx];
1626  const int *offset = IndexOffset[msm->approx];
1627  const float *phi = PhiStencil[msm->approx];
1628 
1629  float cjk;
1630 
1631  int i1, j1, k1, index1, jk1off, k1off;
1632  int i2, j2, k2;
1633  int index2;
1634  int i, j, k;
1635 
1636  for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
1637  k1 = k2 * 2;
1638  for (j2 = ja2; j2 <= jb2; j2++) {
1639  j1 = j2 * 2;
1640  for (i2 = ia2; i2 <= ib2; i2++, index2++) {
1641  i1 = i2 * 2;
1642 
1643  for (k = 0; k < nstencil; k++) {
1644  k1off = k1 + offset[k];
1645  if (k1off < ka1) {
1646  if (ispz) do { k1off += nk1; } while (k1off < ka1);
1647  else continue;
1648  }
1649  else if (k1off > kb1) {
1650  if (ispz) do { k1off -= nk1; } while (k1off > kb1);
1651  else break;
1652  }
1653  k1off *= nj1;
1654  for (j = 0; j < nstencil; j++) {
1655  jk1off = j1 + offset[j];
1656  if (jk1off < ja1) {
1657  if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
1658  else continue;
1659  }
1660  else if (jk1off > jb1) {
1661  if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
1662  else break;
1663  }
1664  jk1off = (k1off + jk1off) * ni1;
1665  cjk = phi[j] * phi[k];
1666  for (i = 0; i < nstencil; i++) {
1667  index1 = i1 + offset[i];
1668  if (index1 < ia1) {
1669  if (ispx) do { index1 += ni1; } while (index1 < ia1);
1670  else continue;
1671  }
1672  else if (index1 > ib1) {
1673  if (ispx) do { index1 -= ni1; } while (index1 > ib1);
1674  else break;
1675  }
1676  index1 += jk1off;
1677  eh[index1] += e2h_buffer[index2] * phi[i] * cjk;
1678  }
1679  }
1680  } /* end loop over finer grid stencil */
1681 
1682  }
1683  }
1684  } /* end loop over each coarser grid point */
1685 
1686  return NL_MSM_SUCCESS;
1687 }
1688 
1689 
1690 int gridcutoff(NL_Msm *msm, int level)
1691 {
1692  float eh_sum;
1693 
1694  /* grids of charge and potential */
1695  const NL_Msmgrid_float *qhgrid = &(msm->qh_f[level]);
1696  const float *qh = qhgrid->data;
1697  NL_Msmgrid_float *ehgrid = &(msm->eh_f[level]);
1698  float *eh = ehgrid->data;
1699  const int ia = qhgrid->i0; /* lowest x-index */
1700  const int ib = ia + qhgrid->ni - 1; /* highest x-index */
1701  const int ja = qhgrid->j0; /* lowest y-index */
1702  const int jb = ja + qhgrid->nj - 1; /* highest y-index */
1703  const int ka = qhgrid->k0; /* lowest z-index */
1704  const int kb = ka + qhgrid->nk - 1; /* highest z-index */
1705  const int ni = qhgrid->ni; /* length along x-dim */
1706  const int nj = qhgrid->nj; /* length along y-dim */
1707  const int nk = qhgrid->nk; /* length along z-dim */
1708 
1709  /* grid of weights for pairwise grid point interactions within cutoff */
1710  const NL_Msmgrid_float *gcgrid = &(msm->gc_f[level]);
1711  const float *gc = gcgrid->data;
1712  const int gia = gcgrid->i0; /* lowest x-index */
1713  const int gib = gia + gcgrid->ni - 1; /* highest x-index */
1714  const int gja = gcgrid->j0; /* lowest y-index */
1715  const int gjb = gja + gcgrid->nj - 1; /* highest y-index */
1716  const int gka = gcgrid->k0; /* lowest z-index */
1717  const int gkb = gka + gcgrid->nk - 1; /* highest z-index */
1718  const int gni = gcgrid->ni; /* length along x-dim */
1719  const int gnj = gcgrid->nj; /* length along y-dim */
1720 
1721  const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
1722  const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
1723  const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
1724 
1725  const int ispnone = !(ispx || ispy || ispz);
1726 
1727  int i, j, k;
1728  int gia_clip, gib_clip;
1729  int gja_clip, gjb_clip;
1730  int gka_clip, gkb_clip;
1731  int koff;
1732  long jkoff, index;
1733  int id, jd, kd;
1734  int knoff;
1735  long jknoff, nindex;
1736  int kgoff, jkgoff, ngindex;
1737 
1738 #ifdef NL_DEBUG
1739  if (level==0) {
1740  const float *gcbuf = gcgrid->buffer;
1741  int index = 0;
1742  printf("+++ qh[0,0,0] = %g\n", qh[0]);
1743 #if 0
1744  for (k = gka; k <= gkb; k++) {
1745  for (j = gja; j <= gjb; j++) {
1746  for (i = gia; i <= gib; i++, index++) {
1747  printf("=== gc[%d,%d,%d] = %g\n", i, j, k, gcbuf[index]);
1748  }
1749  }
1750  }
1751 #endif
1752  }
1753 #endif
1754 
1755  if ( ispnone ) { /* non-periodic boundaries */
1756 
1757  /* loop over all grid points */
1758  for (k = ka; k <= kb; k++) {
1759 
1760  /* clip gc ranges to keep offset for k index within grid */
1761  gka_clip = (k + gka < ka ? ka - k : gka);
1762  gkb_clip = (k + gkb > kb ? kb - k : gkb);
1763 
1764  koff = k * nj; /* find eh flat index */
1765 
1766  for (j = ja; j <= jb; j++) {
1767 
1768  /* clip gc ranges to keep offset for j index within grid */
1769  gja_clip = (j + gja < ja ? ja - j : gja);
1770  gjb_clip = (j + gjb > jb ? jb - j : gjb);
1771 
1772  jkoff = (koff + j) * ni; /* find eh flat index */
1773 
1774  for (i = ia; i <= ib; i++) {
1775 
1776  /* clip gc ranges to keep offset for i index within grid */
1777  gia_clip = (i + gia < ia ? ia - i : gia);
1778  gib_clip = (i + gib > ib ? ib - i : gib);
1779 
1780  index = jkoff + i; /* eh flat index */
1781 
1782  /* sum over "sphere" of weighted charge */
1783  eh_sum = 0;
1784  for (kd = gka_clip; kd <= gkb_clip; kd++) {
1785  knoff = (k + kd) * nj; /* find qh flat index */
1786  kgoff = kd * gnj; /* find gc flat index */
1787 
1788  for (jd = gja_clip; jd <= gjb_clip; jd++) {
1789  jknoff = (knoff + (j + jd)) * ni; /* find qh flat index */
1790  jkgoff = (kgoff + jd) * gni; /* find gc flat index */
1791 
1792  for (id = gia_clip; id <= gib_clip; id++) {
1793  nindex = jknoff + (i + id); /* qh flat index */
1794  ngindex = jkgoff + id; /* gc flat index */
1795 
1796  GRID_INDEX_CHECK(qhgrid, i+id, j+jd, k+kd);
1797  ASSERT(GRID_INDEX(qhgrid, i+id, j+jd, k+kd) == nindex);
1798 
1799  GRID_INDEX_CHECK(gcgrid, id, jd, kd);
1800  ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
1801 
1802  eh_sum += qh[nindex] * gc[ngindex]; /* sum weighted charge */
1803  }
1804  }
1805  } /* end loop over "sphere" of charge */
1806 
1807  GRID_INDEX_CHECK(ehgrid, i, j, k);
1808  ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
1809  eh[index] = eh_sum; /* store potential */
1810  }
1811  }
1812  } /* end loop over all grid points */
1813 
1814  } /* if nonperiodic boundaries */
1815  else {
1816  /* some boundary is periodic */
1817  int ilo, jlo, klo;
1818  int ip, jp, kp;
1819 
1820  /* loop over all grid points */
1821  for (k = ka; k <= kb; k++) {
1822  klo = k + gka;
1823  if ( ! ispz ) { /* nonperiodic z */
1824  /* clip gc ranges to keep offset for k index within grid */
1825  gka_clip = (k + gka < ka ? ka - k : gka);
1826  gkb_clip = (k + gkb > kb ? kb - k : gkb);
1827  if (klo < ka) klo = ka; /* keep lowest qh index within grid */
1828  }
1829  else { /* periodic z */
1830  gka_clip = gka;
1831  gkb_clip = gkb;
1832  if (klo < ka) do { klo += nk; } while (klo < ka);
1833  }
1834  ASSERT(klo <= kb);
1835 
1836  koff = k * nj; /* find eh flat index */
1837 
1838  for (j = ja; j <= jb; j++) {
1839  jlo = j + gja;
1840  if ( ! ispy ) { /* nonperiodic y */
1841  /* clip gc ranges to keep offset for j index within grid */
1842  gja_clip = (j + gja < ja ? ja - j : gja);
1843  gjb_clip = (j + gjb > jb ? jb - j : gjb);
1844  if (jlo < ja) jlo = ja; /* keep lowest qh index within grid */
1845  }
1846  else { /* periodic y */
1847  gja_clip = gja;
1848  gjb_clip = gjb;
1849  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1850  }
1851  ASSERT(jlo <= jb);
1852 
1853  jkoff = (koff + j) * ni; /* find eh flat index */
1854 
1855  for (i = ia; i <= ib; i++) {
1856  ilo = i + gia;
1857  if ( ! ispx ) { /* nonperiodic x */
1858  /* clip gc ranges to keep offset for i index within grid */
1859  gia_clip = (i + gia < ia ? ia - i : gia);
1860  gib_clip = (i + gib > ib ? ib - i : gib);
1861  if (ilo < ia) ilo = ia; /* keep lowest qh index within grid */
1862  }
1863  else { /* periodic x */
1864  gia_clip = gia;
1865  gib_clip = gib;
1866  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1867  }
1868  ASSERT(ilo <= ib);
1869 
1870  index = jkoff + i; /* eh flat index */
1871 
1872  /* sum over "sphere" of weighted charge */
1873  eh_sum = 0;
1874  for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
1875  /* clipping makes conditional always fail for nonperiodic */
1876  if (kp > kb) kp = ka; /* wrap z direction */
1877  knoff = kp * nj; /* find qh flat index */
1878  kgoff = kd * gnj; /* find gc flat index */
1879 
1880  for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
1881  /* clipping makes conditional always fail for nonperiodic */
1882  if (jp > jb) jp = ja; /* wrap y direction */
1883  jknoff = (knoff + jp) * ni; /* find qh flat index */
1884  jkgoff = (kgoff + jd) * gni; /* find gc flat index */
1885 
1886  for (id = gia_clip, ip = ilo; id <= gib_clip; id++, ip++) {
1887  /* clipping makes conditional always fail for nonperiodic */
1888  if (ip > ib) ip = ia; /* wrap x direction */
1889  nindex = jknoff + ip; /* qh flat index */
1890  ngindex = jkgoff + id; /* gc flat index */
1891 
1892  GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
1893  ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == nindex);
1894 
1895  GRID_INDEX_CHECK(gcgrid, id, jd, kd);
1896  ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
1897 
1898  eh_sum += qh[nindex] * gc[ngindex]; /* sum weighted charge */
1899 
1900  }
1901  }
1902  } /* end loop over "sphere" of charge */
1903 
1904  GRID_INDEX_CHECK(ehgrid, i, j, k);
1905  ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
1906  eh[index] = eh_sum; /* store potential */
1907  }
1908  }
1909  } /* end loop over all grid points */
1910 
1911  } /* else some boundary is periodic */
1912 
1913  return NL_MSM_SUCCESS;
1914 }
double wkf_timer_time(wkf_timerhandle v)
Definition: wkfutils.c:134
static int interpolation(NL_Msm *)
int msmflags
Definition: msm_defn.h:590
static const int PolyDegree[NL_MSM_APPROX_END]
static int prolongation_factored(NL_Msm *, int level)
int approx
Definition: msm_defn.h:642
#define GRID_INDEX_CHECK(a, _i, _j, _k)
Definition: msm_defn.h:114
const float * atom_f
Definition: msm_defn.h:572
static int gridcutoff(NL_Msm *, int level)
int numatoms
Definition: msm_defn.h:599
float gz_f
Definition: msm_defn.h:648
static const int Nstencil[NL_MSM_APPROX_END]
static const float PhiStencilFactored[NL_MSM_APPROX_END][2 *MAX_POLY_DEGREE+1]
float hy_f
Definition: msm_defn.h:637
static int prolongation(NL_Msm *, int level)
float hx_f
Definition: msm_defn.h:637
void wkf_timer_stop(wkf_timerhandle v)
Definition: wkfutils.c:129
double uelec
Definition: msm_defn.h:566
float * lyzd_f
Definition: msm_defn.h:681
NL_Msmgrid_float * gc_f
Definition: msm_defn.h:672
int NL_msm_compute_long_range_sprec(NL_Msm *pm)
#define GRID_ZERO(_p)
Definition: msm_defn.h:98
#define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx)
#define GRID_INDEX(_p, _i, _j, _k)
Definition: msm_defn.h:66
float * felec_f
Definition: msm_defn.h:571
float hz_f
Definition: msm_defn.h:637
#define ASSERT(E)
wkf_timerhandle timer_longrng
Definition: msm_defn.h:685
NL_Msmgrid_float * eh_f
Definition: msm_defn.h:671
static int anterpolation(NL_Msm *)
static const float PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL]
static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL]
#define STENCIL_1D(_phi, _delta, _approx)
static int restriction(NL_Msm *, int level)
float gx_f
Definition: msm_defn.h:648
NL_Msmgrid_float * qh_f
Definition: msm_defn.h:670
void wkf_timer_start(wkf_timerhandle v)
Definition: wkfutils.c:124
int nlevels
Definition: msm_defn.h:645
static int restriction_factored(NL_Msm *, int level)
float gy_f
Definition: msm_defn.h:648
float gzero_f
Definition: msm_defn.h:651
float * lzd_f
Definition: msm_defn.h:680
int report_timings
Definition: msm_defn.h:683