NAMD
msm_defn.h
Go to the documentation of this file.
1 /* msm_defn.h */
2 
3 #ifndef NL_MSM_DEFN_H
4 #define NL_MSM_DEFN_H
5 
6 /* #define TEST_INLINING */
7 
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <string.h>
11 #include <math.h>
12 
13 #ifdef WIN32
14 #define strcasecmp(s,t) stricmp(s,t)
15 #define strncasecmp(s,t,n) strnicmp(s,t,n)
16 #else
17 #include <strings.h>
18 #endif
19 
20 #include "msm.h"
21 
22 #include "wkfutils.h" /* timers from John Stone */
23 
24 /* avoid parameter name collisions with AIX5 "hz" macro */
25 #undef hz
26 
28 #undef X
29 #define X 0
30 #undef Y
31 #define Y 1
32 #undef Z
33 #define Z 2
34 #undef Q
35 #define Q 3
36 
38 #undef NELEMS
39 #define NELEMS(arr) (sizeof(arr)/sizeof(arr[0]))
40 
42 #undef GRID_TEMPLATE
43 #define GRID_TEMPLATE(TYPE) \
44  typedef struct NL_Msmgrid_##TYPE##_t { \
45  TYPE *buffer; /* raw buffer */ \
46  TYPE *data; /* data access offset from buffer */ \
47  size_t numbytes; /* number of bytes in use by buffer */ \
48  size_t maxbytes; /* actual allocation for buffer */ \
49  int i0, j0, k0; /* starting index value for each dimension */ \
50  int ni, nj, nk; /* number of elements in each dimension */ \
51  } NL_Msmgrid_##TYPE
52  /* expect closing ';' */
53 
55 #define GRID_INIT(_p) \
56  ((_p)->buffer=NULL, (_p)->data=NULL, (_p)->numbytes=0, (_p)->maxbytes=0, \
57  (_p)->i0=0, (_p)->j0=0, (_p)->k0=0, (_p)->ni=0, (_p)->nj=0, (_p)->nk=0)
58  /* expect closing ';' */
59 
61 #define GRID_DONE(_p) \
62  free((_p)->buffer)
63  /* expect closing ';' */
64 
66 #define GRID_INDEX(_p, _i, _j, _k) \
67  (((_k)*((_p)->nj) + (_j))*((_p)->ni) + (_i))
68  /* expect closing ';' */
69 
71 #define GRID_POINTER(_p, _i, _j, _k) \
72  ((_p)->data + GRID_INDEX(_p, _i, _j, _k))
73  /* expect closing ';' */
74 
77 #define GRID_RESIZE(_p, TYPE, __i0, __ni, __j0, __nj, __k0, __nk) \
78  do { \
79  int _i0=(__i0), _ni=(__ni); \
80  int _j0=(__j0), _nj=(__nj); \
81  int _k0=(__k0), _nk=(__nk); \
82  size_t _numbytes = (_nk * _nj) * (size_t) _ni * sizeof((_p)->buffer[0]); \
83  if ((_p)->maxbytes < _numbytes) { \
84  void *_t = realloc((_p)->buffer, _numbytes); \
85  if (NULL == _t) return NL_MSM_ERROR_MALLOC; \
86  (_p)->buffer = (TYPE *) _t; \
87  (_p)->maxbytes = _numbytes; \
88  } \
89  (_p)->numbytes = _numbytes; \
90  (_p)->i0 = _i0, (_p)->ni = _ni; \
91  (_p)->j0 = _j0, (_p)->nj = _nj; \
92  (_p)->k0 = _k0, (_p)->nk = _nk; \
93  (_p)->data = (_p)->buffer + GRID_INDEX((_p), -_i0, -_j0, -_k0); \
94  } while (0)
95  /* expect closing ';' */
96 
97 /* reset 3D grid data to 0 */
98 #define GRID_ZERO(_p) \
99  memset((_p)->buffer, 0, (_p)->numbytes) /* ; */
100 
101 /* check 3D grid index range when debugging
102  * (must be used within function returning int) */
103 #ifdef MSM_DEBUG
104 #define ASSERT(expr)
105 #define GRID_INDEX_CHECK(a, _i, _j, _k) \
106  do { \
107  ASSERT((a)->i0 <= (_i) && (_i) < (a)->ni + (a)->i0); \
108  ASSERT((a)->j0 <= (_j) && (_j) < (a)->nj + (a)->j0); \
109  ASSERT((a)->k0 <= (_k) && (_k) < (a)->nk + (a)->k0); \
110  } while (0)
111  /* expect closing ';' */
112 #else
113 #define ASSERT(expr)
114 #define GRID_INDEX_CHECK(a, _i, _j, _k)
115 #endif
116 
117 
119 #define DEFAULT_GRIDSPACING 2.5
120 #define DEFAULT_APPROX NL_MSM_APPROX_CUBIC
121 #define DEFAULT_SPLIT NL_MSM_SPLIT_TAYLOR2
122 #define DEFAULT_NLEVELS 0 /* set number of levels as needed */
123 
124 #define DEFAULT_DENSITY 0.1
125 #define DEFAULT_BINFILL 0.8
126 #define DEFAULT_NBINSLOTS 8
127 
128 
139 #undef SPOLY
140 #define SPOLY(pg, pdg, ra, split) \
141  do { \
142  double _r = ra; /* _r=(r/a) */ \
143  double _s = _r*_r; /* _s=(r/a)^2 */ \
144  double _g = 0, _dg = 0; \
145  ASSERT(0 <= _s && _s <= 1); \
146  switch (split) { \
147  /* from section 5.1 of thesis */ \
148  case NL_MSM_SPLIT_TAYLOR2: \
149  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8)); \
150  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4)); \
151  break; \
152  case NL_MSM_SPLIT_TAYLOR3: \
153  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16))); \
154  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16))); \
155  break; \
156  case NL_MSM_SPLIT_TAYLOR4: \
157  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \
158  + (_s-1)*(35./128)))); \
159  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 \
160  + (_s-1)*(35./32)))); \
161  break; \
162  case NL_MSM_SPLIT_TAYLOR5: \
163  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \
164  + (_s-1)*(35./128 + (_s-1)*(-63./256))))); \
165  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \
166  + (_s-1)*(-315./256))))); \
167  break; \
168  case NL_MSM_SPLIT_TAYLOR6: \
169  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \
170  + (_s-1)*(35./128 + (_s-1)*(-63./256 \
171  + (_s-1)*(231./1024)))))); \
172  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \
173  + (_s-1)*(-315./256 + (_s-1)*(693./512)))))); \
174  break; \
175  case NL_MSM_SPLIT_TAYLOR7: \
176  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \
177  + (_s-1)*(35./128 + (_s-1)*(-63./256 \
178  + (_s-1)*(231./1024 + (_s-1)*(-429./2048))))))); \
179  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \
180  + (_s-1)*(-315./256 + (_s-1)*(693./512 \
181  + (_s-1)*(-3003./2048))))))); \
182  break; \
183  case NL_MSM_SPLIT_TAYLOR8: \
184  _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \
185  + (_s-1)*(35./128 + (_s-1)*(-63./256 \
186  + (_s-1)*(231./1024 + (_s-1)*(-429./2048 \
187  + (_s-1)*(6435./32768)))))))); \
188  _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \
189  + (_s-1)*(-315./256 + (_s-1)*(693./512 \
190  + (_s-1)*(-3003./2048 + (_s-1)*(6435./4096)))))))); \
191  break; \
192  case NL_MSM_SPLIT_TAYLOR1: \
193  _g = 1 + (_s-1)*(-1./2); \
194  _dg = (2*_r)*(-1./2); \
195  break; \
196  /* from section 5.2 of thesis */ \
197  case NL_MSM_SPLIT_SIGMA2_3: /* the "perfect" smoothing */ \
198  _g = 2 + _s*(-2 + _r); \
199  _dg = _r*(-4 + _r*3); \
200  break; \
201  case NL_MSM_SPLIT_SIGMA3_5: \
202  _g = 9./4 + _s*(-5./2 + _s*(9./4 - _r)); \
203  _dg = _r*(-5 + _s*(9 + _r*(-5))); \
204  break; \
205  case NL_MSM_SPLIT_SIGMA4_6: \
206  _g = 21./8 + _s*(-35./8 + _s*(63./8 + _r*(-7 + _r*(15./8)))); \
207  _dg = _r*(-35./4 + _s*(63./2 + _r*(-35 + _r*(45./4)))); \
208  break; \
209  case NL_MSM_SPLIT_SIGMA4_7: \
210  _g = 5./2 + _s*(-7./2 + _s*(7./2 + _s*(-5./2 + _r))); \
211  _dg = _r*(-7 + _s*(14 + _s*(-15 + _r*(7)))); \
212  break; \
213  case NL_MSM_SPLIT_SIGMA5_8: \
214  _g = 45./16 + _s*(-21./4 + _s*(63./8 + _s*(-45./4 \
215  + _r*(9 + _r*(-35./16))))); \
216  _dg = _r*(-21./2 + _s*(63./2 + _s*(-135./2 \
217  + _r*(63 + _r*(-35./2))))); \
218  break; \
219  case NL_MSM_SPLIT_SIGMA5_9: \
220  _g = 175./64 + _s*(-75./16 + _s*(189./32 + _s*(-75./16 \
221  + _s*(175./64 - _r)))); \
222  _dg = _r*(-75./8 + _s*(189./8 + _s*(-225./8 + _s*(175./8 \
223  + _r*(-9))))); \
224  break; \
225  case NL_MSM_SPLIT_SIGMA6_9: \
226  _g = 25./8 + _s*(-15./2 + _s*(63./4 + _s*(-75./2 \
227  + _r*(45 + _r*(-175./8 + _r*4))))); \
228  _dg = _r*(-15 + _s*(63 + _s*(-225 \
229  + _r*(315 + _r*(-175 + _r*36))))); \
230  break; \
231  case NL_MSM_SPLIT_SIGMA6_10: \
232  _g = 385./128 + _s*(-825./128 + _s*(693./64 + _s*(-825./64 \
233  + _s*(1925./128 + _r*(-11 + _r*(315./128)))))); \
234  _dg = _r*(-825./64 + _s*(693./16 + _s*(-2475./32 \
235  + _s*(1925./16 + _r*(-99 + _r*(1575./64)))))); \
236  break; \
237  case NL_MSM_SPLIT_SIGMA6_11: \
238  _g = 189./64 + _s*(-385./64 + _s*(297./32 + _s*(-297./32 \
239  + _s*(385./64 + _s*(-189./64 + _r))))); \
240  _dg = _r*(-385./32 + _s*(297./8 + _s*(-891./16 + _s*(385./8 \
241  + _s*(-945./32 + _r*(11)))))); \
242  break; \
243  case NL_MSM_SPLIT_SIGMA7_11: \
244  _g = 105./32 + _s*(-275./32 + _s*(297./16 + _s*(-495./16 \
245  + _s*(1925./32 + _r*(-66 + _r*(945./32 + _r*(-5))))))); \
246  _dg = _r*(-275./16 + _s*(297./4 + _s*(-1485./8 \
247  + _s*(1925./4 + _r*(-594 + _r*(4725./16 + _r*(-55))))))); \
248  break; \
249  case NL_MSM_SPLIT_SIGMA7_12: \
250  _g = 819./256 + _s*(-1001./128 + _s*(3861./256 \
251  + _s*(-1287./64 + _s*(5005./256 + _s*(-2457./128 \
252  + _r*(13 + _r*(-693./256))))))); \
253  _dg = _r*(-1001./64 + _s*(3861./64 + _s*(-3861./32 \
254  + _s*(5005./32 + _s*(-12285./64 + _r*(143 \
255  + _r*(-2079./64))))))); \
256  break; \
257  case NL_MSM_SPLIT_SIGMA7_13: \
258  _g = 1617./512 + _s*(-1911./256 + _s*(7007./512 + _s*(-2145./128 \
259  + _s*(7007./512 + _s*(-1911./256 + _s*(1617./512 - _r))))));\
260  _dg = _r*(-1911./128 + _s*(7007./128 + _s*(-6435./64 + _s*(7007./64 \
261  + _s*(-9555./128 + _s*(4851./128 + _r*(-13))))))); \
262  break; \
263  case NL_MSM_SPLIT_SIGMA8_12: \
264  _g = 455./128 + _s*(-715./64 + _s*(3861./128 + _s*(-2145./32 \
265  + _s*(25025./128 + _r*(-286 + _r*(12285./64 + _r*(-65 \
266  + _r*(1155./128)))))))); \
267  _dg = _r*(-715./32 + _s*(3861./32 + _s*(-6435./16 \
268  + _s*(25025./16 + _r*(-2574 + _r*(61425./32 + _r*(-715 \
269  + _r*(3465./32)))))))); \
270  break; \
271  case NL_MSM_SPLIT_SIGMA8_13: \
272  _g = 441./128 + _s*(-637./64 + _s*(3003./128 \
273  + _s*(-1287./32 + _s*(7007./128 + _s*(-5733./64 \
274  + _r*(91 + _r*(-4851./128 + _r*(6)))))))); \
275  _dg = _r*(-637./32 + _s*(3003./32 + _s*(-3861./16 \
276  + _s*(7007./16 + _s*(-28665./32 + _r*(1001 \
277  + _r*(-14553./32 + _r*(78)))))))); \
278  break; \
279  case NL_MSM_SPLIT_SIGMA8_14: \
280  _g = 3465./1024 + _s*(-9555./1024 + _s*(21021./1024 \
281  + _s*(-32175./1024 + _s*(35035./1024 + _s*(-28665./1024 \
282  + _s*(24255./1024 + _r*(-15 + _r*(3003./1024)))))))); \
283  _dg = _r*(-9555./512 + _s*(21021./256 + _s*(-96525./512 \
284  + _s*(35035./128 + _s*(-143325./512 + _s*(72765./256 \
285  + _r*(-195 + _r*(21021./512)))))))); \
286  break; \
287  case NL_MSM_SPLIT_SIGMA8_15: \
288  _g = 429./128 + _s*(-1155./128 + _s*(2457./128 + _s*(-3575./128 \
289  + _s*(3575./128 + _s*(-2457./128 + _s*(1155./128 \
290  + _s*(-429./128 + _r))))))); \
291  _dg = _r*(-1155./64 + _s*(2457./32 + _s*(-10725./64 \
292  + _s*(3575./16 + _s*(-12285./64 + _s*(3465./32 \
293  + _s*(-3003./64 + _r*(15)))))))); \
294  break; \
295  /* from section 5.3 of thesis */ \
296  case NL_MSM_SPLIT_SIGMA2_6: \
297  _g = (31./16) + _s*(-23./16 + _s*(9./16 + _s*(-1./16))); \
298  _dg = (2*_r)*(-23./16 + _s*(9./8 + _s*(-3./16))); \
299  break; \
300  /* from section 5.4 of thesis */ \
301  case NL_MSM_SPLIT_SWITCH1_2: \
302  if (_r > 1./2) { \
303  _g = 5./3 + _r + _s*(-3 + _r*(4./3)); \
304  _dg = 1 + _r*(-6 + _r*(4)); \
305  } \
306  else { \
307  _g = 11./6 - _s; \
308  _dg = _r*(-2); \
309  } \
310  break; \
311  case NL_MSM_SPLIT_SWITCH3_4: \
312  if (_r > 3./4) { \
313  _g = 5./7 + _r*(27./7 + _r*(-41./7 + _r*(16./7))); \
314  _dg = 27./7 + _r*(-82./7 + _r*(48./7)); \
315  } \
316  else { \
317  _g = 47./28 + _s*(-5./7); \
318  _dg = _r*(-10./7); \
319  } \
320  break; \
321  case NL_MSM_SPLIT_SWITCH7_8: \
322  if (_r > 7./8) { \
323  _g = -19./15 + _r*(49./5 + _r*(-59./5 + _r*(64./15))); \
324  _dg = 49./5 + _r*(-118./5 + _r*(64./5)); \
325  } \
326  else { \
327  _g = 191./120 + _s*(-3./5); \
328  _dg = _r*(-6./5); \
329  } \
330  break; \
331  default: \
332  return NL_MSM_ERROR_SUPPORT; \
333  } \
334  *(pg) = _g; \
335  *(pdg) = _dg; \
336  } while (0)
337  /* closing ';' from use as function call */
338 
339 
350 #undef SPOLY_SPREC
351 #define SPOLY_SPREC(pg, pdg, ra, split) \
352  do { \
353  float _r = ra; /* _r=(r/a) */ \
354  float _s = _r*_r; /* _s=(r/a)^2 */ \
355  float _g = 0, _dg = 0; \
356  ASSERT(0 <= _s && _s <= 1); \
357  switch (split) { \
358  /* from section 5.1 of thesis */ \
359  case NL_MSM_SPLIT_TAYLOR2: \
360  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8)); \
361  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4)); \
362  break; \
363  case NL_MSM_SPLIT_TAYLOR3: \
364  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16))); \
365  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16))); \
366  break; \
367  case NL_MSM_SPLIT_TAYLOR4: \
368  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \
369  + (_s-1)*(35.f/128)))); \
370  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
371  + (_s-1)*(35.f/32)))); \
372  break; \
373  case NL_MSM_SPLIT_TAYLOR5: \
374  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \
375  + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256))))); \
376  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
377  + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256))))); \
378  break; \
379  case NL_MSM_SPLIT_TAYLOR6: \
380  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \
381  + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256 \
382  + (_s-1)*(231.f/1024)))))); \
383  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
384  + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 \
385  + (_s-1)*(693.f/512)))))); \
386  break; \
387  case NL_MSM_SPLIT_TAYLOR7: \
388  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \
389  + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256 \
390  + (_s-1)*(231.f/1024 + (_s-1)*(-429.f/2048))))))); \
391  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
392  + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 + (_s-1)*(693.f/512 \
393  + (_s-1)*(-3003.f/2048))))))); \
394  break; \
395  case NL_MSM_SPLIT_TAYLOR8: \
396  _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \
397  + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256 \
398  + (_s-1)*(231.f/1024 + (_s-1)*(-429.f/2048 \
399  + (_s-1)*(6435.f/32768)))))))); \
400  _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
401  + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 + (_s-1)*(693.f/512 \
402  + (_s-1)*(-3003.f/2048 + (_s-1)*(6435.f/4096)))))))); \
403  break; \
404  case NL_MSM_SPLIT_TAYLOR1: \
405  _g = 1 + (_s-1)*(-1.f/2); \
406  _dg = (2*_r)*(-1.f/2); \
407  break; \
408  /* from section 5.2 of thesis */ \
409  case NL_MSM_SPLIT_SIGMA2_3: /* the "perfect" smoothing */ \
410  _g = 2 + _s*(-2 + _r); \
411  _dg = _r*(-4 + _r*3); \
412  break; \
413  case NL_MSM_SPLIT_SIGMA3_5: \
414  _g = 9.f/4 + _s*(-5.f/2 + _s*(9.f/4 - _r)); \
415  _dg = _r*(-5 + _s*(9 + _r*(-5))); \
416  break; \
417  case NL_MSM_SPLIT_SIGMA4_6: \
418  _g = 21.f/8 + _s*(-35.f/8 + _s*(63.f/8 + _r*(-7 + _r*(15.f/8)))); \
419  _dg = _r*(-35.f/4 + _s*(63.f/2 + _r*(-35 + _r*(45.f/4)))); \
420  break; \
421  case NL_MSM_SPLIT_SIGMA4_7: \
422  _g = 5.f/2 + _s*(-7.f/2 + _s*(7.f/2 + _s*(-5.f/2 + _r))); \
423  _dg = _r*(-7 + _s*(14 + _s*(-15 + _r*(7)))); \
424  break; \
425  case NL_MSM_SPLIT_SIGMA5_8: \
426  _g = 45.f/16 + _s*(-21.f/4 + _s*(63.f/8 + _s*(-45.f/4 \
427  + _r*(9 + _r*(-35.f/16))))); \
428  _dg = _r*(-21.f/2 + _s*(63.f/2 + _s*(-135.f/2 \
429  + _r*(63 + _r*(-35.f/2))))); \
430  break; \
431  case NL_MSM_SPLIT_SIGMA5_9: \
432  _g = 175.f/64 + _s*(-75.f/16 + _s*(189.f/32 + _s*(-75.f/16 \
433  + _s*(175.f/64 - _r)))); \
434  _dg = _r*(-75.f/8 + _s*(189.f/8 + _s*(-225.f/8 + _s*(175.f/8 \
435  + _r*(-9))))); \
436  break; \
437  case NL_MSM_SPLIT_SIGMA6_9: \
438  _g = 25.f/8 + _s*(-15.f/2 + _s*(63.f/4 + _s*(-75.f/2 \
439  + _r*(45 + _r*(-175.f/8 + _r*4))))); \
440  _dg = _r*(-15 + _s*(63 + _s*(-225 \
441  + _r*(315 + _r*(-175 + _r*36))))); \
442  break; \
443  case NL_MSM_SPLIT_SIGMA6_10: \
444  _g = 385.f/128 + _s*(-825.f/128 + _s*(693.f/64 + _s*(-825.f/64 \
445  + _s*(1925.f/128 + _r*(-11 + _r*(315.f/128)))))); \
446  _dg = _r*(-825.f/64 + _s*(693.f/16 + _s*(-2475.f/32 \
447  + _s*(1925.f/16 + _r*(-99 + _r*(1575.f/64)))))); \
448  break; \
449  case NL_MSM_SPLIT_SIGMA6_11: \
450  _g = 189.f/64 + _s*(-385.f/64 + _s*(297.f/32 + _s*(-297.f/32 \
451  + _s*(385.f/64 + _s*(-189.f/64 + _r))))); \
452  _dg = _r*(-385.f/32 + _s*(297.f/8 + _s*(-891.f/16 + _s*(385.f/8 \
453  + _s*(-945.f/32 + _r*(11)))))); \
454  break; \
455  case NL_MSM_SPLIT_SIGMA7_11: \
456  _g = 105.f/32 + _s*(-275.f/32 + _s*(297.f/16 + _s*(-495.f/16 \
457  + _s*(1925.f/32 + _r*(-66 + _r*(945.f/32 + _r*(-5))))))); \
458  _dg = _r*(-275.f/16 + _s*(297.f/4 + _s*(-1485.f/8 \
459  + _s*(1925.f/4 + _r*(-594 + _r*(4725.f/16 + _r*(-55))))))); \
460  break; \
461  case NL_MSM_SPLIT_SIGMA7_12: \
462  _g = 819.f/256 + _s*(-1001.f/128 + _s*(3861.f/256 \
463  + _s*(-1287.f/64 + _s*(5005.f/256 + _s*(-2457.f/128 \
464  + _r*(13 + _r*(-693.f/256))))))); \
465  _dg = _r*(-1001.f/64 + _s*(3861.f/64 + _s*(-3861.f/32 \
466  + _s*(5005.f/32 + _s*(-12285.f/64 + _r*(143 \
467  + _r*(-2079.f/64))))))); \
468  break; \
469  case NL_MSM_SPLIT_SIGMA7_13: \
470  _g = 1617.f/512 + _s*(-1911.f/256 + _s*(7007.f/512 + _s*(-2145.f/128 \
471  + _s*(7007.f/512 + _s*(-1911.f/256 + _s*(1617.f/512 - _r))))));\
472  _dg = _r*(-1911.f/128 + _s*(7007.f/128 + _s*(-6435.f/64 + _s*(7007.f/64\
473  + _s*(-9555.f/128 + _s*(4851.f/128 + _r*(-13))))))); \
474  break; \
475  case NL_MSM_SPLIT_SIGMA8_12: \
476  _g = 455.f/128 + _s*(-715.f/64 + _s*(3861.f/128 + _s*(-2145.f/32 \
477  + _s*(25025.f/128 + _r*(-286 + _r*(12285.f/64 + _r*(-65 \
478  + _r*(1155.f/128)))))))); \
479  _dg = _r*(-715.f/32 + _s*(3861.f/32 + _s*(-6435.f/16 \
480  + _s*(25025.f/16 + _r*(-2574 + _r*(61425.f/32 + _r*(-715 \
481  + _r*(3465.f/32)))))))); \
482  break; \
483  case NL_MSM_SPLIT_SIGMA8_13: \
484  _g = 441.f/128 + _s*(-637.f/64 + _s*(3003.f/128 \
485  + _s*(-1287.f/32 + _s*(7007.f/128 + _s*(-5733.f/64 \
486  + _r*(91 + _r*(-4851.f/128 + _r*(6)))))))); \
487  _dg = _r*(-637.f/32 + _s*(3003.f/32 + _s*(-3861.f/16 \
488  + _s*(7007.f/16 + _s*(-28665.f/32 + _r*(1001 \
489  + _r*(-14553.f/32 + _r*(78)))))))); \
490  break; \
491  case NL_MSM_SPLIT_SIGMA8_14: \
492  _g = 3465.f/1024 + _s*(-9555.f/1024 + _s*(21021.f/1024 \
493  + _s*(-32175.f/1024 + _s*(35035.f/1024 + _s*(-28665.f/1024 \
494  + _s*(24255.f/1024 + _r*(-15 + _r*(3003.f/1024)))))))); \
495  _dg = _r*(-9555.f/512 + _s*(21021.f/256 + _s*(-96525.f/512 \
496  + _s*(35035.f/128 + _s*(-143325.f/512 + _s*(72765.f/256 \
497  + _r*(-195 + _r*(21021.f/512)))))))); \
498  break; \
499  case NL_MSM_SPLIT_SIGMA8_15: \
500  _g = 429.f/128 + _s*(-1155.f/128 + _s*(2457.f/128 + _s*(-3575.f/128 \
501  + _s*(3575.f/128 + _s*(-2457.f/128 + _s*(1155.f/128 \
502  + _s*(-429.f/128 + _r))))))); \
503  _dg = _r*(-1155.f/64 + _s*(2457.f/32 + _s*(-10725.f/64 \
504  + _s*(3575.f/16 + _s*(-12285.f/64 + _s*(3465.f/32 \
505  + _s*(-3003.f/64 + _r*(15)))))))); \
506  break; \
507  /* from section 5.3 of thesis */ \
508  case NL_MSM_SPLIT_SIGMA2_6: \
509  _g = (31.f/16) + _s*(-23.f/16 + _s*(9.f/16 + _s*(-1.f/16))); \
510  _dg = (2*_r)*(-23.f/16 + _s*(9.f/8 + _s*(-3.f/16))); \
511  break; \
512  /* from section 5.4 of thesis */ \
513  case NL_MSM_SPLIT_SWITCH1_2: \
514  if (_r > 1.f/2) { \
515  _g = 5.f/3 + _r + _s*(-3 + _r*(4.f/3)); \
516  _dg = 1 + _r*(-6 + _r*(4)); \
517  } \
518  else { \
519  _g = 11.f/6 - _s; \
520  _dg = _r*(-2); \
521  } \
522  break; \
523  case NL_MSM_SPLIT_SWITCH3_4: \
524  if (_r > 3.f/4) { \
525  _g = 5.f/7 + _r*(27.f/7 + _r*(-41.f/7 + _r*(16.f/7))); \
526  _dg = 27.f/7 + _r*(-82.f/7 + _r*(48.f/7)); \
527  } \
528  else { \
529  _g = 47.f/28 + _s*(-5.f/7); \
530  _dg = _r*(-10.f/7); \
531  } \
532  break; \
533  case NL_MSM_SPLIT_SWITCH7_8: \
534  if (_r > 7.f/8) { \
535  _g = -19.f/15 + _r*(49.f/5 + _r*(-59.f/5 + _r*(64.f/15))); \
536  _dg = 49.f/5 + _r*(-118.f/5 + _r*(64.f/5)); \
537  } \
538  else { \
539  _g = 191.f/120 + _s*(-3.f/5); \
540  _dg = _r*(-6.f/5); \
541  } \
542  break; \
543  default: \
544  return NL_MSM_ERROR_SUPPORT; \
545  } \
546  *(pg) = _g; \
547  *(pdg) = _dg; \
548  } while (0)
549  /* closing ';' from use as function call */
550 
551 
552 #ifdef __cplusplus
553 extern "C" {
554 #endif
555 
556 
557  /* creates type NL_Msmgrid_double */
558  GRID_TEMPLATE(double); /* for MSM charge and potential grids */
559 
560  /* creates type NL_Msmgrid_float */
561  GRID_TEMPLATE(float); /* single prec MSM charge and potential grids */
562 
563 
565  struct NL_Msm_t {
566  double uelec; /* calculate electrostatic potential energy */
567 
568  double *felec; /* calculate electrostatic forces x/y/z/... */
569  const double *atom; /* the original atom array stored x/y/z/q/... */
570 
571  float *felec_f; /* single prec elec forces x/y/z/... */
572  const float *atom_f; /* single prec atom array stored x/y/z/q/... */
573 
574  double cellvec1[3]; /* cell basis vector (along x) */
575  double cellvec2[3]; /* cell basis vector (along y) */
576  double cellvec3[3]; /* cell basis vector (along z) */
577  double cellcenter[3]; /* cell center */
578  double recipvec1[3]; /* reciprocal space row vector */
579  double recipvec2[3]; /* reciprocal space row vector */
580  double recipvec3[3]; /* reciprocal space row vector */
581 
582  float cellvec1_f[3]; /* single prec cell basis vector (along x) */
583  float cellvec2_f[3]; /* single prec cell basis vector (along y) */
584  float cellvec3_f[3]; /* single prec cell basis vector (along z) */
585  float cellcenter_f[3]; /* single prec cell center */
586  float recipvec1_f[3]; /* single prec reciprocal space row vector */
587  float recipvec2_f[3]; /* single prec reciprocal space row vector */
588  float recipvec3_f[3]; /* single prec reciprocal space row vector */
589 
590  int msmflags; /* bitwise flags from enum in msm.h */
591 
592  int numbins; /* number of bins (nbx * nby * nbz) */
593  int maxbins; /* maximum number of bins allocated */
594  int nbx; /* number of bins along first cell basis vector */
595  int nby; /* number of bins along second cell basis vector */
596  int nbz; /* number of bins along third cell basis vector */
597  int *bin; /* array length maxbins, index of first atom */
598 
599  int numatoms; /* number of atoms */
600  int maxatoms; /* maximum number of atoms allocated */
601  int *next; /* array length maxatoms, index next atom in bin */
602 
603  /* additional data members needed for k-away neighborhood */
604  double binfill; /* ratio of number of atoms per bin (default 0.8) */
605  double density; /* expected density of atoms (default 0.1) */
606  int nbinslots; /* how many atoms per bin (default 8) */
607 
608  double bu[3]; /* bin basis vector along u */
609  double bv[3]; /* bin basis vector along v */
610  double bw[3]; /* bin basis vector along w */
611 
612  int *nbrhd;
613  int nbrhdlen;
614  int nbrhdmax;
615  int nradius;
616 
617  /*
618  * Fundamental MSM parameters:
619  *
620  * Default grid spacing hmin = 2.5 A,
621  * C1 cubic interpolation, and C2 Taylor splitting,
622  * together give reasonable accuracy for atomistic systems
623  * using a cutoff between 8 and 12 A.
624  *
625  * Find grid spacings in range hmin <= hx, hy, hz <= (1.5 * hmin)
626  * such that along periodic dimensions the number of grid points is
627  * some power of 2 times one or zero powers of 3.
628  *
629  * For reasonable accuracy, maintain ratio 4 <= a/hx, a/hy, a/hz <= 6
630  * (and grid cutoff stencils fit into GPU constant memory cache).
631  */
632 
633  double gridspacing; /* smallest MSM grid spacing, hmax = 1.5 * hmin */
634  double hx, hy, hz; /* the finest level grid spacings */
635  double a; /* the MSM cutoff between short- and long-range */
636 
637  float hx_f, hy_f, hz_f;/* single prec finest level grid spacings */
638  float a_f; /* single prec MSM cutoff */
639 
640  int nx, ny, nz; /* count number of h spacings that cover cell */
641 
642  int approx; /* the approximation/interpolation: "MSM_APPROX_" */
643  int split; /* the splitting: "MSM_SPLIT_" */
644 
645  int nlevels; /* number of grid levels */
646 
647  double gx, gy, gz; /* coordinate of h-grid (0,0,0) point */
648  float gx_f, gy_f, gz_f;/* single prec coordinate of h-grid (0,0,0) point */
649 
650  double gzero; /* self energy factor from chosen splitting */
651  float gzero_f; /* single prec self energy factor */
652 
653  /*
654  * Grids for calculating long-range part:
655  * q[0] is finest-spaced grid (hx, hy, hz),
656  * grid level k has spacing 2^k * (hx, hy, hz).
657  *
658  * Use domain origin (px0, py0, pz0) for each grid origin, ao that
659  * q[0](i,j,k) has position (i*hx + px0, j*hy + py0, k*hz + pz0)
660  * the finest level grid is 0, e.g. q0 = qh[0].
661  *
662  * Grid indexes can be negative for non-periodic dimensions,
663  * the periodic dimensions wrap around edges of grid.
664  */
665 
666  NL_Msmgrid_double *qh; /* grids of charge, 0==finest */
667  NL_Msmgrid_double *eh; /* grids of potential, 0==finest */
668  NL_Msmgrid_double *gc; /* fixed-size grids of weights for convolution */
669 
670  NL_Msmgrid_float *qh_f;/* single prec grids of charge, 0==finest */
671  NL_Msmgrid_float *eh_f;/* single prec grids of potential, 0==finest */
672  NL_Msmgrid_float *gc_f;/* single prec fixed-size grids of weights */
673 
674  int maxlevels; /* maximum number of grid levels allocated */
675 
676  int max_lzd, max_lyzd; /* alloc length of factor temp buffer space */
677  double *lzd; /* factor temp row buffer, length z-dim of h-level */
678  double *lyzd; /* factor temp row buffer, (y*z)-dim of h-level */
679 
680  float *lzd_f; /* single prec factor temp row buffer, z-dim */
681  float *lyzd_f; /* single prec factor temp row buffer, (y*z)-dim */
682 
683  int report_timings; /* Do we report timings? */
684  wkf_timerhandle timer; /* timer from John Stone */
685  wkf_timerhandle timer_longrng; /* time individual long-range parts */
686 
687  /* CUDA atom cutoff kernel */
688 
689  /* CUDA grid cutoff kernel */
690  int lk_nlevels; /* number of levels for grid cutoff kernel */
691  int lk_srad; /* subcube radius for grid cutoff kernel */
692  int lk_padding; /* padding around internal array of subcubes */
693  int subcube_total; /* total number of subcubes for compressed grids */
694  int block_total; /* total number of thread blocks */
695  /*
696  * host_ --> memory allocated on host
697  * device_ --> global memory allocated on device
698  */
699  int *host_sinfo; /* subcube info copy to device const mem */
700  float *host_lfac; /* level factor copy to device const mem */
702 
703  float *host_wt; /* weights copy to device const mem */
705 
706  float *host_qgrids; /* q-grid subcubes copy to device global mem */
707  float *host_egrids; /* e-grid subcubes copy to device global mem */
708  float *device_qgrids; /* q-grid subcubes allocate on device */
709  float *device_egrids; /* e-grid subcubes allocate on device */
711  };
712 
713 
714  /* prototypes for internal use */
715  void NL_msm_cleanup(NL_Msm *pm);
718 
721 
722 #ifdef NL_USE_CUDA
723  /* CUDA routines for grid cutoff computation */
724  int NL_msm_cuda_setup_gridcutoff(NL_Msm *);
725  void NL_msm_cuda_cleanup_gridcutoff(NL_Msm *);
726 
727  int NL_msm_cuda_compute_gridcutoff(NL_Msm *);
728  int NL_msm_cuda_condense_qgrids(NL_Msm *);
729  int NL_msm_cuda_expand_egrids(NL_Msm *);
730 #endif
731 
732 
733 #ifdef __cplusplus
734 }
735 #endif
736 
737 #endif /* NL_MSM_DEFN_H */
float cellcenter_f[3]
Definition: msm_defn.h:585
double gz
Definition: msm_defn.h:647
int lk_maxwts
Definition: msm_defn.h:704
double recipvec2[3]
Definition: msm_defn.h:579
double recipvec3[3]
Definition: msm_defn.h:580
double gzero
Definition: msm_defn.h:650
int msmflags
Definition: msm_defn.h:590
NL_Msmgrid_double * eh
Definition: msm_defn.h:667
NL_Msmgrid_double * gc
Definition: msm_defn.h:668
double * felec
Definition: msm_defn.h:568
double cellvec2[3]
Definition: msm_defn.h:575
double bw[3]
Definition: msm_defn.h:610
int numbins
Definition: msm_defn.h:592
double * lzd
Definition: msm_defn.h:677
void * wkf_timerhandle
Definition: wkfutils.h:49
wkf_timerhandle timer
Definition: msm_defn.h:684
int approx
Definition: msm_defn.h:642
int * bin
Definition: msm_defn.h:597
const float * atom_f
Definition: msm_defn.h:572
float recipvec2_f[3]
Definition: msm_defn.h:587
int lk_srad
Definition: msm_defn.h:691
int numatoms
Definition: msm_defn.h:599
int lk_nlevels
Definition: msm_defn.h:690
float * device_egrids
Definition: msm_defn.h:709
float gz_f
Definition: msm_defn.h:648
double bu[3]
Definition: msm_defn.h:608
float cellvec1_f[3]
Definition: msm_defn.h:582
int * host_sinfo
Definition: msm_defn.h:699
float * host_qgrids
Definition: msm_defn.h:706
float hy_f
Definition: msm_defn.h:637
int subcube_total
Definition: msm_defn.h:693
int ny
Definition: msm_defn.h:640
double cellvec1[3]
Definition: msm_defn.h:574
int nradius
Definition: msm_defn.h:615
double density
Definition: msm_defn.h:605
double hy
Definition: msm_defn.h:634
float hx_f
Definition: msm_defn.h:637
double uelec
Definition: msm_defn.h:566
float * device_qgrids
Definition: msm_defn.h:708
double cellvec3[3]
Definition: msm_defn.h:576
int nbinslots
Definition: msm_defn.h:606
int NL_msm_compute_short_range_sprec(NL_Msm *pm)
#define GRID_TEMPLATE(TYPE)
Definition: msm_defn.h:43
float * lyzd_f
Definition: msm_defn.h:681
NL_Msmgrid_float * gc_f
Definition: msm_defn.h:672
int NL_msm_compute_long_range_sprec(NL_Msm *pm)
float * felec_f
Definition: msm_defn.h:571
double * lyzd
Definition: msm_defn.h:678
double cellcenter[3]
Definition: msm_defn.h:577
float hz_f
Definition: msm_defn.h:637
int NL_msm_compute_short_range(NL_Msm *pm)
Definition: msm_shortrng.c:37
int nx
Definition: msm_defn.h:640
float * host_wt
Definition: msm_defn.h:703
int max_lyzd
Definition: msm_defn.h:676
wkf_timerhandle timer_longrng
Definition: msm_defn.h:685
double a
Definition: msm_defn.h:635
NL_Msmgrid_float * eh_f
Definition: msm_defn.h:671
NL_Msmgrid_double * qh
Definition: msm_defn.h:666
double gy
Definition: msm_defn.h:647
int NL_msm_compute_long_range(NL_Msm *pm)
Definition: msm_longrng.c:52
float cellvec3_f[3]
Definition: msm_defn.h:584
double binfill
Definition: msm_defn.h:604
void NL_msm_cleanup(NL_Msm *pm)
Definition: msm_setup.c:7
const double * atom
Definition: msm_defn.h:569
double gx
Definition: msm_defn.h:647
int maxbins
Definition: msm_defn.h:593
float recipvec3_f[3]
Definition: msm_defn.h:588
int nbx
Definition: msm_defn.h:594
double gridspacing
Definition: msm_defn.h:633
double hz
Definition: msm_defn.h:634
int split
Definition: msm_defn.h:643
int block_total
Definition: msm_defn.h:694
int * nbrhd
Definition: msm_defn.h:612
int nbrhdmax
Definition: msm_defn.h:614
int lk_maxlevels
Definition: msm_defn.h:701
int lk_padding
Definition: msm_defn.h:692
double hx
Definition: msm_defn.h:634
float * host_egrids
Definition: msm_defn.h:707
float a_f
Definition: msm_defn.h:638
int lk_maxgridpts
Definition: msm_defn.h:710
double bv[3]
Definition: msm_defn.h:609
float recipvec1_f[3]
Definition: msm_defn.h:586
int nby
Definition: msm_defn.h:595
float gx_f
Definition: msm_defn.h:648
NL_Msmgrid_float * qh_f
Definition: msm_defn.h:670
float cellvec2_f[3]
Definition: msm_defn.h:583
int maxlevels
Definition: msm_defn.h:674
float * host_lfac
Definition: msm_defn.h:700
int nlevels
Definition: msm_defn.h:645
int nz
Definition: msm_defn.h:640
double recipvec1[3]
Definition: msm_defn.h:578
int nbz
Definition: msm_defn.h:596
int nbrhdlen
Definition: msm_defn.h:613
int maxatoms
Definition: msm_defn.h:600
float gy_f
Definition: msm_defn.h:648
int * next
Definition: msm_defn.h:601
float gzero_f
Definition: msm_defn.h:651
float * lzd_f
Definition: msm_defn.h:680
int report_timings
Definition: msm_defn.h:683
int max_lzd
Definition: msm_defn.h:676