msm_defn.h

Go to the documentation of this file.
00001 /* msm_defn.h */
00002 
00003 #ifndef NL_MSM_DEFN_H
00004 #define NL_MSM_DEFN_H
00005 
00006 /* #define TEST_INLINING */
00007 
00008 #include <stdlib.h>
00009 #include <stdio.h>
00010 #include <string.h>
00011 #include <math.h>
00012 
00013 #ifdef WIN32
00014 #define strcasecmp(s,t) stricmp(s,t)
00015 #define strncasecmp(s,t,n) strnicmp(s,t,n)
00016 #else
00017 #include <strings.h>
00018 #endif
00019 
00020 #include "msm.h"
00021 
00022 #include "wkfutils.h"  /* timers from John Stone */
00023 
00024 /* avoid parameter name collisions with AIX5 "hz" macro */
00025 #undef hz
00026 
00028 #undef  X
00029 #define X  0
00030 #undef  Y
00031 #define Y  1
00032 #undef  Z
00033 #define Z  2
00034 #undef  Q
00035 #define Q  3
00036 
00038 #undef  NELEMS
00039 #define NELEMS(arr)  (sizeof(arr)/sizeof(arr[0]))
00040 
00042 #undef GRID_TEMPLATE
00043 #define GRID_TEMPLATE(TYPE) \
00044   typedef struct NL_Msmgrid_##TYPE##_t { \
00045     TYPE *buffer;       /* raw buffer */ \
00046     TYPE *data;         /* data access offset from buffer */ \
00047     size_t numbytes;    /* number of bytes in use by buffer */ \
00048     size_t maxbytes;    /* actual allocation for buffer */ \
00049     int i0, j0, k0;     /* starting index value for each dimension */ \
00050     int ni, nj, nk;     /* number of elements in each dimension */ \
00051   } NL_Msmgrid_##TYPE
00052   /* expect closing ';' */
00053 
00055 #define GRID_INIT(_p) \
00056   ((_p)->buffer=NULL, (_p)->data=NULL, (_p)->numbytes=0, (_p)->maxbytes=0, \
00057    (_p)->i0=0, (_p)->j0=0, (_p)->k0=0, (_p)->ni=0, (_p)->nj=0, (_p)->nk=0)
00058   /* expect closing ';' */
00059 
00061 #define GRID_DONE(_p) \
00062   free((_p)->buffer)
00063   /* expect closing ';' */
00064 
00066 #define GRID_INDEX(_p, _i, _j, _k) \
00067   (((_k)*((_p)->nj) + (_j))*((_p)->ni) + (_i))
00068   /* expect closing ';' */
00069 
00071 #define GRID_POINTER(_p, _i, _j, _k) \
00072   ((_p)->data + GRID_INDEX(_p, _i, _j, _k))
00073   /* expect closing ';' */
00074 
00077 #define GRID_RESIZE(_p, TYPE, __i0, __ni, __j0, __nj, __k0, __nk) \
00078   do { \
00079     int _i0=(__i0), _ni=(__ni); \
00080     int _j0=(__j0), _nj=(__nj); \
00081     int _k0=(__k0), _nk=(__nk); \
00082     size_t _numbytes = (_nk * _nj) * (size_t) _ni * sizeof((_p)->buffer[0]); \
00083     if ((_p)->maxbytes < _numbytes) { \
00084       void *_t = realloc((_p)->buffer, _numbytes); \
00085       if (NULL == _t) return NL_MSM_ERROR_MALLOC; \
00086       (_p)->buffer = (TYPE *) _t; \
00087       (_p)->maxbytes = _numbytes; \
00088     } \
00089     (_p)->numbytes = _numbytes; \
00090     (_p)->i0 = _i0, (_p)->ni = _ni; \
00091     (_p)->j0 = _j0, (_p)->nj = _nj; \
00092     (_p)->k0 = _k0, (_p)->nk = _nk; \
00093     (_p)->data = (_p)->buffer + GRID_INDEX((_p), -_i0, -_j0, -_k0); \
00094   } while (0)
00095   /* expect closing ';' */
00096 
00097 /* reset 3D grid data to 0 */
00098 #define GRID_ZERO(_p) \
00099   memset((_p)->buffer, 0, (_p)->numbytes)  /* ; */
00100 
00101 /* check 3D grid index range when debugging
00102  * (must be used within function returning int) */
00103 #ifdef MSM_DEBUG
00104 #define ASSERT(expr)
00105 #define GRID_INDEX_CHECK(a, _i, _j, _k) \
00106   do { \
00107     ASSERT((a)->i0 <= (_i) && (_i) < (a)->ni + (a)->i0); \
00108     ASSERT((a)->j0 <= (_j) && (_j) < (a)->nj + (a)->j0); \
00109     ASSERT((a)->k0 <= (_k) && (_k) < (a)->nk + (a)->k0); \
00110   } while (0)
00111   /* expect closing ';' */
00112 #else
00113 #define ASSERT(expr)
00114 #define GRID_INDEX_CHECK(a, _i, _j, _k)
00115 #endif
00116 
00117 
00119 #define DEFAULT_GRIDSPACING  2.5
00120 #define DEFAULT_APPROX       NL_MSM_APPROX_CUBIC
00121 #define DEFAULT_SPLIT        NL_MSM_SPLIT_TAYLOR2
00122 #define DEFAULT_NLEVELS      0  /* set number of levels as needed */
00123 
00124 #define DEFAULT_DENSITY      0.1
00125 #define DEFAULT_BINFILL      0.8
00126 #define DEFAULT_NBINSLOTS    8
00127 
00128 
00139 #undef  SPOLY
00140 #define SPOLY(pg, pdg, ra, split) \
00141   do { \
00142     double _r = ra;  /* _r=(r/a) */ \
00143     double _s = _r*_r;  /* _s=(r/a)^2 */ \
00144     double _g = 0, _dg = 0; \
00145     ASSERT(0 <= _s && _s <= 1); \
00146     switch (split) { \
00147       /* from section 5.1 of thesis */ \
00148       case NL_MSM_SPLIT_TAYLOR2: \
00149         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8));                               \
00150         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4)); \
00151         break;                                                                 \
00152       case NL_MSM_SPLIT_TAYLOR3: \
00153         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16)));             \
00154         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16))); \
00155         break;                                                                 \
00156       case NL_MSM_SPLIT_TAYLOR4: \
00157         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16                 \
00158                 + (_s-1)*(35./128))));                                         \
00159         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 \
00160                 + (_s-1)*(35./32))));      \
00161         break;                                                                 \
00162       case NL_MSM_SPLIT_TAYLOR5: \
00163         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16                 \
00164                 + (_s-1)*(35./128 + (_s-1)*(-63./256)))));                     \
00165         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32  \
00166                 + (_s-1)*(-315./256)))));                                      \
00167         break;                                                                 \
00168       case NL_MSM_SPLIT_TAYLOR6: \
00169         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16                 \
00170                 + (_s-1)*(35./128 + (_s-1)*(-63./256                           \
00171                     + (_s-1)*(231./1024))))));                                 \
00172         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32  \
00173                 + (_s-1)*(-315./256 + (_s-1)*(693./512))))));                  \
00174         break;                                                                 \
00175       case NL_MSM_SPLIT_TAYLOR7: \
00176         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16                 \
00177                 + (_s-1)*(35./128 + (_s-1)*(-63./256                           \
00178                     + (_s-1)*(231./1024 + (_s-1)*(-429./2048)))))));           \
00179         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32  \
00180                 + (_s-1)*(-315./256 + (_s-1)*(693./512                         \
00181                     + (_s-1)*(-3003./2048)))))));                              \
00182         break;                                                                 \
00183       case NL_MSM_SPLIT_TAYLOR8: \
00184         _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16                 \
00185                 + (_s-1)*(35./128 + (_s-1)*(-63./256                           \
00186                     + (_s-1)*(231./1024 + (_s-1)*(-429./2048                   \
00187                         + (_s-1)*(6435./32768))))))));                         \
00188         _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32  \
00189                 + (_s-1)*(-315./256 + (_s-1)*(693./512                         \
00190                     + (_s-1)*(-3003./2048 + (_s-1)*(6435./4096))))))));        \
00191         break;                                                                 \
00192       case NL_MSM_SPLIT_TAYLOR1: \
00193         _g = 1 + (_s-1)*(-1./2); \
00194         _dg = (2*_r)*(-1./2); \
00195         break; \
00196       /* from section 5.2 of thesis */ \
00197       case NL_MSM_SPLIT_SIGMA2_3:  /* the "perfect" smoothing */ \
00198         _g = 2 + _s*(-2 + _r);                                                \
00199         _dg = _r*(-4 + _r*3);                                                  \
00200         break;                                                                 \
00201       case NL_MSM_SPLIT_SIGMA3_5: \
00202         _g = 9./4 + _s*(-5./2 + _s*(9./4 - _r));                             \
00203         _dg = _r*(-5 + _s*(9 + _r*(-5)));                                     \
00204         break;                                                                 \
00205       case NL_MSM_SPLIT_SIGMA4_6: \
00206         _g = 21./8 + _s*(-35./8 + _s*(63./8 + _r*(-7 + _r*(15./8))));        \
00207         _dg = _r*(-35./4 + _s*(63./2 + _r*(-35 + _r*(45./4))));               \
00208         break;                                                                 \
00209       case NL_MSM_SPLIT_SIGMA4_7: \
00210         _g = 5./2 + _s*(-7./2 + _s*(7./2 + _s*(-5./2 + _r)));               \
00211         _dg = _r*(-7 + _s*(14 + _s*(-15 + _r*(7))));                         \
00212         break;                                                                 \
00213       case NL_MSM_SPLIT_SIGMA5_8: \
00214         _g = 45./16 + _s*(-21./4 + _s*(63./8 + _s*(-45./4                   \
00215                 + _r*(9 + _r*(-35./16)))));                                    \
00216         _dg = _r*(-21./2 + _s*(63./2 + _s*(-135./2                           \
00217                 + _r*(63 + _r*(-35./2)))));                                    \
00218         break;                                                                 \
00219       case NL_MSM_SPLIT_SIGMA5_9: \
00220         _g = 175./64 + _s*(-75./16 + _s*(189./32 + _s*(-75./16              \
00221                 + _s*(175./64 - _r))));                                       \
00222         _dg = _r*(-75./8 + _s*(189./8 + _s*(-225./8 + _s*(175./8            \
00223                   + _r*(-9)))));                                               \
00224         break;                                                                 \
00225       case NL_MSM_SPLIT_SIGMA6_9: \
00226         _g = 25./8 + _s*(-15./2 + _s*(63./4 + _s*(-75./2                    \
00227                 + _r*(45 + _r*(-175./8 + _r*4)))));                            \
00228         _dg = _r*(-15 + _s*(63 + _s*(-225                                    \
00229                 + _r*(315 + _r*(-175 + _r*36)))));                             \
00230         break;                                                                 \
00231       case NL_MSM_SPLIT_SIGMA6_10: \
00232         _g = 385./128 + _s*(-825./128 + _s*(693./64 + _s*(-825./64          \
00233                 + _s*(1925./128 + _r*(-11 + _r*(315./128))))));               \
00234         _dg = _r*(-825./64 + _s*(693./16 + _s*(-2475./32                     \
00235                 + _s*(1925./16 + _r*(-99 + _r*(1575./64))))));                \
00236         break;                                                                 \
00237       case NL_MSM_SPLIT_SIGMA6_11: \
00238         _g = 189./64 + _s*(-385./64 + _s*(297./32 + _s*(-297./32            \
00239                 + _s*(385./64 + _s*(-189./64 + _r)))));                      \
00240         _dg = _r*(-385./32 + _s*(297./8 + _s*(-891./16 + _s*(385./8         \
00241                   + _s*(-945./32 + _r*(11))))));                              \
00242         break;                                                                 \
00243       case NL_MSM_SPLIT_SIGMA7_11: \
00244         _g = 105./32 + _s*(-275./32 + _s*(297./16 + _s*(-495./16            \
00245                 + _s*(1925./32 + _r*(-66 + _r*(945./32 + _r*(-5)))))));       \
00246         _dg = _r*(-275./16 + _s*(297./4 + _s*(-1485./8                       \
00247                 + _s*(1925./4 + _r*(-594 + _r*(4725./16 + _r*(-55)))))));     \
00248         break;                                                                 \
00249       case NL_MSM_SPLIT_SIGMA7_12: \
00250         _g = 819./256 + _s*(-1001./128 + _s*(3861./256                       \
00251               + _s*(-1287./64 + _s*(5005./256 + _s*(-2457./128              \
00252                     + _r*(13 + _r*(-693./256)))))));                           \
00253         _dg = _r*(-1001./64 + _s*(3861./64 + _s*(-3861./32                   \
00254                 + _s*(5005./32 + _s*(-12285./64 + _r*(143                    \
00255                       + _r*(-2079./64)))))));                                  \
00256         break;                                                                 \
00257       case NL_MSM_SPLIT_SIGMA7_13: \
00258         _g = 1617./512 + _s*(-1911./256 + _s*(7007./512 + _s*(-2145./128    \
00259                 + _s*(7007./512 + _s*(-1911./256 + _s*(1617./512 - _r))))));\
00260         _dg = _r*(-1911./128 + _s*(7007./128 + _s*(-6435./64 + _s*(7007./64 \
00261                   + _s*(-9555./128 + _s*(4851./128 + _r*(-13)))))));         \
00262         break;                                                                 \
00263       case NL_MSM_SPLIT_SIGMA8_12: \
00264         _g = 455./128 + _s*(-715./64 + _s*(3861./128 + _s*(-2145./32        \
00265                 + _s*(25025./128 + _r*(-286 + _r*(12285./64 + _r*(-65         \
00266                         + _r*(1155./128))))))));                               \
00267         _dg = _r*(-715./32 + _s*(3861./32 + _s*(-6435./16                    \
00268                 + _s*(25025./16 + _r*(-2574 + _r*(61425./32 + _r*(-715        \
00269                         + _r*(3465./32))))))));                                \
00270         break;                                                                 \
00271       case NL_MSM_SPLIT_SIGMA8_13: \
00272         _g = 441./128 + _s*(-637./64 + _s*(3003./128                         \
00273               + _s*(-1287./32 + _s*(7007./128 + _s*(-5733./64               \
00274                     + _r*(91 + _r*(-4851./128 + _r*(6))))))));                 \
00275         _dg = _r*(-637./32 + _s*(3003./32 + _s*(-3861./16                    \
00276                 + _s*(7007./16 + _s*(-28665./32 + _r*(1001                   \
00277                       + _r*(-14553./32 + _r*(78))))))));                       \
00278         break;                                                                 \
00279       case NL_MSM_SPLIT_SIGMA8_14: \
00280         _g = 3465./1024 + _s*(-9555./1024 + _s*(21021./1024                  \
00281               + _s*(-32175./1024 + _s*(35035./1024 + _s*(-28665./1024       \
00282                     + _s*(24255./1024 + _r*(-15 + _r*(3003./1024))))))));     \
00283         _dg = _r*(-9555./512 + _s*(21021./256 + _s*(-96525./512              \
00284                 + _s*(35035./128 + _s*(-143325./512 + _s*(72765./256        \
00285                       + _r*(-195 + _r*(21021./512))))))));                     \
00286         break;                                                                 \
00287       case NL_MSM_SPLIT_SIGMA8_15: \
00288         _g = 429./128 + _s*(-1155./128 + _s*(2457./128 + _s*(-3575./128     \
00289                 + _s*(3575./128 + _s*(-2457./128 + _s*(1155./128            \
00290                       + _s*(-429./128 + _r)))))));                            \
00291         _dg = _r*(-1155./64 + _s*(2457./32 + _s*(-10725./64                  \
00292                 + _s*(3575./16 + _s*(-12285./64 + _s*(3465./32              \
00293                       + _s*(-3003./64 + _r*(15))))))));                       \
00294         break;                                                                 \
00295       /* from section 5.3 of thesis */ \
00296       case NL_MSM_SPLIT_SIGMA2_6: \
00297         _g = (31./16) + _s*(-23./16 + _s*(9./16 + _s*(-1./16))); \
00298         _dg = (2*_r)*(-23./16 + _s*(9./8 + _s*(-3./16))); \
00299         break; \
00300       /* from section 5.4 of thesis */ \
00301       case NL_MSM_SPLIT_SWITCH1_2:                                             \
00302         if (_r > 1./2) {                                                       \
00303           _g = 5./3 + _r + _s*(-3 + _r*(4./3));                               \
00304           _dg = 1 + _r*(-6 + _r*(4));                                          \
00305         }                                                                      \
00306         else {                                                                 \
00307           _g = 11./6 - _s;                                                    \
00308           _dg = _r*(-2);                                                       \
00309         }                                                                      \
00310         break;                                                                 \
00311       case NL_MSM_SPLIT_SWITCH3_4:                                             \
00312         if (_r > 3./4) {                                                       \
00313           _g = 5./7 + _r*(27./7 + _r*(-41./7 + _r*(16./7)));                   \
00314           _dg = 27./7 + _r*(-82./7 + _r*(48./7));                              \
00315         }                                                                      \
00316         else {                                                                 \
00317           _g = 47./28 + _s*(-5./7);                                           \
00318           _dg = _r*(-10./7);                                                   \
00319         }                                                                      \
00320         break;                                                                 \
00321       case NL_MSM_SPLIT_SWITCH7_8:                                             \
00322         if (_r > 7./8) {                                                       \
00323           _g = -19./15 + _r*(49./5 + _r*(-59./5 + _r*(64./15)));               \
00324           _dg = 49./5 + _r*(-118./5 + _r*(64./5));                             \
00325         }                                                                      \
00326         else {                                                                 \
00327           _g = 191./120 + _s*(-3./5);                                         \
00328           _dg = _r*(-6./5);                                                    \
00329         }                                                                      \
00330         break;                                                                 \
00331       default: \
00332         return NL_MSM_ERROR_SUPPORT; \
00333     } \
00334     *(pg) = _g; \
00335     *(pdg) = _dg; \
00336   } while (0)
00337   /* closing ';' from use as function call */
00338 
00339 
00350 #undef  SPOLY_SPREC
00351 #define SPOLY_SPREC(pg, pdg, ra, split) \
00352   do { \
00353     float _r = ra;  /* _r=(r/a) */ \
00354     float _s = _r*_r;  /* _s=(r/a)^2 */ \
00355     float _g = 0, _dg = 0; \
00356     ASSERT(0 <= _s && _s <= 1); \
00357     switch (split) { \
00358       /* from section 5.1 of thesis */ \
00359       case NL_MSM_SPLIT_TAYLOR2: \
00360         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8));                             \
00361         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4)); \
00362         break;                                                                 \
00363       case NL_MSM_SPLIT_TAYLOR3: \
00364         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16)));          \
00365         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16))); \
00366         break;                                                                 \
00367       case NL_MSM_SPLIT_TAYLOR4: \
00368         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16              \
00369                 + (_s-1)*(35.f/128))));                                        \
00370         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
00371                 + (_s-1)*(35.f/32))));      \
00372         break;                                                                 \
00373       case NL_MSM_SPLIT_TAYLOR5: \
00374         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16              \
00375                 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256)))));                   \
00376         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
00377                 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256)))));                   \
00378         break;                                                                 \
00379       case NL_MSM_SPLIT_TAYLOR6: \
00380         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16              \
00381                 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256                         \
00382                     + (_s-1)*(231.f/1024))))));                                \
00383         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
00384                 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 \
00385                     + (_s-1)*(693.f/512))))));                  \
00386         break;                                                                 \
00387       case NL_MSM_SPLIT_TAYLOR7: \
00388         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16              \
00389                 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256                         \
00390                     + (_s-1)*(231.f/1024 + (_s-1)*(-429.f/2048)))))));         \
00391         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
00392                 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 + (_s-1)*(693.f/512     \
00393                     + (_s-1)*(-3003.f/2048)))))));                             \
00394         break;                                                                 \
00395       case NL_MSM_SPLIT_TAYLOR8: \
00396         _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16              \
00397                 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256                         \
00398                     + (_s-1)*(231.f/1024 + (_s-1)*(-429.f/2048                 \
00399                         + (_s-1)*(6435.f/32768))))))));                        \
00400         _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \
00401                 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 + (_s-1)*(693.f/512     \
00402                     + (_s-1)*(-3003.f/2048 + (_s-1)*(6435.f/4096))))))));      \
00403         break;                                                                 \
00404       case NL_MSM_SPLIT_TAYLOR1: \
00405         _g = 1 + (_s-1)*(-1.f/2); \
00406         _dg = (2*_r)*(-1.f/2); \
00407         break; \
00408       /* from section 5.2 of thesis */ \
00409       case NL_MSM_SPLIT_SIGMA2_3:  /* the "perfect" smoothing */ \
00410         _g = 2 + _s*(-2 + _r);                                                \
00411         _dg = _r*(-4 + _r*3);                                                  \
00412         break;                                                                 \
00413       case NL_MSM_SPLIT_SIGMA3_5: \
00414         _g = 9.f/4 + _s*(-5.f/2 + _s*(9.f/4 - _r));                            \
00415         _dg = _r*(-5 + _s*(9 + _r*(-5)));                                     \
00416         break;                                                                 \
00417       case NL_MSM_SPLIT_SIGMA4_6: \
00418         _g = 21.f/8 + _s*(-35.f/8 + _s*(63.f/8 + _r*(-7 + _r*(15.f/8))));      \
00419         _dg = _r*(-35.f/4 + _s*(63.f/2 + _r*(-35 + _r*(45.f/4))));             \
00420         break;                                                                 \
00421       case NL_MSM_SPLIT_SIGMA4_7: \
00422         _g = 5.f/2 + _s*(-7.f/2 + _s*(7.f/2 + _s*(-5.f/2 + _r)));              \
00423         _dg = _r*(-7 + _s*(14 + _s*(-15 + _r*(7))));                         \
00424         break;                                                                 \
00425       case NL_MSM_SPLIT_SIGMA5_8: \
00426         _g = 45.f/16 + _s*(-21.f/4 + _s*(63.f/8 + _s*(-45.f/4                  \
00427                 + _r*(9 + _r*(-35.f/16)))));                                   \
00428         _dg = _r*(-21.f/2 + _s*(63.f/2 + _s*(-135.f/2                          \
00429                 + _r*(63 + _r*(-35.f/2)))));                                   \
00430         break;                                                                 \
00431       case NL_MSM_SPLIT_SIGMA5_9: \
00432         _g = 175.f/64 + _s*(-75.f/16 + _s*(189.f/32 + _s*(-75.f/16             \
00433                 + _s*(175.f/64 - _r))));                                       \
00434         _dg = _r*(-75.f/8 + _s*(189.f/8 + _s*(-225.f/8 + _s*(175.f/8           \
00435                   + _r*(-9)))));                                               \
00436         break;                                                                 \
00437       case NL_MSM_SPLIT_SIGMA6_9: \
00438         _g = 25.f/8 + _s*(-15.f/2 + _s*(63.f/4 + _s*(-75.f/2                   \
00439                 + _r*(45 + _r*(-175.f/8 + _r*4)))));                           \
00440         _dg = _r*(-15 + _s*(63 + _s*(-225                                    \
00441                 + _r*(315 + _r*(-175 + _r*36)))));                             \
00442         break;                                                                 \
00443       case NL_MSM_SPLIT_SIGMA6_10: \
00444         _g = 385.f/128 + _s*(-825.f/128 + _s*(693.f/64 + _s*(-825.f/64         \
00445                 + _s*(1925.f/128 + _r*(-11 + _r*(315.f/128))))));              \
00446         _dg = _r*(-825.f/64 + _s*(693.f/16 + _s*(-2475.f/32                    \
00447                 + _s*(1925.f/16 + _r*(-99 + _r*(1575.f/64))))));               \
00448         break;                                                                 \
00449       case NL_MSM_SPLIT_SIGMA6_11: \
00450         _g = 189.f/64 + _s*(-385.f/64 + _s*(297.f/32 + _s*(-297.f/32           \
00451                 + _s*(385.f/64 + _s*(-189.f/64 + _r)))));                      \
00452         _dg = _r*(-385.f/32 + _s*(297.f/8 + _s*(-891.f/16 + _s*(385.f/8        \
00453                   + _s*(-945.f/32 + _r*(11))))));                              \
00454         break;                                                                 \
00455       case NL_MSM_SPLIT_SIGMA7_11: \
00456         _g = 105.f/32 + _s*(-275.f/32 + _s*(297.f/16 + _s*(-495.f/16           \
00457                 + _s*(1925.f/32 + _r*(-66 + _r*(945.f/32 + _r*(-5)))))));      \
00458         _dg = _r*(-275.f/16 + _s*(297.f/4 + _s*(-1485.f/8                      \
00459                 + _s*(1925.f/4 + _r*(-594 + _r*(4725.f/16 + _r*(-55)))))));    \
00460         break;                                                                 \
00461       case NL_MSM_SPLIT_SIGMA7_12: \
00462         _g = 819.f/256 + _s*(-1001.f/128 + _s*(3861.f/256                      \
00463               + _s*(-1287.f/64 + _s*(5005.f/256 + _s*(-2457.f/128              \
00464                     + _r*(13 + _r*(-693.f/256)))))));                          \
00465         _dg = _r*(-1001.f/64 + _s*(3861.f/64 + _s*(-3861.f/32                  \
00466                 + _s*(5005.f/32 + _s*(-12285.f/64 + _r*(143                    \
00467                       + _r*(-2079.f/64)))))));                                 \
00468         break;                                                                 \
00469       case NL_MSM_SPLIT_SIGMA7_13: \
00470         _g = 1617.f/512 + _s*(-1911.f/256 + _s*(7007.f/512 + _s*(-2145.f/128   \
00471                 + _s*(7007.f/512 + _s*(-1911.f/256 + _s*(1617.f/512 - _r))))));\
00472         _dg = _r*(-1911.f/128 + _s*(7007.f/128 + _s*(-6435.f/64 + _s*(7007.f/64\
00473                   + _s*(-9555.f/128 + _s*(4851.f/128 + _r*(-13)))))));         \
00474         break;                                                                 \
00475       case NL_MSM_SPLIT_SIGMA8_12: \
00476         _g = 455.f/128 + _s*(-715.f/64 + _s*(3861.f/128 + _s*(-2145.f/32       \
00477                 + _s*(25025.f/128 + _r*(-286 + _r*(12285.f/64 + _r*(-65        \
00478                         + _r*(1155.f/128))))))));                              \
00479         _dg = _r*(-715.f/32 + _s*(3861.f/32 + _s*(-6435.f/16                   \
00480                 + _s*(25025.f/16 + _r*(-2574 + _r*(61425.f/32 + _r*(-715       \
00481                         + _r*(3465.f/32))))))));                               \
00482         break;                                                                 \
00483       case NL_MSM_SPLIT_SIGMA8_13: \
00484         _g = 441.f/128 + _s*(-637.f/64 + _s*(3003.f/128                        \
00485               + _s*(-1287.f/32 + _s*(7007.f/128 + _s*(-5733.f/64               \
00486                     + _r*(91 + _r*(-4851.f/128 + _r*(6))))))));                \
00487         _dg = _r*(-637.f/32 + _s*(3003.f/32 + _s*(-3861.f/16                   \
00488                 + _s*(7007.f/16 + _s*(-28665.f/32 + _r*(1001                   \
00489                       + _r*(-14553.f/32 + _r*(78))))))));                      \
00490         break;                                                                 \
00491       case NL_MSM_SPLIT_SIGMA8_14: \
00492         _g = 3465.f/1024 + _s*(-9555.f/1024 + _s*(21021.f/1024                 \
00493               + _s*(-32175.f/1024 + _s*(35035.f/1024 + _s*(-28665.f/1024       \
00494                     + _s*(24255.f/1024 + _r*(-15 + _r*(3003.f/1024))))))));    \
00495         _dg = _r*(-9555.f/512 + _s*(21021.f/256 + _s*(-96525.f/512             \
00496                 + _s*(35035.f/128 + _s*(-143325.f/512 + _s*(72765.f/256        \
00497                       + _r*(-195 + _r*(21021.f/512))))))));                    \
00498         break;                                                                 \
00499       case NL_MSM_SPLIT_SIGMA8_15: \
00500         _g = 429.f/128 + _s*(-1155.f/128 + _s*(2457.f/128 + _s*(-3575.f/128    \
00501                 + _s*(3575.f/128 + _s*(-2457.f/128 + _s*(1155.f/128            \
00502                       + _s*(-429.f/128 + _r)))))));                            \
00503         _dg = _r*(-1155.f/64 + _s*(2457.f/32 + _s*(-10725.f/64                 \
00504                 + _s*(3575.f/16 + _s*(-12285.f/64 + _s*(3465.f/32              \
00505                       + _s*(-3003.f/64 + _r*(15))))))));                       \
00506         break;                                                                 \
00507       /* from section 5.3 of thesis */ \
00508       case NL_MSM_SPLIT_SIGMA2_6: \
00509         _g = (31.f/16) + _s*(-23.f/16 + _s*(9.f/16 + _s*(-1.f/16))); \
00510         _dg = (2*_r)*(-23.f/16 + _s*(9.f/8 + _s*(-3.f/16))); \
00511         break; \
00512       /* from section 5.4 of thesis */ \
00513       case NL_MSM_SPLIT_SWITCH1_2:                                             \
00514         if (_r > 1.f/2) {                                                      \
00515           _g = 5.f/3 + _r + _s*(-3 + _r*(4.f/3));                              \
00516           _dg = 1 + _r*(-6 + _r*(4));                                          \
00517         }                                                                      \
00518         else {                                                                 \
00519           _g = 11.f/6 - _s;                                                    \
00520           _dg = _r*(-2);                                                       \
00521         }                                                                      \
00522         break;                                                                 \
00523       case NL_MSM_SPLIT_SWITCH3_4:                                             \
00524         if (_r > 3.f/4) {                                                      \
00525           _g = 5.f/7 + _r*(27.f/7 + _r*(-41.f/7 + _r*(16.f/7)));               \
00526           _dg = 27.f/7 + _r*(-82.f/7 + _r*(48.f/7));                           \
00527         }                                                                      \
00528         else {                                                                 \
00529           _g = 47.f/28 + _s*(-5.f/7);                                          \
00530           _dg = _r*(-10.f/7);                                                  \
00531         }                                                                      \
00532         break;                                                                 \
00533       case NL_MSM_SPLIT_SWITCH7_8:                                             \
00534         if (_r > 7.f/8) {                                                      \
00535           _g = -19.f/15 + _r*(49.f/5 + _r*(-59.f/5 + _r*(64.f/15)));           \
00536           _dg = 49.f/5 + _r*(-118.f/5 + _r*(64.f/5));                          \
00537         }                                                                      \
00538         else {                                                                 \
00539           _g = 191.f/120 + _s*(-3.f/5);                                        \
00540           _dg = _r*(-6.f/5);                                                   \
00541         }                                                                      \
00542         break;                                                                 \
00543       default: \
00544         return NL_MSM_ERROR_SUPPORT; \
00545     } \
00546     *(pg) = _g; \
00547     *(pdg) = _dg; \
00548   } while (0)
00549   /* closing ';' from use as function call */
00550 
00551 
00552 #ifdef __cplusplus
00553 extern "C" {
00554 #endif
00555 
00556 
00557   /* creates type NL_Msmgrid_double */
00558   GRID_TEMPLATE(double);   /* for MSM charge and potential grids */
00559 
00560   /* creates type NL_Msmgrid_float */
00561   GRID_TEMPLATE(float);    /* single prec MSM charge and potential grids */
00562 
00563 
00565   struct NL_Msm_t {
00566     double uelec;          /* calculate electrostatic potential energy */
00567 
00568     double *felec;         /* calculate electrostatic forces x/y/z/... */
00569     const double *atom;    /* the original atom array stored x/y/z/q/... */
00570 
00571     float *felec_f;        /* single prec elec forces x/y/z/... */
00572     const float *atom_f;   /* single prec atom array stored x/y/z/q/... */
00573 
00574     double cellvec1[3];    /* cell basis vector (along x) */
00575     double cellvec2[3];    /* cell basis vector (along y) */
00576     double cellvec3[3];    /* cell basis vector (along z) */
00577     double cellcenter[3];  /* cell center */
00578     double recipvec1[3];   /* reciprocal space row vector */
00579     double recipvec2[3];   /* reciprocal space row vector */
00580     double recipvec3[3];   /* reciprocal space row vector */
00581 
00582     float cellvec1_f[3];   /* single prec cell basis vector (along x) */
00583     float cellvec2_f[3];   /* single prec cell basis vector (along y) */
00584     float cellvec3_f[3];   /* single prec cell basis vector (along z) */
00585     float cellcenter_f[3]; /* single prec cell center */
00586     float recipvec1_f[3];  /* single prec reciprocal space row vector */
00587     float recipvec2_f[3];  /* single prec reciprocal space row vector */
00588     float recipvec3_f[3];  /* single prec reciprocal space row vector */
00589 
00590     int msmflags;          /* bitwise flags from enum in msm.h */
00591 
00592     int numbins;           /* number of bins (nbx * nby * nbz) */
00593     int maxbins;           /* maximum number of bins allocated */
00594     int nbx;               /* number of bins along first cell basis vector */
00595     int nby;               /* number of bins along second cell basis vector */
00596     int nbz;               /* number of bins along third cell basis vector */
00597     int *bin;              /* array length maxbins, index of first atom */
00598 
00599     int numatoms;          /* number of atoms */
00600     int maxatoms;          /* maximum number of atoms allocated */
00601     int *next;             /* array length maxatoms, index next atom in bin */
00602 
00603     /* additional data members needed for k-away neighborhood */
00604     double binfill;        /* ratio of number of atoms per bin (default 0.8) */
00605     double density;        /* expected density of atoms (default 0.1) */
00606     int nbinslots;         /* how many atoms per bin (default 8) */
00607 
00608     double bu[3];          /* bin basis vector along u */
00609     double bv[3];          /* bin basis vector along v */
00610     double bw[3];          /* bin basis vector along w */
00611 
00612     int *nbrhd;
00613     int nbrhdlen;
00614     int nbrhdmax;
00615     int nradius;
00616 
00617     /*
00618      * Fundamental MSM parameters:
00619      *
00620      * Default grid spacing hmin = 2.5 A,
00621      * C1 cubic interpolation, and C2 Taylor splitting,
00622      * together give reasonable accuracy for atomistic systems
00623      * using a cutoff between 8 and 12 A.
00624      *
00625      * Find grid spacings in range hmin <= hx, hy, hz <= (1.5 * hmin)
00626      * such that along periodic dimensions the number of grid points is
00627      * some power of 2 times one or zero powers of 3.
00628      *
00629      * For reasonable accuracy, maintain ratio 4 <= a/hx, a/hy, a/hz <= 6
00630      * (and grid cutoff stencils fit into GPU constant memory cache).
00631      */
00632 
00633     double gridspacing;    /* smallest MSM grid spacing, hmax = 1.5 * hmin */
00634     double hx, hy, hz;     /* the finest level grid spacings */
00635     double a;              /* the MSM cutoff between short- and long-range */
00636 
00637     float hx_f, hy_f, hz_f;/* single prec finest level grid spacings */
00638     float a_f;             /* single prec MSM cutoff */
00639 
00640     int nx, ny, nz;        /* count number of h spacings that cover cell */
00641 
00642     int approx;            /* the approximation/interpolation: "MSM_APPROX_" */
00643     int split;             /* the splitting: "MSM_SPLIT_" */
00644 
00645     int nlevels;           /* number of grid levels */
00646 
00647     double gx, gy, gz;     /* coordinate of h-grid (0,0,0) point */
00648     float gx_f, gy_f, gz_f;/* single prec coordinate of h-grid (0,0,0) point */
00649 
00650     double gzero;          /* self energy factor from chosen splitting */
00651     float gzero_f;         /* single prec self energy factor */
00652 
00653     /*
00654      * Grids for calculating long-range part:
00655      * q[0] is finest-spaced grid (hx, hy, hz),
00656      * grid level k has spacing 2^k * (hx, hy, hz).
00657      *
00658      * Use domain origin (px0, py0, pz0) for each grid origin, ao that
00659      * q[0](i,j,k) has position (i*hx + px0, j*hy + py0, k*hz + pz0)
00660      * the finest level grid is 0, e.g. q0 = qh[0].
00661      *
00662      * Grid indexes can be negative for non-periodic dimensions,
00663      * the periodic dimensions wrap around edges of grid.
00664      */
00665 
00666     NL_Msmgrid_double *qh; /* grids of charge, 0==finest */
00667     NL_Msmgrid_double *eh; /* grids of potential, 0==finest */
00668     NL_Msmgrid_double *gc; /* fixed-size grids of weights for convolution */
00669 
00670     NL_Msmgrid_float *qh_f;/* single prec grids of charge, 0==finest */
00671     NL_Msmgrid_float *eh_f;/* single prec grids of potential, 0==finest */
00672     NL_Msmgrid_float *gc_f;/* single prec fixed-size grids of weights */
00673 
00674     int maxlevels;         /* maximum number of grid levels allocated */
00675 
00676     int max_lzd, max_lyzd; /* alloc length of factor temp buffer space */
00677     double *lzd;           /* factor temp row buffer, length z-dim of h-level */
00678     double *lyzd;          /* factor temp row buffer, (y*z)-dim of h-level */
00679 
00680     float *lzd_f;          /* single prec factor temp row buffer, z-dim */
00681     float *lyzd_f;         /* single prec factor temp row buffer, (y*z)-dim */
00682 
00683     int report_timings;    /* Do we report timings? */
00684     wkf_timerhandle timer; /* timer from John Stone */
00685     wkf_timerhandle timer_longrng; /* time individual long-range parts */
00686 
00687     /* CUDA atom cutoff kernel */
00688 
00689     /* CUDA grid cutoff kernel */
00690     int   lk_nlevels;      /* number of levels for grid cutoff kernel */
00691     int   lk_srad;         /* subcube radius for grid cutoff kernel */
00692     int   lk_padding;      /* padding around internal array of subcubes */
00693     int   subcube_total;   /* total number of subcubes for compressed grids */
00694     int   block_total;     /* total number of thread blocks */
00695     /*
00696      * host_   -->  memory allocated on host
00697      * device_ -->  global memory allocated on device
00698      */
00699     int   *host_sinfo;     /* subcube info copy to device const mem */
00700     float *host_lfac;      /* level factor copy to device const mem */
00701     int lk_maxlevels;
00702 
00703     float *host_wt;        /* weights copy to device const mem */
00704     int lk_maxwts;
00705 
00706     float *host_qgrids;    /* q-grid subcubes copy to device global mem */
00707     float *host_egrids;    /* e-grid subcubes copy to device global mem */
00708     float *device_qgrids;  /* q-grid subcubes allocate on device */
00709     float *device_egrids;  /* e-grid subcubes allocate on device */
00710     int lk_maxgridpts;   
00711   };
00712 
00713 
00714   /* prototypes for internal use */
00715   void NL_msm_cleanup(NL_Msm *pm);
00716   int NL_msm_compute_short_range(NL_Msm *pm);
00717   int NL_msm_compute_long_range(NL_Msm *pm);
00718 
00719   int NL_msm_compute_short_range_sprec(NL_Msm *pm);
00720   int NL_msm_compute_long_range_sprec(NL_Msm *pm);
00721 
00722 #ifdef NL_USE_CUDA
00723   /* CUDA routines for grid cutoff computation */
00724   int NL_msm_cuda_setup_gridcutoff(NL_Msm *);
00725   void NL_msm_cuda_cleanup_gridcutoff(NL_Msm *);
00726 
00727   int NL_msm_cuda_compute_gridcutoff(NL_Msm *);
00728   int NL_msm_cuda_condense_qgrids(NL_Msm *);
00729   int NL_msm_cuda_expand_egrids(NL_Msm *);
00730 #endif
00731 
00732 
00733 #ifdef __cplusplus
00734 }
00735 #endif
00736 
00737 #endif /* NL_MSM_DEFN_H */

Generated on Wed Nov 22 01:17:15 2017 for NAMD by  doxygen 1.4.7