14 #define strcasecmp(s,t) stricmp(s,t) 15 #define strncasecmp(s,t,n) strnicmp(s,t,n) 39 #define NELEMS(arr) (sizeof(arr)/sizeof(arr[0])) 43 #define GRID_TEMPLATE(TYPE) \ 44 typedef struct NL_Msmgrid_##TYPE##_t { \ 55 #define GRID_INIT(_p) \ 56 ((_p)->buffer=NULL, (_p)->data=NULL, (_p)->numbytes=0, (_p)->maxbytes=0, \ 57 (_p)->i0=0, (_p)->j0=0, (_p)->k0=0, (_p)->ni=0, (_p)->nj=0, (_p)->nk=0) 61 #define GRID_DONE(_p) \ 66 #define GRID_INDEX(_p, _i, _j, _k) \ 67 (((_k)*((_p)->nj) + (_j))*((_p)->ni) + (_i)) 71 #define GRID_POINTER(_p, _i, _j, _k) \ 72 ((_p)->data + GRID_INDEX(_p, _i, _j, _k)) 77 #define GRID_RESIZE(_p, TYPE, __i0, __ni, __j0, __nj, __k0, __nk) \ 79 int _i0=(__i0), _ni=(__ni); \ 80 int _j0=(__j0), _nj=(__nj); \ 81 int _k0=(__k0), _nk=(__nk); \ 82 size_t _numbytes = (_nk * _nj) * (size_t) _ni * sizeof((_p)->buffer[0]); \ 83 if ((_p)->maxbytes < _numbytes) { \ 84 void *_t = realloc((_p)->buffer, _numbytes); \ 85 if (NULL == _t) return NL_MSM_ERROR_MALLOC; \ 86 (_p)->buffer = (TYPE *) _t; \ 87 (_p)->maxbytes = _numbytes; \ 89 (_p)->numbytes = _numbytes; \ 90 (_p)->i0 = _i0, (_p)->ni = _ni; \ 91 (_p)->j0 = _j0, (_p)->nj = _nj; \ 92 (_p)->k0 = _k0, (_p)->nk = _nk; \ 93 (_p)->data = (_p)->buffer + GRID_INDEX((_p), -_i0, -_j0, -_k0); \ 98 #define GRID_ZERO(_p) \ 99 memset((_p)->buffer, 0, (_p)->numbytes) 105 #define GRID_INDEX_CHECK(a, _i, _j, _k) \ 107 ASSERT((a)->i0 <= (_i) && (_i) < (a)->ni + (a)->i0); \ 108 ASSERT((a)->j0 <= (_j) && (_j) < (a)->nj + (a)->j0); \ 109 ASSERT((a)->k0 <= (_k) && (_k) < (a)->nk + (a)->k0); \ 114 #define GRID_INDEX_CHECK(a, _i, _j, _k) 119 #define DEFAULT_GRIDSPACING 2.5 120 #define DEFAULT_APPROX NL_MSM_APPROX_CUBIC 121 #define DEFAULT_SPLIT NL_MSM_SPLIT_TAYLOR2 122 #define DEFAULT_NLEVELS 0 124 #define DEFAULT_DENSITY 0.1 125 #define DEFAULT_BINFILL 0.8 126 #define DEFAULT_NBINSLOTS 8 140 #define SPOLY(pg, pdg, ra, split) \ 144 double _g = 0, _dg = 0; \ 145 ASSERT(0 <= _s && _s <= 1); \ 148 case NL_MSM_SPLIT_TAYLOR2: \ 149 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8)); \ 150 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4)); \ 152 case NL_MSM_SPLIT_TAYLOR3: \ 153 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16))); \ 154 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16))); \ 156 case NL_MSM_SPLIT_TAYLOR4: \ 157 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \ 158 + (_s-1)*(35./128)))); \ 159 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 \ 160 + (_s-1)*(35./32)))); \ 162 case NL_MSM_SPLIT_TAYLOR5: \ 163 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \ 164 + (_s-1)*(35./128 + (_s-1)*(-63./256))))); \ 165 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \ 166 + (_s-1)*(-315./256))))); \ 168 case NL_MSM_SPLIT_TAYLOR6: \ 169 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \ 170 + (_s-1)*(35./128 + (_s-1)*(-63./256 \ 171 + (_s-1)*(231./1024)))))); \ 172 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \ 173 + (_s-1)*(-315./256 + (_s-1)*(693./512)))))); \ 175 case NL_MSM_SPLIT_TAYLOR7: \ 176 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \ 177 + (_s-1)*(35./128 + (_s-1)*(-63./256 \ 178 + (_s-1)*(231./1024 + (_s-1)*(-429./2048))))))); \ 179 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \ 180 + (_s-1)*(-315./256 + (_s-1)*(693./512 \ 181 + (_s-1)*(-3003./2048))))))); \ 183 case NL_MSM_SPLIT_TAYLOR8: \ 184 _g = 1 + (_s-1)*(-1./2 + (_s-1)*(3./8 + (_s-1)*(-5./16 \ 185 + (_s-1)*(35./128 + (_s-1)*(-63./256 \ 186 + (_s-1)*(231./1024 + (_s-1)*(-429./2048 \ 187 + (_s-1)*(6435./32768)))))))); \ 188 _dg = (2*_r)*(-1./2 + (_s-1)*(3./4 + (_s-1)*(-15./16 + (_s-1)*(35./32 \ 189 + (_s-1)*(-315./256 + (_s-1)*(693./512 \ 190 + (_s-1)*(-3003./2048 + (_s-1)*(6435./4096)))))))); \ 192 case NL_MSM_SPLIT_TAYLOR1: \ 193 _g = 1 + (_s-1)*(-1./2); \ 194 _dg = (2*_r)*(-1./2); \ 197 case NL_MSM_SPLIT_SIGMA2_3: \ 198 _g = 2 + _s*(-2 + _r); \ 199 _dg = _r*(-4 + _r*3); \ 201 case NL_MSM_SPLIT_SIGMA3_5: \ 202 _g = 9./4 + _s*(-5./2 + _s*(9./4 - _r)); \ 203 _dg = _r*(-5 + _s*(9 + _r*(-5))); \ 205 case NL_MSM_SPLIT_SIGMA4_6: \ 206 _g = 21./8 + _s*(-35./8 + _s*(63./8 + _r*(-7 + _r*(15./8)))); \ 207 _dg = _r*(-35./4 + _s*(63./2 + _r*(-35 + _r*(45./4)))); \ 209 case NL_MSM_SPLIT_SIGMA4_7: \ 210 _g = 5./2 + _s*(-7./2 + _s*(7./2 + _s*(-5./2 + _r))); \ 211 _dg = _r*(-7 + _s*(14 + _s*(-15 + _r*(7)))); \ 213 case NL_MSM_SPLIT_SIGMA5_8: \ 214 _g = 45./16 + _s*(-21./4 + _s*(63./8 + _s*(-45./4 \ 215 + _r*(9 + _r*(-35./16))))); \ 216 _dg = _r*(-21./2 + _s*(63./2 + _s*(-135./2 \ 217 + _r*(63 + _r*(-35./2))))); \ 219 case NL_MSM_SPLIT_SIGMA5_9: \ 220 _g = 175./64 + _s*(-75./16 + _s*(189./32 + _s*(-75./16 \ 221 + _s*(175./64 - _r)))); \ 222 _dg = _r*(-75./8 + _s*(189./8 + _s*(-225./8 + _s*(175./8 \ 225 case NL_MSM_SPLIT_SIGMA6_9: \ 226 _g = 25./8 + _s*(-15./2 + _s*(63./4 + _s*(-75./2 \ 227 + _r*(45 + _r*(-175./8 + _r*4))))); \ 228 _dg = _r*(-15 + _s*(63 + _s*(-225 \ 229 + _r*(315 + _r*(-175 + _r*36))))); \ 231 case NL_MSM_SPLIT_SIGMA6_10: \ 232 _g = 385./128 + _s*(-825./128 + _s*(693./64 + _s*(-825./64 \ 233 + _s*(1925./128 + _r*(-11 + _r*(315./128)))))); \ 234 _dg = _r*(-825./64 + _s*(693./16 + _s*(-2475./32 \ 235 + _s*(1925./16 + _r*(-99 + _r*(1575./64)))))); \ 237 case NL_MSM_SPLIT_SIGMA6_11: \ 238 _g = 189./64 + _s*(-385./64 + _s*(297./32 + _s*(-297./32 \ 239 + _s*(385./64 + _s*(-189./64 + _r))))); \ 240 _dg = _r*(-385./32 + _s*(297./8 + _s*(-891./16 + _s*(385./8 \ 241 + _s*(-945./32 + _r*(11)))))); \ 243 case NL_MSM_SPLIT_SIGMA7_11: \ 244 _g = 105./32 + _s*(-275./32 + _s*(297./16 + _s*(-495./16 \ 245 + _s*(1925./32 + _r*(-66 + _r*(945./32 + _r*(-5))))))); \ 246 _dg = _r*(-275./16 + _s*(297./4 + _s*(-1485./8 \ 247 + _s*(1925./4 + _r*(-594 + _r*(4725./16 + _r*(-55))))))); \ 249 case NL_MSM_SPLIT_SIGMA7_12: \ 250 _g = 819./256 + _s*(-1001./128 + _s*(3861./256 \ 251 + _s*(-1287./64 + _s*(5005./256 + _s*(-2457./128 \ 252 + _r*(13 + _r*(-693./256))))))); \ 253 _dg = _r*(-1001./64 + _s*(3861./64 + _s*(-3861./32 \ 254 + _s*(5005./32 + _s*(-12285./64 + _r*(143 \ 255 + _r*(-2079./64))))))); \ 257 case NL_MSM_SPLIT_SIGMA7_13: \ 258 _g = 1617./512 + _s*(-1911./256 + _s*(7007./512 + _s*(-2145./128 \ 259 + _s*(7007./512 + _s*(-1911./256 + _s*(1617./512 - _r))))));\ 260 _dg = _r*(-1911./128 + _s*(7007./128 + _s*(-6435./64 + _s*(7007./64 \ 261 + _s*(-9555./128 + _s*(4851./128 + _r*(-13))))))); \ 263 case NL_MSM_SPLIT_SIGMA8_12: \ 264 _g = 455./128 + _s*(-715./64 + _s*(3861./128 + _s*(-2145./32 \ 265 + _s*(25025./128 + _r*(-286 + _r*(12285./64 + _r*(-65 \ 266 + _r*(1155./128)))))))); \ 267 _dg = _r*(-715./32 + _s*(3861./32 + _s*(-6435./16 \ 268 + _s*(25025./16 + _r*(-2574 + _r*(61425./32 + _r*(-715 \ 269 + _r*(3465./32)))))))); \ 271 case NL_MSM_SPLIT_SIGMA8_13: \ 272 _g = 441./128 + _s*(-637./64 + _s*(3003./128 \ 273 + _s*(-1287./32 + _s*(7007./128 + _s*(-5733./64 \ 274 + _r*(91 + _r*(-4851./128 + _r*(6)))))))); \ 275 _dg = _r*(-637./32 + _s*(3003./32 + _s*(-3861./16 \ 276 + _s*(7007./16 + _s*(-28665./32 + _r*(1001 \ 277 + _r*(-14553./32 + _r*(78)))))))); \ 279 case NL_MSM_SPLIT_SIGMA8_14: \ 280 _g = 3465./1024 + _s*(-9555./1024 + _s*(21021./1024 \ 281 + _s*(-32175./1024 + _s*(35035./1024 + _s*(-28665./1024 \ 282 + _s*(24255./1024 + _r*(-15 + _r*(3003./1024)))))))); \ 283 _dg = _r*(-9555./512 + _s*(21021./256 + _s*(-96525./512 \ 284 + _s*(35035./128 + _s*(-143325./512 + _s*(72765./256 \ 285 + _r*(-195 + _r*(21021./512)))))))); \ 287 case NL_MSM_SPLIT_SIGMA8_15: \ 288 _g = 429./128 + _s*(-1155./128 + _s*(2457./128 + _s*(-3575./128 \ 289 + _s*(3575./128 + _s*(-2457./128 + _s*(1155./128 \ 290 + _s*(-429./128 + _r))))))); \ 291 _dg = _r*(-1155./64 + _s*(2457./32 + _s*(-10725./64 \ 292 + _s*(3575./16 + _s*(-12285./64 + _s*(3465./32 \ 293 + _s*(-3003./64 + _r*(15)))))))); \ 296 case NL_MSM_SPLIT_SIGMA2_6: \ 297 _g = (31./16) + _s*(-23./16 + _s*(9./16 + _s*(-1./16))); \ 298 _dg = (2*_r)*(-23./16 + _s*(9./8 + _s*(-3./16))); \ 301 case NL_MSM_SPLIT_SWITCH1_2: \ 303 _g = 5./3 + _r + _s*(-3 + _r*(4./3)); \ 304 _dg = 1 + _r*(-6 + _r*(4)); \ 311 case NL_MSM_SPLIT_SWITCH3_4: \ 313 _g = 5./7 + _r*(27./7 + _r*(-41./7 + _r*(16./7))); \ 314 _dg = 27./7 + _r*(-82./7 + _r*(48./7)); \ 317 _g = 47./28 + _s*(-5./7); \ 321 case NL_MSM_SPLIT_SWITCH7_8: \ 323 _g = -19./15 + _r*(49./5 + _r*(-59./5 + _r*(64./15))); \ 324 _dg = 49./5 + _r*(-118./5 + _r*(64./5)); \ 327 _g = 191./120 + _s*(-3./5); \ 332 return NL_MSM_ERROR_SUPPORT; \ 351 #define SPOLY_SPREC(pg, pdg, ra, split) \ 355 float _g = 0, _dg = 0; \ 356 ASSERT(0 <= _s && _s <= 1); \ 359 case NL_MSM_SPLIT_TAYLOR2: \ 360 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8)); \ 361 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4)); \ 363 case NL_MSM_SPLIT_TAYLOR3: \ 364 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16))); \ 365 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16))); \ 367 case NL_MSM_SPLIT_TAYLOR4: \ 368 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \ 369 + (_s-1)*(35.f/128)))); \ 370 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \ 371 + (_s-1)*(35.f/32)))); \ 373 case NL_MSM_SPLIT_TAYLOR5: \ 374 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \ 375 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256))))); \ 376 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \ 377 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256))))); \ 379 case NL_MSM_SPLIT_TAYLOR6: \ 380 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \ 381 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256 \ 382 + (_s-1)*(231.f/1024)))))); \ 383 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \ 384 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 \ 385 + (_s-1)*(693.f/512)))))); \ 387 case NL_MSM_SPLIT_TAYLOR7: \ 388 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \ 389 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256 \ 390 + (_s-1)*(231.f/1024 + (_s-1)*(-429.f/2048))))))); \ 391 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \ 392 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 + (_s-1)*(693.f/512 \ 393 + (_s-1)*(-3003.f/2048))))))); \ 395 case NL_MSM_SPLIT_TAYLOR8: \ 396 _g = 1 + (_s-1)*(-1.f/2 + (_s-1)*(3.f/8 + (_s-1)*(-5.f/16 \ 397 + (_s-1)*(35.f/128 + (_s-1)*(-63.f/256 \ 398 + (_s-1)*(231.f/1024 + (_s-1)*(-429.f/2048 \ 399 + (_s-1)*(6435.f/32768)))))))); \ 400 _dg = (2*_r)*(-1.f/2 + (_s-1)*(3.f/4 + (_s-1)*(-15.f/16 \ 401 + (_s-1)*(35.f/32 + (_s-1)*(-315.f/256 + (_s-1)*(693.f/512 \ 402 + (_s-1)*(-3003.f/2048 + (_s-1)*(6435.f/4096)))))))); \ 404 case NL_MSM_SPLIT_TAYLOR1: \ 405 _g = 1 + (_s-1)*(-1.f/2); \ 406 _dg = (2*_r)*(-1.f/2); \ 409 case NL_MSM_SPLIT_SIGMA2_3: \ 410 _g = 2 + _s*(-2 + _r); \ 411 _dg = _r*(-4 + _r*3); \ 413 case NL_MSM_SPLIT_SIGMA3_5: \ 414 _g = 9.f/4 + _s*(-5.f/2 + _s*(9.f/4 - _r)); \ 415 _dg = _r*(-5 + _s*(9 + _r*(-5))); \ 417 case NL_MSM_SPLIT_SIGMA4_6: \ 418 _g = 21.f/8 + _s*(-35.f/8 + _s*(63.f/8 + _r*(-7 + _r*(15.f/8)))); \ 419 _dg = _r*(-35.f/4 + _s*(63.f/2 + _r*(-35 + _r*(45.f/4)))); \ 421 case NL_MSM_SPLIT_SIGMA4_7: \ 422 _g = 5.f/2 + _s*(-7.f/2 + _s*(7.f/2 + _s*(-5.f/2 + _r))); \ 423 _dg = _r*(-7 + _s*(14 + _s*(-15 + _r*(7)))); \ 425 case NL_MSM_SPLIT_SIGMA5_8: \ 426 _g = 45.f/16 + _s*(-21.f/4 + _s*(63.f/8 + _s*(-45.f/4 \ 427 + _r*(9 + _r*(-35.f/16))))); \ 428 _dg = _r*(-21.f/2 + _s*(63.f/2 + _s*(-135.f/2 \ 429 + _r*(63 + _r*(-35.f/2))))); \ 431 case NL_MSM_SPLIT_SIGMA5_9: \ 432 _g = 175.f/64 + _s*(-75.f/16 + _s*(189.f/32 + _s*(-75.f/16 \ 433 + _s*(175.f/64 - _r)))); \ 434 _dg = _r*(-75.f/8 + _s*(189.f/8 + _s*(-225.f/8 + _s*(175.f/8 \ 437 case NL_MSM_SPLIT_SIGMA6_9: \ 438 _g = 25.f/8 + _s*(-15.f/2 + _s*(63.f/4 + _s*(-75.f/2 \ 439 + _r*(45 + _r*(-175.f/8 + _r*4))))); \ 440 _dg = _r*(-15 + _s*(63 + _s*(-225 \ 441 + _r*(315 + _r*(-175 + _r*36))))); \ 443 case NL_MSM_SPLIT_SIGMA6_10: \ 444 _g = 385.f/128 + _s*(-825.f/128 + _s*(693.f/64 + _s*(-825.f/64 \ 445 + _s*(1925.f/128 + _r*(-11 + _r*(315.f/128)))))); \ 446 _dg = _r*(-825.f/64 + _s*(693.f/16 + _s*(-2475.f/32 \ 447 + _s*(1925.f/16 + _r*(-99 + _r*(1575.f/64)))))); \ 449 case NL_MSM_SPLIT_SIGMA6_11: \ 450 _g = 189.f/64 + _s*(-385.f/64 + _s*(297.f/32 + _s*(-297.f/32 \ 451 + _s*(385.f/64 + _s*(-189.f/64 + _r))))); \ 452 _dg = _r*(-385.f/32 + _s*(297.f/8 + _s*(-891.f/16 + _s*(385.f/8 \ 453 + _s*(-945.f/32 + _r*(11)))))); \ 455 case NL_MSM_SPLIT_SIGMA7_11: \ 456 _g = 105.f/32 + _s*(-275.f/32 + _s*(297.f/16 + _s*(-495.f/16 \ 457 + _s*(1925.f/32 + _r*(-66 + _r*(945.f/32 + _r*(-5))))))); \ 458 _dg = _r*(-275.f/16 + _s*(297.f/4 + _s*(-1485.f/8 \ 459 + _s*(1925.f/4 + _r*(-594 + _r*(4725.f/16 + _r*(-55))))))); \ 461 case NL_MSM_SPLIT_SIGMA7_12: \ 462 _g = 819.f/256 + _s*(-1001.f/128 + _s*(3861.f/256 \ 463 + _s*(-1287.f/64 + _s*(5005.f/256 + _s*(-2457.f/128 \ 464 + _r*(13 + _r*(-693.f/256))))))); \ 465 _dg = _r*(-1001.f/64 + _s*(3861.f/64 + _s*(-3861.f/32 \ 466 + _s*(5005.f/32 + _s*(-12285.f/64 + _r*(143 \ 467 + _r*(-2079.f/64))))))); \ 469 case NL_MSM_SPLIT_SIGMA7_13: \ 470 _g = 1617.f/512 + _s*(-1911.f/256 + _s*(7007.f/512 + _s*(-2145.f/128 \ 471 + _s*(7007.f/512 + _s*(-1911.f/256 + _s*(1617.f/512 - _r))))));\ 472 _dg = _r*(-1911.f/128 + _s*(7007.f/128 + _s*(-6435.f/64 + _s*(7007.f/64\ 473 + _s*(-9555.f/128 + _s*(4851.f/128 + _r*(-13))))))); \ 475 case NL_MSM_SPLIT_SIGMA8_12: \ 476 _g = 455.f/128 + _s*(-715.f/64 + _s*(3861.f/128 + _s*(-2145.f/32 \ 477 + _s*(25025.f/128 + _r*(-286 + _r*(12285.f/64 + _r*(-65 \ 478 + _r*(1155.f/128)))))))); \ 479 _dg = _r*(-715.f/32 + _s*(3861.f/32 + _s*(-6435.f/16 \ 480 + _s*(25025.f/16 + _r*(-2574 + _r*(61425.f/32 + _r*(-715 \ 481 + _r*(3465.f/32)))))))); \ 483 case NL_MSM_SPLIT_SIGMA8_13: \ 484 _g = 441.f/128 + _s*(-637.f/64 + _s*(3003.f/128 \ 485 + _s*(-1287.f/32 + _s*(7007.f/128 + _s*(-5733.f/64 \ 486 + _r*(91 + _r*(-4851.f/128 + _r*(6)))))))); \ 487 _dg = _r*(-637.f/32 + _s*(3003.f/32 + _s*(-3861.f/16 \ 488 + _s*(7007.f/16 + _s*(-28665.f/32 + _r*(1001 \ 489 + _r*(-14553.f/32 + _r*(78)))))))); \ 491 case NL_MSM_SPLIT_SIGMA8_14: \ 492 _g = 3465.f/1024 + _s*(-9555.f/1024 + _s*(21021.f/1024 \ 493 + _s*(-32175.f/1024 + _s*(35035.f/1024 + _s*(-28665.f/1024 \ 494 + _s*(24255.f/1024 + _r*(-15 + _r*(3003.f/1024)))))))); \ 495 _dg = _r*(-9555.f/512 + _s*(21021.f/256 + _s*(-96525.f/512 \ 496 + _s*(35035.f/128 + _s*(-143325.f/512 + _s*(72765.f/256 \ 497 + _r*(-195 + _r*(21021.f/512)))))))); \ 499 case NL_MSM_SPLIT_SIGMA8_15: \ 500 _g = 429.f/128 + _s*(-1155.f/128 + _s*(2457.f/128 + _s*(-3575.f/128 \ 501 + _s*(3575.f/128 + _s*(-2457.f/128 + _s*(1155.f/128 \ 502 + _s*(-429.f/128 + _r))))))); \ 503 _dg = _r*(-1155.f/64 + _s*(2457.f/32 + _s*(-10725.f/64 \ 504 + _s*(3575.f/16 + _s*(-12285.f/64 + _s*(3465.f/32 \ 505 + _s*(-3003.f/64 + _r*(15)))))))); \ 508 case NL_MSM_SPLIT_SIGMA2_6: \ 509 _g = (31.f/16) + _s*(-23.f/16 + _s*(9.f/16 + _s*(-1.f/16))); \ 510 _dg = (2*_r)*(-23.f/16 + _s*(9.f/8 + _s*(-3.f/16))); \ 513 case NL_MSM_SPLIT_SWITCH1_2: \ 515 _g = 5.f/3 + _r + _s*(-3 + _r*(4.f/3)); \ 516 _dg = 1 + _r*(-6 + _r*(4)); \ 523 case NL_MSM_SPLIT_SWITCH3_4: \ 525 _g = 5.f/7 + _r*(27.f/7 + _r*(-41.f/7 + _r*(16.f/7))); \ 526 _dg = 27.f/7 + _r*(-82.f/7 + _r*(48.f/7)); \ 529 _g = 47.f/28 + _s*(-5.f/7); \ 530 _dg = _r*(-10.f/7); \ 533 case NL_MSM_SPLIT_SWITCH7_8: \ 535 _g = -19.f/15 + _r*(49.f/5 + _r*(-59.f/5 + _r*(64.f/15))); \ 536 _dg = 49.f/5 + _r*(-118.f/5 + _r*(64.f/5)); \ 539 _g = 191.f/120 + _s*(-3.f/5); \ 544 return NL_MSM_ERROR_SUPPORT; \ 666 NL_Msmgrid_double *
qh;
667 NL_Msmgrid_double *
eh;
668 NL_Msmgrid_double *
gc;
724 int NL_msm_cuda_setup_gridcutoff(
NL_Msm *);
725 void NL_msm_cuda_cleanup_gridcutoff(
NL_Msm *);
727 int NL_msm_cuda_compute_gridcutoff(
NL_Msm *);
728 int NL_msm_cuda_condense_qgrids(
NL_Msm *);
729 int NL_msm_cuda_expand_egrids(
NL_Msm *);
int NL_msm_compute_short_range_sprec(NL_Msm *pm)
#define GRID_TEMPLATE(TYPE)
int NL_msm_compute_long_range_sprec(NL_Msm *pm)
int NL_msm_compute_short_range(NL_Msm *pm)
wkf_timerhandle timer_longrng
int NL_msm_compute_long_range(NL_Msm *pm)
void NL_msm_cleanup(NL_Msm *pm)