67 printf(
"MSM anterpolation: %6.3f sec\n", time_delta);
71 if (use_nonfactored) {
72 for (level = 0; level < nlevels - 1; level++) {
78 for (level = 0; level < nlevels - 1; level++) {
86 printf(
"MSM restriction: %6.3f sec\n", time_delta);
90 if (use_cuda_gridcutoff && nlevels > 1) {
96 printf(
"Falling back on CPU for grid cutoff computation\n");
97 use_cuda_gridcutoff = 0;
103 printf(
"Falling back on CPU for grid cutoff computation\n");
104 use_cuda_gridcutoff = 0;
110 if ( ! use_cuda_gridcutoff ) {
111 for (level = 0; level < nlevels - 1; level++) {
123 printf(
"MSM grid cutoff: %6.3f sec\n", time_delta);
127 if (use_nonfactored) {
128 for (level--; level >= 0; level--) {
134 for (level--; level >= 0; level--) {
142 printf(
"MSM prolongation: %6.3f sec\n", time_delta);
151 printf(
"MSM interpolation: %6.3f sec\n", time_delta);
164 3, 5, 5, 7, 7, 9, 9, 3,
172 {-1./16, 0, 9./16, 1, 9./16, 0, -1./16},
175 {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
178 {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
181 { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
182 0, -245./2048, 0, 49./2048, 0, -5./2048 },
185 { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
186 0, -245./2048, 0, 49./2048, 0, -5./2048 },
189 { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
190 19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
191 -405./65536, 0, 35./65536 },
194 { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
195 19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
196 -405./65536, 0, 35./65536 },
199 { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
209 5, 7, 7, 9, 9, 11, 11, 7
219 {-5, -3, -1, 0, 1, 3, 5},
222 {-5, -3, -1, 0, 1, 3, 5},
225 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
228 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
231 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
234 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
237 {-3, -2, -1, 0, 1, 2, 3},
244 {-1./16, 9./16, 1, 9./16, -1./16},
247 {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
250 {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
253 { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
254 -245./2048, 49./2048, -5./2048 },
257 { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
258 -245./2048, 49./2048, -5./2048 },
261 { 35./65536, -405./65536, 567./16384, -2205./16384,
262 19845./32768, 1, 19845./32768, -2205./16384, 567./16384,
263 -405./65536, 35./65536 },
266 { 35./65536, -405./65536, 567./16384, -2205./16384,
267 19845./32768, 1, 19845./32768, -2205./16384, 567./16384,
268 -405./65536, 35./65536 },
271 { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
280 #define STENCIL_1D(_phi, _delta, _approx) \
282 double *phi = _phi; \
285 case NL_MSM_APPROX_CUBIC: \
286 phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
288 phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
290 phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
292 phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
294 case NL_MSM_APPROX_QUINTIC: \
295 phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
297 phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \
299 phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \
301 phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \
303 phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \
305 phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
307 case NL_MSM_APPROX_QUINTIC2: \
308 phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
310 phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
312 phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
314 phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
316 phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
318 phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
320 case NL_MSM_APPROX_SEPTIC: \
321 phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
323 phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
325 phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
327 phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
329 phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
331 phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
333 phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
335 phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
337 case NL_MSM_APPROX_SEPTIC3: \
338 phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \
339 + t*((26383./144) + t*((-22807./720) + t*((727./240) \
340 + t*(-89./720))))))); \
342 phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \
343 + t*((-66437./144) + t*((81109./720) + t*((-727./48) \
344 + t*(623./720))))))); \
346 phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \
347 + t*((4557./16) + t*((-9583./80) + t*((2181./80) \
348 + t*(-623./240))))))); \
350 phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \
351 + t*((-727./48) + t*(623./144))))); \
353 phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \
354 + t*((-727./48) + t*(-623./144))))); \
356 phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \
357 + t*((4557./16) + t*((9583./80) + t*((2181./80) \
358 + t*(623./240))))))); \
360 phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \
361 + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \
362 + t*(-623./720))))))); \
364 phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \
365 + t*((26383./144) + t*((22807./720) + t*((727./240) \
366 + t*(89./720))))))); \
368 case NL_MSM_APPROX_NONIC: \
369 phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
372 phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
375 phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
378 phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
381 phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
384 phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
387 phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
390 phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
393 phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
396 phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
399 case NL_MSM_APPROX_NONIC4: \
400 phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \
401 +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \
402 +t*(129391./192+t*(-259715./4032+t*(28909./8064 \
403 +t*(-3569./40320))))))))); \
405 phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \
406 +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \
407 +t*(156927./448+t*(-28909./1152 \
408 +t*(3569./4480))))))))); \
410 phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \
411 +t*(3548009./96+t*(-2422263./160+t*(197255./48 \
412 +t*(-19959./28+t*(144545./2016 \
413 +t*(-3569./1120))))))))); \
415 phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \
416 +t*(-576499./96+t*(686147./160+t*(-96277./48 \
417 +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \
419 phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \
420 +t*(6337./96+t*(-2745./32+t*(28909./576 \
421 +t*(-3569./320))))))); \
423 phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \
424 +t*(6337./96+t*(2745./32+t*(28909./576 \
425 +t*(3569./320))))))); \
427 phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \
428 +t*(-576499./96+t*(-686147./160+t*(-96277./48 \
429 +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \
431 phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \
432 +t*(3548009./96+t*(2422263./160+t*(197255./48 \
433 +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \
435 phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \
436 +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \
437 +t*(-156927./448+t*(-28909./1152 \
438 +t*(-3569./4480))))))))); \
440 phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \
441 +t*(17306975./288+t*(7761805./384+t*(2895587./640 \
442 +t*(129391./192+t*(259715./4032+t*(28909./8064 \
443 +t*(3569./40320))))))))); \
445 case NL_MSM_APPROX_BSPLINE: \
446 phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \
448 phi[1] = (2./3) + t*t*(-1 + 0.5*t); \
450 phi[2] = (2./3) + t*t*(-1 - 0.5*t); \
452 phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \
455 return NL_MSM_ERROR_SUPPORT; \
468 #define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) \
470 double *dphi = _dphi; \
471 double *phi = _phi; \
475 case NL_MSM_APPROX_CUBIC: \
476 phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
477 dphi[0] = (1.5 * t - 2) * (2 - t) * h_1; \
479 phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
480 dphi[1] = (-5 + 4.5 * t) * t * h_1; \
482 phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
483 dphi[2] = (-5 - 4.5 * t) * t * h_1; \
485 phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
486 dphi[3] = (1.5 * t + 2) * (2 + t) * h_1; \
488 case NL_MSM_APPROX_QUINTIC: \
489 phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
490 dphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t)) \
491 + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1; \
493 phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \
494 dphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))\
495 + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1; \
497 phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \
498 dphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t)) \
499 + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1; \
501 phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \
502 dphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t)) \
503 - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1; \
505 phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \
506 dphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t)) \
507 - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1; \
509 phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
510 dphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t)) \
511 + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1; \
513 case NL_MSM_APPROX_QUINTIC2: \
514 phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
515 dphi[0] = ((1./24) * (3-t) * (3-t) * ((3-t)*(5*t-8) - 3*(t-2)*(5*t-8) \
516 + 5*(t-2)*(3-t))) * h_1; \
518 phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
519 dphi[1] = ((-1./24) * ((2-t)*(-48+t*(153+t*(-114+t*25))) - (t-1)* \
520 (-48+t*(153+t*(-114+t*25))) \
521 + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1; \
523 phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
524 dphi[2] = ((1./12) * (-(12+t*(12+t*(-3+t*(-38+t*25)))) \
525 + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1; \
527 phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
528 dphi[3] = ((1./12) * ((12+t*(-12+t*(-3+t*(38+t*25)))) \
529 + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1; \
531 phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
532 dphi[4] = ((-1./24) * ((2+t)*(48+t*(153+t*(114+t*25))) + (t+1)* \
533 (48+t*(153+t*(114+t*25))) \
534 + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1; \
536 phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
537 dphi[5] = ((1./24) * (3+t) * (3+t) * ((3+t)*(5*t+8) + 3*(t+2)*(5*t+8) \
538 + 5*(t+2)*(3+t))) * h_1; \
540 case NL_MSM_APPROX_SEPTIC: \
541 phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
542 dphi[0] = (-1./720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807 \
543 +t*(-122+t*7))))) * h_1; \
545 phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
546 dphi[1] = (1./720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445 \
547 +t*(-750+t*49)))))) * h_1; \
549 phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
550 dphi[2] = (-1./240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365 \
551 +t*(-450+t*49)))))) * h_1; \
553 phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
554 dphi[3] = (1./144)*t*(-560+t*(84+t*(644+t*(-175+t*(-150+t*49))))) * \
557 phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
558 dphi[4] = (-1./144)*t*(560+t*(84+t*(-644+t*(-175+t*(150+t*49))))) * \
561 phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
562 dphi[5] = (1./240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365 \
563 +t*(450+t*49)))))) * h_1; \
565 phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
566 dphi[6] = (-1./720)*(756+t*(9940+t*(17724+t*(12740+t*(4445 \
567 +t*(750+t*49)))))) * h_1; \
569 phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
570 dphi[7] = (1./720)*(t+4)*(1944+t*(3644+t*(2512+t*(807 \
571 +t*(122+t*7))))) * h_1; \
573 case NL_MSM_APPROX_SEPTIC3: \
574 phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \
575 + t*((26383./144) + t*((-22807./720) + t*((727./240) \
576 + t*(-89./720))))))); \
577 dphi[0] = ((-7456./5) + t*((117572./45) + t*(-1899 + t*((26383./36) \
578 + t*((-22807./144) + t*((727./40) + t*(-623./720)))))))*h_1; \
580 phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \
581 + t*((-66437./144) + t*((81109./720) + t*((-727./48) \
582 + t*(623./720))))))); \
583 dphi[1] = ((25949./20) + t*((-117131./36) + t*((6741./2) \
584 + t*((-66437./36) + t*((81109./144) + t*((-727./8) \
585 + t*(4361./720))))))) * h_1; \
587 phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \
588 + t*((4557./16) + t*((-9583./80) + t*((2181./80) \
589 + t*(-623./240))))))); \
590 dphi[2] = ((-8617./60) + t*((12873./20) + t*((-2373./2) + t*((4557./4) \
591 + t*((-9583./16) + t*((6543./40) + t*(-4361./240)))))))*h_1; \
593 phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \
594 + t*((-727./48) + t*(623./144))))); \
595 dphi[3] = (t*((-49./18) + t*t*((-959./36) + t*((12845./144) \
596 + t*((-727./8) + t*(4361./144)))))) * h_1; \
598 phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \
599 + t*((-727./48) + t*(-623./144))))); \
600 dphi[4] = (t*((-49./18) + t*t*((-959./36) + t*((-12845./144) \
601 + t*((-727./8) + t*(-4361./144)))))) * h_1; \
603 phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \
604 + t*((4557./16) + t*((9583./80) + t*((2181./80) \
605 + t*(623./240))))))); \
606 dphi[5] = ((8617./60) + t*((12873./20) + t*((2373./2) + t*((4557./4) \
607 + t*((9583./16) + t*((6543./40) + t*(4361./240)))))))*h_1; \
609 phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \
610 + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \
611 + t*(-623./720))))))); \
612 dphi[6] = ((-25949./20) + t*((-117131./36) + t*((-6741./2) \
613 + t*((-66437./36) + t*((-81109./144) + t*((-727./8) \
614 + t*(-4361./720))))))) * h_1; \
616 phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \
617 + t*((26383./144) + t*((22807./720) + t*((727./240) \
618 + t*(89./720))))))); \
619 dphi[7] = ((7456./5) + t*((117572./45) + t*(1899 + t*((26383./36) \
620 + t*((22807./144) + t*((727./40) + t*(623./720)))))))*h_1; \
622 case NL_MSM_APPROX_NONIC: \
623 phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
625 dphi[0] = (-1./40320)*(t-5)*(-117648+t*(256552+t*(-221416 \
626 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1; \
628 phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
630 dphi[1] = (1./40320)*(71856+t*(-795368+t*(1569240+t*(-1357692 \
631 +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1; \
633 phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
635 dphi[2] = (1./10080)*(3384+t*(-69080+t*(55026 \
636 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640 \
637 +t*(-81)))))))))*h_1; \
639 phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
641 dphi[3] = (1./1440)*(72+t*(-6344+t*(2070 \
642 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1; \
644 phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
646 dphi[4] = (-1./2880)*t*(10792+t*(-972+t*(-12516 \
647 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1; \
649 phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
651 dphi[5] = (1./2880)*t*(-10792+t*(-972+t*(12516 \
652 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1; \
654 phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
656 dphi[6] = (1./1440)*(-72+t*(-6344+t*(-2070 \
657 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1; \
659 phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
661 dphi[7] = (1./10080)*(-3384+t*(-69080+t*(-55026 \
662 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1; \
664 phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
666 dphi[8] = (-1./40320)*(71856+t*(795368+t*(1569240 \
667 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296 \
670 phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
672 dphi[9] = (1./40320)*(t+5)*(117648+t*(256552+t*(221416 \
673 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1; \
675 case NL_MSM_APPROX_NONIC4: \
676 phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \
677 +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \
678 +t*(129391./192+t*(-259715./4032+t*(28909./8064 \
679 +t*(-3569./40320))))))))); \
680 dphi[0] = (-64188125./504+t*(231125375./1008 \
681 +t*(-17306975./96+t*(7761805./96+t*(-2895587./128 \
682 +t*(129391./32+t*(-259715./576+t*(28909./1008 \
683 +t*(-3569./4480)))))))))*h_1; \
685 phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \
686 +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \
687 +t*(156927./448+t*(-28909./1152 \
688 +t*(3569./4480))))))))); \
689 dphi[1] = (8314091./56+t*(-49901303./144+t*(11290587./32 \
690 +t*(-19648027./96+t*(9469163./128+t*(-545977./32 \
691 +t*(156927./64+t*(-28909./144 \
692 +t*(32121./4480)))))))))*h_1; \
694 phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \
695 +t*(3548009./96+t*(-2422263./160+t*(197255./48 \
696 +t*(-19959./28+t*(144545./2016 \
697 +t*(-3569./1120))))))))); \
698 dphi[2] = (-1038011./28+t*(31157515./252+t*(-2870007./16 \
699 +t*(3548009./24+t*(-2422263./32+t*(197255./8 \
700 +t*(-19959./4+t*(144545./252 \
701 +t*(-32121./1120)))))))))*h_1; \
703 phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \
704 +t*(-576499./96+t*(686147./160+t*(-96277./48 \
705 +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \
706 dphi[3] = (12757./12+t*(-230123./36+t*(264481./16 \
707 +t*(-576499./24+t*(686147./32+t*(-96277./8 \
708 +t*(99547./24+t*(-28909./36 \
709 +t*(10707./160)))))))))*h_1; \
711 phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \
712 +t*(6337./96+t*(-2745./32+t*(28909./576 \
713 +t*(-3569./320))))))); \
714 dphi[4] = t*(-205./72+t*t*(91./48+t*(-6181./64 \
715 +t*(6337./16+t*(-19215./32+t*(28909./72 \
716 +t*(-32121./320)))))))*h_1; \
718 phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \
719 +t*(6337./96+t*(2745./32+t*(28909./576 \
720 +t*(3569./320))))))); \
721 dphi[5] = t*(-205./72+t*t*(91./48+t*(6181./64 \
722 +t*(6337./16+t*(19215./32+t*(28909./72 \
723 +t*(32121./320)))))))*h_1; \
725 phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \
726 +t*(-576499./96+t*(-686147./160+t*(-96277./48 \
727 +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \
728 dphi[6] = (-12757./12+t*(-230123./36+t*(-264481./16 \
729 +t*(-576499./24+t*(-686147./32+t*(-96277./8 \
730 +t*(-99547./24+t*(-28909./36 \
731 +t*(-10707./160)))))))))*h_1; \
733 phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \
734 +t*(3548009./96+t*(2422263./160+t*(197255./48 \
735 +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \
736 dphi[7] = (1038011./28+t*(31157515./252+t*(2870007./16 \
737 +t*(3548009./24+t*(2422263./32+t*(197255./8 \
738 +t*(19959./4+t*(144545./252 \
739 +t*(32121./1120)))))))))*h_1; \
741 phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \
742 +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \
743 +t*(-156927./448+t*(-28909./1152 \
744 +t*(-3569./4480))))))))); \
745 dphi[8] = (-8314091./56+t*(-49901303./144+t*(-11290587./32 \
746 +t*(-19648027./96+t*(-9469163./128+t*(-545977./32 \
747 +t*(-156927./64+t*(-28909./144 \
748 +t*(-32121./4480)))))))))*h_1; \
750 phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \
751 +t*(17306975./288+t*(7761805./384+t*(2895587./640 \
752 +t*(129391./192+t*(259715./4032+t*(28909./8064 \
753 +t*(3569./40320))))))))); \
754 dphi[9] = (64188125./504+t*(231125375./1008 \
755 +t*(17306975./96+t*(7761805./96+t*(2895587./128 \
756 +t*(129391./32+t*(259715./576+t*(28909./1008 \
757 +t*(3569./4480)))))))))*h_1; \
759 case NL_MSM_APPROX_BSPLINE: \
760 phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \
761 dphi[0] = -0.5 * (2-t) * (2-t) * h_1; \
763 phi[1] = (2./3) + t*t*(-1 + 0.5*t); \
764 dphi[1] = t*(-2 + 1.5*t) * h_1; \
766 phi[2] = (2./3) + t*t*(-1 - 0.5*t); \
767 dphi[2] = t*(-2 - 1.5*t) * h_1; \
769 phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \
770 dphi[3] = 0.5 * (2+t) * (2+t) * h_1; \
773 return NL_MSM_ERROR_SUPPORT; \
781 const double *atom = msm->
atom;
787 double rx_hx, ry_hy, rz_hz;
788 #ifndef TEST_INLINING
794 const double hx_1 = 1/msm->
hx;
795 const double hy_1 = 1/msm->
hy;
796 const double hz_1 = 1/msm->
hz;
797 const double xm0 = msm->
gx;
798 const double ym0 = msm->
gy;
799 const double zm0 = msm->
gz;
802 NL_Msmgrid_double *qhgrid = &(msm->
qh[0]);
803 double *qh = qhgrid->data;
804 const int ni = qhgrid->ni;
805 const int nj = qhgrid->nj;
806 const int nk = qhgrid->nk;
807 const int ia = qhgrid->i0;
808 const int ib = ia + ni - 1;
809 const int ja = qhgrid->j0;
810 const int jb = ja + nj - 1;
811 const int ka = qhgrid->k0;
812 const int kb = ka + nk - 1;
819 int n, i, j, k, ilo, jlo, klo, koff;
822 const int approx = msm->
approx;
824 const int s_edge = (
PolyDegree[approx] - 1) >> 1;
828 for (n = 0; n < natoms; n++) {
835 rx_hx = (atom[4*n ] - xm0) * hx_1;
836 ry_hy = (atom[4*n + 1] - ym0) * hy_1;
837 rz_hz = (atom[4*n + 2] - zm0) * hz_1;
840 ilo = (int) floor(rx_hx) - s_edge;
841 jlo = (int) floor(ry_hy) - s_edge;
842 klo = (int) floor(rz_hz) - s_edge;
845 #ifndef TEST_INLINING
846 delta = rx_hx - (double) ilo;
848 delta = ry_hy - (double) jlo;
850 delta = rz_hz - (double) klo;
853 t = rx_hx - (double) ilo;
854 xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
856 xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
858 xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
860 xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
862 t = ry_hy - (double) jlo;
863 yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
865 yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
867 yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
869 yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
871 t = rz_hz - (double) klo;
872 zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
874 zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
876 zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
878 zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
883 iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
884 ja <= jlo && (jlo+(s_size-1)) <= jb &&
885 ka <= klo && (klo+(s_size-1)) <= kb );
890 for (k = 0; k < s_size; k++) {
891 koff = (k + klo) * nj;
893 for (j = 0; j < s_size; j++) {
894 jkoff = (koff + (j + jlo)) * ni;
896 for (i = 0; i < s_size; i++) {
897 index = jkoff + (i + ilo);
900 qh[index] += xphi[i] * cjk;
911 if (ilo < ia)
do { ilo += ni; }
while (ilo < ia);
912 else if (ilo > ib)
do { ilo -= ni; }
while (ilo > ib);
914 else if (ilo < ia || (ilo+(s_size-1)) > ib) {
919 if (jlo < ja)
do { jlo += nj; }
while (jlo < ja);
920 else if (jlo > jb)
do { jlo -= nj; }
while (jlo > jb);
922 else if (jlo < ja || (jlo+(s_size-1)) > jb) {
927 if (klo < ka)
do { klo += nk; }
while (klo < ka);
928 else if (klo > kb)
do { klo -= nk; }
while (klo > kb);
930 else if (klo < ka || (klo+(s_size-1)) > kb) {
935 for (k = 0, kp = klo; k < s_size; k++, kp++) {
936 if (kp > kb) kp = ka;
939 for (j = 0, jp = jlo; j < s_size; j++, jp++) {
940 if (jp > jb) jp = ja;
941 jkoff = (koff + jp) * ni;
943 for (i = 0, ip = ilo; i < s_size; i++, ip++) {
944 if (ip > ib) ip = ia;
948 qh[index] += xphi[i] * cjk;
962 double *f = msm->
felec;
963 const double *atom = msm->
atom;
972 double rx_hx, ry_hy, rz_hz;
973 #ifndef TEST_INLINING
978 const double hx_1 = 1/msm->
hx;
979 const double hy_1 = 1/msm->
hy;
980 const double hz_1 = 1/msm->
hz;
981 const double xm0 = msm->
gx;
982 const double ym0 = msm->
gy;
983 const double zm0 = msm->
gz;
987 const NL_Msmgrid_double *ehgrid = &(msm->
eh[0]);
988 const double *eh = ehgrid->data;
989 const double *ebuf = ehgrid->buffer;
990 const NL_Msmgrid_double *qhgrid = &(msm->
qh[0]);
991 const double *qbuf = qhgrid->buffer;
992 const int ni = ehgrid->ni;
993 const int nj = ehgrid->nj;
994 const int nk = ehgrid->nk;
995 const int ia = ehgrid->i0;
996 const int ib = ia + ni - 1;
997 const int ja = ehgrid->j0;
998 const int jb = ja + nj - 1;
999 const int ka = ehgrid->k0;
1000 const int kb = ka + nk - 1;
1007 double fx, fy, fz, cx, cy, cz;
1009 int n, i, j, k, ilo, jlo, klo, koff;
1011 const int nn = (ni*nj) * nk;
1013 const int approx = msm->
approx;
1015 const int s_edge = (
PolyDegree[approx] - 1) >> 1;
1017 for (n = 0; n < natoms; n++) {
1024 rx_hx = (atom[4*n ] - xm0) * hx_1;
1025 ry_hy = (atom[4*n + 1] - ym0) * hy_1;
1026 rz_hz = (atom[4*n + 2] - zm0) * hz_1;
1029 ilo = (int) floor(rx_hx) - s_edge;
1030 jlo = (int) floor(ry_hy) - s_edge;
1031 klo = (int) floor(rz_hz) - s_edge;
1034 #ifndef TEST_INLINING
1035 delta = rx_hx - (double) ilo;
1038 delta = ry_hy - (double) jlo;
1041 delta = rz_hz - (double) klo;
1045 t = rx_hx - (double) ilo;
1046 xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
1047 dxphi[0] = (1.5 * t - 2) * (2 - t) * hx_1; \
1049 xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
1050 dxphi[1] = (-5 + 4.5 * t) * t * hx_1; \
1052 xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
1053 dxphi[2] = (-5 - 4.5 * t) * t * hx_1; \
1055 xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
1056 dxphi[3] = (1.5 * t + 2) * (2 + t) * hx_1; \
1058 t = ry_hy - (double) jlo;
1059 yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
1060 dyphi[0] = (1.5 * t - 2) * (2 - t) * hy_1; \
1062 yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
1063 dyphi[1] = (-5 + 4.5 * t) * t * hy_1; \
1065 yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
1066 dyphi[2] = (-5 - 4.5 * t) * t * hy_1; \
1068 yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
1069 dyphi[3] = (1.5 * t + 2) * (2 + t) * hy_1; \
1071 t = rz_hz - (double) klo;
1072 zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
1073 dzphi[0] = (1.5 * t - 2) * (2 - t) * hz_1; \
1075 zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
1076 dzphi[1] = (-5 + 4.5 * t) * t * hz_1; \
1078 zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
1079 dzphi[2] = (-5 - 4.5 * t) * t * hz_1; \
1081 zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
1082 dzphi[3] = (1.5 * t + 2) * (2 + t) * hz_1; \
1087 iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1088 ja <= jlo && (jlo+(s_size-1)) <= jb &&
1089 ka <= klo && (klo+(s_size-1)) <= kb );
1095 for (k = 0; k < s_size; k++) {
1096 koff = (k + klo) * nj;
1097 for (j = 0; j < s_size; j++) {
1098 jkoff = (koff + (j + jlo)) * ni;
1099 cx = yphi[j] * zphi[k];
1100 cy = dyphi[j] * zphi[k];
1101 cz = yphi[j] * dzphi[k];
1102 for (i = 0; i < s_size; i++) {
1103 index = jkoff + (i + ilo);
1106 fx += eh[index] * dxphi[i] * cx;
1107 fy += eh[index] * xphi[i] * cy;
1108 fz += eh[index] * xphi[i] * cz;
1119 if (ilo < ia)
do { ilo += ni; }
while (ilo < ia);
1120 else if (ilo > ib)
do { ilo -= ni; }
while (ilo > ib);
1122 else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1127 if (jlo < ja)
do { jlo += nj; }
while (jlo < ja);
1128 else if (jlo > jb)
do { jlo -= nj; }
while (jlo > jb);
1130 else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1135 if (klo < ka)
do { klo += nk; }
while (klo < ka);
1136 else if (klo > kb)
do { klo -= nk; }
while (klo > kb);
1138 else if (klo < ka || (klo+(s_size-1)) > kb) {
1144 for (k = 0, kp = klo; k < s_size; k++, kp++) {
1145 if (kp > kb) kp = ka;
1147 for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1148 if (jp > jb) jp = ja;
1149 jkoff = (koff + jp) * ni;
1150 cx = yphi[j] * zphi[k];
1151 cy = dyphi[j] * zphi[k];
1152 cz = yphi[j] * dzphi[k];
1153 for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1154 if (ip > ib) ip = ia;
1158 fx += eh[index] * dxphi[i] * cx;
1159 fy += eh[index] * xphi[i] * cy;
1160 fz += eh[index] * xphi[i] * cz;
1177 for (n = 0; n < natoms; n++) {
1178 double q = atom[4*n + 3];
1183 for (index = 0; index < nn; index++) {
1184 u += qbuf[index] * ebuf[index];
1186 msm->
uelec += 0.5 * u;
1195 const NL_Msmgrid_double *qhgrid = &(msm->
qh[level]);
1196 const double *qh = qhgrid->data;
1197 NL_Msmgrid_double *q2hgrid = &(msm->
qh[level+1]);
1198 double *q2h = q2hgrid->data;
1201 const int ia1 = qhgrid->i0;
1202 const int ib1 = ia1 + qhgrid->ni - 1;
1203 const int ja1 = qhgrid->j0;
1204 const int jb1 = ja1 + qhgrid->nj - 1;
1205 const int ka1 = qhgrid->k0;
1206 const int kb1 = ka1 + qhgrid->nk - 1;
1207 const int ni1 = qhgrid->ni;
1208 const int nj1 = qhgrid->nj;
1209 const int nk1 = qhgrid->nk;
1212 const int ia2 = q2hgrid->i0;
1213 const int ib2 = ia2 + q2hgrid->ni - 1;
1214 const int ja2 = q2hgrid->j0;
1215 const int jb2 = ja2 + q2hgrid->nj - 1;
1216 const int ka2 = q2hgrid->k0;
1217 const int kb2 = ka2 + q2hgrid->nk - 1;
1218 const int nrow_q2 = q2hgrid->ni;
1219 const int nstride_q2 = nrow_q2 * q2hgrid->nj;
1226 double *qzd = msm->
lzd + (-ka1);
1227 double *qyzd = msm->
lyzd + (-ka1*nj1 + -ja1);
1230 const double *phi = NULL;
1235 int index_plane_q2, index_q2;
1236 int index_jk, offset_k;
1241 const int diam_stencil = 2*r_stencil + 1;
1243 for (i2 = ia2; i2 <= ib2; i2++) {
1245 for (k = ka1; k <= kb1; k++) {
1248 for (j = ja1; j <= jb1; j++) {
1249 index_jk = offset_k + j;
1250 offset = index_jk * ni1;
1254 int lower = im - r_stencil;
1255 int upper = im + r_stencil;
1256 if (lower < ia1) lower = ia1;
1257 if (upper > ib1) upper = ib1;
1258 phi = phi_factored + r_stencil;
1259 for (i = lower; i <= upper; i++) {
1260 qsum += phi[i-im] * qh[offset + i];
1264 int ip = im - r_stencil;
1265 if (ip < ia1)
do { ip += ni1; }
while (ip < ia1);
1267 for (i = 0; i < diam_stencil; i++, ip++) {
1268 if (ip > ib1) ip = ia1;
1269 qsum += phi[i] * qh[offset + ip];
1272 qyzd[index_jk] = qsum;
1277 for (j2 = ja2; j2 <= jb2; j2++) {
1278 index_plane_q2 = j2 * nrow_q2 + i2;
1280 for (k = ka1; k <= kb1; k++) {
1285 int lower = jm - r_stencil;
1286 int upper = jm + r_stencil;
1287 if (lower < ja1) lower = ja1;
1288 if (upper > jb1) upper = jb1;
1289 phi = phi_factored + r_stencil;
1290 for (j = lower; j <= upper; j++) {
1291 qsum += phi[j-jm] * qyzd[offset + j];
1295 int jp = jm - r_stencil;
1296 if (jp < ja1)
do { jp += nj1; }
while (jp < ja1);
1298 for (j = 0; j < diam_stencil; j++, jp++) {
1299 if (jp > jb1) jp = ja1;
1300 qsum += phi[j] * qyzd[offset + jp];
1306 for (k2 = ka2; k2 <= kb2; k2++) {
1307 index_q2 = k2 * nstride_q2 + index_plane_q2;
1311 int lower = km - r_stencil;
1312 int upper = km + r_stencil;
1313 if (lower < ka1) lower = ka1;
1314 if (upper > kb1) upper = kb1;
1315 phi = phi_factored + r_stencil;
1316 for (k = lower; k <= upper; k++) {
1317 qsum += phi[k-km] * qzd[k];
1321 int kp = km - r_stencil;
1322 if (kp < ka1)
do { kp += nk1; }
while (kp < ka1);
1324 for (k = 0; k < diam_stencil; k++, kp++) {
1325 if (kp > kb1) kp = ka1;
1326 qsum += phi[k] * qzd[kp];
1329 q2h[index_q2] = qsum;
1342 NL_Msmgrid_double *ehgrid = &(msm->
eh[level]);
1343 double *eh = ehgrid->data;
1344 const NL_Msmgrid_double *e2hgrid = &(msm->
eh[level+1]);
1345 const double *e2h = e2hgrid->data;
1348 const int ia1 = ehgrid->i0;
1349 const int ib1 = ia1 + ehgrid->ni - 1;
1350 const int ja1 = ehgrid->j0;
1351 const int jb1 = ja1 + ehgrid->nj - 1;
1352 const int ka1 = ehgrid->k0;
1353 const int kb1 = ka1 + ehgrid->nk - 1;
1354 const int ni1 = ehgrid->ni;
1355 const int nj1 = ehgrid->nj;
1356 const int nk1 = ehgrid->nk;
1359 const int ia2 = e2hgrid->i0;
1360 const int ib2 = ia2 + e2hgrid->ni - 1;
1361 const int ja2 = e2hgrid->j0;
1362 const int jb2 = ja2 + e2hgrid->nj - 1;
1363 const int ka2 = e2hgrid->k0;
1364 const int kb2 = ka2 + e2hgrid->nk - 1;
1365 const int nrow_e2 = e2hgrid->ni;
1366 const int nstride_e2 = nrow_e2 * e2hgrid->nj;
1373 double *ezd = msm->
lzd + (-ka1);
1374 double *eyzd = msm->
lyzd + (-ka1*nj1 + -ja1);
1376 const size_t size_lzd = nk1 *
sizeof(double);
1377 const size_t size_lyzd = nj1 * nk1 *
sizeof(double);
1379 const double *phi = NULL;
1384 int index_plane_e2, index_e2;
1385 int index_jk, offset_k;
1390 const int diam_stencil = 2*r_stencil + 1;
1392 for (i2 = ia2; i2 <= ib2; i2++) {
1393 memset(msm->
lyzd, 0, size_lyzd);
1395 for (j2 = ja2; j2 <= jb2; j2++) {
1396 memset(msm->
lzd, 0, size_lzd);
1397 index_plane_e2 = j2 * nrow_e2 + i2;
1399 for (k2 = ka2; k2 <= kb2; k2++) {
1400 index_e2 = k2 * nstride_e2 + index_plane_e2;
1403 int lower = km - r_stencil;
1404 int upper = km + r_stencil;
1405 if (lower < ka1) lower = ka1;
1406 if (upper > kb1) upper = kb1;
1407 phi = phi_factored + r_stencil;
1408 for (k = lower; k <= upper; k++) {
1409 ezd[k] += phi[k-km] * e2h[index_e2];
1413 int kp = km - r_stencil;
1414 if (kp < ka1)
do { kp += nk1; }
while (kp < ka1);
1416 for (k = 0; k < diam_stencil; k++, kp++) {
1417 if (kp > kb1) kp = ka1;
1418 ezd[kp] += phi[k] * e2h[index_e2];
1423 for (k = ka1; k <= kb1; k++) {
1427 int lower = jm - r_stencil;
1428 int upper = jm + r_stencil;
1429 if (lower < ja1) lower = ja1;
1430 if (upper > jb1) upper = jb1;
1431 phi = phi_factored + r_stencil;
1432 for (j = lower; j <= upper; j++) {
1433 eyzd[offset + j] += phi[j-jm] * ezd[k];
1437 int jp = jm - r_stencil;
1438 if (jp < ja1)
do { jp += nj1; }
while (jp < ja1);
1440 for (j = 0; j < diam_stencil; j++, jp++) {
1441 if (jp > jb1) jp = ja1;
1442 eyzd[offset + jp] += phi[j] * ezd[k];
1449 for (k = ka1; k <= kb1; k++) {
1452 for (j = ja1; j <= jb1; j++) {
1453 index_jk = offset_k + j;
1454 offset = index_jk * ni1;
1457 int lower = im - r_stencil;
1458 int upper = im + r_stencil;
1459 if (lower < ia1) lower = ia1;
1460 if (upper > ib1) upper = ib1;
1461 phi = phi_factored + r_stencil;
1462 for (i = lower; i <= upper; i++) {
1463 eh[offset + i] += phi[i-im] * eyzd[index_jk];
1467 int ip = im - r_stencil;
1468 if (ip < ia1)
do { ip += ni1; }
while (ip < ia1);
1470 for (i = 0; i < diam_stencil; i++, ip++) {
1471 if (ip > ib1) ip = ia1;
1472 eh[offset + ip] += phi[i] * eyzd[index_jk];
1487 const NL_Msmgrid_double *qhgrid = &(msm->
qh[level]);
1488 const double *qh = qhgrid->data;
1489 NL_Msmgrid_double *q2hgrid = &(msm->
qh[level+1]);
1490 double *q2h_buffer = q2hgrid->buffer;
1493 const int ia1 = qhgrid->i0;
1494 const int ib1 = ia1 + qhgrid->ni - 1;
1495 const int ja1 = qhgrid->j0;
1496 const int jb1 = ja1 + qhgrid->nj - 1;
1497 const int ka1 = qhgrid->k0;
1498 const int kb1 = ka1 + qhgrid->nk - 1;
1499 const int ni1 = qhgrid->ni;
1500 const int nj1 = qhgrid->nj;
1501 const int nk1 = qhgrid->nk;
1504 const int ia2 = q2hgrid->i0;
1505 const int ib2 = ia2 + q2hgrid->ni - 1;
1506 const int ja2 = q2hgrid->j0;
1507 const int jb2 = ja2 + q2hgrid->nj - 1;
1508 const int ka2 = q2hgrid->k0;
1509 const int kb2 = ka2 + q2hgrid->nk - 1;
1519 double q2h_sum, cjk;
1521 int i1, j1, k1, index1, jk1off, k1off;
1526 for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
1528 for (j2 = ja2; j2 <= jb2; j2++) {
1530 for (i2 = ia2; i2 <= ib2; i2++, index2++) {
1534 for (k = 0; k < nstencil; k++) {
1535 k1off = k1 + offset[k];
1537 if (ispz)
do { k1off += nk1; }
while (k1off < ka1);
1540 else if (k1off > kb1) {
1541 if (ispz)
do { k1off -= nk1; }
while (k1off > kb1);
1545 for (j = 0; j < nstencil; j++) {
1546 jk1off = j1 + offset[j];
1548 if (ispy)
do { jk1off += nj1; }
while (jk1off < ja1);
1551 else if (jk1off > jb1) {
1552 if (ispy)
do { jk1off -= nj1; }
while (jk1off > jb1);
1555 jk1off = (k1off + jk1off) * ni1;
1556 cjk = phi[j] * phi[k];
1557 for (i = 0; i < nstencil; i++) {
1558 index1 = i1 + offset[i];
1560 if (ispx)
do { index1 += ni1; }
while (index1 < ia1);
1563 else if (index1 > ib1) {
1564 if (ispx)
do { index1 -= ni1; }
while (index1 > ib1);
1568 q2h_sum += qh[index1] * phi[i] * cjk;
1573 q2h_buffer[index2] = q2h_sum;
1584 NL_Msmgrid_double *ehgrid = &(msm->
eh[level]);
1585 double *eh = ehgrid->data;
1586 const NL_Msmgrid_double *e2hgrid = &(msm->
eh[level+1]);
1587 const double *e2h_buffer = e2hgrid->buffer;
1590 const int ia1 = ehgrid->i0;
1591 const int ib1 = ia1 + ehgrid->ni - 1;
1592 const int ja1 = ehgrid->j0;
1593 const int jb1 = ja1 + ehgrid->nj - 1;
1594 const int ka1 = ehgrid->k0;
1595 const int kb1 = ka1 + ehgrid->nk - 1;
1596 const int ni1 = ehgrid->ni;
1597 const int nj1 = ehgrid->nj;
1598 const int nk1 = ehgrid->nk;
1601 const int ia2 = e2hgrid->i0;
1602 const int ib2 = ia2 + e2hgrid->ni - 1;
1603 const int ja2 = e2hgrid->j0;
1604 const int jb2 = ja2 + e2hgrid->nj - 1;
1605 const int ka2 = e2hgrid->k0;
1606 const int kb2 = ka2 + e2hgrid->nk - 1;
1618 int i1, j1, k1, index1, jk1off, k1off;
1623 for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
1625 for (j2 = ja2; j2 <= jb2; j2++) {
1627 for (i2 = ia2; i2 <= ib2; i2++, index2++) {
1630 for (k = 0; k < nstencil; k++) {
1631 k1off = k1 + offset[k];
1633 if (ispz)
do { k1off += nk1; }
while (k1off < ka1);
1636 else if (k1off > kb1) {
1637 if (ispz)
do { k1off -= nk1; }
while (k1off > kb1);
1641 for (j = 0; j < nstencil; j++) {
1642 jk1off = j1 + offset[j];
1644 if (ispy)
do { jk1off += nj1; }
while (jk1off < ja1);
1647 else if (jk1off > jb1) {
1648 if (ispy)
do { jk1off -= nj1; }
while (jk1off > jb1);
1651 jk1off = (k1off + jk1off) * ni1;
1652 cjk = phi[j] * phi[k];
1653 for (i = 0; i < nstencil; i++) {
1654 index1 = i1 + offset[i];
1656 if (ispx)
do { index1 += ni1; }
while (index1 < ia1);
1659 else if (index1 > ib1) {
1660 if (ispx)
do { index1 -= ni1; }
while (index1 > ib1);
1664 eh[index1] += e2h_buffer[index2] * phi[i] * cjk;
1682 const NL_Msmgrid_double *qhgrid = &(msm->
qh[level]);
1683 const double *qh = qhgrid->data;
1684 NL_Msmgrid_double *ehgrid = &(msm->
eh[level]);
1685 double *eh = ehgrid->data;
1686 const int ia = qhgrid->i0;
1687 const int ib = ia + qhgrid->ni - 1;
1688 const int ja = qhgrid->j0;
1689 const int jb = ja + qhgrid->nj - 1;
1690 const int ka = qhgrid->k0;
1691 const int kb = ka + qhgrid->nk - 1;
1692 const int ni = qhgrid->ni;
1693 const int nj = qhgrid->nj;
1694 const int nk = qhgrid->nk;
1697 const NL_Msmgrid_double *gcgrid = &(msm->
gc[level]);
1698 const double *gc = gcgrid->data;
1699 const int gia = gcgrid->i0;
1700 const int gib = gia + gcgrid->ni - 1;
1701 const int gja = gcgrid->j0;
1702 const int gjb = gja + gcgrid->nj - 1;
1703 const int gka = gcgrid->k0;
1704 const int gkb = gka + gcgrid->nk - 1;
1705 const int gni = gcgrid->ni;
1706 const int gnj = gcgrid->nj;
1712 const int ispnone = !(ispx || ispy || ispz);
1715 int gia_clip, gib_clip;
1716 int gja_clip, gjb_clip;
1717 int gka_clip, gkb_clip;
1722 long jknoff, nindex;
1723 int kgoff, jkgoff, ngindex;
1728 for (k = ka; k <= kb; k++) {
1731 gka_clip = (k + gka < ka ? ka - k : gka);
1732 gkb_clip = (k + gkb > kb ? kb - k : gkb);
1736 for (j = ja; j <= jb; j++) {
1739 gja_clip = (j + gja < ja ? ja - j : gja);
1740 gjb_clip = (j + gjb > jb ? jb - j : gjb);
1742 jkoff = (koff + j) * ni;
1744 for (i = ia; i <= ib; i++) {
1747 gia_clip = (i + gia < ia ? ia - i : gia);
1748 gib_clip = (i + gib > ib ? ib - i : gib);
1754 for (kd = gka_clip; kd <= gkb_clip; kd++) {
1755 knoff = (k + kd) * nj;
1758 for (jd = gja_clip; jd <= gjb_clip; jd++) {
1759 jknoff = (knoff + (j + jd)) * ni;
1760 jkgoff = (kgoff + jd) * gni;
1762 for (
id = gia_clip;
id <= gib_clip;
id++) {
1763 nindex = jknoff + (i + id);
1764 ngindex = jkgoff + id;
1772 eh_sum += qh[nindex] * gc[ngindex];
1791 for (k = ka; k <= kb; k++) {
1795 gka_clip = (k + gka < ka ? ka - k : gka);
1796 gkb_clip = (k + gkb > kb ? kb - k : gkb);
1797 if (klo < ka) klo = ka;
1802 if (klo < ka)
do { klo += nk; }
while (klo < ka);
1808 for (j = ja; j <= jb; j++) {
1812 gja_clip = (j + gja < ja ? ja - j : gja);
1813 gjb_clip = (j + gjb > jb ? jb - j : gjb);
1814 if (jlo < ja) jlo = ja;
1819 if (jlo < ja)
do { jlo += nj; }
while (jlo < ja);
1823 jkoff = (koff + j) * ni;
1825 for (i = ia; i <= ib; i++) {
1829 gia_clip = (i + gia < ia ? ia - i : gia);
1830 gib_clip = (i + gib > ib ? ib - i : gib);
1831 if (ilo < ia) ilo = ia;
1836 if (ilo < ia)
do { ilo += ni; }
while (ilo < ia);
1844 for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
1846 if (kp > kb) kp = ka;
1850 for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
1852 if (jp > jb) jp = ja;
1853 jknoff = (knoff + jp) * ni;
1854 jkgoff = (kgoff + jd) * gni;
1856 for (
id = gia_clip, ip = ilo;
id <= gib_clip;
id++, ip++) {
1858 if (ip > ib) ip = ia;
1859 nindex = jknoff + ip;
1860 ngindex = jkgoff + id;
1868 eh_sum += qh[nindex] * gc[ngindex];
double wkf_timer_time(wkf_timerhandle v)
static int prolongation_factored(NL_Msm *, int level)
static int restriction_factored(NL_Msm *, int level)
static const int Nstencil[NL_MSM_APPROX_END]
static int anterpolation(NL_Msm *)
#define GRID_INDEX_CHECK(a, _i, _j, _k)
static const double PhiStencilFactored[NL_MSM_APPROX_END][2 *MAX_POLY_DEGREE+1]
static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL]
void wkf_timer_stop(wkf_timerhandle v)
static int restriction(NL_Msm *, int level)
#define GRID_INDEX(_p, _i, _j, _k)
wkf_timerhandle timer_longrng
int NL_msm_compute_long_range(NL_Msm *pm)
#define STENCIL_1D(_phi, _delta, _approx)
static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL]
static int interpolation(NL_Msm *)
static int gridcutoff(NL_Msm *, int level)
void wkf_timer_start(wkf_timerhandle v)
static const int PolyDegree[NL_MSM_APPROX_END]
#define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx)
static int prolongation(NL_Msm *, int level)