00001
00041 #include "msm_defn.h"
00042
00043 static int anterpolation(NL_Msm *);
00044 static int interpolation(NL_Msm *);
00045 static int restriction(NL_Msm *, int level);
00046 static int prolongation(NL_Msm *, int level);
00047 static int restriction_factored(NL_Msm *, int level);
00048 static int prolongation_factored(NL_Msm *, int level);
00049 static int gridcutoff(NL_Msm *, int level);
00050
00051
00052 int NL_msm_compute_long_range(NL_Msm *msm) {
00053 double time_delta;
00054 int rc = 0;
00055 int level;
00056 int nlevels = msm->nlevels;
00057 int use_cuda_gridcutoff = (msm->msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF);
00058 int fallback_cpu = (msm->msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK);
00059 int use_nonfactored = (msm->msmflags & NL_MSM_COMPUTE_NONFACTORED);
00060
00061 wkf_timer_start(msm->timer_longrng);
00062 rc = anterpolation(msm);
00063 if (rc) return rc;
00064 wkf_timer_stop(msm->timer_longrng);
00065 time_delta = wkf_timer_time(msm->timer_longrng);
00066 if (msm->report_timings) {
00067 printf("MSM anterpolation: %6.3f sec\n", time_delta);
00068 }
00069
00070 wkf_timer_start(msm->timer_longrng);
00071 if (use_nonfactored) {
00072 for (level = 0; level < nlevels - 1; level++) {
00073 rc = restriction(msm, level);
00074 if (rc) return rc;
00075 }
00076 }
00077 else {
00078 for (level = 0; level < nlevels - 1; level++) {
00079 rc = restriction_factored(msm, level);
00080 if (rc) return rc;
00081 }
00082 }
00083 wkf_timer_stop(msm->timer_longrng);
00084 time_delta = wkf_timer_time(msm->timer_longrng);
00085 if (msm->report_timings) {
00086 printf("MSM restriction: %6.3f sec\n", time_delta);
00087 }
00088
00089 wkf_timer_start(msm->timer_longrng);
00090 if (use_cuda_gridcutoff && nlevels > 1) {
00091 #ifdef NL_USE_CUDA
00092 if ((rc = NL_msm_cuda_condense_qgrids(msm)) != NL_MSM_SUCCESS ||
00093 (rc = NL_msm_cuda_compute_gridcutoff(msm)) != NL_MSM_SUCCESS ||
00094 (rc = NL_msm_cuda_expand_egrids(msm)) != NL_MSM_SUCCESS) {
00095 if (fallback_cpu) {
00096 printf("Falling back on CPU for grid cutoff computation\n");
00097 use_cuda_gridcutoff = 0;
00098 }
00099 else return rc;
00100 }
00101 #else
00102 if (fallback_cpu) {
00103 printf("Falling back on CPU for grid cutoff computation\n");
00104 use_cuda_gridcutoff = 0;
00105 }
00106 else return NL_MSM_ERROR_SUPPORT;
00107 #endif
00108 }
00109
00110 if ( ! use_cuda_gridcutoff ) {
00111 for (level = 0; level < nlevels - 1; level++) {
00112 rc = gridcutoff(msm, level);
00113 if (rc) return rc;
00114 }
00115 }
00116
00117 rc = gridcutoff(msm, level);
00118 if (rc) return rc;
00119
00120 wkf_timer_stop(msm->timer_longrng);
00121 time_delta = wkf_timer_time(msm->timer_longrng);
00122 if (msm->report_timings) {
00123 printf("MSM grid cutoff: %6.3f sec\n", time_delta);
00124 }
00125
00126 wkf_timer_start(msm->timer_longrng);
00127 if (use_nonfactored) {
00128 for (level--; level >= 0; level--) {
00129 rc = prolongation(msm, level);
00130 if (rc) return rc;
00131 }
00132 }
00133 else {
00134 for (level--; level >= 0; level--) {
00135 rc = prolongation_factored(msm, level);
00136 if (rc) return rc;
00137 }
00138 }
00139 wkf_timer_stop(msm->timer_longrng);
00140 time_delta = wkf_timer_time(msm->timer_longrng);
00141 if (msm->report_timings) {
00142 printf("MSM prolongation: %6.3f sec\n", time_delta);
00143 }
00144
00145 wkf_timer_start(msm->timer_longrng);
00146 rc = interpolation(msm);
00147 if (rc) return rc;
00148 wkf_timer_stop(msm->timer_longrng);
00149 time_delta = wkf_timer_time(msm->timer_longrng);
00150 if (msm->report_timings) {
00151 printf("MSM interpolation: %6.3f sec\n", time_delta);
00152 }
00153
00154 return 0;
00155 }
00156
00157
00159 enum { MAX_POLY_DEGREE = 9 };
00160
00163 static const int PolyDegree[NL_MSM_APPROX_END] = {
00164 3, 5, 5, 7, 7, 9, 9, 3,
00165 };
00166
00169 static const double
00170 PhiStencilFactored[NL_MSM_APPROX_END][2*MAX_POLY_DEGREE+1] = {
00171
00172 {-1./16, 0, 9./16, 1, 9./16, 0, -1./16},
00173
00174
00175 {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
00176
00177
00178 {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
00179
00180
00181 { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
00182 0, -245./2048, 0, 49./2048, 0, -5./2048 },
00183
00184
00185 { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
00186 0, -245./2048, 0, 49./2048, 0, -5./2048 },
00187
00188
00189 { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
00190 19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
00191 -405./65536, 0, 35./65536 },
00192
00193
00194 { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
00195 19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
00196 -405./65536, 0, 35./65536 },
00197
00198
00199 { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
00200 };
00201
00202
00205 enum { MAX_NSTENCIL = 11 };
00206
00208 static const int Nstencil[NL_MSM_APPROX_END] = {
00209 5, 7, 7, 9, 9, 11, 11, 7
00210 };
00211
00214 static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL] = {
00215
00216 {-3, -1, 0, 1, 3},
00217
00218
00219 {-5, -3, -1, 0, 1, 3, 5},
00220
00221
00222 {-5, -3, -1, 0, 1, 3, 5},
00223
00224
00225 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
00226
00227
00228 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
00229
00230
00231 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
00232
00233
00234 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
00235
00236
00237 {-3, -2, -1, 0, 1, 2, 3},
00238 };
00239
00242 static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL] = {
00243
00244 {-1./16, 9./16, 1, 9./16, -1./16},
00245
00246
00247 {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
00248
00249
00250 {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
00251
00252
00253 { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
00254 -245./2048, 49./2048, -5./2048 },
00255
00256
00257 { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
00258 -245./2048, 49./2048, -5./2048 },
00259
00260
00261 { 35./65536, -405./65536, 567./16384, -2205./16384,
00262 19845./32768, 1, 19845./32768, -2205./16384, 567./16384,
00263 -405./65536, 35./65536 },
00264
00265
00266 { 35./65536, -405./65536, 567./16384, -2205./16384,
00267 19845./32768, 1, 19845./32768, -2205./16384, 567./16384,
00268 -405./65536, 35./65536 },
00269
00270
00271 { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
00272 };
00273
00274
00280 #define STENCIL_1D(_phi, _delta, _approx) \
00281 do { \
00282 double *phi = _phi; \
00283 double t = _delta; \
00284 switch (_approx) { \
00285 case NL_MSM_APPROX_CUBIC: \
00286 phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
00287 t--; \
00288 phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
00289 t--; \
00290 phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
00291 t--; \
00292 phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
00293 break; \
00294 case NL_MSM_APPROX_QUINTIC: \
00295 phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
00296 t--; \
00297 phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \
00298 t--; \
00299 phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \
00300 t--; \
00301 phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \
00302 t--; \
00303 phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \
00304 t--; \
00305 phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
00306 break; \
00307 case NL_MSM_APPROX_QUINTIC2: \
00308 phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
00309 t--; \
00310 phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
00311 t--; \
00312 phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
00313 t--; \
00314 phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
00315 t--; \
00316 phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
00317 t--; \
00318 phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
00319 break; \
00320 case NL_MSM_APPROX_SEPTIC: \
00321 phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
00322 t--; \
00323 phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
00324 t--; \
00325 phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
00326 t--; \
00327 phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
00328 t--; \
00329 phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
00330 t--; \
00331 phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
00332 t--; \
00333 phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
00334 t--; \
00335 phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
00336 break; \
00337 case NL_MSM_APPROX_SEPTIC3: \
00338 phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \
00339 + t*((26383./144) + t*((-22807./720) + t*((727./240) \
00340 + t*(-89./720))))))); \
00341 t--; \
00342 phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \
00343 + t*((-66437./144) + t*((81109./720) + t*((-727./48) \
00344 + t*(623./720))))))); \
00345 t--; \
00346 phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \
00347 + t*((4557./16) + t*((-9583./80) + t*((2181./80) \
00348 + t*(-623./240))))))); \
00349 t--; \
00350 phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \
00351 + t*((-727./48) + t*(623./144))))); \
00352 t--; \
00353 phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \
00354 + t*((-727./48) + t*(-623./144))))); \
00355 t--; \
00356 phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \
00357 + t*((4557./16) + t*((9583./80) + t*((2181./80) \
00358 + t*(623./240))))))); \
00359 t--; \
00360 phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \
00361 + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \
00362 + t*(-623./720))))))); \
00363 t--; \
00364 phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \
00365 + t*((26383./144) + t*((22807./720) + t*((727./240) \
00366 + t*(89./720))))))); \
00367 break; \
00368 case NL_MSM_APPROX_NONIC: \
00369 phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
00370 (t-2)*(t-1); \
00371 t--; \
00372 phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
00373 (-8+t*(-35+9*t)); \
00374 t--; \
00375 phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
00376 (-14+t*(-25+9*t)); \
00377 t--; \
00378 phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
00379 (-6+t*(-5+3*t)); \
00380 t--; \
00381 phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
00382 (-20+t*(-5+9*t)); \
00383 t--; \
00384 phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
00385 (-20+t*(5+9*t)); \
00386 t--; \
00387 phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
00388 (-6+t*(5+3*t)); \
00389 t--; \
00390 phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
00391 (-14+t*(25+9*t)); \
00392 t--; \
00393 phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
00394 (-8+t*(35+9*t)); \
00395 t--; \
00396 phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
00397 (t+7)*(t+8); \
00398 break; \
00399 case NL_MSM_APPROX_NONIC4: \
00400 phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \
00401 +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \
00402 +t*(129391./192+t*(-259715./4032+t*(28909./8064 \
00403 +t*(-3569./40320))))))))); \
00404 t--; \
00405 phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \
00406 +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \
00407 +t*(156927./448+t*(-28909./1152 \
00408 +t*(3569./4480))))))))); \
00409 t--; \
00410 phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \
00411 +t*(3548009./96+t*(-2422263./160+t*(197255./48 \
00412 +t*(-19959./28+t*(144545./2016 \
00413 +t*(-3569./1120))))))))); \
00414 t--; \
00415 phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \
00416 +t*(-576499./96+t*(686147./160+t*(-96277./48 \
00417 +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \
00418 t--; \
00419 phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \
00420 +t*(6337./96+t*(-2745./32+t*(28909./576 \
00421 +t*(-3569./320))))))); \
00422 t--; \
00423 phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \
00424 +t*(6337./96+t*(2745./32+t*(28909./576 \
00425 +t*(3569./320))))))); \
00426 t--; \
00427 phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \
00428 +t*(-576499./96+t*(-686147./160+t*(-96277./48 \
00429 +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \
00430 t--; \
00431 phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \
00432 +t*(3548009./96+t*(2422263./160+t*(197255./48 \
00433 +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \
00434 t--; \
00435 phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \
00436 +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \
00437 +t*(-156927./448+t*(-28909./1152 \
00438 +t*(-3569./4480))))))))); \
00439 t--; \
00440 phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \
00441 +t*(17306975./288+t*(7761805./384+t*(2895587./640 \
00442 +t*(129391./192+t*(259715./4032+t*(28909./8064 \
00443 +t*(3569./40320))))))))); \
00444 break; \
00445 case NL_MSM_APPROX_BSPLINE: \
00446 phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \
00447 t--; \
00448 phi[1] = (2./3) + t*t*(-1 + 0.5*t); \
00449 t--; \
00450 phi[2] = (2./3) + t*t*(-1 - 0.5*t); \
00451 t--; \
00452 phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \
00453 break; \
00454 default: \
00455 return NL_MSM_ERROR_SUPPORT; \
00456 } \
00457 } while (0)
00458
00459
00460
00468 #define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) \
00469 do { \
00470 double *dphi = _dphi; \
00471 double *phi = _phi; \
00472 double h_1 = _h_1; \
00473 double t = _delta; \
00474 switch (_approx) { \
00475 case NL_MSM_APPROX_CUBIC: \
00476 phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
00477 dphi[0] = (1.5 * t - 2) * (2 - t) * h_1; \
00478 t--; \
00479 phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
00480 dphi[1] = (-5 + 4.5 * t) * t * h_1; \
00481 t--; \
00482 phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
00483 dphi[2] = (-5 - 4.5 * t) * t * h_1; \
00484 t--; \
00485 phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
00486 dphi[3] = (1.5 * t + 2) * (2 + t) * h_1; \
00487 break; \
00488 case NL_MSM_APPROX_QUINTIC: \
00489 phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
00490 dphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t)) \
00491 + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1; \
00492 t--; \
00493 phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \
00494 dphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))\
00495 + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1; \
00496 t--; \
00497 phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \
00498 dphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t)) \
00499 + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1; \
00500 t--; \
00501 phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \
00502 dphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t)) \
00503 - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1; \
00504 t--; \
00505 phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \
00506 dphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t)) \
00507 - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1; \
00508 t--; \
00509 phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
00510 dphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t)) \
00511 + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1; \
00512 break; \
00513 case NL_MSM_APPROX_QUINTIC2: \
00514 phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
00515 dphi[0] = ((1./24) * (3-t) * (3-t) * ((3-t)*(5*t-8) - 3*(t-2)*(5*t-8) \
00516 + 5*(t-2)*(3-t))) * h_1; \
00517 t--; \
00518 phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
00519 dphi[1] = ((-1./24) * ((2-t)*(-48+t*(153+t*(-114+t*25))) - (t-1)* \
00520 (-48+t*(153+t*(-114+t*25))) \
00521 + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1; \
00522 t--; \
00523 phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
00524 dphi[2] = ((1./12) * (-(12+t*(12+t*(-3+t*(-38+t*25)))) \
00525 + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1; \
00526 t--; \
00527 phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
00528 dphi[3] = ((1./12) * ((12+t*(-12+t*(-3+t*(38+t*25)))) \
00529 + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1; \
00530 t--; \
00531 phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
00532 dphi[4] = ((-1./24) * ((2+t)*(48+t*(153+t*(114+t*25))) + (t+1)* \
00533 (48+t*(153+t*(114+t*25))) \
00534 + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1; \
00535 t--; \
00536 phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
00537 dphi[5] = ((1./24) * (3+t) * (3+t) * ((3+t)*(5*t+8) + 3*(t+2)*(5*t+8) \
00538 + 5*(t+2)*(3+t))) * h_1; \
00539 break; \
00540 case NL_MSM_APPROX_SEPTIC: \
00541 phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
00542 dphi[0] = (-1./720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807 \
00543 +t*(-122+t*7))))) * h_1; \
00544 t--; \
00545 phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
00546 dphi[1] = (1./720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445 \
00547 +t*(-750+t*49)))))) * h_1; \
00548 t--; \
00549 phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
00550 dphi[2] = (-1./240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365 \
00551 +t*(-450+t*49)))))) * h_1; \
00552 t--; \
00553 phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
00554 dphi[3] = (1./144)*t*(-560+t*(84+t*(644+t*(-175+t*(-150+t*49))))) * \
00555 h_1; \
00556 t--; \
00557 phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
00558 dphi[4] = (-1./144)*t*(560+t*(84+t*(-644+t*(-175+t*(150+t*49))))) * \
00559 h_1; \
00560 t--; \
00561 phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
00562 dphi[5] = (1./240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365 \
00563 +t*(450+t*49)))))) * h_1; \
00564 t--; \
00565 phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
00566 dphi[6] = (-1./720)*(756+t*(9940+t*(17724+t*(12740+t*(4445 \
00567 +t*(750+t*49)))))) * h_1; \
00568 t--; \
00569 phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
00570 dphi[7] = (1./720)*(t+4)*(1944+t*(3644+t*(2512+t*(807 \
00571 +t*(122+t*7))))) * h_1; \
00572 break; \
00573 case NL_MSM_APPROX_SEPTIC3: \
00574 phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \
00575 + t*((26383./144) + t*((-22807./720) + t*((727./240) \
00576 + t*(-89./720))))))); \
00577 dphi[0] = ((-7456./5) + t*((117572./45) + t*(-1899 + t*((26383./36) \
00578 + t*((-22807./144) + t*((727./40) + t*(-623./720)))))))*h_1; \
00579 t--; \
00580 phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \
00581 + t*((-66437./144) + t*((81109./720) + t*((-727./48) \
00582 + t*(623./720))))))); \
00583 dphi[1] = ((25949./20) + t*((-117131./36) + t*((6741./2) \
00584 + t*((-66437./36) + t*((81109./144) + t*((-727./8) \
00585 + t*(4361./720))))))) * h_1; \
00586 t--; \
00587 phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \
00588 + t*((4557./16) + t*((-9583./80) + t*((2181./80) \
00589 + t*(-623./240))))))); \
00590 dphi[2] = ((-8617./60) + t*((12873./20) + t*((-2373./2) + t*((4557./4) \
00591 + t*((-9583./16) + t*((6543./40) + t*(-4361./240)))))))*h_1; \
00592 t--; \
00593 phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \
00594 + t*((-727./48) + t*(623./144))))); \
00595 dphi[3] = (t*((-49./18) + t*t*((-959./36) + t*((12845./144) \
00596 + t*((-727./8) + t*(4361./144)))))) * h_1; \
00597 t--; \
00598 phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \
00599 + t*((-727./48) + t*(-623./144))))); \
00600 dphi[4] = (t*((-49./18) + t*t*((-959./36) + t*((-12845./144) \
00601 + t*((-727./8) + t*(-4361./144)))))) * h_1; \
00602 t--; \
00603 phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \
00604 + t*((4557./16) + t*((9583./80) + t*((2181./80) \
00605 + t*(623./240))))))); \
00606 dphi[5] = ((8617./60) + t*((12873./20) + t*((2373./2) + t*((4557./4) \
00607 + t*((9583./16) + t*((6543./40) + t*(4361./240)))))))*h_1; \
00608 t--; \
00609 phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \
00610 + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \
00611 + t*(-623./720))))))); \
00612 dphi[6] = ((-25949./20) + t*((-117131./36) + t*((-6741./2) \
00613 + t*((-66437./36) + t*((-81109./144) + t*((-727./8) \
00614 + t*(-4361./720))))))) * h_1; \
00615 t--; \
00616 phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \
00617 + t*((26383./144) + t*((22807./720) + t*((727./240) \
00618 + t*(89./720))))))); \
00619 dphi[7] = ((7456./5) + t*((117572./45) + t*(1899 + t*((26383./36) \
00620 + t*((22807./144) + t*((727./40) + t*(623./720)))))))*h_1; \
00621 break; \
00622 case NL_MSM_APPROX_NONIC: \
00623 phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
00624 (t-2)*(t-1); \
00625 dphi[0] = (-1./40320)*(t-5)*(-117648+t*(256552+t*(-221416 \
00626 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1; \
00627 t--; \
00628 phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
00629 (-8+t*(-35+9*t)); \
00630 dphi[1] = (1./40320)*(71856+t*(-795368+t*(1569240+t*(-1357692 \
00631 +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1; \
00632 t--; \
00633 phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
00634 (-14+t*(-25+9*t)); \
00635 dphi[2] = (1./10080)*(3384+t*(-69080+t*(55026 \
00636 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640 \
00637 +t*(-81)))))))))*h_1; \
00638 t--; \
00639 phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
00640 (-6+t*(-5+3*t)); \
00641 dphi[3] = (1./1440)*(72+t*(-6344+t*(2070 \
00642 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1; \
00643 t--; \
00644 phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
00645 (-20+t*(-5+9*t)); \
00646 dphi[4] = (-1./2880)*t*(10792+t*(-972+t*(-12516 \
00647 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1; \
00648 t--; \
00649 phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
00650 (-20+t*(5+9*t)); \
00651 dphi[5] = (1./2880)*t*(-10792+t*(-972+t*(12516 \
00652 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1; \
00653 t--; \
00654 phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
00655 (-6+t*(5+3*t)); \
00656 dphi[6] = (1./1440)*(-72+t*(-6344+t*(-2070 \
00657 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1; \
00658 t--; \
00659 phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
00660 (-14+t*(25+9*t)); \
00661 dphi[7] = (1./10080)*(-3384+t*(-69080+t*(-55026 \
00662 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1; \
00663 t--; \
00664 phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
00665 (-8+t*(35+9*t)); \
00666 dphi[8] = (-1./40320)*(71856+t*(795368+t*(1569240 \
00667 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296 \
00668 +t*81))))))))*h_1; \
00669 t--; \
00670 phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
00671 (t+7)*(t+8); \
00672 dphi[9] = (1./40320)*(t+5)*(117648+t*(256552+t*(221416 \
00673 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1; \
00674 break; \
00675 case NL_MSM_APPROX_NONIC4: \
00676 phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \
00677 +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \
00678 +t*(129391./192+t*(-259715./4032+t*(28909./8064 \
00679 +t*(-3569./40320))))))))); \
00680 dphi[0] = (-64188125./504+t*(231125375./1008 \
00681 +t*(-17306975./96+t*(7761805./96+t*(-2895587./128 \
00682 +t*(129391./32+t*(-259715./576+t*(28909./1008 \
00683 +t*(-3569./4480)))))))))*h_1; \
00684 t--; \
00685 phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \
00686 +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \
00687 +t*(156927./448+t*(-28909./1152 \
00688 +t*(3569./4480))))))))); \
00689 dphi[1] = (8314091./56+t*(-49901303./144+t*(11290587./32 \
00690 +t*(-19648027./96+t*(9469163./128+t*(-545977./32 \
00691 +t*(156927./64+t*(-28909./144 \
00692 +t*(32121./4480)))))))))*h_1; \
00693 t--; \
00694 phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \
00695 +t*(3548009./96+t*(-2422263./160+t*(197255./48 \
00696 +t*(-19959./28+t*(144545./2016 \
00697 +t*(-3569./1120))))))))); \
00698 dphi[2] = (-1038011./28+t*(31157515./252+t*(-2870007./16 \
00699 +t*(3548009./24+t*(-2422263./32+t*(197255./8 \
00700 +t*(-19959./4+t*(144545./252 \
00701 +t*(-32121./1120)))))))))*h_1; \
00702 t--; \
00703 phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \
00704 +t*(-576499./96+t*(686147./160+t*(-96277./48 \
00705 +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \
00706 dphi[3] = (12757./12+t*(-230123./36+t*(264481./16 \
00707 +t*(-576499./24+t*(686147./32+t*(-96277./8 \
00708 +t*(99547./24+t*(-28909./36 \
00709 +t*(10707./160)))))))))*h_1; \
00710 t--; \
00711 phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \
00712 +t*(6337./96+t*(-2745./32+t*(28909./576 \
00713 +t*(-3569./320))))))); \
00714 dphi[4] = t*(-205./72+t*t*(91./48+t*(-6181./64 \
00715 +t*(6337./16+t*(-19215./32+t*(28909./72 \
00716 +t*(-32121./320)))))))*h_1; \
00717 t--; \
00718 phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \
00719 +t*(6337./96+t*(2745./32+t*(28909./576 \
00720 +t*(3569./320))))))); \
00721 dphi[5] = t*(-205./72+t*t*(91./48+t*(6181./64 \
00722 +t*(6337./16+t*(19215./32+t*(28909./72 \
00723 +t*(32121./320)))))))*h_1; \
00724 t--; \
00725 phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \
00726 +t*(-576499./96+t*(-686147./160+t*(-96277./48 \
00727 +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \
00728 dphi[6] = (-12757./12+t*(-230123./36+t*(-264481./16 \
00729 +t*(-576499./24+t*(-686147./32+t*(-96277./8 \
00730 +t*(-99547./24+t*(-28909./36 \
00731 +t*(-10707./160)))))))))*h_1; \
00732 t--; \
00733 phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \
00734 +t*(3548009./96+t*(2422263./160+t*(197255./48 \
00735 +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \
00736 dphi[7] = (1038011./28+t*(31157515./252+t*(2870007./16 \
00737 +t*(3548009./24+t*(2422263./32+t*(197255./8 \
00738 +t*(19959./4+t*(144545./252 \
00739 +t*(32121./1120)))))))))*h_1; \
00740 t--; \
00741 phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \
00742 +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \
00743 +t*(-156927./448+t*(-28909./1152 \
00744 +t*(-3569./4480))))))))); \
00745 dphi[8] = (-8314091./56+t*(-49901303./144+t*(-11290587./32 \
00746 +t*(-19648027./96+t*(-9469163./128+t*(-545977./32 \
00747 +t*(-156927./64+t*(-28909./144 \
00748 +t*(-32121./4480)))))))))*h_1; \
00749 t--; \
00750 phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \
00751 +t*(17306975./288+t*(7761805./384+t*(2895587./640 \
00752 +t*(129391./192+t*(259715./4032+t*(28909./8064 \
00753 +t*(3569./40320))))))))); \
00754 dphi[9] = (64188125./504+t*(231125375./1008 \
00755 +t*(17306975./96+t*(7761805./96+t*(2895587./128 \
00756 +t*(129391./32+t*(259715./576+t*(28909./1008 \
00757 +t*(3569./4480)))))))))*h_1; \
00758 break; \
00759 case NL_MSM_APPROX_BSPLINE: \
00760 phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \
00761 dphi[0] = -0.5 * (2-t) * (2-t) * h_1; \
00762 t--; \
00763 phi[1] = (2./3) + t*t*(-1 + 0.5*t); \
00764 dphi[1] = t*(-2 + 1.5*t) * h_1; \
00765 t--; \
00766 phi[2] = (2./3) + t*t*(-1 - 0.5*t); \
00767 dphi[2] = t*(-2 - 1.5*t) * h_1; \
00768 t--; \
00769 phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \
00770 dphi[3] = 0.5 * (2+t) * (2+t) * h_1; \
00771 break; \
00772 default: \
00773 return NL_MSM_ERROR_SUPPORT; \
00774 } \
00775 } while (0)
00776
00777
00778
00779 int anterpolation(NL_Msm *msm)
00780 {
00781 const double *atom = msm->atom;
00782 const int natoms = msm->numatoms;
00783
00784 double xphi[MAX_POLY_DEGREE+1];
00785 double yphi[MAX_POLY_DEGREE+1];
00786 double zphi[MAX_POLY_DEGREE+1];
00787 double rx_hx, ry_hy, rz_hz;
00788 #ifndef TEST_INLINING
00789 double delta;
00790 #else
00791 double t;
00792 #endif
00793 double ck, cjk;
00794 const double hx_1 = 1/msm->hx;
00795 const double hy_1 = 1/msm->hy;
00796 const double hz_1 = 1/msm->hz;
00797 const double xm0 = msm->gx;
00798 const double ym0 = msm->gy;
00799 const double zm0 = msm->gz;
00800 double q;
00801
00802 NL_Msmgrid_double *qhgrid = &(msm->qh[0]);
00803 double *qh = qhgrid->data;
00804 const int ni = qhgrid->ni;
00805 const int nj = qhgrid->nj;
00806 const int nk = qhgrid->nk;
00807 const int ia = qhgrid->i0;
00808 const int ib = ia + ni - 1;
00809 const int ja = qhgrid->j0;
00810 const int jb = ja + nj - 1;
00811 const int ka = qhgrid->k0;
00812 const int kb = ka + nk - 1;
00813
00814 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
00815 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
00816 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
00817 int iswithin;
00818
00819 int n, i, j, k, ilo, jlo, klo, koff;
00820 int jkoff, index;
00821
00822 const int approx = msm->approx;
00823 const int s_size = PolyDegree[approx] + 1;
00824 const int s_edge = (PolyDegree[approx] - 1) >> 1;
00825
00826 GRID_ZERO(qhgrid);
00827
00828 for (n = 0; n < natoms; n++) {
00829
00830
00831 q = atom[4*n + 3];
00832 if (0==q) continue;
00833
00834
00835 rx_hx = (atom[4*n ] - xm0) * hx_1;
00836 ry_hy = (atom[4*n + 1] - ym0) * hy_1;
00837 rz_hz = (atom[4*n + 2] - zm0) * hz_1;
00838
00839
00840 ilo = (int) floor(rx_hx) - s_edge;
00841 jlo = (int) floor(ry_hy) - s_edge;
00842 klo = (int) floor(rz_hz) - s_edge;
00843
00844
00845 #ifndef TEST_INLINING
00846 delta = rx_hx - (double) ilo;
00847 STENCIL_1D(xphi, delta, approx);
00848 delta = ry_hy - (double) jlo;
00849 STENCIL_1D(yphi, delta, approx);
00850 delta = rz_hz - (double) klo;
00851 STENCIL_1D(zphi, delta, approx);
00852 #else
00853 t = rx_hx - (double) ilo;
00854 xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
00855 t--; \
00856 xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
00857 t--; \
00858 xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
00859 t--; \
00860 xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
00861
00862 t = ry_hy - (double) jlo;
00863 yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
00864 t--; \
00865 yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
00866 t--; \
00867 yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
00868 t--; \
00869 yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
00870
00871 t = rz_hz - (double) klo;
00872 zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
00873 t--; \
00874 zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
00875 t--; \
00876 zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
00877 t--; \
00878 zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
00879
00880 #endif
00881
00882
00883 iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
00884 ja <= jlo && (jlo+(s_size-1)) <= jb &&
00885 ka <= klo && (klo+(s_size-1)) <= kb );
00886
00887 if ( iswithin ) {
00888
00889
00890 for (k = 0; k < s_size; k++) {
00891 koff = (k + klo) * nj;
00892 ck = zphi[k] * q;
00893 for (j = 0; j < s_size; j++) {
00894 jkoff = (koff + (j + jlo)) * ni;
00895 cjk = yphi[j] * ck;
00896 for (i = 0; i < s_size; i++) {
00897 index = jkoff + (i + ilo);
00898 GRID_INDEX_CHECK(qhgrid, i+ilo, j+jlo, k+klo);
00899 ASSERT(GRID_INDEX(qhgrid, i+ilo, j+jlo, k+klo) == index);
00900 qh[index] += xphi[i] * cjk;
00901 }
00902 }
00903 }
00904 }
00905
00906 else {
00907 int ip, jp, kp;
00908
00909
00910 if (ispx) {
00911 if (ilo < ia) do { ilo += ni; } while (ilo < ia);
00912 else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
00913 }
00914 else if (ilo < ia || (ilo+(s_size-1)) > ib) {
00915 return NL_MSM_ERROR_RANGE;
00916 }
00917
00918 if (ispy) {
00919 if (jlo < ja) do { jlo += nj; } while (jlo < ja);
00920 else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
00921 }
00922 else if (jlo < ja || (jlo+(s_size-1)) > jb) {
00923 return NL_MSM_ERROR_RANGE;
00924 }
00925
00926 if (ispz) {
00927 if (klo < ka) do { klo += nk; } while (klo < ka);
00928 else if (klo > kb) do { klo -= nk; } while (klo > kb);
00929 }
00930 else if (klo < ka || (klo+(s_size-1)) > kb) {
00931 return NL_MSM_ERROR_RANGE;
00932 }
00933
00934
00935 for (k = 0, kp = klo; k < s_size; k++, kp++) {
00936 if (kp > kb) kp = ka;
00937 koff = kp * nj;
00938 ck = zphi[k] * q;
00939 for (j = 0, jp = jlo; j < s_size; j++, jp++) {
00940 if (jp > jb) jp = ja;
00941 jkoff = (koff + jp) * ni;
00942 cjk = yphi[j] * ck;
00943 for (i = 0, ip = ilo; i < s_size; i++, ip++) {
00944 if (ip > ib) ip = ia;
00945 index = jkoff + ip;
00946 GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
00947 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == index);
00948 qh[index] += xphi[i] * cjk;
00949 }
00950 }
00951 }
00952 }
00953
00954 }
00955
00956 return NL_MSM_SUCCESS;
00957 }
00958
00959
00960 int interpolation(NL_Msm *msm)
00961 {
00962 double *f = msm->felec;
00963 const double *atom = msm->atom;
00964 const int natoms = msm->numatoms;
00965
00966 double xphi[MAX_POLY_DEGREE+1];
00967 double yphi[MAX_POLY_DEGREE+1];
00968 double zphi[MAX_POLY_DEGREE+1];
00969 double dxphi[MAX_POLY_DEGREE+1];
00970 double dyphi[MAX_POLY_DEGREE+1];
00971 double dzphi[MAX_POLY_DEGREE+1];
00972 double rx_hx, ry_hy, rz_hz;
00973 #ifndef TEST_INLINING
00974 double delta;
00975 #else
00976 double t;
00977 #endif
00978 const double hx_1 = 1/msm->hx;
00979 const double hy_1 = 1/msm->hy;
00980 const double hz_1 = 1/msm->hz;
00981 const double xm0 = msm->gx;
00982 const double ym0 = msm->gy;
00983 const double zm0 = msm->gz;
00984 double q;
00985 double u = 0;
00986
00987 const NL_Msmgrid_double *ehgrid = &(msm->eh[0]);
00988 const double *eh = ehgrid->data;
00989 const double *ebuf = ehgrid->buffer;
00990 const NL_Msmgrid_double *qhgrid = &(msm->qh[0]);
00991 const double *qbuf = qhgrid->buffer;
00992 const int ni = ehgrid->ni;
00993 const int nj = ehgrid->nj;
00994 const int nk = ehgrid->nk;
00995 const int ia = ehgrid->i0;
00996 const int ib = ia + ni - 1;
00997 const int ja = ehgrid->j0;
00998 const int jb = ja + nj - 1;
00999 const int ka = ehgrid->k0;
01000 const int kb = ka + nk - 1;
01001
01002 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01003 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01004 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01005 int iswithin;
01006
01007 double fx, fy, fz, cx, cy, cz;
01008
01009 int n, i, j, k, ilo, jlo, klo, koff;
01010 long jkoff, index;
01011 const int nn = (ni*nj) * nk;
01012
01013 const int approx = msm->approx;
01014 const int s_size = PolyDegree[approx] + 1;
01015 const int s_edge = (PolyDegree[approx] - 1) >> 1;
01016
01017 for (n = 0; n < natoms; n++) {
01018
01019
01020 q = atom[4*n + 3];
01021 if (0==q) continue;
01022
01023
01024 rx_hx = (atom[4*n ] - xm0) * hx_1;
01025 ry_hy = (atom[4*n + 1] - ym0) * hy_1;
01026 rz_hz = (atom[4*n + 2] - zm0) * hz_1;
01027
01028
01029 ilo = (int) floor(rx_hx) - s_edge;
01030 jlo = (int) floor(ry_hy) - s_edge;
01031 klo = (int) floor(rz_hz) - s_edge;
01032
01033
01034 #ifndef TEST_INLINING
01035 delta = rx_hx - (double) ilo;
01036
01037 D_STENCIL_1D(dxphi, xphi, hx_1, delta, approx);
01038 delta = ry_hy - (double) jlo;
01039
01040 D_STENCIL_1D(dyphi, yphi, hy_1, delta, approx);
01041 delta = rz_hz - (double) klo;
01042
01043 D_STENCIL_1D(dzphi, zphi, hz_1, delta, approx);
01044 #else
01045 t = rx_hx - (double) ilo;
01046 xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
01047 dxphi[0] = (1.5 * t - 2) * (2 - t) * hx_1; \
01048 t--; \
01049 xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
01050 dxphi[1] = (-5 + 4.5 * t) * t * hx_1; \
01051 t--; \
01052 xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
01053 dxphi[2] = (-5 - 4.5 * t) * t * hx_1; \
01054 t--; \
01055 xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
01056 dxphi[3] = (1.5 * t + 2) * (2 + t) * hx_1; \
01057
01058 t = ry_hy - (double) jlo;
01059 yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
01060 dyphi[0] = (1.5 * t - 2) * (2 - t) * hy_1; \
01061 t--; \
01062 yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
01063 dyphi[1] = (-5 + 4.5 * t) * t * hy_1; \
01064 t--; \
01065 yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
01066 dyphi[2] = (-5 - 4.5 * t) * t * hy_1; \
01067 t--; \
01068 yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
01069 dyphi[3] = (1.5 * t + 2) * (2 + t) * hy_1; \
01070
01071 t = rz_hz - (double) klo;
01072 zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
01073 dzphi[0] = (1.5 * t - 2) * (2 - t) * hz_1; \
01074 t--; \
01075 zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
01076 dzphi[1] = (-5 + 4.5 * t) * t * hz_1; \
01077 t--; \
01078 zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
01079 dzphi[2] = (-5 - 4.5 * t) * t * hz_1; \
01080 t--; \
01081 zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
01082 dzphi[3] = (1.5 * t + 2) * (2 + t) * hz_1; \
01083
01084 #endif
01085
01086
01087 iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
01088 ja <= jlo && (jlo+(s_size-1)) <= jb &&
01089 ka <= klo && (klo+(s_size-1)) <= kb );
01090
01091 if ( iswithin ) {
01092
01093
01094 fx = fy = fz = 0;
01095 for (k = 0; k < s_size; k++) {
01096 koff = (k + klo) * nj;
01097 for (j = 0; j < s_size; j++) {
01098 jkoff = (koff + (j + jlo)) * ni;
01099 cx = yphi[j] * zphi[k];
01100 cy = dyphi[j] * zphi[k];
01101 cz = yphi[j] * dzphi[k];
01102 for (i = 0; i < s_size; i++) {
01103 index = jkoff + (i + ilo);
01104 GRID_INDEX_CHECK(ehgrid, i+ilo, j+jlo, k+klo);
01105 ASSERT(GRID_INDEX(ehgrid, i+ilo, j+jlo, k+klo) == index);
01106 fx += eh[index] * dxphi[i] * cx;
01107 fy += eh[index] * xphi[i] * cy;
01108 fz += eh[index] * xphi[i] * cz;
01109 }
01110 }
01111 }
01112 }
01113
01114 else {
01115 int ip, jp, kp;
01116
01117
01118 if (ispx) {
01119 if (ilo < ia) do { ilo += ni; } while (ilo < ia);
01120 else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
01121 }
01122 else if (ilo < ia || (ilo+(s_size-1)) > ib) {
01123 return NL_MSM_ERROR_RANGE;
01124 }
01125
01126 if (ispy) {
01127 if (jlo < ja) do { jlo += nj; } while (jlo < ja);
01128 else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
01129 }
01130 else if (jlo < ja || (jlo+(s_size-1)) > jb) {
01131 return NL_MSM_ERROR_RANGE;
01132 }
01133
01134 if (ispz) {
01135 if (klo < ka) do { klo += nk; } while (klo < ka);
01136 else if (klo > kb) do { klo -= nk; } while (klo > kb);
01137 }
01138 else if (klo < ka || (klo+(s_size-1)) > kb) {
01139 return NL_MSM_ERROR_RANGE;
01140 }
01141
01142
01143 fx = fy = fz = 0;
01144 for (k = 0, kp = klo; k < s_size; k++, kp++) {
01145 if (kp > kb) kp = ka;
01146 koff = kp * nj;
01147 for (j = 0, jp = jlo; j < s_size; j++, jp++) {
01148 if (jp > jb) jp = ja;
01149 jkoff = (koff + jp) * ni;
01150 cx = yphi[j] * zphi[k];
01151 cy = dyphi[j] * zphi[k];
01152 cz = yphi[j] * dzphi[k];
01153 for (i = 0, ip = ilo; i < s_size; i++, ip++) {
01154 if (ip > ib) ip = ia;
01155 index = jkoff + ip;
01156 GRID_INDEX_CHECK(ehgrid, ip, jp, kp);
01157 ASSERT(GRID_INDEX(ehgrid, ip, jp, kp) == index);
01158 fx += eh[index] * dxphi[i] * cx;
01159 fy += eh[index] * xphi[i] * cy;
01160 fz += eh[index] * xphi[i] * cz;
01161 }
01162 }
01163 }
01164 }
01165
01166
01167 f[3*n ] -= q * fx;
01168 f[3*n+1] -= q * fy;
01169 f[3*n+2] -= q * fz;
01170
01171
01172 }
01173
01174
01175 u = 0;
01176 if (1) {
01177 for (n = 0; n < natoms; n++) {
01178 double q = atom[4*n + 3];
01179 u += q * q;
01180 }
01181 u *= -msm->gzero;
01182 }
01183 for (index = 0; index < nn; index++) {
01184 u += qbuf[index] * ebuf[index];
01185 }
01186 msm->uelec += 0.5 * u;
01187
01188
01189 return NL_MSM_SUCCESS;
01190 }
01191
01192
01193 int restriction_factored(NL_Msm *msm, int level) {
01194
01195 const NL_Msmgrid_double *qhgrid = &(msm->qh[level]);
01196 const double *qh = qhgrid->data;
01197 NL_Msmgrid_double *q2hgrid = &(msm->qh[level+1]);
01198 double *q2h = q2hgrid->data;
01199
01200
01201 const int ia1 = qhgrid->i0;
01202 const int ib1 = ia1 + qhgrid->ni - 1;
01203 const int ja1 = qhgrid->j0;
01204 const int jb1 = ja1 + qhgrid->nj - 1;
01205 const int ka1 = qhgrid->k0;
01206 const int kb1 = ka1 + qhgrid->nk - 1;
01207 const int ni1 = qhgrid->ni;
01208 const int nj1 = qhgrid->nj;
01209 const int nk1 = qhgrid->nk;
01210
01211
01212 const int ia2 = q2hgrid->i0;
01213 const int ib2 = ia2 + q2hgrid->ni - 1;
01214 const int ja2 = q2hgrid->j0;
01215 const int jb2 = ja2 + q2hgrid->nj - 1;
01216 const int ka2 = q2hgrid->k0;
01217 const int kb2 = ka2 + q2hgrid->nk - 1;
01218 const int nrow_q2 = q2hgrid->ni;
01219 const int nstride_q2 = nrow_q2 * q2hgrid->nj;
01220
01221 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01222 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01223 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01224
01225
01226 double *qzd = msm->lzd + (-ka1);
01227 double *qyzd = msm->lyzd + (-ka1*nj1 + -ja1);
01228 double qsum;
01229
01230 const double *phi = NULL;
01231
01232 int i2, j2, k2;
01233 int im, jm, km;
01234 int i, j, k;
01235 int index_plane_q2, index_q2;
01236 int index_jk, offset_k;
01237 int offset;
01238
01239 const double *phi_factored = PhiStencilFactored[msm->approx];
01240 const int r_stencil = PolyDegree[msm->approx];
01241 const int diam_stencil = 2*r_stencil + 1;
01242
01243 for (i2 = ia2; i2 <= ib2; i2++) {
01244
01245 for (k = ka1; k <= kb1; k++) {
01246 offset_k = k * nj1;
01247
01248 for (j = ja1; j <= jb1; j++) {
01249 index_jk = offset_k + j;
01250 offset = index_jk * ni1;
01251 im = (i2 << 1);
01252 qsum = 0;
01253 if ( ! ispx ) {
01254 int lower = im - r_stencil;
01255 int upper = im + r_stencil;
01256 if (lower < ia1) lower = ia1;
01257 if (upper > ib1) upper = ib1;
01258 phi = phi_factored + r_stencil;
01259 for (i = lower; i <= upper; i++) {
01260 qsum += phi[i-im] * qh[offset + i];
01261 }
01262 }
01263 else {
01264 int ip = im - r_stencil;
01265 if (ip < ia1) do { ip += ni1; } while (ip < ia1);
01266 phi = phi_factored;
01267 for (i = 0; i < diam_stencil; i++, ip++) {
01268 if (ip > ib1) ip = ia1;
01269 qsum += phi[i] * qh[offset + ip];
01270 }
01271 }
01272 qyzd[index_jk] = qsum;
01273 }
01274
01275 }
01276
01277 for (j2 = ja2; j2 <= jb2; j2++) {
01278 index_plane_q2 = j2 * nrow_q2 + i2;
01279
01280 for (k = ka1; k <= kb1; k++) {
01281 offset = k * nj1;
01282 jm = (j2 << 1);
01283 qsum = 0;
01284 if ( ! ispy ) {
01285 int lower = jm - r_stencil;
01286 int upper = jm + r_stencil;
01287 if (lower < ja1) lower = ja1;
01288 if (upper > jb1) upper = jb1;
01289 phi = phi_factored + r_stencil;
01290 for (j = lower; j <= upper; j++) {
01291 qsum += phi[j-jm] * qyzd[offset + j];
01292 }
01293 }
01294 else {
01295 int jp = jm - r_stencil;
01296 if (jp < ja1) do { jp += nj1; } while (jp < ja1);
01297 phi = phi_factored;
01298 for (j = 0; j < diam_stencil; j++, jp++) {
01299 if (jp > jb1) jp = ja1;
01300 qsum += phi[j] * qyzd[offset + jp];
01301 }
01302 }
01303 qzd[k] = qsum;
01304 }
01305
01306 for (k2 = ka2; k2 <= kb2; k2++) {
01307 index_q2 = k2 * nstride_q2 + index_plane_q2;
01308 km = (k2 << 1);
01309 qsum = 0;
01310 if ( ! ispz ) {
01311 int lower = km - r_stencil;
01312 int upper = km + r_stencil;
01313 if (lower < ka1) lower = ka1;
01314 if (upper > kb1) upper = kb1;
01315 phi = phi_factored + r_stencil;
01316 for (k = lower; k <= upper; k++) {
01317 qsum += phi[k-km] * qzd[k];
01318 }
01319 }
01320 else {
01321 int kp = km - r_stencil;
01322 if (kp < ka1) do { kp += nk1; } while (kp < ka1);
01323 phi = phi_factored;
01324 for (k = 0; k < diam_stencil; k++, kp++) {
01325 if (kp > kb1) kp = ka1;
01326 qsum += phi[k] * qzd[kp];
01327 }
01328 }
01329 q2h[index_q2] = qsum;
01330 }
01331
01332 }
01333
01334 }
01335
01336 return NL_MSM_SUCCESS;
01337 }
01338
01339
01340 int prolongation_factored(NL_Msm *msm, int level) {
01341
01342 NL_Msmgrid_double *ehgrid = &(msm->eh[level]);
01343 double *eh = ehgrid->data;
01344 const NL_Msmgrid_double *e2hgrid = &(msm->eh[level+1]);
01345 const double *e2h = e2hgrid->data;
01346
01347
01348 const int ia1 = ehgrid->i0;
01349 const int ib1 = ia1 + ehgrid->ni - 1;
01350 const int ja1 = ehgrid->j0;
01351 const int jb1 = ja1 + ehgrid->nj - 1;
01352 const int ka1 = ehgrid->k0;
01353 const int kb1 = ka1 + ehgrid->nk - 1;
01354 const int ni1 = ehgrid->ni;
01355 const int nj1 = ehgrid->nj;
01356 const int nk1 = ehgrid->nk;
01357
01358
01359 const int ia2 = e2hgrid->i0;
01360 const int ib2 = ia2 + e2hgrid->ni - 1;
01361 const int ja2 = e2hgrid->j0;
01362 const int jb2 = ja2 + e2hgrid->nj - 1;
01363 const int ka2 = e2hgrid->k0;
01364 const int kb2 = ka2 + e2hgrid->nk - 1;
01365 const int nrow_e2 = e2hgrid->ni;
01366 const int nstride_e2 = nrow_e2 * e2hgrid->nj;
01367
01368 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01369 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01370 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01371
01372
01373 double *ezd = msm->lzd + (-ka1);
01374 double *eyzd = msm->lyzd + (-ka1*nj1 + -ja1);
01375
01376 const size_t size_lzd = nk1 * sizeof(double);
01377 const size_t size_lyzd = nj1 * nk1 * sizeof(double);
01378
01379 const double *phi = NULL;
01380
01381 int i2, j2, k2;
01382 int im, jm, km;
01383 int i, j, k;
01384 int index_plane_e2, index_e2;
01385 int index_jk, offset_k;
01386 int offset;
01387
01388 const double *phi_factored = PhiStencilFactored[msm->approx];
01389 const int r_stencil = PolyDegree[msm->approx];
01390 const int diam_stencil = 2*r_stencil + 1;
01391
01392 for (i2 = ia2; i2 <= ib2; i2++) {
01393 memset(msm->lyzd, 0, size_lyzd);
01394
01395 for (j2 = ja2; j2 <= jb2; j2++) {
01396 memset(msm->lzd, 0, size_lzd);
01397 index_plane_e2 = j2 * nrow_e2 + i2;
01398
01399 for (k2 = ka2; k2 <= kb2; k2++) {
01400 index_e2 = k2 * nstride_e2 + index_plane_e2;
01401 km = (k2 << 1);
01402 if ( ! ispz ) {
01403 int lower = km - r_stencil;
01404 int upper = km + r_stencil;
01405 if (lower < ka1) lower = ka1;
01406 if (upper > kb1) upper = kb1;
01407 phi = phi_factored + r_stencil;
01408 for (k = lower; k <= upper; k++) {
01409 ezd[k] += phi[k-km] * e2h[index_e2];
01410 }
01411 }
01412 else {
01413 int kp = km - r_stencil;
01414 if (kp < ka1) do { kp += nk1; } while (kp < ka1);
01415 phi = phi_factored;
01416 for (k = 0; k < diam_stencil; k++, kp++) {
01417 if (kp > kb1) kp = ka1;
01418 ezd[kp] += phi[k] * e2h[index_e2];
01419 }
01420 }
01421 }
01422
01423 for (k = ka1; k <= kb1; k++) {
01424 offset = k * nj1;
01425 jm = (j2 << 1);
01426 if ( ! ispy ) {
01427 int lower = jm - r_stencil;
01428 int upper = jm + r_stencil;
01429 if (lower < ja1) lower = ja1;
01430 if (upper > jb1) upper = jb1;
01431 phi = phi_factored + r_stencil;
01432 for (j = lower; j <= upper; j++) {
01433 eyzd[offset + j] += phi[j-jm] * ezd[k];
01434 }
01435 }
01436 else {
01437 int jp = jm - r_stencil;
01438 if (jp < ja1) do { jp += nj1; } while (jp < ja1);
01439 phi = phi_factored;
01440 for (j = 0; j < diam_stencil; j++, jp++) {
01441 if (jp > jb1) jp = ja1;
01442 eyzd[offset + jp] += phi[j] * ezd[k];
01443 }
01444 }
01445 }
01446
01447 }
01448
01449 for (k = ka1; k <= kb1; k++) {
01450 offset_k = k * nj1;
01451
01452 for (j = ja1; j <= jb1; j++) {
01453 index_jk = offset_k + j;
01454 offset = index_jk * ni1;
01455 im = (i2 << 1);
01456 if ( ! ispx ) {
01457 int lower = im - r_stencil;
01458 int upper = im + r_stencil;
01459 if (lower < ia1) lower = ia1;
01460 if (upper > ib1) upper = ib1;
01461 phi = phi_factored + r_stencil;
01462 for (i = lower; i <= upper; i++) {
01463 eh[offset + i] += phi[i-im] * eyzd[index_jk];
01464 }
01465 }
01466 else {
01467 int ip = im - r_stencil;
01468 if (ip < ia1) do { ip += ni1; } while (ip < ia1);
01469 phi = phi_factored;
01470 for (i = 0; i < diam_stencil; i++, ip++) {
01471 if (ip > ib1) ip = ia1;
01472 eh[offset + ip] += phi[i] * eyzd[index_jk];
01473 }
01474 }
01475 }
01476
01477 }
01478
01479 }
01480
01481 return NL_MSM_SUCCESS;
01482 }
01483
01484
01485 int restriction(NL_Msm *msm, int level) {
01486
01487 const NL_Msmgrid_double *qhgrid = &(msm->qh[level]);
01488 const double *qh = qhgrid->data;
01489 NL_Msmgrid_double *q2hgrid = &(msm->qh[level+1]);
01490 double *q2h_buffer = q2hgrid->buffer;
01491
01492
01493 const int ia1 = qhgrid->i0;
01494 const int ib1 = ia1 + qhgrid->ni - 1;
01495 const int ja1 = qhgrid->j0;
01496 const int jb1 = ja1 + qhgrid->nj - 1;
01497 const int ka1 = qhgrid->k0;
01498 const int kb1 = ka1 + qhgrid->nk - 1;
01499 const int ni1 = qhgrid->ni;
01500 const int nj1 = qhgrid->nj;
01501 const int nk1 = qhgrid->nk;
01502
01503
01504 const int ia2 = q2hgrid->i0;
01505 const int ib2 = ia2 + q2hgrid->ni - 1;
01506 const int ja2 = q2hgrid->j0;
01507 const int jb2 = ja2 + q2hgrid->nj - 1;
01508 const int ka2 = q2hgrid->k0;
01509 const int kb2 = ka2 + q2hgrid->nk - 1;
01510
01511 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01512 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01513 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01514
01515 const int nstencil = Nstencil[msm->approx];
01516 const int *offset = IndexOffset[msm->approx];
01517 const double *phi = PhiStencil[msm->approx];
01518
01519 double q2h_sum, cjk;
01520
01521 int i1, j1, k1, index1, jk1off, k1off;
01522 int i2, j2, k2;
01523 int index2;
01524 int i, j, k;
01525
01526 for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
01527 k1 = k2 * 2;
01528 for (j2 = ja2; j2 <= jb2; j2++) {
01529 j1 = j2 * 2;
01530 for (i2 = ia2; i2 <= ib2; i2++, index2++) {
01531 i1 = i2 * 2;
01532
01533 q2h_sum = 0;
01534 for (k = 0; k < nstencil; k++) {
01535 k1off = k1 + offset[k];
01536 if (k1off < ka1) {
01537 if (ispz) do { k1off += nk1; } while (k1off < ka1);
01538 else continue;
01539 }
01540 else if (k1off > kb1) {
01541 if (ispz) do { k1off -= nk1; } while (k1off > kb1);
01542 else break;
01543 }
01544 k1off *= nj1;
01545 for (j = 0; j < nstencil; j++) {
01546 jk1off = j1 + offset[j];
01547 if (jk1off < ja1) {
01548 if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
01549 else continue;
01550 }
01551 else if (jk1off > jb1) {
01552 if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
01553 else break;
01554 }
01555 jk1off = (k1off + jk1off) * ni1;
01556 cjk = phi[j] * phi[k];
01557 for (i = 0; i < nstencil; i++) {
01558 index1 = i1 + offset[i];
01559 if (index1 < ia1) {
01560 if (ispx) do { index1 += ni1; } while (index1 < ia1);
01561 else continue;
01562 }
01563 else if (index1 > ib1) {
01564 if (ispx) do { index1 -= ni1; } while (index1 > ib1);
01565 else break;
01566 }
01567 index1 += jk1off;
01568 q2h_sum += qh[index1] * phi[i] * cjk;
01569 }
01570 }
01571 }
01572
01573 q2h_buffer[index2] = q2h_sum;
01574 }
01575 }
01576 }
01577
01578 return NL_MSM_SUCCESS;
01579 }
01580
01581
01582 int prolongation(NL_Msm *msm, int level) {
01583
01584 NL_Msmgrid_double *ehgrid = &(msm->eh[level]);
01585 double *eh = ehgrid->data;
01586 const NL_Msmgrid_double *e2hgrid = &(msm->eh[level+1]);
01587 const double *e2h_buffer = e2hgrid->buffer;
01588
01589
01590 const int ia1 = ehgrid->i0;
01591 const int ib1 = ia1 + ehgrid->ni - 1;
01592 const int ja1 = ehgrid->j0;
01593 const int jb1 = ja1 + ehgrid->nj - 1;
01594 const int ka1 = ehgrid->k0;
01595 const int kb1 = ka1 + ehgrid->nk - 1;
01596 const int ni1 = ehgrid->ni;
01597 const int nj1 = ehgrid->nj;
01598 const int nk1 = ehgrid->nk;
01599
01600
01601 const int ia2 = e2hgrid->i0;
01602 const int ib2 = ia2 + e2hgrid->ni - 1;
01603 const int ja2 = e2hgrid->j0;
01604 const int jb2 = ja2 + e2hgrid->nj - 1;
01605 const int ka2 = e2hgrid->k0;
01606 const int kb2 = ka2 + e2hgrid->nk - 1;
01607
01608 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01609 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01610 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01611
01612 const int nstencil = Nstencil[msm->approx];
01613 const int *offset = IndexOffset[msm->approx];
01614 const double *phi = PhiStencil[msm->approx];
01615
01616 double cjk;
01617
01618 int i1, j1, k1, index1, jk1off, k1off;
01619 int i2, j2, k2;
01620 int index2;
01621 int i, j, k;
01622
01623 for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) {
01624 k1 = k2 * 2;
01625 for (j2 = ja2; j2 <= jb2; j2++) {
01626 j1 = j2 * 2;
01627 for (i2 = ia2; i2 <= ib2; i2++, index2++) {
01628 i1 = i2 * 2;
01629
01630 for (k = 0; k < nstencil; k++) {
01631 k1off = k1 + offset[k];
01632 if (k1off < ka1) {
01633 if (ispz) do { k1off += nk1; } while (k1off < ka1);
01634 else continue;
01635 }
01636 else if (k1off > kb1) {
01637 if (ispz) do { k1off -= nk1; } while (k1off > kb1);
01638 else break;
01639 }
01640 k1off *= nj1;
01641 for (j = 0; j < nstencil; j++) {
01642 jk1off = j1 + offset[j];
01643 if (jk1off < ja1) {
01644 if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
01645 else continue;
01646 }
01647 else if (jk1off > jb1) {
01648 if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
01649 else break;
01650 }
01651 jk1off = (k1off + jk1off) * ni1;
01652 cjk = phi[j] * phi[k];
01653 for (i = 0; i < nstencil; i++) {
01654 index1 = i1 + offset[i];
01655 if (index1 < ia1) {
01656 if (ispx) do { index1 += ni1; } while (index1 < ia1);
01657 else continue;
01658 }
01659 else if (index1 > ib1) {
01660 if (ispx) do { index1 -= ni1; } while (index1 > ib1);
01661 else break;
01662 }
01663 index1 += jk1off;
01664 eh[index1] += e2h_buffer[index2] * phi[i] * cjk;
01665 }
01666 }
01667 }
01668
01669 }
01670 }
01671 }
01672
01673 return NL_MSM_SUCCESS;
01674 }
01675
01676
01677 int gridcutoff(NL_Msm *msm, int level)
01678 {
01679 double eh_sum;
01680
01681
01682 const NL_Msmgrid_double *qhgrid = &(msm->qh[level]);
01683 const double *qh = qhgrid->data;
01684 NL_Msmgrid_double *ehgrid = &(msm->eh[level]);
01685 double *eh = ehgrid->data;
01686 const int ia = qhgrid->i0;
01687 const int ib = ia + qhgrid->ni - 1;
01688 const int ja = qhgrid->j0;
01689 const int jb = ja + qhgrid->nj - 1;
01690 const int ka = qhgrid->k0;
01691 const int kb = ka + qhgrid->nk - 1;
01692 const int ni = qhgrid->ni;
01693 const int nj = qhgrid->nj;
01694 const int nk = qhgrid->nk;
01695
01696
01697 const NL_Msmgrid_double *gcgrid = &(msm->gc[level]);
01698 const double *gc = gcgrid->data;
01699 const int gia = gcgrid->i0;
01700 const int gib = gia + gcgrid->ni - 1;
01701 const int gja = gcgrid->j0;
01702 const int gjb = gja + gcgrid->nj - 1;
01703 const int gka = gcgrid->k0;
01704 const int gkb = gka + gcgrid->nk - 1;
01705 const int gni = gcgrid->ni;
01706 const int gnj = gcgrid->nj;
01707
01708 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01709 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01710 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01711
01712 const int ispnone = !(ispx || ispy || ispz);
01713
01714 int i, j, k;
01715 int gia_clip, gib_clip;
01716 int gja_clip, gjb_clip;
01717 int gka_clip, gkb_clip;
01718 int koff;
01719 long jkoff, index;
01720 int id, jd, kd;
01721 int knoff;
01722 long jknoff, nindex;
01723 int kgoff, jkgoff, ngindex;
01724
01725 if ( ispnone ) {
01726
01727
01728 for (k = ka; k <= kb; k++) {
01729
01730
01731 gka_clip = (k + gka < ka ? ka - k : gka);
01732 gkb_clip = (k + gkb > kb ? kb - k : gkb);
01733
01734 koff = k * nj;
01735
01736 for (j = ja; j <= jb; j++) {
01737
01738
01739 gja_clip = (j + gja < ja ? ja - j : gja);
01740 gjb_clip = (j + gjb > jb ? jb - j : gjb);
01741
01742 jkoff = (koff + j) * ni;
01743
01744 for (i = ia; i <= ib; i++) {
01745
01746
01747 gia_clip = (i + gia < ia ? ia - i : gia);
01748 gib_clip = (i + gib > ib ? ib - i : gib);
01749
01750 index = jkoff + i;
01751
01752
01753 eh_sum = 0;
01754 for (kd = gka_clip; kd <= gkb_clip; kd++) {
01755 knoff = (k + kd) * nj;
01756 kgoff = kd * gnj;
01757
01758 for (jd = gja_clip; jd <= gjb_clip; jd++) {
01759 jknoff = (knoff + (j + jd)) * ni;
01760 jkgoff = (kgoff + jd) * gni;
01761
01762 for (id = gia_clip; id <= gib_clip; id++) {
01763 nindex = jknoff + (i + id);
01764 ngindex = jkgoff + id;
01765
01766 GRID_INDEX_CHECK(qhgrid, i+id, j+jd, k+kd);
01767 ASSERT(GRID_INDEX(qhgrid, i+id, j+jd, k+kd) == nindex);
01768
01769 GRID_INDEX_CHECK(gcgrid, id, jd, kd);
01770 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
01771
01772 eh_sum += qh[nindex] * gc[ngindex];
01773 }
01774 }
01775 }
01776
01777 GRID_INDEX_CHECK(ehgrid, i, j, k);
01778 ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
01779 eh[index] = eh_sum;
01780 }
01781 }
01782 }
01783
01784 }
01785 else {
01786
01787 int ilo, jlo, klo;
01788 int ip, jp, kp;
01789
01790
01791 for (k = ka; k <= kb; k++) {
01792 klo = k + gka;
01793 if ( ! ispz ) {
01794
01795 gka_clip = (k + gka < ka ? ka - k : gka);
01796 gkb_clip = (k + gkb > kb ? kb - k : gkb);
01797 if (klo < ka) klo = ka;
01798 }
01799 else {
01800 gka_clip = gka;
01801 gkb_clip = gkb;
01802 if (klo < ka) do { klo += nk; } while (klo < ka);
01803 }
01804 ASSERT(klo <= kb);
01805
01806 koff = k * nj;
01807
01808 for (j = ja; j <= jb; j++) {
01809 jlo = j + gja;
01810 if ( ! ispy ) {
01811
01812 gja_clip = (j + gja < ja ? ja - j : gja);
01813 gjb_clip = (j + gjb > jb ? jb - j : gjb);
01814 if (jlo < ja) jlo = ja;
01815 }
01816 else {
01817 gja_clip = gja;
01818 gjb_clip = gjb;
01819 if (jlo < ja) do { jlo += nj; } while (jlo < ja);
01820 }
01821 ASSERT(jlo <= jb);
01822
01823 jkoff = (koff + j) * ni;
01824
01825 for (i = ia; i <= ib; i++) {
01826 ilo = i + gia;
01827 if ( ! ispx ) {
01828
01829 gia_clip = (i + gia < ia ? ia - i : gia);
01830 gib_clip = (i + gib > ib ? ib - i : gib);
01831 if (ilo < ia) ilo = ia;
01832 }
01833 else {
01834 gia_clip = gia;
01835 gib_clip = gib;
01836 if (ilo < ia) do { ilo += ni; } while (ilo < ia);
01837 }
01838 ASSERT(ilo <= ib);
01839
01840 index = jkoff + i;
01841
01842
01843 eh_sum = 0;
01844 for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
01845
01846 if (kp > kb) kp = ka;
01847 knoff = kp * nj;
01848 kgoff = kd * gnj;
01849
01850 for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
01851
01852 if (jp > jb) jp = ja;
01853 jknoff = (knoff + jp) * ni;
01854 jkgoff = (kgoff + jd) * gni;
01855
01856 for (id = gia_clip, ip = ilo; id <= gib_clip; id++, ip++) {
01857
01858 if (ip > ib) ip = ia;
01859 nindex = jknoff + ip;
01860 ngindex = jkgoff + id;
01861
01862 GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
01863 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == nindex);
01864
01865 GRID_INDEX_CHECK(gcgrid, id, jd, kd);
01866 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
01867
01868 eh_sum += qh[nindex] * gc[ngindex];
01869
01870 }
01871 }
01872 }
01873
01874 GRID_INDEX_CHECK(ehgrid, i, j, k);
01875 ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
01876 eh[index] = eh_sum;
01877 }
01878 }
01879 }
01880
01881 }
01882
01883 return NL_MSM_SUCCESS;
01884 }