msm_longrng_sprec.c

Go to the documentation of this file.
00001 
00041 #include "msm_defn.h"
00042 
00043 static int anterpolation(NL_Msm *);
00044 static int interpolation(NL_Msm *);
00045 static int restriction(NL_Msm *, int level);
00046 static int prolongation(NL_Msm *, int level);
00047 static int restriction_factored(NL_Msm *, int level);
00048 static int prolongation_factored(NL_Msm *, int level);
00049 static int gridcutoff(NL_Msm *, int level);
00050 
00051 
00052 int NL_msm_compute_long_range_sprec(NL_Msm *msm) {
00053   double time_delta;
00054   int rc = 0;
00055   int level;
00056   int nlevels = msm->nlevels;
00057   int use_cuda_gridcutoff = (msm->msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF);
00058   int fallback_cpu = (msm->msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK);
00059   int use_nonfactored = (msm->msmflags & NL_MSM_COMPUTE_NONFACTORED);
00060 
00061   wkf_timer_start(msm->timer_longrng);
00062   rc = anterpolation(msm);
00063   if (rc) return rc;
00064   wkf_timer_stop(msm->timer_longrng);
00065   time_delta = wkf_timer_time(msm->timer_longrng);
00066   if (msm->report_timings) {
00067     printf("MSM anterpolation:  %6.3f sec\n", time_delta);
00068   }
00069 
00070   wkf_timer_start(msm->timer_longrng);
00071   if (use_nonfactored) {
00072     for (level = 0;  level < nlevels - 1;  level++) {
00073       rc = restriction(msm, level);
00074       if (rc) return rc;
00075     }
00076   }
00077   else {
00078     for (level = 0;  level < nlevels - 1;  level++) {
00079       rc = restriction_factored(msm, level);
00080       if (rc) return rc;
00081     }
00082   }
00083   wkf_timer_stop(msm->timer_longrng);
00084   time_delta = wkf_timer_time(msm->timer_longrng);
00085   if (msm->report_timings) {
00086     printf("MSM restriction:    %6.3f sec\n", time_delta);
00087   }
00088 
00089   wkf_timer_start(msm->timer_longrng);
00090   if (use_cuda_gridcutoff && nlevels > 1) {
00091 #ifdef NL_USE_CUDA
00092     if ((rc = NL_msm_cuda_condense_qgrids(msm)) != NL_MSM_SUCCESS ||
00093         (rc = NL_msm_cuda_compute_gridcutoff(msm)) != NL_MSM_SUCCESS ||
00094         (rc = NL_msm_cuda_expand_egrids(msm)) != NL_MSM_SUCCESS) {
00095       if (fallback_cpu) {
00096         printf("Falling back on CPU for grid cutoff computation\n");
00097         use_cuda_gridcutoff = 0;
00098       }
00099       else return rc;
00100     }
00101 #else
00102     if (fallback_cpu) {
00103       printf("Falling back on CPU for grid cutoff computation\n");
00104       use_cuda_gridcutoff = 0;
00105     }
00106     else return NL_MSM_ERROR_SUPPORT;
00107 #endif
00108   }
00109 
00110   if ( ! use_cuda_gridcutoff ) {
00111     for (level = 0;  level < nlevels - 1;  level++) {
00112       rc = gridcutoff(msm, level);
00113       if (rc) return rc;
00114     }
00115   }
00116 
00117   rc = gridcutoff(msm, level);  /* top level */
00118   if (rc) return rc;
00119 
00120   wkf_timer_stop(msm->timer_longrng);
00121   time_delta = wkf_timer_time(msm->timer_longrng);
00122   if (msm->report_timings) {
00123     printf("MSM grid cutoff:    %6.3f sec\n", time_delta);
00124   }
00125 
00126   wkf_timer_start(msm->timer_longrng);
00127   if (use_nonfactored) {
00128     for (level--;  level >= 0;  level--) {
00129       rc = prolongation(msm, level);
00130       if (rc) return rc;
00131     }
00132   }
00133   else {
00134     for (level--;  level >= 0;  level--) {
00135       rc = prolongation_factored(msm, level);
00136       if (rc) return rc;
00137     }
00138   }
00139   wkf_timer_stop(msm->timer_longrng);
00140   time_delta = wkf_timer_time(msm->timer_longrng);
00141   if (msm->report_timings) {
00142     printf("MSM prolongation:   %6.3f sec\n", time_delta);
00143   }
00144 
00145   wkf_timer_start(msm->timer_longrng);
00146   rc = interpolation(msm);
00147   if (rc) return rc;
00148   wkf_timer_stop(msm->timer_longrng);
00149   time_delta = wkf_timer_time(msm->timer_longrng);
00150   if (msm->report_timings) {
00151     printf("MSM interpolation:  %6.3f sec\n", time_delta);
00152   }
00153 
00154   return 0;
00155 }
00156 
00157 
00159 enum { MAX_POLY_DEGREE = 9 };
00160 
00163 static const int PolyDegree[NL_MSM_APPROX_END] = {
00164   3, 5, 5, 7, 7, 9, 9, 3,
00165 };
00166 
00169 static const float
00170 PhiStencilFactored[NL_MSM_APPROX_END][2*MAX_POLY_DEGREE+1] = {
00171   /* cubic */
00172   {-1.f/16, 0, 9.f/16, 1, 9.f/16, 0, -1.f/16},
00173 
00174   /* quintic C1 */
00175   {3.f/256, 0, -25.f/256, 0, 75.f/128, 1, 75.f/128, 0, -25.f/256, 0, 3.f/256},
00176 
00177   /* quintic C2  (same as quintic C1) */
00178   {3.f/256, 0, -25.f/256, 0, 75.f/128, 1, 75.f/128, 0, -25.f/256, 0, 3.f/256},
00179 
00180   /* septic C1 */
00181   { -5.f/2048, 0, 49.f/2048, 0, -245.f/2048, 0, 1225.f/2048, 1, 1225.f/2048,
00182     0, -245.f/2048, 0, 49.f/2048, 0, -5.f/2048 },
00183 
00184   /* septic C3  (same as septic C3) */
00185   { -5.f/2048, 0, 49.f/2048, 0, -245.f/2048, 0, 1225.f/2048, 1, 1225.f/2048,
00186     0, -245.f/2048, 0, 49.f/2048, 0, -5.f/2048 },
00187 
00188   /* nonic C1 */
00189   { 35.f/65536, 0, -405.f/65536, 0, 567.f/16384, 0, -2205.f/16384, 0,
00190     19845.f/32768, 1, 19845.f/32768, 0, -2205.f/16384, 0, 567.f/16384, 0,
00191     -405.f/65536, 0, 35.f/65536 },
00192 
00193   /* nonic C4  (same as nonic C1) */
00194   { 35.f/65536, 0, -405.f/65536, 0, 567.f/16384, 0, -2205.f/16384, 0,
00195     19845.f/32768, 1, 19845.f/32768, 0, -2205.f/16384, 0, 567.f/16384, 0,
00196     -405.f/65536, 0, 35.f/65536 },
00197 
00198   /* bspline */
00199   { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
00200 };
00201 
00202 
00205 enum { MAX_NSTENCIL = 11 };
00206 
00208 static const int Nstencil[NL_MSM_APPROX_END] = {
00209   5, 7, 7, 9, 9, 11, 11, 7
00210 };
00211 
00214 static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL] = {
00215   /* cubic */
00216   {-3, -1, 0, 1, 3},
00217 
00218   /* quintic C1 */
00219   {-5, -3, -1, 0, 1, 3, 5},
00220 
00221   /* quintic C2  (same as quintic C1) */
00222   {-5, -3, -1, 0, 1, 3, 5},
00223 
00224   /* septic C1 */
00225   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
00226 
00227   /* septic C3  (same as septic C3) */
00228   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
00229 
00230   /* nonic C1 */
00231   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
00232 
00233   /* nonic C4  (same as nonic C1) */
00234   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
00235 
00236   /* bspline */
00237   {-3, -2, -1, 0, 1, 2, 3},
00238 };
00239 
00242 static const float PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL] = {
00243   /* cubic */
00244   {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
00245 
00246   /* quintic C1 */
00247   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
00248 
00249   /* quintic C2  (same as quintic C1) */
00250   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
00251 
00252   /* septic C1 */
00253   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
00254     -245.f/2048, 49.f/2048, -5.f/2048 },
00255 
00256   /* septic C3  (same as septic C3) */
00257   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
00258     -245.f/2048, 49.f/2048, -5.f/2048 },
00259 
00260   /* nonic C1 */
00261   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
00262     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
00263     -405.f/65536, 35.f/65536 },
00264 
00265   /* nonic C4  (same as nonic C1) */
00266   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
00267     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
00268     -405.f/65536, 35.f/65536 },
00269 
00270   /* bspline */
00271   { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
00272 };
00273 
00274 
00280 #define STENCIL_1D(_phi, _delta, _approx) \
00281   do { \
00282     float *phi = _phi; \
00283     float t = _delta; \
00284     switch (_approx) { \
00285       case NL_MSM_APPROX_CUBIC: \
00286         phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
00287         t--; \
00288         phi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
00289         t--; \
00290         phi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
00291         t--; \
00292         phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
00293         break; \
00294       case NL_MSM_APPROX_QUINTIC: \
00295         phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
00296         t--; \
00297         phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6) + t * (0.375f - (5.f/24)*t));\
00298         t--; \
00299         phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t)); \
00300         t--; \
00301         phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t)); \
00302         t--; \
00303         phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6) - t * (0.375f + (5.f/24)*t));\
00304         t--; \
00305         phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
00306         break; \
00307       case NL_MSM_APPROX_QUINTIC2: \
00308         phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
00309         t--; \
00310         phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
00311         t--; \
00312         phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
00313         t--; \
00314         phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
00315         t--; \
00316         phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
00317         t--; \
00318         phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
00319         break; \
00320       case NL_MSM_APPROX_SEPTIC: \
00321         phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
00322         t--; \
00323         phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
00324         t--; \
00325         phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
00326         t--; \
00327         phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
00328         t--; \
00329         phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
00330         t--; \
00331         phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
00332         t--; \
00333         phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
00334         t--; \
00335         phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
00336         break; \
00337       case NL_MSM_APPROX_SEPTIC3: \
00338         phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633 \
00339                 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240) \
00340                       + t*(-89.f/720))))))); \
00341         t--; \
00342         phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2) \
00343                 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48) \
00344                       + t*(623.f/720))))))); \
00345         t--; \
00346         phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2) \
00347                 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80) \
00348                       + t*(-623.f/240))))))); \
00349         t--; \
00350         phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144) \
00351                 + t*((-727.f/48) + t*(623.f/144))))); \
00352         t--; \
00353         phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144) \
00354                 + t*((-727.f/48) + t*(-623.f/144))))); \
00355         t--; \
00356         phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2) \
00357                 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80) \
00358                       + t*(623.f/240))))))); \
00359         t--; \
00360         phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2) \
00361                 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48) \
00362                       + t*(-623.f/720))))))); \
00363         t--; \
00364         phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633 \
00365                 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240) \
00366                       + t*(89.f/720))))))); \
00367         break; \
00368       case NL_MSM_APPROX_NONIC: \
00369         phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
00370           (t-2)*(t-1); \
00371         t--; \
00372         phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
00373           (-8+t*(-35+9*t)); \
00374         t--; \
00375         phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
00376           (-14+t*(-25+9*t)); \
00377         t--; \
00378         phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
00379           (-6+t*(-5+3*t)); \
00380         t--; \
00381         phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
00382           (-20+t*(-5+9*t)); \
00383         t--; \
00384         phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
00385           (-20+t*(5+9*t)); \
00386         t--; \
00387         phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
00388           (-6+t*(5+3*t)); \
00389         t--; \
00390         phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
00391           (-14+t*(25+9*t)); \
00392         t--; \
00393         phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
00394           (-8+t*(35+9*t)); \
00395         t--; \
00396         phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
00397           (t+7)*(t+8); \
00398         break; \
00399       case NL_MSM_APPROX_NONIC4: \
00400         phi[0] = 439375.f/7+t*(-64188125.f/504+t*(231125375.f/2016 \
00401               +t*(-17306975.f/288+t*(7761805.f/384+t*(-2895587.f/640 \
00402                     +t*(129391.f/192+t*(-259715.f/4032+t*(28909.f/8064 \
00403                           +t*(-3569.f/40320))))))))); \
00404         t--; \
00405         phi[1] = -56375+t*(8314091.f/56+t*(-49901303.f/288+t*(3763529.f/32 \
00406                 +t*(-19648027.f/384+t*(9469163.f/640+t*(-545977.f/192 \
00407                       +t*(156927.f/448+t*(-28909.f/1152 \
00408                           +t*(3569.f/4480))))))))); \
00409         t--; \
00410         phi[2] = 68776.f/7+t*(-1038011.f/28+t*(31157515.f/504+t*(-956669.f/16 \
00411                 +t*(3548009.f/96+t*(-2422263.f/160+t*(197255.f/48 \
00412                       +t*(-19959.f/28+t*(144545.f/2016 \
00413                           +t*(-3569.f/1120))))))))); \
00414         t--; \
00415         phi[3] = -154+t*(12757.f/12+t*(-230123.f/72+t*(264481.f/48 \
00416                 +t*(-576499.f/96+t*(686147.f/160+t*(-96277.f/48 \
00417                       +t*(14221.f/24+t*(-28909.f/288+t*(3569.f/480))))))))); \
00418         t--; \
00419         phi[4] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(-6181.f/320 \
00420                 +t*(6337.f/96+t*(-2745.f/32+t*(28909.f/576 \
00421                       +t*(-3569.f/320))))))); \
00422         t--; \
00423         phi[5] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(6181.f/320 \
00424                 +t*(6337.f/96+t*(2745.f/32+t*(28909.f/576 \
00425                       +t*(3569.f/320))))))); \
00426         t--; \
00427         phi[6] = -154+t*(-12757.f/12+t*(-230123.f/72+t*(-264481.f/48 \
00428                 +t*(-576499.f/96+t*(-686147.f/160+t*(-96277.f/48 \
00429                       +t*(-14221.f/24+t*(-28909.f/288+t*(-3569.f/480))))))))); \
00430         t--; \
00431         phi[7] = 68776.f/7+t*(1038011.f/28+t*(31157515.f/504+t*(956669.f/16 \
00432                 +t*(3548009.f/96+t*(2422263.f/160+t*(197255.f/48 \
00433                       +t*(19959.f/28+t*(144545.f/2016+t*(3569.f/1120))))))))); \
00434         t--; \
00435         phi[8] = -56375+t*(-8314091.f/56+t*(-49901303.f/288+t*(-3763529.f/32 \
00436                 +t*(-19648027.f/384+t*(-9469163.f/640+t*(-545977.f/192 \
00437                       +t*(-156927.f/448+t*(-28909.f/1152 \
00438                           +t*(-3569.f/4480))))))))); \
00439         t--; \
00440         phi[9] = 439375.f/7+t*(64188125.f/504+t*(231125375.f/2016 \
00441               +t*(17306975.f/288+t*(7761805.f/384+t*(2895587.f/640 \
00442                     +t*(129391.f/192+t*(259715.f/4032+t*(28909.f/8064 \
00443                           +t*(3569.f/40320))))))))); \
00444         break; \
00445       case NL_MSM_APPROX_BSPLINE: \
00446         phi[0] = (1.f/6) * (2-t) * (2-t) * (2-t); \
00447         t--; \
00448         phi[1] = (2.f/3) + t*t*(-1 + 0.5f*t); \
00449         t--; \
00450         phi[2] = (2.f/3) + t*t*(-1 - 0.5f*t); \
00451         t--; \
00452         phi[3] = (1.f/6) * (2+t) * (2+t) * (2+t); \
00453         break; \
00454       default: \
00455         return NL_MSM_ERROR_SUPPORT; \
00456     } \
00457   } while (0)
00458   /* closing ';' from use as function call */
00459 
00460 
00468 #define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) \
00469   do { \
00470     float *dphi = _dphi; \
00471     float *phi = _phi; \
00472     float h_1 = _h_1; \
00473     float t = _delta; \
00474     switch (_approx) { \
00475       case NL_MSM_APPROX_CUBIC: \
00476         phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
00477         dphi[0] = (1.5f * t - 2) * (2 - t) * h_1; \
00478         t--; \
00479         phi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
00480         dphi[1] = (-5 + 4.5f * t) * t * h_1; \
00481         t--; \
00482         phi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
00483         dphi[2] = (-5 - 4.5f * t) * t * h_1; \
00484         t--; \
00485         phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
00486         dphi[3] = (1.5f * t + 2) * (2 + t) * h_1; \
00487         break; \
00488       case NL_MSM_APPROX_QUINTIC: \
00489         phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \
00490         dphi[0] = ((-1.f/24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t)) \
00491               + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1; \
00492         t--; \
00493         phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6) + t * (0.375f - (5.f/24)*t));\
00494         dphi[1] = (-((1.f/6) + t * (0.375f - (5.f/24)*t)) * (11 + t * (-12 + 3*t))\
00495             + (1-t) * (2-t) * (3-t) * (0.375f - (5.f/12)*t)) * h_1; \
00496         t--; \
00497         phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t)); \
00498         dphi[2] = (-(0.5f + t * (0.25f - (5.f/12)*t)) * (1 + t * (4 - 3*t)) \
00499             + (1-t*t) * (2-t) * (0.25f - (5.f/6)*t)) * h_1; \
00500         t--; \
00501         phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t)); \
00502         dphi[3] = ((0.5f + t * (-0.25f - (5.f/12)*t)) * (1 + t * (-4 - 3*t)) \
00503             - (1-t*t) * (2+t) * (0.25f + (5.f/6)*t)) * h_1; \
00504         t--; \
00505         phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6) - t * (0.375f + (5.f/24)*t));\
00506         dphi[4] = (((1.f/6) + t * (-0.375f - (5.f/24)*t)) * (11 + t * (12 + 3*t)) \
00507             - (1+t) * (2+t) * (3+t) * (0.375f + (5.f/12)*t)) * h_1; \
00508         t--; \
00509         phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \
00510         dphi[5] = ((1.f/24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t)) \
00511               + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1; \
00512         break; \
00513       case NL_MSM_APPROX_QUINTIC2: \
00514         phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \
00515         dphi[0] = ((1.f/24) * (3-t) * (3-t) * ((3-t)*(5*t-8) - 3*(t-2)*(5*t-8) \
00516               + 5*(t-2)*(3-t))) * h_1; \
00517         t--; \
00518         phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \
00519         dphi[1] = ((-1.f/24) * ((2-t)*(-48+t*(153+t*(-114+t*25))) - (t-1)* \
00520               (-48+t*(153+t*(-114+t*25))) \
00521               + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1; \
00522         t--; \
00523         phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \
00524         dphi[2] = ((1.f/12) * (-(12+t*(12+t*(-3+t*(-38+t*25)))) \
00525               + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1; \
00526         t--; \
00527         phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \
00528         dphi[3] = ((1.f/12) * ((12+t*(-12+t*(-3+t*(38+t*25)))) \
00529               + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1; \
00530         t--; \
00531         phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \
00532         dphi[4] = ((-1.f/24) * ((2+t)*(48+t*(153+t*(114+t*25))) + (t+1)* \
00533               (48+t*(153+t*(114+t*25))) \
00534               + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1; \
00535         t--; \
00536         phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \
00537         dphi[5] = ((1.f/24) * (3+t) * (3+t) * ((3+t)*(5*t+8) + 3*(t+2)*(5*t+8) \
00538               + 5*(t+2)*(3+t))) * h_1; \
00539         break; \
00540       case NL_MSM_APPROX_SEPTIC: \
00541         phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \
00542         dphi[0] = (-1.f/720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807 \
00543                   +t*(-122+t*7))))) * h_1; \
00544         t--; \
00545         phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \
00546         dphi[1] = (1.f/720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445 \
00547                     +t*(-750+t*49)))))) * h_1; \
00548         t--; \
00549         phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \
00550         dphi[2] = (-1.f/240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365 \
00551                     +t*(-450+t*49)))))) * h_1; \
00552         t--; \
00553         phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \
00554         dphi[3] = (1.f/144)*t*(-560+t*(84+t*(644+t*(-175+t*(-150+t*49))))) * \
00555           h_1; \
00556         t--; \
00557         phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \
00558         dphi[4] = (-1.f/144)*t*(560+t*(84+t*(-644+t*(-175+t*(150+t*49))))) * \
00559           h_1; \
00560         t--; \
00561         phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \
00562         dphi[5] = (1.f/240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365 \
00563                     +t*(450+t*49)))))) * h_1; \
00564         t--; \
00565         phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \
00566         dphi[6] = (-1.f/720)*(756+t*(9940+t*(17724+t*(12740+t*(4445 \
00567                     +t*(750+t*49)))))) * h_1; \
00568         t--; \
00569         phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \
00570         dphi[7] = (1.f/720)*(t+4)*(1944+t*(3644+t*(2512+t*(807 \
00571                   +t*(122+t*7))))) * h_1; \
00572         break; \
00573       case NL_MSM_APPROX_SEPTIC3: \
00574         phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633 \
00575                 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240) \
00576                       + t*(-89.f/720))))))); \
00577         dphi[0] = ((-7456.f/5) + t*((117572.f/45) + t*(-1899 + t*((26383.f/36) \
00578                   + t*((-22807.f/144) + t*((727.f/40) + t*(-623.f/720)))))))*h_1; \
00579         t--; \
00580         phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2) \
00581                 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48) \
00582                       + t*(623.f/720))))))); \
00583         dphi[1] = ((25949.f/20) + t*((-117131.f/36) + t*((6741.f/2) \
00584                 + t*((-66437.f/36) + t*((81109.f/144) + t*((-727.f/8) \
00585                       + t*(4361.f/720))))))) * h_1; \
00586         t--; \
00587         phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2) \
00588                 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80) \
00589                       + t*(-623.f/240))))))); \
00590         dphi[2] = ((-8617.f/60) + t*((12873.f/20) + t*((-2373.f/2) + t*((4557.f/4) \
00591                   + t*((-9583.f/16) + t*((6543.f/40) + t*(-4361.f/240)))))))*h_1; \
00592         t--; \
00593         phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144) \
00594                 + t*((-727.f/48) + t*(623.f/144))))); \
00595         dphi[3] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((12845.f/144) \
00596                   + t*((-727.f/8) + t*(4361.f/144)))))) * h_1; \
00597         t--; \
00598         phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144) \
00599                 + t*((-727.f/48) + t*(-623.f/144))))); \
00600         dphi[4] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((-12845.f/144) \
00601                   + t*((-727.f/8) + t*(-4361.f/144)))))) * h_1; \
00602         t--; \
00603         phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2) \
00604                 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80) \
00605                       + t*(623.f/240))))))); \
00606         dphi[5] = ((8617.f/60) + t*((12873.f/20) + t*((2373.f/2) + t*((4557.f/4) \
00607                   + t*((9583.f/16) + t*((6543.f/40) + t*(4361.f/240)))))))*h_1; \
00608         t--; \
00609         phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2) \
00610                 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48) \
00611                       + t*(-623.f/720))))))); \
00612         dphi[6] = ((-25949.f/20) + t*((-117131.f/36) + t*((-6741.f/2) \
00613                 + t*((-66437.f/36) + t*((-81109.f/144) + t*((-727.f/8) \
00614                       + t*(-4361.f/720))))))) * h_1; \
00615         t--; \
00616         phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633 \
00617                 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240) \
00618                       + t*(89.f/720))))))); \
00619         dphi[7] = ((7456.f/5) + t*((117572.f/45) + t*(1899 + t*((26383.f/36) \
00620                   + t*((22807.f/144) + t*((727.f/40) + t*(623.f/720)))))))*h_1; \
00621         break; \
00622       case NL_MSM_APPROX_NONIC: \
00623         phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \
00624           (t-2)*(t-1); \
00625         dphi[0] = (-1.f/40320)*(t-5)*(-117648+t*(256552+t*(-221416 \
00626                 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1; \
00627         t--; \
00628         phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \
00629           (-8+t*(-35+9*t)); \
00630         dphi[1] = (1.f/40320)*(71856+t*(-795368+t*(1569240+t*(-1357692 \
00631                   +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1; \
00632         t--; \
00633         phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \
00634           (-14+t*(-25+9*t)); \
00635         dphi[2] = (1.f/10080)*(3384+t*(-69080+t*(55026 \
00636                 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640 \
00637                           +t*(-81)))))))))*h_1; \
00638         t--; \
00639         phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \
00640           (-6+t*(-5+3*t)); \
00641         dphi[3] = (1.f/1440)*(72+t*(-6344+t*(2070 \
00642                 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1; \
00643         t--; \
00644         phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \
00645           (-20+t*(-5+9*t)); \
00646         dphi[4] = (-1.f/2880)*t*(10792+t*(-972+t*(-12516 \
00647                 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1; \
00648         t--; \
00649         phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \
00650           (-20+t*(5+9*t)); \
00651         dphi[5] = (1.f/2880)*t*(-10792+t*(-972+t*(12516 \
00652                 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1; \
00653         t--; \
00654         phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \
00655           (-6+t*(5+3*t)); \
00656         dphi[6] = (1.f/1440)*(-72+t*(-6344+t*(-2070 \
00657                 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1; \
00658         t--; \
00659         phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \
00660           (-14+t*(25+9*t)); \
00661         dphi[7] = (1.f/10080)*(-3384+t*(-69080+t*(-55026 \
00662                 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1; \
00663         t--; \
00664         phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \
00665           (-8+t*(35+9*t)); \
00666         dphi[8] = (-1.f/40320)*(71856+t*(795368+t*(1569240 \
00667                 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296 \
00668                           +t*81))))))))*h_1; \
00669         t--; \
00670         phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \
00671           (t+7)*(t+8); \
00672         dphi[9] = (1.f/40320)*(t+5)*(117648+t*(256552+t*(221416 \
00673                 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1; \
00674         break; \
00675       case NL_MSM_APPROX_NONIC4: \
00676         phi[0] = 439375.f/7+t*(-64188125.f/504+t*(231125375.f/2016 \
00677               +t*(-17306975.f/288+t*(7761805.f/384+t*(-2895587.f/640 \
00678                     +t*(129391.f/192+t*(-259715.f/4032+t*(28909.f/8064 \
00679                           +t*(-3569.f/40320))))))))); \
00680         dphi[0] = (-64188125.f/504+t*(231125375.f/1008 \
00681               +t*(-17306975.f/96+t*(7761805.f/96+t*(-2895587.f/128 \
00682                     +t*(129391.f/32+t*(-259715.f/576+t*(28909.f/1008 \
00683                           +t*(-3569.f/4480)))))))))*h_1; \
00684         t--; \
00685         phi[1] = -56375+t*(8314091.f/56+t*(-49901303.f/288+t*(3763529.f/32 \
00686                 +t*(-19648027.f/384+t*(9469163.f/640+t*(-545977.f/192 \
00687                       +t*(156927.f/448+t*(-28909.f/1152 \
00688                           +t*(3569.f/4480))))))))); \
00689         dphi[1] = (8314091.f/56+t*(-49901303.f/144+t*(11290587.f/32 \
00690                 +t*(-19648027.f/96+t*(9469163.f/128+t*(-545977.f/32 \
00691                       +t*(156927.f/64+t*(-28909.f/144 \
00692                           +t*(32121.f/4480)))))))))*h_1; \
00693         t--; \
00694         phi[2] = 68776.f/7+t*(-1038011.f/28+t*(31157515.f/504+t*(-956669.f/16 \
00695                 +t*(3548009.f/96+t*(-2422263.f/160+t*(197255.f/48 \
00696                       +t*(-19959.f/28+t*(144545.f/2016 \
00697                           +t*(-3569.f/1120))))))))); \
00698         dphi[2] = (-1038011.f/28+t*(31157515.f/252+t*(-2870007.f/16 \
00699                 +t*(3548009.f/24+t*(-2422263.f/32+t*(197255.f/8 \
00700                       +t*(-19959.f/4+t*(144545.f/252 \
00701                           +t*(-32121.f/1120)))))))))*h_1; \
00702         t--; \
00703         phi[3] = -154+t*(12757.f/12+t*(-230123.f/72+t*(264481.f/48 \
00704                 +t*(-576499.f/96+t*(686147.f/160+t*(-96277.f/48 \
00705                       +t*(14221.f/24+t*(-28909.f/288+t*(3569.f/480))))))))); \
00706         dphi[3] = (12757.f/12+t*(-230123.f/36+t*(264481.f/16 \
00707                 +t*(-576499.f/24+t*(686147.f/32+t*(-96277.f/8 \
00708                       +t*(99547.f/24+t*(-28909.f/36 \
00709                           +t*(10707.f/160)))))))))*h_1; \
00710         t--; \
00711         phi[4] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(-6181.f/320 \
00712                 +t*(6337.f/96+t*(-2745.f/32+t*(28909.f/576 \
00713                       +t*(-3569.f/320))))))); \
00714         dphi[4] = t*(-205.f/72+t*t*(91.f/48+t*(-6181.f/64 \
00715                 +t*(6337.f/16+t*(-19215.f/32+t*(28909.f/72 \
00716                       +t*(-32121.f/320)))))))*h_1; \
00717         t--; \
00718         phi[5] = 1+t*t*(-205.f/144+t*t*(91.f/192+t*(6181.f/320 \
00719                 +t*(6337.f/96+t*(2745.f/32+t*(28909.f/576 \
00720                       +t*(3569.f/320))))))); \
00721         dphi[5] = t*(-205.f/72+t*t*(91.f/48+t*(6181.f/64 \
00722                 +t*(6337.f/16+t*(19215.f/32+t*(28909.f/72 \
00723                       +t*(32121.f/320)))))))*h_1; \
00724         t--; \
00725         phi[6] = -154+t*(-12757.f/12+t*(-230123.f/72+t*(-264481.f/48 \
00726                 +t*(-576499.f/96+t*(-686147.f/160+t*(-96277.f/48 \
00727                       +t*(-14221.f/24+t*(-28909.f/288+t*(-3569.f/480))))))))); \
00728         dphi[6] = (-12757.f/12+t*(-230123.f/36+t*(-264481.f/16 \
00729                 +t*(-576499.f/24+t*(-686147.f/32+t*(-96277.f/8 \
00730                       +t*(-99547.f/24+t*(-28909.f/36 \
00731                           +t*(-10707.f/160)))))))))*h_1; \
00732         t--; \
00733         phi[7] = 68776.f/7+t*(1038011.f/28+t*(31157515.f/504+t*(956669.f/16 \
00734                 +t*(3548009.f/96+t*(2422263.f/160+t*(197255.f/48 \
00735                       +t*(19959.f/28+t*(144545.f/2016+t*(3569.f/1120))))))))); \
00736         dphi[7] = (1038011.f/28+t*(31157515.f/252+t*(2870007.f/16 \
00737                 +t*(3548009.f/24+t*(2422263.f/32+t*(197255.f/8 \
00738                       +t*(19959.f/4+t*(144545.f/252 \
00739                           +t*(32121.f/1120)))))))))*h_1; \
00740         t--; \
00741         phi[8] = -56375+t*(-8314091.f/56+t*(-49901303.f/288+t*(-3763529.f/32 \
00742                 +t*(-19648027.f/384+t*(-9469163.f/640+t*(-545977.f/192 \
00743                       +t*(-156927.f/448+t*(-28909.f/1152 \
00744                           +t*(-3569.f/4480))))))))); \
00745         dphi[8] = (-8314091.f/56+t*(-49901303.f/144+t*(-11290587.f/32 \
00746                 +t*(-19648027.f/96+t*(-9469163.f/128+t*(-545977.f/32 \
00747                       +t*(-156927.f/64+t*(-28909.f/144 \
00748                           +t*(-32121.f/4480)))))))))*h_1; \
00749         t--; \
00750         phi[9] = 439375.f/7+t*(64188125.f/504+t*(231125375.f/2016 \
00751               +t*(17306975.f/288+t*(7761805.f/384+t*(2895587.f/640 \
00752                     +t*(129391.f/192+t*(259715.f/4032+t*(28909.f/8064 \
00753                           +t*(3569.f/40320))))))))); \
00754         dphi[9] = (64188125.f/504+t*(231125375.f/1008 \
00755               +t*(17306975.f/96+t*(7761805.f/96+t*(2895587.f/128 \
00756                     +t*(129391.f/32+t*(259715.f/576+t*(28909.f/1008 \
00757                           +t*(3569.f/4480)))))))))*h_1; \
00758         break; \
00759       case NL_MSM_APPROX_BSPLINE: \
00760         phi[0] = (1.f/6) * (2-t) * (2-t) * (2-t); \
00761         dphi[0] = -0.5f * (2-t) * (2-t) * h_1; \
00762         t--; \
00763         phi[1] = (2.f/3) + t*t*(-1 + 0.5f*t); \
00764         dphi[1] = t*(-2 + 1.5f*t) * h_1; \
00765         t--; \
00766         phi[2] = (2.f/3) + t*t*(-1 - 0.5f*t); \
00767         dphi[2] = t*(-2 - 1.5f*t) * h_1; \
00768         t--; \
00769         phi[3] = (1.f/6) * (2+t) * (2+t) * (2+t); \
00770         dphi[3] = 0.5f * (2+t) * (2+t) * h_1; \
00771         break; \
00772       default: \
00773         return NL_MSM_ERROR_SUPPORT; \
00774     } \
00775   } while (0)
00776   /* closing ';' from use as function call */
00777 
00778 
00779 int anterpolation(NL_Msm *msm)
00780 {
00781   const float *atom = msm->atom_f;
00782   const int natoms = msm->numatoms;
00783 
00784   float xphi[MAX_POLY_DEGREE+1];  /* Phi stencil along x-dimension */
00785   float yphi[MAX_POLY_DEGREE+1];  /* Phi stencil along y-dimension */
00786   float zphi[MAX_POLY_DEGREE+1];  /* Phi stencil along z-dimension */
00787   float rx_hx, ry_hy, rz_hz;      /* normalized distance from atom to origin */
00788 #ifndef TEST_INLINING
00789   float delta;                    /* normalized distance to stencil point */
00790 #else
00791   float t;                    /* normalized distance to stencil point */
00792 #endif
00793   float ck, cjk;
00794   const float hx_1 = 1/msm->hx_f;
00795   const float hy_1 = 1/msm->hy_f;
00796   const float hz_1 = 1/msm->hz_f;
00797   const float xm0 = msm->gx_f;      /* grid origin x */
00798   const float ym0 = msm->gy_f;      /* grid origin y */
00799   const float zm0 = msm->gz_f;      /* grid origin z */
00800   float q;
00801 
00802   NL_Msmgrid_float *qhgrid = &(msm->qh_f[0]);
00803   float *qh = qhgrid->data;
00804   const int ni = qhgrid->ni;
00805   const int nj = qhgrid->nj;
00806   const int nk = qhgrid->nk;
00807   const int ia = qhgrid->i0;
00808   const int ib = ia + ni - 1;
00809   const int ja = qhgrid->j0;
00810   const int jb = ja + nj - 1;
00811   const int ka = qhgrid->k0;
00812   const int kb = ka + nk - 1;
00813 
00814   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
00815   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
00816   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
00817   int iswithin;
00818 
00819   int n, i, j, k, ilo, jlo, klo, koff;
00820   int jkoff, index;
00821 
00822   const int approx = msm->approx;
00823   const int s_size = PolyDegree[approx] + 1;         /* stencil size */
00824   const int s_edge = (PolyDegree[approx] - 1) >> 1;  /* stencil "edge" size */
00825 
00826   GRID_ZERO(qhgrid);
00827 
00828   for (n = 0;  n < natoms;  n++) {
00829 
00830     /* atomic charge */
00831     q = atom[4*n + 3];
00832     if (0==q) continue;
00833 
00834     /* distance between atom and origin measured in grid points */
00835     rx_hx = (atom[4*n    ] - xm0) * hx_1;
00836     ry_hy = (atom[4*n + 1] - ym0) * hy_1;
00837     rz_hz = (atom[4*n + 2] - zm0) * hz_1;
00838 
00839     /* find smallest numbered grid point in stencil */
00840     ilo = (int) floorf(rx_hx) - s_edge;
00841     jlo = (int) floorf(ry_hy) - s_edge;
00842     klo = (int) floorf(rz_hz) - s_edge;
00843 
00844     /* calculate Phi stencils along each dimension */
00845 #ifndef TEST_INLINING
00846     delta = rx_hx - (float) ilo;
00847     STENCIL_1D(xphi, delta, approx);
00848     delta = ry_hy - (float) jlo;
00849     STENCIL_1D(yphi, delta, approx);
00850     delta = rz_hz - (float) klo;
00851     STENCIL_1D(zphi, delta, approx);
00852 #else
00853     t = rx_hx - (float) ilo;
00854         xphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
00855         t--; \
00856         xphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
00857         t--; \
00858         xphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
00859         t--; \
00860         xphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
00861 
00862     t = ry_hy - (double) jlo;
00863         yphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
00864         t--; \
00865         yphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
00866         t--; \
00867         yphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
00868         t--; \
00869         yphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
00870 
00871     t = rz_hz - (double) klo;
00872         zphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
00873         t--; \
00874         zphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
00875         t--; \
00876         zphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
00877         t--; \
00878         zphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
00879 
00880 #endif
00881 
00882     /* test to see if stencil is within edges of grid */
00883     iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
00884                  ja <= jlo && (jlo+(s_size-1)) <= jb &&
00885                  ka <= klo && (klo+(s_size-1)) <= kb );
00886 
00887     if ( iswithin ) {  /* no wrapping needed */
00888 
00889       /* determine charge on cube of grid points around atom */
00890       for (k = 0;  k < s_size;  k++) {
00891         koff = (k + klo) * nj;
00892         ck = zphi[k] * q;
00893         for (j = 0;  j < s_size;  j++) {
00894           jkoff = (koff + (j + jlo)) * ni;
00895           cjk = yphi[j] * ck;
00896           for (i = 0;  i < s_size;  i++) {
00897             index = jkoff + (i + ilo);
00898             GRID_INDEX_CHECK(qhgrid, i+ilo, j+jlo, k+klo);
00899             ASSERT(GRID_INDEX(qhgrid, i+ilo, j+jlo, k+klo) == index);
00900             qh[index] += xphi[i] * cjk;
00901           }
00902         }
00903       }
00904     } /* if */
00905 
00906     else {  /* requires wrapping around grid */
00907       int ip, jp, kp;
00908 
00909       /* adjust ilo, jlo, klo so they are within grid indexing */
00910       if (ispx) {
00911         if      (ilo < ia) do { ilo += ni; } while (ilo < ia);
00912         else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
00913       }
00914       else if (ilo < ia || (ilo+(s_size-1)) > ib) {
00915         return NL_MSM_ERROR_RANGE;
00916       }
00917 
00918       if (ispy) {
00919         if      (jlo < ja) do { jlo += nj; } while (jlo < ja);
00920         else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
00921       }
00922       else if (jlo < ja || (jlo+(s_size-1)) > jb) {
00923         return NL_MSM_ERROR_RANGE;
00924       }
00925 
00926       if (ispz) {
00927         if      (klo < ka) do { klo += nk; } while (klo < ka);
00928         else if (klo > kb) do { klo -= nk; } while (klo > kb);
00929       }
00930       else if (klo < ka || (klo+(s_size-1)) > kb) {
00931         return NL_MSM_ERROR_RANGE;
00932       }
00933 
00934       /* determine charge on cube of grid points around atom, with wrapping */
00935       for (k = 0, kp = klo;  k < s_size;  k++, kp++) {
00936         if (kp > kb) kp = ka;  /* wrap stencil around grid */
00937         koff = kp * nj;
00938         ck = zphi[k] * q;
00939         for (j = 0, jp = jlo;  j < s_size;  j++, jp++) {
00940           if (jp > jb) jp = ja;  /* wrap stencil around grid */
00941           jkoff = (koff + jp) * ni;
00942           cjk = yphi[j] * ck;
00943           for (i = 0, ip = ilo;  i < s_size;  i++, ip++) {
00944             if (ip > ib) ip = ia;  /* wrap stencil around grid */
00945             index = jkoff + ip;
00946             GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
00947             ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == index);
00948             qh[index] += xphi[i] * cjk;
00949           }
00950         }
00951       }
00952     } /* else */
00953 
00954   } /* end loop over atoms */
00955 
00956   return NL_MSM_SUCCESS;
00957 } /* anterpolation */
00958 
00959 
00960 int interpolation(NL_Msm *msm)
00961 {
00962   float *f = msm->felec_f;
00963   const float *atom = msm->atom_f;
00964   const int natoms = msm->numatoms;
00965 
00966   float xphi[MAX_POLY_DEGREE+1];  /* Phi stencil along x-dimension */
00967   float yphi[MAX_POLY_DEGREE+1];  /* Phi stencil along y-dimension */
00968   float zphi[MAX_POLY_DEGREE+1];  /* Phi stencil along z-dimension */
00969   float dxphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along x-dimension */
00970   float dyphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along y-dimension */
00971   float dzphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along z-dimension */
00972   float rx_hx, ry_hy, rz_hz;      /* normalized distance from atom to origin */
00973 #ifndef TEST_INLINING
00974   float delta;                    /* normalized distance to stencil point */
00975 #else
00976   float t;                    /* normalized distance to stencil point */
00977 #endif
00978   const float hx_1 = 1/msm->hx_f;
00979   const float hy_1 = 1/msm->hy_f;
00980   const float hz_1 = 1/msm->hz_f;
00981   const float xm0 = msm->gx_f;      /* grid origin x */
00982   const float ym0 = msm->gy_f;      /* grid origin y */
00983   const float zm0 = msm->gz_f;      /* grid origin z */
00984   float q;
00985   double u = 0;  /* need double precision for accumulating potential */
00986 
00987   const NL_Msmgrid_float *ehgrid = &(msm->eh_f[0]);
00988   const float *eh = ehgrid->data;
00989   const float *ebuf = ehgrid->buffer;
00990   const NL_Msmgrid_float *qhgrid = &(msm->qh_f[0]);
00991   const float *qbuf = qhgrid->buffer;
00992   const int ni = ehgrid->ni;
00993   const int nj = ehgrid->nj;
00994   const int nk = ehgrid->nk;
00995   const int ia = ehgrid->i0;
00996   const int ib = ia + ni - 1;
00997   const int ja = ehgrid->j0;
00998   const int jb = ja + nj - 1;
00999   const int ka = ehgrid->k0;
01000   const int kb = ka + nk - 1;
01001 
01002   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01003   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01004   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01005   int iswithin;
01006 
01007   float fx, fy, fz, cx, cy, cz;
01008 
01009   int n, i, j, k, ilo, jlo, klo, koff;
01010   long jkoff, index;
01011   const int nn = (ni*nj) * nk;
01012 
01013   const int approx = msm->approx;
01014   const int s_size = PolyDegree[approx] + 1;         /* stencil size */
01015   const int s_edge = (PolyDegree[approx] - 1) >> 1;  /* stencil "edge" size */
01016 
01017 #ifdef NL_DEBUG
01018 #if 1
01019   printf("+++ eh[0,0,0] = %g\n", eh[0]);
01020 #else
01021   n = 0;
01022   for (k = ka;  k <= kb;  k++) {
01023     for (j = ja;  j <= jb;  j++) {
01024       for (i = ia;  i <= ib;  i++, n++) {
01025         printf("=== eh[%d,%d,%d] = %g\n", i, j, k, ebuf[n]);
01026       }
01027     }
01028   }
01029 #endif
01030 #endif
01031 
01032   for (n = 0;  n < natoms;  n++) {
01033 
01034     /* atomic charge */
01035     q = atom[4*n + 3];
01036     if (0==q) continue;
01037 
01038     /* distance between atom and origin measured in grid points */
01039     rx_hx = (atom[4*n    ] - xm0) * hx_1;
01040     ry_hy = (atom[4*n + 1] - ym0) * hy_1;
01041     rz_hz = (atom[4*n + 2] - zm0) * hz_1;
01042 
01043     /* find smallest numbered grid point in stencil */
01044     ilo = (int) floorf(rx_hx) - s_edge;
01045     jlo = (int) floorf(ry_hy) - s_edge;
01046     klo = (int) floorf(rz_hz) - s_edge;
01047 
01048     /* calculate Phi stencils and derivatives along each dimension */
01049 #ifndef TEST_INLINING
01050     delta = rx_hx - (float) ilo;
01051     //STENCIL_1D(xphi, delta, approx);
01052     D_STENCIL_1D(dxphi, xphi, hx_1, delta, approx);
01053     delta = ry_hy - (float) jlo;
01054     //STENCIL_1D(yphi, delta, approx);
01055     D_STENCIL_1D(dyphi, yphi, hy_1, delta, approx);
01056     delta = rz_hz - (float) klo;
01057     //STENCIL_1D(zphi, delta, approx);
01058     D_STENCIL_1D(dzphi, zphi, hz_1, delta, approx);
01059 #else
01060     t = rx_hx - (float) ilo;
01061         xphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
01062         dxphi[0] = (1.5f * t - 2) * (2 - t) * hx_1; \
01063         t--; \
01064         xphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
01065         dxphi[1] = (-5 + 4.5f * t) * t * hx_1; \
01066         t--; \
01067         xphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
01068         dxphi[2] = (-5 - 4.5f * t) * t * hx_1; \
01069         t--; \
01070         xphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
01071         dxphi[3] = (1.5f * t + 2) * (2 + t) * hx_1; \
01072 
01073     t = ry_hy - (double) jlo;
01074         yphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
01075         dyphi[0] = (1.5f * t - 2) * (2 - t) * hy_1; \
01076         t--; \
01077         yphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
01078         dyphi[1] = (-5 + 4.5f * t) * t * hy_1; \
01079         t--; \
01080         yphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
01081         dyphi[2] = (-5 - 4.5f * t) * t * hy_1; \
01082         t--; \
01083         yphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
01084         dyphi[3] = (1.5f * t + 2) * (2 + t) * hy_1; \
01085 
01086     t = rz_hz - (double) klo;
01087         zphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t); \
01088         dzphi[0] = (1.5f * t - 2) * (2 - t) * hz_1; \
01089         t--; \
01090         zphi[1] = (1 - t) * (1 + t - 1.5f * t * t); \
01091         dzphi[1] = (-5 + 4.5f * t) * t * hz_1; \
01092         t--; \
01093         zphi[2] = (1 + t) * (1 - t - 1.5f * t * t); \
01094         dzphi[2] = (-5 - 4.5f * t) * t * hz_1; \
01095         t--; \
01096         zphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t); \
01097         dzphi[3] = (1.5f * t + 2) * (2 + t) * hz_1; \
01098 
01099 #endif
01100 
01101     /* test to see if stencil is within edges of grid */
01102     iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
01103                  ja <= jlo && (jlo+(s_size-1)) <= jb &&
01104                  ka <= klo && (klo+(s_size-1)) <= kb );
01105 
01106     if ( iswithin ) {  /* no wrapping needed */
01107 
01108       /* determine force on atom from cube of grid point potentials */
01109       fx = fy = fz = 0;
01110       for (k = 0;  k < s_size;  k++) {
01111         koff = (k + klo) * nj;
01112         for (j = 0;  j < s_size;  j++) {
01113           jkoff = (koff + (j + jlo)) * ni;
01114           cx = yphi[j] * zphi[k];
01115           cy = dyphi[j] * zphi[k];
01116           cz = yphi[j] * dzphi[k];
01117           for (i = 0;  i < s_size;  i++) {
01118             index = jkoff + (i + ilo);
01119             GRID_INDEX_CHECK(ehgrid, i+ilo, j+jlo, k+klo);
01120             ASSERT(GRID_INDEX(ehgrid, i+ilo, j+jlo, k+klo) == index);
01121             fx += eh[index] * dxphi[i] * cx;
01122             fy += eh[index] * xphi[i] * cy;
01123             fz += eh[index] * xphi[i] * cz;
01124           }
01125         }
01126       }
01127     } /* if */
01128 
01129     else {  /* requires wrapping around grid */
01130       int ip, jp, kp;
01131 
01132       /* adjust ilo, jlo, klo so they are within grid indexing */
01133       if (ispx) {
01134         if      (ilo < ia) do { ilo += ni; } while (ilo < ia);
01135         else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
01136       }
01137       else if (ilo < ia || (ilo+(s_size-1)) > ib) {
01138         return NL_MSM_ERROR_RANGE;
01139       }
01140 
01141       if (ispy) {
01142         if      (jlo < ja) do { jlo += nj; } while (jlo < ja);
01143         else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
01144       }
01145       else if (jlo < ja || (jlo+(s_size-1)) > jb) {
01146         return NL_MSM_ERROR_RANGE;
01147       }
01148 
01149       if (ispz) {
01150         if      (klo < ka) do { klo += nk; } while (klo < ka);
01151         else if (klo > kb) do { klo -= nk; } while (klo > kb);
01152       }
01153       else if (klo < ka || (klo+(s_size-1)) > kb) {
01154         return NL_MSM_ERROR_RANGE;
01155       }
01156 
01157       /* determine force on atom from cube of grid point potentials, wrapping */
01158       fx = fy = fz = 0;
01159       for (k = 0, kp = klo;  k < s_size;  k++, kp++) {
01160         if (kp > kb) kp = ka;  /* wrap stencil around grid */
01161         koff = kp * nj;
01162         for (j = 0, jp = jlo;  j < s_size;  j++, jp++) {
01163           if (jp > jb) jp = ja;  /* wrap stencil around grid */
01164           jkoff = (koff + jp) * ni;
01165           cx = yphi[j] * zphi[k];
01166           cy = dyphi[j] * zphi[k];
01167           cz = yphi[j] * dzphi[k];
01168           for (i = 0, ip = ilo;  i < s_size;  i++, ip++) {
01169             if (ip > ib) ip = ia;  /* wrap stencil around grid */
01170             index = jkoff + ip;
01171             GRID_INDEX_CHECK(ehgrid, ip, jp, kp);
01172             ASSERT(GRID_INDEX(ehgrid, ip, jp, kp) == index);
01173             fx += eh[index] * dxphi[i] * cx;
01174             fy += eh[index] * xphi[i] * cy;
01175             fz += eh[index] * xphi[i] * cz;
01176           }
01177         }
01178       }
01179     } /* else */
01180 
01181     /* update force */
01182     f[3*n  ] -= q * fx;
01183     f[3*n+1] -= q * fy;
01184     f[3*n+2] -= q * fz;
01185 
01186   } /* end loop over atoms */
01187 
01188   /* compute potential, subtract self potential */
01189   u = 0;
01190   if (1) {
01191     for (n = 0;  n < natoms;  n++) {
01192       float q = atom[4*n + 3];
01193       u += q * q;
01194     }
01195     u *= -msm->gzero_f;
01196   }
01197   for (index = 0;  index < nn;  index++) {
01198     u += qbuf[index] * ebuf[index];
01199   }
01200   msm->uelec += 0.5 * u;
01201 
01202   return NL_MSM_SUCCESS;
01203 } /* interpolation() */
01204 
01205 
01206 int restriction_factored(NL_Msm *msm, int level) {
01207   /* grids of charge, finer grid and coarser grid */
01208   const NL_Msmgrid_float *qhgrid = &(msm->qh_f[level]);
01209   const float *qh = qhgrid->data;
01210   NL_Msmgrid_float *q2hgrid = &(msm->qh_f[level+1]);
01211   float *q2h = q2hgrid->data;
01212 
01213   /* finer grid index ranges and dimensions */
01214   const int ia1 = qhgrid->i0;             /* lowest x-index */
01215   const int ib1 = ia1 + qhgrid->ni - 1;   /* highest x-index */
01216   const int ja1 = qhgrid->j0;             /* lowest y-index */
01217   const int jb1 = ja1 + qhgrid->nj - 1;   /* highest y-index */
01218   const int ka1 = qhgrid->k0;             /* lowest z-index */
01219   const int kb1 = ka1 + qhgrid->nk - 1;   /* highest z-index */
01220   const int ni1 = qhgrid->ni;             /* length along x-dim */
01221   const int nj1 = qhgrid->nj;             /* length along y-dim */
01222   const int nk1 = qhgrid->nk;             /* length along z-dim */
01223 
01224   /* coarser grid index ranges and dimensions */
01225   const int ia2 = q2hgrid->i0;            /* lowest x-index */
01226   const int ib2 = ia2 + q2hgrid->ni - 1;  /* highest x-index */
01227   const int ja2 = q2hgrid->j0;            /* lowest y-index */
01228   const int jb2 = ja2 + q2hgrid->nj - 1;  /* highest y-index */
01229   const int ka2 = q2hgrid->k0;            /* lowest z-index */
01230   const int kb2 = ka2 + q2hgrid->nk - 1;  /* highest z-index */
01231   const int nrow_q2 = q2hgrid->ni;
01232   const int nstride_q2 = nrow_q2 * q2hgrid->nj;
01233 
01234   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01235   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01236   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01237 
01238   /* set buffer using indexing offset, so that indexing matches qh grid */
01239   float *qzd = msm->lzd_f + (-ka1);
01240   float *qyzd = msm->lyzd_f + (-ka1*nj1 + -ja1);
01241   float qsum;
01242 
01243   const float *phi = NULL;
01244 
01245   int i2, j2, k2;
01246   int im, jm, km;
01247   int i, j, k;
01248   int index_plane_q2, index_q2;
01249   int index_jk, offset_k;
01250   int offset;
01251 
01252   const float *phi_factored = PhiStencilFactored[msm->approx];
01253   const int r_stencil = PolyDegree[msm->approx];  /* "radius" of stencil */
01254   const int diam_stencil = 2*r_stencil + 1;       /* "diameter" of stencil */
01255 
01256   for (i2 = ia2;  i2 <= ib2;  i2++) {
01257 
01258     for (k = ka1;  k <= kb1;  k++) {
01259       offset_k = k * nj1;
01260 
01261       for (j = ja1;  j <= jb1;  j++) {
01262         index_jk = offset_k + j;
01263         offset = index_jk * ni1;
01264         im = (i2 << 1);  /* = 2*i2 */
01265         qsum = 0;
01266         if ( ! ispx ) {  /* nonperiodic */
01267           int lower = im - r_stencil;
01268           int upper = im + r_stencil;
01269           if (lower < ia1) lower = ia1;
01270           if (upper > ib1) upper = ib1;  /* clip edges */
01271           phi = phi_factored + r_stencil;  /* center of stencil */
01272           for (i = lower;  i <= upper;  i++) {
01273             qsum += phi[i-im] * qh[offset + i];
01274           }
01275         }
01276         else {  /* periodic */
01277           int ip = im - r_stencil;  /* index at left end of stencil */
01278           if (ip < ia1) do { ip += ni1; } while (ip < ia1);  /* start inside */
01279           phi = phi_factored;  /* left end of stencil */
01280           for (i = 0;  i < diam_stencil;  i++, ip++) {
01281             if (ip > ib1) ip = ia1;  /* wrap around edge of grid */
01282             qsum += phi[i] * qh[offset + ip];
01283           }
01284         }
01285         qyzd[index_jk] = qsum;
01286       } /* for j */
01287 
01288     } /* for k */
01289 
01290     for (j2 = ja2;  j2 <= jb2;  j2++) {
01291       index_plane_q2 = j2 * nrow_q2 + i2;
01292 
01293       for (k = ka1;  k <= kb1;  k++) {
01294         offset = k * nj1;
01295         jm = (j2 << 1);  /* = 2*j2 */
01296         qsum = 0;
01297         if ( ! ispy ) {  /* nonperiodic */
01298           int lower = jm - r_stencil;
01299           int upper = jm + r_stencil;
01300           if (lower < ja1) lower = ja1;
01301           if (upper > jb1) upper = jb1;  /* clip edges */
01302           phi = phi_factored + r_stencil;  /* center of stencil */
01303           for (j = lower;  j <= upper;  j++) {
01304             qsum += phi[j-jm] * qyzd[offset + j];
01305           }
01306         }
01307         else {  /* periodic */
01308           int jp = jm - r_stencil;  /* index at left end of stencil */
01309           if (jp < ja1) do { jp += nj1; } while (jp < ja1);  /* start inside */
01310           phi = phi_factored;  /* left end of stencil */
01311           for (j = 0;  j < diam_stencil;  j++, jp++) {
01312             if (jp > jb1) jp = ja1;  /* wrap around edge of grid */
01313             qsum += phi[j] * qyzd[offset + jp];
01314           }
01315         }
01316         qzd[k] = qsum;
01317       } /* for k */
01318 
01319       for (k2 = ka2;  k2 <= kb2;  k2++) {
01320         index_q2 = k2 * nstride_q2 + index_plane_q2;
01321         km = (k2 << 1);  /* = 2*k2 */
01322         qsum = 0;
01323         if ( ! ispz ) {  /* nonperiodic */
01324           int lower = km - r_stencil;
01325           int upper = km + r_stencil;
01326           if (lower < ka1) lower = ka1;
01327           if (upper > kb1) upper = kb1;  /* clip edges */
01328           phi = phi_factored + r_stencil;  /* center of stencil */
01329           for (k = lower;  k <= upper;  k++) {
01330             qsum += phi[k-km] * qzd[k];
01331           }
01332         }
01333         else {  /* periodic */
01334           int kp = km - r_stencil;  /* index at left end of stencil */
01335           if (kp < ka1) do { kp += nk1; } while (kp < ka1);  /* start inside */
01336           phi = phi_factored;  /* left end of stencil */
01337           for (k = 0;  k < diam_stencil;  k++, kp++) {
01338             if (kp > kb1) kp = ka1;  /* wrap around edge of grid */
01339             qsum += phi[k] * qzd[kp];
01340           }
01341         }
01342         q2h[index_q2] = qsum;
01343       } /* for k2 */
01344 
01345     } /* for j2 */
01346 
01347   } /* for i2 */
01348 
01349   return NL_MSM_SUCCESS;
01350 } /* restriction_factored */
01351 
01352 
01353 int prolongation_factored(NL_Msm *msm, int level) {
01354   /* grids of potential, finer grid and coarser grid */
01355   NL_Msmgrid_float *ehgrid = &(msm->eh_f[level]);
01356   float *eh = ehgrid->data;
01357   const NL_Msmgrid_float *e2hgrid = &(msm->eh_f[level+1]);
01358   const float *e2h = e2hgrid->data;
01359 
01360   /* finer grid index ranges and dimensions */
01361   const int ia1 = ehgrid->i0;             /* lowest x-index */
01362   const int ib1 = ia1 + ehgrid->ni - 1;   /* highest x-index */
01363   const int ja1 = ehgrid->j0;             /* lowest y-index */
01364   const int jb1 = ja1 + ehgrid->nj - 1;   /* highest y-index */
01365   const int ka1 = ehgrid->k0;             /* lowest z-index */
01366   const int kb1 = ka1 + ehgrid->nk - 1;   /* highest z-index */
01367   const int ni1 = ehgrid->ni;             /* length along x-dim */
01368   const int nj1 = ehgrid->nj;             /* length along y-dim */
01369   const int nk1 = ehgrid->nk;             /* length along z-dim */
01370 
01371   /* coarser grid index ranges and dimensions */
01372   const int ia2 = e2hgrid->i0;            /* lowest x-index */
01373   const int ib2 = ia2 + e2hgrid->ni - 1;  /* highest x-index */
01374   const int ja2 = e2hgrid->j0;            /* lowest y-index */
01375   const int jb2 = ja2 + e2hgrid->nj - 1;  /* highest y-index */
01376   const int ka2 = e2hgrid->k0;            /* lowest z-index */
01377   const int kb2 = ka2 + e2hgrid->nk - 1;  /* highest z-index */
01378   const int nrow_e2 = e2hgrid->ni;
01379   const int nstride_e2 = nrow_e2 * e2hgrid->nj;
01380 
01381   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01382   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01383   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01384 
01385   /* set buffer using indexing offset, so that indexing matches eh grid */
01386   float *ezd = msm->lzd_f + (-ka1);
01387   float *eyzd = msm->lyzd_f + (-ka1*nj1 + -ja1);
01388 
01389   const size_t size_lzd = nk1 * sizeof(float);
01390   const size_t size_lyzd = nj1 * nk1 * sizeof(float);
01391 
01392   const float *phi = NULL;
01393 
01394   int i2, j2, k2;
01395   int im, jm, km;
01396   int i, j, k;
01397   int index_plane_e2, index_e2;
01398   int index_jk, offset_k;
01399   int offset;
01400 
01401   const float *phi_factored = PhiStencilFactored[msm->approx];
01402   const int r_stencil = PolyDegree[msm->approx];  /* "radius" of stencil */
01403   const int diam_stencil = 2*r_stencil + 1;       /* "diameter" of stencil */
01404 
01405   for (i2 = ia2;  i2 <= ib2;  i2++) {
01406     memset(msm->lyzd_f, 0, size_lyzd);
01407 
01408     for (j2 = ja2;  j2 <= jb2;  j2++) {
01409       memset(msm->lzd_f, 0, size_lzd);
01410       index_plane_e2 = j2 * nrow_e2 + i2;
01411 
01412       for (k2 = ka2;  k2 <= kb2;  k2++) {
01413         index_e2 = k2 * nstride_e2 + index_plane_e2;
01414         km = (k2 << 1);  /* = 2*k2 */
01415         if ( ! ispz ) {  /* nonperiodic */
01416           int lower = km - r_stencil;
01417           int upper = km + r_stencil;
01418           if (lower < ka1) lower = ka1;
01419           if (upper > kb1) upper = kb1;  /* clip edges */
01420           phi = phi_factored + r_stencil;  /* center of stencil */
01421           for (k = lower;  k <= upper;  k++) {
01422             ezd[k] += phi[k-km] * e2h[index_e2];
01423           }
01424         }
01425         else {  /* periodic */
01426           int kp = km - r_stencil;  /* index at left end of stencil */
01427           if (kp < ka1) do { kp += nk1; } while (kp < ka1);  /* start inside */
01428           phi = phi_factored;  /* left end of stencil */
01429           for (k = 0;  k < diam_stencil;  k++, kp++) {
01430             if (kp > kb1) kp = ka1;  /* wrap around edge of grid */
01431             ezd[kp] += phi[k] * e2h[index_e2];
01432           }
01433         }
01434       } /* for k2 */
01435 
01436       for (k = ka1;  k <= kb1;  k++) {
01437         offset = k * nj1;
01438         jm = (j2 << 1);  /* = 2*j2 */
01439         if ( ! ispy ) {  /* nonperiodic */
01440           int lower = jm - r_stencil;
01441           int upper = jm + r_stencil;
01442           if (lower < ja1) lower = ja1;
01443           if (upper > jb1) upper = jb1;  /* clip edges */
01444           phi = phi_factored + r_stencil;  /* center of stencil */
01445           for (j = lower;  j <= upper;  j++) {
01446             eyzd[offset + j] += phi[j-jm] * ezd[k];
01447           }
01448         }
01449         else {  /* periodic */
01450           int jp = jm - r_stencil;  /* index at left end of stencil */
01451           if (jp < ja1) do { jp += nj1; } while (jp < ja1);  /* start inside */
01452           phi = phi_factored;  /* left end of stencil */
01453           for (j = 0;  j < diam_stencil;  j++, jp++) {
01454             if (jp > jb1) jp = ja1;  /* wrap around edge of grid */
01455             eyzd[offset + jp] += phi[j] * ezd[k];
01456           }
01457         }
01458       } /* for k */
01459 
01460     } /* for j2 */
01461 
01462     for (k = ka1;  k <= kb1;  k++) {
01463       offset_k = k * nj1;
01464 
01465       for (j = ja1;  j <= jb1;  j++) {
01466         index_jk = offset_k + j;
01467         offset = index_jk * ni1;
01468         im = (i2 << 1);  /* = 2*i2 */
01469         if ( ! ispx ) {  /* nonperiodic */
01470           int lower = im - r_stencil;
01471           int upper = im + r_stencil;
01472           if (lower < ia1) lower = ia1;
01473           if (upper > ib1) upper = ib1;  /* clip edges */
01474           phi = phi_factored + r_stencil;  /* center of stencil */
01475           for (i = lower;  i <= upper;  i++) {
01476             eh[offset + i] += phi[i-im] * eyzd[index_jk];
01477           }
01478         }
01479         else {  /* periodic */
01480           int ip = im - r_stencil;  /* index at left end of stencil */
01481           if (ip < ia1) do { ip += ni1; } while (ip < ia1);  /* start inside */
01482           phi = phi_factored;  /* left end of stencil */
01483           for (i = 0;  i < diam_stencil;  i++, ip++) {
01484             if (ip > ib1) ip = ia1;  /* wrap around edge of grid */
01485             eh[offset + ip] += phi[i] * eyzd[index_jk];
01486           }
01487         }
01488       } /* for j */
01489 
01490     } /* for k */
01491 
01492   } /* for i2 */
01493 
01494   return NL_MSM_SUCCESS;
01495 } /* prolongation_factored */
01496 
01497 
01498 int restriction(NL_Msm *msm, int level) {
01499   /* grids of charge, finer grid and coarser grid */
01500   const NL_Msmgrid_float *qhgrid = &(msm->qh_f[level]);
01501   const float *qh = qhgrid->data;        /* index the offset data buffer */
01502   NL_Msmgrid_float *q2hgrid = &(msm->qh_f[level+1]);
01503   float *q2h_buffer = q2hgrid->buffer;   /* index the raw buffer */
01504 
01505   /* finer grid index ranges and dimensions */
01506   const int ia1 = qhgrid->i0;             /* lowest x-index */
01507   const int ib1 = ia1 + qhgrid->ni - 1;   /* highest x-index */
01508   const int ja1 = qhgrid->j0;             /* lowest y-index */
01509   const int jb1 = ja1 + qhgrid->nj - 1;   /* highest y-index */
01510   const int ka1 = qhgrid->k0;             /* lowest z-index */
01511   const int kb1 = ka1 + qhgrid->nk - 1;   /* highest z-index */
01512   const int ni1 = qhgrid->ni;             /* length along x-dim */
01513   const int nj1 = qhgrid->nj;             /* length along y-dim */
01514   const int nk1 = qhgrid->nk;             /* length along z-dim */
01515 
01516   /* coarser grid index ranges and dimensions */
01517   const int ia2 = q2hgrid->i0;            /* lowest x-index */
01518   const int ib2 = ia2 + q2hgrid->ni - 1;  /* highest x-index */
01519   const int ja2 = q2hgrid->j0;            /* lowest y-index */
01520   const int jb2 = ja2 + q2hgrid->nj - 1;  /* highest y-index */
01521   const int ka2 = q2hgrid->k0;            /* lowest z-index */
01522   const int kb2 = ka2 + q2hgrid->nk - 1;  /* highest z-index */
01523 
01524   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01525   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01526   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01527 
01528   const int nstencil = Nstencil[msm->approx];
01529   const int *offset = IndexOffset[msm->approx];
01530   const float *phi = PhiStencil[msm->approx];
01531 
01532   float q2h_sum, cjk;
01533 
01534   int i1, j1, k1, index1, jk1off, k1off;
01535   int i2, j2, k2;
01536   int index2;
01537   int i, j, k;
01538 
01539   for (index2 = 0, k2 = ka2;  k2 <= kb2;  k2++) {
01540     k1 = k2 * 2;
01541     for (j2 = ja2;  j2 <= jb2;  j2++) {
01542       j1 = j2 * 2;
01543       for (i2 = ia2;  i2 <= ib2;  i2++, index2++) {
01544         i1 = i2 * 2;
01545 
01546         q2h_sum = 0;
01547         for (k = 0;  k < nstencil;  k++) {
01548           k1off = k1 + offset[k];
01549           if (k1off < ka1) {
01550             if (ispz) do { k1off += nk1; } while (k1off < ka1);
01551             else continue;
01552           }
01553           else if (k1off > kb1) {
01554             if (ispz) do { k1off -= nk1; } while (k1off > kb1);
01555             else break;
01556           }
01557           k1off *= nj1;
01558           for (j = 0;  j < nstencil;  j++) {
01559             jk1off = j1 + offset[j];
01560             if (jk1off < ja1) {
01561               if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
01562               else continue;
01563             }
01564             else if (jk1off > jb1) {
01565               if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
01566               else break;
01567             }
01568             jk1off = (k1off + jk1off) * ni1;
01569             cjk = phi[j] * phi[k];
01570             for (i = 0;  i < nstencil;  i++) {
01571               index1 = i1 + offset[i];
01572               if (index1 < ia1) {
01573                 if (ispx) do { index1 += ni1; } while (index1 < ia1);
01574                 else continue;
01575               }
01576               else if (index1 > ib1) {
01577                 if (ispx) do { index1 -= ni1; } while (index1 > ib1);
01578                 else break;
01579               }
01580               index1 += jk1off;
01581               q2h_sum += qh[index1] * phi[i] * cjk;
01582             }
01583           }
01584         } /* end loop over finer grid stencil */
01585 
01586         q2h_buffer[index2] = q2h_sum;  /* store charge to coarser grid */
01587       }
01588     }
01589   } /* end loop over each coarser grid point */
01590 
01591   return NL_MSM_SUCCESS;
01592 }
01593 
01594 
01595 int prolongation(NL_Msm *msm, int level) {
01596   /* grids of charge, finer grid and coarser grid */
01597   NL_Msmgrid_float *ehgrid = &(msm->eh_f[level]);
01598   float *eh = ehgrid->data;        /* index the offset data buffer */
01599   const NL_Msmgrid_float *e2hgrid = &(msm->eh_f[level+1]);
01600   const float *e2h_buffer = e2hgrid->buffer;   /* index the raw buffer */
01601 
01602   /* finer grid index ranges and dimensions */
01603   const int ia1 = ehgrid->i0;             /* lowest x-index */
01604   const int ib1 = ia1 + ehgrid->ni - 1;   /* highest x-index */
01605   const int ja1 = ehgrid->j0;             /* lowest y-index */
01606   const int jb1 = ja1 + ehgrid->nj - 1;   /* highest y-index */
01607   const int ka1 = ehgrid->k0;             /* lowest z-index */
01608   const int kb1 = ka1 + ehgrid->nk - 1;   /* highest z-index */
01609   const int ni1 = ehgrid->ni;             /* length along x-dim */
01610   const int nj1 = ehgrid->nj;             /* length along y-dim */
01611   const int nk1 = ehgrid->nk;             /* length along z-dim */
01612 
01613   /* coarser grid index ranges and dimensions */
01614   const int ia2 = e2hgrid->i0;            /* lowest x-index */
01615   const int ib2 = ia2 + e2hgrid->ni - 1;  /* highest x-index */
01616   const int ja2 = e2hgrid->j0;            /* lowest y-index */
01617   const int jb2 = ja2 + e2hgrid->nj - 1;  /* highest y-index */
01618   const int ka2 = e2hgrid->k0;            /* lowest z-index */
01619   const int kb2 = ka2 + e2hgrid->nk - 1;  /* highest z-index */
01620 
01621   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01622   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01623   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01624 
01625   const int nstencil = Nstencil[msm->approx];
01626   const int *offset = IndexOffset[msm->approx];
01627   const float *phi = PhiStencil[msm->approx];
01628 
01629   float cjk;
01630 
01631   int i1, j1, k1, index1, jk1off, k1off;
01632   int i2, j2, k2;
01633   int index2;
01634   int i, j, k;
01635 
01636   for (index2 = 0, k2 = ka2;  k2 <= kb2;  k2++) {
01637     k1 = k2 * 2;
01638     for (j2 = ja2;  j2 <= jb2;  j2++) {
01639       j1 = j2 * 2;
01640       for (i2 = ia2;  i2 <= ib2;  i2++, index2++) {
01641         i1 = i2 * 2;
01642 
01643         for (k = 0;  k < nstencil;  k++) {
01644           k1off = k1 + offset[k];
01645           if (k1off < ka1) {
01646             if (ispz) do { k1off += nk1; } while (k1off < ka1);
01647             else continue;
01648           }
01649           else if (k1off > kb1) {
01650             if (ispz) do { k1off -= nk1; } while (k1off > kb1);
01651             else break;
01652           }
01653           k1off *= nj1;
01654           for (j = 0;  j < nstencil;  j++) {
01655             jk1off = j1 + offset[j];
01656             if (jk1off < ja1) {
01657               if (ispy) do { jk1off += nj1; } while (jk1off < ja1);
01658               else continue;
01659             }
01660             else if (jk1off > jb1) {
01661               if (ispy) do { jk1off -= nj1; } while (jk1off > jb1);
01662               else break;
01663             }
01664             jk1off = (k1off + jk1off) * ni1;
01665             cjk = phi[j] * phi[k];
01666             for (i = 0;  i < nstencil;  i++) {
01667               index1 = i1 + offset[i];
01668               if (index1 < ia1) {
01669                 if (ispx) do { index1 += ni1; } while (index1 < ia1);
01670                 else continue;
01671               }
01672               else if (index1 > ib1) {
01673                 if (ispx) do { index1 -= ni1; } while (index1 > ib1);
01674                 else break;
01675               }
01676               index1 += jk1off;
01677               eh[index1] += e2h_buffer[index2] * phi[i] * cjk;
01678             }
01679           }
01680         } /* end loop over finer grid stencil */
01681 
01682       }
01683     }
01684   } /* end loop over each coarser grid point */
01685 
01686   return NL_MSM_SUCCESS;
01687 }
01688 
01689 
01690 int gridcutoff(NL_Msm *msm, int level)
01691 {
01692   float eh_sum;
01693 
01694   /* grids of charge and potential */
01695   const NL_Msmgrid_float *qhgrid = &(msm->qh_f[level]);
01696   const float *qh = qhgrid->data;
01697   NL_Msmgrid_float *ehgrid = &(msm->eh_f[level]);
01698   float *eh = ehgrid->data;
01699   const int ia = qhgrid->i0;            /* lowest x-index */
01700   const int ib = ia + qhgrid->ni - 1;   /* highest x-index */
01701   const int ja = qhgrid->j0;            /* lowest y-index */
01702   const int jb = ja + qhgrid->nj - 1;   /* highest y-index */
01703   const int ka = qhgrid->k0;            /* lowest z-index */
01704   const int kb = ka + qhgrid->nk - 1;   /* highest z-index */
01705   const int ni = qhgrid->ni;            /* length along x-dim */
01706   const int nj = qhgrid->nj;            /* length along y-dim */
01707   const int nk = qhgrid->nk;            /* length along z-dim */
01708 
01709   /* grid of weights for pairwise grid point interactions within cutoff */
01710   const NL_Msmgrid_float *gcgrid = &(msm->gc_f[level]);
01711   const float  *gc = gcgrid->data;
01712   const int gia = gcgrid->i0;            /* lowest x-index */
01713   const int gib = gia + gcgrid->ni - 1;  /* highest x-index */
01714   const int gja = gcgrid->j0;            /* lowest y-index */
01715   const int gjb = gja + gcgrid->nj - 1;  /* highest y-index */
01716   const int gka = gcgrid->k0;            /* lowest z-index */
01717   const int gkb = gka + gcgrid->nk - 1;  /* highest z-index */
01718   const int gni = gcgrid->ni;            /* length along x-dim */
01719   const int gnj = gcgrid->nj;            /* length along y-dim */
01720 
01721   const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1);
01722   const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2);
01723   const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3);
01724 
01725   const int ispnone = !(ispx || ispy || ispz);
01726 
01727   int i, j, k;
01728   int gia_clip, gib_clip;
01729   int gja_clip, gjb_clip;
01730   int gka_clip, gkb_clip;
01731   int koff;
01732   long jkoff, index;
01733   int id, jd, kd;
01734   int knoff;
01735   long jknoff, nindex;
01736   int kgoff, jkgoff, ngindex;
01737 
01738 #ifdef NL_DEBUG
01739   if (level==0) {
01740     const float *gcbuf = gcgrid->buffer;
01741     int index = 0;
01742     printf("+++ qh[0,0,0] = %g\n", qh[0]);
01743 #if 0
01744     for (k = gka;  k <= gkb;  k++) {
01745       for (j = gja;  j <= gjb;  j++) {
01746         for (i = gia;  i <= gib;  i++, index++) {
01747           printf("=== gc[%d,%d,%d] = %g\n", i, j, k, gcbuf[index]);
01748         }
01749       }
01750     }
01751 #endif
01752   }
01753 #endif
01754 
01755   if ( ispnone ) {  /* non-periodic boundaries */
01756 
01757     /* loop over all grid points */
01758     for (k = ka;  k <= kb;  k++) {
01759 
01760       /* clip gc ranges to keep offset for k index within grid */
01761       gka_clip = (k + gka < ka ? ka - k : gka);
01762       gkb_clip = (k + gkb > kb ? kb - k : gkb);
01763 
01764       koff = k * nj;  /* find eh flat index */
01765 
01766       for (j = ja;  j <= jb;  j++) {
01767 
01768         /* clip gc ranges to keep offset for j index within grid */
01769         gja_clip = (j + gja < ja ? ja - j : gja);
01770         gjb_clip = (j + gjb > jb ? jb - j : gjb);
01771 
01772         jkoff = (koff + j) * ni;  /* find eh flat index */
01773 
01774         for (i = ia;  i <= ib;  i++) {
01775 
01776           /* clip gc ranges to keep offset for i index within grid */
01777           gia_clip = (i + gia < ia ? ia - i : gia);
01778           gib_clip = (i + gib > ib ? ib - i : gib);
01779 
01780           index = jkoff + i;  /* eh flat index */
01781 
01782           /* sum over "sphere" of weighted charge */
01783           eh_sum = 0;
01784           for (kd = gka_clip;  kd <= gkb_clip;  kd++) {
01785             knoff = (k + kd) * nj;  /* find qh flat index */
01786             kgoff = kd * gnj;       /* find gc flat index */
01787 
01788             for (jd = gja_clip;  jd <= gjb_clip;  jd++) {
01789               jknoff = (knoff + (j + jd)) * ni;  /* find qh flat index */
01790               jkgoff = (kgoff + jd) * gni;       /* find gc flat index */
01791 
01792               for (id = gia_clip;  id <= gib_clip;  id++) {
01793                 nindex = jknoff + (i + id);  /* qh flat index */
01794                 ngindex = jkgoff + id;       /* gc flat index */
01795 
01796                 GRID_INDEX_CHECK(qhgrid, i+id, j+jd, k+kd);
01797                 ASSERT(GRID_INDEX(qhgrid, i+id, j+jd, k+kd) == nindex);
01798 
01799                 GRID_INDEX_CHECK(gcgrid, id, jd, kd);
01800                 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
01801 
01802                 eh_sum += qh[nindex] * gc[ngindex];  /* sum weighted charge */
01803               }
01804             }
01805           } /* end loop over "sphere" of charge */
01806 
01807           GRID_INDEX_CHECK(ehgrid, i, j, k);
01808           ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
01809           eh[index] = eh_sum;  /* store potential */
01810         }
01811       }
01812     } /* end loop over all grid points */
01813 
01814   } /* if nonperiodic boundaries */
01815   else {
01816     /* some boundary is periodic */
01817     int ilo, jlo, klo;
01818     int ip, jp, kp;
01819 
01820     /* loop over all grid points */
01821     for (k = ka;  k <= kb;  k++) {
01822       klo = k + gka;
01823       if ( ! ispz ) {  /* nonperiodic z */
01824         /* clip gc ranges to keep offset for k index within grid */
01825         gka_clip = (k + gka < ka ? ka - k : gka);
01826         gkb_clip = (k + gkb > kb ? kb - k : gkb);
01827         if (klo < ka) klo = ka;  /* keep lowest qh index within grid */
01828       }
01829       else {  /* periodic z */
01830         gka_clip = gka;
01831         gkb_clip = gkb;
01832         if (klo < ka) do { klo += nk; } while (klo < ka);
01833       }
01834       ASSERT(klo <= kb);
01835 
01836       koff = k * nj;  /* find eh flat index */
01837 
01838       for (j = ja;  j <= jb;  j++) {
01839         jlo = j + gja;
01840         if ( ! ispy ) {  /* nonperiodic y */
01841           /* clip gc ranges to keep offset for j index within grid */
01842           gja_clip = (j + gja < ja ? ja - j : gja);
01843           gjb_clip = (j + gjb > jb ? jb - j : gjb);
01844           if (jlo < ja) jlo = ja;  /* keep lowest qh index within grid */
01845         }
01846         else {  /* periodic y */
01847           gja_clip = gja;
01848           gjb_clip = gjb;
01849           if (jlo < ja) do { jlo += nj; } while (jlo < ja);
01850         }
01851         ASSERT(jlo <= jb);
01852 
01853         jkoff = (koff + j) * ni;  /* find eh flat index */
01854 
01855         for (i = ia;  i <= ib;  i++) {
01856           ilo = i + gia;
01857           if ( ! ispx ) {  /* nonperiodic x */
01858             /* clip gc ranges to keep offset for i index within grid */
01859             gia_clip = (i + gia < ia ? ia - i : gia);
01860             gib_clip = (i + gib > ib ? ib - i : gib);
01861             if (ilo < ia) ilo = ia;  /* keep lowest qh index within grid */
01862           }
01863           else {  /* periodic x */
01864             gia_clip = gia;
01865             gib_clip = gib;
01866             if (ilo < ia) do { ilo += ni; } while (ilo < ia);
01867           }
01868           ASSERT(ilo <= ib);
01869 
01870           index = jkoff + i;  /* eh flat index */
01871 
01872           /* sum over "sphere" of weighted charge */
01873           eh_sum = 0;
01874           for (kd = gka_clip, kp = klo;  kd <= gkb_clip;  kd++, kp++) {
01875             /* clipping makes conditional always fail for nonperiodic */
01876             if (kp > kb) kp = ka;  /* wrap z direction */
01877             knoff = kp * nj;       /* find qh flat index */
01878             kgoff = kd * gnj;      /* find gc flat index */
01879 
01880             for (jd = gja_clip, jp = jlo;  jd <= gjb_clip;  jd++, jp++) {
01881               /* clipping makes conditional always fail for nonperiodic */
01882               if (jp > jb) jp = ja;              /* wrap y direction */
01883               jknoff = (knoff + jp) * ni;  /* find qh flat index */
01884               jkgoff = (kgoff + jd) * gni;       /* find gc flat index */
01885 
01886               for (id = gia_clip, ip = ilo;  id <= gib_clip;  id++, ip++) {
01887                 /* clipping makes conditional always fail for nonperiodic */
01888                 if (ip > ib) ip = ia;   /* wrap x direction */
01889                 nindex = jknoff +  ip;  /* qh flat index */
01890                 ngindex = jkgoff + id;  /* gc flat index */
01891 
01892                 GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
01893                 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == nindex);
01894 
01895                 GRID_INDEX_CHECK(gcgrid, id, jd, kd);
01896                 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
01897 
01898                 eh_sum += qh[nindex] * gc[ngindex];  /* sum weighted charge */
01899 
01900               }
01901             }
01902           } /* end loop over "sphere" of charge */
01903 
01904           GRID_INDEX_CHECK(ehgrid, i, j, k);
01905           ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
01906           eh[index] = eh_sum;  /* store potential */
01907         }
01908       }
01909     } /* end loop over all grid points */
01910 
01911   } /* else some boundary is periodic */
01912 
01913   return NL_MSM_SUCCESS;
01914 }

Generated on Sat Sep 23 01:17:14 2017 for NAMD by  doxygen 1.4.7