00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "msmpot_internal.h"
00023
00024 static int anterpolation(Msmpot *msm);
00025 static int interpolation_factored(Msmpot *msm);
00026 static int interpolation(Msmpot *msm);
00027 static int restriction(Msmpot *msm, int level);
00028 static int prolongation(Msmpot *msm, int level);
00029 static int latticecutoff(Msmpot *msm, int level);
00030
00031 #undef OK
00032 #define OK MSMPOT_SUCCESS
00033
00034
00035
00036
00037
00038
00039
00040 #define USE_FACTORED_GRID_TRANSFERS
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 int Msmpot_compute_longrng_cubic(Msmpot *msm) {
00055 int do_cpu_latcut = 1;
00056 int err = 0;
00057 int level;
00058 int nlevels = msm->nlevels;
00059 #ifdef MSMPOT_REPORT
00060 char msg[120];
00061 #endif
00062
00063 REPORT("Using cubic interpolation for long-range part.");
00064
00065 REPORT("Doing anterpolation.");
00066 err = anterpolation(msm);
00067 if (err) return ERROR(err);
00068
00069 for (level = 0; level < nlevels - 1; level++) {
00070 #ifdef MSMPOT_REPORT
00071 sprintf(msg, "Doing restriction of grid level %d.", level);
00072 REPORT(msg);
00073 #endif
00074 err = restriction(msm, level);
00075 if (err) return ERROR(err);
00076 }
00077
00078 #ifdef MSMPOT_CUDA
00079 if (msm->use_cuda_latcut && nlevels > 1) {
00080 #ifdef MSMPOT_REPORT
00081 sprintf(msg, "Computing lattice cutoff part with CUDA for grid %s %d.",
00082 (nlevels > 2 ? "levels 0 -" : "level "), nlevels-2);
00083 REPORT(msg);
00084 #endif
00085 do_cpu_latcut = 0;
00086 if ((err = Msmpot_cuda_condense_qgrids(msm->msmcuda)) != OK ||
00087 (err = Msmpot_cuda_compute_latcut(msm->msmcuda)) != OK ||
00088 (err = Msmpot_cuda_expand_egrids(msm->msmcuda)) != OK) {
00089 if (msm->cuda_optional) {
00090 REPORT("Falling back on CPU for lattice cutoff part.");
00091 do_cpu_latcut = 1;
00092 }
00093 else return ERROR(err);
00094 }
00095 }
00096 #endif
00097
00098 if (do_cpu_latcut) {
00099 for (level = 0; level < nlevels - 1; level++) {
00100 #ifdef MSMPOT_REPORT
00101 sprintf(msg, "Doing cutoff calculation on grid level %d.", level);
00102 REPORT(msg);
00103 #endif
00104 err = latticecutoff(msm, level);
00105 if (err) return ERROR(err);
00106 }
00107 }
00108
00109 #ifdef MSMPOT_REPORT
00110 sprintf(msg, "Doing cutoff calculation on grid level %d.", level);
00111 REPORT(msg);
00112 #endif
00113 err = latticecutoff(msm, level);
00114 if (err) return ERROR(err);
00115
00116 for (level--; level >= 0; level--) {
00117 #ifdef MSMPOT_REPORT
00118 sprintf(msg, "Doing prolongation to grid level %d.", level);
00119 REPORT(msg);
00120 #endif
00121 err = prolongation(msm, level);
00122 if (err) return ERROR(err);
00123 }
00124
00125 #ifdef MSMPOT_VERBOSE
00126 #ifdef MSMPOT_CHECKMAPINDEX
00127 printf("epotmap[%d]=%g\n", MAPINDEX, msm->epotmap[MAPINDEX]);
00128 #endif
00129 #endif
00130
00131 if (msm->px == msm->lx && msm->py == msm->ly && msm->pz == msm->lz) {
00132 REPORT("Doing factored interpolation.");
00133 err = interpolation_factored(msm);
00134 }
00135 else {
00136 REPORT("Doing non-factored interpolation.");
00137 err = interpolation(msm);
00138 }
00139 if (err) return ERROR(err);
00140
00141 #ifdef MSMPOT_VERBOSE
00142 #ifdef MSMPOT_CHECKMAPINDEX
00143 printf("epotmap[%d]=%g\n", MAPINDEX, msm->epotmap[MAPINDEX]);
00144 #endif
00145 #endif
00146 return OK;
00147 }
00148
00149
00150 int anterpolation(Msmpot *msm)
00151 {
00152 const float *atom = msm->atom;
00153 const int natoms = msm->natoms;
00154
00155 float xphi[4], yphi[4], zphi[4];
00156 float rx_hx, ry_hy, rz_hz;
00157 float t;
00158 float ck, cjk;
00159 const float hx_1 = 1/msm->hx;
00160 const float hy_1 = 1/msm->hy;
00161 const float hz_1 = 1/msm->hz;
00162 #if 1
00163 const float xm0 = msm->px0;
00164 const float ym0 = msm->py0;
00165 const float zm0 = msm->pz0;
00166 #else
00167 const float xm0 = msm->lx0;
00168 const float ym0 = msm->ly0;
00169 const float zm0 = msm->lz0;
00170 #endif
00171 float q;
00172
00173 floatGrid *qhgrid = &(msm->qh[0]);
00174 float *qh = qhgrid->data;
00175 const int ni = qhgrid->ni;
00176 const int nj = qhgrid->nj;
00177 const int nk = qhgrid->nk;
00178 const int ia = qhgrid->i0;
00179 const int ib = ia + ni - 1;
00180 const int ja = qhgrid->j0;
00181 const int jb = ja + nj - 1;
00182 const int ka = qhgrid->k0;
00183 const int kb = ka + nk - 1;
00184
00185 const int ispany = IS_SET_ANY(msm->isperiodic);
00186 int iswrap;
00187
00188 int n, i, j, k, ilo, jlo, klo, koff;
00189 long jkoff, index;
00190
00191 GRID_ZERO(qhgrid);
00192
00193 for (n = 0; n < natoms; n++) {
00194
00195
00196 q = atom[4*n + 3];
00197 if (0==q) continue;
00198
00199
00200 rx_hx = (atom[4*n ] - xm0) * hx_1;
00201 ry_hy = (atom[4*n + 1] - ym0) * hy_1;
00202 rz_hz = (atom[4*n + 2] - zm0) * hz_1;
00203
00204
00205 ilo = (int) floorf(rx_hx) - 1;
00206 jlo = (int) floorf(ry_hy) - 1;
00207 klo = (int) floorf(rz_hz) - 1;
00208
00209
00210 t = rx_hx - (float) ilo;
00211 xphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00212 t--;
00213 xphi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00214 t--;
00215 xphi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00216 t--;
00217 xphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00218
00219
00220 t = ry_hy - (float) jlo;
00221 yphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00222 t--;
00223 yphi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00224 t--;
00225 yphi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00226 t--;
00227 yphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00228
00229
00230 t = rz_hz - (float) klo;
00231 zphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00232 t--;
00233 zphi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00234 t--;
00235 zphi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00236 t--;
00237 zphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00238
00239
00240 iswrap = ispany &&
00241 ( ilo < ia || (ilo+3) > ib ||
00242 jlo < ja || (jlo+3) > jb ||
00243 klo < ka || (klo+3) > kb);
00244
00245 if ( ! iswrap ) {
00246
00247 ASSERT(ia <= ilo && ilo + 3 <= ib);
00248 ASSERT(ja <= jlo && jlo + 3 <= jb);
00249 ASSERT(ka <= klo && klo + 3 <= kb);
00250
00251
00252 for (k = 0; k < 4; k++) {
00253 koff = (k + klo) * nj;
00254 ck = zphi[k] * q;
00255 for (j = 0; j < 4; j++) {
00256 jkoff = (koff + (j + jlo)) * (long)ni;
00257 cjk = yphi[j] * ck;
00258 for (i = 0; i < 4; i++) {
00259 index = jkoff + (i + ilo);
00260 GRID_INDEX_CHECK(qhgrid, i+ilo, j+jlo, k+klo);
00261 ASSERT(GRID_INDEX(qhgrid, i+ilo, j+jlo, k+klo) == index);
00262 qh[index] += xphi[i] * cjk;
00263 }
00264 }
00265 }
00266 }
00267 else {
00268 int ip, jp, kp;
00269
00270
00271 if (ilo < ia) do { ilo += ni; } while (ilo < ia);
00272 else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
00273 if (jlo < ja) do { jlo += nj; } while (jlo < ja);
00274 else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
00275 if (klo < ka) do { klo += nk; } while (klo < ka);
00276 else if (klo > kb) do { klo -= nk; } while (klo > kb);
00277
00278
00279 for (k = 0, kp = klo; k < 4; k++, kp++) {
00280 if (kp > kb) kp = ka;
00281 koff = kp * nj;
00282 ck = zphi[k] * q;
00283 for (j = 0, jp = jlo; j < 4; j++, jp++) {
00284 if (jp > jb) jp = ja;
00285 jkoff = (koff + jp) * (long)ni;
00286 cjk = yphi[j] * ck;
00287 for (i = 0, ip = ilo; i < 4; i++, ip++) {
00288 if (ip > ib) ip = ia;
00289 index = jkoff + ip;
00290 GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
00291 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == index);
00292 qh[index] += xphi[i] * cjk;
00293 }
00294 }
00295 }
00296 }
00297
00298 }
00299 #ifdef MSMPOT_DEBUG
00300 ck = 0;
00301 for (k = ka; k <= kb; k++) {
00302 for (j = ja; j <= jb; j++) {
00303 for (i = ia; i <= ib; i++) {
00304 index = (k*nj + j)*(long)ni + i;
00305 ck += qh[index];
00306 }
00307 }
00308 }
00309 printf("# level = 0, grid sum = %e\n", ck);
00310 #endif
00311 return OK;
00312 }
00313
00314
00315 int interpolation_factored(Msmpot *msm) {
00316 float *epotmap = msm->epotmap;
00317
00318 float *ezd = msm->ezd;
00319 float *eyzd = msm->eyzd;
00320
00321 const floatGrid *ehgrid = &(msm->eh[0]);
00322 const float *eh = ehgrid->data;
00323 const int ia = ehgrid->i0;
00324 const int ib = ia + ehgrid->ni - 1;
00325 const int ja = ehgrid->j0;
00326 const int jb = ja + ehgrid->nj - 1;
00327 const int ka = ehgrid->k0;
00328 const int kb = ka + ehgrid->nk - 1;
00329 const int nrow_eh = ehgrid->ni;
00330 const int nstride_eh = nrow_eh * ehgrid->nj;
00331
00332 const int mx = msm->mx;
00333 const int my = msm->my;
00334 const int mz = msm->mz;
00335
00336 const int ispx = (IS_SET_X(msm->isperiodic) != 0);
00337 const int ispy = (IS_SET_Y(msm->isperiodic) != 0);
00338 const int ispz = (IS_SET_Z(msm->isperiodic) != 0);
00339
00340 const size_t size_ezd = mz * sizeof(float);
00341 const size_t size_eyzd = my * mz * sizeof(float);
00342
00343 const int ih_phi_cycle = msm->cycle_x;
00344 const int jh_phi_cycle = msm->cycle_y;
00345 const int kh_phi_cycle = msm->cycle_z;
00346 int ih_phi, jh_phi, kh_phi;
00347
00348 const int rmap_x = msm->rmap_x;
00349 const int rmap_y = msm->rmap_y;
00350 const int rmap_z = msm->rmap_z;
00351
00352 const int diam_x = 2*rmap_x + 1;
00353 const int diam_y = 2*rmap_y + 1;
00354 const int diam_z = 2*rmap_z + 1;
00355
00356 const float *base_phi_x = msm->phi_x;
00357 const float *base_phi_y = msm->phi_y;
00358 const float *base_phi_z = msm->phi_z;
00359 const float *phi = NULL;
00360
00361 const float hx_dx = msm->hx_dx;
00362 const float hy_dy = msm->hy_dy;
00363 const float hz_dz = msm->hz_dz;
00364
00365 int ih, jh, kh;
00366 int im, jm, km;
00367 int i, j, k;
00368 int index_plane_eh, index_eh;
00369 int index_jk, offset_k;
00370 long offset;
00371
00372 ih_phi = ia;
00373 while (ih_phi < 0) ih_phi += ih_phi_cycle;
00374 jh_phi = ja;
00375 while (jh_phi < 0) jh_phi += jh_phi_cycle;
00376 kh_phi = ka;
00377 while (kh_phi < 0) kh_phi += kh_phi_cycle;
00378
00379 for (ih = ia; ih <= ib; ih++, ih_phi++) {
00380 if (ih_phi == ih_phi_cycle) ih_phi = 0;
00381 memset(eyzd, 0, size_eyzd);
00382
00383 for (jh = ja; jh <= jb; jh++, jh_phi++) {
00384 if (jh_phi == jh_phi_cycle) jh_phi = 0;
00385 memset(ezd, 0, size_ezd);
00386 index_plane_eh = jh * nrow_eh + ih;
00387
00388 for (kh = ka; kh <= kb; kh++, kh_phi++) {
00389 if (kh_phi == kh_phi_cycle) kh_phi = 0;
00390 index_eh = kh * nstride_eh + index_plane_eh;
00391 km = (int) floorf(kh * hz_dz);
00392 if ( ! ispz ) {
00393 int lower = km - rmap_z;
00394 int upper = km + rmap_z;
00395 if (lower < 0) lower = 0;
00396 if (upper >= mz) upper = mz-1;
00397 phi = base_phi_z + diam_z * kh_phi + rmap_z;
00398 for (k = lower; k <= upper; k++) {
00399 ezd[k] += phi[k-km] * eh[index_eh];
00400 }
00401 }
00402 else {
00403 int kp = km - rmap_z;
00404 if (kp < 0) do { kp += mz; } while (kp < 0);
00405 phi = base_phi_z + diam_z * kh_phi;
00406 for (k = 0; k < diam_z; k++, kp++) {
00407 if (kp == mz) kp -= mz;
00408 ezd[kp] += phi[k] * eh[index_eh];
00409 }
00410 }
00411 }
00412
00413 for (k = 0; k < mz; k++) {
00414 offset = k * my;
00415 jm = (int) floorf(jh * hy_dy);
00416 if ( ! ispy ) {
00417 int lower = jm - rmap_y;
00418 int upper = jm + rmap_y;
00419 if (lower < 0) lower = 0;
00420 if (upper >= my) upper = my-1;
00421 phi = base_phi_y + diam_y * jh_phi + rmap_y;
00422 for (j = lower; j <= upper; j++) {
00423 eyzd[offset + j] += phi[j-jm] * ezd[k];
00424 }
00425 }
00426 else {
00427 int jp = jm - rmap_z;
00428 if (jp < 0) do { jp += my; } while (jp < 0);
00429 phi = base_phi_y + diam_y * jh_phi;
00430 for (j = 0; j < diam_y; j++, jp++) {
00431 if (jp == my) jp -= my;
00432 eyzd[offset + jp] += phi[j] * ezd[k];
00433 }
00434 }
00435 }
00436 }
00437
00438 for (k = 0; k < mz; k++) {
00439 offset_k = k * my;
00440
00441 for (j = 0; j < my; j++) {
00442 index_jk = offset_k + j;
00443 offset = index_jk * (long)mx;
00444 im = (int) floorf(ih * hx_dx);
00445 if ( ! ispx ) {
00446 int lower = im - rmap_x;
00447 int upper = im + rmap_x;
00448 if (lower < 0) lower = 0;
00449 if (upper >= mx) upper = mx-1;
00450 phi = base_phi_x + diam_x * ih_phi + rmap_x;
00451 for (i = lower; i <= upper; i++) {
00452 epotmap[offset + i] += phi[i-im] * eyzd[index_jk];
00453 }
00454 }
00455 else {
00456 int ip = im - rmap_z;
00457 if (ip < 0) do { ip += mx; } while (ip < 0);
00458 phi = base_phi_x + diam_x * ih_phi;
00459 for (i = 0; i < diam_x; i++, ip++) {
00460 if (ip == mx) ip -= mx;
00461 epotmap[offset + ip] += phi[i] * eyzd[index_jk];
00462 }
00463 }
00464 }
00465 }
00466
00467 }
00468 return OK;
00469 }
00470
00471
00472 int interpolation(Msmpot *msm)
00473 {
00474 float *epotmap = msm->epotmap;
00475
00476 float xphi[4], yphi[4], zphi[4];
00477 float rx_hx, ry_hy, rz_hz;
00478 float t;
00479 float ck, cjk;
00480 const float hx_1 = 1/msm->hx;
00481 const float hy_1 = 1/msm->hy;
00482 const float hz_1 = 1/msm->hz;
00483 const float px0 = msm->px0;
00484 const float py0 = msm->py0;
00485 const float pz0 = msm->pz0;
00486
00487 const int mx = msm->mx;
00488 const int my = msm->my;
00489 const int mz = msm->mz;
00490 const float dx = msm->dx;
00491 const float dy = msm->dy;
00492 const float dz = msm->dz;
00493 const float lx0 = msm->lx0;
00494 const float ly0 = msm->ly0;
00495 const float lz0 = msm->lz0;
00496
00497 const floatGrid *ehgrid = &(msm->eh[0]);
00498 const float *eh = ehgrid->data;
00499 const int ni = ehgrid->ni;
00500 const int nj = ehgrid->nj;
00501 const int nk = ehgrid->nk;
00502 const int ia = ehgrid->i0;
00503 const int ib = ia + ni - 1;
00504 const int ja = ehgrid->j0;
00505 const int jb = ja + nj - 1;
00506 const int ka = ehgrid->k0;
00507 const int kb = ka + nk - 1;
00508
00509 const int ispany = IS_SET_ANY(msm->isperiodic);
00510 int iswrap;
00511
00512 float x, y, z, esum;
00513
00514 int i, j, k, ii, jj, kk, ilo, jlo, klo;
00515 int koff, kmoff;
00516 long index, mindex, jkoff, jkmoff;
00517
00518 for (kk = 0; kk < mz; kk++) {
00519 kmoff = kk * my;
00520 z = kk*dz + lz0;
00521
00522 for (jj = 0; jj < my; jj++) {
00523 jkmoff = (kmoff + jj) * (long)mx;
00524 y = jj*dy + ly0;
00525
00526 for (ii = 0; ii < mx; ii++) {
00527 mindex = jkmoff + ii;
00528 x = ii*dx + lx0;
00529
00530
00531 rx_hx = (x - px0) * hx_1;
00532 ry_hy = (y - py0) * hy_1;
00533 rz_hz = (z - pz0) * hz_1;
00534
00535
00536 ilo = (int) floorf(rx_hx) - 1;
00537 jlo = (int) floorf(ry_hy) - 1;
00538 klo = (int) floorf(rz_hz) - 1;
00539
00540
00541 t = rx_hx - (float) ilo;
00542 xphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00543 t--;
00544 xphi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00545 t--;
00546 xphi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00547 t--;
00548 xphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00549
00550
00551 t = ry_hy - (float) jlo;
00552 yphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00553 t--;
00554 yphi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00555 t--;
00556 yphi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00557 t--;
00558 yphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00559
00560
00561 t = rz_hz - (float) klo;
00562 zphi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00563 t--;
00564 zphi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00565 t--;
00566 zphi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00567 t--;
00568 zphi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00569
00570
00571 iswrap = ispany &&
00572 ( ilo < ia || (ilo+3) > ib ||
00573 jlo < ja || (jlo+3) > jb ||
00574 klo < ka || (klo+3) > kb);
00575
00576 if ( ! iswrap ) {
00577
00578 ASSERT(ia <= ilo && ilo + 3 <= ib);
00579 ASSERT(ja <= jlo && jlo + 3 <= jb);
00580 ASSERT(ka <= klo && klo + 3 <= kb);
00581
00582
00583 esum = 0;
00584 for (k = 0; k < 4; k++) {
00585 koff = (k + klo) * nj;
00586 ck = zphi[k];
00587 for (j = 0; j < 4; j++) {
00588 jkoff = (koff + (j + jlo)) * (long)ni;
00589 cjk = yphi[j] * ck;
00590 for (i = 0; i < 4; i++) {
00591 index = jkoff + (i + ilo);
00592 GRID_INDEX_CHECK(ehgrid, i+ilo, j+jlo, k+klo);
00593 ASSERT(GRID_INDEX(ehgrid, i+ilo, j+jlo, k+klo) == index);
00594 esum += eh[index] * xphi[i] * cjk;
00595 }
00596 }
00597 }
00598 }
00599 else {
00600 int ip, jp, kp;
00601
00602
00603 if (ilo < ia) do { ilo += ni; } while (ilo < ia);
00604 else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
00605 if (jlo < ja) do { jlo += nj; } while (jlo < ja);
00606 else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
00607 if (klo < ka) do { klo += nk; } while (klo < ka);
00608 else if (klo > kb) do { klo -= nk; } while (klo > kb);
00609
00610
00611 esum = 0;
00612 for (k = 0, kp = klo; k < 4; k++, kp++) {
00613 if (kp > kb) kp = ka;
00614 koff = kp * nj;
00615 ck = zphi[k];
00616 for (j = 0, jp = jlo; j < 4; j++, jp++) {
00617 if (jp > jb) jp = ja;
00618 jkoff = (koff + jp) * (long)ni;
00619 cjk = yphi[j] * ck;
00620 for (i = 0, ip = ilo; i < 4; i++, ip++) {
00621 if (ip > ib) ip = ia;
00622 index = jkoff + ip;
00623 GRID_INDEX_CHECK(ehgrid, ip, jp, kp);
00624 ASSERT(GRID_INDEX(ehgrid, ip, jp, kp) == index);
00625 esum += eh[index] * xphi[i] * cjk;
00626 }
00627 }
00628 }
00629 }
00630
00631 #ifdef MSMPOT_CHECKMAPINDEX
00632 if (MAPINDEX==mindex) {
00633 printf("shortrng=%g longrng=%g epotmap[%ld]=%g\n",
00634 epotmap[mindex], esum, mindex, epotmap[mindex]+esum);
00635 }
00636 #endif
00637
00638 epotmap[mindex] += esum;
00639 }
00640 }
00641 }
00642
00643 return OK;
00644 }
00645
00646
00647 #if !defined(USE_FACTORED_GRID_TRANSFERS)
00648
00649
00650
00651
00652
00653 enum { NSTENCIL = 5 };
00654
00655
00656 static const float Phi[NSTENCIL] = { -0.0625f, 0.5625f, 1, 0.5625f, -0.0625f };
00657
00658
00659
00660 static const int Offset[NSTENCIL] = { -3, -1, 0, 1, 3 };
00661
00662
00663 int restriction(Msmpot *msm, int level)
00664 {
00665 float cjk, q2h_sum;
00666
00667
00668 const floatGrid *qhgrid = &(msm->qh[level]);
00669 const float *qh = qhgrid->data;
00670 floatGrid *q2hgrid = &(msm->qh[level+1]);
00671 float *q2h = q2hgrid->data;
00672
00673
00674 const int ia1 = qhgrid->i0;
00675 const int ib1 = ia1 + qhgrid->ni - 1;
00676 const int ja1 = qhgrid->j0;
00677 const int jb1 = ja1 + qhgrid->nj - 1;
00678 const int ka1 = qhgrid->k0;
00679 const int kb1 = ka1 + qhgrid->nk - 1;
00680 const int ni1 = qhgrid->ni;
00681 const int nj1 = qhgrid->nj;
00682
00683
00684 const int ia2 = q2hgrid->i0;
00685 const int ib2 = ia2 + q2hgrid->ni - 1;
00686 const int ja2 = q2hgrid->j0;
00687 const int jb2 = ja2 + q2hgrid->nj - 1;
00688 const int ka2 = q2hgrid->k0;
00689 const int kb2 = ka2 + q2hgrid->nk - 1;
00690 const int ni2 = q2hgrid->ni;
00691 const int nj2 = q2hgrid->nj;
00692
00693
00694 int i1, j1, k1, k1off;
00695 int i2, j2, k2, k2off;
00696 int i, j, k;
00697 long index1, jk1off;
00698 long index2, jk2off;
00699
00700 if (msm->isperiodic) {
00701 return ERRMSG(MSMPOT_ERROR_SUPPORT,
00702 "non-factored grid transfer does not support periodic boundaries");
00703 }
00704
00705
00706 for (k2 = ka2; k2 <= kb2; k2++) {
00707 k2off = k2 * nj2;
00708 k1 = k2 * 2;
00709 for (j2 = ja2; j2 <= jb2; j2++) {
00710 jk2off = (k2off + j2) * (long)ni2;
00711 j1 = j2 * 2;
00712 for (i2 = ia2; i2 <= ib2; i2++) {
00713 index2 = jk2off + i2;
00714 i1 = i2 * 2;
00715
00716
00717 q2h_sum = 0;
00718 for (k = 0; k < NSTENCIL; k++) {
00719
00720 if (k1 + Offset[k] < ka1) continue;
00721 else if (k1 + Offset[k] > kb1) break;
00722 k1off = (k1 + Offset[k]) * nj1;
00723 for (j = 0; j < NSTENCIL; j++) {
00724
00725 if (j1 + Offset[j] < ja1) continue;
00726 else if (j1 + Offset[j] > jb1) break;
00727 jk1off = (k1off + (j1 + Offset[j])) * (long)ni1;
00728 cjk = Phi[j] * Phi[k];
00729 for (i = 0; i < NSTENCIL; i++) {
00730
00731 if (i1 + Offset[i] < ia1) continue;
00732 else if (i1 + Offset[i] > ib1) break;
00733 index1 = jk1off + (i1 + Offset[i]);
00734 GRID_INDEX_CHECK(qhgrid,
00735 i1+Offset[i], j1+Offset[j], k1+Offset[k]);
00736 ASSERT(GRID_INDEX(qhgrid,
00737 i1+Offset[i], j1+Offset[j], k1+Offset[k]) == index1);
00738 q2h_sum += Phi[i] * cjk * qh[index1];
00739 }
00740 }
00741 }
00742
00743 GRID_INDEX_CHECK(q2hgrid, i2, j2, k2);
00744 ASSERT(GRID_INDEX(q2hgrid, i2, j2, k2) == index2);
00745 q2h[index2] = q2h_sum;
00746
00747 }
00748 }
00749 }
00750 return OK;
00751 }
00752
00753
00754 int prolongation(Msmpot *msm, int level)
00755 {
00756 float ck, cjk;
00757
00758
00759 floatGrid *ehgrid = &(msm->eh[level]);
00760 float *eh = ehgrid->data;
00761 const floatGrid *e2hgrid = &(msm->eh[level+1]);
00762 const float *e2h = e2hgrid->data;
00763
00764
00765 const int ia1 = ehgrid->i0;
00766 const int ib1 = ia1 + ehgrid->ni - 1;
00767 const int ja1 = ehgrid->j0;
00768 const int jb1 = ja1 + ehgrid->nj - 1;
00769 const int ka1 = ehgrid->k0;
00770 const int kb1 = ka1 + ehgrid->nk - 1;
00771 const int ni1 = ehgrid->ni;
00772 const int nj1 = ehgrid->nj;
00773
00774
00775 const int ia2 = e2hgrid->i0;
00776 const int ib2 = ia2 + e2hgrid->ni - 1;
00777 const int ja2 = e2hgrid->j0;
00778 const int jb2 = ja2 + e2hgrid->nj - 1;
00779 const int ka2 = e2hgrid->k0;
00780 const int kb2 = ka2 + e2hgrid->nk - 1;
00781 const int ni2 = e2hgrid->ni;
00782 const int nj2 = e2hgrid->nj;
00783
00784
00785 int i1, j1, k1, k1off;
00786 int i2, j2, k2, k2off;
00787 int i, j, k;
00788 long index1, jk1off;
00789 long index2, jk2off;
00790
00791 if (msm->isperiodic) {
00792 return ERRMSG(MSMPOT_ERROR_SUPPORT,
00793 "non-factored grid transfer does not support periodic boundaries");
00794 }
00795
00796
00797 for (k2 = ka2; k2 <= kb2; k2++) {
00798 k2off = k2 * nj2;
00799 k1 = k2 * 2;
00800 for (j2 = ja2; j2 <= jb2; j2++) {
00801 jk2off = (k2off + j2) * (long)ni2;
00802 j1 = j2 * 2;
00803 for (i2 = ia2; i2 <= ib2; i2++) {
00804 index2 = jk2off + i2;
00805 i1 = i2 * 2;
00806
00807
00808 GRID_INDEX_CHECK(e2hgrid, i2, j2, k2);
00809 ASSERT(GRID_INDEX(e2hgrid, i2, j2, k2) == index2);
00810 for (k = 0; k < NSTENCIL; k++) {
00811
00812 if (k1 + Offset[k] < ka1) continue;
00813 else if (k1 + Offset[k] > kb1) break;
00814 k1off = (k1 + Offset[k]) * nj1;
00815 ck = Phi[k] * e2h[index2];
00816 for (j = 0; j < NSTENCIL; j++) {
00817
00818 if (j1 + Offset[j] < ja1) continue;
00819 else if (j1 + Offset[j] > jb1) break;
00820 jk1off = (k1off + (j1 + Offset[j])) * (long)ni1;
00821 cjk = Phi[j] * ck;
00822 for (i = 0; i < NSTENCIL; i++) {
00823
00824 if (i1 + Offset[i] < ia1) continue;
00825 else if (i1 + Offset[i] > ib1) break;
00826 index1 = jk1off + (i1 + Offset[i]);
00827 GRID_INDEX_CHECK(ehgrid,
00828 i1+Offset[i], j1+Offset[j], k1+Offset[k]);
00829 ASSERT(GRID_INDEX(ehgrid,
00830 i1+Offset[i], j1+Offset[j], k1+Offset[k]) == index1);
00831 eh[index1] += Phi[i] * cjk;
00832 }
00833 }
00834 }
00835
00836 }
00837 }
00838 }
00839 return OK;
00840 }
00841
00842 #else
00843
00844
00845
00846
00847 enum {
00848 R_STENCIL = 3,
00849 DIAM_STENCIL = 2*R_STENCIL + 1,
00850 };
00851
00852 static const float PHI_FACTORED[DIAM_STENCIL] = {
00853 -0.0625f, 0, 0.5625f, 1, 0.5625f, 0, -0.0625f
00854 };
00855
00856
00857 int restriction(Msmpot *msm, int level) {
00858
00859 const floatGrid *qhgrid = &(msm->qh[level]);
00860 const float *qh = qhgrid->data;
00861 floatGrid *q2hgrid = &(msm->qh[level+1]);
00862 float *q2h = q2hgrid->data;
00863
00864
00865 const int ia1 = qhgrid->i0;
00866 const int ib1 = ia1 + qhgrid->ni - 1;
00867 const int ja1 = qhgrid->j0;
00868 const int jb1 = ja1 + qhgrid->nj - 1;
00869 const int ka1 = qhgrid->k0;
00870 const int kb1 = ka1 + qhgrid->nk - 1;
00871 const int ni1 = qhgrid->ni;
00872 const int nj1 = qhgrid->nj;
00873 const int nk1 = qhgrid->nk;
00874
00875
00876 const int ia2 = q2hgrid->i0;
00877 const int ib2 = ia2 + q2hgrid->ni - 1;
00878 const int ja2 = q2hgrid->j0;
00879 const int jb2 = ja2 + q2hgrid->nj - 1;
00880 const int ka2 = q2hgrid->k0;
00881 const int kb2 = ka2 + q2hgrid->nk - 1;
00882 const int nrow_q2 = q2hgrid->ni;
00883 const int nstride_q2 = nrow_q2 * q2hgrid->nj;
00884
00885 const int ispx = (IS_SET_X(msm->isperiodic) != 0);
00886 const int ispy = (IS_SET_Y(msm->isperiodic) != 0);
00887 const int ispz = (IS_SET_Z(msm->isperiodic) != 0);
00888
00889
00890 float *qzd = msm->lzd + (-ka1);
00891 float *qyzd = msm->lyzd + (-ka1*nj1 + -ja1);
00892 float qsum;
00893
00894 const float *phi = NULL;
00895
00896 int i2, j2, k2;
00897 int im, jm, km;
00898 int i, j, k;
00899 int index_plane_q2, index_q2;
00900 int index_jk, offset_k;
00901 long offset;
00902
00903 for (i2 = ia2; i2 <= ib2; i2++) {
00904
00905 for (k = ka1; k <= kb1; k++) {
00906 offset_k = k * nj1;
00907
00908 for (j = ja1; j <= jb1; j++) {
00909 index_jk = offset_k + j;
00910 offset = index_jk * (long)ni1;
00911 im = (i2 << 1);
00912 qsum = 0;
00913 if ( ! ispx ) {
00914 int lower = im - R_STENCIL;
00915 int upper = im + R_STENCIL;
00916 if (lower < ia1) lower = ia1;
00917 if (upper > ib1) upper = ib1;
00918 phi = PHI_FACTORED + R_STENCIL;
00919 for (i = lower; i <= upper; i++) {
00920 qsum += phi[i-im] * qh[offset + i];
00921 }
00922 }
00923 else {
00924 int ip = im - R_STENCIL;
00925 if (ip < ia1) do { ip += ni1; } while (ip < ia1);
00926 phi = PHI_FACTORED;
00927 for (i = 0; i < DIAM_STENCIL; i++, ip++) {
00928 if (ip > ib1) ip = ia1;
00929 qsum += phi[i] * qh[offset + ip];
00930 }
00931 }
00932 qyzd[index_jk] = qsum;
00933 }
00934
00935 }
00936
00937 for (j2 = ja2; j2 <= jb2; j2++) {
00938 index_plane_q2 = j2 * nrow_q2 + i2;
00939
00940 for (k = ka1; k <= kb1; k++) {
00941 offset = k * nj1;
00942 jm = (j2 << 1);
00943 qsum = 0;
00944 if ( ! ispy ) {
00945 int lower = jm - R_STENCIL;
00946 int upper = jm + R_STENCIL;
00947 if (lower < ja1) lower = ja1;
00948 if (upper > jb1) upper = jb1;
00949 phi = PHI_FACTORED + R_STENCIL;
00950 for (j = lower; j <= upper; j++) {
00951 qsum += phi[j-jm] * qyzd[offset + j];
00952 }
00953 }
00954 else {
00955 int jp = jm - R_STENCIL;
00956 if (jp < ja1) do { jp += nj1; } while (jp < ja1);
00957 phi = PHI_FACTORED;
00958 for (j = 0; j < DIAM_STENCIL; j++, jp++) {
00959 if (jp > jb1) jp = ja1;
00960 qsum += phi[j] * qyzd[offset + jp];
00961 }
00962 }
00963 qzd[k] = qsum;
00964 }
00965
00966 for (k2 = ka2; k2 <= kb2; k2++) {
00967 index_q2 = k2 * nstride_q2 + index_plane_q2;
00968 km = (k2 << 1);
00969 qsum = 0;
00970 if ( ! ispz ) {
00971 int lower = km - R_STENCIL;
00972 int upper = km + R_STENCIL;
00973 if (lower < ka1) lower = ka1;
00974 if (upper > kb1) upper = kb1;
00975 phi = PHI_FACTORED + R_STENCIL;
00976 for (k = lower; k <= upper; k++) {
00977 qsum += phi[k-km] * qzd[k];
00978 }
00979 }
00980 else {
00981 int kp = km - R_STENCIL;
00982 if (kp < ka1) do { kp += nk1; } while (kp < ka1);
00983 phi = PHI_FACTORED;
00984 for (k = 0; k < DIAM_STENCIL; k++, kp++) {
00985 if (kp > kb1) kp = ka1;
00986 qsum += phi[k] * qzd[kp];
00987 }
00988 }
00989 q2h[index_q2] = qsum;
00990 }
00991
00992 }
00993
00994 }
00995 #ifdef MSMPOT_DEBUG
00996 qsum = 0;
00997 for (k = ka2; k <= kb2; k++) {
00998 for (j = ja2; j <= jb2; j++) {
00999 for (i = ia2; i <= ib2; i++) {
01000 index_q2 = k*nstride_q2 + j*nrow_q2 + i;
01001 qsum += q2h[index_q2];
01002 }
01003 }
01004 }
01005 printf("# level = %d, grid sum = %e\n", level+1, qsum);
01006 #endif
01007 return OK;
01008 }
01009
01010
01011 int prolongation(Msmpot *msm, int level) {
01012
01013 floatGrid *ehgrid = &(msm->eh[level]);
01014 float *eh = ehgrid->data;
01015 const floatGrid *e2hgrid = &(msm->eh[level+1]);
01016 const float *e2h = e2hgrid->data;
01017
01018
01019 const int ia1 = ehgrid->i0;
01020 const int ib1 = ia1 + ehgrid->ni - 1;
01021 const int ja1 = ehgrid->j0;
01022 const int jb1 = ja1 + ehgrid->nj - 1;
01023 const int ka1 = ehgrid->k0;
01024 const int kb1 = ka1 + ehgrid->nk - 1;
01025 const int ni1 = ehgrid->ni;
01026 const int nj1 = ehgrid->nj;
01027 const int nk1 = ehgrid->nk;
01028
01029
01030 const int ia2 = e2hgrid->i0;
01031 const int ib2 = ia2 + e2hgrid->ni - 1;
01032 const int ja2 = e2hgrid->j0;
01033 const int jb2 = ja2 + e2hgrid->nj - 1;
01034 const int ka2 = e2hgrid->k0;
01035 const int kb2 = ka2 + e2hgrid->nk - 1;
01036 const int nrow_e2 = e2hgrid->ni;
01037 const int nstride_e2 = nrow_e2 * e2hgrid->nj;
01038
01039 const int ispx = (IS_SET_X(msm->isperiodic) != 0);
01040 const int ispy = (IS_SET_Y(msm->isperiodic) != 0);
01041 const int ispz = (IS_SET_Z(msm->isperiodic) != 0);
01042
01043
01044 float *ezd = msm->lzd + (-ka1);
01045 float *eyzd = msm->lyzd + (-ka1*nj1 + -ja1);
01046
01047 const size_t size_lzd = nk1 * sizeof(float);
01048 const size_t size_lyzd = nj1 * nk1 * sizeof(float);
01049
01050 const float *phi = NULL;
01051
01052 int i2, j2, k2;
01053 int im, jm, km;
01054 int i, j, k;
01055 int index_plane_e2, index_e2;
01056 int index_jk, offset_k;
01057 long offset;
01058
01059 for (i2 = ia2; i2 <= ib2; i2++) {
01060 memset(msm->lyzd, 0, size_lyzd);
01061
01062 for (j2 = ja2; j2 <= jb2; j2++) {
01063 memset(msm->lzd, 0, size_lzd);
01064 index_plane_e2 = j2 * nrow_e2 + i2;
01065
01066 for (k2 = ka2; k2 <= kb2; k2++) {
01067 index_e2 = k2 * nstride_e2 + index_plane_e2;
01068 km = (k2 << 1);
01069 if ( ! ispz ) {
01070 int lower = km - R_STENCIL;
01071 int upper = km + R_STENCIL;
01072 if (lower < ka1) lower = ka1;
01073 if (upper > kb1) upper = kb1;
01074 phi = PHI_FACTORED + R_STENCIL;
01075 for (k = lower; k <= upper; k++) {
01076 ezd[k] += phi[k-km] * e2h[index_e2];
01077 }
01078 }
01079 else {
01080 int kp = km - R_STENCIL;
01081 if (kp < ka1) do { kp += nk1; } while (kp < ka1);
01082 phi = PHI_FACTORED;
01083 for (k = 0; k < DIAM_STENCIL; k++, kp++) {
01084 if (kp > kb1) kp = ka1;
01085 ezd[kp] += phi[k] * e2h[index_e2];
01086 }
01087 }
01088 }
01089
01090 for (k = ka1; k <= kb1; k++) {
01091 offset = k * nj1;
01092 jm = (j2 << 1);
01093 if ( ! ispy ) {
01094 int lower = jm - R_STENCIL;
01095 int upper = jm + R_STENCIL;
01096 if (lower < ja1) lower = ja1;
01097 if (upper > jb1) upper = jb1;
01098 phi = PHI_FACTORED + R_STENCIL;
01099 for (j = lower; j <= upper; j++) {
01100 eyzd[offset + j] += phi[j-jm] * ezd[k];
01101 }
01102 }
01103 else {
01104 int jp = jm - R_STENCIL;
01105 if (jp < ja1) do { jp += nj1; } while (jp < ja1);
01106 phi = PHI_FACTORED;
01107 for (j = 0; j < DIAM_STENCIL; j++, jp++) {
01108 if (jp > jb1) jp = ja1;
01109 eyzd[offset + jp] += phi[j] * ezd[k];
01110 }
01111 }
01112 }
01113
01114 }
01115
01116 for (k = ka1; k <= kb1; k++) {
01117 offset_k = k * nj1;
01118
01119 for (j = ja1; j <= jb1; j++) {
01120 index_jk = offset_k + j;
01121 offset = index_jk * (long)ni1;
01122 im = (i2 << 1);
01123 if ( ! ispx ) {
01124 int lower = im - R_STENCIL;
01125 int upper = im + R_STENCIL;
01126 if (lower < ia1) lower = ia1;
01127 if (upper > ib1) upper = ib1;
01128 phi = PHI_FACTORED + R_STENCIL;
01129 for (i = lower; i <= upper; i++) {
01130 eh[offset + i] += phi[i-im] * eyzd[index_jk];
01131 }
01132 }
01133 else {
01134 int ip = im - R_STENCIL;
01135 if (ip < ia1) do { ip += ni1; } while (ip < ia1);
01136 phi = PHI_FACTORED;
01137 for (i = 0; i < DIAM_STENCIL; i++, ip++) {
01138 if (ip > ib1) ip = ia1;
01139 eh[offset + ip] += phi[i] * eyzd[index_jk];
01140 }
01141 }
01142 }
01143
01144 }
01145
01146 }
01147
01148 return OK;
01149 }
01150
01151 #endif
01152
01153
01154 int latticecutoff(Msmpot *msm, int level)
01155 {
01156 float eh_sum;
01157
01158
01159 const floatGrid *qhgrid = &(msm->qh[level]);
01160 const float *qh = qhgrid->data;
01161 floatGrid *ehgrid = &(msm->eh[level]);
01162 float *eh = ehgrid->data;
01163 const int ia = qhgrid->i0;
01164 const int ib = ia + qhgrid->ni - 1;
01165 const int ja = qhgrid->j0;
01166 const int jb = ja + qhgrid->nj - 1;
01167 const int ka = qhgrid->k0;
01168 const int kb = ka + qhgrid->nk - 1;
01169 const int ni = qhgrid->ni;
01170 const int nj = qhgrid->nj;
01171 const int nk = qhgrid->nk;
01172
01173
01174 const floatGrid *gcgrid = &(msm->gc[level]);
01175 const float *gc = gcgrid->data;
01176 const int gia = gcgrid->i0;
01177 const int gib = gia + gcgrid->ni - 1;
01178 const int gja = gcgrid->j0;
01179 const int gjb = gja + gcgrid->nj - 1;
01180 const int gka = gcgrid->k0;
01181 const int gkb = gka + gcgrid->nk - 1;
01182 const int gni = gcgrid->ni;
01183 const int gnj = gcgrid->nj;
01184
01185 const int ispx = (IS_SET_X(msm->isperiodic) != 0);
01186 const int ispy = (IS_SET_Y(msm->isperiodic) != 0);
01187 const int ispz = (IS_SET_Z(msm->isperiodic) != 0);
01188
01189 const int ispnone = !(ispx || ispy || ispz);
01190
01191 int i, j, k;
01192 int gia_clip, gib_clip;
01193 int gja_clip, gjb_clip;
01194 int gka_clip, gkb_clip;
01195 int koff;
01196 long jkoff, index;
01197 int id, jd, kd;
01198 int knoff;
01199 long jknoff, nindex;
01200 int kgoff, jkgoff, ngindex;
01201
01202 if ( ispnone ) {
01203
01204
01205 for (k = ka; k <= kb; k++) {
01206
01207
01208 gka_clip = (k + gka < ka ? ka - k : gka);
01209 gkb_clip = (k + gkb > kb ? kb - k : gkb);
01210
01211 koff = k * nj;
01212
01213 for (j = ja; j <= jb; j++) {
01214
01215
01216 gja_clip = (j + gja < ja ? ja - j : gja);
01217 gjb_clip = (j + gjb > jb ? jb - j : gjb);
01218
01219 jkoff = (koff + j) * (long)ni;
01220
01221 for (i = ia; i <= ib; i++) {
01222
01223
01224 gia_clip = (i + gia < ia ? ia - i : gia);
01225 gib_clip = (i + gib > ib ? ib - i : gib);
01226
01227 index = jkoff + i;
01228
01229
01230 eh_sum = 0;
01231 for (kd = gka_clip; kd <= gkb_clip; kd++) {
01232 knoff = (k + kd) * nj;
01233 kgoff = kd * gnj;
01234
01235 for (jd = gja_clip; jd <= gjb_clip; jd++) {
01236 jknoff = (knoff + (j + jd)) * (long)ni;
01237 jkgoff = (kgoff + jd) * gni;
01238
01239 for (id = gia_clip; id <= gib_clip; id++) {
01240 nindex = jknoff + (i + id);
01241 ngindex = jkgoff + id;
01242
01243 GRID_INDEX_CHECK(qhgrid, i+id, j+jd, k+kd);
01244 ASSERT(GRID_INDEX(qhgrid, i+id, j+jd, k+kd) == nindex);
01245
01246 GRID_INDEX_CHECK(gcgrid, id, jd, kd);
01247 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
01248
01249 eh_sum += qh[nindex] * gc[ngindex];
01250 }
01251 }
01252 }
01253
01254 GRID_INDEX_CHECK(ehgrid, i, j, k);
01255 ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
01256 eh[index] = eh_sum;
01257 }
01258 }
01259 }
01260
01261 }
01262 else {
01263
01264 int ilo, jlo, klo;
01265 int ip, jp, kp;
01266
01267
01268 for (k = ka; k <= kb; k++) {
01269 klo = k + gka;
01270 if ( ! ispz ) {
01271
01272 gka_clip = (k + gka < ka ? ka - k : gka);
01273 gkb_clip = (k + gkb > kb ? kb - k : gkb);
01274 if (klo < ka) klo = ka;
01275 }
01276 else {
01277 gka_clip = gka;
01278 gkb_clip = gkb;
01279 if (klo < ka) do { klo += nk; } while (klo < ka);
01280 }
01281 ASSERT(klo <= kb);
01282
01283 koff = k * nj;
01284
01285 for (j = ja; j <= jb; j++) {
01286 jlo = j + gja;
01287 if ( ! ispy ) {
01288
01289 gja_clip = (j + gja < ja ? ja - j : gja);
01290 gjb_clip = (j + gjb > jb ? jb - j : gjb);
01291 if (jlo < ja) jlo = ja;
01292 }
01293 else {
01294 gja_clip = gja;
01295 gjb_clip = gjb;
01296 if (jlo < ja) do { jlo += nj; } while (jlo < ja);
01297 }
01298 ASSERT(jlo <= jb);
01299
01300 jkoff = (koff + j) * (long)ni;
01301
01302 for (i = ia; i <= ib; i++) {
01303 ilo = i + gia;
01304 if ( ! ispx ) {
01305
01306 gia_clip = (i + gia < ia ? ia - i : gia);
01307 gib_clip = (i + gib > ib ? ib - i : gib);
01308 if (ilo < ia) ilo = ia;
01309 }
01310 else {
01311 gia_clip = gia;
01312 gib_clip = gib;
01313 if (ilo < ia) do { ilo += ni; } while (ilo < ia);
01314 }
01315 ASSERT(ilo <= ib);
01316
01317 index = jkoff + i;
01318
01319
01320 eh_sum = 0;
01321 for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
01322
01323 if (kp > kb) kp = ka;
01324 knoff = kp * nj;
01325 kgoff = kd * gnj;
01326
01327 for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
01328
01329 if (jp > jb) jp = ja;
01330 jknoff = (knoff + jp) * (long)ni;
01331 jkgoff = (kgoff + jd) * gni;
01332
01333 for (id = gia_clip, ip = ilo; id <= gib_clip; id++, ip++) {
01334
01335 if (ip > ib) ip = ia;
01336 nindex = jknoff + ip;
01337 ngindex = jkgoff + id;
01338
01339 GRID_INDEX_CHECK(qhgrid, ip, jp, kp);
01340 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == nindex);
01341
01342 GRID_INDEX_CHECK(gcgrid, id, jd, kd);
01343 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex);
01344
01345 eh_sum += qh[nindex] * gc[ngindex];
01346
01347 }
01348 }
01349 }
01350
01351 GRID_INDEX_CHECK(ehgrid, i, j, k);
01352 ASSERT(GRID_INDEX(ehgrid, i, j, k) == index);
01353 eh[index] = eh_sum;
01354 }
01355 }
01356 }
01357
01358 }
01359
01360 return OK;
01361 }