00001
00002
00003 #include "msm_defn.h"
00004
00005
00006
00007 void NL_msm_cleanup(NL_Msm *pm) {
00008 int i;
00009 #ifdef NL_USE_CUDA
00010 if (pm->msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF) {
00011 NL_msm_cuda_cleanup_gridcutoff(pm);
00012 }
00013 #endif
00014 if (pm->msmflags & NL_MSM_COMPUTE_SPREC) {
00015 for (i = 0; i < pm->maxlevels; i++) {
00016 GRID_DONE( &(pm->qh_f[i]) );
00017 GRID_DONE( &(pm->eh_f[i]) );
00018 GRID_DONE( &(pm->gc_f[i]) );
00019 }
00020 }
00021 else {
00022 for (i = 0; i < pm->maxlevels; i++) {
00023 GRID_DONE( &(pm->qh[i]) );
00024 GRID_DONE( &(pm->eh[i]) );
00025 GRID_DONE( &(pm->gc[i]) );
00026 }
00027 }
00028 free(pm->lzd);
00029 free(pm->lyzd);
00030 free(pm->lzd_f);
00031 free(pm->lyzd_f);
00032 free(pm->qh);
00033 free(pm->eh);
00034 free(pm->gc);
00035 free(pm->qh_f);
00036 free(pm->eh_f);
00037 free(pm->gc_f);
00038 }
00039
00040
00041 static int setup_bins_1away(NL_Msm *pm);
00042 static int setup_bins_k_away(NL_Msm *pm);
00043
00044 static int setup_cell_vectors(NL_Msm *pm);
00045
00046 static int setup_grids(NL_Msm *pm);
00047
00048
00049 static int setup_hgrid_1d(
00050 NL_Msm *pm,
00051 double len,
00052 double *hh,
00053 int *nn,
00054 int *aindex,
00055 int *bindex,
00056 int isperiodic
00057 );
00058
00059 static int print_status(NL_Msm *msm);
00060
00061
00062 int NL_msm_setup(
00063 NL_Msm *pm,
00064 double cutoff,
00065 double cellvec1[3],
00066 double cellvec2[3],
00067 double cellvec3[3],
00068 double cellcenter[3],
00069 int msmflags
00070 ) {
00071
00072 int rc = 0;
00073
00074
00075 if ((~NL_MSM_ALL_FLAGS & msmflags) != 0 ||
00076 (NL_MSM_COMPUTE_ALL & msmflags) == 0) {
00077 return NL_MSM_ERROR_PARAM;
00078 }
00079
00080
00081 pm->report_timings = ((msmflags & NL_MSM_REPORT_TIMINGS) != 0);
00082
00083
00084 if (cellvec1[1] != 0 || cellvec1[2] != 0 ||
00085 cellvec2[0] != 0 || cellvec2[2] != 0 ||
00086 cellvec3[0] != 0 || cellvec3[1] != 0 ||
00087 cellvec1[0] <= 0 || cellvec2[1] <= 0 || cellvec3[2] <= 0) {
00088 return NL_MSM_ERROR_SUPPORT;
00089 }
00090
00091
00092
00093 if (cutoff <= 0 ||
00094 (cutoff > cellvec1[0] && (msmflags & NL_MSM_PERIODIC_VEC1)) ||
00095 (cutoff > cellvec2[1] && (msmflags & NL_MSM_PERIODIC_VEC2)) ||
00096 (cutoff > cellvec3[2] && (msmflags & NL_MSM_PERIODIC_VEC3))) {
00097 return NL_MSM_ERROR_PARAM;
00098 }
00099
00100 pm->msmflags = msmflags;
00101 pm->cellvec1[0] = cellvec1[0];
00102 pm->cellvec1[1] = cellvec1[1];
00103 pm->cellvec1[2] = cellvec1[2];
00104 pm->cellvec2[0] = cellvec2[0];
00105 pm->cellvec2[1] = cellvec2[1];
00106 pm->cellvec2[2] = cellvec2[2];
00107 pm->cellvec3[0] = cellvec3[0];
00108 pm->cellvec3[1] = cellvec3[1];
00109 pm->cellvec3[2] = cellvec3[2];
00110 pm->cellcenter[0] = cellcenter[0];
00111 pm->cellcenter[1] = cellcenter[1];
00112 pm->cellcenter[2] = cellcenter[2];
00113
00114 pm->a = cutoff;
00115
00116 rc = setup_cell_vectors(pm);
00117 if (rc) return rc;
00118
00119 if (msmflags & NL_MSM_COMPUTE_SHORT_RANGE) {
00120
00121 if (msmflags & NL_MSM_COMPUTE_1AWAY) {
00122 rc = setup_bins_1away(pm);
00123 }
00124 else {
00125 rc = setup_bins_k_away(pm);
00126 }
00127 if (rc) return rc;
00128 }
00129
00130 if (msmflags & NL_MSM_COMPUTE_LONG_RANGE) {
00131
00132 rc = setup_grids(pm);
00133 if (rc) return rc;
00134 }
00135
00136 if (msmflags & NL_MSM_COMPUTE_SPREC) {
00137
00138 pm->cellvec1_f[0] = (float) pm->cellvec1[0];
00139 pm->cellvec1_f[1] = (float) pm->cellvec1[1];
00140 pm->cellvec1_f[2] = (float) pm->cellvec1[2];
00141 pm->cellvec2_f[0] = (float) pm->cellvec2[0];
00142 pm->cellvec2_f[1] = (float) pm->cellvec2[1];
00143 pm->cellvec2_f[2] = (float) pm->cellvec2[2];
00144 pm->cellvec3_f[0] = (float) pm->cellvec3[0];
00145 pm->cellvec3_f[1] = (float) pm->cellvec3[1];
00146 pm->cellvec3_f[2] = (float) pm->cellvec3[2];
00147 pm->cellcenter_f[0] = (float) pm->cellcenter[0];
00148 pm->cellcenter_f[1] = (float) pm->cellcenter[1];
00149 pm->cellcenter_f[2] = (float) pm->cellcenter[2];
00150 pm->recipvec1_f[0] = (float) pm->recipvec1[0];
00151 pm->recipvec1_f[1] = (float) pm->recipvec1[1];
00152 pm->recipvec1_f[2] = (float) pm->recipvec1[2];
00153 pm->recipvec2_f[0] = (float) pm->recipvec2[0];
00154 pm->recipvec2_f[1] = (float) pm->recipvec2[1];
00155 pm->recipvec2_f[2] = (float) pm->recipvec2[2];
00156 pm->recipvec3_f[0] = (float) pm->recipvec3[0];
00157 pm->recipvec3_f[1] = (float) pm->recipvec3[1];
00158 pm->recipvec3_f[2] = (float) pm->recipvec3[2];
00159 pm->hx_f = pm->hx;
00160 pm->hy_f = pm->hy;
00161 pm->hz_f = pm->hz;
00162 pm->a_f = pm->a;
00163 pm->gx_f = pm->gx;
00164 pm->gy_f = pm->gy;
00165 pm->gz_f = pm->gz;
00166 }
00167
00168 print_status(pm);
00169
00170 #ifdef NL_USE_CUDA
00171 if (msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF) {
00172 rc = NL_msm_cuda_setup_gridcutoff(pm);
00173 if (rc == NL_MSM_SUCCESS) {
00174 printf("Using CUDA for grid cutoff computation\n");
00175 }
00176 else {
00177 printf("Unable to set up CUDA for grid cutoff computation\n");
00178 if (msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK) {
00179 NL_msm_cuda_cleanup_gridcutoff(pm);
00180 printf("Falling back on CPU\n");
00181 pm->msmflags &= ~NL_MSM_COMPUTE_CUDA_GRID_CUTOFF;
00182 }
00183 else return rc;
00184 }
00185 }
00186 #else
00187 if (msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF) {
00188 if (msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK) {
00189 printf("Falling back on CPU\n");
00190 pm->msmflags &= ~NL_MSM_COMPUTE_CUDA_GRID_CUTOFF;
00191 }
00192 else return NL_MSM_ERROR_SUPPORT;
00193 }
00194 #endif
00195
00196 return NL_MSM_SUCCESS;
00197 }
00198
00199
00200 typedef struct InterpParams_t {
00201 int nu;
00202 int stencil;
00203 int omega;
00204 } InterpParams;
00205
00206 static InterpParams INTERP_PARAMS[] = {
00207 { 1, 4, 6 },
00208 { 2, 6, 10 },
00209 { 2, 6, 10 },
00210 { 3, 8, 14 },
00211 { 3, 8, 14 },
00212 { 4, 10, 18 },
00213 { 4, 10, 18 },
00214 { 1, 4, 6 },
00215 };
00216
00217
00218 int print_status(NL_Msm *pm) {
00219 int k;
00220 int ispx = (pm->msmflags & NL_MSM_PERIODIC_VEC1);
00221 int ispy = (pm->msmflags & NL_MSM_PERIODIC_VEC2);
00222 int ispz = (pm->msmflags & NL_MSM_PERIODIC_VEC3);
00223 int ispany = (pm->msmflags & NL_MSM_PERIODIC_ALL);
00224 int ispall = (ispany == NL_MSM_PERIODIC_ALL);
00225
00226 const double xlen = pm->cellvec1[0];
00227 const double ylen = pm->cellvec2[1];
00228 const double zlen = pm->cellvec3[2];
00229
00230 printf("#MSM SETUP\n");
00231 printf("# cell lengths= %g %g %g\n", xlen, ylen, zlen);
00232 printf("# grid origin= %g %g %g\n", pm->gx, pm->gy, pm->gz);
00233 if (ispall) {
00234 printf("# periodic boundaries\n");
00235 }
00236 else if (!ispany) {
00237 printf("# non-periodic boundaries\n");
00238 }
00239 else {
00240 printf("# periodic boundaries in:%s%s%s\n",
00241 (ispx ? " x" : ""), (ispy ? " y" : ""), (ispz ? " z" : ""));
00242 }
00243 printf("# cutoff= %g\n", pm->a);
00244 printf("# grid spacing= %g\n", pm->gridspacing);
00245 printf("# hx, hy, hz= %g, %g, %g\n", pm->hx, pm->hy, pm->hz);
00246 printf("# h-grid size= %d x %d x %d\n", pm->nx, pm->ny, pm->nz);
00247 printf("# number of levels= %d\n", pm->nlevels);
00248 printf("# approximation= %s\n", NL_msm_approx_name(pm->approx));
00249 printf("# splitting= %s\n", NL_msm_split_name(pm->split));
00250 printf("# grid hierarchy:\n");
00251 if (pm->msmflags & NL_MSM_COMPUTE_SPREC) {
00252 for (k = 0; k < pm->nlevels; k++) {
00253 NL_Msmgrid_float *qh = &(pm->qh_f[k]);
00254 int ia = qh->i0;
00255 int ib = ia + qh->ni - 1;
00256 int ja = qh->j0;
00257 int jb = ja + qh->nj - 1;
00258 int ka = qh->k0;
00259 int kb = ka + qh->nk - 1;
00260 printf("# level= %d: [%d..%d] x [%d..%d] x [%d..%d]\n",
00261 k, ia, ib, ja, jb, ka, kb);
00262 }
00263 }
00264 else {
00265 for (k = 0; k < pm->nlevels; k++) {
00266 NL_Msmgrid_double *qh = &(pm->qh[k]);
00267 int ia = qh->i0;
00268 int ib = ia + qh->ni - 1;
00269 int ja = qh->j0;
00270 int jb = ja + qh->nj - 1;
00271 int ka = qh->k0;
00272 int kb = ka + qh->nk - 1;
00273 printf("# level= %d: [%d..%d] x [%d..%d] x [%d..%d]\n",
00274 k, ia, ib, ja, jb, ka, kb);
00275 }
00276 }
00277 return NL_MSM_SUCCESS;
00278 }
00279
00280
00281 int setup_cell_vectors(NL_Msm *pm) {
00282 double *u = pm->cellvec1;
00283 double *v = pm->cellvec2;
00284 double *w = pm->cellvec3;
00285 double *bu = pm->recipvec1;
00286 double *bv = pm->recipvec2;
00287 double *bw = pm->recipvec3;
00288 double c[3], s;
00289
00290 c[X] = v[Y]*w[Z] - v[Z]*w[Y];
00291 c[Y] = v[Z]*w[X] - v[X]*w[Z];
00292 c[Z] = v[X]*w[Y] - v[Y]*w[X];
00293 s = 1 / (u[X]*c[X] + u[Y]*c[Y] + u[Z]*c[Z]);
00294 bu[X] = s*c[X];
00295 bu[Y] = s*c[Y];
00296 bu[Z] = s*c[Z];
00297
00298 c[X] = w[Y]*u[Z] - w[Z]*u[Y];
00299 c[Y] = w[Z]*u[X] - w[X]*u[Z];
00300 c[Z] = w[X]*u[Y] - w[Y]*u[X];
00301 s = 1 / (v[X]*c[X] + v[Y]*c[Y] + v[Z]*c[Z]);
00302 bv[X] = s*c[X];
00303 bv[Y] = s*c[Y];
00304 bv[Z] = s*c[Z];
00305
00306 c[X] = u[Y]*v[Z] - u[Z]*v[Y];
00307 c[Y] = u[Z]*v[X] - u[X]*v[Z];
00308 c[Z] = u[X]*v[Y] - u[Y]*v[X];
00309 s = 1 / (w[X]*c[X] + w[Y]*c[Y] + w[Z]*c[Z]);
00310 bw[X] = s*c[X];
00311 bw[Y] = s*c[Y];
00312 bw[Z] = s*c[Z];
00313
00314 return NL_MSM_SUCCESS;
00315 }
00316
00317
00318 int setup_bins_1away(NL_Msm *pm) {
00319 const double *u = pm->cellvec1;
00320 const double *v = pm->cellvec2;
00321 const double *w = pm->cellvec3;
00322 double *ru = pm->recipvec1;
00323 double *rv = pm->recipvec2;
00324 double *rw = pm->recipvec3;
00325 double p[3];
00326 double pulen, pvlen, pwlen, s;
00327 double cutoff = pm->a;
00328 int nbx, nby, nbz, numbins;
00329 int ispx = ((pm->msmflags & NL_MSM_PERIODIC_VEC1) != 0);
00330 int ispy = ((pm->msmflags & NL_MSM_PERIODIC_VEC2) != 0);
00331 int ispz = ((pm->msmflags & NL_MSM_PERIODIC_VEC3) != 0);
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342 p[X] = v[Y]*w[Z] - v[Z]*w[Y];
00343 p[Y] = v[Z]*w[X] - v[X]*w[Z];
00344 p[Z] = v[X]*w[Y] - v[Y]*w[X];
00345
00346 s = (u[X]*p[X] + u[Y]*p[Y] + u[Z]*p[Z]) / (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00347 p[X] *= s;
00348 p[Y] *= s;
00349 p[Z] *= s;
00350 pulen = sqrt(p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00351
00352 p[X] = w[Y]*u[Z] - w[Z]*u[Y];
00353 p[Y] = w[Z]*u[X] - w[X]*u[Z];
00354 p[Z] = w[X]*u[Y] - w[Y]*u[X];
00355
00356 s = (v[X]*p[X] + v[Y]*p[Y] + v[Z]*p[Z]) / (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00357 p[X] *= s;
00358 p[Y] *= s;
00359 p[Z] *= s;
00360 pvlen = sqrt(p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00361
00362 p[X] = u[Y]*v[Z] - u[Z]*v[Y];
00363 p[Y] = u[Z]*v[X] - u[X]*v[Z];
00364 p[Z] = u[X]*v[Y] - u[Y]*v[X];
00365
00366 s = (w[X]*p[X] + w[Y]*p[Y] + w[Z]*p[Z]) / (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00367 p[X] *= s;
00368 p[Y] *= s;
00369 p[Z] *= s;
00370 pwlen = sqrt(p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00371
00372
00373
00374 nbx = (ispx ? (int) floor(pulen / cutoff) : (int) ceil(pulen / cutoff));
00375 nby = (ispy ? (int) floor(pvlen / cutoff) : (int) ceil(pvlen / cutoff));
00376 nbz = (ispz ? (int) floor(pwlen / cutoff) : (int) ceil(pwlen / cutoff));
00377 if (nbx == 0 || nby == 0 || nbz == 0) {
00378 return NL_MSM_ERROR_PARAM;
00379 }
00380 numbins = nbx * nby * nbz;
00381
00382
00383
00384 if (pm->maxbins < numbins) {
00385 void *vptr = malloc(numbins * sizeof(int));
00386 if (vptr == NULL) return NL_MSM_ERROR_MALLOC;
00387 pm->bin = (int *) vptr;
00388 pm->maxbins = numbins;
00389 }
00390 pm->nbx = nbx;
00391 pm->nby = nby;
00392 pm->nbz = nbz;
00393 pm->numbins = numbins;
00394
00395
00396
00397
00398 if (ispx == 0) {
00399 s = pulen / (nbx * cutoff);
00400 ru[X] *= s;
00401 ru[Y] *= s;
00402 ru[Z] *= s;
00403 }
00404 if (ispy == 0) {
00405 s = pvlen / (nby * cutoff);
00406 rv[X] *= s;
00407 rv[Y] *= s;
00408 rv[Z] *= s;
00409 }
00410 if (ispz == 0) {
00411 s = pwlen / (nbz * cutoff);
00412 rw[X] *= s;
00413 rw[Y] *= s;
00414 rw[Z] *= s;
00415 }
00416 return NL_MSM_SUCCESS;
00417 }
00418
00419
00420 int setup_bins_k_away(NL_Msm *pm) {
00421 double *u = pm->cellvec1;
00422 double *v = pm->cellvec2;
00423 double *w = pm->cellvec3;
00424 double *ru = pm->recipvec1;
00425 double *rv = pm->recipvec2;
00426 double *rw = pm->recipvec3;
00427 double *bu = pm->bu;
00428 double *bv = pm->bv;
00429 double *bw = pm->bw;
00430 double p[3];
00431 double pulen, pvlen, pwlen, s;
00432 double cutoff = pm->a;
00433 int nbx, nby, nbz, numbins;
00434 int ispx = ((pm->msmflags & NL_MSM_PERIODIC_VEC1) != 0);
00435 int ispy = ((pm->msmflags & NL_MSM_PERIODIC_VEC2) != 0);
00436 int ispz = ((pm->msmflags & NL_MSM_PERIODIC_VEC3) != 0);
00437 int nbrhdmax, nradius, ndiameter, index, i, j, k;
00438 double volume;
00439 double binvol, abincnt, binlen, c;
00440 double min2;
00441
00442
00443
00444 p[X] = v[Y]*w[Z] - v[Z]*w[Y];
00445 p[Y] = v[Z]*w[X] - v[X]*w[Z];
00446 p[Z] = v[X]*w[Y] - v[Y]*w[X];
00447
00448 s = (u[X]*p[X] + u[Y]*p[Y] + u[Z]*p[Z]) / (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00449 p[X] *= s;
00450 p[Y] *= s;
00451 p[Z] *= s;
00452 pulen = sqrt(p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00453
00454 p[X] = w[Y]*u[Z] - w[Z]*u[Y];
00455 p[Y] = w[Z]*u[X] - w[X]*u[Z];
00456 p[Z] = w[X]*u[Y] - w[Y]*u[X];
00457
00458 s = (v[X]*p[X] + v[Y]*p[Y] + v[Z]*p[Z]) / (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00459 p[X] *= s;
00460 p[Y] *= s;
00461 p[Z] *= s;
00462 pvlen = sqrt(p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00463
00464 p[X] = u[Y]*v[Z] - u[Z]*v[Y];
00465 p[Y] = u[Z]*v[X] - u[X]*v[Z];
00466 p[Z] = u[X]*v[Y] - u[Y]*v[X];
00467 volume = w[X]*p[X] + w[Y]*p[Y] + w[Z]*p[Z];
00468
00469 s = volume / (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00470 p[X] *= s;
00471 p[Y] *= s;
00472 p[Z] *= s;
00473 pwlen = sqrt(p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00474
00475 if ((ispx && cutoff > pulen) ||
00476 (ispy && cutoff > pvlen) ||
00477 (ispz && cutoff > pwlen)) {
00478 printf("Cutoff %.3g too big for cell along basis%s%s%s\n",
00479 cutoff, (cutoff > pulen ? " x" : ""), (cutoff > pvlen ? " y" : ""),
00480 (cutoff > pwlen ? " z" : ""));
00481 return NL_MSM_ERROR_PARAM;
00482 }
00483
00484
00485
00486 binvol = pm->binfill * pm->nbinslots / pm->density;
00487 abincnt = volume / binvol;
00488 binlen = pow(binvol, 1./3);
00489
00490
00491
00492 c = pow(abincnt / (pulen*pvlen*pwlen), 1./3);
00493 nbx = (int) ceil(c * pulen);
00494 nby = (int) ceil(c * pvlen);
00495 nbz = (int) ceil(c * pwlen);
00496 numbins = nbx * nby * nbz;
00497
00498 printf("nbx=%d nby=%d nbz=%d numbins=%d\n", nbx, nby, nbz, numbins);
00499
00500
00501 if (pm->maxbins < numbins) {
00502 void *vptr = malloc(numbins * sizeof(int));
00503 if (vptr == NULL) return NL_MSM_ERROR_MALLOC;
00504 pm->bin = (int *) vptr;
00505 pm->maxbins = numbins;
00506 }
00507 pm->nbx = nbx;
00508 pm->nby = nby;
00509 pm->nbz = nbz;
00510 pm->numbins = numbins;
00511
00512
00513 if ( ! ispx) {
00514 s = pulen / (nbx * binlen);
00515 ru[X] *= s;
00516 ru[Y] *= s;
00517 ru[Z] *= s;
00518 s = 1./s;
00519 u[X] *= s;
00520 u[Y] *= s;
00521 u[Z] *= s;
00522 }
00523 if ( ! ispy) {
00524 s = pvlen / (nby * binlen);
00525 rv[X] *= s;
00526 rv[Y] *= s;
00527 rv[Z] *= s;
00528 s = 1./s;
00529 v[X] *= s;
00530 v[Y] *= s;
00531 v[Z] *= s;
00532 }
00533 if ( ! ispz) {
00534 s = pwlen / (nbz * binlen);
00535 rw[X] *= s;
00536 rw[Y] *= s;
00537 rw[Z] *= s;
00538 s = 1./s;
00539 w[X] *= s;
00540 w[Y] *= s;
00541 w[Z] *= s;
00542 }
00543
00544
00545 s = 1./nbx;
00546 bu[X] = s * u[X];
00547 bu[Y] = s * u[Y];
00548 bu[Z] = s * u[Z];
00549 s = 1./nby;
00550 bv[X] = s * v[X];
00551 bv[Y] = s * v[Y];
00552 bv[Z] = s * v[Z];
00553 s = 1./nbz;
00554 bw[X] = s * w[X];
00555 bw[Y] = s * w[Y];
00556 bw[Z] = s * w[Z];
00557
00558
00559
00560
00561 min2 = (bu[X]*bu[X] + bu[Y]*bu[Y] + bu[Z]*bu[Z]);
00562 s = (bv[X]*bv[X] + bv[Y]*bv[Y] + bv[Z]*bv[Z]);
00563 if (min2 > s) min2 = s;
00564 s = (bw[X]*bw[X] + bw[Y]*bw[Y] + bw[Z]*bw[Z]);
00565 if (min2 > s) min2 = s;
00566
00567
00568 p[X] = bu[X] + bv[X] + bw[X];
00569 p[Y] = bu[Y] + bv[Y] + bw[Y];
00570 p[Z] = bu[Z] + bv[Z] + bw[Z];
00571 s = (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00572 if (min2 > s) min2 = s;
00573 p[X] = -bu[X] + bv[X] + bw[X];
00574 p[Y] = -bu[Y] + bv[Y] + bw[Y];
00575 p[Z] = -bu[Z] + bv[Z] + bw[Z];
00576 s = (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00577 if (min2 > s) min2 = s;
00578 p[X] = bu[X] - bv[X] + bw[X];
00579 p[Y] = bu[Y] - bv[Y] + bw[Y];
00580 p[Z] = bu[Z] - bv[Z] + bw[Z];
00581 s = (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00582 if (min2 > s) min2 = s;
00583 p[X] = bu[X] + bv[X] - bw[X];
00584 p[Y] = bu[Y] + bv[Y] - bw[Y];
00585 p[Z] = bu[Z] + bv[Z] - bw[Z];
00586 s = (p[X]*p[X] + p[Y]*p[Y] + p[Z]*p[Z]);
00587 if (min2 > s) min2 = s;
00588
00589
00590
00591 nradius = (int) ceil(sqrt(cutoff*cutoff / min2));
00592 ndiameter = 2 * nradius + 1;
00593 nbrhdmax = (ndiameter * ndiameter * ndiameter) / 2 + 1;
00594
00595 #if 0
00596 printf("Neighborhood radius = %d\n", nradius);
00597 printf("min2 = %g\n", min2);
00598 #endif
00599
00600
00601 if (pm->nbrhd) free(pm->nbrhd);
00602 pm->nbrhd = (int *) calloc(3*nbrhdmax, sizeof(int));
00603 if (pm->nbrhd == NULL) return NL_MSM_ERROR_MALLOC;
00604 pm->nbrhdmax = nbrhdmax;
00605 pm->nradius = nradius;
00606
00607
00608 index = 0;
00609 for (k = -nradius; k <= nradius; k++) {
00610 for (j = -nradius; j <= nradius; j++) {
00611 for (i = -nradius; i <= nradius; i++) {
00612
00613 int ip, jp, kp, iq, jq, kq;
00614 int is_within = 0;
00615 int binindex = (k * ndiameter + j) * ndiameter + i;
00616 if (binindex < 0) continue;
00617 for (kp = 0; kp <= 1; kp++) {
00618 for (jp = 0; jp <= 1; jp++) {
00619 for (ip = 0; ip <= 1; ip++) {
00620 p[X] = (i+ip)*bu[X] + (j+jp)*bv[X] + (k+kp)*bw[X];
00621 p[Y] = (i+ip)*bu[Y] + (j+jp)*bv[Y] + (k+kp)*bw[Y];
00622 p[Z] = (i+ip)*bu[Z] + (j+jp)*bv[Z] + (k+kp)*bw[Z];
00623 for (kq = 0; kq <= 1; kq++) {
00624 for (jq = 0; jq <= 1; jq++) {
00625 for (iq = 0; iq <= 1; iq++) {
00626 double q[3];
00627 double dx, dy, dz, r2;
00628 q[X] = iq*bu[X] + jq*bv[X] + kq*bw[X];
00629 q[Y] = iq*bu[Y] + jq*bv[Y] + kq*bw[Y];
00630 q[Z] = iq*bu[Z] + jq*bv[Z] + kq*bw[Z];
00631 dx = p[X] - q[X];
00632 dy = p[Y] - q[Y];
00633 dz = p[Z] - q[Z];
00634 r2 = dx*dx + dy*dy + dz*dz;
00635 is_within |= (r2 < cutoff*cutoff);
00636 }
00637 }
00638 }
00639
00640 }
00641 }
00642 }
00643 if (is_within) {
00644 pm->nbrhd[index++] = i;
00645 pm->nbrhd[index++] = j;
00646 pm->nbrhd[index++] = k;
00647 }
00648
00649 }
00650 }
00651 }
00652 pm->nbrhdlen = index / 3;
00653
00654 return NL_MSM_SUCCESS;
00655 }
00656
00657
00658 int setup_hgrid_1d(
00659 NL_Msm *pm,
00660 double len,
00661 double *hh,
00662 int *nn,
00663 int *aindex,
00664 int *bindex,
00665 int isperiodic
00666 ) {
00667
00668 const int nu = INTERP_PARAMS[pm->approx].nu;
00669
00670 ASSERT(hmax > 0);
00671 if (isperiodic) {
00672 const double hmin = (4./5) * pm->gridspacing;
00673 const double hmax = 1.5 * hmin;
00674 double h = len;
00675 int n = 1;
00676 while (h >= hmax) {
00677 h *= 0.5;
00678 n <<= 1;
00679 }
00680 if (h < hmin) {
00681 if (n < 4) {
00682 return NL_MSM_ERROR_PARAM;
00683 }
00684 h *= (4./3);
00685 n >>= 2;
00686 n *= 3;
00687 }
00688
00689
00690 *hh = h;
00691 *nn = n;
00692 *aindex = 0;
00693 *bindex = n-1;
00694 }
00695 else {
00696 double h = pm->gridspacing;
00697 int n = (int) floorf(len / h) + 1;
00698 *hh = h;
00699 *nn = n;
00700 *aindex = -nu;
00701 *bindex = n + nu;
00702 }
00703 return NL_MSM_SUCCESS;
00704 }
00705
00706
00707 int setup_grids(NL_Msm *pm) {
00708 const int nu = INTERP_PARAMS[pm->approx].nu;
00709 const int omega = INTERP_PARAMS[pm->approx].omega;
00710 const int split = pm->split;
00711 const int ispx = (pm->msmflags & NL_MSM_PERIODIC_VEC1);
00712 const int ispy = (pm->msmflags & NL_MSM_PERIODIC_VEC2);
00713 const int ispz = (pm->msmflags & NL_MSM_PERIODIC_VEC3);
00714 const int ispany = (pm->msmflags & NL_MSM_PERIODIC_ALL);
00715
00716 const int issprec = (pm->msmflags & NL_MSM_COMPUTE_SPREC);
00717
00718 const double xlen = pm->cellvec1[0];
00719 const double ylen = pm->cellvec2[1];
00720 const double zlen = pm->cellvec3[2];
00721
00722 const double a = pm->a;
00723 double hx, hy, hz;
00724 double scaling;
00725 double d;
00726
00727 NL_Msmgrid_double *p = NULL;
00728 NL_Msmgrid_float *p_f = NULL;
00729 int ia, ib, ja, jb, ka, kb, ni, nj, nk;
00730 int nx, ny, nz;
00731
00732 int i, j, k, n;
00733 int index;
00734 int level, toplevel, nlevels, maxlevels;
00735 int lastnelems = 1;
00736 int isclamped = 0;
00737 int done, alldone;
00738
00739 int rc = 0;
00740
00741 rc = setup_hgrid_1d(pm, xlen, &hx, &nx, &ia, &ib, ispx);
00742 if (rc) return rc;
00743
00744 rc = setup_hgrid_1d(pm, ylen, &hy, &ny, &ja, &jb, ispy);
00745 if (rc) return rc;
00746
00747 rc = setup_hgrid_1d(pm, zlen, &hz, &nz, &ka, &kb, ispz);
00748 if (rc) return rc;
00749
00750 pm->hx = hx;
00751 pm->hy = hy;
00752 pm->hz = hz;
00753
00754
00755 pm->gx = pm->cellcenter[0] - ((nx >> 1) * hx);
00756 pm->gy = pm->cellcenter[1] - ((ny >> 1) * hy);
00757 pm->gz = pm->cellcenter[2] - ((nz >> 1) * hz);
00758
00759 pm->nx = nx;
00760 pm->ny = ny;
00761 pm->nz = nz;
00762
00763 ni = ib - ia + 1;
00764 nj = jb - ja + 1;
00765 nk = kb - ka + 1;
00766
00767
00768 n = (nk > omega ? nk : omega);
00769 if (pm->max_lzd < n) {
00770 if (issprec) {
00771 float *t;
00772 t = (float *) realloc(pm->lzd, n * sizeof(float));
00773 if (NULL == t) return NL_MSM_ERROR_MALLOC;
00774 pm->lzd_f = t;
00775 }
00776 else {
00777 double *t;
00778 t = (double *) realloc(pm->lzd, n * sizeof(double));
00779 if (NULL == t) return NL_MSM_ERROR_MALLOC;
00780 pm->lzd = t;
00781 }
00782 pm->max_lzd = n;
00783 }
00784 n *= (nj > omega ? nj : omega);
00785 if (pm->max_lyzd < n) {
00786 if (issprec) {
00787 float *t;
00788 t = (float *) realloc(pm->lyzd, n * sizeof(float));
00789 if (NULL == t) return NL_MSM_ERROR_MALLOC;
00790 pm->lyzd_f = t;
00791 }
00792 else {
00793 double *t;
00794 t = (double *) realloc(pm->lyzd, n * sizeof(double));
00795 if (NULL == t) return NL_MSM_ERROR_MALLOC;
00796 pm->lyzd = t;
00797 }
00798 pm->max_lyzd = n;
00799 }
00800
00801 nlevels = pm->nlevels;
00802 if (nlevels <= 0) {
00803
00804 n = ni;
00805 if (n < nj) n = nj;
00806 if (n < nk) n = nk;
00807 for (maxlevels = 1; n > 0; n >>= 1) maxlevels++;
00808 nlevels = maxlevels;
00809 if (ispany == 0) {
00810 int omega3 = omega * omega * omega;
00811 int nhalf = (int) sqrtf(ni*nj*nk);
00812 lastnelems = (nhalf > omega3 ? nhalf : omega3);
00813 isclamped = 1;
00814 }
00815 }
00816 else {
00817
00818 maxlevels = nlevels;
00819 }
00820
00821
00822 if (pm->maxlevels < maxlevels) {
00823 void *vqh, *veh, *vgc;
00824 if (issprec) {
00825 vqh = realloc(pm->qh, maxlevels * sizeof(NL_Msmgrid_float));
00826 if (NULL == vqh) return NL_MSM_ERROR_MALLOC;
00827 veh = realloc(pm->eh, maxlevels * sizeof(NL_Msmgrid_float));
00828 if (NULL == veh) return NL_MSM_ERROR_MALLOC;
00829 vgc = realloc(pm->gc, maxlevels * sizeof(NL_Msmgrid_float));
00830 if (NULL == vgc) return NL_MSM_ERROR_MALLOC;
00831 pm->qh_f = (NL_Msmgrid_float *) vqh;
00832 pm->eh_f = (NL_Msmgrid_float *) veh;
00833 pm->gc_f = (NL_Msmgrid_float *) vgc;
00834
00835 for (level = pm->maxlevels; level < maxlevels; level++) {
00836 GRID_INIT( &(pm->qh_f[level]) );
00837 GRID_INIT( &(pm->eh_f[level]) );
00838 GRID_INIT( &(pm->gc_f[level]) );
00839 }
00840 }
00841 else {
00842 vqh = realloc(pm->qh, maxlevels * sizeof(NL_Msmgrid_double));
00843 if (NULL == vqh) return NL_MSM_ERROR_MALLOC;
00844 veh = realloc(pm->eh, maxlevels * sizeof(NL_Msmgrid_double));
00845 if (NULL == veh) return NL_MSM_ERROR_MALLOC;
00846 vgc = realloc(pm->gc, maxlevels * sizeof(NL_Msmgrid_double));
00847 if (NULL == vgc) return NL_MSM_ERROR_MALLOC;
00848 pm->qh = (NL_Msmgrid_double *) vqh;
00849 pm->eh = (NL_Msmgrid_double *) veh;
00850 pm->gc = (NL_Msmgrid_double *) vgc;
00851
00852 for (level = pm->maxlevels; level < maxlevels; level++) {
00853 GRID_INIT( &(pm->qh[level]) );
00854 GRID_INIT( &(pm->eh[level]) );
00855 GRID_INIT( &(pm->gc[level]) );
00856 }
00857 }
00858 pm->maxlevels = maxlevels;
00859 }
00860
00861 level = 0;
00862 done = 0;
00863 alldone = 0;
00864 do {
00865 if (issprec) {
00866 GRID_RESIZE( &(pm->qh_f[level]), float, ia, ni, ja, nj, ka, nk);
00867 GRID_RESIZE( &(pm->eh_f[level]), float, ia, ni, ja, nj, ka, nk);
00868 }
00869 else {
00870 GRID_RESIZE( &(pm->qh[level]), double, ia, ni, ja, nj, ka, nk);
00871 GRID_RESIZE( &(pm->eh[level]), double, ia, ni, ja, nj, ka, nk);
00872 }
00873
00874 if (++level == nlevels) done |= 0x07;
00875
00876 alldone = (done == 0x07);
00877
00878 if (isclamped) {
00879 int nelems = ni * nj * nk;
00880 if (nelems <= lastnelems) done |= 0x07;
00881 }
00882
00883 if (ispx) {
00884 ni >>= 1;
00885 ib = ni-1;
00886 if (ni & 1) done |= 0x07;
00887 else if (ni == 2) done |= 0x01;
00888 }
00889 else {
00890 ia = -((-ia+1)/2) - nu;
00891 ib = (ib+1)/2 + nu;
00892 ni = ib - ia + 1;
00893 if (ni <= omega) done |= 0x01;
00894 }
00895
00896 if (ispy) {
00897 nj >>= 1;
00898 jb = nj-1;
00899 if (nj & 1) done |= 0x07;
00900 else if (nj == 2) done |= 0x02;
00901 }
00902 else {
00903 ja = -((-ja+1)/2) - nu;
00904 jb = (jb+1)/2 + nu;
00905 nj = jb - ja + 1;
00906 if (nj <= omega) done |= 0x02;
00907 }
00908
00909 if (ispz) {
00910 nk >>= 1;
00911 kb = nk-1;
00912 if (nk & 1) done |= 0x07;
00913 else if (nk == 2) done |= 0x04;
00914 }
00915 else {
00916 ka = -((-ka+1)/2) - nu;
00917 kb = (kb+1)/2 + nu;
00918 nk = kb - ka + 1;
00919 if (nk <= omega) done |= 0x04;
00920 }
00921
00922 } while ( ! alldone );
00923 pm->nlevels = level;
00924
00925 toplevel = (ispany ? pm->nlevels : pm->nlevels - 1);
00926
00927
00928 ni = (int) ceil(2*a/hx) - 1;
00929 nj = (int) ceil(2*a/hy) - 1;
00930 nk = (int) ceil(2*a/hz) - 1;
00931 scaling = 1;
00932 for (level = 0; level < toplevel; level++) {
00933 if (issprec) {
00934 p_f = &(pm->gc_f[level]);
00935 GRID_RESIZE(p_f, float, -ni, 2*ni+1, -nj, 2*nj+1, -nk, 2*nk+1);
00936 }
00937 else {
00938 p = &(pm->gc[level]);
00939 GRID_RESIZE(p, double, -ni, 2*ni+1, -nj, 2*nj+1, -nk, 2*nk+1);
00940 }
00941
00942 if (0 == level) {
00943 index = 0;
00944 for (k = -nk; k <= nk; k++) {
00945 for (j = -nj; j <= nj; j++) {
00946 for (i = -ni; i <= ni; i++) {
00947 double s, t, gs, gt, g;
00948 s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
00949 t = 0.5 * s;
00950 if (t >= 1) {
00951 g = 0;
00952 }
00953 else if (s >= 1) {
00954 gs = 1/s;
00955 SPOLY(>, &d, t, split);
00956 g = (gs - 0.5 * gt) / a;
00957 }
00958 else {
00959 SPOLY(&gs, &d, s, split);
00960 SPOLY(>, &d, t, split);
00961 g = (gs - 0.5 * gt) / a;
00962 }
00963 if (issprec) {
00964 GRID_INDEX_CHECK(p_f, i, j, k);
00965 ASSERT(p_f->buffer + index == p_f->data + GRID_INDEX(p_f,i,j,k));
00966 p_f->buffer[index] = (float) g;
00967 }
00968 else {
00969 GRID_INDEX_CHECK(p, i, j, k);
00970 ASSERT( p->buffer + index == p->data + GRID_INDEX(p, i, j, k) );
00971 p->buffer[index] = g;
00972 }
00973 index++;
00974 }
00975 }
00976 }
00977 }
00978 else {
00979
00980 if (issprec) {
00981 const NL_Msmgrid_float *first = &(pm->gc_f[0]);
00982 index = 0;
00983 for (k = -nk; k <= nk; k++) {
00984 for (j = -nj; j <= nj; j++) {
00985 for (i = -ni; i <= ni; i++) {
00986 GRID_INDEX_CHECK(p_f, i, j, k);
00987 ASSERT(p_f->buffer + index == p_f->data + GRID_INDEX(p_f,i,j,k));
00988 p_f->buffer[index] = (float) (scaling * first->buffer[index]);
00989 index++;
00990 }
00991 }
00992 }
00993 }
00994 else {
00995 const NL_Msmgrid_double *first = &(pm->gc[0]);
00996 index = 0;
00997 for (k = -nk; k <= nk; k++) {
00998 for (j = -nj; j <= nj; j++) {
00999 for (i = -ni; i <= ni; i++) {
01000 GRID_INDEX_CHECK(p, i, j, k);
01001 ASSERT( p->buffer + index == p->data + GRID_INDEX(p, i, j, k) );
01002 p->buffer[index] = scaling * first->buffer[index];
01003 index++;
01004 }
01005 }
01006 }
01007 }
01008 }
01009 scaling *= 0.5;
01010 }
01011
01012 if (toplevel < pm->nlevels) {
01013
01014
01015 if (issprec) {
01016 const NL_Msmgrid_float *qhtop_f = &(pm->qh_f[toplevel]);
01017 ni = qhtop_f->ni - 1;
01018 nj = qhtop_f->nj - 1;
01019 nk = qhtop_f->nk - 1;
01020 }
01021 else {
01022 const NL_Msmgrid_double *qhtop = &(pm->qh[toplevel]);
01023 ni = qhtop->ni - 1;
01024 nj = qhtop->nj - 1;
01025 nk = qhtop->nk - 1;
01026 }
01027 if (issprec) {
01028 p_f = &(pm->gc_f[toplevel]);
01029 GRID_RESIZE(p_f, float, -ni, 2*ni+1, -nj, 2*nj+1, -nk, 2*nk+1);
01030 }
01031 else {
01032 p = &(pm->gc[toplevel]);
01033 GRID_RESIZE(p, double, -ni, 2*ni+1, -nj, 2*nj+1, -nk, 2*nk+1);
01034 }
01035 index = 0;
01036 for (k = -nk; k <= nk; k++) {
01037 for (j = -nj; j <= nj; j++) {
01038 for (i = -ni; i <= ni; i++) {
01039 double s, gs;
01040 s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
01041 if (s >= 1) {
01042 gs = 1/s;
01043 }
01044 else {
01045 SPOLY(&gs, &d, s, split);
01046 }
01047 if (issprec) {
01048 GRID_INDEX_CHECK(p_f, i, j, k);
01049 ASSERT(p_f->buffer + index == p_f->data + GRID_INDEX(p_f,i,j,k));
01050 p_f->buffer[index] = (float) (scaling * gs/a);
01051 }
01052 else {
01053 GRID_INDEX_CHECK(p, i, j, k);
01054 ASSERT( p->buffer + index == p->data + GRID_INDEX(p, i, j, k) );
01055 p->buffer[index] = scaling * gs/a;
01056 }
01057 index++;
01058 }
01059 }
01060 }
01061 }
01062
01063
01064 if (1) {
01065 double s, gs;
01066 s = 0;
01067 SPOLY(&gs, &d, s, split);
01068 if (issprec) {
01069 pm->gzero_f = (float) (gs/a);
01070 }
01071 else {
01072 pm->gzero = gs/a;
01073 }
01074 }
01075
01076 return NL_MSM_SUCCESS;
01077 }