/*
 * Copyright (C) 2004-2005 by David J. Hardy.  All rights reserved.
 *
 * pquint1.c
 */

#include <stdio.h>
#include "mgrid/split.h"
/* #define DEBUG_WATCH */
#include "debug/debug.h"

#define NELEMS(a)  (sizeof(a)/sizeof((a)[0]))

/* prototypes of internal routines */
static void done(Mgrid *mg);
static int force(Mgrid *mg, MgridSystem *sys);
static int atoms_to_grid(Mgrid *mg, const MgridSystem *sys);
static int grid_to_atoms(Mgrid *mg, MgridSystem *sys);
static int restrict_grid(Mgrid *mg, int32 level);
static int prolong_grid(Mgrid *mg, int32 level);
static int direct_sum(Mgrid *mg, int32 level);


int mgrid_pquintic1_setup(Mgrid *mg, const MgridSystem *sys)
{
#if 0
  const double *q = sys->charge;

  /* omit top level when not debugging */
#ifdef DEBUG_SUPPORT
  const int32 nlevels = mg->param.nlevels;
#else
  const int32 nlevels = mg->param.nlevels - 1;
#endif

  const int32 natoms = mg->param.natoms;
  const int32 split = mg->param.split;
  double *gd;
  double r2, s, t, gs, gt;
  int32 i, j, k, n;
  int32 gdim, index;
  const double a = mg->param.cutoff;
  const double h = mg->param.spacing;
  const double h_1 = 1./h;
#endif

  /* assign virtual methods */
  mg->long_force = force;
  mg->long_done = done;

  return mgrid_setup_longrange(mg, sys);

#if 0
  /* set inverse of grid spacing */
  mg->inv_spacing = h_1;

  /* set location of multi-grids origin with lattice label (0,0,0) */
  mg->origin.x = mg->param.center.x - 0.5 * mg->param.length;
  mg->origin.y = mg->param.center.y - 0.5 * mg->param.length;
  mg->origin.z = mg->param.center.z - 0.5 * mg->param.length;

#ifdef DEBUG_WATCH
  printf("origin = (%g, %g, %g)\n", mg->origin.x, mg->origin.y, mg->origin.z);
#endif

  /* allocate arrays of charge and potential lattices */
  mg->qgrid = (MgridLattice *) malloc(nlevels * sizeof(MgridLattice));
  mg->egrid = (MgridLattice *) malloc(nlevels * sizeof(MgridLattice));
  if (mg->qgrid == NULL || mg->egrid == NULL) return MGRID_FAIL;

  /* setup charge and potential lattices for each level */
  n = mg->param.nspacings;
  for (k = 0;  k < nlevels;  k++, n >>= 1) {
    if (n == 0 || ((n & 1) && n != 1)) return MGRID_FAIL;
    if (mgrid_lattice_init(&(mg->qgrid[k]))
        || mgrid_lattice_init(&(mg->egrid[k]))) {
      return MGRID_FAIL;
    }
    if (mgrid_lattice_setup(&(mg->qgrid[k]), sizeof(double),
          0, n-1, 0, n-1, 0, n-1)
        || mgrid_lattice_setup(&(mg->egrid[k]), sizeof(double),
          0, n-1, 0, n-1, 0, n-1)) {
      return MGRID_FAIL;
    }
  }
#ifdef DEBUG_SUPPORT
  ASSERT(n == 0);  /* i.e. top level has 1 grid point */
#else
  if (n != 1) return MGRID_FAIL;  /* i.e. top level omitted */
#endif

  /* allocate and compute scalings for each direct sum on each level */
  mg->scaling = (double *) malloc(nlevels * sizeof(double));
  if (mg->scaling == NULL) return MGRID_FAIL;
  mg->scaling[0] = 1.0;
  for (k = 1;  k < nlevels;  k++) {
    mg->scaling[k] = 0.5 * mg->scaling[k-1];
  }

  /* allocate space for direct sum weights */
  n = (int32) (2.0 * a * h_1);
  mg->gdsum_radius = n;
  if (mgrid_lattice_setup(&(mg->gdsum), sizeof(double), -n, n, -n, n, -n, n)) {
    return MGRID_FAIL;
  }

  /* compute direct sum weights */
  if (mgrid_lattice_zero(&(mg->gdsum))) {
    return MGRID_FAIL;
  }
  gdim = mg->gdsum.ni;
  ASSERT(gdim == 2 * n + 1);
  gd = (double *)(mg->gdsum.data);
  for (k = -n;  k <= n;  k++) {
    for (j = -n;  j <= n;  j++) {
      for (i = -n;  i <= n;  i++) {
        r2 = (i*i +  j*j + k*k) * h*h;
        if (r2 >= 4*a*a) continue;
        s = r2 / (a*a);
        t = r2 / (4*a*a);
        ASSERT(t <= s);
        index = (k * gdim + j) * gdim + i;
        ASSERT(&gd[index] == mgrid_lattice_elem(&(mg->gdsum), i, j, k));
        if (s <= 1) gamma(&gs, s, split);
        else gs = 1/sqrt(s);
        if (t < 1) {
          gamma(&gt, t, split);
          gd[index] = gs/a - gt/(2*a);
        }
        else {
          /* then gt=1/sqrt(t), which implies that gs/a - gt(2*a) == 0 */
          gd[index] = 0;
        }
      }
    }
  }

#if 0
  /* allocate space for direct sum weights on last level */
  n = mg->qgrid[nlevels-1].ni - 1;
  mg->glast_radius = n;
  if (mgrid_lattice_setup((&mg->glast), sizeof(double), -n, n, -n, n, -n, n)) {
    return MGRID_FAIL;
  }

  /* compute direct sum weights on last level */
  if (mgrid_lattice_zero(&(mg->glast))) {
    return MGRID_FAIL;
  }
  gdim = mg->glast.ni;
  ASSERT(gdim == 2 * n + 1);
  gd = (double *)(mg->glast.data);
  for (k = -n;  k <= n;  k++) {
    for (j = -n;  j <= n;  j++) {
      for (i = -n;  i <= n;  i++) {
        r2 = (i*i + j*j + k*k) * h*h;
        s = r2 / (a*a);
        index = (k * gdim + j) * gdim + i;
        ASSERT(&gd[index] == mgrid_lattice_elem(&(mg->glast), i, j, k));
        /* unlike previous, this sum is not truncated */
        if (s <= 1) gamma(&gs, s, split);
        else gs = 1/sqrt(s);
        gd[index] = gs/a;
      }
    }
  }
#endif

  /* compute g_a(0) */
  s = 0;
  gamma(&gs, s, split);
  mg->g_zero = gs/a;

  /* compute self potential */
  s = 0;
  for (n = 0;  n < natoms;  n++) {
    s += q[n] * q[n];
  }
  mg->u_self = 0.5 * mg->g_zero * s;

  return 0;
#endif
}


void done(Mgrid *mg)
{
  /* omit top level when not debugging */
#ifdef DEBUG_SUPPORT
  const int32 nlevels = mg->param.nlevels;
#else
  const int32 nlevels = mg->param.nlevels - 1;
#endif
  int32 n;

  mgrid_lattice_done(&(mg->gdsum));
  mgrid_lattice_done(&(mg->glast));
  for (n = 0;  n < nlevels;  n++) {
    mgrid_lattice_done(&(mg->qgrid[n]));
    mgrid_lattice_done(&(mg->egrid[n]));
  }
  free(mg->qgrid);
  free(mg->egrid);
  free(mg->scaling);
}


int force(Mgrid *mg, MgridSystem *sys)
{
  /* omit top level when not debugging */
#ifdef DEBUG_SUPPORT
  const int32 nlevels = mg->param.nlevels;
#else
  const int32 nlevels = mg->param.nlevels - 1;
#endif
  int32 n;

  /* first charge grid must be zeroed */
  if (mgrid_lattice_zero(&(mg->qgrid[0]))) return MGRID_FAIL;

  /* computation is "inverse V-cycle" */
  if (atoms_to_grid(mg, sys)) return MGRID_FAIL;
  INT(nlevels);
  for (n = 0;  n < nlevels - 1;  n++) {
    if (direct_sum(mg, n)) return MGRID_FAIL;
    if (restrict_grid(mg, n)) return MGRID_FAIL;
  }
  INT(n);
  if (direct_sum(mg, n)) return MGRID_FAIL;
  for (--n;  n >= 0;  --n) {
    if (prolong_grid(mg, n)) return MGRID_FAIL;
  }
  if (grid_to_atoms(mg, sys)) return MGRID_FAIL;

  return 0;
}


int atoms_to_grid(Mgrid *mg, const MgridSystem *sys)
{
  double xphi[6], yphi[6], zphi[6];    /* phi grid func along x, y, z */
  double dx_h, dy_h, dz_h;             /* distance between atom and gridlo */
  double t;                            /* param to phi function */
  double ck, cjk;
  const double h_1 = mg->inv_spacing;  /* inverse grid spacing */
  const MD_Dvec origin = mg->origin;   /* lowest corner of domain */
  const double *q = sys->charge;       /* atom charges */
  const MD_Dvec *p = sys->pos;         /* atom positions */

  double *qh = (double *)(mg->qgrid[0].data);
      /* treat charge grid as 3D-lattice, index (i,j,k) */

  const int32 ndim = mg->qgrid[0].ni;  /* number points in each dimension */
  const int32 natoms = mg->param.natoms;  /* number of atoms */
  const int32 mask = ndim - 1;         /* indexing mask for periodic grids */
  int32 n, i, j, k, ilo, jlo, klo;
  int32 index, koff, jkoff;

  for (n = 0;  n < natoms;  n++) {

    /* distance between atom and corner measured in grid points */
    dx_h = (p[n].x - origin.x) * h_1;
    dy_h = (p[n].y - origin.y) * h_1;
    dz_h = (p[n].z - origin.z) * h_1;

    /* find smallest numbered grid point in stencil */
    ilo = ((int32) dx_h) - 2;
    jlo = ((int32) dy_h) - 2;
    klo = ((int32) dz_h) - 2;

    /* find t for x dimension and compute xphi */
    t = dx_h - (double) ilo;
    xphi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
    t--;
    xphi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t));
    t--;
    xphi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t));
    t--;
    xphi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t));
    t--;
    xphi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t));
    t--;
    xphi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);

    /* find t for y dimension and compute yphi */
    t = dy_h - (double) jlo;
    yphi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
    t--;
    yphi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t));
    t--;
    yphi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t));
    t--;
    yphi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t));
    t--;
    yphi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t));
    t--;
    yphi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);

    /* find t for z dimension and compute zphi */
    t = dz_h - (double) klo;
    zphi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
    t--;
    zphi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t));
    t--;
    zphi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t));
    t--;
    zphi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t));
    t--;
    zphi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t));
    t--;
    zphi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);

    /* determine charge on 64=4*4*4 qh grid point stencil */
    for (k = 0;  k < 6;  k++) {
      koff = ((k + klo) & mask) * ndim;
      ck = zphi[k] * q[n];
      for (j = 0;  j < 6;  j++) {
        jkoff = (koff + ((j + jlo) & mask)) * ndim;
        cjk = yphi[j] * ck;
        for (i = 0;  i < 6;  i++) {
          index = jkoff + ((i + ilo) & mask);
          ASSERT(&qh[index]
              == mgrid_lattice_elem(&(mg->qgrid[0]),
                (i+ilo) & mask, (j+jlo) & mask, (k+klo) & mask));
          qh[index] += xphi[i] * cjk;
        }
      }
    }

  } /* end loop over n */

#ifdef DEBUG_WATCH
  mgrid_lattice_print(&(mg->qgrid[0]));
#endif

  return 0;
}


int grid_to_atoms(Mgrid *mg, MgridSystem *sys)
{
  double xphi[6], yphi[6], zphi[6];     /* phi grid func along x, y, z */
  double dxphi[6], dyphi[6], dzphi[6];  /* dphi grid func along x, y, z */
  double dx_h, dy_h, dz_h;              /* distance between atom and gridlo */
  double t;                             /* param to phi and dphi functions */
  MD_Dvec c, fv;
  double u;
  const double u_self = mg->u_self;     /* self potential */
  const double h_1 = mg->inv_spacing;   /* inverse grid spacing */
  const MD_Dvec origin = mg->origin;    /* lowest corner of domain */
  const double *q = sys->charge;        /* atom charges */
  const MD_Dvec *p = sys->pos;          /* atom positions */
  MD_Dvec *f = (sys->f_long ? sys->f_long : sys->f_elec);
      /* accumulate contributions to atom force in f_long if allocated */

  double *eh = (double *)(mg->egrid[0].data);
      /* treat potential grid as 3D-lattice, index (i,j,k) */

  const double *evh = (double *)(mg->egrid[0].databuffer);
  const double *qvh = (double *)(mg->qgrid[0].databuffer);
      /* treat potential and charge grids as 1D arrays, index n */

  const int32 nelems = mg->egrid[0].nelems;  /* total number of grid points */
  const int32 ndim = mg->egrid[0].ni;    /* number points in each dimension */
  const int32 natoms = mg->param.natoms;     /* number of atoms */
  const int32 mask = ndim - 1;          /* indexing mask for periodic grids */

  int32 n, i, j, k, ilo, jlo, klo;
  int32 index, koff, jkoff;

  /* compute force */
  for (n = 0;  n < natoms;  n++) {

    /* distance between atom and corner measured in grid points */
    dx_h = (p[n].x - origin.x) * h_1;
    dy_h = (p[n].y - origin.y) * h_1;
    dz_h = (p[n].z - origin.z) * h_1;

    /* find largest numbered grid point in stencil */
    ilo = ((int32) dx_h) - 2;
    jlo = ((int32) dy_h) - 2;
    klo = ((int32) dz_h) - 2;

    /* find t for x dimension, compute xphi and dxphi */ 
    t = dx_h - (double) ilo;
    xphi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
    dxphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
          + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
    t--;
    xphi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t));
    dxphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))
        + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1;
    t--;
    xphi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t));
    dxphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t))
        + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1;
    t--;
    xphi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t));
    dxphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t))
        - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1;
    t--;
    xphi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t));
    dxphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t))
        - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1;
    t--;
    xphi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
    dxphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
          + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;

    /* find t for y dimension, compute yphi and dyphi */
    t = dy_h - (double) jlo;
    yphi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
    dyphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
          + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
    t--;
    yphi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t));
    dyphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))
        + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1;
    t--;
    yphi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t));
    dyphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t))
        + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1;
    t--;
    yphi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t));
    dyphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t))
        - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1;
    t--;
    yphi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t));
    dyphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t))
        - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1;
    t--;
    yphi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
    dyphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
          + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;

    /* find t for z dimension, compute zphi and dzphi */
    t = dz_h - (double) klo;
    zphi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
    dzphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
          + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
    t--;
    zphi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t));
    dzphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))
        + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1;
    t--;
    zphi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t));
    dzphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t))
        + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1;
    t--;
    zphi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t));
    dzphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t))
        - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1;
    t--;
    zphi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t));
    dzphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t))
        - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1;
    t--;
    zphi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
    dzphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
          + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;

    /* determine 64=4*4*4 eh grid stencil contribution to force */
    fv.x = fv.y = fv.z = 0;
    for (k = 0;  k < 6;  k++) {
      koff = ((k + klo) & mask) * ndim;
      for (j = 0;  j < 6;  j++) {
        jkoff = (koff + ((j + jlo) & mask)) * ndim;
        c.x = yphi[j] * zphi[k];
        c.y = dyphi[j] * zphi[k];
        c.z = yphi[j] * dzphi[k];
        for (i = 0;  i < 6;  i++) {
          index = jkoff + ((i + ilo) & mask);
          ASSERT(&eh[index]
              == mgrid_lattice_elem(&(mg->egrid[0]),
                (i+ilo) & mask, (j+jlo) & mask, (k+klo) & mask));
          fv.x += eh[index] * dxphi[i] * c.x;
          fv.y += eh[index] * xphi[i] * c.y;
          fv.z += eh[index] * xphi[i] * c.z;
        }
      }
    }

    /* update force */
    f[n].x -= q[n] * fv.x;
    f[n].y -= q[n] * fv.y;
    f[n].z -= q[n] * fv.z;

  } /* end loop over n */

  /* compute potential */
  u = 0.0;
  for (n = 0;  n < nelems;  n++) {
    u += qvh[n] * evh[n];
  }

  /* must subtract self potential, save long range potential, add to total */
  sys->u_long = (0.5 * u) - u_self;
  sys->u_elec += sys->u_long;

  /* if separate long range force array, add to total force */
  if (sys->f_long != NULL) {
    for (n = 0;  n < natoms;  n++) {
      sys->f_elec[n].x += sys->f_long[n].x;
      sys->f_elec[n].y += sys->f_long[n].y;
      sys->f_elec[n].z += sys->f_long[n].z;
    }
  }

  return 0;
}



/*
 * constants for grid transfer operations
 * cubic numerical Hermite interpolant
 */

/* length of stencil */
enum { nstencil = 7 };

/* phi interpolating function along one dimension of grid stencil */
static const double phi[nstencil] = { 3./256, -25./256, 75./128, 1,
  75./128, -25./256, 3./256 };

/* stencil offsets from a central grid point on a finer grid level */
/* (these offsets are where phi weights above have been evaluated) */
static const int32 offset[nstencil] = { -5, -3, -1, 0, 1, 3, 5 };


int restrict_grid(Mgrid *mg, int32 level)
{
  double q2h_sum;
  double cjk;

  /* lattices of charge */
  const double *qh = (double *)(mg->qgrid[level].data);  /* finer grid */
  double *q2h = (double *)(mg->qgrid[level+1].data);     /* coarser grid */

  /* finer grid - grid size and labeling identical in each dimension */
  const int32 ndim1 = mg->qgrid[level].ni;    /* number points along dim */
  const int32 mask = ndim1 - 1;        /* indexing mask for periodic grids */

  /* coarser grid - grid size and labeling identical in each dimension */
  const int32 ndim2 = mg->qgrid[level+1].ni;  /* number points along dim */

  /* other variables */
  int32 i1, j1, k1, index1, jk1off, k1off;
  int32 i2, j2, k2, index2, jk2off, k2off;
  int32 i, j, k;

  /* loop over coarser grid points */
  for (k2 = 0;  k2 < ndim2;  k2++) {
    k2off = k2 * ndim2;  /* coarser grid index offset for k-coord */
    k1 = k2 * 2;         /* k-coord of same-space point on finer grid */
    for (j2 = 0;  j2 < ndim2;  j2++) {
      jk2off = (k2off + j2) * ndim2;  /* add offset for j-coord coarser */
      j1 = j2 * 2;                    /* j-coord same-space finer grid */
      for (i2 = 0;  i2 < ndim2;  i2++) {
        index2 = jk2off + i2;         /* index in coarser grid */
        i1 = i2 * 2;                  /* i-coord same-space finer grid */

        /* sum weighted charge contribution from finer grid stencil */
        q2h_sum = 0.0;
        for (k = 0;  k < nstencil;  k++) {
          /* offset k-coord finer grid */
          k1off = ((k1 + offset[k]) & mask) * ndim1;
          for (j = 0;  j < nstencil;  j++) {
            /* add offset j */
            jk1off = (k1off + ((j1 + offset[j]) & mask)) * ndim1;
            cjk = phi[j] * phi[k];              /* mult weights in each dim */
            for (i = 0;  i < nstencil;  i++) {
              /* index in finer grid */
              index1 = jk1off + ((i1 + offset[i]) & mask);
              ASSERT(&qh[index1]
                  == mgrid_lattice_elem(&(mg->qgrid[level]),
                    (i1 + offset[i]) & mask, (j1 + offset[j]) & mask,
                    (k1 + offset[k]) & mask));
              q2h_sum += qh[index1] * phi[i] * cjk;  /* sum weighted charge */
            }
          }
        } /* end loop over finer grid stencil */

        ASSERT(&q2h[index2]
            == mgrid_lattice_elem(&(mg->qgrid[level+1]), i2, j2, k2));
        q2h[index2] = q2h_sum;  /* store charge to coarser grid */

      }
    }
  } /* end loop over each coarser grid point */

  return 0;
}


int prolong_grid(Mgrid *mg, int32 level)
{
  double cjk;

  /* lattices of potential */
  double *eh = (double *)(mg->egrid[level].data);           /* finer grid */
  const double *e2h = (double *)(mg->egrid[level+1].data);  /* coarser grid */

  /* finer grid - grid size and labeling identical in each dimension */
  const int32 ndim1 = mg->egrid[level].ni;    /* number points along dim */
  const int32 mask = ndim1 - 1;        /* indexing mask for periodic grids */

  /* coarser grid - grid size and labeling identical in each dimension */
  const int32 ndim2 = mg->egrid[level+1].ni;  /* number points along dim */

  /* other variables */
  int32 i1, j1, k1, index1, jk1off, k1off;
  int32 i2, j2, k2, index2, jk2off, k2off;
  int32 i, j, k;

  /* loop over coarser grid points */
  for (k2 = 0;  k2 < ndim2;  k2++) {
    k2off = k2 * ndim2;  /* coarser grid index offset for k-coord */
    k1 = k2 * 2;         /* k-coord of same-space point on finer grid */
    for (j2 = 0;  j2 < ndim2;  j2++) {
      jk2off = (k2off + j2) * ndim2;  /* add offset for j-coord coarser */
      j1 = j2 * 2;                    /* j-coord same-space finer grid */
      for (i2 = 0;  i2 < ndim2;  i2++) {
        index2 = jk2off + i2;         /* index in coarser grid */
        i1 = i2 * 2;                  /* i-coord same-space finer grid */

        /* sum weighted potential contribution to finer grid stencil */
        for (k = 0;  k < nstencil;  k++) {
          /* offset k-coord finer grid */
          k1off = ((k1 + offset[k]) & mask) * ndim1;
          for (j = 0;  j < nstencil;  j++) {
            /* add offset j */
            jk1off = (k1off + ((j1 + offset[j]) & mask)) * ndim1;
            cjk = phi[j] * phi[k];              /* mult weights in each dim */
            for (i = 0;  i < nstencil;  i++) {
              /* index in finer grid */
              index1 = jk1off + ((i1 + offset[i]) & mask);
              ASSERT(&eh[index1]
                  == mgrid_lattice_elem(&(mg->egrid[level]),
                    (i1 + offset[i]) & mask, (j1 + offset[j]) & mask,
                    (k1 + offset[k]) & mask));
              ASSERT(&e2h[index2]
                  == mgrid_lattice_elem(&(mg->egrid[level+1]), i2, j2, k2));
              /* sum weighted potential from coarser grid */
              eh[index1] += e2h[index2] * phi[i] * cjk;
            }
          }
        } /* end loop over finer grid stencil */

      }
    }
  } /* end loop over each coarser grid point */

  return 0;
}


int direct_sum(Mgrid *mg, int32 level)
{
  double eh_sum;
  double scaling = mg->scaling[level];     /* scaling factor for gd */

  /* lattices of charge and potential */
  const double *qh = (double *)(mg->qgrid[level].data);
  double *eh = (double *)(mg->egrid[level].data);
  const int32 ndim = mg->qgrid[level].ni;  /* number points in each dim */
  const int32 mask = ndim - 1;        /* indexing mask for periodic grids */

  /*
   * for last level use different block of weights that
   * produces pairwise interactions between all grid points
   */

  /* lattice of weights for short range charge influence on potential */
  const double *gd = (double *)(mg->gdsum.data);

  /* dimension of gd lattice */
  const int32 ngdim = mg->gdsum.ni;

  /* "radius" of gd "sphere" */
  const int32 ngrad = mg->gdsum_radius;

  int32 i, j, k, ig, jg, kg;
  int32 index, jkoff, koff;
  int32 nindex, jknoff, knoff;
  int32 ngindex, jkgoff, kgoff;

#ifdef DEBUG_SUPPORT
  if (ndim == 1) {
    ASSERT(level == mg->param.nlevels - 1);
    printf("#++ top level charge:  %g\n", qh[0]);
    eh[0] = 0;
    return 0;
  }
#endif

  PTR(mg->glast.data);
  PTR(gd);

  INT(mg->glast.ni);
  INT(ngdim);

  INT(mg->glast_radius);
  INT(ngrad);

#ifdef DEBUG_WATCH
  mgrid_lattice_print(&(mg->glast));
#endif

  /* loop over all grid points */
  for (k = 0;  k < ndim;  k++) {
    /* grid index offset for k-coord */
    koff = k * ndim;

    for (j = 0;  j < ndim;  j++) {
      /* add offset for j-coord */
      jkoff = (koff + j) * ndim;

      for (i = 0;  i < ndim;  i++) {
        /* index for potential grid point */
        index = jkoff + i;

        /* sum over "sphere" of weighted charge */
        eh_sum = 0.0;
        for (kg = -ngrad;  kg <= ngrad;  kg++) {
          /* grid index offset for k-coord */
          knoff = ((k + kg) & mask) * ndim;
          kgoff = kg * ngdim;       /* gd index offset for k-coord */

          for (jg = -ngrad;  jg <= ngrad;  jg++) {
            /* add offset for j-coord */
            jknoff = (knoff + ((j + jg) & mask)) * ndim;
            jkgoff = (kgoff + jg) * ngdim;       /* gd add offset j-coord */

            for (ig = -ngrad;  ig <= ngrad;  ig++) {
              /* index of charge contribution */
              nindex = jknoff + ((i + ig) & mask);
              ngindex = jkgoff + ig;       /* index of gd weighting factor */
              ASSERT(&qh[nindex]
                  == mgrid_lattice_elem(&(mg->qgrid[level]),
                    (i + ig) & mask, (j + jg) & mask, (k + kg) & mask));
              ASSERT(&gd[ngindex]
                  == mgrid_lattice_elem(&(mg->gdsum), ig, jg, kg));
              eh_sum += qh[nindex] * gd[ngindex];  /* sum weighted charge */
            }
          }
        } /* end loop over "sphere" of charge */

        ASSERT(&eh[index]
            == mgrid_lattice_elem(&(mg->egrid[level]), i, j, k));
        eh[index] = scaling * eh_sum;  /* scale and store potential */
      }
    }
  } /* end loop over all grid points */

  INT(level);
#ifdef DEBUG_WATCH
  mgrid_lattice_print(&(mg->egrid[level]));
#endif

  return 0;
}
