/*
 * Copyright (C) 2004-2005 by David J. Hardy.  All rights reserved.
 *
 * nonbond.c
 *
 * Routines to evaluate cutoff nonbonded forces.
 */

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "force/force.h"
#include "debug/debug.h"


/* index offsets into van der Waals parameter table */
enum {
  A,     /* = 0: coefficient multiplying 1/r^12 */
  B,     /* = 1: coefficient multiplying 1/r^6 */
  A_14,  /* = 2: 1/r^12 coefficient used for scaled 1-4 pairs */
  B_14   /* = 3: 1/r^6 coefficient used for scaled 1-4 pairs */
};


/*
 * Pair kernels: each one stores the force for the pair in f_ij
 * (computed from r_ij, r2 and the given constants) and returns the
 * potential energy of the pair.
 */

/* plain electrostatic term, energy c/r */
static double compute_elec(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double c);

/* electrostatic term multiplied by the smoothing factor (1 - r2/rc2)^2 */
static double compute_elec_smooth(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double c);

/* plain van der Waals term, energy a/r^12 - b/r^6 */
static double compute_vdw(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double a, double b);

/* van der Waals term with a switching function applied beyond switchdist */
static double compute_vdw_switch(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double a, double b);

/* sort atoms into grid cells (may update wrap for periodic dimensions) */
static int geometric_hash(Force *f, const MD_Dvec *pos, MD_Dvec *wrap);

/* cutoff pair interactions between atoms of neighboring grid cells */
static int interactions(Force *f, const MD_Dvec *pos, const MD_Dvec *wrap);

/* all-pairs interactions without any cutoff */
static int direct(Force *f, const MD_Dvec *pos, const MD_Dvec *wrap);

/* subtract interactions of excluded and scaled 1-4 pairs */
static int exclusions(Force *f, const MD_Dvec *pos);


int force_compute_nonbonded(Force *f, const MD_Dvec *pos, MD_Dvec *wrap)
{
  MD_Dvec *f_elec = f->result->f_elec;
  MD_Dvec *f_vdw = f->result->f_vdw;
  MD_Dvec *f_total = f->result->f;
  const int32 atom_len = f->param->atom_len;
  const int32 flags = f->param->flags;
  int32 k;

  ASSERT(flags & FORCE_NONBONDED);
  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  if ((flags & (FORCE_ELEC | FORCE_ELEC_EXCL | FORCE_ELEC_DIRECT))
      == FORCE_ELEC
      || (flags & (FORCE_VDW | FORCE_VDW_EXCL | FORCE_VDW_DIRECT))
      == FORCE_VDW) {
    ASSERT(f->param->cutoff > 0.0);
    /* geometric hashing of atoms into cells */
    if (geometric_hash(f, pos, wrap)) return FORCE_FAIL;
    /* accumulate pairwise interactions using cutoff */
    if (interactions(f, pos, wrap)) return FORCE_FAIL;
  }

  if (flags & FORCE_DIRECT) {
    /* direct all pairs computation */
    if (direct(f, pos, wrap)) return FORCE_FAIL;
  }

  if (flags & FORCE_EXCL) {
    /* subtract excluded interactions */
    if (exclusions(f, pos)) return FORCE_FAIL;
  }

  if ((flags & FORCE_ELEC) && f_elec) {
    /* accumulate electrostatic to total force array */
    for (k = 0;  k < atom_len;  k++) {
      f_total[k].x += f_elec[k].x;
      f_total[k].y += f_elec[k].y;
      f_total[k].z += f_elec[k].z;
    }
  }

  if ((flags & FORCE_VDW) && f_vdw) {
    /* accumulate van der Waals to total force array */
    for (k = 0;  k < atom_len;  k++) {
      f_total[k].x += f_vdw[k].x;
      f_total[k].y += f_vdw[k].y;
      f_total[k].z += f_vdw[k].z;
    }
  }

  return 0;
}


/*
 * Sort every atom into the grid of cells used by the cutoff pair search.
 *
 * On return, cell[c].head is the index of the first atom in cell c
 * (-1 if the cell is empty), next[] chains the remaining atoms of that
 * cell, and cell[c].cnt is the number of atoms in the cell.
 *
 * f    - force object; f->cell may be reallocated and f->ncells,
 *        f->nxcells, f->nycells, f->nzcells updated when the grid is
 *        resized; f->next is overwritten
 * pos  - atom positions (f->param->atom_len entries)
 * wrap - per-atom displacement added to pos before binning; in periodic
 *        dimensions it is adjusted so that the atom falls inside the grid,
 *        in nonperiodic dimensions it is expected to be zero
 *
 * If f->is_resize is set (and the computation is not entirely direct),
 * the grid is first refitted in each dimension flagged for resizing to
 * the current spread of atom coordinates.  The cell array itself is only
 * reallocated when the required cell count has at least doubled or has
 * dropped below 1/64 of the current count; otherwise the existing grid
 * dimensions are kept.  The refitted length, center and lower corner
 * are held in local variables only.
 *
 * Returns 0 on success, FORCE_FAIL if the cell array cannot be
 * reallocated or force_setup_nonbonded_cells() fails.
 */
int geometric_hash(Force *f, const MD_Dvec *pos, MD_Dvec *wrap)
{
  MD_Dvec center = f->param->center;
  MD_Dvec lo = f->lo;
  const double cutoff = f->param->cutoff;
  const double inv_cutoff = f->inv_cutoff;
  double xlen = f->param->xlen;
  double ylen = f->param->ylen;
  double zlen = f->param->zlen;
  double inv_xcellsize = f->inv_xcellsize;
  double inv_ycellsize = f->inv_ycellsize;
  double inv_zcellsize = f->inv_zcellsize;
  double xmin, xmax;
  double ymin, ymax;
  double zmin, zmax;
  int32 *next = f->next;
  ForceCell *cell = f->cell;
  int32 ncells = f->ncells;
  int32 nxcells = f->nxcells;
  int32 nycells = f->nycells;
  int32 nzcells = f->nzcells;
  const int32 flags = f->param->flags;
  const int32 natoms = f->param->atom_len;
  const int32 is_resize = f->is_resize;
  const int32 is_xresize = f->is_xresize;
  const int32 is_yresize = f->is_yresize;
  const int32 is_zresize = f->is_zresize;
  int32 i, j, k, n, index;

  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  /* routine should not be called if we are only subtracting exclusions */
  ASSERT((flags & (FORCE_ELEC | FORCE_ELEC_EXCL)) == FORCE_ELEC
      || (flags & (FORCE_VDW | FORCE_VDW_EXCL)) == FORCE_VDW);

  /*
   * refit the grid to the atoms; skipped for an empty atom array since
   * the extent search below starts from pos[0]
   */
  if (is_resize && natoms > 0 && (flags & FORCE_DIRECT) != FORCE_DIRECT) {
    if (is_xresize) {
      xmin = xmax = pos[0].x;
      for (n = 1;  n < natoms;  n++) {
        if (xmin > pos[n].x) xmin = pos[n].x;
        else if (xmax < pos[n].x) xmax = pos[n].x;
      }
      nxcells = ((int32) ((xmax - xmin) * inv_cutoff)) + 1;
      /* midpoint of the occupied range, so the grid surrounds the atoms */
      center.x = 0.5 * (xmin + xmax);
    }

    if (is_yresize) {
      ymin = ymax = pos[0].y;
      for (n = 1;  n < natoms;  n++) {
        if (ymin > pos[n].y) ymin = pos[n].y;
        else if (ymax < pos[n].y) ymax = pos[n].y;
      }
      nycells = ((int32) ((ymax - ymin) * inv_cutoff)) + 1;
      center.y = 0.5 * (ymin + ymax);
    }

    if (is_zresize) {
      zmin = zmax = pos[0].z;
      for (n = 1;  n < natoms;  n++) {
        if (zmin > pos[n].z) zmin = pos[n].z;
        else if (zmax < pos[n].z) zmax = pos[n].z;
      }
      nzcells = ((int32) ((zmax - zmin) * inv_cutoff)) + 1;
      center.z = 0.5 * (zmin + zmax);
    }

    ncells = nxcells * nycells * nzcells;
    if ((ncells >= 2 * f->ncells /* && ncells <= natoms */)
        || ncells < f->ncells / 64) {
      /* keep f->cell untouched if the reallocation fails */
      void *v = realloc(f->cell, ncells * sizeof(ForceCell));
      if (v == NULL) return FORCE_FAIL;
      cell = f->cell = (ForceCell *) v;
      f->ncells = ncells;
      f->nxcells = nxcells;
      f->nycells = nycells;
      f->nzcells = nzcells;
      if (force_setup_nonbonded_cells(f)) return FORCE_FAIL;
    }
    else {
      /* change is too small to justify a new grid: keep the old one */
      ncells = f->ncells;
      nxcells = f->nxcells;
      nycells = f->nycells;
      nzcells = f->nzcells;
    }

    /* resized dimensions use cells exactly one cutoff wide */
    if (is_xresize) {
      ASSERT(nxcells > 0);
      xlen = nxcells * cutoff;
      lo.x = center.x - 0.5 * xlen;
      ASSERT(inv_xcellsize == inv_cutoff);
    }

    if (is_yresize) {
      ASSERT(nycells > 0);
      ylen = nycells * cutoff;
      lo.y = center.y - 0.5 * ylen;
      ASSERT(inv_ycellsize == inv_cutoff);
    }

    if (is_zresize) {
      ASSERT(nzcells > 0);
      zlen = nzcells * cutoff;
      lo.z = center.z - 0.5 * zlen;
      ASSERT(inv_zcellsize == inv_cutoff);
    }
  }

  /* clear cells */
  for (n = 0;  n < ncells;  n++) {
    cell[n].head = -1;
    cell[n].cnt = 0;
  }

  /* place each atom in a cell */
  for (n = 0;  n < natoms;  n++) {

    /* determine i cell index */
    ASSERT(inv_xcellsize > 0.0 || (flags & FORCE_DIRECT) == FORCE_DIRECT);
    i = (int32) floor((pos[n].x + wrap[n].x - lo.x) * inv_xcellsize);
    if (flags & FORCE_X_PERIODIC) {
      ASSERT(!is_xresize);
      /* periodic in x - set wrap for this atom coordinate */
      if (i < 0) {
        do { wrap[n].x += xlen;  i += nxcells; } while (i < 0);
        ASSERT(pos[n].x + wrap[n].x >= lo.x);
      }
      else if (i >= nxcells) {
        do { wrap[n].x -= xlen;  i -= nxcells; } while (i >= nxcells);
        ASSERT(pos[n].x + wrap[n].x < lo.x + xlen);
      }
    }
    else {
      /* nonperiodic in x - edge cells have infinite extent */
      ASSERT(wrap[n].x == 0.0);
      if (i < 0) i = 0;
      else if (i >= nxcells) i = nxcells - 1;
    }

    /* determine j cell index */
    ASSERT(inv_ycellsize > 0.0 || (flags & FORCE_DIRECT) == FORCE_DIRECT);
    j = (int32) floor((pos[n].y + wrap[n].y - lo.y) * inv_ycellsize);
    if (flags & FORCE_Y_PERIODIC) {
      ASSERT(!is_yresize);
      /* periodic in y - set wrap for this atom coordinate */
      if (j < 0) {
        do { wrap[n].y += ylen;  j += nycells; } while (j < 0);
        ASSERT(pos[n].y + wrap[n].y >= lo.y);
      }
      else if (j >= nycells) {
        do { wrap[n].y -= ylen;  j -= nycells; } while (j >= nycells);
        ASSERT(pos[n].y + wrap[n].y < lo.y + ylen);
      }
    }
    else {
      /* nonperiodic in y - edge cells have infinite extent */
      ASSERT(wrap[n].y == 0.0);
      if (j < 0) j = 0;
      else if (j >= nycells) j = nycells - 1;
    }

    /* determine k cell index */
    ASSERT(inv_zcellsize > 0.0 || (flags & FORCE_DIRECT) == FORCE_DIRECT);
    k = (int32) floor((pos[n].z + wrap[n].z - lo.z) * inv_zcellsize);
    if (flags & FORCE_Z_PERIODIC) {
      ASSERT(!is_zresize);
      /* periodic in z - set wrap for this atom coordinate */
      if (k < 0) {
        do { wrap[n].z += zlen;  k += nzcells; } while (k < 0);
        ASSERT(pos[n].z + wrap[n].z >= lo.z);
      }
      else if (k >= nzcells) {
        do { wrap[n].z -= zlen;  k -= nzcells; } while (k >= nzcells);
        ASSERT(pos[n].z + wrap[n].z < lo.z + zlen);
      }
    }
    else {
      /* nonperiodic in z - edge cells have infinite extent */
      ASSERT(wrap[n].z == 0.0);
      if (k < 0) k = 0;
      else if (k >= nzcells) k = nzcells - 1;
    }

    /* insert atom into front of (i,j,k)th cell list */
    ASSERT(i >= 0 && i < nxcells);
    ASSERT(j >= 0 && j < nycells);
    ASSERT(k >= 0 && k < nzcells);
    index = (k * nycells + j) * nxcells + i;
    ASSERT(index >= 0 && index < ncells);
    next[n] = cell[index].head;
    cell[index].head = n;
    cell[index].cnt++;
  }

  return 0;
}


/*
 * Accumulate cutoff electrostatic and van der Waals pair interactions.
 *
 * Walks the cell grid filled by geometric_hash(): for every cell, each
 * atom j of that cell is paired with the atoms i of every cell in the
 * cell's neighbor list.  Pairs found in the exclusion list are skipped;
 * pairs found in the scaled 1-4 list use the scaling14 factor for
 * electrostatics and the A_14/B_14 table entries for van der Waals.
 *
 * Forces are added into f->result->f_elec and f->result->f_vdw, or into
 * f->result->f when the respective separate array is absent.  Energies
 * are added to f->energy->elec and f->energy->vdw.
 *
 * pos and wrap are the atom positions and their per-atom displacements;
 * the pair search always uses pos + wrap.
 *
 * Always returns 0.
 */
int interactions(Force *f, const MD_Dvec *pos, const MD_Dvec *wrap)
{
  MD_Dvec f_elec_j, f_vdw_j, pj;
  MD_Dvec r_ij, f_ij, offset;
  const double scaling14 = f->param->scaling14;
  const double elec_const = f->elec_const;
  double elec_cutoff2 = f->elec_cutoff2;
  double vdw_cutoff2 = f->vdw_cutoff2;
  double a, b, c, c_elec, r2, u_elec, u_vdw;
  /* fall back to the total force array if no separate array is provided */
  MD_Dvec *f_elec = (f->result->f_elec ? f->result->f_elec : f->result->f);
  MD_Dvec *f_vdw = (f->result->f_vdw ? f->result->f_vdw : f->result->f);
  const double *vdwtable = f->vdwtable;
  const double *entry;
  int32 **excl_list = f->excl_list;
  int32 **scaled14_list = f->scaled14_list;
  const MD_Atom *atom = f->param->atom;
  const int32 atomprm_len = f->param->atomprm_len;
  const int32 flags = f->param->flags;
  /* flag subsets that select which pair kernel is called below */
  const int32 elec_flags = (flags
      & (FORCE_ELEC | FORCE_SMOOTH | FORCE_ELEC_EXCL));
  const int32 vdw_flags = (flags
      & (FORCE_VDW | FORCE_SWITCH | FORCE_VDW_EXCL));
  const int32 *next = f->next;
  int32 *excl;
  const ForceCell *cell = f->cell;
  const int32 ncells = f->ncells;
  int32 i, j, k, n;
  int32 nnbrs, ihead, jhead;
  /* stays 0 for every pair when there is no scaled 1-4 list */
  int32 is_scaled14 = 0;

  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  /* routine should not be called if we are only subtracting exclusions */
  ASSERT((flags & (FORCE_ELEC | FORCE_ELEC_EXCL)) == FORCE_ELEC
      || (flags & (FORCE_VDW | FORCE_VDW_EXCL)) == FORCE_VDW);

  /* routine should not be called if only direct computation */
  ASSERT((flags & FORCE_DIRECT) != FORCE_DIRECT);

  /* reset energy accumulation variables */
  u_elec = 0.0;
  u_vdw = 0.0;

  /* reset cutoff to zero if subtracting exclusions or direct computation */
  /*
   * a zero squared cutoff makes the "r2 < cutoff2" test below fail for
   * every pair, which disables that term in this routine
   *
   * NOTE(review): because of the "else if", at most one of the two
   * cutoffs is zeroed here; force_compute_nonbonded() only calls this
   * routine when at least one term is a plain cutoff term
   */
  if (flags & (FORCE_ELEC_EXCL | FORCE_ELEC_DIRECT)) {
    elec_cutoff2 = 0.0;
  }
  else if (flags & (FORCE_VDW_EXCL | FORCE_VDW_DIRECT)) {
    vdw_cutoff2 = 0.0;
  }

  /* loop over cells */
  for (n = 0;  n < ncells;  n++) {
    nnbrs = cell[n].nnbrs;
    jhead = cell[n].head;

    /* loop over all of this cell's neighbors */
    for (k = 0;  k < nnbrs;  k++) {
      offset = cell[n].offset[k];
      ihead = cell[ cell[n].nbr[k] ].head;

      /* loop over all pairs of atoms */
      for (j = jhead;  j != -1;  j = next[j]) {

        /* subtracting offset from p[j] is same as adding it to p[i] */
        pj.x = (pos[j].x + wrap[j].x) - offset.x;
        pj.y = (pos[j].y + wrap[j].y) - offset.y;
        pj.z = (pos[j].z + wrap[j].z) - offset.z;

        /* accumulate into local storage for efficiency */
        f_elec_j.x = 0.0;
        f_elec_j.y = 0.0;
        f_elec_j.z = 0.0;
        f_vdw_j.x = 0.0;
        f_vdw_j.y = 0.0;
        f_vdw_j.z = 0.0;
        c_elec = elec_const * atom[j].q;

        /* 0th neighbor cell is self referential, must modify ihead */
        /* starting after j visits each pair within the cell only once */
        if (k == 0) ihead = next[j];

        for (i = ihead;  i != -1;  i = next[i]) {
          /* check to see if this pair is excluded */
          /*
           * the scan stops at the first entry that is not less than j
           * NOTE(review): presumably each list is sorted in ascending
           * order and ends with a sentinel larger than any atom index
           * -- confirm against the code that builds the lists
           */
          if (excl_list) {
            for (excl = excl_list[i];  *excl < j;  excl++) ;
            if (j == *excl) continue;
          }

          /* check to see if this is scaled 1-4 interaction */
          if (scaled14_list) {
            for (excl = scaled14_list[i];  *excl < j;  excl++) ;
            is_scaled14 = (j == *excl);
          }

          /* r_ij is vector from atom i to atom j */
          r_ij.x = pj.x - (pos[i].x + wrap[i].x);
          r_ij.y = pj.y - (pos[i].y + wrap[i].y);
          r_ij.z = pj.z - (pos[i].z + wrap[i].z);

          /* r2 is square of pairwise distance */
          r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

          /* deal with electrostatics if within cutoff */
          if (r2 < elec_cutoff2) {
            ASSERT(i != j);
            c = c_elec * atom[i].q;
            if (is_scaled14) c *= scaling14;
            /*
             * NOTE(review): there is no default case, so f_ij is left
             * unassigned if elec_flags matches neither value -- confirm
             * that elec_cutoff2 is zero whenever electrostatics are off
             */
            switch (elec_flags) {
              case FORCE_ELEC:
                u_elec += compute_elec(f, &f_ij, &r_ij, r2, c);
                break;
              case (FORCE_ELEC | FORCE_SMOOTH):
                u_elec += compute_elec_smooth(f, &f_ij, &r_ij, r2, c);
                break;
            }
            /* f_ij acts on atom i; atom j receives the opposite force */
            f_elec[i].x += f_ij.x;
            f_elec[i].y += f_ij.y;
            f_elec[i].z += f_ij.z;
            f_elec_j.x -= f_ij.x;
            f_elec_j.y -= f_ij.y;
            f_elec_j.z -= f_ij.z;
          }

          /* deal with van der Waals if within cutoff */
          if (r2 < vdw_cutoff2) {
            ASSERT(i != j);
            /* table holds 4 values per (atom[i].prm, atom[j].prm) pair */
            entry = vdwtable + 4 * (atom[i].prm * atomprm_len + atom[j].prm);
            if (is_scaled14) {
              a = entry[A_14];
              b = entry[B_14];
            }
            else {
              a = entry[A];
              b = entry[B];
            }
            /*
             * NOTE(review): no default case here either; f_ij is left
             * unassigned if vdw_flags matches neither value
             */
            switch (vdw_flags) {
              case FORCE_VDW:
                u_vdw += compute_vdw(f, &f_ij, &r_ij, r2, a, b);
                break;
              case (FORCE_VDW | FORCE_SWITCH):
                u_vdw += compute_vdw_switch(f, &f_ij, &r_ij, r2, a, b);
                break;
            }
            f_vdw[i].x += f_ij.x;
            f_vdw[i].y += f_ij.y;
            f_vdw[i].z += f_ij.z;
            f_vdw_j.x -= f_ij.x;
            f_vdw_j.y -= f_ij.y;
            f_vdw_j.z -= f_ij.z;
          }

        } /* end i-loop over atoms in neighbor cell */

        /* add accumulated force into array */
        f_elec[j].x += f_elec_j.x;
        f_elec[j].y += f_elec_j.y;
        f_elec[j].z += f_elec_j.z;
        f_vdw[j].x += f_vdw_j.x;
        f_vdw[j].y += f_vdw_j.y;
        f_vdw[j].z += f_vdw_j.z;

      } /* end j-loop over atoms in this cell */

    } /* end k-loop over cell neighbors */

  } /* end n-loop over all cells */

  /* accumulate energy */
  f->energy->elec += u_elec;
  f->energy->vdw += u_vdw;

  return 0;
}


/*
 * All-pairs evaluation of the nonbonded terms flagged as direct.
 *
 * Every pair (i, j) with i > j is visited once.  Pairs on the exclusion
 * list are skipped; pairs on the scaled 1-4 list use the scaling14
 * factor for electrostatics and the A_14/B_14 table entries for van der
 * Waals.  In periodic dimensions the separation is shifted by one cell
 * length when it exceeds half the cell length.  No cutoff is applied and
 * the plain (unsmoothed, unswitched) pair kernels are used.
 *
 * Forces are added into f->result->f_elec and f->result->f_vdw, or into
 * f->result->f when the respective separate array is absent.  Energies
 * are added to f->energy->elec and f->energy->vdw.
 *
 * Always returns 0.
 */
int direct(Force *f, const MD_Dvec *pos, const MD_Dvec *wrap)
{
  MD_Dvec *f_elec = (f->result->f_elec ? f->result->f_elec : f->result->f);
  MD_Dvec *f_vdw = (f->result->f_vdw ? f->result->f_vdw : f->result->f);
  const MD_Atom *atom = f->param->atom;
  const double *vdwtable = f->vdwtable;
  int32 **excl_list = f->excl_list;
  int32 **scaled14_list = f->scaled14_list;
  const double scaling14 = f->param->scaling14;
  const double elec_const = f->elec_const;
  const double xlen = f->param->xlen;
  const double ylen = f->param->ylen;
  const double zlen = f->param->zlen;
  const double half_xlen = 0.5 * xlen;
  const double half_ylen = 0.5 * ylen;
  const double half_zlen = 0.5 * zlen;
  const int32 atomprm_len = f->param->atomprm_len;
  const int32 natoms = f->param->atom_len;
  const int32 flags = f->param->flags;
  const int32 x_periodic = ((flags & FORCE_X_PERIODIC) != 0);
  const int32 y_periodic = ((flags & FORCE_Y_PERIODIC) != 0);
  const int32 z_periodic = ((flags & FORCE_Z_PERIODIC) != 0);
  const int32 do_elec = ((flags & FORCE_ELEC_DIRECT) != 0);
  const int32 do_vdw = ((flags & FORCE_VDW_DIRECT) != 0);
  double u_elec = 0.0;  /* electrostatic energy summed over all pairs */
  double u_vdw = 0.0;   /* van der Waals energy summed over all pairs */
  int32 is_scaled14 = 0;  /* stays 0 when there is no scaled 1-4 list */
  int32 i, j;

  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  ASSERT(flags & FORCE_DIRECT);

  for (j = 0;  j < natoms;  j++) {
    MD_Dvec pj;    /* displaced position of atom j */
    MD_Dvec fe_j;  /* electrostatic force gathered for atom j */
    MD_Dvec fv_j;  /* van der Waals force gathered for atom j */
    double c_elec;

    pj.x = pos[j].x + wrap[j].x;
    pj.y = pos[j].y + wrap[j].y;
    pj.z = pos[j].z + wrap[j].z;

    fe_j.x = fe_j.y = fe_j.z = 0.0;
    fv_j.x = fv_j.y = fv_j.z = 0.0;
    c_elec = elec_const * atom[j].q;

    /* pair atom j with every higher-indexed atom */
    for (i = j + 1;  i < natoms;  i++) {
      MD_Dvec r_ij, f_ij;
      const int32 *p;
      double r2;

      /* skip the pair if j appears in the exclusion list of i */
      if (excl_list) {
        p = excl_list[i];
        while (*p < j) p++;
        if (*p == j) continue;
      }

      /* note whether j appears in the scaled 1-4 list of i */
      if (scaled14_list) {
        p = scaled14_list[i];
        while (*p < j) p++;
        is_scaled14 = (*p == j);
      }

      /* vector from atom i to atom j */
      r_ij.x = pj.x - (pos[i].x + wrap[i].x);
      r_ij.y = pj.y - (pos[i].y + wrap[i].y);
      r_ij.z = pj.z - (pos[i].z + wrap[i].z);

      /* nearest image in each periodic dimension */
      if (x_periodic) {
        if (r_ij.x > half_xlen) r_ij.x -= xlen;
        else if (r_ij.x < -half_xlen) r_ij.x += xlen;
      }
      if (y_periodic) {
        if (r_ij.y > half_ylen) r_ij.y -= ylen;
        else if (r_ij.y < -half_ylen) r_ij.y += ylen;
      }
      if (z_periodic) {
        if (r_ij.z > half_zlen) r_ij.z -= zlen;
        else if (r_ij.z < -half_zlen) r_ij.z += zlen;
      }

      r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

      if (do_elec) {
        double c;

        ASSERT(i != j);
        c = c_elec * atom[i].q;
        if (is_scaled14) c *= scaling14;
        u_elec += compute_elec(f, &f_ij, &r_ij, r2, c);
        /* f_ij acts on atom i; atom j receives the opposite force */
        f_elec[i].x += f_ij.x;
        f_elec[i].y += f_ij.y;
        f_elec[i].z += f_ij.z;
        fe_j.x -= f_ij.x;
        fe_j.y -= f_ij.y;
        fe_j.z -= f_ij.z;
      }

      if (do_vdw) {
        const double *entry;
        double a, b;

        ASSERT(i != j);
        /* table holds 4 values per (atom[i].prm, atom[j].prm) pair */
        entry = vdwtable + 4 * (atom[i].prm * atomprm_len + atom[j].prm);
        a = (is_scaled14 ? entry[A_14] : entry[A]);
        b = (is_scaled14 ? entry[B_14] : entry[B]);
        u_vdw += compute_vdw(f, &f_ij, &r_ij, r2, a, b);
        f_vdw[i].x += f_ij.x;
        f_vdw[i].y += f_ij.y;
        f_vdw[i].z += f_ij.z;
        fv_j.x -= f_ij.x;
        fv_j.y -= f_ij.y;
        fv_j.z -= f_ij.z;
      }
    }

    /* flush the forces gathered for atom j */
    f_elec[j].x += fe_j.x;
    f_elec[j].y += fe_j.y;
    f_elec[j].z += fe_j.z;
    f_vdw[j].x += fv_j.x;
    f_vdw[j].y += fv_j.y;
    f_vdw[j].z += fv_j.z;
  }

  f->energy->elec += u_elec;
  f->energy->vdw += u_vdw;

  return 0;
}


/*
 * Subtract the interactions of excluded and scaled 1-4 pairs.
 *
 * Two passes are made over the atoms.  The first walks the exclusion
 * list and subtracts the complete pair interaction (table entries A, B).
 * The second, made only if a scaled 1-4 list exists, subtracts the
 * (1 - scaling14) fraction of the electrostatic term and the van der
 * Waals term built from table entries A_14, B_14.
 * NOTE(review): unlike the electrostatic part, which removes only the
 * (1 - scaling14) fraction, the van der Waals part of the second pass
 * removes the whole A_14/B_14 term -- confirm this is intended.
 *
 * For atom j only list entries i < j are processed, so each pair is
 * handled once.  Separations are taken from pos directly.
 *
 * Forces are updated in f->result->f_elec and f->result->f_vdw, or in
 * f->result->f when the respective separate array is absent.  Energies
 * are accumulated into f->energy->elec and f->energy->vdw.
 *
 * Always returns 0.
 */
int exclusions(Force *f, const MD_Dvec *pos)
{
  MD_Dvec *f_elec = (f->result->f_elec ? f->result->f_elec : f->result->f);
  MD_Dvec *f_vdw = (f->result->f_vdw ? f->result->f_vdw : f->result->f);
  const MD_Atom *atom = f->param->atom;
  const double *vdwtable = f->vdwtable;
  int32 **excl_list = f->excl_list;
  int32 **scaled14_list = f->scaled14_list;
  const double scaling14 = f->param->scaling14;
  const double elec_const = f->elec_const;
  const int32 atom_len = f->param->atom_len;
  const int32 atomprm_len = f->param->atomprm_len;
  const int32 flags = f->param->flags;
  const int32 is_elec_excl = ((flags & (FORCE_ELEC | FORCE_ELEC_EXCL))
      == (FORCE_ELEC | FORCE_ELEC_EXCL));
  const int32 is_vdw_excl = ((flags & (FORCE_VDW | FORCE_VDW_EXCL))
      == (FORCE_VDW | FORCE_VDW_EXCL));
  const int32 elec_flags = (flags
      & (FORCE_ELEC | FORCE_SMOOTH | FORCE_ELEC_EXCL));
  const int32 vdw_flags = (flags
      & (FORCE_VDW | FORCE_SWITCH | FORCE_VDW_EXCL));
  double u_elec = 0.0;  /* electrostatic energy change over both passes */
  double u_vdw = 0.0;   /* van der Waals energy change over both passes */
  int32 pass, j;

  /* routine should be called only if we are subtracting some exclusions */
  ASSERT(is_elec_excl || is_vdw_excl);

  /* exclusion list should always exist */
  ASSERT(excl_list !=  NULL);

  /* pass 0: excluded pairs;  pass 1: scaled 1-4 pairs */
  for (pass = 0;  pass < 2;  pass++) {
    int32 **list;       /* per-atom pair lists handled in this pass */
    double elec_scale;  /* fraction of the electrostatic term removed */
    int ia, ib;         /* van der Waals table offsets for this pass */

    if (pass == 0) {
      list = excl_list;
      elec_scale = 1.0;
      ia = A;
      ib = B;
    }
    else {
      if (!scaled14_list) break;
      list = scaled14_list;
      elec_scale = 1.0 - scaling14;
      ia = A_14;
      ib = B_14;
    }

    for (j = 0;  j < atom_len;  j++) {
      const MD_Dvec pj = pos[j];
      const double c_elec = elec_scale * elec_const * atom[j].q;
      MD_Dvec fe_j;  /* electrostatic force gathered for atom j */
      MD_Dvec fv_j;  /* van der Waals force gathered for atom j */
      const int32 *p;

      fe_j.x = fe_j.y = fe_j.z = 0.0;
      fv_j.x = fv_j.y = fv_j.z = 0.0;

      /* entries below j only, so that each pair is processed once */
      for (p = list[j];  *p < j;  p++) {
        const int32 i = *p;
        MD_Dvec r_ij, f_ij;
        double r2;

        /* vector from atom i to atom j and its squared length */
        r_ij.x = pj.x - pos[i].x;
        r_ij.y = pj.y - pos[i].y;
        r_ij.z = pj.z - pos[i].z;
        r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

        if (is_elec_excl) {
          const double c = c_elec * atom[i].q;

          switch (elec_flags) {
            case (FORCE_ELEC | FORCE_ELEC_EXCL):
              u_elec -= compute_elec(f, &f_ij, &r_ij, r2, c);
              break;
            case (FORCE_ELEC | FORCE_SMOOTH | FORCE_ELEC_EXCL):
              u_elec -= compute_elec_smooth(f, &f_ij, &r_ij, r2, c);
              break;
          }
          /* signs are reversed relative to adding the interaction */
          f_elec[i].x -= f_ij.x;
          f_elec[i].y -= f_ij.y;
          f_elec[i].z -= f_ij.z;
          fe_j.x += f_ij.x;
          fe_j.y += f_ij.y;
          fe_j.z += f_ij.z;
        }

        if (is_vdw_excl) {
          /* table holds 4 values per (atom[i].prm, atom[j].prm) pair */
          const double *entry
            = vdwtable + 4 * (atom[i].prm * atomprm_len + atom[j].prm);
          const double a = entry[ia];
          const double b = entry[ib];

          switch (vdw_flags) {
            case (FORCE_VDW | FORCE_VDW_EXCL):
              u_vdw -= compute_vdw(f, &f_ij, &r_ij, r2, a, b);
              break;
            case (FORCE_VDW | FORCE_SWITCH | FORCE_VDW_EXCL):
              u_vdw -= compute_vdw_switch(f, &f_ij, &r_ij, r2, a, b);
              break;
          }
          f_vdw[i].x -= f_ij.x;
          f_vdw[i].y -= f_ij.y;
          f_vdw[i].z -= f_ij.z;
          fv_j.x += f_ij.x;
          fv_j.y += f_ij.y;
          fv_j.z += f_ij.z;
        }
      }

      /* flush the forces gathered for atom j */
      f_elec[j].x += fe_j.x;
      f_elec[j].y += fe_j.y;
      f_elec[j].z += fe_j.z;
      f_vdw[j].x += fv_j.x;
      f_vdw[j].y += fv_j.y;
      f_vdw[j].z += fv_j.z;
    }
  }

  f->energy->elec += u_elec;
  f->energy->vdw += u_vdw;

  return 0;
}



/******************************************************************************
 *
 * Routines for computing individual interactions:
 *
 *   compute_elec() - electrostatic
 *   compute_elec_smooth() - smoothed electrostatic
 *   compute_vdw() - van der Waals
 *   compute_vdw_switch() - switched van der Waals
 *
 ******************************************************************************/


/*
 * Electrostatic interaction of one atom pair.
 *
 *   f     - not used by this routine
 *   f_ij  - output: force on atom i due to atom j
 *   r_ij  - r_j - r_i, the vector from atom i to atom j
 *   r2    - squared length of r_ij, must be positive
 *   c     - constant factor (charges, Coulomb constant, dielectric, etc.)
 *
 * Returns the potential energy c/r.
 */
double compute_elec(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double c)
{
  double inv_r2;  /* 1/r^2 */
  double energy;  /* c/r */
  double coef;    /* factor applied to r_ij to obtain the force */

  ASSERT(r2 > 0.0);
  inv_r2 = 1.0 / r2;
  energy = c * sqrt(inv_r2);
  coef = -energy * inv_r2;

  f_ij->x = coef * r_ij->x;
  f_ij->y = coef * r_ij->y;
  f_ij->z = coef * r_ij->z;
  return energy;
}


/*
 * Smoothed electrostatic interaction of one atom pair: the potential
 * c/r is multiplied by the smoothing factor (1 - r^2/rc^2)^2, where
 * 1/rc^2 is taken from f->inv_elec_cutoff2.
 *
 *   f_ij  - output: force on atom i due to atom j
 *   r_ij  - r_j - r_i, the vector from atom i to atom j
 *   r2    - squared length of r_ij, must be positive
 *   c     - constant factor (charges, Coulomb constant, dielectric, etc.)
 *
 * The pair is assumed to lie within the cutoff.
 *
 * Returns the smoothed potential energy.
 */
double compute_elec_smooth(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double c)
{
  const double inv_rc2 = f->inv_elec_cutoff2;
  double inv_r2;    /* 1/r^2 */
  double coulomb;   /* unsmoothed potential c/r */
  double dcoul_r;   /* (d coulomb / dr) / r */
  double t;         /* 1 - r^2/rc^2 */
  double smooth;    /* smoothing factor t^2 */
  double dsmooth_r; /* (d smooth / dr) / r */
  double coef;      /* factor applied to r_ij to obtain the force */

  ASSERT(r2 > 0.0);
  inv_r2 = 1.0 / r2;
  ASSERT(inv_r2 >= inv_rc2);

  coulomb = c * sqrt(inv_r2);
  dcoul_r = -coulomb * inv_r2;

  t = 1.0 - r2 * inv_rc2;
  smooth = t * t;
  dsmooth_r = -4.0 * inv_rc2 * t;

  /* product rule applied to coulomb * smooth */
  coef = coulomb * dsmooth_r + dcoul_r * smooth;

  f_ij->x = coef * r_ij->x;
  f_ij->y = coef * r_ij->y;
  f_ij->z = coef * r_ij->z;
  return coulomb * smooth;
}


/*
 * van der Waals interaction of one atom pair.
 *
 *   f     - not used by this routine
 *   f_ij  - output: force on atom i due to atom j
 *   r_ij  - r_j - r_i, the vector from atom i to atom j
 *   r2    - squared length of r_ij, must be positive
 *   a, b  - coefficients of the 1/r^12 and 1/r^6 terms
 *
 * Returns the potential energy a/r^12 - b/r^6.
 */
double compute_vdw(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double a, double b)
{
  double inv_r2;   /* 1/r^2 */
  double inv_r6;   /* 1/r^6 */
  double repulse;  /* a/r^12 */
  double attract;  /* b/r^6 */
  double coef;     /* factor applied to r_ij to obtain the force */

  ASSERT(r2 > 0.0);
  inv_r2 = 1.0 / r2;
  inv_r6 = inv_r2 * inv_r2 * inv_r2;
  repulse = a * (inv_r6 * inv_r6);
  attract = b * inv_r6;
  coef = (-12.0 * repulse + 6.0 * attract) * inv_r2;

  f_ij->x = coef * r_ij->x;
  f_ij->y = coef * r_ij->y;
  f_ij->z = coef * r_ij->z;
  return repulse - attract;
}


/*
 * Switched van der Waals interaction of one atom pair.
 *
 * Up to the switching distance the plain a/r^12 - b/r^6 potential is
 * used; beyond it the potential is multiplied by a switching function.
 * The routine reads from f:
 *   f->switchdist2       square of the switching distance (ron2)
 *   f->vdw_cutoff2       square of the cutoff distance (roff2)
 *   f->inv_denom_switch  inverse of the switching function denominator
 *
 *   f_ij  - output: force on atom i due to atom j
 *   r_ij  - r_j - r_i, the vector from atom i to atom j
 *   r2    - squared length of r_ij, must be positive and at most roff2
 *   a, b  - coefficients of the 1/r^12 and 1/r^6 terms
 *
 * Returns the (switched) potential energy.
 */
double compute_vdw_switch(Force *f, MD_Dvec *f_ij,
    const MD_Dvec *r_ij, double r2, double a, double b)
{
  const double ron2 = f->switchdist2;
  const double roff2 = f->vdw_cutoff2;
  const double denom = f->inv_denom_switch;
  double inv_r2;   /* 1/r^2 */
  double inv_r6;   /* 1/r^6 */
  double repulse;  /* a/r^12 */
  double attract;  /* b/r^6 */
  double u;        /* unswitched potential */
  double du_r;     /* (du/dr) / r */
  double coef;     /* factor applied to r_ij to obtain the force */
  double energy;   /* value returned to the caller */

  ASSERT(ron2 < roff2);
  ASSERT(r2 <= roff2);
  ASSERT(r2 > 0.0);

  inv_r2 = 1.0 / r2;
  inv_r6 = inv_r2 * inv_r2 * inv_r2;
  repulse = a * (inv_r6 * inv_r6);
  attract = b * inv_r6;
  u = repulse - attract;
  du_r = (-12.0 * repulse + 6.0 * attract) * inv_r2;

  /* inside the switching distance the plain potential applies */
  coef = du_r;
  energy = u;

  if (r2 > ron2) {
    const double gap = roff2 - r2;
    const double s = gap * gap * (roff2 + 2.0 * r2 - 3.0 * ron2) * denom;
    const double ds_r = 12.0 * gap * (ron2 - r2) * denom;

    /* product rule applied to u * s */
    coef = u * ds_r + du_r * s;
    energy = u * s;
  }

  f_ij->x = coef * r_ij->x;
  f_ij->y = coef * r_ij->y;
  f_ij->z = coef * r_ij->z;
  return energy;
}
