/*
 * Copyright (C) 2004-2006 by David J. Hardy.  All rights reserved.
 *
 * nbpairs.c
 *
 * Compute nonbonded pairwise forces and potentials.
 */

#include <stdlib.h>
#include <math.h>
#include "force/intdefn.h"
#undef DEBUG_WATCH
#include "debug/debug.h"

/*
 * Compile-time switches.  A "#define X" immediately followed by "#undef X"
 * leaves X undefined; removing the #undef line turns the option on.
 */

/* WARN_BUCK: when defined, CHECK_BUCK() prints a close-encounter warning */
#define WARN_BUCK
#undef WARN_BUCK

/* threshold that CHECK_BUCK() compares against the squared distance r2 */
#define BUCK_INNER_CUTOFF  1.0

/* DEBUG_VIRIAL: when defined, the gridcells routine also prints virials */
#define DEBUG_VIRIAL
#undef DEBUG_VIRIAL

/* DEBUG_PAIRLISTS: currently left undefined */
#define DEBUG_PAIRLISTS
#undef DEBUG_PAIRLISTS

/* left defined (no #undef follows) */
#define OUTPUT_PAIRLIST_INFO

/* #define RMIN2  1.0 */
/*
 * RMIN2 expands to a member access on a variable named "entry", so it can
 * only be used where such a pointer is in scope and already set.
 */
#define RMIN2  (entry->rmin2)

/* for Drude oscillators */
/* squared-distance threshold below which the exclusion list is consulted */
#define RDRMIN2  0.5

/******************************************************************************
 *
 * Macros for computing individual pairwise interactions
 *
 ******************************************************************************/


/*
 * Plain Coulomb kernel:  U(r) = c / r
 *
 * Produces the potential and the "force scaling" (1/r)*(dU/dr) = -c / r^3.
 *
 *   u     pointer to double, receives U(r)
 *   du_r  pointer to double, receives (1/r)*(dU/dr)
 *   r2    square of the pairwise distance r (must be positive)
 *   c     constant multiplying 1/r
 *
 * The macro is expanded directly inside the pair loop; the function that
 * follows offers the same computation as an ordinary call.
 */
#define FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(u, du_r, r2, c) \
do { \
  const double nbp_r2 = (r2); \
  double *nbp_u = (u); \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r;   /* 1/r */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r = sqrt(nbp_inv_r2); \
  *nbp_u = (c) * nbp_inv_r; \
  *(du_r) = -(*nbp_u) * nbp_inv_r2; \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD */
void force_compute_nbpairs_elec_standard(double *u, double *du_r,
    double r2, double c)
{
  FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(u, du_r, r2, c);
}


/*
 * Shifted Coulomb kernel:  U(r) = (c / r) * (1 - r^2/rc^2)^2
 *
 * Produces the smoothed potential and its force scaling (1/r)*(dU/dr).
 *
 *   u        pointer to double, receives the smoothed potential
 *   du_r     pointer to double, receives the smoothed force scaling
 *   r2       square of the pairwise distance (must be positive)
 *   c        constant multiplying 1/r
 *   inv_rc2  inverse square of the electrostatic cutoff
 *
 * The pair is ASSUMED to be within the cutoff (r2 < cutoff2); this is
 * not checked here.
 */
#define FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(u, du_r, r2, c, inv_rc2) \
do { \
  const double nbp_r2 = (r2); \
  const double nbp_inv_rc2 = (inv_rc2); \
  double nbp_inv_r2;   /* 1/r^2 */ \
  double nbp_full;     /* unshifted potential c/r */ \
  double nbp_dfull_r;  /* (1/r)*(d/dr) of unshifted potential */ \
  double nbp_t;        /* 1 - r^2/rc^2 */ \
  double nbp_s;        /* smoothing function t^2 */ \
  double nbp_ds_r;     /* (1/r)*(ds/dr) */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_full = (c) * sqrt(nbp_inv_r2); \
  nbp_dfull_r = -nbp_full * nbp_inv_r2; \
  nbp_t = 1.0 - nbp_r2 * nbp_inv_rc2; \
  nbp_s = nbp_t * nbp_t; \
  nbp_ds_r = -4.0 * nbp_inv_rc2 * nbp_t; \
  *(u) = nbp_full * nbp_s; \
  *(du_r) = nbp_full * nbp_ds_r + nbp_dfull_r * nbp_s; \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED */
void force_compute_nbpairs_elec_shifted(double *u, double *du_r,
    double r2, double c, double inv_rc2)
{
  FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(u, du_r, r2, c, inv_rc2);
}


/*
 * Direct-space Ewald kernel:  U(r) = c * erfc(ewald_coef * r) / r
 *
 * Produces the potential and the force scaling
 *   (1/r)*(dU/dr) = -(c * grad_coef * exp(-(ewald_coef*r)^2) + U) / r^2
 * which is the exact derivative of U when
 *   grad_coef = 2 * ewald_coef / sqrt(pi).
 *
 *   u           pointer to double, receives the potential
 *   du_r        pointer to double, receives the force scaling
 *   r2          square of the pairwise distance (must be positive)
 *   c           electrostatic constant
 *   ewald_coef  coefficient inside erfc()
 *   grad_coef   coefficient of the Gaussian term in the gradient
 *
 * The pair is ASSUMED to be within the cutoff (r2 < cutoff2).
 *
 * Every argument is evaluated exactly once, before any working variable
 * is used, and all locals carry the nbp_ prefix.  This keeps an argument
 * expression that mentions a caller variable named r, a or b from being
 * captured by a local of the same name.
 */
#define FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(u, du_r, r2, c, ewald_coef, grad_coef)\
do { \
  const double nbp_r2 = (r2); \
  double *nbp_u = (u); \
  double *nbp_du_r = (du_r); \
  const double nbp_c = (c); \
  const double nbp_ewald_coef = (ewald_coef); \
  const double nbp_grad_coef = (grad_coef); \
  double nbp_r;       /* r */ \
  double nbp_inv_r;   /* 1/r */ \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_beta_r;  /* ewald_coef * r */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_r = sqrt(nbp_r2); \
  nbp_inv_r = 1.0 / nbp_r; \
  nbp_inv_r2 = nbp_inv_r * nbp_inv_r; \
  nbp_beta_r = nbp_r * nbp_ewald_coef; \
  *nbp_u = nbp_c * erfc(nbp_beta_r) * nbp_inv_r; \
  *nbp_du_r = -(nbp_c * nbp_grad_coef * exp(-nbp_beta_r * nbp_beta_r) \
      + *nbp_u) * nbp_inv_r2; \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_ELEC_EWALD */
void force_compute_nbpairs_elec_ewald(double *u, double *du_r,
    double r2, double c, double ewald_coef, double grad_coef)
{
  FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(u, du_r, r2, c, ewald_coef, grad_coef);
}


/*
 * 12-6 van der Waals kernel:  U(r) = a / r^12 - b / r^6
 *
 * Produces the potential and the force scaling
 *   (1/r)*(dU/dr) = (-12 a / r^12 + 6 b / r^6) / r^2.
 *
 *   u     pointer to double, receives the potential
 *   du_r  pointer to double, receives the force scaling
 *   r2    square of the pairwise distance r (must be positive)
 *   a, b  constants of the repulsive and attractive terms
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(u, du_r, r2, a, b) \
do { \
  const double nbp_r2 = (r2); \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r6;  /* 1/r^6 */ \
  double nbp_rep;     /* a/r^12 */ \
  double nbp_disp;    /* b/r^6 */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r6 = nbp_inv_r2 * nbp_inv_r2 * nbp_inv_r2; \
  nbp_rep = (a) * (nbp_inv_r6 * nbp_inv_r6); \
  nbp_disp = (b) * nbp_inv_r6; \
  *(u) = nbp_rep - nbp_disp; \
  *(du_r) = (-12.0 * nbp_rep + 6.0 * nbp_disp) * nbp_inv_r2; \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_STANDARD */
void force_compute_nbpairs_vdw_standard(double *u, double *du_r,
    double r2, double a, double b)
{
  FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(u, du_r, r2, a, b);
}


/*
 * Switched 12-6 van der Waals kernel.
 *
 * With w(r) = a / r^12 - b / r^6, the result is w for r2 <= ron2 and
 * w * s for r2 > ron2, where the switching function is
 *   s = (roff2 - r2)^2 * (roff2 + 2 r2 - 3 ron2) * denom
 * and (1/r)*(ds/dr) = 12 * (roff2 - r2) * (ron2 - r2) * denom.
 *
 *   u      pointer to double, receives the switched potential
 *   du_r   pointer to double, receives the switched force scaling
 *   r2     square of the pairwise distance (must be positive)
 *   a, b   constants of the 12-6 potential
 *   roff2  square of cutoff distance (vdw_cutoff2)
 *   ron2   square of switching distance (switch_dist2)
 *   denom  inverse of the denominator of the switching function
 *
 * The pair is ASSUMED to be within the cutoff (r2 <= roff2).
 *
 * All macro parameters are parenthesized and all locals carry the nbp_
 * prefix so that argument expressions cannot be mis-parsed or captured.
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(u,du_r,r2,a,b,roff2,ron2,denom) \
do { \
  const double nbp_r2 = (r2); \
  const double nbp_ron2 = (ron2); \
  const double nbp_roff2 = (roff2); \
  const double nbp_denom = (denom); \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r6;  /* 1/r^6 */ \
  double nbp_rep;     /* a/r^12 */ \
  double nbp_disp;    /* b/r^6 */ \
  double nbp_w;       /* unswitched potential */ \
  double nbp_dw_r;    /* (1/r)*(dw/dr) */ \
\
  ASSERT(nbp_ron2 < nbp_roff2); \
  ASSERT(nbp_r2 <= nbp_roff2); \
  ASSERT(nbp_r2 > 0.0); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r6 = nbp_inv_r2 * nbp_inv_r2 * nbp_inv_r2; \
  nbp_rep = (a) * (nbp_inv_r6 * nbp_inv_r6); \
  nbp_disp = (b) * nbp_inv_r6; \
  nbp_w = nbp_rep - nbp_disp; \
  nbp_dw_r = (-12.0 * nbp_rep + 6.0 * nbp_disp) * nbp_inv_r2; \
  if (nbp_r2 > nbp_ron2) { \
    const double nbp_gap = nbp_roff2 - nbp_r2; \
    const double nbp_s = nbp_gap * nbp_gap \
      * (nbp_roff2 + 2.0 * nbp_r2 - 3.0 * nbp_ron2) * nbp_denom; \
    const double nbp_ds_r \
      = 12.0 * nbp_gap * (nbp_ron2 - nbp_r2) * nbp_denom; \
    *(u) = nbp_w * nbp_s; \
    *(du_r) = nbp_w * nbp_ds_r + nbp_dw_r * nbp_s; \
  } \
  else { \
    *(u) = nbp_w; \
    *(du_r) = nbp_dw_r; \
  } \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED */
void force_compute_nbpairs_vdw_switched(double *u, double *du_r,
    double r2, double a, double b,
    double roff2, double ron2, double denom)
{
  FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(u, du_r, r2, a, b, roff2, ron2, denom);
}


/*
 * Buckingham kernel (replaces van der Waals):
 *   U(r) = a * exp(-r / b) - c / r^6
 *
 * Produces the potential and the force scaling
 *   (1/r)*(dU/dr) = (6 c / r^6 - (r / b) * a * exp(-r / b)) / r^2.
 *
 *   u        pointer to double, receives the Buckingham potential
 *   du_r     pointer to double, receives the Buckingham force scaling
 *   r2       square of the pairwise distance (must be positive)
 *   a, b, c  constant parameters (b must be nonzero, it is inverted)
 *
 * All macro parameters are parenthesized and all locals carry the nbp_
 * prefix so that argument expressions cannot be mis-parsed or captured
 * (the working variable for r used to be named plain "r").
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_BUCK(u, du_r, r2, a, b, c) \
do { \
  const double nbp_r2 = (r2); \
  double nbp_r;       /* r */ \
  double nbp_inv_b;   /* 1/b */ \
  double nbp_rep;     /* a*exp(-r/b) */ \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r6;  /* 1/r^6 */ \
  double nbp_disp;    /* c/r^6 */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_r = sqrt(nbp_r2); \
  nbp_inv_b = 1.0 / (b); \
  nbp_rep = (a) * exp(-nbp_r * nbp_inv_b); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r6 = nbp_inv_r2 * nbp_inv_r2 * nbp_inv_r2; \
  nbp_disp = (c) * nbp_inv_r6; \
  *(u) = nbp_rep - nbp_disp; \
  *(du_r) = nbp_inv_r2 * (6.0 * nbp_disp - nbp_r * nbp_rep * nbp_inv_b); \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_BUCK */
void force_compute_nbpairs_vdw_buck(double *u, double *du_r,
    double r2, double a, double b, double c)
{
  FORCE_COMPUTE_NBPAIRS_VDW_BUCK(u, du_r, r2, a, b, c);
}


/*
 * Switched Buckingham kernel.
 *
 * With w(r) = a * exp(-r / b) - c / r^6, the result is w for r2 <= ron2
 * and w * s for r2 > ron2, where the switching function is
 *   s = (roff2 - r2)^2 * (roff2 + 2 r2 - 3 ron2) * denom
 * and (1/r)*(ds/dr) = 12 * (roff2 - r2) * (ron2 - r2) * denom.
 *
 *   u        pointer to double, receives the switched potential
 *   du_r     pointer to double, receives the switched force scaling
 *   r2       square of the pairwise distance (must be positive)
 *   a, b, c  constant parameters (b must be nonzero, it is inverted)
 *   roff2    square of cutoff distance (vdw_cutoff2)
 *   ron2     square of switching distance (switch_dist2)
 *   denom    inverse of the denominator of the switching function
 *
 * The pair is ASSUMED to be within the cutoff (r2 <= roff2).
 *
 * All macro parameters are parenthesized and all locals carry the nbp_
 * prefix so that argument expressions cannot be mis-parsed or captured.
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(u,du_r,r2,a,b,c,roff2,ron2,denom)\
do { \
  const double nbp_r2 = (r2); \
  const double nbp_ron2 = (ron2); \
  const double nbp_roff2 = (roff2); \
  const double nbp_denom = (denom); \
  double nbp_r;       /* r */ \
  double nbp_inv_b;   /* 1/b */ \
  double nbp_rep;     /* a*exp(-r/b) */ \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r6;  /* 1/r^6 */ \
  double nbp_disp;    /* c/r^6 */ \
  double nbp_w;       /* unswitched Buckingham potential */ \
  double nbp_dw_r;    /* (1/r)*(dw/dr) */ \
\
  ASSERT(nbp_ron2 < nbp_roff2); \
  ASSERT(nbp_r2 <= nbp_roff2); \
  ASSERT(nbp_r2 > 0.0); \
  nbp_r = sqrt(nbp_r2); \
  nbp_inv_b = 1.0 / (b); \
  nbp_rep = (a) * exp(-nbp_r * nbp_inv_b); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r6 = nbp_inv_r2 * nbp_inv_r2 * nbp_inv_r2; \
  nbp_disp = (c) * nbp_inv_r6; \
  nbp_w = nbp_rep - nbp_disp; \
  nbp_dw_r = nbp_inv_r2 * (6.0 * nbp_disp - nbp_r * nbp_rep * nbp_inv_b); \
  if (nbp_r2 > nbp_ron2) { \
    const double nbp_gap = nbp_roff2 - nbp_r2; \
    const double nbp_s = nbp_gap * nbp_gap \
      * (nbp_roff2 + 2.0 * nbp_r2 - 3.0 * nbp_ron2) * nbp_denom; \
    const double nbp_ds_r \
      = 12.0 * nbp_gap * (nbp_ron2 - nbp_r2) * nbp_denom; \
    *(u) = nbp_w * nbp_s; \
    *(du_r) = nbp_w * nbp_ds_r + nbp_dw_r * nbp_s; \
  } \
  else { \
    *(u) = nbp_w; \
    *(du_r) = nbp_dw_r; \
  } \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK */
void force_compute_nbpairs_vdw_switchbuck(double *u, double *du_r,
    double r2, double a, double b, double c,
    double roff2, double ron2, double denom)
{
  FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(u,du_r,r2,a,b,c,roff2,ron2,denom);
}


/*
 * Buckingham kernel without the dispersion term (replaces van der Waals):
 *   U(r) = a * exp(-r / b)
 *
 * Produces the potential and the force scaling
 *   (1/r)*(dU/dr) = -(r / b) * a * exp(-r / b) / r^2.
 *
 *   u     pointer to double, receives the potential
 *   du_r  pointer to double, receives the force scaling
 *   r2    square of the pairwise distance (must be positive)
 *   a, b  constant parameters (b must be nonzero, it is inverted)
 *
 * All macro parameters are parenthesized and all locals carry the nbp_
 * prefix so that argument expressions cannot be mis-parsed or captured.
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(u, du_r, r2, a, b) \
do { \
  const double nbp_r2 = (r2); \
  double nbp_r;       /* r */ \
  double nbp_inv_b;   /* 1/b */ \
  double nbp_rep;     /* a*exp(-r/b) */ \
  double nbp_inv_r2;  /* 1/r^2 */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_r = sqrt(nbp_r2); \
  nbp_inv_b = 1.0 / (b); \
  nbp_rep = (a) * exp(-nbp_r * nbp_inv_b); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  *(u) = nbp_rep; \
  *(du_r) = nbp_inv_r2 * (-nbp_r * nbp_rep * nbp_inv_b); \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_BUCKND */
void force_compute_nbpairs_vdw_bucknd(double *u, double *du_r,
    double r2, double a, double b)
{
  FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(u, du_r, r2, a, b);
}


/*
 * Switched Buckingham kernel without the dispersion term.
 *
 * With w(r) = a * exp(-r / b), the result is w for r2 <= ron2 and w * s
 * for r2 > ron2, where the switching function is
 *   s = (roff2 - r2)^2 * (roff2 + 2 r2 - 3 ron2) * denom
 * and (1/r)*(ds/dr) = 12 * (roff2 - r2) * (ron2 - r2) * denom.
 *
 *   u      pointer to double, receives the switched potential
 *   du_r   pointer to double, receives the switched force scaling
 *   r2     square of the pairwise distance (must be positive)
 *   a, b   constant parameters (b must be nonzero, it is inverted)
 *   roff2  square of cutoff distance (vdw_cutoff2)
 *   ron2   square of switching distance (switch_dist2)
 *   denom  inverse of the denominator of the switching function
 *
 * The pair is ASSUMED to be within the cutoff (r2 <= roff2).
 *
 * All macro parameters are parenthesized and all locals carry the nbp_
 * prefix so that argument expressions cannot be mis-parsed or captured.
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(u,du_r,r2,a,b,roff2,ron2,denom)\
do { \
  const double nbp_r2 = (r2); \
  const double nbp_ron2 = (ron2); \
  const double nbp_roff2 = (roff2); \
  const double nbp_denom = (denom); \
  double nbp_r;       /* r */ \
  double nbp_inv_b;   /* 1/b */ \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_w;       /* unswitched potential a*exp(-r/b) */ \
  double nbp_dw_r;    /* (1/r)*(dw/dr) */ \
\
  ASSERT(nbp_ron2 < nbp_roff2); \
  ASSERT(nbp_r2 <= nbp_roff2); \
  ASSERT(nbp_r2 > 0.0); \
  nbp_r = sqrt(nbp_r2); \
  nbp_inv_b = 1.0 / (b); \
  nbp_w = (a) * exp(-nbp_r * nbp_inv_b); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_dw_r = nbp_inv_r2 * (-nbp_r * nbp_w * nbp_inv_b); \
  if (nbp_r2 > nbp_ron2) { \
    const double nbp_gap = nbp_roff2 - nbp_r2; \
    const double nbp_s = nbp_gap * nbp_gap \
      * (nbp_roff2 + 2.0 * nbp_r2 - 3.0 * nbp_ron2) * nbp_denom; \
    const double nbp_ds_r \
      = 12.0 * nbp_gap * (nbp_ron2 - nbp_r2) * nbp_denom; \
    *(u) = nbp_w * nbp_s; \
    *(du_r) = nbp_w * nbp_ds_r + nbp_dw_r * nbp_s; \
  } \
  else { \
    *(u) = nbp_w; \
    *(du_r) = nbp_dw_r; \
  } \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND */
void force_compute_nbpairs_vdw_switchbucknd(double *u, double *du_r,
    double r2, double a, double b,
    double roff2, double ron2, double denom)
{
  FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(u,du_r,r2,a,b,roff2,ron2,denom);
}


/*
 * Buckingham kernel with a replacement inner function, intended for
 * energy minimization (replaces van der Waals).
 *
 *   r2 >  rn2:  U(r) = a * exp(-r / b) - c / r^6
 *   r2 <= rn2:  U(r) = an / r^6 + bn
 *
 *   u         pointer to double, receives the potential
 *   du_r      pointer to double, receives (1/r)*(dU/dr)
 *   r2        square of the pairwise distance (must be positive)
 *   a, b, c   constant parameters of the Buckingham potential
 *   rn2       square of the inner switching distance
 *   an, bn    constant parameters of the inner function
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(u,du_r,r2,a,b,c,rn2,an,bn) \
do { \
  const double nbp_r2 = (r2); \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r6;  /* 1/r^6 */ \
\
  ASSERT(nbp_r2 > 0.0); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r6 = nbp_inv_r2 * nbp_inv_r2 * nbp_inv_r2; \
  if (nbp_r2 > (rn2)) { \
    /* outside the inner distance: regular Buckingham form */ \
    const double nbp_r = sqrt(nbp_r2); \
    const double nbp_inv_b = 1.0 / (b); \
    const double nbp_rep = (a) * exp(-nbp_r * nbp_inv_b); \
    const double nbp_disp = (c) * nbp_inv_r6; \
    *(u) = nbp_rep - nbp_disp; \
    *(du_r) = nbp_inv_r2 * (6.0 * nbp_disp - nbp_r * nbp_rep * nbp_inv_b); \
  } \
  else { \
    /* at or inside the inner distance: an/r^6 + bn */ \
    const double nbp_core = (an) * nbp_inv_r6; \
    *(u) = nbp_core + (bn); \
    *(du_r) = -6.0 * nbp_core * nbp_inv_r2; \
  } \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE */
void force_compute_nbpairs_vdw_bucksafe(double *u, double *du_r,
    double r2, double a, double b, double c,
    double rn2, double an, double bn)
{
  FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(u, du_r, r2, a, b, c, rn2, an, bn);
}


/*
 * Switched Buckingham kernel with a replacement inner function, intended
 * for energy minimization (replaces van der Waals).
 *
 *   r2 <= rn2:          U = an / r^6 + bn          (no switching applied)
 *   rn2 < r2 <= ron2:   U = w = a * exp(-r / b) - c / r^6
 *   r2 > ron2 (and > rn2):  U = w * s, with
 *     s = (roff2 - r2)^2 * (roff2 + 2 r2 - 3 ron2) * denom
 *
 *   u         pointer to double, receives the potential
 *   du_r      pointer to double, receives (1/r)*(dU/dr)
 *   r2        square of the pairwise distance (must be positive)
 *   a, b, c   constant parameters of the Buckingham potential
 *   rn2       square of the inner switching distance
 *   an, bn    constant parameters of the inner function
 *   roff2     square of cutoff distance (vdw_cutoff2)
 *   ron2      square of switching distance (switch_dist2)
 *   denom     inverse of the denominator of the switching function
 *
 * The pair is ASSUMED to be within the cutoff (r2 <= roff2).
 */
#define FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(u,du_r,r2,a,b,c,rn2,an,bn,roff2,ron2,denom)\
do { \
  const double nbp_r2 = (r2); \
  const double nbp_ron2 = (ron2); \
  const double nbp_roff2 = (roff2); \
  const double nbp_denom = (denom); \
  double nbp_inv_r2;  /* 1/r^2 */ \
  double nbp_inv_r6;  /* 1/r^6 */ \
\
  ASSERT(nbp_ron2 < nbp_roff2); \
  ASSERT(nbp_r2 <= nbp_roff2); \
  ASSERT(nbp_r2 > 0.0); \
  nbp_inv_r2 = 1.0 / nbp_r2; \
  nbp_inv_r6 = nbp_inv_r2 * nbp_inv_r2 * nbp_inv_r2; \
  if (nbp_r2 > (rn2)) { \
    const double nbp_r = sqrt(nbp_r2); \
    const double nbp_inv_b = 1.0 / (b); \
    const double nbp_rep = (a) * exp(-nbp_r * nbp_inv_b); \
    const double nbp_disp = (c) * nbp_inv_r6; \
    const double nbp_w = nbp_rep - nbp_disp; \
    const double nbp_dw_r \
      = nbp_inv_r2 * (6.0 * nbp_disp - nbp_r * nbp_rep * nbp_inv_b); \
    if (nbp_r2 > nbp_ron2) { \
      const double nbp_gap = nbp_roff2 - nbp_r2; \
      const double nbp_s = nbp_gap * nbp_gap \
        * (nbp_roff2 + 2.0 * nbp_r2 - 3.0 * nbp_ron2) * nbp_denom; \
      const double nbp_ds_r \
        = 12.0 * nbp_gap * (nbp_ron2 - nbp_r2) * nbp_denom; \
      *(u) = nbp_w * nbp_s; \
      *(du_r) = nbp_w * nbp_ds_r + nbp_dw_r * nbp_s; \
    } \
    else { \
      *(u) = nbp_w; \
      *(du_r) = nbp_dw_r; \
    } \
  } \
  else { \
    const double nbp_core = (an) * nbp_inv_r6; \
    *(u) = nbp_core + (bn); \
    *(du_r) = -6.0 * nbp_core * nbp_inv_r2; \
  } \
} while (0)

/* callable form of FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE */
void force_compute_nbpairs_vdw_switchbucksafe(double *u, double *du_r,
    double r2, double a, double b, double c,
    double rn2, double an, double bn,
    double roff2, double ron2, double denom)
{
  FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(u,du_r,r2,a,b,c,rn2,an,bn,roff2,ron2,denom);
}


/*
 * CHECK_BUCK(r2, i, j, u) - optional close-encounter diagnostic.
 *
 * When WARN_BUCK is defined, prints a warning whenever the squared
 * distance r2 is below BUCK_INNER_CUTOFF, reporting the two atom indices
 * i, j (printed with %d), r2 and the potential u.  When WARN_BUCK is not
 * defined the macro expands to nothing, so "CHECK_BUCK(...);" is an empty
 * statement.
 *
 * Arguments are parenthesized so that expression arguments expand safely;
 * note that r2 is evaluated twice in the enabled variant.
 */
#ifdef WARN_BUCK
#define CHECK_BUCK(r2, i, j, u) \
do { \
  if ((r2) < BUCK_INNER_CUTOFF) { \
    printf("# WARNING:  close encounter between atoms %d and %d,\n" \
           "#           r2 = %6.4f  potential = %10.3f\n", \
           (i), (j), (r2), (u)); \
  } \
} while (0)
#else
#define CHECK_BUCK(r2, i, j, u)
#endif



/******************************************************************************
 *
 * Routines implementing nonbonded pairwise interaction algorithms
 *
 ******************************************************************************/


/*
 * Sort the selected atoms into the grid cells of fobj.
 *
 * Every cell is first emptied (head = -1, cnt = 0).  Then, for each atom
 * index taken from sel[0..sel_len-1], the cell coordinates are obtained by
 * truncating gscale * (trpos + delta) per component, and the atom is
 * pushed onto the front of that cell's singly linked list (cell[].head
 * is the newest atom, next[] links to the one inserted before it).
 *
 * In directions that are not periodic the cell coordinate is clamped to
 * the valid range; in periodic directions it is only checked by ASSERT.
 *
 * Always returns 0.
 */
int force_compute_nbpairs_geomhash(Force *fobj,
    const MD_Dvec trpos[], const int32 sel[], int32 sel_len)
{
  const MD_Dvec scale = fobj->gscale;
  const MD_Dvec shift = fobj->delta;

  ForceCell *cell = fobj->cell;
  int32 *next = fobj->next;
  const int32 ncells = fobj->ncells;
  const int32 nu = fobj->nucells;
  const int32 nv = fobj->nvcells;
  const int32 nw = fobj->nwcells;

  /* a direction needs clamping exactly when its periodic flag is clear */
  const int32 clamp_u = !(fobj->is_periodic & FORCE_X_PERIODIC);
  const int32 clamp_v = !(fobj->is_periodic & FORCE_Y_PERIODIC);
  const int32 clamp_w = !(fobj->is_periodic & FORCE_Z_PERIODIC);

  int32 slot, pick, id, iu, iv, iw;

  ASSERT(trpos != NULL);

  /* empty every cell */
  for (slot = 0;  slot < ncells;  slot++) {
    cell[slot].cnt = 0;
    cell[slot].head = -1;
  }

  /* bin the selected atoms */
  for (pick = 0;  pick < sel_len;  pick++) {
    id = sel[pick];

    iu = (int32) (scale.x * (trpos[id].x + shift.x));
    iv = (int32) (scale.y * (trpos[id].y + shift.y));
    iw = (int32) (scale.z * (trpos[id].z + shift.z));

    /* pull out-of-range coordinates back in for nonperiodic directions */
    if (clamp_u) iu = (iu < 0 ? 0 : (iu >= nu ? nu - 1 : iu));
    if (clamp_v) iv = (iv < 0 ? 0 : (iv >= nv ? nv - 1 : iv));
    if (clamp_w) iw = (iw < 0 ? 0 : (iw >= nw ? nw - 1 : iw));

    ASSERT(iu >= 0 && iu < nu);
    ASSERT(iv >= 0 && iv < nv);
    ASSERT(iw >= 0 && iw < nw);

    /* flatten (iu,iv,iw) with iu varying fastest */
    slot = (iw * nv + iv) * nu + iu;
    ASSERT(slot >= 0 && slot < ncells);

    /* push atom onto the front of the cell's list */
    next[id] = cell[slot].head;
    cell[slot].head = id;
    cell[slot].cnt++;
  }

  return 0;
}


/*
 * Evaluate cutoff nonbonded pair interactions using the grid cells.
 *
 * For every cell n and every entry k of its neighbor list, all pairs
 * (j in cell n, i in neighbor cell) are visited.  Neighbor entry 0 is the
 * cell itself, in which case i starts at next[j] so each pair is seen once.
 * The displacement uses pos + fobj->poswrap for both atoms, with the
 * neighbor's entry of fobj->offset_table subtracted from atom j.
 *
 * Outputs (all accumulated into, nothing is cleared here):
 *   virial[]   nine entries indexed by FORCE_VIRIAL_*, filled symmetrically
 *   *u_elec    total electrostatic energy
 *   f_elec[]   per-atom electrostatic force
 *   e_elec[]   per-atom electrostatic energy (half of each pair energy)
 *   e_epot[]   per-atom electrostatic potential
 *   *u_vdw     total van der Waals energy
 *   f_vdw[]    per-atom van der Waals force
 *   e_vdw[]    per-atom van der Waals energy (half of each pair energy)
 *
 * Inputs:
 *   is_elec_gridcells, is_vdw_gridcells
 *              when zero the respective cutoff is set to 0.0, which
 *              disables that interaction in the pair loop
 *   elec_pair_potential, vdw_pair_potential
 *              select the kernel (FORCE_ELEC_* / FORCE_VDW_* constants)
 *   pos[]      atom positions
 *   is_subtracted
 *              when nonzero, elec_const and the vdw constants a, b are
 *              negated so the interaction is subtracted
 *
 * Always returns 0.
 *
 * NOTE(review): f_elec[j], f_vdw[j], e_elec[j], e_vdw[j] and e_epot[j]
 * are written for every atom j found in a cell, and *u_elec / *u_vdw are
 * written at the end, regardless of the is_*_gridcells flags, so all of
 * these pointers have to be valid even when an interaction is disabled.
 */
int force_compute_nbpairs_gridcells(Force *fobj, double virial[],
    double *u_elec, MD_Dvec f_elec[], double e_elec[], double e_epot[],
    int32 is_elec_gridcells, int32 elec_pair_potential,
    double *u_vdw, MD_Dvec f_vdw[], double e_vdw[],
    int32 is_vdw_gridcells, int32 vdw_pair_potential,
    const MD_Dvec pos[], int32 is_subtracted)
{
#ifdef DEBUG_VIRIAL
  double v_elec[9] = { 0.0 };
  double v_vdw[9] = { 0.0 };
#endif

  MD_Dvec f_elec_j, f_vdw_j, pj;
  MD_Dvec r_ij, f_ij, offset;
  const double elec_const = (is_subtracted ?
      -fobj->elec_const : fobj->elec_const);
  /* r2 is a sum of squares, so a cutoff of 0.0 makes "r2 < cutoff" fail */
  const double elec_cutoff2 = (is_elec_gridcells ? fobj->elec_cutoff2 : 0.0);
  const double vdw_cutoff2 = (is_vdw_gridcells ? fobj->vdw_cutoff2 : 0.0);
  double a, b, r2, pot_i, pot_j, q_j;
  double e_elec_sum = 0.0, e_vdw_sum = 0.0;
  double e_elec_j, e_vdw_j, e_epot_j;
  double u = 0.0, du_r = 0.0;  /* else compiler generates warning */
  MD_Dvec fsum_ij;
  double virial_upper_xx = 0.0;
  double virial_upper_xy = 0.0;
  double virial_upper_xz = 0.0;
  double virial_upper_yy = 0.0;
  double virial_upper_yz = 0.0;
  double virial_upper_zz = 0.0;
  const double inv_elec_cutoff2 = fobj->inv_elec_cutoff2;
  const double ewald_coef = fobj->param->ewald_coef;
  const double ewald_grad_coef = fobj->ewald_grad_coef;
  const double glass_grad_coef = 2.0/sqrt(M_PI);
  const double roff2 = fobj->vdw_cutoff2;
  const double ron2 = fobj->switch_dist2;
  const double inv_denom_switch = fobj->inv_denom_switch;
  MD_Dvec *wrap = fobj->poswrap;
  const MD_Dvec *offset_table = fobj->offset_table;
  const ForceVdwparam *vdwtable = fobj->vdwparam;
  const ForceVdwparam *entry;
  const ForceBucksafe *bucksafetable = fobj->bucksafe;
  const ForceBucksafe *bentry;
  int32 **excl_list = fobj->excl_list;
  int32 **scaled14_list = fobj->scaled14_list;
  const MD_Atom *atom = fobj->param->atom;
  const int32 atomprm_len = fobj->param->atomprm_len;
  const int32 *next = fobj->next;
  int32 *excl;
  const ForceCell *cell = fobj->cell;
  const int32 ncells = fobj->ncells;
  int32 i, j, k, n;
  int32 nnbrs, ihead, jhead;
  int32 is_not_excluded;

  TEXT("entering force_compute_nbpairs_gridcells");

  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  /* loop over cells */
  for (n = 0;  n < ncells;  n++) {
    nnbrs = cell[n].nnbrs;
    jhead = cell[n].head;

    /* loop over all of this cell's neighbors */
    for (k = 0;  k < nnbrs;  k++) {
      offset = offset_table[ (int)(cell[n].offset[k]) ];
      ihead = cell[ cell[n].nbr[k] ].head;

      /* loop over all pairs of atoms */
      for (j = jhead;  j != -1;  j = next[j]) {

        /* subtracting offset from p[j] is same as adding it to p[i] */
        pj.x = (pos[j].x + wrap[j].x) - offset.x;
        pj.y = (pos[j].y + wrap[j].y) - offset.y;
        pj.z = (pos[j].z + wrap[j].z) - offset.z;

        /* accumulate into local storage for efficiency */
        f_elec_j.x = 0.0;
        f_elec_j.y = 0.0;
        f_elec_j.z = 0.0;
        f_vdw_j.x = 0.0;
        f_vdw_j.y = 0.0;
        f_vdw_j.z = 0.0;
        e_elec_j = 0.0;
        e_vdw_j = 0.0;
        e_epot_j = 0.0;
        q_j = atom[j].q;

        /* 0th neighbor cell is self referential, must modify ihead */
        if (k == 0) ihead = next[j];

        for (i = ihead;  i != -1;  i = next[i]) {

          /* clear sum of nonbonded pairwise forces */
          fsum_ij.x = 0.0;
          fsum_ij.y = 0.0;
          fsum_ij.z = 0.0;

          /* r_ij is vector from atom i to atom j */
          r_ij.x = pj.x - (pos[i].x + wrap[i].x);
          r_ij.y = pj.y - (pos[i].y + wrap[i].y);
          r_ij.z = pj.z - (pos[i].z + wrap[i].z);

          /* r2 is square of pairwise distance */
          r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

          /* handle very small Drude oscillator lengths */
          is_not_excluded = 1;
          if (r2 < RDRMIN2) {
            /* check to see if this pair is excluded */
            if (excl_list) {
              /*
               * the scan stops at the first entry >= j, so it presumably
               * relies on each list being sorted ascending and ending
               * with a sentinel no smaller than any atom index
               * -- TODO confirm against the code that builds excl_list
               */
              for (excl = excl_list[i];  *excl < j;  excl++) ;
              if (j == *excl) is_not_excluded = 0;
            }
          }

          /* deal with electrostatics if within cutoff */
          if (is_not_excluded && r2 < elec_cutoff2) {
            /*
             * NOTE(review): there is no default case; if the selector
             * matches none of these, u and du_r keep whatever values
             * they held from the previous kernel evaluation.
             */
            switch (elec_pair_potential) {
              case FORCE_ELEC_STANDARD:
                FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(&u, &du_r,
                    r2, elec_const);
                break;
              case FORCE_ELEC_SHIFTED:
                FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(&u, &du_r,
                    r2, elec_const, inv_elec_cutoff2);
                break;
              case FORCE_ELEC_EWALD:
                FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                    r2, elec_const, ewald_coef, ewald_grad_coef);
                break;
              case FORCE_ELEC_GLASS:
                /* Ewald-form kernel with the pair's entry->b as coefficient */
                entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);
                FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                    r2, elec_const, entry->b, (glass_grad_coef * entry->b));
                break;
            }
            /* here u is still the kernel without the charge product */
            pot_i = u * q_j;
            pot_j = u * atom[i].q;
            u *= atom[i].q * q_j;
            du_r *= atom[i].q * q_j;
            f_ij.x = du_r * r_ij.x;
            f_ij.y = du_r * r_ij.y;
            f_ij.z = du_r * r_ij.z;
            f_elec[i].x += f_ij.x;
            f_elec[i].y += f_ij.y;
            f_elec[i].z += f_ij.z;
            f_elec_j.x -= f_ij.x;
            f_elec_j.y -= f_ij.y;
            f_elec_j.z -= f_ij.z;

            /* assign electrostatic contribution to summed forces */
            fsum_ij = f_ij;

#ifdef DEBUG_VIRIAL
            /* accumulate upper triangle to virial */
            v_elec[0] -= f_ij.x * r_ij.x;
            v_elec[1] -= f_ij.x * r_ij.y;
            v_elec[2] -= f_ij.x * r_ij.z;
            v_elec[3] -= f_ij.y * r_ij.x;
            v_elec[4] -= f_ij.y * r_ij.y;
            v_elec[5] -= f_ij.y * r_ij.z;
            v_elec[6] -= f_ij.z * r_ij.x;
            v_elec[7] -= f_ij.z * r_ij.y;
            v_elec[8] -= f_ij.z * r_ij.z;
#endif

            /* equally divide interaction energy between atoms */
            e_elec[i] += 0.5 * u;
            e_elec_j += 0.5 * u;
            /* accumulate electrostatic potentials */
            e_epot[i] += pot_i;
            e_epot_j += pot_j;
            /* accumulate overall electrostatic potential energy */
            e_elec_sum += u;
          }

          /* deal with van der Waals if within cutoff */
          if (r2 < vdw_cutoff2) {
            entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);

            /* choose default parameters and correct later if needed */
            a = entry->a;
            b = entry->b;

            /* RMIN2 expands to entry->rmin2 for the pair selected above */
            if (r2 < RMIN2) {
              /*
               * interaction could be large enough in magnitude that we
               * need to compute it correctly now to avoid roundoff error
               */

              /* check to see if this pair is excluded */
              if (excl_list) {
                for (excl = excl_list[i];  *excl < j;  excl++) ;
                if (j == *excl) {
                  /*
                   * the continue below bypasses the common virial update
                   * at the bottom of the loop, so the electrostatic part
                   * held in fsum_ij is accumulated here instead
                   */
                  /* accumulate upper triangle to virial */
                  virial_upper_xx -= fsum_ij.x * r_ij.x;
                  virial_upper_xy -= fsum_ij.x * r_ij.y;
                  virial_upper_xz -= fsum_ij.x * r_ij.z;
                  virial_upper_yy -= fsum_ij.y * r_ij.y;
                  virial_upper_yz -= fsum_ij.y * r_ij.z;
                  virial_upper_zz -= fsum_ij.z * r_ij.z;
                  continue;  /* next i-loop iteration */
                }
              }

              /* check to see if this is scaled 1-4 interaction */
              if (scaled14_list) {
                for (excl = scaled14_list[i];  *excl < j;  excl++) ;
                if (j == *excl) {
                  a = entry->a14;
                  b = entry->b14;
                }
              }
            }

            /* reverse signs on constants to subtract interaction */
            if (is_subtracted) {
              a = -a;
              b = -b;
            }

            /*
             * For the Buckingham variants the kernel's (a, b, c) arguments
             * are fed (a, entry->rmin2, b): entry->rmin2 supplies the
             * exponential length parameter and b the 1/r^6 coefficient.
             *
             * NOTE(review): there is no default case; an unmatched
             * selector leaves u and du_r at their previous values.
             * NOTE(review): in the BUCKSAFE variants bentry->a and
             * bentry->b are passed as stored, i.e. they are not negated
             * when is_subtracted is set -- confirm this is intended.
             */
            switch (vdw_pair_potential) {
              case FORCE_VDW_STANDARD:
                FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(&u, &du_r, r2, a, b);
                break;
              case FORCE_VDW_SWITCHED:
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(&u, &du_r, r2, a, b,
                    roff2, ron2, inv_denom_switch);
                break;
              case FORCE_VDW_BUCK:
                FORCE_COMPUTE_NBPAIRS_VDW_BUCK(&u, &du_r, r2, a,
                    entry->rmin2, b);
                CHECK_BUCK(r2, i, j, u);
                break;
              case FORCE_VDW_SWITCHBUCK:
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(&u, &du_r, r2, a,
                    entry->rmin2, b, roff2, ron2, inv_denom_switch);
                CHECK_BUCK(r2, i, j, u);
                break;
              case FORCE_VDW_BUCKND:
                FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                    entry->rmin2);
                break;
              case FORCE_VDW_SWITCHBUCKND:
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(&u, &du_r, r2, a,
                    entry->rmin2, roff2, ron2, inv_denom_switch);
                break;
              case FORCE_VDW_BUCKSAFE:
                bentry = bucksafetable
                  + (atom[i].prm * atomprm_len + atom[j].prm);
                FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(&u, &du_r, r2, a,
                    entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b);
                break;
              case FORCE_VDW_SWITCHBUCKSAFE:
                bentry = bucksafetable
                  + (atom[i].prm * atomprm_len + atom[j].prm);
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(&u, &du_r, r2, a,
                    entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b,
                    roff2, ron2, inv_denom_switch);
                break;
              case FORCE_VDW_GLASS:
                FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                    entry->rmin2);
                break;
            }
            f_ij.x = du_r * r_ij.x;
            f_ij.y = du_r * r_ij.y;
            f_ij.z = du_r * r_ij.z;
            f_vdw[i].x += f_ij.x;
            f_vdw[i].y += f_ij.y;
            f_vdw[i].z += f_ij.z;
            f_vdw_j.x -= f_ij.x;
            f_vdw_j.y -= f_ij.y;
            f_vdw_j.z -= f_ij.z;

            /* accumulate van der Waals contribution to summed forces */
            fsum_ij.x += f_ij.x;
            fsum_ij.y += f_ij.y;
            fsum_ij.z += f_ij.z;

#ifdef DEBUG_VIRIAL
            /* accumulate upper triangle to virial */
            v_vdw[0] -= f_ij.x * r_ij.x;
            v_vdw[1] -= f_ij.x * r_ij.y;
            v_vdw[2] -= f_ij.x * r_ij.z;
            v_vdw[3] -= f_ij.y * r_ij.x;
            v_vdw[4] -= f_ij.y * r_ij.y;
            v_vdw[5] -= f_ij.y * r_ij.z;
            v_vdw[6] -= f_ij.z * r_ij.x;
            v_vdw[7] -= f_ij.z * r_ij.y;
            v_vdw[8] -= f_ij.z * r_ij.z;
#endif

            /* equally divide potential between atoms */
            e_vdw[i] += 0.5 * u;
            e_vdw_j += 0.5 * u;
            e_vdw_sum += u;
          }
         
          /* accumulate upper triangle to virial */
          virial_upper_xx -= fsum_ij.x * r_ij.x;
          virial_upper_xy -= fsum_ij.x * r_ij.y;
          virial_upper_xz -= fsum_ij.x * r_ij.z;
          virial_upper_yy -= fsum_ij.y * r_ij.y;
          virial_upper_yz -= fsum_ij.y * r_ij.z;
          virial_upper_zz -= fsum_ij.z * r_ij.z;

        } /* end i-loop over atoms in neighbor cell */

        /* add accumulated force into array */
        f_elec[j].x += f_elec_j.x;
        f_elec[j].y += f_elec_j.y;
        f_elec[j].z += f_elec_j.z;
        f_vdw[j].x += f_vdw_j.x;
        f_vdw[j].y += f_vdw_j.y;
        f_vdw[j].z += f_vdw_j.z;
        e_elec[j] += e_elec_j;
        e_vdw[j] += e_vdw_j;
        e_epot[j] += e_epot_j;

      } /* end j-loop over atoms in this cell */

    } /* end k-loop over cell neighbors */

  } /* end n-loop over all cells */

  /* accumulate energy */
  *u_elec += e_elec_sum;
  *u_vdw += e_vdw_sum;

  /* accumulate nonbonded contribution to virial */
  /* only the upper triangle was summed; mirror it into the lower entries */
  virial[FORCE_VIRIAL_XX] += virial_upper_xx;
  virial[FORCE_VIRIAL_XY] += virial_upper_xy;
  virial[FORCE_VIRIAL_XZ] += virial_upper_xz;
  virial[FORCE_VIRIAL_YX] += virial_upper_xy;
  virial[FORCE_VIRIAL_YY] += virial_upper_yy;
  virial[FORCE_VIRIAL_YZ] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZX] += virial_upper_xz;
  virial[FORCE_VIRIAL_ZY] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZZ] += virial_upper_zz;

#ifdef DEBUG_VIRIAL
  printf("elec(cutoff) virial: %g %g %g  %g %g %g  %g %g %g\n",
      v_elec[0], v_elec[1], v_elec[2], v_elec[3], v_elec[4],
      v_elec[5], v_elec[6], v_elec[7], v_elec[8]);
  printf("vdw(cutoff) virial: %g %g %g  %g %g %g  %g %g %g\n",
      v_vdw[0], v_vdw[1], v_vdw[2], v_vdw[3], v_vdw[4],
      v_vdw[5], v_vdw[6], v_vdw[7], v_vdw[8]);
#endif

#ifdef DEBUG_VIRIAL
  printf("summed virial: %g %g %g  %g %g %g  %g %g %g\n",
      virial[0], virial[1], virial[2], virial[3], virial[4],
      virial[5], virial[6], virial[7], virial[8]);
#endif
  TEXT("leaving force_compute_nbpairs_gridcells");

  return 0;
}



/*
 * Enlarge the two parallel arrays of pairlist p (index[] and offset[])
 * by EXPANSION_TERM entries.  Expected to be called only when the list
 * is full (len == max).
 *
 * Returns 0 on success.  If either realloc() fails, an error is reported
 * and FORCE_FAIL is returned with p->max left unchanged (p->index may
 * already point to the enlarged buffer at that point).
 */
static int expand_list(Force *fobj, ForcePairlist *p)
{
#define EXPANSION_TERM  20
  int32 grown_max;
  int32 *grown_index;
  char *grown_offset;

  ASSERT(p->len == p->max);
  grown_max = p->max + EXPANSION_TERM;

  grown_index = (int32 *) realloc(p->index, grown_max * sizeof(int32));
  if (grown_index == NULL) {
    ERROR("realloc()");
    return FORCE_FAIL;
  }
  p->index = grown_index;

  grown_offset = (char *) realloc(p->offset, grown_max * sizeof(char));
  if (grown_offset == NULL) {
    ERROR("realloc()");
    return FORCE_FAIL;
  }
  p->offset = grown_offset;

  /* publish the new capacity only after both arrays have grown */
  p->max = grown_max;
  return 0;
}


#if 0
/*
 * Cut the allocated capacity of a sparsely filled pairlist in half.
 * (This definition sits inside an "#if 0" region and is not compiled.)
 *
 * The list must be non-empty and use less than a quarter of its capacity.
 * Returns 0 on success or FORCE_FAIL when realloc() fails, in which case
 * p->max keeps its old value.
 */
static int shrink_list(Force *fobj, ForcePairlist *p)
{
  int32 *small_index;
  char *small_offset;
  int32 capacity;

  ASSERT(p->len < (p->max >> 2) && p->len != 0);

  /* new capacity is half of the current one */
  capacity = (p->max >> 1);

  /* trim the array of partner atom indices */
  small_index = (int32 *) realloc(p->index, capacity * sizeof(int32));
  if (small_index == NULL) {
    ERROR("realloc()");
    return FORCE_FAIL;
  }
  p->index = small_index;

  /* trim the matching array of offset table indices */
  small_offset = (char *) realloc(p->offset, capacity * sizeof(char));
  if (small_offset == NULL) {
    ERROR("realloc()");
    return FORCE_FAIL;
  }
  p->offset = small_offset;

  p->max = capacity;
  return 0;
}
#endif



/*
 * Decide whether the pairlists have to be rebuilt.
 *
 * fobj     force object; its need_pairlist_regen flag forces a rebuild
 * delta2   square of the largest displacement tolerated for any atom
 * initpos  reference positions to compare against
 * pos      current positions
 * sel      indices of the atoms to examine
 * sel_len  number of entries in sel
 *
 * Returns 1 if a rebuild was requested through the flag or if any selected
 * atom has a squared displacement from its reference position that is
 * greater than delta2; returns 0 otherwise.
 */
int force_compute_nbpairs_isregen_pairlists(Force *fobj,
    double delta2, const MD_Dvec initpos[], const MD_Dvec pos[],
    const int32 sel[], int32 sel_len)
{
  int32 k;

  /* an explicit request overrides the displacement test */
  if (fobj->need_pairlist_regen) {
    return 1;
  }

  /* the existing lists stay valid only while no atom has moved too far */
  for (k = 0;  k < sel_len;  k++) {
    const int32 id = sel[k];
    const double mx = pos[id].x - initpos[id].x;
    const double my = pos[id].y - initpos[id].y;
    const double mz = pos[id].z - initpos[id].z;
    const double moved2 = mx*mx + my*my + mz*mz;

    if (moved2 > delta2) {
      return 1;
    }
  }

  return 0;
}


/*
 * Rebuild the pairlists for nonbonded interactions between atom set A
 * and atom set B.
 *
 * fobj          force object supplying the grid cells, the per-cell linked
 *               lists (next), the exclusion lists, the wrap vectors, the
 *               offset table and the pairlist storage
 * initpos       receives the current position of every atom in aset_sel,
 *               saved as the reference position for this set of lists
 * pos           current atom positions
 * aset_sel      indices of the set A atoms (the atoms that own a pairlist)
 * aset_sel_len  number of entries in aset_sel
 * mapnb         per-atom bit mask of set membership
 * aset_id       bit(s) in mapnb marking membership in set A
 * bset_id       bit(s) in mapnb marking membership in set B
 *
 * Every pair made of one set A atom and one set B atom that lies within
 * the outer cutoff and appears on neither the exclusion list nor the
 * scaled 1-4 list is appended to the pairlist of its set A atom, together
 * with the index of the cell offset that was used for the distance test.
 *
 * Returns 0 on success (and clears fobj->need_pairlist_regen), or
 * FORCE_FAIL if a pairlist could not be enlarged; in that case the lists
 * are left partially built and the regen flag is not cleared.
 */
int force_compute_nbpairs_regen_pairlists(Force *fobj,
    MD_Dvec initpos[], const MD_Dvec pos[],
    const int32 aset_sel[], int32 aset_sel_len,
    const int32 mapnb[], int32 aset_id, int32 bset_id)
{
  MD_Dvec pj, r_ij, offset;
  const double outer_cutoff2 = fobj->outer_cutoff2;
  double r2;
  MD_Dvec *wrap_save = fobj->wrap_save;
  const MD_Dvec *wrap = fobj->poswrap;
  const MD_Dvec *offset_table = fobj->offset_table;
  int32 **excl_list = fobj->excl_list;
  int32 **scaled14_list = fobj->scaled14_list;
  const int32 *next = fobj->next;
  int32 *excl;
  ForcePairlist *pairlist = fobj->pairlist;
  ForcePairlist *jlist;  /* list owned by j if j is in set A, else NULL */
  ForcePairlist *p;      /* list that receives the current pair */
  const ForceCell *cell = fobj->cell;
  const int32 ncells = fobj->ncells;
  int32 i, j, k, n;
  int32 nnbrs, ihead, jhead, indx, offset_index;

#ifdef OUTPUT_PAIRLIST_INFO
  printf("# building pairlists\n");
#endif

  /* clear pair lists (i.e. reset their used lengths to zero) */
  for (i = 0;  i < aset_sel_len;  i++) {
    j = aset_sel[i];
    p = &(pairlist[j]);

    /* save positions to initial reference value */
    initpos[j] = pos[j];

    /* save wrap to use until next regen */
    wrap_save[j] = wrap[j];

#if 0
    /* shrink list if too much space was initially allocated */
    if (p->len < (p->max >> 2) && p->len != 0 && shrink_list(fobj, p)) {
      ERROR("shrink_list()");
      return FORCE_FAIL;
    }
#endif
    /* allocated storage is kept, only the used length is reset */
    p->len = 0;
  }

  /* loop over cells */
  for (n = 0;  n < ncells;  n++) {
    nnbrs = cell[n].nnbrs;
    jhead = cell[n].head;

    /* loop over all of this cell's neighbors */
    for (k = 0;  k < nnbrs;  k++) {
      offset_index = (int32) cell[n].offset[k];
      offset = offset_table[ offset_index ];
      ihead = cell[ cell[n].nbr[k] ].head;

      /* loop over all pairs of atoms */
      for (j = jhead;  j != -1;  j = next[j]) {

        /*
         * Find which set j belongs to.  The answer is kept in jlist and
         * must stay fixed for the whole inner loop: overwriting it with
         * the list of a partner atom would make every later partner be
         * classified as if j were a set A atom.
         */
        if (mapnb[j] & aset_id) {
          jlist = &(pairlist[j]);
        }
        else if (mapnb[j] & bset_id) {
          jlist = NULL;
        }
        else continue;  /* j doesn't belong to a set! */

        /* subtracting offset from p[j] is same as adding it to p[i] */
        pj.x = (pos[j].x + wrap[j].x) - offset.x;
        pj.y = (pos[j].y + wrap[j].y) - offset.y;
        pj.z = (pos[j].z + wrap[j].z) - offset.z;

        /* 0th neighbor cell is self referential, must modify ihead */
        if (k == 0) ihead = next[j];

        for (i = ihead;  i != -1;  i = next[i]) {

          /*
           * Find which set i belongs to and pick the list that stores
           * the pair: the list always belongs to the set A atom and the
           * stored index is the set B atom.
           */
          if (jlist != NULL) {
            /* j is in set A, so i has to be in set B */
            if ((mapnb[i] & bset_id) == 0) continue;
            p = jlist;
            indx = i;
          }
          else {
            /* j is in set B only, so i has to be in set A */
            if ((mapnb[i] & aset_id) == 0) continue;
            p = &(pairlist[i]);
            indx = j;
            /*
             * NOTE(review): the offset stored below was applied as
             * (pos[j] - offset) - pos[i], while
             * force_compute_nbpairs_eval_pairlists() evaluates
             * pos[owner] - (pos[listed] + offset).  With i as the owner
             * this looks like it needs the index of the opposite offset
             * whenever offset is nonzero -- confirm against the layout
             * of offset_table.
             */
          }

          /* r_ij is vector from atom i to atom j */
          r_ij.x = pj.x - (pos[i].x + wrap[i].x);
          r_ij.y = pj.y - (pos[i].y + wrap[i].y);
          r_ij.z = pj.z - (pos[i].z + wrap[i].z);

          /* r2 is square of pairwise distance */
          r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

          /* make sure that pairwise distance is within outer cutoff */
          if (r2 >= outer_cutoff2) continue;

          /*
           * make sure this pair is not to be excluded
           * (each list is scanned while its entries are below j, so the
           * lists are presumably sorted and end in a sentinel that is
           * larger than any atom index -- verify where they are built)
           */
          if (excl_list) {
            for (excl = excl_list[i];  *excl < j;  excl++) ;
            if (j == *excl) continue;
          }
          if (scaled14_list) {
            for (excl = scaled14_list[i];  *excl < j;  excl++) ;
            if (j == *excl) continue;
          }

          /* extend list length if necessary */
          if (p->len == p->max && expand_list(fobj, p)) {
            ERROR("expand_list()");
            return FORCE_FAIL;
          }

          /* store the index */
          p->index[ p->len ] = indx;
          p->offset[ p->len ] = (char) offset_index;
          p->len++;

        } /* end i-loop over atoms in neighbor cell */

      } /* end j-loop over atoms in this cell */

    } /* end k-loop over cell neighbors */

  } /* end n-loop over all cells */

#ifdef DEBUG_PAIRLISTS
  {
    int32 len_used = 0, len_needed = 0;

    /* determine how much memory is used vs. needed for pairlists */
    for (i = 0;  i < aset_sel_len;  i++) {
      j = aset_sel[i];
      p = &(pairlist[j]);

      len_used += p->max;
      len_needed += p->len;

#if 0
      if (p->max - p->len >= 400) {
        printf("# atom %d: alloc'ed %d elements, using only %d elements\n",
            j, p->max, p->len);
      }
#endif
    }
    /* the factor 5 counts one int32 index plus one char offset per entry */
    printf("# pairlist used memory: %d bytes = %g MB\n",
        len_used * 5, (len_used * 5)*1e-6);
    printf("# pairlist needed memory:  %d bytes = %g MB\n",
        len_needed * 5, (len_needed * 5)*1e-6);
  }
#endif

  fobj->need_pairlist_regen = 0;
  return 0;
}


/*
 * Evaluate nonbonded pairwise interactions from previously built pairlists,
 * followed by the scaled 1-4 interactions of the selected atoms.
 *
 * fobj                 force object supplying constants, parameter tables,
 *                      saved wrap vectors, offset table and pairlists
 * virial               9-element virial, accumulated into (symmetric update)
 * u_elec, u_vdw        total electrostatic / van der Waals potential energy,
 *                      accumulated into
 * f_elec, f_vdw        per-atom force arrays, accumulated into
 * e_elec, e_vdw        per-atom energies, accumulated into; each pair energy
 *                      is split equally between its two atoms
 * e_epot               per-atom electrostatic potential, accumulated into
 * is_elec_gridcells    if zero, the electrostatic cutoff is set to 0 so that
 *                      no electrostatic interaction is evaluated here
 * elec_pair_potential  FORCE_ELEC_* selector of the electrostatic form
 * is_vdw_gridcells     if zero, the van der Waals cutoff is set to 0 so that
 *                      no van der Waals interaction is evaluated here
 * vdw_pair_potential   FORCE_VDW_* selector of the van der Waals form
 * pos                  atom positions
 * sel, sel_len         atoms whose pairlists (and scaled 1-4 lists) are
 *                      processed
 * mapnb, map_id        set membership mask per atom and the bit(s) a
 *                      scaled 1-4 partner must have to be processed
 *
 * Always returns 0.
 */
int force_compute_nbpairs_eval_pairlists(Force *fobj, double virial[],
    double *u_elec, MD_Dvec f_elec[], double e_elec[], double e_epot[],
    int32 is_elec_gridcells, int32 elec_pair_potential,
    double *u_vdw, MD_Dvec f_vdw[], double e_vdw[],
    int32 is_vdw_gridcells, int32 vdw_pair_potential,
    const MD_Dvec pos[], const int32 sel[], int32 sel_len,
    const int32 mapnb[], int32 map_id)
{
  MD_Dvec f_elec_j, f_vdw_j, pj;
  MD_Dvec r_ij, f_ij, offset;
  const double elec_const = fobj->elec_const;
  /* a cutoff of 0.0 makes the (r2 < cutoff2) tests below always fail */
  const double elec_cutoff2 = (is_elec_gridcells ? fobj->elec_cutoff2 : 0.0);
  const double vdw_cutoff2 = (is_vdw_gridcells ? fobj->vdw_cutoff2 : 0.0);
  double a, b, r2, pot_i, pot_j, q_j;
  double e_elec_sum = 0.0, e_vdw_sum = 0.0;
  double e_elec_j, e_vdw_j, e_epot_j;
  double u = 0.0, du_r = 0.0;  /* else compiler generates warning */
  MD_Dvec fsum_ij;
  double virial_upper_xx = 0.0;
  double virial_upper_xy = 0.0;
  double virial_upper_xz = 0.0;
  double virial_upper_yy = 0.0;
  double virial_upper_yz = 0.0;
  double virial_upper_zz = 0.0;
  const double inv_elec_cutoff2 = fobj->inv_elec_cutoff2;
  const double ewald_coef = fobj->param->ewald_coef;
  const double ewald_grad_coef = fobj->ewald_grad_coef;
  const double glass_grad_coef = 2.0/sqrt(M_PI);
  /* switching parameters always come from fobj, even if vdw is disabled */
  const double roff2 = fobj->vdw_cutoff2;
  const double ron2 = fobj->switch_dist2;
  const double inv_denom_switch = fobj->inv_denom_switch;
  /* wrap vectors saved when the pairlists were last regenerated */
  const MD_Dvec *wrap = fobj->wrap_save;
  const MD_Dvec *offset_table = fobj->offset_table;
  const ForceVdwparam *vdwtable = fobj->vdwparam;
  const ForceVdwparam *entry;
  const ForceBucksafe *bucksafetable = fobj->bucksafe;
  const ForceBucksafe *bentry;
  int32 **scaled14_list = fobj->scaled14_list;
  const ForcePairlist *pairlist = fobj->pairlist;
  int32 *index_list;
  int32 index_list_len;
  char *offset_list;
  const MD_Atom *atom = fobj->param->atom;
  const int32 atomprm_len = fobj->param->atomprm_len;
  int32 *excl;
  int32 i, j, k, n;

  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  /* loop over each atom's list of pairs */
  for (k = 0;  k < sel_len;  k++) {
    j = sel[k];  /* get next atom from selection array */

    /* wrapped position of atom j; the image offset is added to atom i below */
    pj.x = (pos[j].x + wrap[j].x);
    pj.y = (pos[j].y + wrap[j].y);
    pj.z = (pos[j].z + wrap[j].z);

    /* accumulate into local storage for efficiency */
    f_elec_j.x = 0.0;
    f_elec_j.y = 0.0;
    f_elec_j.z = 0.0;
    f_vdw_j.x = 0.0;
    f_vdw_j.y = 0.0;
    f_vdw_j.z = 0.0;
    e_elec_j = 0.0;
    e_vdw_j = 0.0;
    e_epot_j = 0.0;
    q_j = atom[j].q;

    /* loop through the jth atom's list of pairwise interactions */
    index_list = pairlist[j].index;
    index_list_len = pairlist[j].len;
    offset_list = pairlist[j].offset;
    for (n = 0;  n < index_list_len;  n++) {
      i = index_list[n];
      /* stored char is the index into the table of cell offset vectors */
      offset = offset_table[ (int)(offset_list[n]) ];

      /* clear sum of nonbonded pairwise forces */
      fsum_ij.x = 0.0;
      fsum_ij.y = 0.0;
      fsum_ij.z = 0.0;

      /* r_ij is vector from atom i to atom j */
      r_ij.x = pj.x - (pos[i].x + wrap[i].x + offset.x);
      r_ij.y = pj.y - (pos[i].y + wrap[i].y + offset.y);
      r_ij.z = pj.z - (pos[i].z + wrap[i].z + offset.z);

      /* r2 is square of pairwise distance */
      r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

      /* deal with electrostatics if within cutoff */
      if (r2 < elec_cutoff2) {
        /*
         * NOTE(review): there is no default case, so for an unrecognized
         * selector u and du_r keep whatever the previous evaluation left
         * in them -- presumably the selector is validated by the caller.
         */
        switch (elec_pair_potential) {
          case FORCE_ELEC_STANDARD:
            FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(&u, &du_r,
                r2, elec_const);
            break;
          case FORCE_ELEC_SHIFTED:
            FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(&u, &du_r,
                r2, elec_const, inv_elec_cutoff2);
            break;
          case FORCE_ELEC_EWALD:
            FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                r2, elec_const, ewald_coef, ewald_grad_coef);
            break;
          case FORCE_ELEC_GLASS:
            /* Ewald form with the pair's table entry b in place of ewald_coef */
            entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);
            FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                r2, elec_const, entry->b, (glass_grad_coef * entry->b));
            break;
        }
        /* u does not yet include the charges: potential at i is due to q_j */
        pot_i = u * q_j;
        pot_j = u * atom[i].q;
        u *= atom[i].q * q_j;
        du_r *= atom[i].q * q_j;
        f_ij.x = du_r * r_ij.x;
        f_ij.y = du_r * r_ij.y;
        f_ij.z = du_r * r_ij.z;
        f_elec[i].x += f_ij.x;
        f_elec[i].y += f_ij.y;
        f_elec[i].z += f_ij.z;
        f_elec_j.x -= f_ij.x;
        f_elec_j.y -= f_ij.y;
        f_elec_j.z -= f_ij.z;

        /* assign electrostatic contribution to summed forces */
        fsum_ij = f_ij;

        /* equally divide interaction energy between atoms */
        e_elec[i] += 0.5 * u;
        e_elec_j += 0.5 * u;
        /* accumulate electrostatic potentials */
        e_epot[i] += pot_i;
        e_epot_j += pot_j;
        /* accumulate overall electrostatic potential energy */
        e_elec_sum += u;
      }

      /* deal with van der Waals if within cutoff */
      if (r2 < vdw_cutoff2) {
        entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);

        /* get interaction parameters */
        a = entry->a;
        b = entry->b;

        /* (no default case here either; see the note on the elec switch) */
        switch (vdw_pair_potential) {
          case FORCE_VDW_STANDARD:
            FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(&u, &du_r, r2, a, b);
            break;
          case FORCE_VDW_SWITCHED:
            FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(&u, &du_r, r2, a, b,
                roff2, ron2, inv_denom_switch);
            break;
          case FORCE_VDW_BUCK:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCK(&u, &du_r, r2, a,
                entry->rmin2, b);
            CHECK_BUCK(r2, i, j, u);
            break;
          case FORCE_VDW_SWITCHBUCK:
            FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(&u, &du_r, r2, a,
                entry->rmin2, b, roff2, ron2, inv_denom_switch);
            CHECK_BUCK(r2, i, j, u);
            break;
          case FORCE_VDW_BUCKND:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                entry->rmin2);
            break;
          case FORCE_VDW_SWITCHBUCKND:
            FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(&u, &du_r, r2, a,
                entry->rmin2, roff2, ron2, inv_denom_switch);
            break;
          case FORCE_VDW_BUCKSAFE:
            bentry = bucksafetable
              + (atom[i].prm * atomprm_len + atom[j].prm);
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(&u, &du_r, r2, a,
                entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b);
            break;
          case FORCE_VDW_SWITCHBUCKSAFE:
            bentry = bucksafetable
              + (atom[i].prm * atomprm_len + atom[j].prm);
            FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(&u, &du_r, r2, a,
                entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b,
                roff2, ron2, inv_denom_switch);
            break;
          case FORCE_VDW_GLASS:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                entry->rmin2);
            break;
        }
        f_ij.x = du_r * r_ij.x;
        f_ij.y = du_r * r_ij.y;
        f_ij.z = du_r * r_ij.z;
        f_vdw[i].x += f_ij.x;
        f_vdw[i].y += f_ij.y;
        f_vdw[i].z += f_ij.z;
        f_vdw_j.x -= f_ij.x;
        f_vdw_j.y -= f_ij.y;
        f_vdw_j.z -= f_ij.z;

        /* accumulate van der Waals contribution to summed forces */
        fsum_ij.x += f_ij.x;
        fsum_ij.y += f_ij.y;
        fsum_ij.z += f_ij.z;

        /* equally divide potential between atoms */
        e_vdw[i] += 0.5 * u;
        e_vdw_j += 0.5 * u;
        e_vdw_sum += u;
      }

      /* accumulate upper triangle to virial */
      virial_upper_xx -= fsum_ij.x * r_ij.x;
      virial_upper_xy -= fsum_ij.x * r_ij.y;
      virial_upper_xz -= fsum_ij.x * r_ij.z;
      virial_upper_yy -= fsum_ij.y * r_ij.y;
      virial_upper_yz -= fsum_ij.y * r_ij.z;
      virial_upper_zz -= fsum_ij.z * r_ij.z;

    } /* end i-loop over atoms in interaction list */

    /* add accumulated force into array */
    f_elec[j].x += f_elec_j.x;
    f_elec[j].y += f_elec_j.y;
    f_elec[j].z += f_elec_j.z;
    f_vdw[j].x += f_vdw_j.x;
    f_vdw[j].y += f_vdw_j.y;
    f_vdw[j].z += f_vdw_j.z;
    e_elec[j] += e_elec_j;
    e_vdw[j] += e_vdw_j;
    e_epot[j] += e_epot_j;

  } /* end j-loop over atoms in selection array */

  /* take care of scaled 1-4 interactions */
  if (scaled14_list) {

    /* electrostatic constant reduced by the 1-4 scaling factor */
    const double c_elec = fobj->param->scaling14 * elec_const;

    /* loop over selected atoms, process exclusions */
    for (n = 0;  n < sel_len;  n++) {
      j = sel[n];

      /* copy into local storage for efficiency */
      /* (positions are used here without wrap vectors or cell offsets) */
      pj = pos[j];
      f_elec_j.x = 0.0;
      f_elec_j.y = 0.0;
      f_elec_j.z = 0.0;
      f_vdw_j.x = 0.0;
      f_vdw_j.y = 0.0;
      f_vdw_j.z = 0.0;
      e_elec_j = 0.0;
      e_vdw_j = 0.0;
      e_epot_j = 0.0;
      q_j = atom[j].q;

      /* visit only the list entries with index below j */
      for (excl = scaled14_list[j];  *excl < j;  excl++) {
        i = *excl;

        /* only process pairs whose partner i carries the map_id bit(s) */
        if ((mapnb[i] & map_id) == 0) continue;

        /* clear sum of nonbonded pairwise forces */
        fsum_ij.x = 0.0;
        fsum_ij.y = 0.0;
        fsum_ij.z = 0.0;

        /* r_ij is vector from atom i to atom j */
        r_ij.x = pj.x - pos[i].x;
        r_ij.y = pj.y - pos[i].y;
        r_ij.z = pj.z - pos[i].z;

        /* r2 is square of pairwise distance */
        r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

        /* deal with electrostatics if within cutoff */
        if (r2 < elec_cutoff2) {
          switch (elec_pair_potential) {
            case FORCE_ELEC_STANDARD:
              FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(&u, &du_r, r2, c_elec);
              break;
            case FORCE_ELEC_SHIFTED:
              FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(&u, &du_r, r2, c_elec,
                  inv_elec_cutoff2);
              break;
            case FORCE_ELEC_EWALD:
              FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r, r2, c_elec,
                  ewald_coef, ewald_grad_coef);
              break;
            case FORCE_ELEC_GLASS:
              /*
               * NOTE(review): this case passes the unscaled elec_const,
               * whereas the other cases of this switch pass c_elec --
               * confirm whether the 1-4 scaling is meant to be skipped.
               */
              entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                  r2, elec_const, entry->b, (glass_grad_coef * entry->b));
              break;
          }
          pot_i = u * q_j;
          pot_j = u * atom[i].q;
          u *= atom[i].q * q_j;
          du_r *= atom[i].q * q_j;
          f_ij.x = du_r * r_ij.x;
          f_ij.y = du_r * r_ij.y;
          f_ij.z = du_r * r_ij.z;
          f_elec[i].x += f_ij.x;
          f_elec[i].y += f_ij.y;
          f_elec[i].z += f_ij.z;
          f_elec_j.x -= f_ij.x;
          f_elec_j.y -= f_ij.y;
          f_elec_j.z -= f_ij.z;

          /* assign electrostatic contribution to summed forces */
          fsum_ij = f_ij;

          /* equally divide potential between atoms */
          e_elec[i] += 0.5 * u;
          e_elec_j += 0.5 * u;
          /* accumulate electrostatic potentials */
          e_epot[i] += pot_i;
          e_epot_j += pot_j;
          /* accumulate overall electrostatic potential energy */
          e_elec_sum += u;
        }

        /* deal with van der Waals if within cutoff */
        if (r2 < vdw_cutoff2) {
          entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);

          /* use scaled 1-4 parameters */
          a = entry->a14;
          b = entry->b14;

          switch (vdw_pair_potential) {
            case FORCE_VDW_STANDARD:
              FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(&u, &du_r, r2, a, b);
              break;
            case FORCE_VDW_SWITCHED:
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(&u, &du_r, r2, a, b,
                  roff2, ron2, inv_denom_switch);
              break;
            case FORCE_VDW_BUCK:
              FORCE_COMPUTE_NBPAIRS_VDW_BUCK(&u, &du_r, r2, a,
                  entry->rmin2, b);
              CHECK_BUCK(r2, i, j, u);
              break;
            case FORCE_VDW_SWITCHBUCK:
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(&u, &du_r, r2, a,
                  entry->rmin2, b, roff2, ron2, inv_denom_switch);
              CHECK_BUCK(r2, i, j, u);
              break;
            case FORCE_VDW_BUCKND:
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                  entry->rmin2);
              break;
            case FORCE_VDW_SWITCHBUCKND:
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(&u, &du_r, r2, a,
                  entry->rmin2, roff2, ron2, inv_denom_switch);
              break;
            case FORCE_VDW_BUCKSAFE:
              bentry = bucksafetable
                + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(&u, &du_r, r2, a,
                  entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b);
              break;
            case FORCE_VDW_SWITCHBUCKSAFE:
              bentry = bucksafetable
                + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(&u, &du_r, r2, a,
                  entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b,
                  roff2, ron2, inv_denom_switch);
              break;
            case FORCE_VDW_GLASS:
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                  entry->rmin2);
              break;
          }
          f_ij.x = du_r * r_ij.x;
          f_ij.y = du_r * r_ij.y;
          f_ij.z = du_r * r_ij.z;
          f_vdw[i].x += f_ij.x;
          f_vdw[i].y += f_ij.y;
          f_vdw[i].z += f_ij.z;
          f_vdw_j.x -= f_ij.x;
          f_vdw_j.y -= f_ij.y;
          f_vdw_j.z -= f_ij.z;

          /* accumulate van der Waals contribution to summed forces */
          fsum_ij.x += f_ij.x;
          fsum_ij.y += f_ij.y;
          fsum_ij.z += f_ij.z;

          /* equally divide potential between atoms */
          e_vdw[i] += 0.5 * u;
          e_vdw_j += 0.5 * u;
          e_vdw_sum += u;
        }

        /* accumulate upper triangle to virial */
        virial_upper_xx -= fsum_ij.x * r_ij.x;
        virial_upper_xy -= fsum_ij.x * r_ij.y;
        virial_upper_xz -= fsum_ij.x * r_ij.z;
        virial_upper_yy -= fsum_ij.y * r_ij.y;
        virial_upper_yz -= fsum_ij.y * r_ij.z;
        virial_upper_zz -= fsum_ij.z * r_ij.z;

      } /* end loop over atoms in this exclusion list */

      /* add accumulated force into array */
      f_elec[j].x += f_elec_j.x;
      f_elec[j].y += f_elec_j.y;
      f_elec[j].z += f_elec_j.z;
      f_vdw[j].x += f_vdw_j.x;
      f_vdw[j].y += f_vdw_j.y;
      f_vdw[j].z += f_vdw_j.z;
      e_elec[j] += e_elec_j;
      e_vdw[j] += e_vdw_j;
      e_epot[j] += e_epot_j;

    } /* end j-loop over selected atoms */

  } /* end scaled 1-4 interactions */

  /* accumulate energy */
  *u_elec += e_elec_sum;
  *u_vdw += e_vdw_sum;

  /* accumulate nonbonded contribution to virial */
  /* (only the upper triangle was summed; it is mirrored into the lower) */
  virial[FORCE_VIRIAL_XX] += virial_upper_xx;
  virial[FORCE_VIRIAL_XY] += virial_upper_xy;
  virial[FORCE_VIRIAL_XZ] += virial_upper_xz;
  virial[FORCE_VIRIAL_YX] += virial_upper_xy;
  virial[FORCE_VIRIAL_YY] += virial_upper_yy;
  virial[FORCE_VIRIAL_YZ] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZX] += virial_upper_xz;
  virial[FORCE_VIRIAL_ZY] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZZ] += virial_upper_zz;

  return 0;
}



int force_compute_nbpairs_direct(Force *fobj, double virial[],
    double *u_elec, MD_Dvec f_elec[], double e_elec[], double e_epot[],
    int32 is_elec_direct, int32 elec_pair_potential,
    double *u_vdw, MD_Dvec f_vdw[], double e_vdw[],
    int32 is_vdw_direct, int32 vdw_pair_potential,
    const MD_Dvec pos[],
    const int32 aset_sel[], int32 aset_sel_len,
    const int32 bset_sel[], int32 bset_sel_len)
{
#ifdef DEBUG_VIRIAL
  double v_elec[9] = { 0.0 };
  double v_vdw[9] = { 0.0 };
#endif

  MD_Dvec f_elec_j, f_vdw_j, pj, sj;
  MD_Dvec r_ij, s_ij, f_ij;
  MD_Dvec v1, v2, v3;
  const double elec_const = fobj->elec_const;
  const double elec_cutoff2 = fobj->elec_cutoff2;
  const double vdw_cutoff2 = fobj->vdw_cutoff2;
  double a, b, r2, pot_i, pot_j, q_j;
  double e_elec_sum = 0.0, e_vdw_sum = 0.0;
  double e_elec_j, e_vdw_j, e_epot_j;
  double u = 0.0, du_r = 0.0;  /* else compiler generates warning */
  MD_Dvec fsum_ij;
  double virial_upper_xx = 0.0;
  double virial_upper_xy = 0.0;
  double virial_upper_xz = 0.0;
  double virial_upper_yy = 0.0;
  double virial_upper_yz = 0.0;
  double virial_upper_zz = 0.0;
  const double inv_elec_cutoff2 = fobj->inv_elec_cutoff2;
  const double ewald_coef = fobj->param->ewald_coef;
  const double ewald_grad_coef = fobj->ewald_grad_coef;
  const double glass_grad_coef = 2.0/sqrt(M_PI);
  const double roff2 = fobj->vdw_cutoff2;
  const double ron2 = fobj->switch_dist2;
  const double inv_denom_switch = fobj->inv_denom_switch;
  const MD_Dvec *trpos = fobj->trpos;
  const MD_Dvec *wrap = fobj->poswrap;
  const ForceVdwparam *vdwtable = fobj->vdwparam;
  const ForceVdwparam *entry;
  const ForceBucksafe *bucksafetable = fobj->bucksafe;
  const ForceBucksafe *bentry;
  int32 **excl_list = fobj->excl_list;
  int32 **scaled14_list = fobj->scaled14_list;
  int32 *excl;
  const MD_Atom *atom = fobj->param->atom;
  const int32 atomprm_len = fobj->param->atomprm_len;
  const int32 is_same_set = (aset_sel == bset_sel);
  const int32 is_x_periodic = (fobj->is_periodic & FORCE_X_PERIODIC) != 0;
  const int32 is_y_periodic = (fobj->is_periodic & FORCE_Y_PERIODIC) != 0;
  const int32 is_z_periodic = (fobj->is_periodic & FORCE_Z_PERIODIC) != 0;
  const int32 is_periodic = (is_x_periodic || is_y_periodic || is_z_periodic);
  int32 i, j, ii, jj, iistart;
  int32 is_not_excluded;

  ASSERT(pos != NULL);
  ASSERT(wrap != NULL);

  /* add/subt basis vectors for nearest image convention */
  v1 = fobj->v1;
  v2 = fobj->v2;
  v3 = fobj->v3;

  /* start index for inner loop */
  iistart = 0;

  /* loop over set A atoms */
  for (jj = 0;  jj < aset_sel_len;  jj++) {
    j = aset_sel[jj];

    /* use local storage over inner loop for efficiency */
    pj.x = pos[j].x + wrap[j].x;
    pj.y = pos[j].y + wrap[j].y;
    pj.z = pos[j].z + wrap[j].z;

    /* use scaled coords to determine nearest image for periodic boundaries */
    sj = trpos[j];

    /* accumulate into local storage for efficiency */
    f_elec_j.x = 0.0;
    f_elec_j.y = 0.0;
    f_elec_j.z = 0.0;
    f_vdw_j.x = 0.0;
    f_vdw_j.y = 0.0;
    f_vdw_j.z = 0.0;
    e_elec_j = 0.0;
    e_vdw_j = 0.0;
    e_epot_j = 0.0;
    q_j = atom[j].q;

    /* loop over set B atoms */
    if (is_same_set) {
      iistart = jj + 1;
    }
    for (ii = iistart;  ii < bset_sel_len;  ii++) {
      i = bset_sel[ii];

      /* clear sum of nonbonded pairwise forces */
      fsum_ij.x = 0.0;
      fsum_ij.y = 0.0;
      fsum_ij.z = 0.0;

      /* r_ij is vector from atom i to atom j */
      r_ij.x = pj.x - (pos[i].x + wrap[i].x);
      r_ij.y = pj.y - (pos[i].y + wrap[i].y);
      r_ij.z = pj.z - (pos[i].z + wrap[i].z);

      /* for periodic boundaries, use nearest image convention */
      if (is_periodic) {

        /* s_ij is vector from scaled coord i to scaled coord j */
        s_ij.x = sj.x - trpos[i].x;
        s_ij.y = sj.y - trpos[i].y;
        s_ij.z = sj.z - trpos[i].z;

        if (is_x_periodic) {
          if (s_ij.x > 0.5) {
            r_ij.x -= v1.x;
            r_ij.y -= v1.y;
            r_ij.z -= v1.z;
          }
          else if (s_ij.x < -0.5) {
            r_ij.x += v1.x;
            r_ij.y += v1.y;
            r_ij.z += v1.z;
          }
        }

        if (is_y_periodic) {
          if (s_ij.y > 0.5) {
            r_ij.x -= v2.x;
            r_ij.y -= v2.y;
            r_ij.z -= v2.z;
          }
          else if (s_ij.y < -0.5) {
            r_ij.x += v2.x;
            r_ij.y += v2.y;
            r_ij.z += v2.z;
          }
        }

        if (is_z_periodic) {
          if (s_ij.z > 0.5) {
            r_ij.x -= v3.x;
            r_ij.y -= v3.y;
            r_ij.z -= v3.z;
          }
          else if (s_ij.z < -0.5) {
            r_ij.x += v3.x;
            r_ij.y += v3.y;
            r_ij.z += v3.z;
          }
        }

      } /* end is periodic */

      /* r2 is square of pairwise distance */
      r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;
      ASSERT(i != j);

      /* handle very small Drude oscillator lengths */
      is_not_excluded = 1;
      if (r2 < RDRMIN2) {
        /* check to see if this pair is excluded */
        if (excl_list) {
          for (excl = excl_list[i];  *excl < j;  excl++) ;
          if (j == *excl) is_not_excluded = 0;
        }
      }

      if (is_not_excluded && is_elec_direct) {
        /* deal with electrostatics */
        switch (elec_pair_potential) {
          case FORCE_ELEC_STANDARD:
            FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(&u, &du_r, r2, elec_const);
            break;
          case FORCE_ELEC_SHIFTED:
            u = du_r = 0.0;
            if (r2 < elec_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(&u, &du_r,
                  r2, elec_const, inv_elec_cutoff2);
            }
            break;
          case FORCE_ELEC_EWALD:
            u = du_r = 0.0;
            if (r2 < elec_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                  r2, elec_const, ewald_coef, ewald_grad_coef);
            }
            break;
          case FORCE_ELEC_GLASS:
            u = du_r = 0.0;
            if (r2 < elec_cutoff2) {
              entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                  r2, elec_const, entry->b, (glass_grad_coef * entry->b));
            }
            break;
        }
        pot_i = u * q_j;
        pot_j = u * atom[i].q;
        u *= atom[i].q * q_j;
        du_r *= atom[i].q * q_j;
        f_ij.x = du_r * r_ij.x;
        f_ij.y = du_r * r_ij.y;
        f_ij.z = du_r * r_ij.z;
        f_elec[i].x += f_ij.x;
        f_elec[i].y += f_ij.y;
        f_elec[i].z += f_ij.z;
        f_elec_j.x -= f_ij.x;
        f_elec_j.y -= f_ij.y;
        f_elec_j.z -= f_ij.z;

        /* assign electrostatic contribution to summed forces */
        fsum_ij = f_ij;

#ifdef DEBUG_VIRIAL
        /* accumulate upper triangle to virial */
        v_elec[0] -= f_ij.x * r_ij.x;
        v_elec[1] -= f_ij.x * r_ij.y;
        v_elec[2] -= f_ij.x * r_ij.z;
        v_elec[3] -= f_ij.y * r_ij.x;
        v_elec[4] -= f_ij.y * r_ij.y;
        v_elec[5] -= f_ij.y * r_ij.z;
        v_elec[6] -= f_ij.z * r_ij.x;
        v_elec[7] -= f_ij.z * r_ij.y;
        v_elec[8] -= f_ij.z * r_ij.z;
#endif

        /* equally divide potential between atoms */
        e_elec[i] += 0.5 * u;
        e_elec_j += 0.5 * u;
        /* accumulate electrostatic potentials */
        e_epot[i] += pot_i;
        e_epot_j += pot_j;
        /* accumulate overall electrostatic potential energy */
        e_elec_sum += u;
      } /* end is_elec_direct */

      if (is_vdw_direct) {
        /* deal with van der Waals */
        entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);

        /* choose default parameters and correct later if needed */
        a = entry->a;
        b = entry->b;

        if (r2 < RMIN2) {
          /*
           * interaction could be large enough in magnitude that we
           * need to compute it correctly now to avoid roundoff error
           */

          /* check to see if this pair is excluded */
          if (excl_list) {
            for (excl = excl_list[i];  *excl < j;  excl++) ;
            if (j == *excl) {
              /* accumulate upper triangle to virial */
              virial_upper_xx -= fsum_ij.x * r_ij.x;
              virial_upper_xy -= fsum_ij.x * r_ij.y;
              virial_upper_xz -= fsum_ij.x * r_ij.z;
              virial_upper_yy -= fsum_ij.y * r_ij.y;
              virial_upper_yz -= fsum_ij.y * r_ij.z;
              virial_upper_zz -= fsum_ij.z * r_ij.z;
              continue;  /* next i-loop iteration */
            }
          }

          /* check to see if this is scaled 1-4 interaction */
          if (scaled14_list) {
            for (excl = scaled14_list[i];  *excl < j;  excl++) ;
            if (j == *excl) {
              a = entry->a14;
              b = entry->b14;
            }
          }
        }

        switch (vdw_pair_potential) {
          case FORCE_VDW_STANDARD:
            FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(&u, &du_r, r2, a, b);
            break;
          case FORCE_VDW_SWITCHED:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(&u, &du_r, r2, a, b,
                  roff2, ron2, inv_denom_switch);
            }
            break;
          case FORCE_VDW_BUCK:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCK(&u, &du_r, r2, a,
                entry->rmin2, b);
            CHECK_BUCK(r2, i, j, u);
            break;
          case FORCE_VDW_SWITCHBUCK:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(&u, &du_r, r2, a,
                  entry->rmin2, b, roff2, ron2, inv_denom_switch);
              CHECK_BUCK(r2, i, j, u);
            }
            break;
          case FORCE_VDW_BUCKND:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                entry->rmin2);
            break;
          case FORCE_VDW_SWITCHBUCKND:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(&u, &du_r, r2, a,
                  entry->rmin2, roff2, ron2, inv_denom_switch);
            }
            break;
          case FORCE_VDW_BUCKSAFE:
            bentry = bucksafetable
              + (atom[i].prm * atomprm_len + atom[j].prm);
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(&u, &du_r, r2, a,
                entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b);
            break;
          case FORCE_VDW_SWITCHBUCKSAFE:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              bentry = bucksafetable
                + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(&u, &du_r, r2, a,
                  entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b,
                  roff2, ron2, inv_denom_switch);
            }
            break;
          case FORCE_VDW_GLASS:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                  entry->rmin2);
            }
            break;
        }
        f_ij.x = du_r * r_ij.x;
        f_ij.y = du_r * r_ij.y;
        f_ij.z = du_r * r_ij.z;
        f_vdw[i].x += f_ij.x;
        f_vdw[i].y += f_ij.y;
        f_vdw[i].z += f_ij.z;
        f_vdw_j.x -= f_ij.x;
        f_vdw_j.y -= f_ij.y;
        f_vdw_j.z -= f_ij.z;

        /* accumulate van der Waals contribution to summed forces */
        fsum_ij.x += f_ij.x;
        fsum_ij.y += f_ij.y;
        fsum_ij.z += f_ij.z;

#ifdef DEBUG_VIRIAL
        /* accumulate upper triangle to virial */
        v_vdw[0] -= f_ij.x * r_ij.x;
        v_vdw[1] -= f_ij.x * r_ij.y;
        v_vdw[2] -= f_ij.x * r_ij.z;
        v_vdw[3] -= f_ij.y * r_ij.x;
        v_vdw[4] -= f_ij.y * r_ij.y;
        v_vdw[5] -= f_ij.y * r_ij.z;
        v_vdw[6] -= f_ij.z * r_ij.x;
        v_vdw[7] -= f_ij.z * r_ij.y;
        v_vdw[8] -= f_ij.z * r_ij.z;
#endif

        /* equally divide potential between atoms */
        e_vdw[i] += 0.5 * u;
        e_vdw_j += 0.5 * u;
        e_vdw_sum += u;
      } /* end is_vdw_direct */

      /* accumulate upper triangle to virial */
      virial_upper_xx -= fsum_ij.x * r_ij.x;
      virial_upper_xy -= fsum_ij.x * r_ij.y;
      virial_upper_xz -= fsum_ij.x * r_ij.z;
      virial_upper_yy -= fsum_ij.y * r_ij.y;
      virial_upper_yz -= fsum_ij.y * r_ij.z;
      virial_upper_zz -= fsum_ij.z * r_ij.z;

    } /* end i-loop over remaining atoms */

    /* add accumulated force into array */
    f_elec[j].x += f_elec_j.x;
    f_elec[j].y += f_elec_j.y;
    f_elec[j].z += f_elec_j.z;
    f_vdw[j].x += f_vdw_j.x;
    f_vdw[j].y += f_vdw_j.y;
    f_vdw[j].z += f_vdw_j.z;
    e_elec[j] += e_elec_j;
    e_vdw[j] += e_vdw_j;
    e_epot[j] += e_epot_j;

  } /* end j-loop over all atoms */

  /* accumulate energy */
  *u_elec += e_elec_sum;
  *u_vdw += e_vdw_sum;

  /* store nonbonded contribution to virial */
  virial[FORCE_VIRIAL_XX] += virial_upper_xx;
  virial[FORCE_VIRIAL_XY] += virial_upper_xy;
  virial[FORCE_VIRIAL_XZ] += virial_upper_xz;
  virial[FORCE_VIRIAL_YX] += virial_upper_xy;
  virial[FORCE_VIRIAL_YY] += virial_upper_yy;
  virial[FORCE_VIRIAL_YZ] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZX] += virial_upper_xz;
  virial[FORCE_VIRIAL_ZY] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZZ] += virial_upper_zz;

#ifdef DEBUG_VIRIAL
  printf("elec(direct) virial: %g %g %g  %g %g %g  %g %g %g\n",
      v_elec[0], v_elec[1], v_elec[2], v_elec[3], v_elec[4],
      v_elec[5], v_elec[6], v_elec[7], v_elec[8]);
  printf("vdw(direct) virial: %g %g %g  %g %g %g  %g %g %g\n",
      v_vdw[0], v_vdw[1], v_vdw[2], v_vdw[3], v_vdw[4],
      v_vdw[5], v_vdw[6], v_vdw[7], v_vdw[8]);
#endif

#ifdef DEBUG_VIRIAL
  printf("summed virial: %g %g %g  %g %g %g  %g %g %g\n",
      virial[0], virial[1], virial[2], virial[3], virial[4],
      virial[5], virial[6], virial[7], virial[8]);
#endif

  return 0;
}


/*
 * Correct the nonbonded sums for excluded and scaled 1-4 pairs.
 *
 * Pass 1 walks excl_list[j] for every selected atom j = sel[n].  Each
 * listed atom i with i < j and (mapnb[i] & map_id) != 0 contributes the
 * pairwise interaction evaluated with negated constants
 * (c_elec = -elec_const, a = -entry->a, b = -entry->b), which cancels a
 * contribution that was added for the pair elsewhere.
 *
 * Pass 2 runs only when fobj->scaled14_list is non-NULL.  It walks
 * scaled14_list[j] the same way, using (scaling14 - 1) * elec_const and
 * (a14 - a, b14 - b), i.e. it removes the full-strength term and adds
 * the 1-4 scaled one in a single evaluation.
 *
 * Each list is scanned until the first entry that is not less than j.
 * NOTE(review): this presumes every list is in ascending order and ends
 * with an entry >= j (a sentinel) -- confirm against the list builder.
 *
 * Parameters:
 *   fobj      - force object; supplies constants, cutoffs, parameter
 *               tables, the atom array and the exclusion lists
 *   virial    - virial tensor indexed by FORCE_VIRIAL_XX..FORCE_VIRIAL_ZZ;
 *               the symmetric pairwise contribution is added to it
 *   u_elec    - total electrostatic energy, incremented
 *   f_elec    - per-atom electrostatic forces, incremented
 *   e_elec    - per-atom electrostatic energies; each pair energy is
 *               split equally between its two atoms
 *   e_epot    - per-atom electrostatic potentials, incremented
 *   is_elec_subtexcl    - nonzero enables the electrostatic correction
 *   elec_pair_potential - FORCE_ELEC_* selector
 *   u_vdw     - total van der Waals energy, incremented
 *   f_vdw     - per-atom van der Waals forces, incremented
 *   e_vdw     - per-atom van der Waals energies (split equally per pair)
 *   is_vdw_subtexcl     - nonzero enables the van der Waals correction
 *   vdw_pair_potential  - FORCE_VDW_* selector
 *   pos       - atom positions, indexed by atom number
 *   sel       - atom numbers to process as "j"
 *   sel_len   - number of entries in sel
 *   mapnb     - per-atom bit mask; partner i is used only if
 *               (mapnb[i] & map_id) is nonzero
 *   map_id    - mask tested against mapnb[i]
 *
 * The switch statements have no default case: a selector value that
 * matches no case leaves u and du_r at whatever they held before.
 *
 * Always returns 0.
 */
int force_compute_nbpairs_subtexcl(Force *fobj, double virial[],
    double *u_elec, MD_Dvec f_elec[], double e_elec[], double e_epot[],
    int32 is_elec_subtexcl, int32 elec_pair_potential,
    double *u_vdw, MD_Dvec f_vdw[], double e_vdw[],
    int32 is_vdw_subtexcl, int32 vdw_pair_potential,
    const MD_Dvec *pos, const int32 sel[], int32 sel_len,
    const int32 mapnb[], int32 map_id)
{
#ifdef DEBUG_VIRIAL
  double v_elec[9] = { 0.0 };
  double v_vdw[9] = { 0.0 };
#endif

  MD_Dvec f_elec_j, f_vdw_j, pj;
  MD_Dvec r_ij, f_ij;
  const double scaling14 = fobj->param->scaling14;
  const double elec_const = fobj->elec_const;
  const double elec_cutoff2 = fobj->elec_cutoff2;
  const double vdw_cutoff2 = fobj->vdw_cutoff2;
  double a, b, c_elec, r2, pot_i, pot_j, q_j;
  double e_elec_sum = 0.0, e_vdw_sum = 0.0;
  double e_elec_j, e_vdw_j, e_epot_j;
  double u = 0.0, du_r = 0.0;  /* else compiler generates warning */
  MD_Dvec fsum_ij;
  double virial_upper_xx = 0.0;
  double virial_upper_xy = 0.0;
  double virial_upper_xz = 0.0;
  double virial_upper_yy = 0.0;
  double virial_upper_yz = 0.0;
  double virial_upper_zz = 0.0;
  const double inv_elec_cutoff2 = fobj->inv_elec_cutoff2;
  const double ewald_coef = fobj->param->ewald_coef;
  const double ewald_grad_coef = fobj->ewald_grad_coef;
  const double glass_grad_coef = 2.0/sqrt(M_PI);
  const double roff2 = fobj->vdw_cutoff2;
  const double ron2 = fobj->switch_dist2;
  const double inv_denom_switch = fobj->inv_denom_switch;
  const ForceVdwparam *vdwtable = fobj->vdwparam;
  const ForceVdwparam *entry;  /* name is required by the RMIN2 macro */
  const ForceBucksafe *bucksafetable = fobj->bucksafe;
  const ForceBucksafe *bentry;
  int32 **excl_list = fobj->excl_list;
  int32 **scaled14_list = fobj->scaled14_list;
  const MD_Atom *atom = fobj->param->atom;
  const int32 atomprm_len = fobj->param->atomprm_len;
  int32 *excl;
  int32 i, j, n;
  int32 is_not_excluded;

  /* exclusion list should always exist */
  ASSERT(excl_list !=  NULL);

  /*
   * Pass 1: full exclusions.
   * Subtract the interaction by negating the electrostatic constant.
   */
  c_elec = -elec_const;

  for (n = 0;  n < sel_len;  n++) {
    j = sel[n];

    /* copy into local storage for efficiency */
    pj = pos[j];
    f_elec_j.x = 0.0;
    f_elec_j.y = 0.0;
    f_elec_j.z = 0.0;
    f_vdw_j.x = 0.0;
    f_vdw_j.y = 0.0;
    f_vdw_j.z = 0.0;
    e_elec_j = 0.0;
    e_vdw_j = 0.0;
    e_epot_j = 0.0;
    q_j = atom[j].q;

    /* visit each excluded partner i < j exactly once */
    for (excl = excl_list[j];  *excl < j;  excl++) {
      i = *excl;

      /* only subtract interactions between the two atom selection sets */
      if ((mapnb[i] & map_id) == 0) continue;

      /* clear sum of nonbonded pairwise forces */
      fsum_ij.x = 0.0;
      fsum_ij.y = 0.0;
      fsum_ij.z = 0.0;

      /* r_ij is vector from atom i to atom j */
      r_ij.x = pj.x - pos[i].x;
      r_ij.y = pj.y - pos[i].y;
      r_ij.z = pj.z - pos[i].z;

      /* r2 is square of pairwise distance */
      r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

      /* handle very small Drude oscillator lengths */
      is_not_excluded = 1;
      if (r2 < RDRMIN2) {
        /*
         * Check whether j is listed among the exclusions of i; if so,
         * the electrostatic correction is skipped for this pair.
         *
         * The scan uses its own cursor.  It must not reuse "excl",
         * which is the iterator of the enclosing loop over
         * excl_list[j]: overwriting it would make that loop continue
         * inside excl_list[i] and drop the remaining exclusions of j.
         */
        const int32 *scan = excl_list[i];

        while (*scan < j) {
          scan++;
        }
        if (j == *scan) is_not_excluded = 0;
      }

      if (is_not_excluded && is_elec_subtexcl) {
        /* deal with electrostatics */
        switch (elec_pair_potential) {
          case FORCE_ELEC_STANDARD:
            FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(&u, &du_r, r2, c_elec);
            break;
          case FORCE_ELEC_SHIFTED:
            u = du_r = 0.0;
            if (r2 < elec_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(&u, &du_r, r2, c_elec,
                  inv_elec_cutoff2);
            }
            break;
          case FORCE_ELEC_EWALD:
            u = du_r = 0.0;
            if (r2 < elec_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r, r2, c_elec,
                  ewald_coef, ewald_grad_coef);
            }
            break;
          case FORCE_ELEC_GLASS:
            u = du_r = 0.0;
            if (r2 < elec_cutoff2) {
              entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);
              /*
               * Use the negated constant c_elec like every other case;
               * passing elec_const here would add the excluded
               * interaction instead of subtracting it.
               */
              FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                  r2, c_elec, entry->b, (glass_grad_coef * entry->b));
            }
            break;
        }
        /* potentials use the partner's charge; energy and force use both */
        pot_i = u * q_j;
        pot_j = u * atom[i].q;
        u *= atom[i].q * q_j;
        du_r *= atom[i].q * q_j;
        f_ij.x = du_r * r_ij.x;
        f_ij.y = du_r * r_ij.y;
        f_ij.z = du_r * r_ij.z;
        f_elec[i].x += f_ij.x;
        f_elec[i].y += f_ij.y;
        f_elec[i].z += f_ij.z;
        f_elec_j.x -= f_ij.x;
        f_elec_j.y -= f_ij.y;
        f_elec_j.z -= f_ij.z;

        /* assign electrostatic contribution to summed forces */
        fsum_ij = f_ij;

#ifdef DEBUG_VIRIAL
        /* accumulate upper triangle to virial */
        v_elec[0] -= f_ij.x * r_ij.x;
        v_elec[1] -= f_ij.x * r_ij.y;
        v_elec[2] -= f_ij.x * r_ij.z;
        v_elec[3] -= f_ij.y * r_ij.x;
        v_elec[4] -= f_ij.y * r_ij.y;
        v_elec[5] -= f_ij.y * r_ij.z;
        v_elec[6] -= f_ij.z * r_ij.x;
        v_elec[7] -= f_ij.z * r_ij.y;
        v_elec[8] -= f_ij.z * r_ij.z;
#endif

        /* equally divide potential between atoms */
        e_elec[i] += 0.5 * u;
        e_elec_j += 0.5 * u;
        /* accumulate electrostatic potentials */
        e_epot[i] += pot_i;
        e_epot_j += pot_j;
        /* accumulate overall electrostatic potential energy */
        e_elec_sum += u;
      } /* end is_elec_subtexcl */

      if (is_vdw_subtexcl) {
        /* deal with van der Waals */
        entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);

        if (r2 < RMIN2) {
          /*
           * Pair is closer than the table's rmin2: no van der Waals
           * term is evaluated.  Flush the electrostatic part (if any)
           * to the virial here, because "continue" bypasses the
           * accumulation at the bottom of the loop.
           */
          virial_upper_xx -= fsum_ij.x * r_ij.x;
          virial_upper_xy -= fsum_ij.x * r_ij.y;
          virial_upper_xz -= fsum_ij.x * r_ij.z;
          virial_upper_yy -= fsum_ij.y * r_ij.y;
          virial_upper_yz -= fsum_ij.y * r_ij.z;
          virial_upper_zz -= fsum_ij.z * r_ij.z;
          continue;  /* next i-loop iteration */
        }

        /* subtract interaction by negating the constants */
        a = -entry->a;
        b = -entry->b;
        switch (vdw_pair_potential) {
          case FORCE_VDW_STANDARD:
            FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(&u, &du_r, r2, a, b);
            break;
          case FORCE_VDW_SWITCHED:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(&u, &du_r, r2, a, b,
                  roff2, ron2, inv_denom_switch);
            }
            break;
          case FORCE_VDW_BUCK:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCK(&u, &du_r, r2, a,
                entry->rmin2, b);
            CHECK_BUCK(r2, i, j, u);
            break;
          case FORCE_VDW_SWITCHBUCK:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(&u, &du_r, r2, a,
                  entry->rmin2, b, roff2, ron2, inv_denom_switch);
              CHECK_BUCK(r2, i, j, u);
            }
            break;
          case FORCE_VDW_BUCKND:
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                entry->rmin2);
            break;
          case FORCE_VDW_SWITCHBUCKND:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(&u, &du_r, r2, a,
                  entry->rmin2, roff2, ron2, inv_denom_switch);
            }
            break;
          case FORCE_VDW_BUCKSAFE:
            bentry = bucksafetable
              + (atom[i].prm * atomprm_len + atom[j].prm);
            FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(&u, &du_r, r2, a,
                entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b);
            break;
          case FORCE_VDW_SWITCHBUCKSAFE:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              bentry = bucksafetable
                + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(&u, &du_r, r2, a,
                  entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b,
                  roff2, ron2, inv_denom_switch);
            }
            break;
          case FORCE_VDW_GLASS:
            u = du_r = 0.0;
            if (r2 < vdw_cutoff2) {
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                  entry->rmin2);
            }
            break;
        }
        f_ij.x = du_r * r_ij.x;
        f_ij.y = du_r * r_ij.y;
        f_ij.z = du_r * r_ij.z;
        f_vdw[i].x += f_ij.x;
        f_vdw[i].y += f_ij.y;
        f_vdw[i].z += f_ij.z;
        f_vdw_j.x -= f_ij.x;
        f_vdw_j.y -= f_ij.y;
        f_vdw_j.z -= f_ij.z;

        /* accumulate van der Waals contribution to summed forces */
        fsum_ij.x += f_ij.x;
        fsum_ij.y += f_ij.y;
        fsum_ij.z += f_ij.z;

#ifdef DEBUG_VIRIAL
        /* accumulate upper triangle to virial */
        v_vdw[0] -= f_ij.x * r_ij.x;
        v_vdw[1] -= f_ij.x * r_ij.y;
        v_vdw[2] -= f_ij.x * r_ij.z;
        v_vdw[3] -= f_ij.y * r_ij.x;
        v_vdw[4] -= f_ij.y * r_ij.y;
        v_vdw[5] -= f_ij.y * r_ij.z;
        v_vdw[6] -= f_ij.z * r_ij.x;
        v_vdw[7] -= f_ij.z * r_ij.y;
        v_vdw[8] -= f_ij.z * r_ij.z;
#endif

        /* equally divide potential between atoms */
        e_vdw[i] += 0.5 * u;
        e_vdw_j += 0.5 * u;
        e_vdw_sum += u;
      } /* end is_vdw_subtexcl */

      /* accumulate upper triangle to virial */
      virial_upper_xx -= fsum_ij.x * r_ij.x;
      virial_upper_xy -= fsum_ij.x * r_ij.y;
      virial_upper_xz -= fsum_ij.x * r_ij.z;
      virial_upper_yy -= fsum_ij.y * r_ij.y;
      virial_upper_yz -= fsum_ij.y * r_ij.z;
      virial_upper_zz -= fsum_ij.z * r_ij.z;

    } /* end loop over atoms in this exclusion list */

    /* add accumulated force into array */
    f_elec[j].x += f_elec_j.x;
    f_elec[j].y += f_elec_j.y;
    f_elec[j].z += f_elec_j.z;
    f_vdw[j].x += f_vdw_j.x;
    f_vdw[j].y += f_vdw_j.y;
    f_vdw[j].z += f_vdw_j.z;
    e_elec[j] += e_elec_j;
    e_vdw[j] += e_vdw_j;
    e_epot[j] += e_epot_j;

  } /* end j-loop over all atoms */

  if (scaled14_list) {
    /*
     * Pass 2: scaled 1-4 pairs.
     * Subtract previously added contribution and add scaled contribution.
     */
    c_elec = (scaling14 - 1.0) * elec_const;

    for (n = 0;  n < sel_len;  n++) {
      j = sel[n];

      /* copy into local storage for efficiency */
      pj = pos[j];
      f_elec_j.x = 0.0;
      f_elec_j.y = 0.0;
      f_elec_j.z = 0.0;
      f_vdw_j.x = 0.0;
      f_vdw_j.y = 0.0;
      f_vdw_j.z = 0.0;
      e_elec_j = 0.0;
      e_vdw_j = 0.0;
      e_epot_j = 0.0;
      q_j = atom[j].q;

      /* visit each scaled 1-4 partner i < j exactly once */
      for (excl = scaled14_list[j];  *excl < j;  excl++) {
        i = *excl;

        /* only subtract interactions between the two atom selection sets */
        if ((mapnb[i] & map_id) == 0) continue;

        /* clear sum of nonbonded pairwise forces */
        fsum_ij.x = 0.0;
        fsum_ij.y = 0.0;
        fsum_ij.z = 0.0;

        /* r_ij is vector from atom i to atom j */
        r_ij.x = pj.x - pos[i].x;
        r_ij.y = pj.y - pos[i].y;
        r_ij.z = pj.z - pos[i].z;

        /* r2 is square of pairwise distance */
        r2 = r_ij.x * r_ij.x + r_ij.y * r_ij.y + r_ij.z * r_ij.z;

        if (is_elec_subtexcl) {
          /* deal with electrostatics */
          switch (elec_pair_potential) {
            case FORCE_ELEC_STANDARD:
              FORCE_COMPUTE_NBPAIRS_ELEC_STANDARD(&u, &du_r, r2, c_elec);
              break;
            case FORCE_ELEC_SHIFTED:
              u = du_r = 0.0;
              if (r2 < elec_cutoff2) {
                FORCE_COMPUTE_NBPAIRS_ELEC_SHIFTED(&u, &du_r, r2, c_elec,
                    inv_elec_cutoff2);
              }
              break;
            case FORCE_ELEC_EWALD:
              u = du_r = 0.0;
              if (r2 < elec_cutoff2) {
                FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r, r2, c_elec,
                    ewald_coef, ewald_grad_coef);
              }
              break;
            case FORCE_ELEC_GLASS:
              u = du_r = 0.0;
              if (r2 < elec_cutoff2) {
                entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);
                /*
                 * Use the 1-4 correction constant c_elec like every
                 * other case; passing elec_const here would add the
                 * full-strength interaction a second time.
                 */
                FORCE_COMPUTE_NBPAIRS_ELEC_EWALD(&u, &du_r,
                    r2, c_elec, entry->b, (glass_grad_coef * entry->b));
              }
              break;
          }
          /* potentials use the partner's charge; energy and force use both */
          pot_i = u * q_j;
          pot_j = u * atom[i].q;
          u *= atom[i].q * q_j;
          du_r *= atom[i].q * q_j;
          f_ij.x = du_r * r_ij.x;
          f_ij.y = du_r * r_ij.y;
          f_ij.z = du_r * r_ij.z;
          f_elec[i].x += f_ij.x;
          f_elec[i].y += f_ij.y;
          f_elec[i].z += f_ij.z;
          f_elec_j.x -= f_ij.x;
          f_elec_j.y -= f_ij.y;
          f_elec_j.z -= f_ij.z;

          /* assign electrostatic contribution to summed forces */
          fsum_ij = f_ij;

#ifdef DEBUG_VIRIAL
          /* accumulate upper triangle to virial */
          v_elec[0] -= f_ij.x * r_ij.x;
          v_elec[1] -= f_ij.x * r_ij.y;
          v_elec[2] -= f_ij.x * r_ij.z;
          v_elec[3] -= f_ij.y * r_ij.x;
          v_elec[4] -= f_ij.y * r_ij.y;
          v_elec[5] -= f_ij.y * r_ij.z;
          v_elec[6] -= f_ij.z * r_ij.x;
          v_elec[7] -= f_ij.z * r_ij.y;
          v_elec[8] -= f_ij.z * r_ij.z;
#endif

          /* equally divide potential between atoms */
          e_elec[i] += 0.5 * u;
          e_elec_j += 0.5 * u;
          /* accumulate electrostatic potentials */
          e_epot[i] += pot_i;
          e_epot_j += pot_j;
          /* accumulate overall electrostatic potential energy */
          e_elec_sum += u;
        } /* end is_elec_subtexcl */

        if (is_vdw_subtexcl) {
          /* deal with van der Waals */
          entry = vdwtable + (atom[i].prm * atomprm_len + atom[j].prm);

          if (r2 < RMIN2) {
            /*
             * Pair is closer than the table's rmin2: no van der Waals
             * term is evaluated.  Flush the electrostatic part (if any)
             * to the virial here, because "continue" bypasses the
             * accumulation at the bottom of the loop.
             */
            virial_upper_xx -= fsum_ij.x * r_ij.x;
            virial_upper_xy -= fsum_ij.x * r_ij.y;
            virial_upper_xz -= fsum_ij.x * r_ij.z;
            virial_upper_yy -= fsum_ij.y * r_ij.y;
            virial_upper_yz -= fsum_ij.y * r_ij.z;
            virial_upper_zz -= fsum_ij.z * r_ij.z;
            continue;  /* next "i-loop" iteration */
          }

          /* want to subtract out previously added term and add scaled one */
          a = entry->a14 - entry->a;
          b = entry->b14 - entry->b;

          switch (vdw_pair_potential) {
            case FORCE_VDW_STANDARD:
              FORCE_COMPUTE_NBPAIRS_VDW_STANDARD(&u, &du_r, r2, a, b);
              break;
            case FORCE_VDW_SWITCHED:
              u = du_r = 0.0;
              if (r2 < vdw_cutoff2) {
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHED(&u, &du_r, r2, a, b,
                    roff2, ron2, inv_denom_switch);
              }
              break;
            case FORCE_VDW_BUCK:
              FORCE_COMPUTE_NBPAIRS_VDW_BUCK(&u, &du_r, r2, a,
                  entry->rmin2, b);
              CHECK_BUCK(r2, i, j, u);
              break;
            case FORCE_VDW_SWITCHBUCK:
              u = du_r = 0.0;
              if (r2 < vdw_cutoff2) {
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCK(&u, &du_r, r2, a,
                    entry->rmin2, b, roff2, ron2, inv_denom_switch);
                CHECK_BUCK(r2, i, j, u);
              }
              break;
            case FORCE_VDW_BUCKND:
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                  entry->rmin2);
              break;
            case FORCE_VDW_SWITCHBUCKND:
              u = du_r = 0.0;
              if (r2 < vdw_cutoff2) {
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKND(&u, &du_r, r2, a,
                    entry->rmin2, roff2, ron2, inv_denom_switch);
              }
              break;
            case FORCE_VDW_BUCKSAFE:
              bentry = bucksafetable
                + (atom[i].prm * atomprm_len + atom[j].prm);
              FORCE_COMPUTE_NBPAIRS_VDW_BUCKSAFE(&u, &du_r, r2, a,
                  entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b);
              break;
            case FORCE_VDW_SWITCHBUCKSAFE:
              u = du_r = 0.0;
              if (r2 < vdw_cutoff2) {
                bentry = bucksafetable
                  + (atom[i].prm * atomprm_len + atom[j].prm);
                FORCE_COMPUTE_NBPAIRS_VDW_SWITCHBUCKSAFE(&u, &du_r, r2, a,
                    entry->rmin2, b, bentry->rinner2, bentry->a, bentry->b,
                    roff2, ron2, inv_denom_switch);
              }
              break;
            case FORCE_VDW_GLASS:
              u = du_r = 0.0;
              if (r2 < vdw_cutoff2) {
                FORCE_COMPUTE_NBPAIRS_VDW_BUCKND(&u, &du_r, r2, a,
                    entry->rmin2);
              }
              break;
          }
          f_ij.x = du_r * r_ij.x;
          f_ij.y = du_r * r_ij.y;
          f_ij.z = du_r * r_ij.z;
          f_vdw[i].x += f_ij.x;
          f_vdw[i].y += f_ij.y;
          f_vdw[i].z += f_ij.z;
          f_vdw_j.x -= f_ij.x;
          f_vdw_j.y -= f_ij.y;
          f_vdw_j.z -= f_ij.z;

          /* accumulate van der Waals contribution to summed forces */
          fsum_ij.x += f_ij.x;
          fsum_ij.y += f_ij.y;
          fsum_ij.z += f_ij.z;

#ifdef DEBUG_VIRIAL
          /* accumulate upper triangle to virial */
          v_vdw[0] -= f_ij.x * r_ij.x;
          v_vdw[1] -= f_ij.x * r_ij.y;
          v_vdw[2] -= f_ij.x * r_ij.z;
          v_vdw[3] -= f_ij.y * r_ij.x;
          v_vdw[4] -= f_ij.y * r_ij.y;
          v_vdw[5] -= f_ij.y * r_ij.z;
          v_vdw[6] -= f_ij.z * r_ij.x;
          v_vdw[7] -= f_ij.z * r_ij.y;
          v_vdw[8] -= f_ij.z * r_ij.z;
#endif

          /* equally divide potential between atoms */
          e_vdw[i] += 0.5 * u;
          e_vdw_j += 0.5 * u;
          e_vdw_sum += u;
        } /* end is_vdw_subtexcl */

        /* accumulate upper triangle to virial */
        virial_upper_xx -= fsum_ij.x * r_ij.x;
        virial_upper_xy -= fsum_ij.x * r_ij.y;
        virial_upper_xz -= fsum_ij.x * r_ij.z;
        virial_upper_yy -= fsum_ij.y * r_ij.y;
        virial_upper_yz -= fsum_ij.y * r_ij.z;
        virial_upper_zz -= fsum_ij.z * r_ij.z;

      } /* end loop over atoms in this exclusion list */

      /* add accumulated force into array */
      f_elec[j].x += f_elec_j.x;
      f_elec[j].y += f_elec_j.y;
      f_elec[j].z += f_elec_j.z;
      f_vdw[j].x += f_vdw_j.x;
      f_vdw[j].y += f_vdw_j.y;
      f_vdw[j].z += f_vdw_j.z;
      e_elec[j] += e_elec_j;
      e_vdw[j] += e_vdw_j;
      e_epot[j] += e_epot_j;

    } /* end j-loop over all atoms */

  } /* end subtracting scaled 1-4 interactions */

  /* accumulate energy */
  *u_elec += e_elec_sum;
  *u_vdw += e_vdw_sum;

  /*
   * store nonbonded contribution to virial; the pairwise term is
   * symmetric, so each off-diagonal sum is written to both halves
   */
  virial[FORCE_VIRIAL_XX] += virial_upper_xx;
  virial[FORCE_VIRIAL_XY] += virial_upper_xy;
  virial[FORCE_VIRIAL_XZ] += virial_upper_xz;
  virial[FORCE_VIRIAL_YX] += virial_upper_xy;
  virial[FORCE_VIRIAL_YY] += virial_upper_yy;
  virial[FORCE_VIRIAL_YZ] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZX] += virial_upper_xz;
  virial[FORCE_VIRIAL_ZY] += virial_upper_yz;
  virial[FORCE_VIRIAL_ZZ] += virial_upper_zz;

#ifdef DEBUG_VIRIAL
  printf("elec(excl) virial: %g %g %g  %g %g %g  %g %g %g\n",
      v_elec[0], v_elec[1], v_elec[2], v_elec[3], v_elec[4],
      v_elec[5], v_elec[6], v_elec[7], v_elec[8]);
  printf("vdw(excl) virial: %g %g %g  %g %g %g  %g %g %g\n",
      v_vdw[0], v_vdw[1], v_vdw[2], v_vdw[3], v_vdw[4],
      v_vdw[5], v_vdw[6], v_vdw[7], v_vdw[8]);
  printf("summed virial: %g %g %g  %g %g %g  %g %g %g\n",
      virial[0], virial[1], virial[2], virial[3], virial[4],
      virial[5], virial[6], virial[7], virial[8]);
#endif

  return 0;
}
