/*
 * Copyright (C) 2004-2006 by David J. Hardy.  All rights reserved.
 *
 * nbsetup.c
 *
 * Setup data structures for nonbonded pairwise force computation.
 */

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "force/intdefn.h"
#include "debug/debug.h"

#define OUTPUT_GRIDCELL_INFO

#define DEBUG_VDWPARAMS
#undef DEBUG_VDWPARAMS

/*
 * force_setup_gridcells()
 *
 * Determine grid cell sizes and dimensions of grid cell array.
 * A grid cell lattice is created with subcells that contain
 * at least cubic volume mincellsize^3.
 *
 * For now, grid cell hashing handles fixed 1-away neighbors,
 * so mincellsize must be at least as large as the cutoff.
 */

/* helper functions for grid cell hashing */
static int alloc_gridcells(Force *, int32 nucel, int32 nvcel, int32 nwcel);
static int setup_gridcell_nbrlists(Force *f, int32 shell);

/*
 * force_cleanup_gridcells()
 *
 * Release the cursor linked-list array (f->next) and the grid cell
 * array (f->cell).
 *
 * The pointers are reset to NULL and the recorded cell capacity
 * (f->maxcells) to zero.  alloc_gridcells() resizes f->cell with
 * realloc() based on f->maxcells, so leaving stale values behind would
 * make a later setup (or a second cleanup) operate on freed memory.
 */
void force_cleanup_gridcells(Force *f)
{
  free(f->next);
  f->next = NULL;

  free(f->cell);
  f->cell = NULL;
  f->maxcells = 0;
}

/*
 * Build the grid cell lattice used for the cutoff pair search.
 *
 *   f           - force object; the domain description is read from it
 *                 (v1..v3, lv1..lv3, center, lowerc, ta1..ta3,
 *                 is_orthogonal, is_periodic) and the cell sizes, lowcell,
 *                 delta, gscale, next[] and the cell array are stored in it
 *   mincellsize - smallest permitted cell width, must be positive
 *   shell       - neighbor shell width, forwarded to
 *                 setup_gridcell_nbrlists()
 *
 * Returns 0 on success and FORCE_FAIL on failure.  On failure f->next may
 * still be allocated; force_cleanup_gridcells() releases it.
 */
int force_setup_gridcells(Force *f, double mincellsize, int32 shell)
{
  const MD_Dvec u = f->v1;   /* domain basis vectors */
  const MD_Dvec v = f->v2;
  const MD_Dvec w = f->v3;

  double inv_mincellsize;    /* reciprocal of mincellsize */

  double pulen, pvlen, pwlen;  /* lengths of orthogonal projection */

  MD_Dvec ug, vg, wg;  /* u,v,w possibly lengthened for grid cell hashing */
  double scale;        /* scaling factor for lengthened vectors */
  MD_Dvec diff;        /* difference between lower corners */

  int32 nucells, nvcells, nwcells;  /* number of cells in each direction */

  ASSERT(mincellsize > 0.0);

  /* prepare for a cutoff computation */
  inv_mincellsize = 1.0 / mincellsize;

  /* need to allocate "next" array for cursor link list */
  f->next = (int32 *) malloc(f->param->atom_len * sizeof(int32));
  if (f->next == NULL) return FORCE_FAIL;

  if (f->is_orthogonal) {

    /* if orthogonal, length of projection is length of basis vector */
    pulen = f->lv1;
    pvlen = f->lv2;
    pwlen = f->lv3;

  } /* end if orthogonal domain */

  else {  /* else nonorthogonal domain */

    /* for computing cross products and orthogonal projections */
    MD_Dvec vcw, wcu, ucv;  /* cross products, e.g. "vcw" = v cross w */
    MD_Dvec pu, pv, pw;     /* orthogonal projections of domain basis */
    double m;

    /* compute v cross w */
    vcw.x = v.y * w.z - v.z * w.y;
    vcw.y = v.z * w.x - v.x * w.z;
    vcw.z = v.x * w.y - v.y * w.x;

    /* compute w cross u */
    wcu.x = w.y * u.z - w.z * u.y;
    wcu.y = w.z * u.x - w.x * u.z;
    wcu.z = w.x * u.y - w.y * u.x;

    /* compute u cross v */
    ucv.x = u.y * v.z - u.z * v.y;
    ucv.y = u.z * v.x - u.x * v.z;
    ucv.z = u.x * v.y - u.y * v.x;

    /* compute orthogonal projection of u onto v cross w */
    m = (u.x*vcw.x + u.y*vcw.y + u.z*vcw.z) /
      (vcw.x*vcw.x + vcw.y*vcw.y + vcw.z*vcw.z);
    pu.x = m*vcw.x;
    pu.y = m*vcw.y;
    pu.z = m*vcw.z;
    pulen = sqrt(pu.x*pu.x + pu.y*pu.y + pu.z*pu.z);

    /* compute orthogonal projection of v onto w cross u */
    m = (v.x*wcu.x + v.y*wcu.y + v.z*wcu.z) /
      (wcu.x*wcu.x + wcu.y*wcu.y + wcu.z*wcu.z);
    pv.x = m*wcu.x;
    pv.y = m*wcu.y;
    pv.z = m*wcu.z;
    pvlen = sqrt(pv.x*pv.x + pv.y*pv.y + pv.z*pv.z);

    /* compute orthogonal projection of w onto u cross v */
    m = (w.x*ucv.x + w.y*ucv.y + w.z*ucv.z) /
      (ucv.x*ucv.x + ucv.y*ucv.y + ucv.z*ucv.z);
    pw.x = m*ucv.x;
    pw.y = m*ucv.y;
    pw.z = m*ucv.z;
    pwlen = sqrt(pw.x*pw.x + pw.y*pw.y + pw.z*pw.z);

  } /* end else nonorthogonal domain */

  /*
   * determine number of grid cells along the u basis vector
   *
   * periodic:  the cells must tile the domain exactly, so round the count
   *   down and stretch the cell size to fit
   * nonperiodic:  keep the minimum cell size, round the count up, and
   *   lengthen the basis vector so the cells cover the domain
   */
  if (f->is_periodic & FORCE_X_PERIODIC) {
    nucells = (int32) floor(pulen * inv_mincellsize);
    if (nucells < 1) {
      /* fail before the count is used as a divisor */
      ERROR("periodic domain is too small for mincellsize along u");
      return FORCE_FAIL;
    }
    f->ucellsize = f->lv1 / (double) nucells;
    f->inv_ucellsize = 1.0 / f->ucellsize;
    ug = u;
  }
  else {
    nucells = (int32) ceil(pulen * inv_mincellsize);
    f->ucellsize = (f->lv1 / pulen) * mincellsize;
    f->inv_ucellsize = 1.0 / f->ucellsize;
    scale = (nucells * f->ucellsize) / f->lv1;
    ug.x = scale * u.x;
    ug.y = scale * u.y;
    ug.z = scale * u.z;
  }

  /* determine number of grid cells along the v basis vector */
  if (f->is_periodic & FORCE_Y_PERIODIC) {
    nvcells = (int32) floor(pvlen * inv_mincellsize);
    if (nvcells < 1) {
      /* fail before the count is used as a divisor */
      ERROR("periodic domain is too small for mincellsize along v");
      return FORCE_FAIL;
    }
    f->vcellsize = f->lv2 / (double) nvcells;
    f->inv_vcellsize = 1.0 / f->vcellsize;
    vg = v;
  }
  else {
    nvcells = (int32) ceil(pvlen * inv_mincellsize);
    f->vcellsize = (f->lv2 / pvlen) * mincellsize;
    f->inv_vcellsize = 1.0 / f->vcellsize;
    scale = (nvcells * f->vcellsize) / f->lv2;
    vg.x = scale * v.x;
    vg.y = scale * v.y;
    vg.z = scale * v.z;
  }

  /* determine number of grid cells along the w basis vector */
  if (f->is_periodic & FORCE_Z_PERIODIC) {
    nwcells = (int32) floor(pwlen * inv_mincellsize);
    if (nwcells < 1) {
      /* fail before the count is used as a divisor */
      ERROR("periodic domain is too small for mincellsize along w");
      return FORCE_FAIL;
    }
    f->wcellsize = f->lv3 / (double) nwcells;
    f->inv_wcellsize = 1.0 / f->wcellsize;
    wg = w;
  }
  else {
    nwcells = (int32) ceil(pwlen * inv_mincellsize);
    f->wcellsize = (f->lv3 / pwlen) * mincellsize;
    f->inv_wcellsize = 1.0 / f->wcellsize;
    scale = (nwcells * f->wcellsize) / f->lv3;
    wg.x = scale * w.x;
    wg.y = scale * w.y;
    wg.z = scale * w.z;
  }

  /* find lower corner of grid cells in real space */
  f->lowcell.x = f->center.x - 0.5 * (ug.x + vg.x + wg.x);
  f->lowcell.y = f->center.y - 0.5 * (ug.y + vg.y + wg.y);
  f->lowcell.z = f->center.z - 0.5 * (ug.z + vg.z + wg.z);

  /* compute delta displacement of transformed lower corner from origin */
  diff.x = f->lowerc.x - f->lowcell.x;
  diff.y = f->lowerc.y - f->lowcell.y;
  diff.z = f->lowerc.z - f->lowcell.z;
  if (f->is_orthogonal) {
    /* only the diagonal of the transformation is used in this case */
    f->delta.x = f->ta1.x * diff.x;
    f->delta.y = f->ta2.y * diff.y;
    f->delta.z = f->ta3.z * diff.z;
  }
  else {
    f->delta.x = f->ta1.x * diff.x + f->ta1.y * diff.y + f->ta1.z * diff.z;
    f->delta.y = f->ta2.x * diff.x + f->ta2.y * diff.y + f->ta2.z * diff.z;
    f->delta.z = f->ta3.x * diff.x + f->ta3.y * diff.y + f->ta3.z * diff.z;
  }

  /* compute grid cell hashing scale factor for transformed coordinates */
  f->gscale.x = nucells / (1.0 + 2.0*f->delta.x);
  f->gscale.y = nvcells / (1.0 + 2.0*f->delta.y);
  f->gscale.z = nwcells / (1.0 + 2.0*f->delta.z);

  /* (re)allocate grid cell array (if needed) */
  if (alloc_gridcells(f, nucells, nvcells, nwcells)) {
    ERROR("alloc_gridcells()");
    return FORCE_FAIL;
  }

  /* determine list of neighbors for each cell */
  if (setup_gridcell_nbrlists(f, shell)) {
    ERROR("setup_gridcell_nbrlists()");
    return FORCE_FAIL;
  }

#ifdef OUTPUT_GRIDCELL_INFO
  printf("# grid cells:  %d (%s) by %d (%s) by %d (%s)\n",
      f->nucells,
      (f->is_periodic & FORCE_X_PERIODIC) ? "periodic" : "nonperiodic",
      f->nvcells,
      (f->is_periodic & FORCE_Y_PERIODIC) ? "periodic" : "nonperiodic",
      f->nwcells,
      (f->is_periodic & FORCE_Z_PERIODIC) ? "periodic" : "nonperiodic");
  printf("# overall density is %g atoms per grid cell\n",
      f->param->atom_len / (double) f->ncells);
#endif

  return 0;
}


/*
 * Record the grid dimensions in f and make sure f->cell can hold
 * (nucells * nvcells * nwcells) cells.
 *
 * The capacity f->maxcells is doubled until it is large enough (starting
 * from 3 when nothing has been allocated yet) and is halved while it is at
 * least 64 times larger than needed.  f->cell and f->maxcells are updated
 * only after realloc() succeeds, so the old array stays valid on failure.
 *
 * Returns 0 on success, FORCE_FAIL if any dimension is not positive, if
 * the total cell count does not fit in int32, or if realloc() fails.
 */
int alloc_gridcells(Force *f, int32 nucells, int32 nvcells, int32 nwcells)
{
  const double int32_limit = 2147483647.0;  /* largest int32 value */
  const int32 half_limit = 1073741823;      /* largest value safe to double */
  int32 ncells;

  /* a zero or negative dimension means the domain is too small */
  if (nucells <= 0 || nvcells <= 0 || nwcells <= 0) {
    ERROR("attempt grid cell allocation with zero dimension");
    return FORCE_FAIL;  /* domain is too small for mincellsize */
  }

  /* check the product in floating point before forming it in int32 */
  if ((double) nucells * (double) nvcells * (double) nwcells > int32_limit) {
    ERROR("number of grid cells exceeds int32 range");
    return FORCE_FAIL;
  }

  /* reset number of cells, resize grid cell allocation if needed */
  ncells = nucells * nvcells * nwcells;
  if (ncells > f->maxcells) {
    void *tmp;
    int32 maxcells = (f->maxcells > 0 ? f->maxcells : 3);
    do {
      if (maxcells > half_limit) {
        /* doubling would overflow, so settle for the exact size */
        maxcells = ncells;
        break;
      }
      maxcells *= 2;
    } while (maxcells < ncells);
    tmp = realloc(f->cell, maxcells * sizeof(ForceCell));
    if (tmp == NULL) {
      ERROR("realloc()");
      return FORCE_FAIL;
    }
    f->cell = (ForceCell *) tmp;
    f->maxcells = maxcells;
  }
  else if (ncells <= f->maxcells / 64) {
    /* same test as (ncells * 64 <= maxcells) without the multiplication */
    void *tmp;
    int32 maxcells = f->maxcells;
    do {
      maxcells /= 2;
    } while (ncells <= maxcells / 64);
    tmp = realloc(f->cell, maxcells * sizeof(ForceCell));
    if (tmp == NULL) {
      ERROR("realloc()");
      return FORCE_FAIL;
    }
    f->cell = (ForceCell *) tmp;
    f->maxcells = maxcells;
  }
  f->ncells = ncells;
  f->nucells = nucells;
  f->nvcells = nvcells;
  f->nwcells = nwcells;

  return 0;
}


/*
 * Fill in the neighbor list of every grid cell.
 *
 * Each cell records itself plus the cells of the surrounding "shell" that
 * lie in the forward half-space:  w displacement 0..shell; v displacement
 * -shell..shell (0..shell when the w displacement is 0); u displacement
 * -shell..shell (0..shell when both w and v displacements are 0).
 *
 * A neighbor index that falls outside the grid is wrapped around once in
 * a periodic direction (recording an image offset of -1 when wrapped from
 * the high side, +1 from the low side) and skipped in a nonperiodic one.
 * For every neighbor the cell index and OFFSET_INDEX() of the three image
 * offsets are appended to the cell's nbr[] and offset[] arrays.
 *
 * Always returns 0.
 */
int setup_gridcell_nbrlists(Force *f, int32 shell)
{
  ForceCell *cell = f->cell;
  const int32 ncells = f->ncells;
  const int32 nucells = f->nucells;
  const int32 nvcells = f->nvcells;
  const int32 nwcells = f->nwcells;
  const int32 wrap_u = (f->is_periodic & FORCE_X_PERIODIC) != 0;
  const int32 wrap_v = (f->is_periodic & FORCE_Y_PERIODIC) != 0;
  const int32 wrap_w = (f->is_periodic & FORCE_Z_PERIODIC) != 0;
  int32 cu, cv, cw;           /* coordinates of the current cell */
  int32 du, dv, dw;           /* displacement to the neighbor */
  int32 nu, nv, nw;           /* coordinates of the neighbor */
  int32 img_u, img_v, img_w;  /* periodic image offsets of the neighbor */
  int32 du_first, dv_first;   /* first displacement of the inner loops */
  int32 self, other;          /* flat indices of cell and neighbor */
  int32 offset;               /* index into the offset table */
  ForceCell *home;            /* the cell whose list is being built */

  /* dimension of grid cell array and allocation should be done */
  ASSERT(nucells > 0);
  ASSERT(nvcells > 0);
  ASSERT(nwcells > 0);
  ASSERT(ncells == nucells * nvcells * nwcells);
  ASSERT(cell != NULL);
  ASSERT(shell >= 1 && shell <= FORCE_SHELLMAX);

  /* start with empty neighbor lists */
  memset(cell, 0, ncells * sizeof(ForceCell));

  for (cw = 0;  cw < nwcells;  cw++) {
    for (cv = 0;  cv < nvcells;  cv++) {
      for (cu = 0;  cu < nucells;  cu++) {

        /* flat index of this cell */
        self = (cw * nvcells + cv) * nucells + cu;
        ASSERT(self >= 0 && self < ncells);
        home = &cell[self];

        for (dw = 0;  dw <= shell;  dw++) {
          nw = cw + dw;
          img_w = 0;
          if (nw >= nwcells) {
            if (!wrap_w) continue;
            nw -= nwcells;
            img_w = -1;
          }

          dv_first = (dw == 0 ? 0 : -shell);
          for (dv = dv_first;  dv <= shell;  dv++) {
            nv = cv + dv;
            img_v = 0;
            if (nv >= nvcells) {
              if (!wrap_v) continue;
              nv -= nvcells;
              img_v = -1;
            }
            else if (nv < 0) {
              if (!wrap_v) continue;
              nv += nvcells;
              img_v = 1;
            }

            du_first = ((dw == 0 && dv == 0) ? 0 : -shell);
            for (du = du_first;  du <= shell;  du++) {
              nu = cu + du;
              img_u = 0;
              if (nu >= nucells) {
                if (!wrap_u) continue;
                nu -= nucells;
                img_u = -1;
              }
              else if (nu < 0) {
                if (!wrap_u) continue;
                nu += nucells;
                img_u = 1;
              }

              /* offset table entry for this periodic image */
              offset = OFFSET_INDEX(img_u, img_v, img_w);

              /* flat index of the neighbor cell */
              other = (nw * nvcells + nv) * nucells + nu;
              ASSERT(other >= 0 && other < ncells);

              /* append neighbor index and offset to this cell's list */
              home->nbr[ home->nnbrs ] = other;
              home->offset[ home->nnbrs ] = (char) offset;
              home->nnbrs++;

            } /* end du-loop */
          } /* end dv-loop */
        } /* end dw-loop */

      }
    }
  } /* end loop over cells */

  return 0;
}


/*
 * Allocate the storage used for pairlist-based evaluation.
 *
 *   f       - force object; receives pos_init, wrap_save and pairlist
 *             arrays (one entry per atom) and has need_pairlist_regen set
 *   a       - radius used to estimate the initial list length as
 *             (2/3) * pi * a^3 * (atoms / volume)
 *   sel     - atom indices that get an index list and an offset list
 *   sel_len - number of entries in sel
 *
 * Returns 0 on success, FORCE_FAIL if an allocation fails or if sel
 * contains an index outside 0..atom_len-1.  Arrays allocated before a
 * failure stay attached to f; force_cleanup_pairlists() releases them.
 */
int force_setup_pairlists(Force *f, double a, const int32 *sel, int32 sel_len)
{
  const int32 natoms = f->param->atom_len;
  const double density = natoms / f->volume;

  /* compute estimated length of interaction lists */
  int32 estlen = (int32) floor((2./3) * M_PI * a * a * a * density);

  int32 i, j;

  /*
   * A sparse system can round the estimate down to zero.  calloc(0, ...)
   * is allowed to return NULL, which would be reported as a failure, so
   * always reserve at least one slot.
   */
  if (estlen < 1) estlen = 1;

  /* byte totals are formed in double because they can exceed int range */
  printf("# number of atoms: %d\n", natoms);
  printf("# estimated interaction list length: %d\n", estlen);
  printf("# need (%d x %d x %d = %.0f) bytes for index lists\n",
      natoms, estlen, (int)sizeof(int32),
      (double) natoms * (double) estlen * (double) sizeof(int32));
  printf("# need (%d x %d x %d = %.0f) bytes for offset lists\n",
      natoms, estlen, (int)sizeof(char),
      (double) natoms * (double) estlen * (double) sizeof(char));

  f->pos_init = (MD_Dvec *) calloc(natoms, sizeof(MD_Dvec));
  if (f->pos_init == NULL) {
    ERROR("calloc()");
    return FORCE_FAIL;
  }

  f->wrap_save = (MD_Dvec *) calloc(natoms, sizeof(MD_Dvec));
  if (f->wrap_save == NULL) {
    ERROR("calloc()");
    return FORCE_FAIL;
  }

  f->pairlist = (ForcePairlist *) calloc(natoms, sizeof(ForcePairlist));
  if (f->pairlist == NULL) {
    ERROR("calloc()");
    return FORCE_FAIL;
  }

  for (i = 0;  i < sel_len;  i++) {
    j = sel[i];
    if (j < 0 || j >= natoms) {
      /* an index outside the pairlist array would corrupt memory */
      ERROR("selection index is out of range");
      return FORCE_FAIL;
    }
    f->pairlist[j].index = (int32 *) calloc(estlen, sizeof(int32));
    if (f->pairlist[j].index == NULL) {
      ERROR("calloc()");
      return FORCE_FAIL;
    }
    f->pairlist[j].offset = (char *) calloc(estlen, sizeof(char));
    if (f->pairlist[j].offset == NULL) {
      ERROR("calloc()");
      return FORCE_FAIL;
    }
    f->pairlist[j].max = estlen;
  }

  /* need to explicitly regenerate pairlists */
  f->need_pairlist_regen = 1;

  return 0;
}


/*
 * force_cleanup_pairlists()
 *
 * Release the per-atom index and offset lists, the pairlist array itself,
 * and the wrap_save and pos_init arrays.
 *
 * Each pointer is reset to NULL after it is freed (as is already done in
 * force_cleanup_exclusions()), so a repeated cleanup is harmless.
 */
void force_cleanup_pairlists(Force *f)
{
  const int32 natoms = f->param->atom_len;
  int32 i;

  if (f->pairlist) {
    /* atoms that were never selected hold NULL lists; free(NULL) is a no-op */
    for (i = 0;  i < natoms;  i++) {
      free(f->pairlist[i].index);
      free(f->pairlist[i].offset);
    }
    free(f->pairlist);
    f->pairlist = NULL;
  }

  free(f->wrap_save);
  f->wrap_save = NULL;

  free(f->pos_init);
  f->pos_init = NULL;
}


/*
 * force_setup_vdwparams()
 *
 * build the van der Waals parameter table
 *
 * table is a "square" symmetric matrix, dimension (natomprms * natomprms)
 * each entry of matrix contains ForceVdwparam element of parameter values
 * matrix is indexed by atom "types" (0..natomprms-1)
 *
 * vdwparam is stored as one-dimensional array of ForceVdwparam, where
 * index for (i,j) atom pair interaction is:  (i * natomprms + j)
 */

/*
 * Release the van der Waals parameter table and the safe Buckingham
 * table.  Both pointers are reset to NULL afterwards (as is already done
 * in force_cleanup_exclusions()), so a repeated cleanup is harmless.
 */
void force_cleanup_vdwparams(Force *f)
{
  free(f->vdwparam);
  f->vdwparam = NULL;

  free(f->bucksafe);
  f->bucksafe = NULL;
}

/*
 * Build the (natomprms x natomprms) van der Waals parameter table and
 * store it in f->vdwparam.
 *
 * Steps:
 *   1. fill every (i,j) entry and its transpose from the per-type
 *      atom parameters (emin, rmin, emin14, rmin14)
 *   2. overwrite the entries named by the nbfix parameters
 *   3. if FORCE_MASK_VDW_BUCK is selected in vdwopts, overwrite the whole
 *      table with the chosen silica parameterization; for each entry
 *      "a" holds the prefactor, "rmin2" the length constant and "b" the
 *      third constant of the potential
 *   4. if a "safe" Buckingham option is also selected, build the
 *      f->bucksafe table using force_safe_buckingham_params()
 *
 * Returns 0 on success and FORCE_FAIL on failure.  The tables are attached
 * to f only on success; on every failure path the partially built tables
 * are freed here, because the caller has no pointer to them.
 */
int force_setup_vdwparams(Force *f)
{
  MD_AtomPrm *atomprm = f->param->atomprm;
  const int32 natomprms = f->param->atomprm_len;
  MD_NbfixPrm *nbfixprm = f->param->nbfixprm;
  const int32 nnbfixprms = f->param->nbfixprm_len;
  ForceVdwparam *vdwparam, *ij_entry, *ji_entry;
  double neg_emin, rmin, rmin2, neg_emin14, rmin14, rmin2_14;
  int32 i, j, k;

  ASSERT(natomprms > 0);
  /* form the table size in size_t so the product cannot overflow int32 */
  vdwparam = (ForceVdwparam *)
    malloc((size_t) natomprms * (size_t) natomprms * sizeof(ForceVdwparam));
  if (vdwparam == NULL) {
    ERROR("malloc()");
    return FORCE_FAIL;
  }

  /* compute each table entry given separate i and j atom params */
  for (i = 0;  i < natomprms;  i++) {
    for (j = i;  j < natomprms;  j++) {
      ij_entry = vdwparam + (i * natomprms + j);
      ji_entry = vdwparam + (j * natomprms + i);

      /* compute vdw A and B coefficients for atom type ij interaction */
      neg_emin = sqrt(atomprm[i].emin * atomprm[j].emin);
      rmin = 0.5 * (atomprm[i].rmin + atomprm[j].rmin);
      neg_emin14 = sqrt(atomprm[i].emin14 * atomprm[j].emin14);
      rmin14 = 0.5 * (atomprm[i].rmin14 + atomprm[j].rmin14);

      /* raise rmin and rmin14 to 6th power */
      rmin2 = rmin * rmin;
      rmin *= rmin2;
      rmin *= rmin;
      rmin2_14 = rmin14 * rmin14;
      rmin14 *= rmin2_14;
      rmin14 *= rmin14;

      /* set ij entry and its transpose */
      ij_entry->a     = ji_entry->a     = neg_emin * rmin * rmin;
      ij_entry->b     = ji_entry->b     = 2.0 * neg_emin * rmin;
      ij_entry->a14   = ji_entry->a14   = neg_emin14 * rmin14 * rmin14;
      ij_entry->b14   = ji_entry->b14   = 2.0 * neg_emin14 * rmin14;
      /* store larger of the two possible VDW radius values */
      ij_entry->rmin2 = ji_entry->rmin2 = (rmin2_14 > rmin2 ? rmin2_14 : rmin2);
    }
  }

  /* now go back and update entries for nbfix params */
  for (k = 0;  k < nnbfixprms;  k++) {
    i = nbfixprm[k].prm[0];
    j = nbfixprm[k].prm[1];

    /* a type index outside the table would write out of bounds */
    if (i < 0 || i >= natomprms || j < 0 || j >= natomprms) {
      ERROR("nbfix parameter refers to atom type out of range");
      free(vdwparam);
      return FORCE_FAIL;
    }

    ij_entry = vdwparam + (i * natomprms + j);
    ji_entry = vdwparam + (j * natomprms + i);

    /* compute vdw A and B coefficients for this fixed type interaction */
    neg_emin = -nbfixprm[k].emin;
    rmin = nbfixprm[k].rmin;
    neg_emin14 = -nbfixprm[k].emin14;
    rmin14 = nbfixprm[k].rmin14;

    /* raise rmin and rmin14 to 6th power */
    rmin2 = rmin * rmin;
    rmin *= rmin2;
    rmin *= rmin;
    rmin2_14 = rmin14 * rmin14;
    rmin14 *= rmin2_14;
    rmin14 *= rmin14;

    /* set ij entry and its transpose */
    ij_entry->a     = ji_entry->a     = neg_emin * rmin * rmin;
    ij_entry->b     = ji_entry->b     = 2.0 * neg_emin * rmin;
    ij_entry->a14   = ji_entry->a14   = neg_emin14 * rmin14 * rmin14;
    ij_entry->b14   = ji_entry->b14   = 2.0 * neg_emin14 * rmin14;
    /* store larger of the two possible VDW radius values */
    ij_entry->rmin2 = ji_entry->rmin2 = (rmin2_14 > rmin2 ? rmin2_14 : rmin2);
  }

  /*
   * Buckingham potential for silica
   *
   * modify table for si-si, si-o, and o-o interactions,
   * sets others to zero (and prints warning message)
   *
   * see Flikkema & Bromley, Chem Phys Lett 378 (2003) 622-629,
   * FB parameter values taken from this reference
   *
   * for Coulombic potential you can use charges q_si = 2.4 e, q_o = -1.2 e
   */
  if (FORCE_MASK_VDW_BUCK & f->param->vdwopts) {
    double a_si_o, a_o_o, a_si_si;
    double b_si_o, b_o_o, b_si_si;
    double c_si_o, c_o_o, c_si_si;

    printf("# setting Buckingham parameterization\n");
    switch (FORCE_MASK_VDW_BUCKPRM & f->param->vdwopts) {
      case FORCE_VDW_BUCKPRM_BKS:
        printf("#   using BKS parameterization for silica\n");
        a_si_o  = 415020.541915277;  /* kcal/mol */
        a_o_o   = 32011.8325889865;  /* kcal/mol */
        a_si_si = 0.0;               /* kcal/mol */
        b_si_o  = 0.205205506984223; /* Ang */
        b_o_o   = 0.362323482844775; /* Ang */
        b_si_si = 0.333333333333333; /* Ang */
        c_si_o  = 3078.35790120536;  /* Ang^6 kcal/mol */
        c_o_o   = 4034.18491523635;  /* Ang^6 kcal/mol */
        c_si_si = 0.0;               /* Ang^6 kcal/mol */
        break;
      case FORCE_VDW_BUCKPRM_TTAM:
        printf("#   using TTAM parameterization for silica\n");
        a_si_o  = 247244.172408168;  /* kcal/mol */
        a_o_o   = 40514.4105526705;  /* kcal/mol */
        a_si_si = 20117023442.1621;  /* kcal/mol */
        b_si_o  = 0.20851;           /* Ang */
        b_o_o   = 0.35132;           /* Ang */
        b_si_si = 0.06570;           /* Ang */
        c_si_o  = 1631.17660000000;  /* Ang^6 kcal/mol */
        c_o_o   = 4951.93690000000;  /* Ang^6 kcal/mol */
        c_si_si = 537.312400000000;  /* Ang^6 kcal/mol */
        break;
      case FORCE_VDW_BUCKPRM_FB:
        printf("#   using Flikkema-Bromley parameterization for silica\n");
        a_si_o  = 241079.522480676;  /* kcal/mol */
        a_o_o   = 32939.8108424280;  /* kcal/mol */
        a_si_si = 1833361.49791679;  /* kcal/mol */
        b_si_o  = 0.208;             /* Ang */
        b_o_o   = 0.358;             /* Ang */
        b_si_si = 0.201;             /* Ang */
        c_si_o  = 1453.89773928600;  /* Ang^6 kcal/mol */
        c_o_o   = 954.106699212000;  /* Ang^6 kcal/mol */
        c_si_si = 10302.9871676400;  /* Ang^6 kcal/mol */
        break;
      case FORCE_VDW_GLASSPRM:
        /*
         * parameters from A.E. Kohler, S.H. Garofalini,
         * "Effect of Composition on the Penetration of Inert Gases
         * Adsorbed onto Silicate Glass Surfaces",
         * Langmuir, Vol. 10, No. 12, 1994, pp. 4664-4669
         *
         * use with elecopts=FORCE_ELEC_GLASS,
         * vdwopts=(FORCE_VDW_GLASS|FORCE_VDW_GLASSPRM)
         *
         * has potential form:  a exp(-r/b) + (q_i q_j) erfc(c r)
         */
        printf("#   using 2-body glass potential\n");
        a_si_o  = 42632.8503211759;  /* kcal/mol */
        a_o_o   = 10435.1169759799;  /* kcal/mol */
        a_si_si = 27016.1580191922;  /* kcal/mol */
        b_si_o  = 0.29;              /* Ang */
        b_o_o   = 0.29;              /* Ang */
        b_si_si = 0.29;              /* Ang */
        c_si_o  = (1./2.34);         /* 1/Ang */
        c_o_o   = (1./2.34);         /* 1/Ang */
        c_si_si = (1./2.30);         /* 1/Ang */
        break;
      default:
        ERROR("no parameterization indicated for silica");
        free(vdwparam);  /* not yet attached to f, so release it here */
        return FORCE_FAIL;
    }

    /* constructing the table */
    for (i = 0;  i < natomprms;  i++) {
      for (j = i;  j < natomprms;  j++) {
        const int is_i_si = (strcasecmp(atomprm[i].type, "si") == 0);
        const int is_i_o  = (strcasecmp(atomprm[i].type, "o")  == 0);
        const int is_j_si = (strcasecmp(atomprm[j].type, "si") == 0);
        const int is_j_o  = (strcasecmp(atomprm[j].type, "o")  == 0);

        ij_entry = vdwparam + (i * natomprms + j);
        ji_entry = vdwparam + (j * natomprms + i);

        if ((is_i_si || is_i_o) && (is_j_si || is_j_o)) {
          if (is_i_si && is_j_si) {
            ij_entry->a     = ji_entry->a     = a_si_si;
            ij_entry->a14   = ji_entry->a14   = a_si_si;
            ij_entry->rmin2 = ji_entry->rmin2 = b_si_si;
            ij_entry->b     = ji_entry->b     = c_si_si;
            ij_entry->b14   = ji_entry->b14   = c_si_si;
          }
          else if (is_i_o && is_j_o) {
            ij_entry->a     = ji_entry->a     = a_o_o;
            ij_entry->a14   = ji_entry->a14   = a_o_o;
            ij_entry->rmin2 = ji_entry->rmin2 = b_o_o;
            ij_entry->b     = ji_entry->b     = c_o_o;
            ij_entry->b14   = ji_entry->b14   = c_o_o;
          }
          else {
            ij_entry->a     = ji_entry->a     = a_si_o;
            ij_entry->a14   = ji_entry->a14   = a_si_o;
            ij_entry->rmin2 = ji_entry->rmin2 = b_si_o;
            ij_entry->b     = ji_entry->b     = c_si_o;
            ij_entry->b14   = ji_entry->b14   = c_si_o;
          }
        }
        else {
          printf("# WARNING:  no Buckingham parameterization for "
              "%s-%s, setting to zero\n", atomprm[i].type, atomprm[j].type);
          ij_entry->a     = ji_entry->a     = 0.0;
          ij_entry->a14   = ji_entry->a14   = 0.0;
          ij_entry->rmin2 = ji_entry->rmin2 = 1.0;
          ij_entry->b     = ji_entry->b     = 0.0;
          ij_entry->b14   = ji_entry->b14   = 0.0;
        }
      }
    }

    /*
     * Use safe Buckingham potential for energy minimization to
     * remove the unrealistic negative energy well when atoms are too close.
     */
    if ((FORCE_VDW_BUCKSAFE | FORCE_VDW_SWITCHBUCKSAFE) & f->param->vdwopts) {
      ForceBucksafe *bucksafe;
      ForceBucksafe *ij_entry, *ji_entry;  /* shadow the vdwparam cursors */

#if 0
      double r2_si_o, a_si_o, b_si_o;
      double r2_o_o, a_o_o, b_o_o;
      double r2_si_si, a_si_si, b_si_si;

      /*
       * Constants for inner switched potential a/r^6+b smoothly fitted
       * to join the change in curvature of Buckingham (where D^2 U(r) = 0).
       * Determined to 25 decimal places of accuracy using Mathematica.
       *
       * Note that fitting to 1/r^6 is done for computational efficiency.
       * Use of the extra parameter provides continuously differentiable join.
       */
      printf("# setting parameters for Buckingham minimization\n");
      switch (FORCE_MASK_VDW_BUCKPRM & f->param->vdwopts) {
        case FORCE_VDW_BUCKPRM_BKS:
          printf("#   (not yet determined for BKS)\n");
          r2_si_o = 0;  /* Ang^2 */
          a_si_o = 0;   /* Ang^6 kcal/mol */
          b_si_o = 0;   /* kcal/mol */
          r2_o_o = 0;   /* Ang^2 */
          a_o_o = 0;    /* Ang^6 kcal/mol */
          b_o_o = 0;    /* kcal/mol */
          r2_si_si = 0; /* Ang^2 */
          a_si_si = 0;  /* Ang^6 kcal/mol */
          b_si_si = 0;  /* kcal/mol */
          break;
        case FORCE_VDW_BUCKPRM_TTAM:
          printf("#   (not yet determined for TTAM)\n");
          r2_si_o = 0;  /* Ang^2 */
          a_si_o = 0;   /* Ang^6 kcal/mol */
          b_si_o = 0;   /* kcal/mol */
          r2_o_o = 0;   /* Ang^2 */
          a_o_o = 0;    /* Ang^6 kcal/mol */
          b_o_o = 0;    /* kcal/mol */
          r2_si_si = 0; /* Ang^2 */
          a_si_si = 0;  /* Ang^6 kcal/mol */
          b_si_si = 0;  /* kcal/mol */
          break;
        case FORCE_VDW_BUCKPRM_FB:
          printf("#   using Flikkema-Bromley parameterization for silica\n");
          r2_si_o  = 1.2211101829078113003;  /* Ang^2 */
          a_si_o   = 461.75952045687460022;  /* Ang^6 kcal/mol */
          b_si_o   = 136.11149010133529893;  /* kcal/mol */
          r2_o_o   = 1.4618768486982901883;  /* Ang^2 */
          a_o_o    = 1023.4211927964766715;  /* Ang^6 kcal/mol */
          b_o_o    = 491.54466747401262419;  /* kcal/mol */
          r2_si_si = 1.2677851866669356429;  /* Ang^2 */
          a_si_si  = 2571.6328424942832093;  /* Ang^6 kcal/mol */
          b_si_si  = 449.14173796245215111;  /* kcal/mol */
          break;
        default:
          ERROR("no parameterization indicated for silica");
          return FORCE_FAIL;
      }
#endif

      /* Buckingham constants:  a exp(-r/b) - c/r^6  */
      double a, b, c;

      /* constants for switched extension:  a/r^6 + b  */
      double as, bs;    /* the "a" and "b" for above */
      double rs;        /* switching distance (an inner cutoff) */

      /* reported for diagnostic purposes */
      double urs;       /* U(rs) - energy at switch */
      double rmax;      /* distance at barrier height, Umax = U(rmax) */
      double urmax;     /* U(rmax) - energy at barrier height */

      bucksafe = (ForceBucksafe *)
        calloc((size_t) natomprms * (size_t) natomprms, sizeof(ForceBucksafe));
      if (NULL == bucksafe) {
        ERROR("calloc()");
        free(vdwparam);  /* not yet attached to f, so release it here */
        return FORCE_FAIL;
      }

      for (i = 0;  i < natomprms;  i++) {
        for (j = i;  j < natomprms;  j++) {
          const int is_i_si = (strcasecmp(atomprm[i].type, "si") == 0);
          const int is_i_o  = (strcasecmp(atomprm[i].type, "o")  == 0);
          const int is_j_si = (strcasecmp(atomprm[j].type, "si") == 0);
          const int is_j_o  = (strcasecmp(atomprm[j].type, "o")  == 0);

          ij_entry = bucksafe + (i * natomprms + j);
          ji_entry = bucksafe + (j * natomprms + i);

          /* retrieve Buckingham parameters */
          a = vdwparam[i * natomprms + j].a;
          b = vdwparam[i * natomprms + j].rmin2;
          c = vdwparam[i * natomprms + j].b;

          if ((is_i_si || is_i_o) && (is_j_si || is_j_o)) {
            if (force_safe_buckingham_params(&as, &bs, &rs, &urs,
                  &rmax, &urmax, a, b, c)) {
              ERROR("force_safe_buckingham_params()");
              /* neither table is attached to f yet, so release both */
              free(bucksafe);
              free(vdwparam);
              return FORCE_FAIL;
            }
            printf("# safe Buckingham extension for %s-%s interaction:\n",
                atomprm[i].type, atomprm[j].type);
            printf("#   A=%.12g  B=%.12g\n", as, bs);
            printf("# switch to extension:\n"
                "#   R_switch=%.12g Ang  U(R_switch)=%.12g kcal/mol\n",rs,urs);
            printf("# Buckingham energy barrier:\n"
                "#   R_max=%.12g Ang  U(R_max)=%.12g kcal/mol\n", rmax, urmax);
            ij_entry->a       = ji_entry->a       = as;
            ij_entry->b       = ji_entry->b       = bs;
            ij_entry->rinner2 = ji_entry->rinner2 = rs * rs;
          }
          else {
            printf("# WARNING:  no Buckingham parameterization for "
                "%s-%s, setting to zero\n", atomprm[i].type, atomprm[j].type);
            ij_entry->a       = ji_entry->a       = 0.0;
            ij_entry->b       = ji_entry->b       = 0.0;
            ij_entry->rinner2 = ji_entry->rinner2 = 0.0;
          }

        }
      }

      f->bucksafe = bucksafe;

    } /* end BUCKSAFE parameterization */

  } /* end if Buckingham */

  f->vdwparam = vdwparam;

/* #define DEBUG_VDWPARAMS */
#ifdef DEBUG_VDWPARAMS
  printf("# VDW params table:  (%d x %d = %d) entries\n",
      natomprms, natomprms, natomprms*natomprms);
  printf("# requires %d bytes per entry, %d bytes for full table\n",
      (int)sizeof(ForceVdwparam),
      natomprms*natomprms*(int)sizeof(ForceVdwparam));
  printf("# only %d unique table entries\n", natomprms*(natomprms+1)/2);

  /*
  printf("# showing rmin2 entries:\n");
  printf("# showing A entries:\n");
  printf("# showing B entries:\n");
  */
  /*
  printf("#");
  for (i = 0;  i < natomprms;  i++) {
    printf(" %8s", atomprm[i].type);
  }
  printf("\n");
  */
  for (j = 0;  j < natomprms;  j++) {
    /* printf("#"); */
    for (i = 0;  i < natomprms;  i++) {
      ji_entry = vdwparam + (j * natomprms + i);
      printf("# [%2d,%2d]: a=%g b=%g a14=%g b14=%g rmin2=%g\n",
          i, j, ji_entry->a, ji_entry->b, ji_entry->a14, ji_entry->b14,
          ji_entry->rmin2);
      /*
      printf(" %8g", ji_entry->rmin2);
      printf(" %8g", ji_entry->a);
      printf(" %8g", ji_entry->b);
      */
    }
    /* printf("\n"); */
  }
#endif

  return 0;
}


/*
 * force_setup_exclusions()
 *
 * build the exclusion lists
 *
 * lists are built from MD_Excl and MD_Bond
 *
 * algorithm (using set notation):
 *   exclx[i] = { j : there is an explicit exclusion (i,j) }
 *   excl12[i] = { j : there is a bond (i,j) }
 *   excl13[i] = (excl12[i] U ( U_{ j \in excl12[i] } excl12[j] )) \ {i}
 *   excl14[i] = (excl13[i] U ( U_{ j \in excl13[i] } excl12[j] )) \ {i}
 *   scaled14[i] = excl14[i] \ excl13[i]
 *
 *   excl_list[i] = exclx[i],              if policy is FORCE_EXCL_NONE
 *                = exclx[i] U excl12[i],  if policy is FORCE_EXCL_12
 *                = exclx[i] U excl13[i],  if policy is FORCE_EXCL_13
 *                = exclx[i] U excl14[i],  if policy is FORCE_EXCL_14
 *
 *   excl_list[i] = exclx[i] U excl13[i]
 *     AND
 *   scaled14_list[i] = scaled14[i],       if policy is FORCE_EXCL_SCAL14
 *
 * implementation notes:
 *   - allocates little extra memory
 *   - implemented by merging sorted exclusion lists
 *   - each atom's exclusion array is terminated by an MD_INT32_MAX sentinel
 */

/* prototypes of internal functions */
static void cleanup(Force *f);
static void sort(int32 *list, int32 len);
static int32 merge(int32 *dest, const int32 *src1, const int32 *src2, int32 n);

/*
 * Release everything owned by the exclusion setup:  first whatever the
 * internal cleanup() helper releases, then each per-atom row of excl_list
 * and scaled14_list followed by the row arrays themselves.  Both list
 * pointers are left NULL.
 */
void force_cleanup_exclusions(Force *f)
{
  const int32 natoms = f->param->atom_len;
  int32 row;

  cleanup(f);

  if (f->excl_list != NULL) {
    row = 0;
    while (row < natoms) {
      free(f->excl_list[row]);
      row++;
    }
    free(f->excl_list);
    f->excl_list = NULL;
  }

  if (f->scaled14_list != NULL) {
    row = 0;
    while (row < natoms) {
      free(f->scaled14_list[row]);
      row++;
    }
    free(f->scaled14_list);
    f->scaled14_list = NULL;
  }
}

/*
 * force_setup_exclusions()
 *
 * Build the per-atom exclusion lists (and, for the scaled 1-4 policy,
 * the scaled 1-4 lists) from the explicit exclusions and the bonds in
 * f->param, as selected by f->param->exclpolicy.
 *
 * On success returns 0.  f->excl_list[i] (and f->scaled14_list[i] when
 * the policy is FORCE_EXCL_SCAL14) is then a sorted array terminated by
 * the MD_INT32_MAX sentinel, and all intermediate storage has been
 * released through cleanup().
 *
 * On failure returns FORCE_FAIL with f->excl_list and f->scaled14_list
 * left NULL.  Failure causes are: no atoms, an exclusion or bond that
 * names an atom index outside [0, natoms), or an allocation failure.
 * Except for the "no atoms" case (nothing has been allocated yet),
 * everything built so far is released through cleanup().
 *
 * The intermediate pointers in f (exclx, excl12, excl13, excl14, scaled14,
 * lenx, len12, len13, accum, dest) must be NULL on entry, because
 * cleanup() frees every one of them regardless of how far setup got.
 */
int force_setup_exclusions(Force *f)
{
  const MD_Excl *excl = f->param->excl;
  const MD_Bond *bond = f->param->bond;
  const int32 natoms = f->param->atom_len;
  const int32 nexcls = f->param->excl_len;
  const int32 nbonds = f->param->bond_len;
  const int32 excl_policy = f->param->exclpolicy;
  int32 *list;
  int32 len, i, j, k, ii, jj, kk, atom1, atom2;
  int32 size;     /* allocated length of accum and dest arrays */
  int32 maxsize;  /* largest length needed to be held by accum or dest */
  int32 accumlen; /* used length of accum (accumlen <= maxsize <= size) */

  /* initialize */
  f->excl_list = NULL;
  f->scaled14_list = NULL;

  /* error if there are no atoms */
  if (natoms == 0) {
    ERROR("there are 0 atoms");
    return FORCE_FAIL;
  }

  /* allocate memory for explicit exclusions list */
  f->exclx = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->exclx == NULL) {
    ERROR("calloc()");
    goto fail;
  }
  f->lenx = (int32 *) calloc(natoms, sizeof(int32));
  if (f->lenx == NULL) {
    ERROR("calloc()");
    goto fail;
  }

  /*
   * count number of explicit exclusions for each atom
   *
   * the atom indices are used as array subscripts from here on,
   * so reject any index that does not name one of the natoms atoms
   */
  for (i = 0;  i < nexcls;  i++) {
    atom1 = excl[i].atom[0];
    atom2 = excl[i].atom[1];
    if (atom1 < 0 || atom1 >= natoms || atom2 < 0 || atom2 >= natoms) {
      ERROR("exclusion atom index is out of range");
      goto fail;
    }
    if (atom1 != atom2) {  /* self exclusions are ignored */
      f->lenx[atom1]++;
      f->lenx[atom2]++;
    }
  }

  /* allocate memory for each row of exclx, leave space for sentinel */
  for (i = 0;  i < natoms;  i++) {
    f->exclx[i] = (int32 *) malloc((f->lenx[i] + 1) * sizeof(int32));
    if (f->exclx[i] == NULL) {
      ERROR("malloc()");
      goto fail;
    }
    f->lenx[i] = 0;  /* zero this to be length counter */
  }

  /* loop over explicit exclusions to fill in the rows of exclx */
  for (i = 0;  i < nexcls;  i++) {
    atom1 = excl[i].atom[0];
    atom2 = excl[i].atom[1];
    if (atom1 != atom2) {
      f->exclx[atom1][ f->lenx[atom1]++ ] = atom2;
      f->exclx[atom2][ f->lenx[atom2]++ ] = atom1;
    }
  }

  /* place sentinel at end of each row */
  for (i = 0;  i < natoms;  i++) {
    f->exclx[i][ f->lenx[i] ] = MD_INT32_MAX;
  }

  /* sort each exclx row */
  for (i = 0;  i < natoms;  i++) {
    sort(f->exclx[i], f->lenx[i]);
  }

  /* if we're doing no bond exclusions, we're done */
  if (excl_policy == FORCE_EXCL_NONE) {
    /* hand exclx over as the final list so that cleanup() keeps it */
    f->excl_list = f->exclx;
    f->exclx = NULL;
    cleanup(f);
    return 0;
  }

  /* allocate memory for 1-2 exclusions list */
  f->excl12 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->excl12 == NULL) {
    ERROR("calloc()");
    goto fail;
  }
  f->len12 = (int32 *) calloc(natoms, sizeof(int32));
  if (f->len12 == NULL) {
    ERROR("calloc()");
    goto fail;
  }

  /* find the length of each row of excl12, validating the bond indices */
  for (i = 0;  i < nbonds;  i++) {
    atom1 = bond[i].atom[0];
    atom2 = bond[i].atom[1];
    if (atom1 < 0 || atom1 >= natoms || atom2 < 0 || atom2 >= natoms) {
      ERROR("bond atom index is out of range");
      goto fail;
    }
    f->len12[atom1]++;
    f->len12[atom2]++;
  }

  /* allocate memory for each row of excl12 */
  /* leave space for explicit exclusion list and sentinel */
  /* also determine maxsize */
  maxsize = 0;
  for (i = 0;  i < natoms;  i++) {
    len = f->len12[i] + f->lenx[i] + 1;
    if (maxsize < len) maxsize = len;
    f->excl12[i] = (int32 *) malloc(len * sizeof(int32));
    if (f->excl12[i] == NULL) {
      ERROR("malloc()");
      goto fail;
    }
    f->len12[i] = 0;   /* zero this to be length counter */
  }

  /* loop over bonds to fill in the rows of excl12 */
  for (i = 0;  i < nbonds;  i++) {
    atom1 = bond[i].atom[0];
    atom2 = bond[i].atom[1];
    f->excl12[atom1][ f->len12[atom1]++ ] = atom2;
    f->excl12[atom2][ f->len12[atom2]++ ] = atom1;
  }

  /* place sentinel at end of each row */
  for (i = 0;  i < natoms;  i++) {
    f->excl12[i][ f->len12[i] ] = MD_INT32_MAX;
  }

  /* sort each excl12 row */
  for (i = 0;  i < natoms;  i++) {
    sort(f->excl12[i], f->len12[i]);
  }

  /* initialize accum and dest arrays for merge and swap */
  size = 10;
  while (size < maxsize)  size *= 2;
  f->accum = (int32 *) malloc(size * sizeof(int32));
  if (f->accum == NULL) {
    ERROR("malloc()");
    goto fail;
  }
  f->dest = (int32 *) malloc(size * sizeof(int32));
  if (f->dest == NULL) {
    ERROR("malloc()");
    goto fail;
  }

  /* if we're excluding only 1-2 interactions, we're done */
  if (excl_policy == FORCE_EXCL_12) {
    /* merge each excl12 row with exclx row */
    /* (each excl12 row was allocated with room for the exclx entries) */
    for (i = 0;  i < natoms;  i++) {
      len = merge(f->dest, f->exclx[i], f->excl12[i], i);
      memcpy(f->excl12[i], f->dest, (len + 1) * sizeof(int32));
    }
    f->excl_list = f->excl12;
    f->excl12 = NULL;
    cleanup(f);
    return 0;
  }

  /* allocate memory for 1-3 exclusions list */
  f->excl13 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->excl13 == NULL) {
    ERROR("calloc()");
    goto fail;
  }
  f->len13 = (int32 *) malloc(natoms * sizeof(int32));
  if (f->len13 == NULL) {
    ERROR("malloc()");
    goto fail;
  }

  /* merge the excl12 lists into excl13 lists */
  for (i = 0;  i < natoms;  i++) {
    /* start from the atom's own 1-2 list (sentinel included) */
    memcpy(f->accum, f->excl12[i], (f->len12[i] + 1) * sizeof(int32));
    accumlen = f->len12[i];
    for (j = 0;  f->excl12[i][j] < MD_INT32_MAX;  j++) {
      k = f->excl12[i][j];
      if (k == i) continue;
      /*
       * the merge below produces at most accumlen + len12[k] entries
       * plus the sentinel; also keep room for lenx[i] more entries so
       * that the later merge with exclx[i] fits into dest
       */
      maxsize = accumlen + f->len12[k];
      if (size <= maxsize + f->lenx[i]) {
        do { size *= 2; } while (size <= maxsize + f->lenx[i]);
        list = (int32 *) realloc(f->accum, size * sizeof(int32));
        if (list == NULL) {
          ERROR("realloc()");
          goto fail;
        }
        f->accum = list;
        list = (int32 *) realloc(f->dest, size * sizeof(int32));
        if (list == NULL) {
          ERROR("realloc()");
          goto fail;
        }
        f->dest = list;
      }
      accumlen = merge(f->dest, f->accum, f->excl12[k], i);
      /* swap buffers so that accum again holds the running result */
      list = f->accum;
      f->accum = f->dest;
      f->dest = list;
    }
    /* leave space for explicit exclusion list and sentinel */
    len = accumlen + f->lenx[i] + 1;
    f->excl13[i] = (int32 *) malloc(len * sizeof(int32));
    if (f->excl13[i] == NULL) {
      ERROR("malloc()");
      goto fail;
    }
    memcpy(f->excl13[i], f->accum, (accumlen + 1) * sizeof(int32));
    f->len13[i] = accumlen;
  }

  /* if we're excluding 1-2 and 1-3 interactions, we're done */
  if (excl_policy == FORCE_EXCL_13) {
    /* merge each excl13 row with exclx row */
    for (i = 0;  i < natoms;  i++) {
      len = merge(f->dest, f->exclx[i], f->excl13[i], i);
      memcpy(f->excl13[i], f->dest, (len + 1) * sizeof(int32));
    }
    f->excl_list = f->excl13;
    f->excl13 = NULL;
    cleanup(f);
    return 0;
  }

  /* allocate memory for 1-4 exclusions list */
  f->excl14 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->excl14 == NULL) {
    ERROR("calloc()");
    goto fail;
  }

  /* merge the excl13 lists into excl14 lists */
  for (i = 0;  i < natoms;  i++) {
    /* start from the atom's own 1-3 list (sentinel included) */
    memcpy(f->accum, f->excl13[i], (f->len13[i] + 1) * sizeof(int32));
    accumlen = f->len13[i];
    for (j = 0;  f->excl13[i][j] < MD_INT32_MAX;  j++) {
      k = f->excl13[i][j];
      if (k == i) continue;
      /* same buffer sizing rule as for the 1-3 lists above */
      maxsize = accumlen + f->len12[k];
      if (size <= maxsize + f->lenx[i]) {
        do { size *= 2; } while (size <= maxsize + f->lenx[i]);
        list = (int32 *) realloc(f->accum, size * sizeof(int32));
        if (list == NULL) {
          ERROR("realloc()");
          goto fail;
        }
        f->accum = list;
        list = (int32 *) realloc(f->dest, size * sizeof(int32));
        if (list == NULL) {
          ERROR("realloc()");
          goto fail;
        }
        f->dest = list;
      }
      accumlen = merge(f->dest, f->accum, f->excl12[k], i);
      /* swap buffers so that accum again holds the running result */
      list = f->accum;
      f->accum = f->dest;
      f->dest = list;
    }
    /* leave space for explicit exclusion list and sentinel */
    len = accumlen + f->lenx[i] + 1;
    f->excl14[i] = (int32 *) malloc(len * sizeof(int32));
    if (f->excl14[i] == NULL) {
      ERROR("malloc()");
      goto fail;
    }
    memcpy(f->excl14[i], f->accum, (accumlen + 1) * sizeof(int32));
  }

  /* if we're excluding 1-2, 1-3, and 1-4 interactions, we're done */
  if (excl_policy == FORCE_EXCL_14) {
    /* merge each excl14 row with exclx row */
    for (i = 0;  i < natoms;  i++) {
      len = merge(f->dest, f->exclx[i], f->excl14[i], i);
      memcpy(f->excl14[i], f->dest, (len + 1) * sizeof(int32));
    }
    f->excl_list = f->excl14;
    f->excl14 = NULL;
    cleanup(f);
    return 0;
  }

  /* must have scaled1-4 exclusion policy */
  ASSERT(excl_policy == FORCE_EXCL_SCAL14);

  /* allocate memory for scaled 1-4 list */
  f->scaled14 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->scaled14 == NULL) {
    ERROR("calloc()");
    goto fail;
  }

  /* scaled14_list includes everything in excl14 that is not in excl13 */
  for (i = 0;  i < natoms;  i++) {
    /*
     * excl14[i] was built by merging further rows into excl13[i], so the
     * entries of excl13[i] appear in excl14[i] in the same order; walk
     * both rows together and keep the excl14 entries that have no match
     * (ii indexes excl14, jj indexes excl13, kk counts entries kept)
     */
    ii = jj = kk = 0;
    while (f->excl14[i][ii] < MD_INT32_MAX) {
      if (f->excl14[i][ii] != f->excl13[i][jj]) {
        f->dest[kk++] = f->excl14[i][ii++];
      }
      else {
        ii++;
        jj++;
      }
    }
    f->dest[kk] = MD_INT32_MAX;
    f->scaled14[i] = (int32 *) malloc((kk + 1) * sizeof(int32));
    if (f->scaled14[i] == NULL) {
      ERROR("malloc()");
      goto fail;
    }
    memcpy(f->scaled14[i], f->dest, (kk + 1) * sizeof(int32));
  }

  /* set pointers to the lists we need to keep */
  /* merge each excl13 row with exclx row */
  for (i = 0;  i < natoms;  i++) {
    len = merge(f->dest, f->exclx[i], f->excl13[i], i);
    memcpy(f->excl13[i], f->dest, (len + 1) * sizeof(int32));
  }
  f->excl_list = f->excl13;
  f->excl13 = NULL;
  f->scaled14_list = f->scaled14;
  f->scaled14 = NULL;
  cleanup(f);
  return 0;

fail:
  /*
   * release whatever was built before the error; rows that were never
   * allocated are still NULL from calloc(), and cleanup() resets every
   * intermediate pointer, so a later force_cleanup_exclusions() is safe
   */
  cleanup(f);
  return FORCE_FAIL;
}



/*
 * Free the scratch storage used while building the exclusion lists.
 *
 * Every intermediate table (exclx, excl12, excl13, excl14, scaled14)
 * that is still non-NULL is released row by row, followed by its array
 * of row pointers, and is then reset to NULL.  A table that was handed
 * over to excl_list or scaled14_list has been set to NULL beforehand
 * and is therefore left alone.  The length arrays and the accum/dest
 * merge buffers are freed and reset to NULL as well.
 */
void cleanup(Force *f)
{
  const int32 natoms = f->param->atom_len;
  int32 ***table[5];  /* addresses of the intermediate table pointers */
  int32 **rows;
  int32 t, k;

#if 0
  printf("scaled14 list:\n");
  for (k = 0;  k < natoms;  k++) {
    int32 *n;
    printf("%2d:", k);
    for (n = f->scaled14_list[k];  *n < natoms;  n++) {
      printf(" %d", *n);
    }
    printf(" %d\n", *n);
  }
#endif

  /* tables are released in this order */
  table[0] = &f->exclx;
  table[1] = &f->excl12;
  table[2] = &f->excl13;
  table[3] = &f->excl14;
  table[4] = &f->scaled14;

  for (t = 0;  t < 5;  t++) {
    rows = *table[t];
    if (rows == NULL) continue;
    for (k = 0;  k < natoms;  k++) {
      free(rows[k]);
    }
    free(rows);
    *table[t] = NULL;
  }

  /* row length counters */
  free(f->lenx);
  f->lenx = NULL;
  free(f->len12);
  f->len12 = NULL;
  free(f->len13);
  f->len13 = NULL;

  /* merge work buffers */
  free(f->accum);
  f->accum = NULL;
  free(f->dest);
  f->dest = NULL;
}


/*
 * Sort list[0..len-1] into ascending order, in place.
 *
 * This is an insertion sort that works from the back of the array
 * toward the front: each element is inserted into the already sorted
 * tail that follows it.  The inner scan has no explicit bound; it
 * relies on the MD_INT32_MAX sentinel stored at list[len] to stop.
 */
void sort(int32 *list, int32 len)
{
  int32 start, pos, val;

  start = len - 2;
  while (start >= 0) {
    val = list[start];
    /* shift smaller successors down until val's slot is found */
    for (pos = start;  list[pos + 1] < val;  pos++) {
      list[pos] = list[pos + 1];
    }
    list[pos] = val;
    start--;
  }
}


/*
 * Merge the sorted arrays src1 and src2 into dest.
 *
 * The result stays sorted; a value found at the current position of
 * both sources is stored only once; the value n (the atom's own index)
 * is dropped from either source instead of being copied.
 *
 * Both sources must end with the MD_INT32_MAX sentinel, and dest must
 * be large enough for the merged entries plus a sentinel, which is
 * appended here.
 *
 * Returns the number of entries stored in dest, not counting the sentinel.
 */
int32 merge(int32 *dest, const int32 *src1, const int32 *src2, int32 n)
{
  const int32 *a = src1;  /* read position in src1 */
  const int32 *b = src2;  /* read position in src2 */
  int32 *out = dest;      /* write position in dest */

  for (;;) {
    /* finished once both sources are sitting on their sentinels */
    if (*a >= MD_INT32_MAX && *b >= MD_INT32_MAX) break;

    /* skip the excluded value, checking src1 before src2 */
    if (*a == n) {
      a++;
      continue;
    }
    if (*b == n) {
      b++;
      continue;
    }

    if (*a < *b) {
      *out++ = *a++;
    }
    else if (*b < *a) {
      *out++ = *b++;
    }
    else {
      /* same value in both sources: keep one copy, advance both */
      *out++ = *a++;
      b++;
    }
  }
  *out = MD_INT32_MAX;
  return (int32) (out - dest);
}
