/*
 * Copyright (C) 2004-2005 by David J. Hardy.  All rights reserved.
 *
 * setupnb.c
 *
 * setup nonbonded data structures
 */

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "force/force.h"
#include "debug/debug.h"


/*
 * index offsets into van der Waals parameter table
 *
 * each (i,j) entry of the table filled in by build_vdwtable() is four
 * consecutive doubles; these constants select one of the four
 */
enum {
  A = 0,     /* neg_emin * rmin^12 for the type pair */
  B = 1,     /* 2 * neg_emin * rmin^6 for the type pair */
  A_14 = 2,  /* same form as A, from the emin14 / rmin14 parameters */
  B_14 = 3   /* same form as B, from the emin14 / rmin14 parameters */
};


/* prototypes of internal functions */

/* build f->vdwtable from the atom type and nbfix parameters */
static int build_vdwtable(Force *f);
/* build f->excl_list (and f->scaled14_list) from exclusions and bonds */
static int build_exclusions(Force *f);
/* free the working arrays used by build_exclusions */
static void build_exclusions_cleanup(Force *f);
/* sort list[0..len-1] ascending; expects a sentinel stored at list[len] */
static void sort(int32 *list, int32 len);
/* merge two sorted sentinel-terminated lists into dest, leaving out n */
static int32 merge(int32 *dest, const int32 *src1, const int32 *src2, int32 n);


/*
 * set up the nonbonded data structures of a Force object
 *
 * reads center, cutoffs, switching distance, domain lengths, and flags
 * from f->param, then:
 *   - if FORCE_ELEC is set, precomputes the electrostatic constants
 *   - if FORCE_VDW is set, precomputes the van der Waals constants and
 *     builds the vdw parameter table
 *   - records which domain lengths are zero (the is_?resize flags)
 *   - if cutoff > 0, sets the grid cell dimensions, allocates and
 *     initializes the cell array, and allocates the f->next array
 *   - builds the exclusion lists
 *
 * returns 0 on success, FORCE_FAIL if an allocation or a sub-step fails
 *
 * NOTE(review): on failure nothing already stored in f is released here;
 * presumably the caller's teardown frees it -- confirm
 */
int force_setup_nonbonded(Force *f)
{
  const MD_Dvec center = f->param->center;
  const double cutoff = f->param->cutoff;
  const double elec_cutoff = f->param->elec_cutoff;
  const double vdw_cutoff = f->param->vdw_cutoff;
  const double switchdist = f->param->switchdist;
  const double xlen = f->param->xlen;
  const double ylen = f->param->ylen;
  const double zlen = f->param->zlen;
  const int32 flags = f->param->flags;

  if (flags & FORCE_ELEC) {
    ASSERT(f->param->dielectric >= 1.0);
    /* fold the dielectric into the electrostatic constant once */
    f->elec_const = f->param->elec_const / f->param->dielectric;
    /* a zero elec_cutoff is accepted only with one of these flags set */
    ASSERT(elec_cutoff > 0.0
        || (flags & FORCE_ELEC_DIRECT) || (flags & FORCE_ELEC_EXCL));
    f->elec_cutoff2 = elec_cutoff * elec_cutoff;
    if (flags & FORCE_SMOOTH) {
      /* the reciprocal below needs a nonzero cutoff */
      ASSERT(elec_cutoff > 0.0);
      f->inv_elec_cutoff2 = 1.0 / f->elec_cutoff2;
    }
  }

  if (flags & FORCE_VDW) {
    /* a zero vdw_cutoff is accepted only with one of these flags set */
    ASSERT(vdw_cutoff > 0.0
        || (flags & FORCE_VDW_DIRECT) || (flags & FORCE_VDW_EXCL));
    f->vdw_cutoff2 = vdw_cutoff * vdw_cutoff;
    if (flags & FORCE_SWITCH) {
      ASSERT(switchdist > 0.0);
      ASSERT(switchdist < vdw_cutoff);
      f->switchdist2 = switchdist * switchdist;
      /* 1 / (vdw_cutoff^2 - switchdist^2)^3; nonzero by the ASSERTs above */
      f->inv_denom_switch = 1.0 /
        ((f->vdw_cutoff2 - f->switchdist2) *
         (f->vdw_cutoff2 - f->switchdist2) *
         (f->vdw_cutoff2 - f->switchdist2));
    }
    if (build_vdwtable(f)) return FORCE_FAIL;
  }

  /*
   * a domain length of exactly zero marks that dimension as "resize";
   * for such a non-periodic dimension the cell count is not fixed below
   * NOTE(review): presumably it is determined later from the atom
   * positions -- confirm
   */
  f->is_xresize = (xlen == 0.0);
  f->is_yresize = (ylen == 0.0);
  f->is_zresize = (zlen == 0.0);
  f->is_resize = (f->is_xresize || f->is_yresize || f->is_zresize);

  /*
   * a zero cutoff is accepted only for the flag combinations listed here
   * (each enabled interaction type must carry a DIRECT or EXCL flag)
   */
  ASSERT(cutoff > 0.0
      || (flags & FORCE_DIRECT) == FORCE_DIRECT
      || (flags & FORCE_EXCL) == FORCE_EXCL
      || ((flags & FORCE_ELEC_DIRECT) && (flags & FORCE_VDW_EXCL))
      || ((flags & FORCE_VDW_DIRECT) && (flags & FORCE_ELEC_EXCL))
      || ((flags & FORCE_ELEC_DIRECT) && (flags & FORCE_VDW) == 0)
      || ((flags & FORCE_ELEC_EXCL) && (flags & FORCE_VDW) == 0)
      || ((flags & FORCE_VDW_DIRECT) && (flags & FORCE_ELEC) == 0)
      || ((flags & FORCE_VDW_EXCL) && (flags & FORCE_ELEC) == 0));
  /* the grid cutoff must cover both interaction cutoffs */
  ASSERT(cutoff >= elec_cutoff);
  ASSERT(cutoff >= vdw_cutoff);

  if (cutoff > 0.0) {
    /* prepare for cutoff computation */
    f->inv_cutoff = 1.0 / cutoff;

    if (flags & FORCE_X_PERIODIC) {
      /*
       * periodic: floor(xlen / cutoff) cells (at least one) tile the
       * period exactly, so a cell is no narrower than cutoff whenever
       * cutoff <= xlen; lo.x is the lower edge of the period
       */
      ASSERT(cutoff <= xlen);
      f->nxcells = (int32) (xlen * f->inv_cutoff);
      if (f->nxcells == 0) f->nxcells = 1;
      f->inv_xcellsize = (double) f->nxcells / xlen;
      f->lo.x = center.x - 0.5 * xlen;
    }
    else {
      /*
       * non-periodic: cells are exactly cutoff wide; when the length is
       * fixed, use floor(xlen / cutoff) + 1 cells centered on center.x
       */
      f->inv_xcellsize = f->inv_cutoff;
      if ( ! f->is_xresize) {
        f->nxcells = ((int32) (xlen * f->inv_cutoff)) + 1;
        ASSERT(f->nxcells > 0);
        f->lo.x = center.x - 0.5 * (f->nxcells * cutoff);
      }
    }

    /* same treatment for the y dimension */
    if (flags & FORCE_Y_PERIODIC) {
      ASSERT(cutoff <= ylen);
      f->nycells = (int32) (ylen * f->inv_cutoff);
      if (f->nycells == 0) f->nycells = 1;
      f->inv_ycellsize = (double) f->nycells / ylen;
      f->lo.y = center.y - 0.5 * ylen;
    }
    else {
      f->inv_ycellsize = f->inv_cutoff;
      if ( ! f->is_yresize) {
        f->nycells = ((int32) (ylen * f->inv_cutoff)) + 1;
        ASSERT(f->nycells > 0);
        f->lo.y = center.y - 0.5 * (f->nycells * cutoff);
      }
    }

    /* same treatment for the z dimension */
    if (flags & FORCE_Z_PERIODIC) {
      ASSERT(cutoff <= zlen);
      f->nzcells = (int32) (zlen * f->inv_cutoff);
      if (f->nzcells == 0) f->nzcells = 1;
      f->inv_zcellsize = (double) f->nzcells / zlen;
      f->lo.z = center.z - 0.5 * zlen;
    }
    else {
      f->inv_zcellsize = f->inv_cutoff;
      if ( ! f->is_zresize) {
        f->nzcells = ((int32) (zlen * f->inv_cutoff)) + 1;
        ASSERT(f->nzcells > 0);
        f->lo.z = center.z - 0.5 * (f->nzcells * cutoff);
      }
    }

    /*
     * NOTE(review): for a non-periodic dimension of zero length the
     * count n?cells is not assigned above, so this product uses whatever
     * value f already holds -- presumably zero at this point; confirm
     */
    f->ncells = f->nxcells * f->nycells * f->nzcells;
    if (f->ncells > 0) {
      f->cell = (ForceCell *) malloc(f->ncells * sizeof(ForceCell));
      if (f->cell == NULL) return FORCE_FAIL;
      if (force_setup_nonbonded_cells(f)) return FORCE_FAIL;
    }

    /*
     * one int32 per atom
     * NOTE(review): not used in this file; presumably links the atoms
     * that share a cell -- confirm
     */
    f->next = (int32 *) malloc(f->param->atom_len * sizeof(int32));
    if (f->next == NULL) return FORCE_FAIL;
  }

  /* exclusion lists are needed whether or not a cutoff is used */
  if (build_exclusions(f)) return FORCE_FAIL;
  ASSERT(f->excl_list != NULL);
  ASSERT(!(flags & FORCE_EXCL_SCAL14) || f->scaled14_list != NULL);

  return 0;
}


/*
 * fill in the neighbor list of every grid cell
 *
 * expects f->cell to already hold f->ncells cells forming an
 * nxcells * nycells * nzcells grid with x varying fastest
 *
 * every cell is zeroed and then given up to 14 entries: itself plus the
 * 13 adjacent cells reached by the "upper half" of the 27 displacements
 * (z step 0 or 1), so each adjacent pair of cells is listed only once;
 * an entry that wraps around a periodic boundary also stores the offset
 * (plus or minus the domain length) that goes with the wrap, and steps
 * across a non-periodic boundary are left out
 *
 * always returns 0
 */
int force_setup_nonbonded_cells(Force *f)
{
  const int32 flags = f->param->flags;
  const int xwrap = ((flags & FORCE_X_PERIODIC) != 0);
  const int ywrap = ((flags & FORCE_Y_PERIODIC) != 0);
  const int zwrap = ((flags & FORCE_Z_PERIODIC) != 0);
  const double xlen = f->param->xlen;
  const double ylen = f->param->ylen;
  const double zlen = f->param->zlen;
  const int32 nxcells = f->nxcells;
  const int32 nycells = f->nycells;
  const int32 nzcells = f->nzcells;
  const int32 ncells = f->ncells;
  ForceCell *cell = f->cell;
  ForceCell *home;
  MD_Dvec shift;
  int32 ix, iy, iz;     /* grid coordinates of the home cell */
  int32 sx, sy, sz;     /* step from the home cell to a neighbor */
  int32 nbx, nby, nbz;  /* grid coordinates of the neighbor cell */
  int32 n, nn;

  /* dimension of grid cell array and allocation should be done */
  ASSERT(nxcells > 0);
  ASSERT(nycells > 0);
  ASSERT(nzcells > 0);
  ASSERT(ncells == nxcells * nycells * nzcells);
  ASSERT(cell != NULL);

  /* start every cell with an empty neighbor list */
  memset(cell, 0, ncells * sizeof(ForceCell));

  for (iz = 0;  iz < nzcells;  iz++) {
    for (iy = 0;  iy < nycells;  iy++) {
      for (ix = 0;  ix < nxcells;  ix++) {

        /* flattened index of the home cell */
        n = (iz * nycells + iy) * nxcells + ix;
        ASSERT(n >= 0 && n < ncells);
        home = &cell[n];

        /* z never steps backward */
        for (sz = 0;  sz <= 1;  sz++) {
          nbz = iz + sz;
          shift.z = 0.0;
          if (nbz == nzcells) {
            if ( ! zwrap) continue;
            nbz = 0;
            shift.z = zlen;
          }

          /* y may step backward only after z has stepped forward */
          for (sy = (sz != 0 ? -1 : 0);  sy <= 1;  sy++) {
            nby = iy + sy;
            shift.y = 0.0;
            if (nby == nycells) {
              if ( ! ywrap) continue;
              nby = 0;
              shift.y = ylen;
            }
            else if (nby == -1) {
              if ( ! ywrap) continue;
              nby = nycells - 1;
              shift.y = -ylen;
            }

            /* x may step backward unless both z and y stayed put */
            for (sx = (sz == 0 && sy == 0 ? 0 : -1);  sx <= 1;  sx++) {
              nbx = ix + sx;
              shift.x = 0.0;
              if (nbx == nxcells) {
                if ( ! xwrap) continue;
                nbx = 0;
                shift.x = xlen;
              }
              else if (nbx == -1) {
                if ( ! xwrap) continue;
                nbx = nxcells - 1;
                shift.x = -xlen;
              }

              /* flattened index of the neighbor cell */
              nn = (nbz * nycells + nby) * nxcells + nbx;
              ASSERT(nn >= 0 && nn < ncells);

              /* append neighbor index and its offset to the home cell */
              home->nbr[ home->nnbrs ] = nn;
              home->offset[ home->nnbrs ] = shift;
              home->nnbrs++;
            }
          }
        }

      }
    }
  }
  return 0;
}


/*
 * build the van der Waals parameter table
 *
 * table is a "square" symmetric matrix, dimension (natomprms * natomprms)
 * each entry of matrix contains A, B, A_14, B_14 parameters
 * matrix is indexed by atom "types" (0..natomprms-1)
 *
 * vdwtable is stored as one-dimensional array
 * index for (i,j) atom pair interaction is:  4 * (i * natomprms + j)
 */
int build_vdwtable(Force *f)
{
  MD_AtomPrm *atomprm = f->param->atomprm;
  const int32 natomprms = f->param->atomprm_len;
  MD_NbfixPrm *nbfixprm = f->param->nbfixprm;
  const int32 nnbfixprms = f->param->nbfixprm_len;
  double *vdwtable, *ij_entry, *ji_entry;
  double neg_emin, rmin, neg_emin14, rmin14;
  int32 i, j, k;

  ASSERT(natomprms > 0);
  vdwtable = (double *) malloc(4 * natomprms * natomprms * sizeof(double));
  if (vdwtable == NULL) return FORCE_FAIL;

  /* compute each table entry given separate i and j atom params */
  for (i = 0;  i < natomprms;  i++) {
    for (j = i;  j < natomprms;  j++) {
      ij_entry = vdwtable + 4 * (i * natomprms + j);
      ji_entry = vdwtable + 4 * (j * natomprms + i);

      /* compute vdw A and B coefficients for atom type ij interaction */
      neg_emin = sqrt(atomprm[i].emin * atomprm[j].emin);
      rmin = 0.5 * (atomprm[i].rmin + atomprm[j].rmin);
      neg_emin14 = sqrt(atomprm[i].emin14 * atomprm[j].emin14);
      rmin14 = 0.5 * (atomprm[i].rmin14 + atomprm[j].rmin14);

      /* raise rmin and rmin14 to 6th power */
      rmin *= rmin * rmin;
      rmin *= rmin;
      rmin14 *= rmin14 * rmin14;
      rmin14 *= rmin14;

      /* set ij entry and its transpose */
      ij_entry[A]    = ji_entry[A]    = neg_emin * rmin * rmin;
      ij_entry[B]    = ji_entry[B]    = 2.0 * neg_emin * rmin;
      ij_entry[A_14] = ji_entry[A_14] = neg_emin14 * rmin14 * rmin14;
      ij_entry[B_14] = ji_entry[B_14] = 2.0 * neg_emin14 * rmin14;
    }
  }

  /* now go back and update entries for nbfix params */
  for (k = 0;  k < nnbfixprms;  k++) {
    i = nbfixprm[k].prm[0];
    j = nbfixprm[k].prm[1];

    ij_entry = vdwtable + 4 * (i * natomprms + j);
    ji_entry = vdwtable + 4 * (j * natomprms + i);

    /* compute vdw A and B coefficients for this fixed type interaction */
    neg_emin = -nbfixprm[k].emin;
    rmin = nbfixprm[k].rmin;
    neg_emin14 = -nbfixprm[k].emin14;
    rmin14 = nbfixprm[k].rmin14;

    /* raise rmin and rmin14 to 6th power */
    rmin *= rmin * rmin;
    rmin *= rmin;
    rmin14 *= rmin14 * rmin14;
    rmin14 *= rmin14;

    /* set ij entry and its transpose */
    ij_entry[A]    = ji_entry[A]    = neg_emin * rmin * rmin;
    ij_entry[B]    = ji_entry[B]    = 2.0 * neg_emin * rmin;
    ij_entry[A_14] = ji_entry[A_14] = neg_emin14 * rmin14 * rmin14;
    ij_entry[B_14] = ji_entry[B_14] = 2.0 * neg_emin14 * rmin14;
  }

  f->vdwtable = vdwtable;
  return 0;
}


/*
 * build the exclusion lists
 *
 * lists are built from MD_Excl and MD_Bond
 *
 * algorithm (using set notation):
 *   exclx[i] = { j : there is an explicit exclusion (i,j) }
 *   excl12[i] = { j : there is a bond (i,j) }
 *   excl13[i] = (excl12[i] U ( U_{ j \in excl12[i] } excl12[j] )) \ {i}
 *   excl14[i] = (excl13[i] U ( U_{ j \in excl13[i] } excl12[j] )) \ {i}
 *   scaled14[i] = excl14[i] \ excl13[i]
 *
 *   excl_list[i] = exclx[i],              if policy is FORCE_EXCL_NONE
 *                = exclx[i] U excl12[i],  if policy is FORCE_EXCL_12
 *                = exclx[i] U excl13[i],  if policy is FORCE_EXCL_13
 *                = exclx[i] U excl14[i],  if policy is FORCE_EXCL_14
 *
 *   excl_list[i] = exclx[i] U excl13[i]
 *     AND
 *   scaled14_list[i] = scaled14[i],       if policy is FORCE_EXCL_SCAL14
 *
 * allocate little extra memory (rows are sized from counted lengths)
 * implement by merging sorted exclusion lists
 * each atom's exclusion array is terminated by MD_INT32_MAX sentinel
 *
 * returns 0 on success, with f->excl_list (and f->scaled14_list for the
 * FORCE_EXCL_SCAL14 policy) set and all other working arrays released
 *
 * returns FORCE_FAIL if there are no atoms or an allocation fails;
 * f->excl_list and f->scaled14_list are then NULL
 * NOTE(review): the failure returns do not call build_exclusions_cleanup,
 * so working arrays stay attached to f; presumably the caller's teardown
 * releases them -- confirm
 *
 * NOTE(review): atom indices in excl[] and bond[] are used unchecked as
 * array subscripts; assumes 0 <= index < natoms was validated earlier --
 * confirm
 */
int build_exclusions(Force *f)
{
  const MD_Excl *excl = f->param->excl;
  const MD_Bond *bond = f->param->bond;
  const int32 natoms = f->param->atom_len;
  const int32 nexcls = f->param->excl_len;
  const int32 nbonds = f->param->bond_len;
  const int32 excl_policy = (f->param->flags & FORCE_MASK_EXCL);
  int32 *list;
  int32 len, i, j, k, ii, jj, kk, atom1, atom2;
  int32 size;     /* allocated length of accum and dest arrays */
  int32 maxsize;  /* largest length needed to be held by accum or dest */
  int32 accumlen; /* used length of accum (accumlen <= maxsize <= size) */

  /* initialize */
  f->excl_list = NULL;
  f->scaled14_list = NULL;

  /* error if there are no atoms */
  if (natoms == 0)  return FORCE_FAIL;

  /* allocate memory for explicit exclusions list */
  /* (calloc so that unallocated rows are NULL and counts start at zero) */
  f->exclx = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->exclx == NULL) return FORCE_FAIL;
  f->lenx = (int32 *) calloc(natoms, sizeof(int32));
  if (f->lenx == NULL) return FORCE_FAIL;

  /* count number of explicit exclusions for each atom */
  /* (an exclusion of an atom with itself is ignored) */
  for (i = 0;  i < nexcls;  i++) {
    if (excl[i].atom[0] != excl[i].atom[1]) {
      f->lenx[ excl[i].atom[0] ]++;
      f->lenx[ excl[i].atom[1] ]++;
    }
  }

  /* allocate memory for each row of exclx, leave space for sentinel */
  for (i = 0;  i < natoms;  i++) {
    f->exclx[i] = (int32 *) malloc((f->lenx[i] + 1) * sizeof(int32));
    if (f->exclx[i] == NULL) return FORCE_FAIL;
    f->lenx[i] = 0;  /* zero this to be length counter */
  }

  /* loop over explicit exclusions to fill in the rows of exclx */
  /* (each exclusion is recorded in the rows of both of its atoms) */
  for (i = 0;  i < nexcls;  i++) {
    atom1 = excl[i].atom[0];
    atom2 = excl[i].atom[1];
    if (atom1 != atom2) {
      f->exclx[atom1][ f->lenx[atom1]++ ] = atom2;
      f->exclx[atom2][ f->lenx[atom2]++ ] = atom1;
    }
  }

  /* place sentinel at end of each row */
  /* (must be in place before sort, which relies on it to stop) */
  for (i = 0;  i < natoms;  i++) {
    f->exclx[i][ f->lenx[i] ] = MD_INT32_MAX;
  }

  /* sort each exclx row */
  for (i = 0;  i < natoms;  i++) {
    sort(f->exclx[i], f->lenx[i]);
  }

  /* if we're doing no bond exclusions, we're done */
  if (excl_policy == FORCE_EXCL_NONE) {
    /* hand the rows over as the result; clearing exclx keeps cleanup off them */
    f->excl_list = f->exclx;
    f->exclx = NULL;
    build_exclusions_cleanup(f);
    return 0;
  }

  /* allocate memory for 1-2 exclusions list */
  f->excl12 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->excl12 == NULL) return FORCE_FAIL;
  f->len12 = (int32 *) calloc(natoms, sizeof(int32));
  if (f->len12 == NULL) return FORCE_FAIL;

  /* find the length of each row of excl12 */
  for (i = 0;  i < nbonds;  i++) {
    f->len12[ bond[i].atom[0] ]++;
    f->len12[ bond[i].atom[1] ]++;
  }

  /* allocate memory for each row of excl12 */
  /* leave space for explicit exclusion list and sentinel */
  /* also determine maxsize */
  maxsize = 0;
  for (i = 0;  i < natoms;  i++) {
    len = f->len12[i] + f->lenx[i] + 1;
    if (maxsize < len) maxsize = len;
    f->excl12[i] = (int32 *) malloc(len * sizeof(int32));
    if (f->excl12[i] == NULL) return FORCE_FAIL;
    f->len12[i] = 0;   /* zero this to be length counter */
  }

  /* loop over bonds to fill in the rows of excl12 */
  for (i = 0;  i < nbonds;  i++) {
    atom1 = bond[i].atom[0];
    atom2 = bond[i].atom[1];
    f->excl12[atom1][ f->len12[atom1]++ ] = atom2;
    f->excl12[atom2][ f->len12[atom2]++ ] = atom1;
  }

  /* place sentinel at end of each row */
  for (i = 0;  i < natoms;  i++) {
    f->excl12[i][ f->len12[i] ] = MD_INT32_MAX;
  }

  /* sort each excl12 row */
  for (i = 0;  i < natoms;  i++) {
    sort(f->excl12[i], f->len12[i]);
  }

  /* initialize accum and dest arrays for merge and swap */
  /* (start at 10 and double until the longest excl12 + exclx row fits) */
  size = 10;
  while (size < maxsize)  size *= 2;
  f->accum = (int32 *) malloc(size * sizeof(int32));
  if (f->accum == NULL) return FORCE_FAIL;
  f->dest = (int32 *) malloc(size * sizeof(int32));
  if (f->dest == NULL) return FORCE_FAIL;

  /* if we're excluding only 1-2 interactions, we're done */
  if (excl_policy == FORCE_EXCL_12) {
    /* merge each excl12 row with exclx row */
    /* (the row was allocated above with room for the merged result) */
    for (i = 0;  i < natoms;  i++) {
      len = merge(f->dest, f->exclx[i], f->excl12[i], i);
      memcpy(f->excl12[i], f->dest, (len + 1) * sizeof(int32));
    }
    f->excl_list = f->excl12;
    f->excl12 = NULL;
    build_exclusions_cleanup(f);
    return 0;
  }

  /* allocate memory for 1-3 exclusions list */
  f->excl13 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->excl13 == NULL) return FORCE_FAIL;
  f->len13 = (int32 *) malloc(natoms * sizeof(int32));
  if (f->len13 == NULL) return FORCE_FAIL;

  /* merge the excl12 lists into excl13 lists */
  for (i = 0;  i < natoms;  i++) {
    /* start the accumulator with atom i's own bonded neighbors */
    memcpy(f->accum, f->excl12[i], (f->len12[i] + 1) * sizeof(int32));
    accumlen = f->len12[i];
    for (j = 0;  f->excl12[i][j] < MD_INT32_MAX;  j++) {
      k = f->excl12[i][j];
      if (k == i) continue;
      /*
       * upper bound on the merged length; grow both buffers until size
       * exceeds it plus lenx[i], which leaves room for the sentinel and
       * for the later merge with exclx[i]
       */
      maxsize = accumlen + f->len12[k];
      if (size <= maxsize + f->lenx[i]) {
        do { size *= 2; } while (size <= maxsize + f->lenx[i]);
        list = (int32 *) realloc(f->accum, size * sizeof(int32));
        if (list == NULL) return FORCE_FAIL;
        f->accum = list;
        list = (int32 *) realloc(f->dest, size * sizeof(int32));
        if (list == NULL) return FORCE_FAIL;
        f->dest = list;
      }
      /* fold in the neighbors of neighbor k, leaving out atom i itself */
      accumlen = merge(f->dest, f->accum, f->excl12[k], i);
      /* swap buffers so accum again holds the running result */
      list = f->accum;
      f->accum = f->dest;
      f->dest = list;
    }
    /* size the row to also hold exclx[i], merged in later for some policies */
    len = accumlen + f->lenx[i] + 1;
    f->excl13[i] = (int32 *) malloc(len * sizeof(int32));
    if (f->excl13[i] == NULL) return FORCE_FAIL;
    memcpy(f->excl13[i], f->accum, (accumlen + 1) * sizeof(int32));
    f->len13[i] = accumlen;
  }

  /* if we're excluding 1-2 and 1-3 interactions, we're done */
  if (excl_policy == FORCE_EXCL_13) {
    /* merge each excl13 row with exclx row */
    for (i = 0;  i < natoms;  i++) {
      len = merge(f->dest, f->exclx[i], f->excl13[i], i);
      memcpy(f->excl13[i], f->dest, (len + 1) * sizeof(int32));
    }
    f->excl_list = f->excl13;
    f->excl13 = NULL;
    build_exclusions_cleanup(f);
    return 0;
  }

  /* allocate memory for 1-4 exclusions list */
  f->excl14 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->excl14 == NULL) return FORCE_FAIL;

  /* merge the excl13 lists into excl14 lists */
  /* (same accumulate-and-swap scheme as for excl13, seeded with excl13[i]) */
  for (i = 0;  i < natoms;  i++) {
    memcpy(f->accum, f->excl13[i], (f->len13[i] + 1) * sizeof(int32));
    accumlen = f->len13[i];
    for (j = 0;  f->excl13[i][j] < MD_INT32_MAX;  j++) {
      k = f->excl13[i][j];
      if (k == i) continue;
      maxsize = accumlen + f->len12[k];
      if (size <= maxsize + f->lenx[i]) {
        do { size *= 2; } while (size <= maxsize + f->lenx[i]);
        list = (int32 *) realloc(f->accum, size * sizeof(int32));
        if (list == NULL) return FORCE_FAIL;
        f->accum = list;
        list = (int32 *) realloc(f->dest, size * sizeof(int32));
        if (list == NULL) return FORCE_FAIL;
        f->dest = list;
      }
      accumlen = merge(f->dest, f->accum, f->excl12[k], i);
      list = f->accum;
      f->accum = f->dest;
      f->dest = list;
    }
    len = accumlen + f->lenx[i] + 1;
    f->excl14[i] = (int32 *) malloc(len * sizeof(int32));
    if (f->excl14[i] == NULL) return FORCE_FAIL;
    memcpy(f->excl14[i], f->accum, (accumlen + 1) * sizeof(int32));
  }

  /* if we're excluding 1-2, 1-3, and 1-4 interactions, we're done */
  if (excl_policy == FORCE_EXCL_14) {
    /* merge each excl14 row with exclx row */
    for (i = 0;  i < natoms;  i++) {
      len = merge(f->dest, f->exclx[i], f->excl14[i], i);
      memcpy(f->excl14[i], f->dest, (len + 1) * sizeof(int32));
    }
    f->excl_list = f->excl14;
    f->excl14 = NULL;
    build_exclusions_cleanup(f);
    return 0;
  }

  /* must have scaled1-4 exclusion policy */
  ASSERT(excl_policy == FORCE_EXCL_SCAL14);

  /* allocate memory for scaled 1-4 list */
  f->scaled14 = (int32 **) calloc(natoms, sizeof(int32 *));
  if (f->scaled14 == NULL) return FORCE_FAIL;

  /* scaled14_list includes everything in excl14 that is not in excl13 */
  /*
   * single pass over both rows: relies on both being sorted and on every
   * excl13 entry also appearing in excl14; this must run before exclx is
   * merged into excl13 below
   */
  for (i = 0;  i < natoms;  i++) {
    ii = jj = kk = 0;
    while (f->excl14[i][ii] < MD_INT32_MAX) {
      if (f->excl14[i][ii] != f->excl13[i][jj]) {
        f->dest[kk++] = f->excl14[i][ii++];
      }
      else {
        ii++;
        jj++;
      }
    }
    f->dest[kk] = MD_INT32_MAX;
    f->scaled14[i] = (int32 *) malloc((kk + 1) * sizeof(int32));
    if (f->scaled14[i] == NULL) return FORCE_FAIL;
    memcpy(f->scaled14[i], f->dest, (kk + 1) * sizeof(int32));
  }

  /* set pointers to the lists we need to keep */
  /* merge each excl13 row with exclx row */
  for (i = 0;  i < natoms;  i++) {
    len = merge(f->dest, f->exclx[i], f->excl13[i], i);
    memcpy(f->excl13[i], f->dest, (len + 1) * sizeof(int32));
  }
  f->excl_list = f->excl13;
  f->excl13 = NULL;
  f->scaled14_list = f->scaled14;
  f->scaled14 = NULL;
  build_exclusions_cleanup(f);
  return 0;
}



/*
 * reclaim extra memory allocated in build_exclusions
 */
void build_exclusions_cleanup(Force *f)
{
  const int32 natoms = f->param->atom_len;
  int32 k;

  if (f->exclx != NULL) {
    for (k = 0;  k < natoms;  k++)  free(f->exclx[k]);
    free(f->exclx);
    f->exclx = NULL;
  }
  if (f->excl12 != NULL) {
    for (k = 0;  k < natoms;  k++)  free(f->excl12[k]);
    free(f->excl12);
    f->excl12 = NULL;
  }
  if (f->excl13 != NULL) {
    for (k = 0;  k < natoms;  k++)  free(f->excl13[k]);
    free(f->excl13);
    f->excl13 = NULL;
  }
  if (f->excl14 != NULL) {
    for (k = 0;  k < natoms;  k++)  free(f->excl14[k]);
    free(f->excl14);
    f->excl14 = NULL;
  }
  if (f->scaled14 != NULL) {
    for (k = 0;  k < natoms;  k++)  free(f->scaled14[k]);
    free(f->scaled14);
    f->scaled14 = NULL;
  }
  free(f->lenx);
  f->lenx = NULL;
  free(f->len12);
  f->len12 = NULL;
  free(f->len13);
  f->len13 = NULL;
  free(f->accum);
  f->accum = NULL;
  free(f->dest);
  f->dest = NULL;
}


/*
 * sort list[0..len-1] into ascending order, in place
 * (insertion sort, chosen because the arrays are short)
 *
 * the caller must have stored the MD_INT32_MAX sentinel at list[len];
 * the inner scan has no bounds test of its own and stops only because
 * nothing compares greater than the sentinel
 */
void sort(int32 *list, int32 len)
{
  int32 start;

  /* list[start+1..len-1] is already sorted; extend it leftward */
  for (start = len - 2;  start >= 0;  start--) {
    const int32 key = list[start];
    int32 pos = start;

    /* slide smaller elements down one slot to open a gap for key */
    for ( ;  list[pos + 1] < key;  pos++) {
      list[pos] = list[pos + 1];
    }
    list[pos] = key;
  }
}


/*
 * merge two sorted source arrays into a destination array,
 * keeping destination sorted and deleting duplicate entries
 * and excluding n from being merged (used for the self entry)
 *
 * duplicates are deleted whether they come from both sources or from
 * repeats inside a single source, so the destination is strictly
 * increasing
 *
 * assume destination array has enough space (the combined number of
 * source entries plus one for the sentinel always suffices) and does
 * not overlap either source
 * assume each source array is terminated by sentinel MD_INT32_MAX
 * add terminating sentinel MD_INT32_MAX to destination array
 *
 * n is compared only against real entries, never against a sentinel,
 * so passing n == MD_INT32_MAX simply excludes nothing
 *
 * return length of destination (not including sentinel)
 */
int32 merge(int32 *dest, const int32 *src1, const int32 *src2, int32 n)
{
  int32 i = 0, j = 0, k = 0;
  int32 next;

  while (src1[i] < MD_INT32_MAX || src2[j] < MD_INT32_MAX) {
    /*
     * take the smaller head (on a tie consume it from both sources);
     * at least one head is a real entry here, so next is never the sentinel
     */
    if      (src1[i] < src2[j]) next = src1[i++];
    else if (src1[i] > src2[j]) next = src2[j++];
    else    { next = src1[i++];  j++; }

    /* drop the self entry */
    if (next == n) continue;

    /* drop a repeat of the value just stored */
    if (k > 0 && dest[k-1] == next) continue;

    dest[k++] = next;
  }
  dest[k] = MD_INT32_MAX;
  return k;
}
