/***************************************************************************
 *cr                                                                       
 *cr            (C) Copyright 1995-2007 The Board of Trustees of the           
 *cr                        University of Illinois                       
 *cr                         All Rights Reserved                        
 *cr                                                                   
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *	$RCSfile: BaseMolecule.C,v $
 *	$Author: johns $	$Locker:  $		$State: Exp $
 *	$Revision: 1.190 $	$Date: 2007/02/25 20:49:33 $
 *
 ***************************************************************************
 * DESCRIPTION:
 *
 * Base class for all molecules, without display-specific information.  This
 * portion of a molecule contains the structural data, and all routines to
 * find the structure (backbone, residues, etc).  It does NOT contain the
 * animation list; that is maintained by Molecule (which is derived from
 * this class).
 *
 ***************************************************************************/

#include <ctype.h>
#include <stdlib.h>
#include "Inform.h"
#include "utilities.h"
#include "intstack.h"

#include "BaseMolecule.h"
#include "VolumetricData.h"

#define MAXBONDERRORS 25



////////////////////////////  constructor

// Construct an empty molecule carrying the given ID.  No atom storage exists
// yet: atomList stays NULL until init_atoms() allocates it.  The numeric
// arguments handed to the list members are presumably initial sizes
// (NOTE(review): confirm against the list class).
BaseMolecule::BaseMolecule(int myID) : residueList(10) , fragList(1),
#if defined(VMDWITHCARBS)
     pfragList(1), nfragList(1), smallringList(1), ID(myID) {
#else
     pfragList(1), nfragList(1), ID(myID) {
#endif

  // initialize all variables
  nAtoms = 0;             // assigned for real by init_atoms()
  cur_atom = 0;           // index the next add_atom() call will fill
  atomList = NULL;
  moleculename = NULL;
  lastbonderratomid=-1;   // no atom has triggered a bond error message yet
  bonderrorcount=0;       // number of bond error messages printed so far
}


////////////////////////////   destructor

BaseMolecule::~BaseMolecule(void) {
  int idx;

  // per-atom storage allocated by init_atoms()
  delete [] atomList;

  // residue objects
  const int nres = residueList.num();
  for (idx = 0; idx < nres; ++idx)
    delete residueList[idx];

  // nucleic fragments
  const int nnuc = nfragList.num();
  for (idx = 0; idx < nnuc; ++idx)
    delete nfragList[idx];

  // protein fragments
  const int npro = pfragList.num();
  for (idx = 0; idx < npro; ++idx)
    delete pfragList[idx];

  // generic fragments
  const int nfrag = fragList.num();
  for (idx = 0; idx < nfrag; ++idx)
    delete fragList[idx];

  // volumetric data sets
  const int nvol = volumeList.num();
  for (idx = 0; idx < nvol; ++idx)
    delete volumeList[idx];

  // per-atom "extra" arrays; the "NULL" entry holds a null pointer,
  // which delete [] accepts
  const int nextra = extra.num();
  for (idx = 0; idx < nextra; ++idx)
    delete [] extra.data(idx);
}


///////////////////////  protected routines

// Allocate storage for n atoms; must be called before any atom is added.
// Returns TRUE on success (or when called again with the same count), and
// FALSE if n is not positive, if atoms are already being added with a
// different count, or if a different count was assigned earlier.
int BaseMolecule::init_atoms(int n) {
  if (n <= 0) {
    msgErr << "BaseMolecule: init_atoms called with invalid number of atoms: "
           << n << sendmsg;
    return FALSE;
  }

  if (cur_atom != 0 && n != nAtoms) {
    msgErr << "BaseMolecule: attempt to init atoms while structure building in progress!" << sendmsg;
    return FALSE;
  }

  if (atomList) {
    // storage already exists: only a repeat of the same count is accepted
    if (n == nAtoms)
      return TRUE;

    msgErr << "The number of atoms in this molecule has already been assigned."
           << sendmsg;
    return FALSE;
  }

  // first call: this is the only place where nAtoms is set
  nAtoms = n;
  atomList = new MolAtom[nAtoms];
  memset(atomList, 0, nAtoms*sizeof(MolAtom));

  // placeholder entry that is returned when a non-existent field is
  // queried, e.g. extra.data("fielddoesntexist")
  extra.add_name("NULL", NULL);

  // the "default" per-atom float fields
  extra.add_name("beta", new float[nAtoms]);
  extra.add_name("occupancy", new float[nAtoms]);
  extra.add_name("charge", new float[nAtoms]);
  extra.add_name("mass", new float[nAtoms]);
  extra.add_name("radius", new float[nAtoms]);

  // zero every field that actually has storage behind it
  const int nfields = extra.num();
  for (int f = 0; f < nfields; f++) {
    void *field = extra.data(f);
    if (field == NULL)
      continue;
    memset(field, 0, nAtoms*sizeof(float));
  }

  return TRUE;
}

// add a new atom; return it's index, or (-1) if error.
int BaseMolecule::add_atom(char *name, char *atomtype, int atomicnumber, 
    char *resname, int resid, const char *chain, const char *segname, 
    char *insertion, const char *altloc) {
  int nameindex, typeindex;
  int resnameindex, segnameindex, altlocindex, chainindex;

  if (!atomList || cur_atom >= nAtoms) {
    msgErr << "BaseMolecule: Cannot add new atom; currently " << nAtoms;
    msgErr << " atoms in structure." << sendmsg;
    return (-1);
  }

  // create atom  
  MolAtom *newatom = atom(cur_atom);
  newatom->init(cur_atom, resid, insertion);

  // add names to namelist, and put indices in MolAtom object
  nameindex = atomNames.add_name(name,atomNames.num());
  typeindex = atomTypes.add_name(atomtype, atomTypes.num());
  resnameindex = resNames.add_name(resname, resNames.num());
  segnameindex = segNames.add_name(segname, segNames.num());
  altlocindex = altlocNames.add_name(altloc, altlocNames.num());

  // use default of 'X' for chain if not given
  if(!chain || ! (*chain) || *chain == ' ')
    chainindex = chainNames.add_name("X", chainNames.num());
  else
    chainindex = chainNames.add_name(chain, chainNames.num());

  // set atom member variables
  newatom->nameindex = nameindex;
  newatom->typeindex = typeindex;
  newatom->atomicnumber = atomicnumber;
  newatom->resnameindex = resnameindex;
  newatom->segnameindex = segnameindex;
  newatom->altlocindex = altlocindex;
  newatom->chainindex = chainindex;

  // check for integer overflow/wraparound condition, which can occur
  // if an evil plugin defines 100,000 unique atom names, for example
  if (newatom->nameindex != nameindex ||
      newatom->typeindex != typeindex ||
      newatom->atomicnumber != atomicnumber ||
      newatom->resnameindex != resnameindex ||
      newatom->segnameindex != segnameindex ||
      newatom->altlocindex != altlocindex ||
      newatom->chainindex != chainindex) {
    msgErr << "BaseMolecule: Cannot add atom; namelist index value too large." << sendmsg;
    msgErr << "Recompile VMD with larger index types." << sendmsg;
    msgErr << "Atom namelist index values at time of overflow:" << sendmsg;
    msgErr << "  nameindex: " << nameindex << sendmsg;;
    msgErr << "  typeindex: " << typeindex << sendmsg;;
    msgErr << "  resnameindex: " << resnameindex << sendmsg;;
    msgErr << "  segnameindex: " << segnameindex << sendmsg;;
    msgErr << "  altlocindex: " << altlocindex << sendmsg;;
    msgErr << "  chainindex: " << chainindex << sendmsg;
    return -1;
  }

  return cur_atom++;
}


// Add a new bond between atoms a and b; return 0 on success, or (-1) on error.
//   a, b         - atom indices; must be distinct and within [0, nAtoms)
//   bondorder    - recorded for both directions through setbondorder()
//   backbonetype - passed through to MolAtom::add_bond()
// Errors: no atoms yet, an index out of range, a == b, or either atom
// refusing the bond (reported as "Exceeded maximum number of bonds").
// At most MAXBONDERRORS per-atom messages are printed, followed by a single
// "silencing" notice; later failures still return (-1) without a message.
// NOTE(review): when the bond is accepted by atom a but refused by atom b,
// the entry already added to atom a is left in place.
int BaseMolecule::add_bond(int a, int b, float bondorder, int backbonetype) {
  if (!nAtoms || a >= nAtoms || b >= nAtoms) {
    msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
    return (-1);
  } 

  // negative indices would address memory in front of the atom list
  if (a < 0 || b < 0) {
    msgErr << "BaseMolecule: Invalid atom index in bond (" << a << ", " << b
           << ")." << sendmsg;
    return (-1);
  }

  if (a == b) {
    msgErr << "BaseMolecule: Cannot bond atom " <<a<< " to itself." << sendmsg;
    return (-1);
  }

  // put the bond in the atom list (a -> b direction)
  if (atom(a)->add_bond(b, backbonetype)) {
    if (bonderrorcount < MAXBONDERRORS) {
      // consecutive failures on the same atom are reported only once
      if (lastbonderratomid != a) {
        msgErr << "MolAtom " << a << ": Exceeded maximum number of bonds ("
               << atom(a)->bonds << ")." << sendmsg;
        lastbonderratomid=a;
        bonderrorcount++;
      }
    } else if (bonderrorcount == MAXBONDERRORS) {
      msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
      msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
      bonderrorcount++;  // moves past the limit so this notice prints once
    }
    return (-1);
  }

  // b -> a direction
  if (atom(b)->add_bond(a, backbonetype)) {
    if (bonderrorcount < MAXBONDERRORS) {
      if (lastbonderratomid != b) {
        msgErr << "MolAtom " << b << ": Exceeded maximum number of bonds ("
               << atom(b)->bonds << ")." << sendmsg;
        lastbonderratomid=b;
        bonderrorcount++;
      }
    } else if (bonderrorcount == MAXBONDERRORS) {
      msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
      msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
      bonderrorcount++;
    }
    return (-1);
  }

  // store bond orders; the bond just added is the last one on each atom
  setbondorder(a, atom(a)->bonds-1, bondorder);
  setbondorder(b, atom(b)->bonds-1, bondorder);

  return 0;
}

// Add a bond to a structure but check to make sure there isn't a 
// duplicate, as may be the case when merging bondlists from a file 
// and from a distance-based bond search.
// Returns (-1) when there are no atoms or an index is out of range, and 0
// otherwise.  A bond already listed on atom a is skipped silently.  The
// result of the underlying add_bond() call is not propagated, so 0 is
// returned even if add_bond() itself reported an error.
int BaseMolecule::add_bond_dupcheck(int a, int b, float bondorder) {
  int i;

  if (!nAtoms || a >= nAtoms || b >= nAtoms) {
    msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
    return (-1);
  }

  // negative indices would address memory in front of the atom list
  if (a < 0 || b < 0) {
    msgErr << "BaseMolecule: Invalid atom index in bond (" << a << ", " << b
           << ")." << sendmsg;
    return (-1);
  }

  // scan the bonds already recorded on atom a for partner b
  MolAtom *atm = atom(a);
  int nbonds = atm->bonds;
  const int *bonds = &atm->bondTo[0];
  for (i=0; i<nbonds; i++) {
    if (bonds[i] == b) {
      return 0; // skip bond that already exists
    }
  }
  add_bond(a, b, bondorder); // add it if it doesn't already exist

  return 0;
}

///////////////////////////  public routines

// Record the order of bond slot 'bond' on atom 'atom'.  The "bondorders"
// array is created lazily: while it does not exist, an order of 1 is the
// implied default and nothing is stored.  On creation every slot is set to
// 1.0 before the requested value is written.
void BaseMolecule::setbondorder(int atom, int bond, float order) {
  float *orders = extra.data("bondorders");

  if (orders == NULL) {
    // nothing to do for the default order while no table exists
    if (order == 1)
      return;

    const int nslots = nAtoms*MAXATOMBONDS;
    extra.add_name("bondorders", new float[nslots]);
    orders = extra.data("bondorders");
    for (int slot = 0; slot < nslots; slot++)
      orders[slot] = 1.0f;
  }

  orders[atom * MAXATOMBONDS + bond] = order;
}

// Return the order of bond slot 'bond' on atom 'atom'.  While no
// "bondorders" array has been created, every bond has the default order 1.
float BaseMolecule::getbondorder(int atom, int bond) {
  float *orders = extra.data("bondorders");

  if (orders != NULL)
    return orders[atom * MAXATOMBONDS + bond];

  // no table yet: default bond order
  return 1;
}


// Look up entry n of the residue list (no range check is done here).
Residue *BaseMolecule::residue(int n) {
  Residue *found = residueList[n];
  return found;
}


// Look up entry n of the fragment list (no range check is done here).
Fragment *BaseMolecule::fragment(int n) {
  Fragment *found = fragList[n];
  return found;
}


// Given an atom index, return the residue object that atom belongs to.
// An atom whose uniq_resid is negative is not in any residue; NULL is
// returned for it.
Residue *BaseMolecule::atom_residue(int n) {
  const int uniq = atom(n)->uniq_resid;
  if (uniq >= 0)
    return residue(uniq);

  return NULL;
}


// Given an atom index, return the fragment object for the fragment it
// is in.  NULL is returned if the atom is not in a residue (negative
// uniq_resid, matching atom_residue()) or if its residue is not in a
// fragment (negative fragment index).
Fragment *BaseMolecule::atom_fragment(int n) {
  MolAtom *atm = atom(n);

  // an atom without a residue cannot be looked up in the residue list
  if (atm->uniq_resid < 0)
    return NULL;

  int frag = residue(atm->uniq_resid)->fragment;
  if(frag < 0)
    return NULL;
  else
    return fragment(frag);
}

// Return a 'default' radius for a given atom name.  Leading digits are
// skipped (some names start with a number) and the first remaining
// character, case-insensitively, selects the value.  Names that are NULL,
// empty, all digits, or start with an unlisted letter get 1.5.
float BaseMolecule::default_radius(char *nm) {
  float val = 1.5;

  // check for a missing name before looking at its characters
  if (nm == NULL)
    return val;

  // some names start with a number; the unsigned char cast keeps the
  // <ctype.h> calls well defined for characters with the high bit set
  while (*nm && isdigit((unsigned char) *nm))
    nm++;

  switch(toupper((unsigned char) nm[0])) {
    // These are similar to the values used by X-PLOR with sigma=0.8
    // see page 50 of the X-PLOR 3.1 manual
    case 'H' : val = 1.00f; break;
    case 'C' : val = 1.50f; break;
    case 'N' : val = 1.40f; break;
    case 'O' : val = 1.30f; break;
    case 'F' : val = 1.20f; break;
    case 'S' : val = 1.90f; break;
  }

  return val;
}


// Return a 'default' mass for a given atom name.  Leading digits are
// skipped (some names start with a number) and the first remaining
// character, case-insensitively, selects the value.  Names that are NULL,
// empty, all digits, or start with an unlisted letter get 12.0.
float BaseMolecule::default_mass(char *nm) {
  float val = 12.0;

  // check for a missing name before looking at its characters
  if (nm == NULL)
    return val;

  // some names start with a number; the unsigned char cast keeps the
  // <ctype.h> calls well defined for characters with the high bit set
  while (*nm && isdigit((unsigned char) *nm))
    nm++;

  switch(toupper((unsigned char) nm[0])) {
    case 'H' : val = 1.00800f; break;
    case 'C' : val = 12.01100f; break;
    case 'N' : val = 14.00700f; break;
    case 'O' : val = 15.99900f; break;
    // NOTE(review): 55.847 is the atomic mass of iron, not fluorine
    // (about 18.998) -- presumably 'F' is meant to catch "FE"; confirm
    // before changing, since existing results depend on this value.
    case 'F' : val = 55.84700f; break;
    case 'S' : val = 32.06000f; break;
  }

  return val;
}


// Return a 'default' charge for a given atom name.  The name is ignored.
float BaseMolecule::default_charge(char *) {
  // Every atom is reported as neutral for now; once a more reliable,
  // user-configurable way of determining charges exists, more realistic
  // values can be assigned here.
  const float neutral = 0.0f;
  return neutral;
}


// count the number of unique bonds in the structure
int BaseMolecule::count_bonds(void) {
  int i, j;
  int count=0;

  for (i=0; i<nAtoms; i++) {
    int nbonds = atomList[i].bonds;
    const int *bonds = &atomList[i].bondTo[0];

    for (j=0; j<nbonds; j++) {
      if (bonds[j] > i)
        count++;
    }
  }

  return count;
}

// Drop all bonds by resetting every atom's bond count to zero.
void BaseMolecule::clear_bonds(void) {
  int remaining = nAtoms;
  while (remaining-- > 0) {
    atomList[remaining].bonds = 0;
  }
}


// Analyze the molecule for more than just the atom/bond information:
// backbone atoms, residues, waters, residue connectivity, segments,
// fragments, and finally hydrogens, printing a summary line for each.
// This is here since it is called _after_ the molecule is added to
// the MoleculeList.  Thus, there is a Tcl callback to allow the
// user to update the bond information (or other fields?) before
// the actual search.
// The steps below depend on each other and must stay in this order.
void BaseMolecule::analyze(void) {
  need_find_bonds = 0; // at this point it's too late

  // I have to let 0 atoms in because I want to be able to read things
  // like electron density maps, which have no atoms.
  // It is kind of weird, then, to make BaseMolecule be at the top of the
  // hierarchy.  Oh well.
  if(nAtoms < 1)
    return;

  // call routines to find different characteristics of the molecule
  msgInfo << "Analyzing structure ..." << sendmsg;
  msgInfo << "   Atoms: " << nAtoms << sendmsg;

  // count unique bonds
  msgInfo << "   Bonds: " << count_bonds() << sendmsg;

  // restore residue and fragment lists to pristine state
  residueList.clear();
  fragList.clear();
  pfragList.clear();   ///< clear list of protein fragments
  pfragCyclic.clear(); ///< clear cyclic fragment flags
  nfragList.clear();   ///< clear list of nucleic fragments
  nfragCyclic.clear(); ///< clear cyclic fragment flags

  // assign per-atom backbone types
  find_backbone();

  // find all the atoms in a resid connected to DNA/RNA/PROTEIN/WATER
  // also, assign a unique resid (uniq_resid) to each atom
  nResidues = find_residues();
  msgInfo << "   Residues: " << nResidues << sendmsg;

  nWaters = find_waters();
  msgInfo << "   Waters: " << nWaters << sendmsg;
  
  // determine which residues are connected to each other
  bonderrorcount=0; // reset error count before residue connectivity search
  find_connected_residues(nResidues); 
 
  nSegments = find_segments(); 
  msgInfo << "   Segments: " << nSegments << sendmsg;

  // the fragment, protein and nucleic counts share one output line:
  // only the last of the three statements ends the message with sendmsg
  nFragments = find_fragments();
  msgInfo << "   Fragments: " << nFragments;

  nProteinFragments = pfragList.num();
  msgInfo << "   Protein: " << nProteinFragments;

  nNucleicFragments = nfragList.num();
  msgInfo << "   Nucleic: " << nNucleicFragments << sendmsg;
  
  // NOTE: The current procedure incorrectly identifies some lipid 
  // atoms as "ATOMNUCLEICBACK" (but not as "nucleic") as well
  // as some single water oxygens as "backbone". Here, we 
  // correct this by setting all atoms of non-polymeric residue types
  // to be "ATOMNORMAL" (i.e.: not backbone).
  MolAtom *a;
  int i;
  for (i=0; i<nAtoms; i++) {
    a = atom(i); 
    if ((a->residueType != RESNUCLEIC) && (a->residueType != RESPROTEIN)) 
      a->atomType = ATOMNORMAL;
  }

#if defined(VMDWITHCARBS)
  // find groups of atoms bonded into small rings
  // XXX 20 is the max ring size, should make this an option for the end user
  find_small_rings(20);
  int nCarbRings = smallringList.num();
  msgInfo << "   CarbRings: " << nCarbRings << sendmsg;
#endif

  // Search for hydrogens
  // XXX Must be done after the rest of the structure finding routines,
  // because those routines assume that anything that isn't NORMAL is
  // a backbone atom.
  // We use the name-based definition used in the IS_HYDROGEN macro
  for (i=0; i<nAtoms; i++) {
    MolAtom *a = atom(i); // note: shadows the 'a' declared above
    const char *aname = atomNames.name(a->nameindex);
    if (IS_HYDROGEN(aname))
      a->atomType = ATOMHYDROGEN;
  }
}


/// functions to find the backbone by matching atom names
int BaseMolecule::find_backbone(void) {
  int i, j, k;

  // Search for the protein backbone
  int protypes[4];
  protypes[0] = atomNames.typecode((char *) "CA");
  protypes[1] = atomNames.typecode((char *) "C");
  protypes[2] = atomNames.typecode((char *) "O");
  protypes[3] = atomNames.typecode((char *) "N");

  // special case for terminal oxygens that miss the search for O
  // by looking for ones connected to a C
  int termtypes[4];
  termtypes[0] = atomNames.typecode((char *) "OT1"); // standard names
  termtypes[1] = atomNames.typecode((char *) "OT2");
  termtypes[2] = atomNames.typecode((char *) "O1");  // gromacs force field 
  termtypes[3] = atomNames.typecode((char *) "O2");  // atom names

  // search for the DNA/RNA backbone;  the atom names are:
  // for the phosphate:  P, O1P, O2P
  // for the rest: O3', C3', C4', C5', O5'
  // (or O3*, C3*, C4*, C5*, O5*)
  int nuctypes[13];
  nuctypes[ 0] = atomNames.typecode((char *) "P");
  nuctypes[ 1] = atomNames.typecode((char *) "O1P");
  nuctypes[ 2] = atomNames.typecode((char *) "O2P");
  nuctypes[ 3] = atomNames.typecode((char *) "C3*");
  nuctypes[ 4] = atomNames.typecode((char *) "C3'");
  nuctypes[ 5] = atomNames.typecode((char *) "O3*");
  nuctypes[ 6] = atomNames.typecode((char *) "O3'");
  nuctypes[ 7] = atomNames.typecode((char *) "C4*");
  nuctypes[ 8] = atomNames.typecode((char *) "C4'");
  nuctypes[ 9] = atomNames.typecode((char *) "C5*");
  nuctypes[10] = atomNames.typecode((char *) "C5'");
  nuctypes[11] = atomNames.typecode((char *) "O5*");
  nuctypes[12] = atomNames.typecode((char *) "O5'");

#if 0
  // non-backbone nucleic acid atom names
  nuctypes[ 3] = atomNames.typecode((char *) "C1*");
  nuctypes[ 4] = atomNames.typecode((char *) "C1'");
  nuctypes[ 5] = atomNames.typecode((char *) "C2*");
  nuctypes[ 6] = atomNames.typecode((char *) "C2'");
  nuctypes[ 7] = atomNames.typecode((char *) "O2*");
  nuctypes[ 8] = atomNames.typecode((char *) "O2'");
  nuctypes[15] = atomNames.typecode((char *) "O4*");
  nuctypes[16] = atomNames.typecode((char *) "O4'");
#endif

  // special case for terminal nucleic residues
  int nuctermtypes[2];
  nuctermtypes[0] = atomNames.typecode((char *) "H5T"); // standard names
  nuctermtypes[1] = atomNames.typecode((char *) "H3T");


  // loop over all atoms assigning atom backbone type flags
  for (i=0; i<nAtoms; i++) {
    MolAtom *a = atom(i);
 
    // initialize atom type to non-backbone
    a->atomType = ATOMNORMAL;

    // check for protein backbone atom names
    for (j=0; j < 4; j++) {
      if (a->nameindex == protypes[j]) {
        a->atomType = ATOMPROTEINBACK;
        break;
      }
    }

    // check terminal residue names as well
    for (j=0; j < 4; j++) {
      if (a->nameindex == termtypes[j]) { // check if OT1, OT2
        for (k=0; k < a->bonds; k++) {
          if (atom(a->bondTo[k])->atomType == ATOMPROTEINBACK) {
            a->atomType = ATOMPROTEINBACK;
            break;
          }
        }
      }
    }
  
    // check if in nucleic backbone, if not already set
    if(!(a->atomType)) {
      for (j=0; j < 13; j++) {
        if (a->nameindex == nuctypes[j]) {
          a->atomType = ATOMNUCLEICBACK;
          break;
        }
      }
    }

    // check if nucleic terminal atom names
    for (j=0; j < 2; j++) {
      if (a->nameindex == nuctermtypes[j]) {
        for (k=0; k < a->bonds; k++) {
          if (atom(a->bondTo[k])->atomType == ATOMNUCLEICBACK) {
            a->atomType = ATOMNUCLEICBACK;
            break;
          }
        }
      }
    }
  }

  return 0; 
}



// find water molecules based on the residue name
// from the documentation for molscript, these are possible
// waters:
// type H2O HH0 OHH HOH OH2 SOL WAT
// as well, I add TIP, TIP2, TIP3, and TIP4
// The count is the number of sets of connected RESWATERS
int BaseMolecule::find_waters(void) {
  int i, j;
  MolAtom *a;

  int watertypes[11];
  watertypes[0] = resNames.typecode((char *) "H2O");
  watertypes[1] = resNames.typecode((char *) "HH0");
  watertypes[2] = resNames.typecode((char *) "OHH");
  watertypes[3] = resNames.typecode((char *) "HOH");
  watertypes[4] = resNames.typecode((char *) "OH2");
  watertypes[5] = resNames.typecode((char *) "SOL");
  watertypes[6] = resNames.typecode((char *) "WAT");
  watertypes[7] = resNames.typecode((char *) "TIP");
  watertypes[8] = resNames.typecode((char *) "TIP2");
  watertypes[9] = resNames.typecode((char *) "TIP3");
  watertypes[10] = resNames.typecode((char *) "TIP4");

  for (i=0; i<nAtoms; i++) {
    a = atom(i);
    if (a->residueType == RESNOTHING) {  // make sure it isn't named yet
      for (j=0; j<11; j++) {
        if (watertypes[j] == a->resnameindex) {
          a->residueType = RESWATERS;
          break;
        }
      }
    }
  }
 
  int count = find_connected_waters2();

  return count;   
}


// if this is a RESWATERS with index idx, mark it and find if
// any of its neighbors are RESWATERS
// this does a depth-first search with RECURSION.
void BaseMolecule::find_connected_waters(int i, char *tmp) {
  MolAtom *a = atom(i);
  int j;
  if (a->residueType == RESWATERS && !tmp[i]) {
    tmp[i] = TRUE;
    for (j=0; j<a->bonds; j++) {
      find_connected_waters(a->bondTo[j], tmp);
    }
  }
}


// if this is a RESWATERS with index idx, mark it and find if
// any of its neighbors are RESWATERS
int BaseMolecule::find_connected_waters2(void) {
  MolAtom *a;
  int count, i;
  IntStackHandle s;

  char *tmp = new char[nAtoms];
  memset(tmp, 0, nAtoms * sizeof(char));

  s = intstack_create(nAtoms);

  for (count=0, i=0; i<nAtoms; i++) {
    if (atom(i)->residueType == RESWATERS && !tmp[i]) {
      int nextatom;

      count++;
      intstack_push(s, i);
    
      // find and mark all connected waters 
      while (!intstack_pop(s, &nextatom)) { 
        int j;

        a = atom(nextatom);
        tmp[nextatom] = TRUE;

        for (j=a->bonds - 1; j>=0; j--) {
          int bi = a->bondTo[j];
          MolAtom *b = atom(bi);
          if (b->residueType == RESWATERS && !tmp[bi])
            intstack_push(s, bi);
        }
      }
    }
  }

  intstack_destroy(s);
  delete [] tmp;

  return count;
}


// Find the backbone atoms of type 'backbone' that are bonded together,
// share the given residueid, and sit on the same chain/segname, starting
// from atomidx.  Every atom found gets flgs[] set to tmpid.
// Returns the number of distinct atoms found, or 0 if atomidx is already
// flagged or is not a matching backbone atom.
//   s     - scratch stack; emptied before use
//   tmpid - must be nonzero, since 0 in flgs[] means "not visited"
// Atoms are flagged at the moment they are pushed, so an atom reachable
// along two different paths is pushed, and therefore counted, only once.
int BaseMolecule::find_connected_backbone(IntStackHandle s, int backbone,
                         int atomidx, int residueid, int tmpid, int *flgs) {
  if (flgs[atomidx] != 0)
    return 0; // already done

  MolAtom *x = atom(atomidx);
  if (x->atomType != backbone || x->resid != residueid)
    return 0; // not a backbone atom, or resid doesn't match

  intstack_popall(s); // just in case
  flgs[atomidx] = tmpid;
  intstack_push(s, atomidx);
  int nextatom;
  int count = 0;
   
  // find and count connected backbone atoms
  while (!intstack_pop(s, &nextatom)) {
    MolAtom *a = atom(nextatom);
    count++;

    int j;
    for (j=a->bonds - 1; j>=0; j--) {
      int bi = a->bondTo[j];
      if (flgs[bi] != 0)
        continue; // flagged earlier (by this search or a previous one)

      MolAtom *b = atom(bi);

      // skip connections to atoms on different chains/segnames
      if (a->chainindex != b->chainindex || 
          a->segnameindex != b->segnameindex)
        continue;

      if (b->atomType == backbone && b->resid == residueid) {
        flgs[bi] = tmpid; // flag now so it cannot be pushed a second time
        intstack_push(s, bi);
      }
    }
  }

  return count;
}


// find_connected_backbone leaves entries of flgs set to tmpid; starting
// from atom i, reset to 0 every such entry reachable through bonds.
// Does nothing if atom i itself is not flagged with tmpid.
void BaseMolecule::clean_up_connection(IntStackHandle s, int i, int tmpid, int *flgs) {
  if (flgs[i] != tmpid)  // been here before
    return;

  intstack_popall(s); // just in case
  intstack_push(s, i);

  // walk the flagged atoms and null out their flags
  int cur;
  while (!intstack_pop(s, &cur)) {
    flgs[cur] = 0;

    MolAtom *atm = atom(cur);
    int nb = atm->bonds;
    while (--nb >= 0) {
      const int other = atm->bondTo[nb];
      if (flgs[other] == tmpid)
        intstack_push(s, other);
    }
  }
}



// Now that this resid is known to be okay, mark it: starting from atom i,
// every atom reachable through bonds that has the same residueid and the
// same chain/segname gets residueType = restype and flgs[] = tmpid.
// Does nothing if atom i is already flagged or has a different resid.
void BaseMolecule::find_connected_atoms_in_resid(IntStackHandle s,
    int restype, int i, int residueid, int tmpid, int *flgs)
{
  if (flgs[i] != 0 || atom(i)->resid != residueid)
    return;

  intstack_popall(s); // just in case
  intstack_push(s, i);

  int cur;
  while (!intstack_pop(s, &cur)) {
    MolAtom *a = atom(cur);
    flgs[cur] = tmpid;
    a->residueType = restype;

    int nb = a->bonds;
    while (--nb >= 0) {
      const int other = a->bondTo[nb];
      MolAtom *b = atom(other);

      // follow only unflagged atoms of the same resid on the same
      // chain/segname
      if (flgs[other] != 0)
        continue;
      if (a->chainindex != b->chainindex ||
          a->segnameindex != b->segnameindex)
        continue;
      if (b->resid != residueid)
        continue;

      intstack_push(s, other);
    }
  }
}



// Find connected backbone atoms with the same resid
// if found, find all the atoms with the same resid
// which are connected to those backbone atoms only through
// atoms of the same resid
void BaseMolecule::find_and_mark(int n, int backbone,
  int restype, int *tmpid, int *flgs)
{
  int i;
  MolAtom *a;
  int residueid; // the real resid
  IntStackHandle s = intstack_create(nAtoms);

  for (i=0; i<nAtoms; i++) {
    a = atom(i);   // look for a new backbone atom
    if (a->atomType == backbone && flgs[i] == 0) {
      residueid = a->resid;
      if (find_connected_backbone(s, backbone, i, residueid, *tmpid, flgs) >= n) {
        // if find was successful, start all over again
        clean_up_connection(s, i, *tmpid, flgs);
        // but mark all the Atoms connected to here
        find_connected_atoms_in_resid(s, restype, i, residueid, *tmpid, flgs);
        // and one more was made
        (*tmpid)++;
      } else {
        // clean things up so I won't have problems later
        clean_up_connection(s, i, *tmpid, flgs);
      }
    }
  }

  intstack_destroy(s);
}



// assign a uniq resid (uniq_resid) to each set of connected atoms
// with the same residue id.  There could be many residues with the
// same id, but not connected (the SSN problem - SSNs are not unique
// so don't use them as the primary key)
int BaseMolecule::make_uniq_resids(int *flgs) {
  int i;
  int num_residues = 0;
  IntStackHandle s = intstack_create(nAtoms);

  for (i=0; i<nAtoms; i++) {
    if (!flgs[i]) {  // not been numbered
      // find connected atoms to i with the same resid and label
      // it with the uniq_resid
      MolAtom *a = atom(i);
      int resid = a->resid;
      char *insertion = a->insertionstr;

      intstack_push(s, i);
      int nextatom;

      // Loop over all atoms we're bonded to in the same chain/segname
      while (!intstack_pop(s, &nextatom)) {
        MolAtom *a = atom(nextatom);
        a->uniq_resid = num_residues;  // give it the new resid number
        flgs[nextatom] = TRUE;         // mark this atom done
  
        int j;
        for (j=a->bonds - 1; j>=0; j--) {
          int bi = a->bondTo[j];
          if (flgs[bi] == 0) {
            MolAtom *b = atom(bi);
            if (a->chainindex == b->chainindex && 
                a->segnameindex == b->segnameindex &&
                b->resid == resid && !strcmp(b->insertionstr, insertion))
              intstack_push(s, bi);
          }
        }
      }

      num_residues++;
    }
  }

  intstack_destroy(s);

  return num_residues;
}



// Number the residues and classify protein and nucleic acid residues.
// Returns the number of unique residues.
int BaseMolecule::find_residues(void) {
  const size_t flagbytes = nAtoms * sizeof(int);

  // scratch flags shared by the connected-atom searches below
  int *flgs = new int[nAtoms];
  memset(flgs, 0, flagbytes);

  // Give each set of connected atoms with the same residue id its own
  // uniq_resid.  Residue ids themselves may repeat for unconnected
  // residues (the SSN problem - SSNs are not unique, so they cannot serve
  // as the primary key).
  const int nresidues = make_uniq_resids(flgs);

  // the numbering pass used the flags; wipe them for the next searches
  memset(flgs, 0, flagbytes);

  // running id handed to the backbone searches; it starts at 1 because
  // 0 in flgs means "not visited"
  int back_res_count = 1;

  // Hunt for the proteins: there must be 4 PROTEINBACK atoms connected
  // and with the same resid; then all connected atoms are marked as
  // PROTEIN atoms.  This gets everything except the terminals.
  find_and_mark(4, ATOMPROTEINBACK, RESPROTEIN, &back_res_count, flgs);

  // Do the same for nucleic acids.
  // XXX we might not want to check for the phosphate (P and 2 O's).
  // Here's the quick way I can almost do that.  Unfortunately, that
  // messes up nfragList, since it needs a P to detect an end
  find_and_mark(4, ATOMNUCLEICBACK, RESNUCLEIC, &back_res_count, flgs);

  delete [] flgs;
  return nresidues;
}

// Name-based front end: translate the atom name into its name-list code
// and defer to the code-based overload.  Returns -2 if the name has no
// (non-negative) code in the atom name list.
int BaseMolecule::find_atom_in_residue(const char *name, int residue) {
  const int code = atomNames.typecode(name);
  if (code >= 0)
    return find_atom_in_residue(code, residue);

  return -2;
}


// find which residues are connected to which
// remember, I already have the uniq_id for each atom
void BaseMolecule::find_connected_residues(int num_residues) {
  int i, j;
  for (i=0; i<num_residues; i++) {   // init the list to NULLs
    residueList.append(NULL);
  }
  
  for (i=nAtoms-1; i>=0; i--) {      // go through all the atoms
    j = atom(i)->uniq_resid;
    if (residueList[j] == NULL) {    // then init the residue
      residueList[j] = new Residue(atom(i)->resid, atom(i)->residueType);
    }
    // Tell the residue that this atom is in it
    residueList[j]->add_atom(i);
  }

  // double check that everything was created
  for (i=0; i<num_residues; i++) {
    if (residueList[i] == NULL) { // no atom was found for this residue
      msgErr << "Mysterious residue creation in ";
      msgErr << "BaseMolecule::find_connected_residues." << sendmsg;
      residueList[i] = new Residue((int) -1, RESNOTHING);
    }
  }

#if defined(MAXRESIDUEBONDS)
  // now go through the atoms and find out which ones are connected
  // to which; with that info, tell the corresponding residues which
  // are connected to which
  MolAtom *a;
  for (i=0; i<nAtoms; i++) {
    a = atom(i);
    for (j=0; j<a->bonds; j++) {
      if (a->bondTo[j] > i){   // faster by only consid. larger atomnums
        MolAtom *b = atom(a->bondTo[j]);

        // skip connections to atoms on different chains/segnames
        if (a->chainindex != b->chainindex || 
            a->segnameindex != b->segnameindex)
          continue;
         
        if (residueList[a->uniq_resid]->add_bond(a->uniq_resid, b->uniq_resid)) {
          if (bonderrorcount < MAXBONDERRORS) {
            msgErr << "Residue " << a->uniq_resid
                   << " exceeded maximum number of bonds ("
                   << MAXRESIDUEBONDS << ")." << sendmsg;
            bonderrorcount++;
          } else if (bonderrorcount == MAXBONDERRORS) {
            msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
            msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
            bonderrorcount++;
          }
        }


        if (residueList[b->uniq_resid]->add_bond(b->uniq_resid, a->uniq_resid)) {
          if (bonderrorcount < MAXBONDERRORS) {
            msgErr << "Residue " << atom(a->bondTo[j])->uniq_resid
                   << " exceeded maximum number of bonds ("
                   << MAXRESIDUEBONDS << ")." << sendmsg;
            bonderrorcount++;
          } else if (bonderrorcount == MAXBONDERRORS) {
            msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
            msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
            bonderrorcount++;
          }
        }
      }
    }
  }
  
  // finally, check for unusual connections between residues, e.g. between
  // protein and water.
  int bondfrom, bondto;
  for (i=0; i<num_residues; i++) {
    bondfrom = residueList[i]->residueType;
    for (j=0; j<residueList[i]->bonds; j++) {
      bondto = residueList[residueList[i]->bondTo[j]]->residueType;
      if (bondfrom != bondto) {
        if (i < residueList[i]->bondTo[j] ) { // so that we only warn once
          msgWarn << "Unusual bond between residues:  ";
          msgWarn << residueList[i]->resid;
          switch (bondfrom) {
            case RESPROTEIN: msgWarn << " (protein)"; break;
            case RESNUCLEIC: msgWarn << " (nucleic)"; break;
            case RESWATERS:  msgWarn << " (waters)"; break;
            default:
            case RESNOTHING: msgWarn << " (none)"; break;
          }
          msgWarn << " and ";
          msgWarn << residueList[residueList[i]->bondTo[j]]->resid;
          switch (bondto) {
            case RESPROTEIN: msgWarn << " (protein)"; break;
            case RESNUCLEIC: msgWarn << " (nucleic)"; break;
            case RESWATERS:  msgWarn << " (waters)"; break;
            default:
            case RESNOTHING: msgWarn << " (none)"; break;
          }
          msgWarn << sendmsg;
        }
      }
    }
  }
#else
  // finally, check for unusual connections between residues, e.g. between
  // protein and water.
  for (i=0; i<num_residues; i++) {
    Residue *res = residueList[i];
    int bondfromtype = res->residueType;
    int numatoms = res->atoms.num();
    for (j=0; j<numatoms; j++) {
      MolAtom *a = atom(res->atoms[j]);

      // find off-residue bonds to residues of the same chain/segname
      int k;
      for (k=0; k<a->bonds; k++) {
        MolAtom *b = atom(a->bondTo[k]);

        // skip connections to atoms on different chains/segnames
        if (a->chainindex != b->chainindex || 
            a->segnameindex != b->segnameindex)
          continue;
         
        if (b->uniq_resid != i) {
          int bondtotype = residueList[b->uniq_resid]->residueType;

          if (bondfromtype != bondtotype) {
            if (i < b->uniq_resid) { // so that we only warn once
              msgWarn << "Unusual bond between residues:  ";
              msgWarn << residueList[i]->resid;
              switch (bondfromtype) {
                case RESPROTEIN: msgWarn << " (protein)"; break;
                case RESNUCLEIC: msgWarn << " (nucleic)"; break;
                case RESWATERS:  msgWarn << " (waters)"; break;
                default:
                case RESNOTHING: msgWarn << " (none)"; break;
              }
              msgWarn << " and ";
              msgWarn << residueList[b->uniq_resid]->resid;
              switch (bondtotype) {
                case RESPROTEIN: msgWarn << " (protein)"; break;
                case RESNUCLEIC: msgWarn << " (nucleic)"; break;
                case RESWATERS:  msgWarn << " (waters)"; break;
                default:
                case RESNOTHING: msgWarn << " (none)"; break;
              }
              msgWarn << sendmsg;
            }
          }
        }
      }
    }
  }
#endif
}


// Group the residues into connected fragments.  Every residue that has not
// been reached yet seeds a new Fragment (appended to fragList); the residues
// reachable from it are collected with an explicit stack, appended to that
// Fragment, and get their 'fragment' member set to the Fragment's index.
// Returns the number of fragments created by this call.
// NOTE(review): fragments are addressed as fragList[count] with count
// starting at zero, so this presumably expects fragList to be empty on
// entry -- confirm against the callers.
int BaseMolecule::find_connected_fragments(void) {
  int i;
  int count = 0;
  char *flgs = new char[residueList.num()]; // nonzero once a residue is queued
  memset(flgs, 0, residueList.num() * sizeof(char)); // clear flags
  IntStackHandle s = intstack_create(residueList.num());

  int atomsg = atomNames.typecode((char *) "SG"); // to find disulfide bonds

  int nextres;
  for (i=0; i<residueList.num(); i++) { // find unmarked fragment
    if (!flgs[i]) {
      fragList.append(new Fragment);

      // Flag the seed before queueing it.  Otherwise a neighbor whose bond
      // list points back at the seed would queue it a second time, and the
      // seed would be appended to its own fragment twice.
      flgs[i] = TRUE;
      intstack_push(s, i);

      // find and mark all connected residues with the same chain/segname
      while (!intstack_pop(s, &nextres)) {
        fragList[count]->append(nextres);
        Residue *res = residueList[nextres];
        res->fragment = count; // store residue's fragment

#if defined(MAXRESIDUEBONDS)
        // follow the per-residue bond list
        int j;
        for (j=res->bonds - 1; j>=0; j--) {
          int ri = res->bondTo[j];
          if (flgs[ri] == 0) {
            flgs[ri] = TRUE;
            intstack_push(s, ri);
          }
        }
#else
        // no per-residue bond list: derive residue bonds from the atom bonds
        int numatoms = res->atoms.num();
        int j;
        for (j=0; j<numatoms; j++) {
          MolAtom *a = atom(res->atoms[j]);

          // find all bonds to residues of the same chain/segname 
          int k;
          for (k=0; k<a->bonds; k++) {
            MolAtom *b = atom(a->bondTo[k]);
            int ri = b->uniq_resid;

            // skip connections to residues with different chains/segnames,
            // and don't follow disulfide bonds, as we want the order of
            // residue traversal to be correct so we can use it to build
            // subfragment lists later on.  The seed is already flagged, so
            // the (ri != i) test is only a cheap early-out.
            if ((ri != i) &&
                (flgs[ri] == 0) &&
                (a->chainindex == b->chainindex) &&
                (a->segnameindex == b->segnameindex) &&
                ((a->nameindex != atomsg) || (b->nameindex != atomsg))) {
              flgs[ri] = TRUE;
              intstack_push(s, ri);
            }
          }
        }
#endif
      }

      count++;
    }
  }

  intstack_destroy(s);
  delete [] flgs;

  return count;
}


// Build all fragment bookkeeping: the connected fragments first, then the
// protein (pfragList) and nucleic acid (nfragList) subfragments, and last
// the per-subfragment cyclic flags.  The return value is whatever
// find_connected_fragments() reported.
int BaseMolecule::find_fragments(void) {
  const int nconnected = find_connected_fragments();

#if 1
  // protein subfragments: start atom name "N", end atom name "C"
  const int prot_start = atomNames.typecode((char *) "N");
  const int prot_end   = atomNames.typecode((char *) "C");
  find_subfragments(prot_start, -1, -1,
                    prot_end, -1, -1, -1,
                    RESPROTEIN, &pfragList);

#if 0
  // find the nucleic acid subfragments
  find_subfragments(atomNames.typecode((char *) "P"), 
     atomNames.typecode((char *) "H5T"),
     -1,
     atomNames.typecode((char *) "O3'"),
     atomNames.typecode((char *) "O3*"),
     atomNames.typecode((char *) "H3T"),
     -1,
     RESNUCLEIC, &nfragList);
#else
  // nucleic acid subfragments: starting residues are chosen by bond count,
  // only the end atom names are needed
  const int nuc_end     = atomNames.typecode((char *) "O3'");
  const int nuc_altend  = atomNames.typecode((char *) "O3*");
  const int nuc_alt2end = atomNames.typecode((char *) "H3T");
  find_subfragments_topologically(RESNUCLEIC, &nfragList,
                                  nuc_end, nuc_altend, nuc_alt2end, -1);
#endif
#else
  find_subfragments_cyclic(&pfragList, RESPROTEIN);
  find_subfragments_cyclic(&nfragList, RESNUCLEIC);
#endif

  // flag each subfragment as cyclic or not
  find_cyclic_subfragments(&pfragList, &pfragCyclic);
  find_cyclic_subfragments(&nfragList, &nfragCyclic);

  return nconnected;
}


// For every fragment in fragList whose residues are all of type 'restype',
// append a newly allocated Fragment holding the same residue indices to
// *subfragList.  A fragment with no residues counts as matching.
void BaseMolecule::find_subfragments_cyclic(ResizeArray<Fragment *> *subfragList, int restype) {
  const int fragcount = fragList.num();
  int f, r;

  for (f=0; f<fragcount; f++) {
    const int rescount = fragList[f]->num();

    // advance past residues of the wanted type; stop at the first mismatch
    r = 0;
    while (r < rescount &&
           residueList[(*fragList[f])[r]]->residueType == restype) {
      r++;
    }

    // stopped early: at least one residue has the wrong type
    if (r < rescount)
      continue;

    // every residue matched, so duplicate the residue list
    Fragment *dup = new Fragment;
    for (r=0; r<rescount; r++) {
      dup->append((*fragList[f])[r]);
    }
    subfragList->append(dup);
  }
}



// Append one entry to *subfragCyclic for every fragment in *subfragList:
// 1 if some atom of the fragment's last residue is bonded to an atom of
// its first residue, otherwise 0.  Exactly one entry is appended per
// fragment (including empty ones) so that both arrays can be indexed with
// the same fragment number.
// Note that for a single-residue fragment the first and last residue are
// the same, so any bond inside that residue marks the fragment as cyclic.
void BaseMolecule::find_cyclic_subfragments(ResizeArray<Fragment *> *subfragList, ResizeArray<int> *subfragCyclic) {
  int i, j, frag;
  int numfrags = subfragList->num();

  // check each fragment for cycles
  for (frag=0; frag<numfrags; frag++) {
    int numres   = (*subfragList)[frag]->num();       // residues in this frag
    int cyclic   = 0;

    // fragments containing zero residues have nothing to test, but they
    // still get a (non-cyclic) entry below to keep the arrays aligned
    if (numres > 0) {
      int startres = (*(*subfragList)[frag])[0];        // first residue
      int endres   = (*(*subfragList)[frag])[numres-1]; // last residue

      // check for bonds between startres and endres
      int numatoms = residueList[endres]->atoms.num();
      int done = 0;
      for (i=0; (i < numatoms) && (!done); i++) {
        MolAtom *a = atom(residueList[endres]->atoms[i]);
        int nbonds = a->bonds;
        for (j=0; j < nbonds; j++) {
          MolAtom *b = atom(a->bondTo[j]);

          if (b->uniq_resid == startres) {
            cyclic=1;
            done=1;
            break;
          }
        }  
      }
    }

    // record whether this fragment is cyclic or not
    subfragCyclic->append(cyclic);
  }
}


// this adds the current residue type to the *subfragList,
// this finds the residue connected to the endatom atom type
// and calls this function recursively on that residue
// this will NOT work across NORMAL bonds
void BaseMolecule::find_connected_subfragment(int resnum, int fragnum, 
         char *flgs, int endatom,  int altendatom, 
         int alt2endatom, int alt3endatom,
         int restype, 
         ResizeArray<Fragment *> *subfragList)
{
  if (flgs[resnum] || residueList[resnum]->residueType != restype) 
      return;  // been here before, or this is no good
  (*subfragList)[fragnum]->append(resnum);    // add to the list
  flgs[resnum] = TRUE;                        // and prevent repeats

  // find the atom in this residue with the right type
  int i, j, nextres;
  MolAtom *a;
  for (i=residueList[resnum]->atoms.num() - 1; i>=0; i--) {
    a = atom(residueList[resnum]->atoms[i]);
    if (a->nameindex == endatom ||
        a->nameindex == altendatom ||
        a->nameindex == alt2endatom ||
        a->nameindex == alt3endatom) {   // found the end atom
      for (j=a->bonds-1; j>=0; j--) {    // look at the bonds
        // I can't look at if the residue "bond" is a PRO-PRO or NUC-NUC, since
        // that won't tell me if the atom involved is the endatom atom
        // This is important because I need to avoid things like S-S bonds
        // (note that I never checked that the end was bonded to a start on
        //  the next residue! - c'est la vie, or something like that
        if ((!(a->atomType == ATOMNORMAL && atom(a->bondTo[j])->atomType == ATOMNORMAL)) && // not backbone 
            (nextres = atom(a->bondTo[j])->uniq_resid) != resnum &&
            !flgs[nextres] ) { // found next residue, and unvisited
          find_connected_subfragment(nextres, fragnum, flgs, endatom,
              altendatom, alt2endatom, alt3endatom, restype, subfragList);
          return; // only find one; assume no branching
        }
      } // end of for
    } // end of found correct endtype
  } // searching atoms
} // end of finding connected subfragment


// Find the subfragments made of residues of type 'restype' and append them
// to *subfragList.  A residue starts a subfragment when it is unvisited,
// has the wanted type, and owns an atom named startatom / altstartatom /
// alt2startatom that has no bond to a different residue of the same type.
// The residues following the start residue are collected by
// find_connected_subfragment() using the end atom names.
void BaseMolecule::find_subfragments(int startatom, 
          int altstartatom, int alt2startatom,
          int endatom, int altendatom, int alt2endatom, int alt3endatom,
          int restype, ResizeArray<Fragment *> *subfragList)
{
  int i, j, k;
  MolAtom *a;
  char *flgs = new char[residueList.num()];
  memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags

  // Loop over all residues looking for candidate residues that start
  // a fragment.  A fragment starting residue must be an unvisited 
  // residue which has an startatom with no off residue bond to 
  // the same restype
  for (i=residueList.num()-1; i>=0; i--) {
    // test for previous visit, and whether it's the restype we want
    if (!flgs[i] && residueList[i]->residueType == restype) {
      // does this residue have a matching startatom
      for (j=residueList[i]->atoms.num()-1; j>=0; j--) { 
        int satom = (a=atom(residueList[i]->atoms[j]))->nameindex;
        if (satom == startatom || 
            satom == altstartatom || 
            satom == alt2startatom){
          for (k=a->bonds-1; k>=0; k--) {
            MolAtom *bondto = atom(a->bondTo[k]);
            // are there any off-residue bonds to the same restype
            if (bondto->uniq_resid != i && bondto->residueType == restype) {
              break; // if so then stop, so that k>=0
            }
          }

          // if we found a valid fragment start atom, find residues downchain
          if (k<0) { 
            subfragList->append(new Fragment);
            find_connected_subfragment(i, subfragList->num()-1, flgs, 
                  endatom, altendatom, alt2endatom, alt3endatom,
                  restype, subfragList);

            // Residue i is flagged now.  Stop scanning its atoms: a second
            // atom with a start name would otherwise append another
            // Fragment that could never receive any residue.
            break;
          } // found starting residue
        } // found startatom
      } // going through atoms
    } // found restype
  } // going through residues

  // found 'em all
  delete [] flgs;
} 


// find a class of fragments, and add them to the subfragment list
void BaseMolecule::find_subfragments_topologically(int restype, 
  ResizeArray<Fragment *> *subfragList, 
  int endatom, int altendatom, int alt2endatom, int alt3endatom) {
  int i; 
  char *flgs = new char[residueList.num()];
  memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
  int numres = residueList.num();

  // Loop over all residues looking for candidate residues that start
  // a fragment.  A fragment starting residue must be an unvisited
  // residue which has an startatom with no off residue bond to
  // the same restype
  for (i=0; i<numres; i++) {
    Residue *res = residueList[i];

    // test for previous visit, and whether it's the restype we want
    if (!flgs[i] && res->residueType == restype) {
      // if this residue only has 1 bond to a residue of the same restype
      // it must be a terminal residue
      int offresbondcount = 0;
#if defined(MAXRESIDUEBONDS)
      int j;
      for (j=res->bonds-1; j>=0; j--) {
        int resbondto = res->bondTo[j];
        if (residueList[resbondto]->residueType == restype)
          offresbondcount++;
      }
#else
      int j, k;
      int numatoms = res->atoms.num();
      for (j=0; j<numatoms; j++) {
        MolAtom *a = atom(res->atoms[j]);

        // find off-residue bonds
        for (k=0; k<a->bonds; k++) {
          MolAtom *b = atom(a->bondTo[k]);
          if (b->uniq_resid != i && 
              residueList[b->uniq_resid]->residueType == restype) {
            offresbondcount++;
          }
        }
      }
#endif

      // if we found a valid fragment start atom, find residues downchain
      if (offresbondcount == 1) {
        subfragList->append(new Fragment);
        find_connected_subfragment(i, subfragList->num()-1, flgs,
              endatom, altendatom, alt2endatom, alt3endatom,
              restype, subfragList);
      }
    } // found restype
  } // going through residues

  // found 'em all
  delete [] flgs;
}




#if defined(VMDWITHCARBS)

// Routines for detecting carbohydrate rings
// contributed by Simon Cross and Michelle Kuttel
// XXX TODO: 
//   need to check use of STL Vector for portability, and replace with our
//   own if necessary.
//
// still to add:
//   rings* find_rings_with_edge(int edge)  
//   ring* orientate_ring(*ring)

// Find the rings of the bond graph, bounded in size by 'maxringsize': the
// back edges are collected first, then the rings are searched starting
// from each of them.  Returns the ring count reported by
// find_small_rings_from_back_edges().
int BaseMolecule::find_small_rings(int maxringsize) {
  int i;          // referenced only by the disabled debug output below
  SmallRing *sr;  // referenced only by the disabled debug output below
  ResizeArray<int> back_edge_src, back_edge_dest;

  // step 1: back edges, as parallel source/destination arrays
  int n_back_edges = find_back_edges(back_edge_src, back_edge_dest);

#if 0
  msgInfo << "  BACK EDGES: " << n_back_edges << sendmsg;
  for (i=0; i < n_back_edges; i++) {
    msgInfo << "       SRC:" << back_edge_src[i] << ", DST:" << back_edge_dest[i] << sendmsg;
  }
#endif

  // step 2: rings reachable from those back edges
  int n_rings = find_small_rings_from_back_edges(maxringsize, back_edge_src, back_edge_dest);

#if 0
  msgInfo << " SMALL RINGS: " << n_rings << sendmsg;
  for (i=0; i < n_rings; i++) {
    sr = smallringList[i];
    msgInfo << "    RING: " << *sr << sendmsg;
  }
#endif

  return n_rings; // number of rings found
}


// Collect the back edges of a spanning forest of the bond graph.  Every
// atom not yet reached becomes the root of a new tree, which is handled by
// find_connected_subgraph_back_edges().  The edges are returned through
// the parallel arrays back_edge_src / back_edge_dest; the return value is
// the sum of the counts reported per tree.
int BaseMolecule::find_back_edges(ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest) {
  // scratch arrays shared by all trees, indexed by atom id
  int *visited = new int[nAtoms];
  int *parents = new int[nAtoms];
  memset(visited, 0, nAtoms * sizeof(int));  // no atom is in a tree yet
  memset(parents, 0, nAtoms * sizeof(int));  // clear parents

  int total = 0;
  int root;
  for (root=0; root<nAtoms; root++) {
    if (visited[root] != 0)
      continue;  // already part of an earlier tree

    total += find_connected_subgraph_back_edges(root, back_edge_src,
                                                back_edge_dest,
                                                visited, parents);
  }

  delete [] visited; 
  delete [] parents;
 
  return total;
}


// Find the back edges of a spanning tree for the connected part of the
// molecule that contains 'atomid'.  The tree is grown breadth-first from
// 'atomid' over the atom bond lists.
//   intree_flags   - per-atom flag, set to 1 when an atom joins the tree
//   intree_parents - per-atom tree parent, written when an atom joins
// Each bond to an atom that is already in the tree, other than the bond to
// the current atom's own parent, is a back edge; it is appended to
// back_edge_src / back_edge_dest once (from the end with the lower id).
// Returns the number of back edges appended.
int BaseMolecule::find_connected_subgraph_back_edges(int atomid, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest,
                                                     int *intree_flags, int *intree_parents) {
  int i, cur_atom_id, child_atom_id, parent_atom_id;
  int n_new_back_edges = 0;
  MolAtom *curatom;
  std::vector<int> node_queue;
  size_t head = 0;  // index of the next queue entry to process

  node_queue.push_back(atomid);
  intree_flags[atomid] = 1;
  intree_parents[atomid] = -1;  // the root has no parent

  // Entries are consumed by advancing 'head' rather than erasing the front
  // element, which would shift the whole vector on every step.
  while (head < node_queue.size()) {
    cur_atom_id = node_queue[head++];
    
    curatom = atom(cur_atom_id);
    parent_atom_id = intree_parents[cur_atom_id];
    
    for(i=0;i<curatom->bonds;i++) {
      child_atom_id = curatom->bondTo[i];
      if (intree_flags[child_atom_id]) {
        // back-edge found
        if ((child_atom_id != parent_atom_id) && (child_atom_id > cur_atom_id)) {
            // we ignore edges back to the parent
            // and only add each back edge once
            // (it'll crop up twice since each bond is listed on both atoms)
            back_edge_src.append(cur_atom_id);
            back_edge_dest.append(child_atom_id);
            n_new_back_edges++;
        }
      } else {
        // extended tree: the parent must be recorded here, for exactly the
        // atom that was just added, so that the tree edge is recognized
        // when the child is processed later
        intree_flags[child_atom_id] = 1;
        intree_parents[child_atom_id] = cur_atom_id;
        node_queue.push_back(child_atom_id);
      }
    }
  }

  return n_new_back_edges;
}


// find rings smaller than maxringsize given list of back edges
int BaseMolecule::find_small_rings_from_back_edges(int maxringsize, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest) {
    int i, key;
    int n_rings = 0;
    int n_back_edges = back_edge_src.num();
    SmallRing *ring;
    inthash_t *used_edges = new inthash_t; // back edges which have been dealt with 
    inthash_t *used_atoms = new inthash_t; // atoms (other than the first) which are used in the current path (i.e. possible loop)
    inthash_init(used_edges,n_back_edges);
    inthash_init(used_atoms,maxringsize);
    
    for(i=0;i<n_back_edges;i++) {
      ring = new SmallRing();
      ring->append(back_edge_src[i]);
      ring->append(back_edge_dest[i]);
      
      n_rings += find_small_rings_from_partial(ring,maxringsize,used_edges,used_atoms);
      delete ring;
      
      key = get_edge_key(back_edge_src[i],back_edge_dest[i]);
      inthash_insert(used_edges,key,1);
    }
    
    inthash_destroy(used_edges);
    delete used_edges;
    inthash_destroy(used_atoms);
    delete used_atoms;

    return n_rings;
}


// Extend the partial ring 'ring' by one atom in every admissible way and
// count the rings that result.  A bonded atom is admissible unless it is
// the atom just before the ring's last atom, is present in 'used_atoms',
// or is reached over an edge present in 'used_edges'.  When an extension
// closes the ring, a copy of the ring (taken after the closing atom has
// been removed again) is appended to smallringList; otherwise the search
// recurses while the ring holds fewer than 'maxringsize' atoms.  'ring'
// and 'used_atoms' are restored before the next candidate is tried.
// Returns the number of rings appended.
int BaseMolecule::find_small_rings_from_partial(SmallRing *ring, int maxringsize, inthash_t *used_edges, inthash_t *used_atoms) {
    int found = 0;
    int b;

    const int tail_id = ring->last_atom();
    MolAtom *tail = atom(tail_id);

    for (b=0; b<tail->bonds; b++) {
      const int next_id = tail->bondTo[b];

      // not straight back to the previous atom, not an atom already on the
      // path, and not over a back edge that has been dealt with
      if (next_id == ring->atoms[ring->atoms.num()-2] ||
          inthash_lookup(used_atoms, next_id) != HASH_FAIL ||
          inthash_lookup(used_edges, get_edge_key(tail_id, next_id)) != HASH_FAIL)
        continue;

      // tentatively add the atom to the path
      ring->append(next_id);
      inthash_insert(used_atoms, next_id, 1);

      const int is_closed = ring->closed();

      // XXX recursion, will need to change this to an iterative 
      // implementation so that large structures don't crash.
      if (!is_closed && ring->num() < maxringsize) {
        found += find_small_rings_from_partial(ring, maxringsize, used_edges, used_atoms);
      }

      // take the atom off the path again
      ring->remove_last();
      inthash_delete(used_atoms, next_id);

      // a closed ring is stored without its closing atom
      if (is_closed) {
        smallringList.append(ring->copy());
        found += 1;
      }
    }

    return found;
}


// Construct the integer key identifying the edge between two atoms.  The
// end points are ordered first, so (a,b) and (b,a) give the same key:
//   key = larger_id * nAtoms + smaller_id
// The product is computed in unsigned arithmetic because it no longer fits
// in an int once nAtoms exceeds roughly 46340; unsigned arithmetic wraps
// instead of being undefined.  Keys of different edges can coincide after
// such a wraparound.
int BaseMolecule::get_edge_key(int edge_src, int edge_dest) {
    int t;
    if (edge_dest > edge_src) {
       t = edge_src;
       edge_src = edge_dest;
       edge_dest = t;
    }

    unsigned int key = (unsigned int) edge_src * (unsigned int) nAtoms
                     + (unsigned int) edge_dest;
    return (int) key;
}

#endif  // end of carbohydrate related stuff



void BaseMolecule::add_volume_data(const char *name, const float *o,
    const float *xa, const float *ya, const float *za, int x, int y, int z,
    float *data) {
  msgInfo << "Analyzing Volume..." << sendmsg;

  VolumetricData *vdata = new VolumetricData(name, o, xa, ya, za,
                                             x, y, z, data);
  
  // Print out grid size along with memory use for the grid itself plus
  // the memory required for the volume gradients (4x the scalar grid memory)
  // Color texture maps require another 0.75x the original scalar grid size.
  msgInfo << "   Grid size: " << x << "x" << y << "x" << z << "  (" 
          << (int) (4 * (x*y*z * sizeof(float)) / (1024.0 * 1024.0)) << " MB)" 
          << sendmsg;

  msgInfo << "   Total voxels: " << x*y*z << sendmsg;

  msgInfo << "   Min: " << vdata->datamin << "  Max: " << vdata->datamax 
          << "  Range: " << (vdata->datamax - vdata->datamin) << sendmsg;

  msgInfo << "   Computing volume gradient map for smooth shading" << sendmsg;

  vdata->compute_volume_gradient(); // calc gradients for smooth vertex normals

  volumeList.append(vdata);

  msgInfo << "Added volume data, name=" << vdata->name << sendmsg;
}

int BaseMolecule::num_volume_data() {
  return volumeList.num();
}

// Return the volumeList entry with index 'id', or NULL when 'id' lies
// outside the range [0, volumeList.num()).
const VolumetricData *BaseMolecule::get_volume_data(int id) {
  if (id < 0 || id >= volumeList.num())
    return NULL;

  return volumeList[id];
}
