/***************************************************************************
 *cr                                                                       
 *cr            (C) Copyright 1995-2007 The Board of Trustees of the           
 *cr                        University of Illinois                       
 *cr                         All Rights Reserved                        
 *cr                                                                   
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *	$RCSfile: BaseMolecule.h,v $
 *	$Author: johns $	$Locker:  $		$State: Exp $
 *	$Revision: 1.106 $	$Date: 2007/02/20 20:07:20 $
 *
 ***************************************************************************
 * DESCRIPTION:
 *
 * Base class for all molecules, without display-specific information.  This
 * portion of a molecule contains the structural data, and all routines to
 * find the structure (backbone, residues, etc).  It contains the
 * animation list as well.
 *
 ***************************************************************************/
#ifndef BASEMOLECULE_H
#define BASEMOLECULE_H

#ifndef NAMELIST_TEMPLATE_H
#include "NameList.h"
#endif
#ifndef RESIZEARRAY_TEMPLATE_H
#include "ResizeArray.h"
#endif
#include "Atom.h"
#include "Animation.h"
#include "Residue.h"
#include "Timestep.h"
#include "Fragment.h"
#include "intstack.h"

#ifdef VMDWITHCARBS
#include <vector>
#include "SmallRing.h"
#include "inthash.h"
#endif

class VolumetricData;

/// Base class for all molecules, without display-specific information.  This
/// portion of a molecule contains the structural data, and all routines to
/// find the structure (backbone, residues, etc).  It contains the
/// animation list as well.
///
/// NOTE(review): the class holds raw pointers (atomList, moleculename) and
/// declares no copy constructor or assignment operator; copying instances is
/// presumably not intended -- confirm against the implementation file.
class BaseMolecule {
public:
  //
  // public molecular structure data (for ease of access):
  //
  int nAtoms;                         ///< number of atoms
  int nResidues;                      ///< number of residues
  int nWaters;                        ///< number of waters
  int nSegments;                      ///< number of segments
  int nFragments;                     ///< total number of fragments
  int nProteinFragments;              ///< number of protein fragments
  int nNucleicFragments;              ///< number of nucleic fragments

  NameList<int> atomNames;            ///< list of unique atom names
  NameList<int> atomTypes;            ///< list of unique atom types
  NameList<int> resNames;             ///< list of unique residue names
  NameList<int> chainNames;           ///< list of unique chain names
  NameList<int> segNames;             ///< list of unique segment names
  NameList<int> altlocNames;          ///< list of alternate location names

  ResizeArray<Residue *> residueList; ///< residue connectivity list
  ResizeArray<Fragment *> fragList;   ///< list of connected residues
  ResizeArray<Fragment *> pfragList;  ///< list of connected protein residues
                                      ///< this is a single chain from N to C
  ResizeArray<int> pfragCyclic;       ///< flag indicating cyclic fragment

  ResizeArray<Fragment *> nfragList;  ///< ditto for nuc; from 5' to 3'
  ResizeArray<int> nfragCyclic;       ///< flag indicating cyclic fragment

#ifdef VMDWITHCARBS
  ResizeArray<SmallRing *> smallringList; ///< list of small rings
                                  ///< each ring is a single oriented chain
#endif


  // Extra floating-point data for each atom, keyed by field name.
  // If the name is unassigned then the values can be taken to be zero.
  NameList<float *> extra;

  // Interface to standard extra data.  These will exist and be initialized
  // to zero when init_atoms is called.  Each accessor simply looks up its
  // field name in 'extra' and returns the stored float pointer.
  float *radius() { return extra.data("radius"); }
  float *mass() { return extra.data("mass");   }
  float *charge() { return extra.data("charge"); }
  float *beta() { return extra.data("beta");   }
  float *occupancy() { return extra.data("occupancy"); }

  /// number of electron pairs, also fractional
  /// (looked up in 'extra' under the name "bondorders")
  float *bondorders() { return extra.data("bondorders"); }
  /// set the order of a bond of the given atom
  /// NOTE(review): 'bond' is presumably an index into the atom's own bond
  /// list rather than a global bond id -- confirm in the implementation.
  void setbondorder(int atom, int bond, float order);
  /// get the order of a bond of the given atom (see setbondorder)
  float getbondorder(int atom, int bond);

  /// has structure information already been loaded for this molecule?
  /// True (nonzero) once cur_atom is greater than zero.
  int has_structure() const { return cur_atom > 0; }

  /// clear the entire bond list for all atoms
  void clear_bonds(void);

  /// return the number of unique bonds in the molecule 
  int count_bonds(void);

private:
  const int ID;          ///< unique mol. ID number

  //
  // molecular structure data:
  //
  int cur_atom;          ///< index of next atom added
  MolAtom *atomList;     ///< atom data
  int lastbonderratomid; ///< last atom id that generated a bonding error msg
  int bonderrorcount;    ///< number of bonds-exceeded errors we've printed
 
  //
  // routines to determine components of molecular structure
  //
 
  /// Stage 1 of structure building.
  /// (a) find_backbone: assign atomType to atoms based on their
  /// backbone type.  This is the only place where these types get assigned.
  /// protein backbone = name CA C O N or (name OT1 OT2 and bondedto backbone)
  /// nucleic backbone = name P O1P O2P O3' C3' C4' C5' O5' O3* C3* C4* C5* O5*
  /// XXX Might be nice for the user to be able to override these definitions.
  int find_backbone(void);

  // find the residues in the molecule; return number found.
  // I look for atoms with the same resid connected to 
  // backbone atoms with the same resid (find enough backbone
  // atoms, then find atoms connected to them)
  // NOTE(review): the helper parameters below are unnamed in this header;
  // see the implementation file for their meaning.
  int find_connected_backbone(IntStackHandle, int, int, int, int, int *);
  void clean_up_connection(IntStackHandle, int, int, int *);
  void find_connected_atoms_in_resid(IntStackHandle, int, int, 
     int, int, int *);
  void find_and_mark(int, int, int,
     int *, int *);
  int make_uniq_resids(int *flgs); ///< give each residue a unique resid

  /// Stage 2 of structure building.
  /// Called after atom types and bond types have been assigned to each atom.
  /// (a) Assign uniq_resid to each atom by finding atoms that are bonded to
  ///     each other and have the same resid string and insertion string.
  /// (b) Assign residueType to each atom by checking for 4 atoms of a given
  ///     atomType in the sets of bonded atoms.  
  int find_residues(void);
  
  /// Helpers for finding the waters, based on resname.
  // This should take place after find_residues to keep
  // from mistaking a protein resname as a water resname, maybe
  void find_connected_waters(int i, char *tmp);
  int find_connected_waters2(void);

  /// Stage 2b of structure building.  This is essentially a continuation
  /// of Stage 2.  For atoms that do not yet have a residue type, their resname
  /// is matched against a list of water residue names.  residueType is 
  /// assigned if there is a match.
  int find_waters(void);

  /// Stage 3 of structure building: 
  ///   (a) Create new residues.  The residue type is determined by the first
  ///       atom added to the residue.
  ///   (b) assign atoms to residues;
  ///   (c) assign bonds to residues (i.e. which residues are bonded to which)
  void find_connected_residues(int num_residues);
  
  /// find the segments in the molecule; return number found.
  /// No search is performed here: the count is simply segNames.num().
  int find_segments(void) { return segNames.num(); }
  
  /// find the connected residues and put the info in fragList
  int find_connected_fragments();

  /// Stage 4 of structure building.
  ///    (a) Create new fragments, and assign residues to them.
  ///    (b) Assign fragment to each atom
  ///    (c) By some inscrutable means (go read the code!), sort fragments
  ///        into protein fragments and nucleic fragments.  I would have 
  ///        thought this would be simple to do based on Residue's residue
  ///        type, but apparently somebody wanted it to be complicated...
  ///        This creates pfragList and nfragList for the molecule.
  int find_fragments(void);

  // Cyclic-subfragment helpers.
  // NOTE(review): presumably these fill pfragCyclic/nfragCyclic for the
  // given subfragment list -- confirm in the implementation.
  void find_subfragments_cyclic(ResizeArray<Fragment *> *subfragList, int restype);  
  void find_cyclic_subfragments(ResizeArray<Fragment *> *subfragList, ResizeArray<int> *subfragCyclic);
 
  /// find ordered protein and nucleic subfragments
  void find_connected_subfragment(int resnum, int fragnum, char *flgs, 
         int endatom, int altendatom, int alt2endatom, int alt3endatom,
         int restype, 
         ResizeArray<Fragment *> *subfragList);

  /// find subfragments of the given residue type, given the candidate
  /// start-atom and end-atom identifiers (primary plus alternates)
  void find_subfragments(int startatom, int altstartatom, int alt2startatom,
    int endatom, int altendatom, int alt2endatom, int alt3endatom,
    int restype, ResizeArray<Fragment *> *subfragList);

  /// variant of find_subfragments that takes only end-atom identifiers
  void find_subfragments_topologically(int restype, ResizeArray<Fragment *> *subfragList, int endatom, int altendatom, int alt2endatom, int alt3endatom);

#ifdef VMDWITHCARBS
   /// find small rings (helpers below are only built with VMDWITHCARBS)
   int find_small_rings(int maxringsize);
   int find_back_edges(ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest);
   int find_connected_subgraph_back_edges(int atomid, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest,
                                          int *intree_flags, int *intree_parents);
   int find_small_rings_from_back_edges(int maxringsize, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest);
   int find_small_rings_from_partial(SmallRing *ring, int maxringsize, inthash_t *used_edges, inthash_t *used_atoms);
   int get_edge_key(int edge_src, int edge_dest);
#endif


protected:
  char *moleculename;  ///< name of the molecule
  int need_find_bonds; ///< whether to compute bonds from the first timestep
                       ///< (set to 1 or 2 by the find_*bonds_from_timestep
                       ///< requests below)

public:
  // constructor; just sets most things to null.  Derived classes must put
  // in structure in 'create' routine.  Typical sequence of creating a
  // molecule should be:
  //    mol = new Molecule(....)
  //    ( specify parameters for creation )
  //    mol->create();  ... return success
  //    mol->analyze(); ... find information about the structure
  BaseMolecule(int);      ///< constructor takes molecule ID
  virtual ~BaseMolecule(void); ///< destructor

  //
  // routines to develop molecular structure
  //
  
  /// Try to set the number of atoms to n.  n must be positive.  May be called
  /// more than once with the same n.  Return success. 
  int init_atoms(int n); 

  /// compute molecule's bonds using distance bond search from 1st timestep
  /// (records the request by setting need_find_bonds to 1)
  void find_bonds_from_timestep() { need_find_bonds = 1; }
  /// as above, but records the request as need_find_bonds = 2
  void find_unique_bonds_from_timestep() { need_find_bonds = 2; }

  /// add a new atom; return its index.
  /// insertion defaults to " " and altloc defaults to "".
  int add_atom(char *name, char *type, int atomicnumber, 
               char *resname, int resid,
	       const char *chainid,const char *segname,
	       char *insertion = (char *) " ", const char *altloc = "");

  /// add a new bond from a to b; return total number of bonds added so far.
  /// The last argument defaults to ATOMNORMAL.
  int add_bond(int, int, float, int = ATOMNORMAL);

  /// add a bond after checking for duplicates
  int add_bond_dupcheck(int, int, float);
 
  /// find higher level constructs given the atom/bond information
  // (By this time, the molecule is on the MoleculeList!)
  void analyze(void);

  //
  // query info about the molecule
  //
  int id(void) const { return ID; } ///< return id number of this molecule
  const char *molname() const {return moleculename; } ///< return molecule name

  // Return the Nth atom, residue, and fragment.  All assume correct index 
  // and that the structure has been initialized (for speed).
  MolAtom *atom(int n) { return atomList+n; } ///< return Nth atom
  Residue *residue(int);                      ///< return Nth residue
  Fragment *fragment(int);                    ///< return Nth fragment

  // return the residue or fragment in which the given atom is located.
  Residue *atom_residue(int);       ///< return residue atom is in
  Fragment *atom_fragment(int);     ///< return fragment atom is in

  //@{
  /// find first occurrence of an atom name in the residue, returns -3 not found
  /// This overload takes the atom's name index and scans the atom list of
  /// residueList[residue] in order, returning the atom index of the first
  /// atom whose nameindex matches.  The residue index is not range-checked.
  int find_atom_in_residue(int atomnameindex, int residue) {
    const ResizeArray<int> &atoms = residueList[residue]->atoms;
    int num = atoms.num();
    for (int i=0; i<num; i++) {
      if (atom(atoms[i])->nameindex == atomnameindex) return atoms[i];
    }
    return -3;  // sentinel: no atom with that name index in this residue
  }

  int find_atom_in_residue(const char *atomname, int residue);
  //@}

  //@{ 
  /// return 'default' charge, mass, radius, occupancy and beta values
  float default_charge(char *);
  float default_mass(char *);
  float default_radius(char *);
  float default_occup(void) { return 1.0; }
  float default_beta(void) { return 0.0; }
  //@}

  /// add volumetric data to a molecule
  /// NOTE(review): presumably 'o' is the grid origin, xa/ya/za the axis
  /// vectors and x/y/z the grid dimensions of voldata -- confirm against
  /// VolumetricData.
  void add_volume_data(const char *name, const float *o, 
    const float *xa, const float *ya, const float *za, int x, int y, int z,
    float *voldata); 

  int num_volume_data(); ///< return number of volume data sets in molecule 
  const VolumetricData *get_volume_data(int); ///< return requested data set
  void compute_volume_gradient(VolumetricData *);  ///< compute negated normalized volume gradient map

protected:
  ResizeArray<VolumetricData *>volumeList;    ///< array of volume data sets
};

// Hydrogen atom name detection macro.
// Evaluates to true when the atom name starts with 'H', or starts with a
// decimal digit immediately followed by 'H' (e.g. "1HB").
//  - The argument is parenthesized so that pointer expressions such as
//    IS_HYDROGEN(name + 1) expand correctly.
//  - The character is converted to unsigned char before being handed to
//    isdigit(); passing a negative plain-char value is undefined behavior.
//  - s[1] is only read when s[0] is a digit, so an empty string is safe.
// The argument is evaluated more than once, so it must have no side effects.
// isdigit() must be declared (via <ctype.h>) wherever the macro is used.
#define IS_HYDROGEN(s) ((s)[0] == 'H' || \
                        (isdigit((unsigned char) (s)[0]) && (s)[1] == 'H'))

#endif

