Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

BaseMolecule.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2008 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: BaseMolecule.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.209 $      $Date: 2008/04/28 22:08:46 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * Base class for all molecules, without display-specific information.  This
00020  * portion of a molecule contains the structural data, and all routines to
00021  * find the structure (backbone, residues, etc).  It does NOT contain the
00022  * animation list; that is maintained by Molecule (which is derived from
00023  * this class).
00024  *
00025  ***************************************************************************/
00026 
00027 #include <ctype.h>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include "Inform.h"
00031 #include "utilities.h"
00032 #include "intstack.h"
00033 
00034 #include "BaseMolecule.h"
00035 #include "VolumetricData.h"
00036 
00037 #define MAXBONDERRORS 25
00038 
00039 
00040 
00042 
00043 BaseMolecule::BaseMolecule(int myID) : residueList(10) , fragList(1),
00044 #if defined(VMDWITHCARBS)
00045      pfragList(1), nfragList(1), smallringList(), smallringLinkages(), currentMaxRingSize(-1), currentMaxPathLength(-1), ID(myID) {
00046 #else
00047      pfragList(1), nfragList(1), ID(myID) {
00048 #endif
00049 
00050   // initialize all variables
00051   nAtoms = 0;
00052   cur_atom = 0;
00053   atomList = NULL;
00054   moleculename = NULL;
00055   lastbonderratomid=-1;
00056   bonderrorcount=0;
00057 }
00058 
00059 
00061 
00062 BaseMolecule::~BaseMolecule(void) {
00063   int i;
00064 
00065   // delete structural data
00066   delete [] atomList;
00067   for (i=0; i<residueList.num(); i++) {
00068     delete residueList[i];
00069   }
00070   for (i=0; i<nfragList.num(); i++) {
00071     delete nfragList[i];
00072   }
00073 #if defined(VMDFASTRIBBONS)
00074   for (i=0; i<nfragCPList.num(); i++) {
00075     delete [] nfragCPList[i];
00076   }
00077 #endif
00078   for (i=0; i<pfragList.num(); i++) {
00079     delete pfragList[i];
00080   }
00081 #if defined(VMDFASTRIBBONS)
00082   for (i=0; i<pfragCPList.num(); i++) {
00083     delete [] pfragCPList[i];
00084   }
00085 #endif
00086   for (i=0; i<fragList.num(); i++) {
00087     delete fragList[i];
00088   }
00089   for (i=0; i<volumeList.num(); i++) {
00090     delete volumeList[i];
00091   }
00092   for (i=0; i<extra.num(); i++) {
00093     delete [] extra.data(i);
00094   }
00095 }
00096 
00097 
00099 
00100 // initialize the atom list ... should be called before adding any atoms
00101 int BaseMolecule::init_atoms(int n) {
00102 
00103   if(n <= 0) {
00104     msgErr << "BaseMolecule: init_atoms called with invalid number of atoms: "
00105            << n << sendmsg;
00106     return FALSE;
00107   }
00108   if (cur_atom != 0 && nAtoms != n) {
00109     msgErr << "BaseMolecule: attempt to init atoms while structure building in progress!" << sendmsg;
00110     return FALSE;
00111   }
00112 
00113   if (!atomList) {
00114     // first call to init_atoms
00115     nAtoms = n; // only place where nAtoms is set!
00116     atomList = new MolAtom[nAtoms];
00117     memset(atomList, 0, nAtoms*sizeof(MolAtom));
00118 
00119     // initialize NULL extra data field, which is returned when
00120     // querying a non-existent field with extra.data("fielddoesntexist")
00121     extra.add_name("NULL", NULL);
00122 
00123     // initialize "default" extra data fields.
00124     extra.add_name("beta", new float[nAtoms]);
00125     extra.add_name("occupancy", new float[nAtoms]);
00126     extra.add_name("charge", new float[nAtoms]);
00127     extra.add_name("mass", new float[nAtoms]);
00128     extra.add_name("radius", new float[nAtoms]);
00129     for (int i=0; i<extra.num(); i++) {
00130       void *data = extra.data(i);
00131       if (data != NULL) 
00132         memset(data, 0, nAtoms*sizeof(float));
00133     }
00134     return TRUE;
00135   }
00136   if (n != nAtoms) {
00137     msgErr << "The number of atoms in this molecule has already been assigned."
00138            << sendmsg;
00139     return FALSE;
00140   }
00141   return TRUE;
00142 }
00143 
00144 // add a new atom; return it's index, or (-1) if error.
00145 int BaseMolecule::add_atom(char *name, char *atomtype, int atomicnumber, 
00146     char *resname, int resid, const char *chain, const char *segname, 
00147     char *insertion, const char *altloc) {
00148   int nameindex, typeindex;
00149   int resnameindex, segnameindex, altlocindex, chainindex;
00150 
00151   if (!atomList || cur_atom >= nAtoms) {
00152     msgErr << "BaseMolecule: Cannot add new atom; currently " << nAtoms;
00153     msgErr << " atoms in structure." << sendmsg;
00154     return (-1);
00155   }
00156 
00157   // create atom  
00158   MolAtom *newatom = atom(cur_atom);
00159   newatom->init(cur_atom, resid, insertion);
00160 
00161   // add names to namelist, and put indices in MolAtom object
00162   nameindex = atomNames.add_name(name,atomNames.num());
00163   typeindex = atomTypes.add_name(atomtype, atomTypes.num());
00164   resnameindex = resNames.add_name(resname, resNames.num());
00165   segnameindex = segNames.add_name(segname, segNames.num());
00166   altlocindex = altlocNames.add_name(altloc, altlocNames.num());
00167 
00168   // use default of 'X' for chain if not given
00169   if(!chain || ! (*chain) || *chain == ' ')
00170     chainindex = chainNames.add_name("X", chainNames.num());
00171   else
00172     chainindex = chainNames.add_name(chain, chainNames.num());
00173 
00174   // set atom member variables
00175   newatom->nameindex = nameindex;
00176   newatom->typeindex = typeindex;
00177   newatom->atomicnumber = atomicnumber;
00178   newatom->resnameindex = resnameindex;
00179   newatom->segnameindex = segnameindex;
00180   newatom->altlocindex = altlocindex;
00181   newatom->chainindex = chainindex;
00182 
00183   // check for integer overflow/wraparound condition, which can occur
00184   // if an evil plugin defines 100,000 unique atom names, for example
00185   if (newatom->nameindex != nameindex ||
00186       newatom->typeindex != typeindex ||
00187       newatom->atomicnumber != atomicnumber ||
00188       newatom->resnameindex != resnameindex ||
00189       newatom->segnameindex != segnameindex ||
00190       newatom->altlocindex != altlocindex ||
00191       newatom->chainindex != chainindex) {
00192     msgErr << "BaseMolecule: Cannot add atom; namelist index value too large." << sendmsg;
00193     msgErr << "Recompile VMD with larger index types." << sendmsg;
00194     msgErr << "Atom namelist index values at time of overflow:" << sendmsg;
00195     msgErr << "  nameindex: " << nameindex << sendmsg;;
00196     msgErr << "  typeindex: " << typeindex << sendmsg;;
00197     msgErr << "  resnameindex: " << resnameindex << sendmsg;;
00198     msgErr << "  segnameindex: " << segnameindex << sendmsg;;
00199     msgErr << "  altlocindex: " << altlocindex << sendmsg;;
00200     msgErr << "  chainindex: " << chainindex << sendmsg;
00201     return -1;
00202   }
00203 
00204   return cur_atom++;
00205 }
00206 
00207 
00208 // add a new bond; return 0 on success, or (-1) if error.
00209 int BaseMolecule::add_bond(int a, int b, float bondorder, int backbonetype) {
00210   if (!nAtoms || a >= nAtoms || b >= nAtoms) {
00211     msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
00212     return (-1);
00213   } 
00214 
00215   if (a == b) {
00216     msgErr << "BaseMolecule: Cannot bond atom " <<a<< " to itself." << sendmsg;
00217     return (-1);
00218   }
00219 
00220   // put the bond in the atom list
00221   if (atom(a)->add_bond(b, backbonetype)) {
00222     if (bonderrorcount < MAXBONDERRORS) {
00223       if (lastbonderratomid != a) {
00224         msgErr << "MolAtom " << a << ": Exceeded maximum number of bonds ("
00225                << atom(a)->bonds << ")." << sendmsg;
00226         lastbonderratomid=a;
00227         bonderrorcount++;
00228       }
00229     } else if (bonderrorcount == MAXBONDERRORS) {
00230       msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
00231       msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
00232       bonderrorcount++;
00233     }
00234     return (-1);
00235   }
00236 
00237   if (atom(b)->add_bond(a, backbonetype)) {
00238     if (bonderrorcount < MAXBONDERRORS) {
00239       if (lastbonderratomid != b) {
00240         msgErr << "MolAtom " << b << ": Exceeded maximum number of bonds ("
00241                << atom(b)->bonds << ")." << sendmsg;
00242         lastbonderratomid=b;
00243         bonderrorcount++;
00244       }
00245     } else if (bonderrorcount == MAXBONDERRORS) {
00246       msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
00247       msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
00248       bonderrorcount++;
00249     }
00250     return (-1);
00251   }
00252 
00253   // store bond orders
00254   setbondorder(a, atom(a)->bonds-1, bondorder);
00255   setbondorder(b, atom(b)->bonds-1, bondorder);
00256 
00257   return 0;
00258 }
00259 
00260 // Add a bond to a structure but check to make sure there isn't a 
00261 // duplicate, as may be the case when merging bondlists from a file 
00262 // and from a distance-based bond search
00263 int BaseMolecule::add_bond_dupcheck(int a, int b, float bondorder) {
00264   int i;
00265 
00266   if (!nAtoms || a >= nAtoms || b >= nAtoms) {
00267     msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
00268     return (-1);
00269   }
00270 
00271   MolAtom *atm = atom(a);
00272   int nbonds = atm->bonds;
00273   const int *bonds = &atm->bondTo[0];
00274   for (i=0; i<nbonds; i++) {
00275     if (bonds[i] == b) {
00276       return 0; // skip bond that already exists
00277     }
00278   }
00279   add_bond(a, b, bondorder); // add it if it doesn't already exist
00280 
00281   return 0;
00282 }
00283 
00285 
00286 void BaseMolecule::setbondorder(int atom, int bond, float order) {
00287   float *bondOrders = extra.data("bondorders");
00288 
00289   // if not already there, add it
00290   if (bondOrders == NULL) {
00291     if (order != 1) {
00292       int i;
00293       extra.add_name("bondorders", new float[nAtoms*MAXATOMBONDS]);
00294       bondOrders = extra.data("bondorders");
00295       for (i=0; i<nAtoms*MAXATOMBONDS; i++)
00296         bondOrders[i] = 1.0f;    
00297 
00298       bondOrders[atom * MAXATOMBONDS + bond] = order;
00299     } 
00300     return;
00301   }
00302 
00303   bondOrders[atom * MAXATOMBONDS + bond] = order;
00304 }
00305 
00306 float BaseMolecule::getbondorder(int atom, int bond) {
00307   float *bondOrders = extra.data("bondorders");
00308 
00309   // if not already there, add it
00310   if (bondOrders == NULL) { 
00311     return 1;
00312   }
00313    
00314   return bondOrders[atom * MAXATOMBONDS + bond];
00315 }
00316 
00317 
00318 // return the Nth residue
00319 Residue *BaseMolecule::residue(int n) {
00320   return residueList[n];
00321 }
00322 
00323 
00324 // return the Nth fragment
00325 Fragment *BaseMolecule::fragment(int n) {
00326   return fragList[n];
00327 }
00328 
00329 
00330 // given an atom index, return the residue object for the residue it
00331 // is in.  If it is not in a residue, return NULL.
00332 Residue *BaseMolecule::atom_residue(int n) {
00333   MolAtom *atm = atom(n);
00334   if(atm->uniq_resid < 0)
00335     return NULL;
00336   else
00337     return residue(atm->uniq_resid);
00338 }
00339 
00340 
00341 // given an atom index, return the fragment object for the fragment it
00342 // is in.  If it is not in a fragment, return NULL.
00343 Fragment *BaseMolecule::atom_fragment(int n) {
00344   MolAtom *atm = atom(n);
00345   int frag = residue(atm->uniq_resid)->fragment;
00346   if(frag < 0)
00347     return NULL;
00348   else
00349     return fragment(frag);
00350 }
00351 
00352 // return a 'default' value for a given atom name
00353 float BaseMolecule::default_radius(char *nm) {
00354   float val = 1.5;
00355   // some names start with a number
00356   while (*nm && isdigit(*nm))
00357     nm++;
00358   if(nm) {
00359     switch(toupper(nm[0])) {
00360       // These are similar to the values used by X-PLOR with sigma=0.8
00361       // see page 50 of the X-PLOR 3.1 manual
00362       case 'H' : val = 1.00f; break;
00363       case 'C' : val = 1.50f; break;
00364       case 'N' : val = 1.40f; break;
00365       case 'O' : val = 1.30f; break;
00366       case 'F' : val = 1.20f; break;
00367       case 'S' : val = 1.90f; break;
00368     }
00369   }
00370 
00371   return val;
00372 }
00373 
00374 
00375 // return a 'default' value for a given atom name
00376 float BaseMolecule::default_mass(char *nm) {
00377   float val = 12.0;
00378 
00379   // some names start with a number
00380   while (*nm && isdigit(*nm)) nm++;
00381   if(nm) {
00382     switch(toupper(nm[0])) {
00383       case 'H' : val = 1.00800f; break;
00384       case 'C' : val = 12.01100f; break;
00385       case 'N' : val = 14.00700f; break;
00386       case 'O' : val = 15.99900f; break;
00387       case 'F' : val = 55.84700f; break;
00388       case 'P' : val = 30.97376f; break;
00389       case 'S' : val = 32.06000f; break;
00390     }
00391     if      (toupper(nm[0] == 'C') && toupper(nm[1] == 'L')) val = 35.453f;
00392     else if (toupper(nm[0] == 'N') && toupper(nm[1] == 'A')) val = 22.989770f;
00393     else if (toupper(nm[0] == 'M') && toupper(nm[1] == 'G')) val = 24.3050f;
00394   }
00395 
00396   return val;
00397 }
00398 
00399 
00400 float BaseMolecule::default_charge(char *) {
00401   // return 0 for everything; later, when we put in a more reliable
00402   // system for determining the charge that's user-configurable,
00403   // we can start assigning more realistic charges.
00404   return 0.0f;
00405 }
00406 
00407 
00408 // count the number of unique bonds in the structure
00409 int BaseMolecule::count_bonds(void) {
00410   int i, j;
00411   int count=0;
00412 
00413   for (i=0; i<nAtoms; i++) {
00414     int nbonds = atomList[i].bonds;
00415     const int *bonds = &atomList[i].bondTo[0];
00416 
00417     for (j=0; j<nbonds; j++) {
00418       if (bonds[j] > i)
00419         count++;
00420     }
00421   }
00422 
00423   return count;
00424 }
00425 
00426 void BaseMolecule::clear_bonds(void) {
00427   int i;
00428   for (i=0; i<nAtoms; i++)
00429     atomList[i].bonds = 0;
00430 }
00431 
00432 
00433 // analyze the molecule for more than just the atom/bond information
00434 // This is here since it is called _after_ the molecule is added to
00435 // the MoleculeList.  Thus, there is a Tcl callback to allow the
00436 // user to update the bond information (or other fields?) before
00437 // the actual search.
00438 void BaseMolecule::analyze(void) {
00439   need_find_bonds = 0; // at this point it's too late
00440 
00441   // I have to let 0 atoms in because I want to be able to read things
00442   // like electron density maps, which have no atoms.
00443   // It is kinda wierd, then to make BaseMolecule be at the top of the
00444   // heirarchy.  Oh well.
00445   if(nAtoms < 1)
00446     return;
00447 
00448   // call routines to find different characteristics of the molecule
00449   msgInfo << "Analyzing structure ..." << sendmsg;
00450   msgInfo << "   Atoms: " << nAtoms << sendmsg;
00451 
00452   // count unique bonds
00453   msgInfo << "   Bonds: " << count_bonds() << sendmsg;
00454 
00455   // restore residue and fragment lists to pristine state
00456   residueList.clear();
00457   fragList.clear();
00458   pfragList.clear();   
00459   pfragCyclic.clear(); 
00460 #if defined(VMDFASTRIBBONS)
00461   pfragCPList.clear(); 
00462 #endif
00463   nfragList.clear();   
00464   nfragCyclic.clear(); 
00465 #if defined(VMDFASTRIBBONS)
00466   nfragCPList.clear(); 
00467 #endif
00468 
00469   // assign per-atom backbone types
00470   find_backbone();
00471 
00472   // find all the atoms in a resid connected to DNA/RNA/PROTEIN/WATER
00473   // also, assign a unique resid (uniq_resid) to each atom
00474   nResidues = find_residues();
00475   msgInfo << "   Residues: " << nResidues << sendmsg;
00476 
00477   nWaters = find_waters();
00478   msgInfo << "   Waters: " << nWaters << sendmsg;
00479   
00480   // determine which residues are connected to each other
00481   bonderrorcount=0; // reset error count before residue connectivity search
00482   find_connected_residues(nResidues); 
00483 
00484  
00485   nSegments = find_segments(); 
00486   msgInfo << "   Segments: " << nSegments << sendmsg;
00487 
00488   nFragments = find_fragments();
00489   msgInfo << "   Fragments: " << nFragments;
00490 
00491   nProteinFragments = pfragList.num();
00492   msgInfo << "   Protein: " << nProteinFragments;
00493 
00494   nNucleicFragments = nfragList.num();
00495   msgInfo << "   Nucleic: " << nNucleicFragments << sendmsg;
00496   
00497   // NOTE: The current procedure incorrectly identifies some lipid 
00498   // atoms as "ATOMNUCLEICBACK" (but not as "nucleic") as well
00499   // as some single water oxygens as "backbone". Here, we 
00500   // correct this by setting all atoms of non-polymeric residue types
00501   // to be "ATOMNORMAL" (i.e.: not backbone).
00502   MolAtom *a;
00503   int i;
00504   for (i=0; i<nAtoms; i++) {
00505     a = atom(i); 
00506     if ((a->residueType != RESNUCLEIC) && (a->residueType != RESPROTEIN)) 
00507       a->atomType = ATOMNORMAL;
00508   }
00509 
00510   // Search for hydrogens
00511   // XXX Must be done after the rest of the structure finding routines,
00512   // because those routines assume that anything that isn't NORMAL is
00513   // a backbone atom.
00514   // We use the name-based definition used in the IS_HYDROGEN macro
00515   for (i=0; i<nAtoms; i++) {
00516     MolAtom *a = atom(i);
00517     const char *aname = atomNames.name(a->nameindex);
00518     if (IS_HYDROGEN(aname))
00519       a->atomType = ATOMHYDROGEN;
00520   }
00521 
00522 #if defined(VMDFASTRIBBONS)
00523   calculate_ribbon_controlpoints();
00524 #endif
00525   
00526 }
00527 
00528 
00530 int BaseMolecule::find_backbone(void) {
00531   int i, j, k;
00532 
00533   // Search for the protein backbone
00534   int protypes[4];
00535   protypes[0] = atomNames.typecode((char *) "CA");
00536   protypes[1] = atomNames.typecode((char *) "C");
00537   protypes[2] = atomNames.typecode((char *) "O");
00538   protypes[3] = atomNames.typecode((char *) "N");
00539 
00540   // special case for terminal oxygens that miss the search for O
00541   // by looking for ones connected to a C
00542   int termtypes[4];
00543   termtypes[0] = atomNames.typecode((char *) "OT1"); // standard names
00544   termtypes[1] = atomNames.typecode((char *) "OT2");
00545   termtypes[2] = atomNames.typecode((char *) "O1");  // gromacs force field 
00546   termtypes[3] = atomNames.typecode((char *) "O2");  // atom names
00547 
00548   // search for the DNA/RNA backbone;  the atom names are:
00549   // for the phosphate:  P, O1P, O2P, OP1, OP2
00550   // for the rest: O3', C3', C4', C5', O5'
00551   // (or O3*, C3*, C4*, C5*, O5*)
00552   int nuctypes[15];
00553   nuctypes[ 0] = atomNames.typecode((char *) "P");
00554   nuctypes[ 1] = atomNames.typecode((char *) "O1P"); // old PDB files
00555   nuctypes[ 2] = atomNames.typecode((char *) "O2P"); // old PDB files
00556   nuctypes[ 3] = atomNames.typecode((char *) "OP1"); // new PDB files
00557   nuctypes[ 4] = atomNames.typecode((char *) "OP2"); // new PDB files
00558   nuctypes[ 5] = atomNames.typecode((char *) "C3*");
00559   nuctypes[ 6] = atomNames.typecode((char *) "C3'");
00560   nuctypes[ 7] = atomNames.typecode((char *) "O3*");
00561   nuctypes[ 8] = atomNames.typecode((char *) "O3'");
00562   nuctypes[ 9] = atomNames.typecode((char *) "C4*");
00563   nuctypes[10] = atomNames.typecode((char *) "C4'");
00564   nuctypes[11] = atomNames.typecode((char *) "C5*");
00565   nuctypes[12] = atomNames.typecode((char *) "C5'");
00566   nuctypes[13] = atomNames.typecode((char *) "O5*");
00567   nuctypes[14] = atomNames.typecode((char *) "O5'");
00568 
00569 #if 0
00570   // non-backbone nucleic acid atom names
00571   nuctypes[ 3] = atomNames.typecode((char *) "C1*");
00572   nuctypes[ 4] = atomNames.typecode((char *) "C1'");
00573   nuctypes[ 5] = atomNames.typecode((char *) "C2*");
00574   nuctypes[ 6] = atomNames.typecode((char *) "C2'");
00575   nuctypes[ 7] = atomNames.typecode((char *) "O2*");
00576   nuctypes[ 8] = atomNames.typecode((char *) "O2'");
00577   nuctypes[15] = atomNames.typecode((char *) "O4*");
00578   nuctypes[16] = atomNames.typecode((char *) "O4'");
00579 #endif
00580 
00581   // special case for terminal nucleic residues
00582   int nuctermtypes[2];
00583   nuctermtypes[0] = atomNames.typecode((char *) "H5T"); // standard names
00584   nuctermtypes[1] = atomNames.typecode((char *) "H3T");
00585 
00586 
00587   // loop over all atoms assigning atom backbone type flags
00588   for (i=0; i<nAtoms; i++) {
00589     MolAtom *a = atom(i);
00590  
00591     // initialize atom type to non-backbone
00592     a->atomType = ATOMNORMAL;
00593 
00594     // check for protein backbone atom names
00595     for (j=0; j < 4; j++) {
00596       if (a->nameindex == protypes[j]) {
00597         a->atomType = ATOMPROTEINBACK;
00598         break;
00599       }
00600     }
00601 
00602     // check terminal residue names as well
00603     for (j=0; j < 4; j++) {
00604       if (a->nameindex == termtypes[j]) { // check if OT1, OT2
00605         for (k=0; k < a->bonds; k++) {
00606           if (atom(a->bondTo[k])->atomType == ATOMPROTEINBACK) {
00607             a->atomType = ATOMPROTEINBACK;
00608             break;
00609           }
00610         }
00611       }
00612     }
00613   
00614     // check if in nucleic backbone, if not already set
00615     if(!(a->atomType)) {
00616       for (j=0; j < 13; j++) {
00617         if (a->nameindex == nuctypes[j]) {
00618           a->atomType = ATOMNUCLEICBACK;
00619           break;
00620         }
00621       }
00622     }
00623 
00624     // check if nucleic terminal atom names
00625     for (j=0; j < 2; j++) {
00626       if (a->nameindex == nuctermtypes[j]) {
00627         for (k=0; k < a->bonds; k++) {
00628           if (atom(a->bondTo[k])->atomType == ATOMNUCLEICBACK) {
00629             a->atomType = ATOMNUCLEICBACK;
00630             break;
00631           }
00632         }
00633       }
00634     }
00635   }
00636 
00637   return 0; 
00638 }
00639 
00640 
00641 
00642 // find water molecules based on the residue name
00643 // from the documentation for molscript, these are possible
00644 // waters:
00645 // type H2O HH0 OHH HOH OH2 SOL WAT
00646 // as well, I add TIP, TIP2, TIP3, and TIP4
00647 // The count is the number of sets of connected RESWATERS
00648 int BaseMolecule::find_waters(void) {
00649   int i, j;
00650   MolAtom *a;
00651 
00652   int watertypes[12];
00653   watertypes[0] = resNames.typecode((char *) "H2O");
00654   watertypes[1] = resNames.typecode((char *) "HH0");
00655   watertypes[2] = resNames.typecode((char *) "OHH");
00656   watertypes[3] = resNames.typecode((char *) "HOH");
00657   watertypes[4] = resNames.typecode((char *) "OH2");
00658   watertypes[5] = resNames.typecode((char *) "SOL");
00659   watertypes[6] = resNames.typecode((char *) "WAT");
00660   watertypes[7] = resNames.typecode((char *) "TIP");
00661   watertypes[8] = resNames.typecode((char *) "TIP2");
00662   watertypes[9] = resNames.typecode((char *) "TIP3");
00663   watertypes[10] = resNames.typecode((char *) "TIP4");
00664   // this conflicts with a PDB compound:
00665   //   http://minerva.roca.csic.es/hicup/SPC/spc_pdb.txt
00666   //
00667   // XXX we should add a check to make sure that there are only 
00668   //     three atoms in the residue when all is said and done. If there
00669   //     are more, then we should undo the assignment as a water and mark it
00670   //     as protein etc as appropriate.  This is tricky since at this stage
00671   //     we haven't done any connectivity tests etc, and we're only working
00672   //     with individual atoms.  Perhaps its time to re-think this logic.
00673   watertypes[11] = resNames.typecode((char *) "SPC");
00674 
00675   for (i=0; i<nAtoms; i++) {
00676     a = atom(i);
00677     if (a->residueType == RESNOTHING) {  // make sure it isn't named yet
00678       for (j=0; j<12; j++) {
00679         if (watertypes[j] == a->resnameindex) {
00680           a->residueType = RESWATERS;
00681           break;
00682         }
00683       }
00684     }
00685   }
00686  
00687   int count = find_connected_waters2();
00688 
00689   return count;   
00690 }
00691 
00692 
00693 // if this is a RESWATERS with index idx, mark it and find if
00694 // any of its neighbors are RESWATERS
00695 // this does a depth-first search with RECURSION.
00696 void BaseMolecule::find_connected_waters(int i, char *tmp) {
00697   MolAtom *a = atom(i);
00698   int j;
00699   if (a->residueType == RESWATERS && !tmp[i]) {
00700     tmp[i] = TRUE;
00701     for (j=0; j<a->bonds; j++) {
00702       find_connected_waters(a->bondTo[j], tmp);
00703     }
00704   }
00705 }
00706 
00707 
00708 // if this is a RESWATERS with index idx, mark it and find if
00709 // any of its neighbors are RESWATERS
00710 int BaseMolecule::find_connected_waters2(void) {
00711   MolAtom *a;
00712   int count, i;
00713   IntStackHandle s;
00714 
00715   char *tmp = new char[nAtoms];
00716   memset(tmp, 0, nAtoms * sizeof(char));
00717 
00718   s = intstack_create(nAtoms);
00719 
00720   for (count=0, i=0; i<nAtoms; i++) {
00721     if (atom(i)->residueType == RESWATERS && !tmp[i]) {
00722       int nextatom;
00723 
00724       count++;
00725       intstack_push(s, i);
00726     
00727       // find and mark all connected waters 
00728       while (!intstack_pop(s, &nextatom)) { 
00729         int j;
00730 
00731         a = atom(nextatom);
00732         tmp[nextatom] = TRUE;
00733 
00734         for (j=a->bonds - 1; j>=0; j--) {
00735           int bi = a->bondTo[j];
00736           MolAtom *b = atom(bi);
00737           if (b->residueType == RESWATERS && !tmp[bi])
00738             intstack_push(s, bi);
00739         }
00740       }
00741     }
00742   }
00743 
00744   intstack_destroy(s);
00745   delete [] tmp;
00746 
00747   return count;
00748 }
00749 
00750 
00751 // find n backbone atoms connected together with the given residueid
00752 // return the total count
00753 // this assumes that the given atom (atomidx) is correct
00754 int BaseMolecule::find_connected_backbone(IntStackHandle s, int backbone,
00755                          int atomidx, int residueid, int tmpid, int *flgs) {
00756   if (flgs[atomidx] != 0)
00757     return 0; // already done
00758 
00759   MolAtom *x = atom(atomidx);
00760   if (x->atomType != backbone || x->resid != residueid)
00761     return 0; // not a backbone atom, or resid doesn't match
00762 
00763   intstack_popall(s); // just in case
00764   intstack_push(s, atomidx);
00765   int nextatom;
00766   int count = 0;
00767    
00768   // find and mark connected backbone atoms
00769   while (!intstack_pop(s, &nextatom)) {
00770     MolAtom *a = atom(nextatom);
00771     flgs[nextatom] = tmpid;
00772     count++;
00773 
00774     int j;
00775     for (j=a->bonds - 1; j>=0; j--) {
00776       int bi = a->bondTo[j];
00777       if (flgs[bi] == 0) {
00778         MolAtom *b = atom(bi);
00779 
00780         // skip connections to atoms on different chains/segnames
00781         if (a->chainindex != b->chainindex || 
00782             a->segnameindex != b->segnameindex)
00783           continue;
00784 
00785         if (b->atomType == backbone && b->resid == residueid)
00786           intstack_push(s, bi);
00787       }
00788     }
00789   }
00790 
00791   return count;
00792 }
00793 
00794 
00795 // the find_connected_backbone left terms of flgs which need to be cleaned up
00796 void BaseMolecule::clean_up_connection(IntStackHandle s, int i, int tmpid, int *flgs) {
00797   if (flgs[i] != tmpid)  // been here before
00798     return;
00799 
00800   intstack_popall(s); // just in case
00801   intstack_push(s, i);
00802   int nextatom;
00803  
00804   // find and null out non-matching atom flags
00805   while (!intstack_pop(s, &nextatom)) {
00806     flgs[nextatom] = 0;
00807     MolAtom *a = atom(nextatom);
00808     int j;
00809     for (j=a->bonds - 1; j>=0; j--) {
00810       int bi = a->bondTo[j];
00811       if (flgs[bi] == tmpid) {
00812         intstack_push(s, bi);
00813       }
00814     }
00815   }
00816 }
00817 
00818 
00819 
00820 // now that I know this resid is okay, mark it so
00821 void BaseMolecule::find_connected_atoms_in_resid(IntStackHandle s,
00822     int restype, int i, int residueid, int tmpid, int *flgs)
00823 {
00824   if (flgs[i] != 0 || atom(i)->resid != residueid)
00825     return;
00826 
00827   intstack_popall(s); // just in case
00828   intstack_push(s, i);
00829   int nextatom;
00830 
00831   // find and mark all connected residues in the same chain/segname
00832   while (!intstack_pop(s, &nextatom)) {
00833     flgs[nextatom] = tmpid;
00834     MolAtom *a = atom(nextatom);
00835     a->residueType = restype;
00836 
00837     int j;
00838     for (j=a->bonds - 1; j>=0; j--) {
00839       int bi = a->bondTo[j];
00840       MolAtom *b = atom(bi);
00841       if (flgs[bi] == 0 &&
00842           a->chainindex == b->chainindex &&
00843           a->segnameindex == b->segnameindex &&
00844           b->resid == residueid) {
00845         intstack_push(s, bi);
00846       }
00847     }
00848   }
00849 }
00850 
00851 
00852 
00853 // Find connected backbone atoms with the same resid
00854 // if found, find all the atoms with the same resid
00855 // which are connected to those backbone atoms only through
00856 // atoms of the same resid
00857 void BaseMolecule::find_and_mark(int n, int backbone,
00858   int restype, int *tmpid, int *flgs)
00859 {
00860   int i;
00861   MolAtom *a;
00862   int residueid; // the real resid
00863   IntStackHandle s = intstack_create(nAtoms);
00864 
00865   for (i=0; i<nAtoms; i++) {
00866     a = atom(i);   // look for a new backbone atom
00867     if (a->atomType == backbone && flgs[i] == 0) {
00868       residueid = a->resid;
00869       if (find_connected_backbone(s, backbone, i, residueid, *tmpid, flgs) >= n) {
00870         // if find was successful, start all over again
00871         clean_up_connection(s, i, *tmpid, flgs);
00872         // but mark all the Atoms connected to here
00873         find_connected_atoms_in_resid(s, restype, i, residueid, *tmpid, flgs);
00874         // and one more was made
00875         (*tmpid)++;
00876       } else {
00877         // clean things up so I won't have problems later
00878         clean_up_connection(s, i, *tmpid, flgs);
00879       }
00880     }
00881   }
00882 
00883   intstack_destroy(s);
00884 }
00885 
00886 
00887 
00888 // assign a uniq resid (uniq_resid) to each set of connected atoms
00889 // with the same residue id.  There could be many residues with the
00890 // same id, but not connected (the SSN problem - SSNs are not unique
00891 // so don't use them as the primary key)
00892 int BaseMolecule::make_uniq_resids(int *flgs) {
00893   int i;
00894   int num_residues = 0;
00895   IntStackHandle s = intstack_create(nAtoms);
00896 
00897   for (i=0; i<nAtoms; i++) {
00898     if (!flgs[i]) {  // not been numbered
00899       // find connected atoms to i with the same resid and label
00900       // it with the uniq_resid
00901       MolAtom *a = atom(i);
00902       int resid = a->resid;
00903       char *insertion = a->insertionstr;
00904 
00905       intstack_push(s, i);
00906       int nextatom;
00907 
00908       // Loop over all atoms we're bonded to in the same chain/segname
00909       while (!intstack_pop(s, &nextatom)) {
00910         MolAtom *a = atom(nextatom);
00911         a->uniq_resid = num_residues;  // give it the new resid number
00912         flgs[nextatom] = TRUE;         // mark this atom done
00913   
00914         int j;
00915         for (j=a->bonds - 1; j>=0; j--) {
00916           int bi = a->bondTo[j];
00917           if (flgs[bi] == 0) {
00918             MolAtom *b = atom(bi);
00919             if (a->chainindex == b->chainindex && 
00920                 a->segnameindex == b->segnameindex &&
00921                 b->resid == resid && !strcmp(b->insertionstr, insertion))
00922               intstack_push(s, bi);
00923           }
00924         }
00925       }
00926 
00927       num_residues++;
00928     }
00929   }
00930 
00931   intstack_destroy(s);
00932 
00933   return num_residues;
00934 }
00935 
00936 
00937 
00938 int BaseMolecule::find_residues(void) {
00939   int *flgs = new int[nAtoms]; // flags used for connected atom searches
00940   memset(flgs, 0, nAtoms * sizeof(int)); // clear flags array
00941   
00942   // assign a uniq resid (uniq_resid) to each set of connected atoms
00943   // with the same residue id.  There could be many residues with the
00944   // same id, but not connected (the SSN problem - SSNs are not unique
00945   // so don't use them as the primary key)
00946   int num_residues = make_uniq_resids(flgs);
00947    
00948   int back_res_count = 1; // tmp count of number of residues on some backbone
00949   memset(flgs, 0, nAtoms * sizeof(int)); // clear flags array
00950   
00951   //  hunt for the proteins
00952   // there must be 4 PROTEINBACK atoms connected and with the same resid
00953   // then all connected atoms will be marked as PROTEIN atoms
00954   // this gets everything except the terminals
00955   find_and_mark(4, ATOMPROTEINBACK, RESPROTEIN, &back_res_count, flgs);
00956   
00957   // do the same for nucleic acids
00958   // XXX we might not want to check for the phosphate (P and 2 O's).  Here's
00959   // the quick way I can almost do that.  Unfortionately, that
00960   // messes up nfragList, since it needs a P to detect an end
00961   find_and_mark(4, ATOMNUCLEICBACK, RESNUCLEIC, &back_res_count, flgs);
00962   
00963   delete [] flgs;
00964   return num_residues;
00965 }
00966 
00967 int BaseMolecule::find_atom_in_residue(const char *name, int residue) {
00968   int nametype = atomNames.typecode(name);
00969   if (nametype < 0)
00970     return -2;
00971 
00972   return find_atom_in_residue(nametype, residue);
00973 }
00974 
00975 
00976 // find which residues are connected to which
00977 // remember, I already have the uniq_id for each atom
00978 void BaseMolecule::find_connected_residues(int num_residues) {
00979   int i, j;
00980   for (i=0; i<num_residues; i++) {   // init the list to NULLs
00981     residueList.append(NULL);
00982   }
00983   
00984   for (i=nAtoms-1; i>=0; i--) {      // go through all the atoms
00985     j = atom(i)->uniq_resid;
00986     if (residueList[j] == NULL) {    // then init the residue
00987       residueList[j] = new Residue(atom(i)->resid, atom(i)->residueType);
00988     }
00989     // Tell the residue that this atom is in it
00990     residueList[j]->add_atom(i);
00991   }
00992 
00993   // double check that everything was created
00994   for (i=0; i<num_residues; i++) {
00995     if (residueList[i] == NULL) { // no atom was found for this residue
00996       msgErr << "Mysterious residue creation in ";
00997       msgErr << "BaseMolecule::find_connected_residues." << sendmsg;
00998       residueList[i] = new Residue((int) -1, RESNOTHING);
00999     }
01000   }
01001 
01002   // finally, check for unusual connections between residues, e.g. between
01003   // protein and water.
01004   for (i=0; i<num_residues; i++) {
01005     Residue *res = residueList[i];
01006     int bondfromtype = res->residueType;
01007     int numatoms = res->atoms.num();
01008     for (j=0; j<numatoms; j++) {
01009       MolAtom *a = atom(res->atoms[j]);
01010 
01011       // find off-residue bonds to residues of the same chain/segname
01012       int k;
01013       for (k=0; k<a->bonds; k++) {
01014         MolAtom *b = atom(a->bondTo[k]);
01015 
01016         // skip connections to atoms on different chains/segnames
01017         if (a->chainindex != b->chainindex || 
01018             a->segnameindex != b->segnameindex)
01019           continue;
01020          
01021         if (b->uniq_resid != i) {
01022           int bondtotype = residueList[b->uniq_resid]->residueType;
01023 
01024           if (bondfromtype != bondtotype) {
01025             if (i < b->uniq_resid) { // so that we only warn once
01026               msgWarn << "Unusual bond between residues:  ";
01027               msgWarn << residueList[i]->resid;
01028               switch (bondfromtype) {
01029                 case RESPROTEIN: msgWarn << " (protein)"; break;
01030                 case RESNUCLEIC: msgWarn << " (nucleic)"; break;
01031                 case RESWATERS:  msgWarn << " (waters)"; break;
01032                 default:
01033                 case RESNOTHING: msgWarn << " (none)"; break;
01034               }
01035               msgWarn << " and ";
01036               msgWarn << residueList[b->uniq_resid]->resid;
01037               switch (bondtotype) {
01038                 case RESPROTEIN: msgWarn << " (protein)"; break;
01039                 case RESNUCLEIC: msgWarn << " (nucleic)"; break;
01040                 case RESWATERS:  msgWarn << " (waters)"; break;
01041                 default:
01042                 case RESNOTHING: msgWarn << " (none)"; break;
01043               }
01044               msgWarn << sendmsg;
01045             }
01046           }
01047         }
01048       }
01049     }
01050   }
01051 }
01052 
01053 
01054 // find all the residues connected to a specific residue
01055 int BaseMolecule::find_connected_fragments(void) {
01056   int i;
01057   int count = 0;
01058   char *flgs = new char[residueList.num()]; // set up temp space
01059   memset(flgs, 0, residueList.num() * sizeof(char)); // clear flags
01060   IntStackHandle s = intstack_create(residueList.num());
01061 
01062   int atomsg = atomNames.typecode((char *) "SG"); // to find disulfide bonds
01063 
01064   int nextres;
01065   for (i=0; i<residueList.num(); i++) { // find unmarked fragment
01066     if (!flgs[i]) {
01067       fragList.append(new Fragment);
01068       intstack_push(s, i);
01069 
01070       // find and mark all connected residues with the same chain/segname
01071       while (!intstack_pop(s, &nextres)) {
01072         fragList[count]->append(nextres);
01073         Residue *res = residueList[nextres];
01074         res->fragment = count; // store residue's fragment
01075 
01076         int numatoms = res->atoms.num();
01077         int j;
01078         for (j=0; j<numatoms; j++) {
01079           MolAtom *a = atom(res->atoms[j]);
01080 
01081           // find all bonds to residues of the same chain/segname 
01082           int k;
01083           for (k=0; k<a->bonds; k++) {
01084             MolAtom *b = atom(a->bondTo[k]);
01085             int ri = b->uniq_resid;
01086 
01087             // skip connections to residues with different chains/segnames,
01088             // and don't follow disulfide bonds, as we want the order of
01089             // residue traversal to be correct so we can use it to build
01090             // subfragment lists later on
01091             if ((ri != i) &&
01092                 (flgs[ri] == 0) &&
01093                 (a->chainindex == b->chainindex) &&
01094                 (a->segnameindex == b->segnameindex) &&
01095                 ((a->nameindex != atomsg) || (b->nameindex != atomsg))) {
01096               flgs[ri] = TRUE;
01097               intstack_push(s, ri);
01098             }
01099           }
01100         }
01101       }
01102 
01103       count++;
01104     }
01105   }
01106 
01107   intstack_destroy(s);
01108   delete [] flgs;
01109 
01110   return count;
01111 }
01112 
01113 
01114 // find each collection of connected fragments
01115 int BaseMolecule::find_fragments(void) {
01116   int count = find_connected_fragments();  // find and mark its neighbors
01117 
01118 #if 1
01119   // find the protein subfragments
01120   find_subfragments(atomNames.typecode((char *) "N"), 
01121      -1,
01122      -1,
01123      atomNames.typecode((char *) "C"), 
01124      -1,
01125      -1,
01126      -1,
01127      RESPROTEIN, &pfragList);
01128 
01129 #if 0
01130   // find the nucleic acid subfragments
01131   find_subfragments(atomNames.typecode((char *) "P"), 
01132      atomNames.typecode((char *) "H5T"),
01133      -1,
01134      atomNames.typecode((char *) "O3'"),
01135      atomNames.typecode((char *) "O3*"),
01136      atomNames.typecode((char *) "H3T"),
01137      -1,
01138      RESNUCLEIC, &nfragList);
01139 #else
01140   // find the nucleic acid subfragments
01141   find_subfragments_topologically(
01142      RESNUCLEIC, &nfragList,
01143      atomNames.typecode((char *) "O3'"),
01144      atomNames.typecode((char *) "O3*"),
01145      atomNames.typecode((char *) "H3T"),
01146      -1);
01147 #endif
01148 #else
01149   find_subfragments_cyclic(&pfragList, RESPROTEIN);
01150   find_subfragments_cyclic(&nfragList, RESNUCLEIC);
01151 #endif
01152 
01153   // determine whether fragments are cyclic or not
01154   find_cyclic_subfragments(&pfragList, &pfragCyclic);
01155   find_cyclic_subfragments(&nfragList, &nfragCyclic);
01156 
01157   return count;
01158 }
01159 
01160 
01161 void BaseMolecule::find_subfragments_cyclic(ResizeArray<Fragment *> *subfragList, int restype) {
01162   int numfrags = fragList.num();
01163   int i, frag;
01164 
01165   // test each fragment to see if it's a candidate for the subfraglist
01166   for (frag=0; frag<numfrags; frag++) {
01167     int numres = fragList[frag]->num();       // residues in this frag
01168     int match=1; // start true, and falsify
01169 
01170     // check each residue to see they are all the right restype
01171     for (i=0; i<numres; i++) {
01172       int resid = (*fragList[frag])[i];
01173       if (residueList[resid]->residueType != restype) {
01174         match=0;
01175         break;
01176       }
01177     }
01178 
01179     // if we found a matching fragment, add it to the subfraglist
01180     if (match) {
01181       Fragment *frg = new Fragment;
01182 
01183       // add all of the residues for this fragment to the subfraglist
01184       for (i=0; i<numres; i++) {
01185         int resid = (*fragList[frag])[i];
01186         frg->append(resid);
01187       } 
01188 
01189       subfragList->append(frg);
01190     }    
01191   }
01192 }
01193 
01194 
01195 
01196 void BaseMolecule::find_cyclic_subfragments(ResizeArray<Fragment *> *subfragList, ResizeArray<int> *subfragCyclic) {
01197   int i, j, frag;
01198   int numfrags = subfragList->num();
01199 
01200   // check each fragment for cycles
01201   for (frag=0; frag<numfrags; frag++) {
01202     int numres   = (*subfragList)[frag]->num();       // residues in this frag
01203 
01204     // skip testing fragments containing zero residues
01205     if (numres < 1)
01206       continue;
01207 
01208     int startres = (*(*subfragList)[frag])[0];        // first residue
01209     int endres   = (*(*subfragList)[frag])[numres-1]; // last residue
01210     int cyclic   = 0;
01211 
01212     // check for bonds between startres and endres
01213     int numatoms = residueList[endres]->atoms.num();
01214     int done = 0;
01215     for (i=0; (i < numatoms) && (!done); i++) {
01216       MolAtom *a = atom(residueList[endres]->atoms[i]);
01217       int nbonds = a->bonds;
01218       for (j=0; j < nbonds; j++) {
01219         MolAtom *b = atom(a->bondTo[j]);
01220 
01221         if (b->uniq_resid == startres) {
01222           cyclic=1;
01223           done=1;
01224           break;
01225         }
01226       }  
01227     }
01228 
01229     // record whether this fragment is cyclic or not
01230     subfragCyclic->append(cyclic);
01231   }
01232 }
01233 
01234 
01235 // this adds the current residue type to the *subfragList,
01236 // this finds the residue connected to the endatom atom type
01237 // and calls this function recursively on that residue
01238 // this will NOT work across NORMAL bonds
01239 void BaseMolecule::find_connected_subfragment(int resnum, int fragnum, 
01240          char *flgs, int endatom,  int altendatom, 
01241          int alt2endatom, int alt3endatom,
01242          int restype, 
01243          ResizeArray<Fragment *> *subfragList)
01244 {
01245   if (flgs[resnum] || residueList[resnum]->residueType != restype) 
01246       return;  // been here before, or this is no good
01247   (*subfragList)[fragnum]->append(resnum);    // add to the list
01248   flgs[resnum] = TRUE;                        // and prevent repeats
01249 
01250   // find the atom in this residue with the right type
01251   int i, j, nextres;
01252   MolAtom *a;
01253   for (i=residueList[resnum]->atoms.num() - 1; i>=0; i--) {
01254     a = atom(residueList[resnum]->atoms[i]);
01255     if (a->nameindex == endatom ||
01256         a->nameindex == altendatom ||
01257         a->nameindex == alt2endatom ||
01258         a->nameindex == alt3endatom) {   // found the end atom
01259       for (j=a->bonds-1; j>=0; j--) {    // look at the bonds
01260         // I can't look at if the residue "bond" is a PRO-PRO or NUC-NUC, since
01261         // that won't tell me if the atom involved is the endatom atom
01262         // This is important because I need to avoid things like S-S bonds
01263         // (note that I never checked that the end was bonded to a start on
01264         //  the next residue! - c'est la vie, or something like that
01265         if ((!(a->atomType == ATOMNORMAL && atom(a->bondTo[j])->atomType == ATOMNORMAL)) && // not backbone 
01266             (nextres = atom(a->bondTo[j])->uniq_resid) != resnum &&
01267             !flgs[nextres] ) { // found next residue, and unvisited
01268           find_connected_subfragment(nextres, fragnum, flgs, endatom,
01269               altendatom, alt2endatom, alt3endatom, restype, subfragList);
01270           return; // only find one; assume no branching
01271         }
01272       } // end of for
01273     } // end of found correct endtype
01274   } // searching atoms
01275 } // end of finding connected subfragment
01276 
01277 
01278 // find a class of fragments, and add them to the subfragment list
01279 void BaseMolecule::find_subfragments(int startatom, 
01280           int altstartatom, int alt2startatom,
01281           int endatom, int altendatom, int alt2endatom, int alt3endatom,
01282           int restype, ResizeArray<Fragment *> *subfragList)
01283 {
01284   int i, j, k;
01285   MolAtom *a;
01286   char *flgs = new char[residueList.num()];
01287   memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
01288 
01289   // Loop over all residues looking for candidate residues that start
01290   // a fragment.  A fragment starting residue must be an unvisited 
01291   // residue which has an startatom with no off residue bond to 
01292   // the same restype
01293   for (i=residueList.num()-1; i>=0; i--) {
01294     // test for previous visit, and whether it's the restype we want
01295     if (!flgs[i] && residueList[i]->residueType == restype) {
01296       // does this residue have a matching startatom
01297       for (j=residueList[i]->atoms.num()-1; j>=0; j--) { 
01298         int satom = (a=atom(residueList[i]->atoms[j]))->nameindex;
01299         if (satom == startatom || 
01300             satom == altstartatom || 
01301             satom == alt2startatom){
01302           for (k=a->bonds-1; k>=0; k--) {
01303             MolAtom *bondto = atom(a->bondTo[k]);
01304             // are there any off-residue bonds to the same restype
01305             if (bondto->uniq_resid != i && bondto->residueType == restype) {
01306               break; // if so then stop, so that k>=0
01307             }
01308           }
01309 
01310           // if we found a valid fragment start atom, find residues downchain
01311           if (k<0) { 
01312             subfragList->append(new Fragment);
01313             find_connected_subfragment(i, subfragList->num()-1, flgs, 
01314                   endatom, altendatom, alt2endatom, alt3endatom,
01315                   restype, subfragList);
01316           } // found starting residue
01317         } // found startatom
01318       } // going through atoms
01319     } // found restype
01320   } // going through residues
01321 
01322   // found 'em all
01323   delete [] flgs;
01324 } 
01325 
01326 
01327 // find a class of fragments, and add them to the subfragment list
01328 void BaseMolecule::find_subfragments_topologically(int restype, 
01329   ResizeArray<Fragment *> *subfragList, 
01330   int endatom, int altendatom, int alt2endatom, int alt3endatom) {
01331   int i; 
01332   char *flgs = new char[residueList.num()];
01333   memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
01334   int numres = residueList.num();
01335 
01336   // Loop over all residues looking for candidate residues that start
01337   // a fragment.  A fragment starting residue must be an unvisited
01338   // residue which has an startatom with no off residue bond to
01339   // the same restype
01340   for (i=0; i<numres; i++) {
01341     Residue *res = residueList[i];
01342 
01343     // test for previous visit, and whether it's the restype we want
01344     if (!flgs[i] && res->residueType == restype) {
01345       // if this residue only has 1 bond to a residue of the same restype
01346       // it must be a terminal residue
01347       int offresbondcount = 0;
01348       int j, k;
01349       int numatoms = res->atoms.num();
01350       for (j=0; j<numatoms; j++) {
01351         MolAtom *a = atom(res->atoms[j]);
01352 
01353         // find off-residue bonds
01354         for (k=0; k<a->bonds; k++) {
01355           MolAtom *b = atom(a->bondTo[k]);
01356           if (b->uniq_resid != i && 
01357               residueList[b->uniq_resid]->residueType == restype) {
01358             offresbondcount++;
01359           }
01360         }
01361       }
01362 
01363       // if we found a valid fragment start atom, find residues downchain
01364       if (offresbondcount == 1) {
01365         subfragList->append(new Fragment);
01366         find_connected_subfragment(i, subfragList->num()-1, flgs,
01367               endatom, altendatom, alt2endatom, alt3endatom,
01368               restype, subfragList);
01369       }
01370     } // found restype
01371   } // going through residues
01372 
01373   // found 'em all
01374   delete [] flgs;
01375 }
01376 
01377 
01378 #if defined(VMDFASTRIBBONS)
01379 // routine to pre-calculate lists of control point indices to
01380 // be used by the ribbon/cartoon representations
01381 void BaseMolecule::calculate_ribbon_controlpoints() {
01382   int onum, canum, frag, num, res;
01383 
01384   vmd_timerhandle tm = vmd_timer_create();
01385 
01386   msgInfo << "XXX Building pre-computed control point lists" << sendmsg;
01387 
01388   vmd_timer_start(tm);
01389 
01390   // Lookup atom typecodes ahead of time so we can use the most
01391   // efficient variant of find_atom_in_residue() in the main loop.
01392   // If we can't find the atom types we need, bail out immediately
01393   int CAtypecode  = atomNames.typecode("CA");
01394   int Otypecode   = atomNames.typecode("O");
01395   int OT1typecode = atomNames.typecode("OT1");
01396    
01397   // We can't draw a ribbon without CA and O atoms for guidance!
01398   if (CAtypecode < 0) {
01399     msgErr << "This structure has no identifiable CA atoms,"
01400            << "ribbon and cartoon representations will not be usable." 
01401            << sendmsg;
01402     return;
01403   }
01404   if ((Otypecode < 0) && (OT1typecode < 0)) {
01405     msgErr << "This structure has no identifiable Oxygen atoms,"
01406            << "ribbon and cartoon representations will not be usable." 
01407            << sendmsg;
01408     return;
01409   }
01410 
01411   //
01412   // calculate control point lists for the protein fragments
01413   //
01414 
01415   msgInfo << "XXX Building protein control point lists" << sendmsg;
01416 
01417   // go through each protein and find the CA and O atoms which are
01418   // eventually used to construct control points and perpendicular vectors
01419   ResizeArray<int> cpind;
01420   for (frag=0; frag<pfragList.num(); frag++) {
01421     num = pfragList[frag]->num();  // number of residues in this fragment
01422     if (num < 2) {
01423       // with less than two residues, we can't do anything useful, so skip
01424       nfragCPList.append(NULL);
01425       continue;
01426     }
01427 
01428     // check that we have a valid structure before continuing
01429       res = (*pfragList[fra