Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

BaseMolecule.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: BaseMolecule.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.246 $      $Date: 2012/05/29 21:37:11 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * Base class for all molecules, without display-specific information.  This
00020  * portion of a molecule contains the structural data, and all routines to
00021  * find the structure (backbone, residues, etc).  It does NOT contain the
00022  * animation list; that is maintained by Molecule (which is derived from
00023  * this class).
00024  *
00025  ***************************************************************************/
00026 
00027 #include <ctype.h>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include "Inform.h"
00031 #include "utilities.h"
00032 #include "intstack.h"
00033 #include "WKFUtils.h"
00034 
00035 #include "BaseMolecule.h"
00036 #include "VolumetricData.h"
00037 #include "QMData.h"
00038 #include "QMTimestep.h"
00039 
00040 #ifdef VMDWITHCARBS
00041 #include <vector> /* XXX this needs to go, gives trouble with MSVC */
00042 #endif
00043 
00044 #define MAXBONDERRORS 25
00045 
00047 
00048 BaseMolecule::BaseMolecule(int myID) : residueList(10) , fragList(1),
00049 #if defined(VMDWITHCARBS)
00050      pfragList(1), nfragList(1), smallringList(), smallringLinkages(), currentMaxRingSize(-1), currentMaxPathLength(-1), ID(myID) {
00051 #else
00052      pfragList(1), nfragList(1), ID(myID) {
00053 #endif
00054 
00055   // initialize all variables
00056   nAtoms = 0;
00057   cur_atom = 0;
00058   atomList = NULL;
00059   moleculename = NULL;
00060   lastbonderratomid=-1;
00061   bonderrorcount=0;
00062   datasetflags=NODATA;
00063   qm_data = NULL;
00064   radii_minmax_need_update = 1;
00065   radii_min = 0.0f;
00066   radii_max = 0.0f;
00067 }
00068 
00069 
00071 
00072 BaseMolecule::~BaseMolecule(void) {
00073   int i;
00074 
00075 #if 0
00076   // XXX still need to track down ringlist leak(s)
00077   // delete carbohydrate ring data structures
00078   smallringList.clear();
00079   smallringLinkages.clear();
00080 #endif
00081 
00082   // delete structural data
00083   delete [] atomList;
00084   for (i=0; i<residueList.num(); i++) {
00085     delete residueList[i];
00086   }
00087   for (i=0; i<nfragList.num(); i++) {
00088     delete nfragList[i];
00089   }
00090 #if defined(VMDFASTRIBBONS)
00091   for (i=0; i<nfragCPList.num(); i++) {
00092     delete [] nfragCPList[i];
00093   }
00094 #endif
00095   for (i=0; i<pfragList.num(); i++) {
00096     delete pfragList[i];
00097   }
00098 #if defined(VMDFASTRIBBONS)
00099   for (i=0; i<pfragCPList.num(); i++) {
00100     delete [] pfragCPList[i];
00101   }
00102 #endif
00103   for (i=0; i<fragList.num(); i++) {
00104     delete fragList[i];
00105   }
00106   for (i=0; i<volumeList.num(); i++) {
00107     delete volumeList[i];
00108   }
00109   for (i=0; i<extraflt.num(); i++) {
00110     delete [] extraflt.data(i);
00111   }
00112   for (i=0; i<extraint.num(); i++) {
00113     delete [] extraint.data(i);
00114   }
00115   if (qm_data)
00116     delete qm_data;
00117 }
00118 
00119 
00121 
00122 // initialize the atom list ... should be called before adding any atoms
00123 int BaseMolecule::init_atoms(int n) {
00124 
00125   if(n <= 0) {
00126     msgErr << "BaseMolecule: init_atoms called with invalid number of atoms: "
00127            << n << sendmsg;
00128     return FALSE;
00129   }
00130   if (cur_atom != 0 && nAtoms != n) {
00131     msgErr << "BaseMolecule: attempt to init atoms while structure building in progress!" << sendmsg;
00132     return FALSE;
00133   }
00134 
00135   if (!atomList) {
00136     int i;
00137     // first call to init_atoms
00138     nAtoms = n; // only place where nAtoms is set!
00139     atomList = new MolAtom[nAtoms];
00140     memset(atomList, 0, nAtoms*sizeof(MolAtom));
00141 
00142     // initialize NULL extra data field, which is returned when
00143     // querying a non-existent field with extra*.data("fielddoesntexist")
00144     extraflt.add_name("NULL", NULL);
00145     extraint.add_name("NULL", NULL);
00146 
00147     // initialize "default" extra data fields.
00148     extraflt.add_name("beta", new float[nAtoms]);
00149     extraflt.add_name("occupancy", new float[nAtoms]);
00150     extraflt.add_name("charge", new float[nAtoms]);
00151     extraflt.add_name("mass", new float[nAtoms]);
00152     extraflt.add_name("radius", new float[nAtoms]);
00153     for (i=0; i<extraflt.num(); i++) {
00154       void *data = extraflt.data(i);
00155       if (data != NULL) 
00156         memset(data, 0, nAtoms*sizeof(float));
00157     }
00158 
00159     // initialize "default" extra integer data fields.
00160     for (i=0; i<extraint.num(); i++) {
00161       void *data = extraint.data(i);
00162       if (data != NULL)
00163         memset(data, 0, nAtoms*sizeof(int));
00164     }
00165     return TRUE;
00166   }
00167   if (n != nAtoms) {
00168     msgErr << "The number of atoms in this molecule has already been assigned."
00169            << sendmsg;
00170     return FALSE;
00171   }
00172   return TRUE;
00173 }
00174 
00175 // add a new atom; return it's index, or (-1) if error.
00176 int BaseMolecule::add_atom(char *name, char *atomtype, int atomicnumber, 
00177     char *resname, int resid, const char *chain, const char *segname, 
00178     char *insertion, const char *altloc) {
00179   int nameindex, typeindex;
00180   int resnameindex, segnameindex, altlocindex, chainindex;
00181 
00182   if (!atomList || cur_atom >= nAtoms) {
00183     msgErr << "BaseMolecule: Cannot add new atom; currently " << nAtoms;
00184     msgErr << " atoms in structure." << sendmsg;
00185     return (-1);
00186   }
00187 
00188   // create atom  
00189   MolAtom *newatom = atom(cur_atom);
00190   newatom->init(cur_atom, resid, insertion);
00191 
00192   // add names to namelist, and put indices in MolAtom object
00193   nameindex = atomNames.add_name(name, atomNames.num());
00194   typeindex = atomTypes.add_name(atomtype, atomTypes.num());
00195   resnameindex = resNames.add_name(resname, resNames.num());
00196   segnameindex = segNames.add_name(segname, segNames.num());
00197   altlocindex = altlocNames.add_name(altloc, altlocNames.num());
00198 
00199   // use default of 'X' for chain if not given
00200   if(!chain || ! (*chain) || *chain == ' ')
00201     chainindex = chainNames.add_name("X", chainNames.num());
00202   else
00203     chainindex = chainNames.add_name(chain, chainNames.num());
00204 
00205   // set atom member variables
00206   newatom->nameindex = nameindex;
00207   newatom->typeindex = typeindex;
00208   newatom->atomicnumber = atomicnumber;
00209   newatom->resnameindex = resnameindex;
00210   newatom->segnameindex = segnameindex;
00211   newatom->altlocindex = altlocindex;
00212   newatom->chainindex = chainindex;
00213 
00214   // check for integer overflow/wraparound condition, which can occur
00215   // if an evil plugin defines 100,000 unique atom names, for example
00216   if (newatom->nameindex != nameindex ||
00217       newatom->typeindex != typeindex ||
00218       newatom->atomicnumber != atomicnumber ||
00219       newatom->resnameindex != resnameindex ||
00220       newatom->segnameindex != segnameindex ||
00221       newatom->altlocindex != altlocindex ||
00222       newatom->chainindex != chainindex) {
00223     msgErr << "BaseMolecule: Cannot add atom; namelist index value too large." << sendmsg;
00224     msgErr << "Recompile VMD with larger index types." << sendmsg;
00225     msgErr << "Atom namelist index values at time of overflow:" << sendmsg;
00226     msgErr << "  nameindex: " << nameindex << sendmsg;;
00227     msgErr << "  typeindex: " << typeindex << sendmsg;;
00228     msgErr << "  resnameindex: " << resnameindex << sendmsg;;
00229     msgErr << "  segnameindex: " << segnameindex << sendmsg;;
00230     msgErr << "  altlocindex: " << altlocindex << sendmsg;;
00231     msgErr << "  chainindex: " << chainindex << sendmsg;
00232     return -1;
00233   }
00234 
00235   return cur_atom++;
00236 }
00237 
00238 
00239 // add a new bond; return 0 on success, or (-1) if error.
00240 int BaseMolecule::add_bond(int a, int b, float bondorder, 
00241                            int bondtype, int backbonetype) {
00242   if (!nAtoms || a >= nAtoms || b >= nAtoms) {
00243     msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
00244     return (-1);
00245   } 
00246 
00247   if (a == b) {
00248     msgErr << "BaseMolecule: Cannot bond atom " <<a<< " to itself." << sendmsg;
00249     return (-1);
00250   }
00251 
00252   // put the bond in the atom list
00253   if (atom(a)->add_bond(b, backbonetype)) {
00254     if (bonderrorcount < MAXBONDERRORS) {
00255       if (lastbonderratomid != a) {
00256         msgErr << "MolAtom " << a << ": Exceeded maximum number of bonds ("
00257                << atom(a)->bonds << ")." << sendmsg;
00258         lastbonderratomid=a;
00259         bonderrorcount++;
00260       }
00261     } else if (bonderrorcount == MAXBONDERRORS) {
00262       msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
00263       msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
00264       bonderrorcount++;
00265     }
00266     return (-1);
00267   }
00268 
00269   if (atom(b)->add_bond(a, backbonetype)) {
00270     if (bonderrorcount < MAXBONDERRORS) {
00271       if (lastbonderratomid != b) {
00272         msgErr << "MolAtom " << b << ": Exceeded maximum number of bonds ("
00273                << atom(b)->bonds << ")." << sendmsg;
00274         lastbonderratomid=b;
00275         bonderrorcount++;
00276       }
00277     } else if (bonderrorcount == MAXBONDERRORS) {
00278       msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
00279       msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
00280       bonderrorcount++;
00281     }
00282     return (-1);
00283   }
00284 
00285   // store bond orders and types
00286   setbondorder(a, atom(a)->bonds-1, bondorder);
00287   setbondorder(b, atom(b)->bonds-1, bondorder);
00288 
00289   setbondtype(a, atom(a)->bonds-1, bondtype);
00290   setbondtype(b, atom(b)->bonds-1, bondtype);
00291 
00292   return 0;
00293 }
00294 
00295 // Add a bond to a structure but check to make sure there isn't a 
00296 // duplicate, as may be the case when merging bondlists from a file 
00297 // and from a distance-based bond search
00298 int BaseMolecule::add_bond_dupcheck(int a, int b, float bondorder, int bondtype) {
00299   int i;
00300 
00301   if (!nAtoms || a >= nAtoms || b >= nAtoms) {
00302     msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
00303     return (-1);
00304   }
00305 
00306   MolAtom *atm = atom(a);
00307   int nbonds = atm->bonds;
00308   const int *bonds = &atm->bondTo[0];
00309   for (i=0; i<nbonds; i++) {
00310     if (bonds[i] == b) {
00311       return 0; // skip bond that already exists
00312     }
00313   }
00314   add_bond(a, b, bondorder, bondtype); // add it if it doesn't already exist
00315 
00316   return 0;
00317 }
00318 
00319 int BaseMolecule::add_angle(int a, int b, int c, int type) {
00320   int i,n;
00321   // make sure that a < c to make it easier to find duplicates later.
00322   if (a > c) { i = a; a = c; c = i; }
00323   
00324   angles.append(a); 
00325   angles.append(b); 
00326   angles.append(c);
00327 
00328   n = num_angles()-1;
00329   set_angletype(n, type);
00330   return n;
00331 }
00332 
00333 
00334 int BaseMolecule::set_angletype(int nangle, int type)
00335 {
00336   int i;
00337 
00338   // type -1 is the anonymous type and is handled special w/o storage.
00339   // we also bail out in case we get an illegal angle index.
00340   if ((type < 0) || (nangle >= num_angles()))
00341     return -1;
00342 
00343   // fill array if not there
00344   if (angleTypes.num() <= nangle) {
00345     set_dataset_flag(BaseMolecule::ANGLETYPES);
00346     for (i=angleTypes.num(); i < num_angles(); i++)
00347       angleTypes.append(-1);
00348   }
00349   
00350   angleTypes[nangle] = type;
00351   return type;
00352 }
00353 
00354 int BaseMolecule::get_angletype(int nangle)
00355 {
00356   if ((nangle < 0) || (nangle >= angleTypes.num()))
00357     return -1;
00358 
00359   return angleTypes[nangle];
00360 }
00361 
00362 
00363 int BaseMolecule::add_dihedral(int a, int b, int c, int d, int type) {
00364   int i, j, n;
00365   // make sure that b < c so that it is easier to find duplicates later.
00366   if (b > c) { i = a; j = b ; a = d ; b = c; d = i; c = j; }
00367   
00368   dihedrals.append(a); 
00369   dihedrals.append(b); 
00370   dihedrals.append(c);
00371   dihedrals.append(d);
00372 
00373   n = num_dihedrals()-1;
00374   set_dihedraltype(n, type);
00375   return n;
00376 }
00377 
00378 
00379 int BaseMolecule::set_dihedraltype(int ndihedral, int type)
00380 {
00381   int i;
00382   
00383   // type -1 is the anonymous type and is handled special w/o storage.
00384   // we also bail out in case we get an illegal dihedral index.
00385   if ((type < 0) || (ndihedral >= num_dihedrals()))
00386     return -1;
00387 
00388   // fill array if not there
00389   if (dihedralTypes.num() <= ndihedral) {
00390     set_dataset_flag(BaseMolecule::ANGLETYPES);
00391     for (i=dihedralTypes.num(); i < num_dihedrals(); i++)
00392       dihedralTypes.append(-1);
00393   }
00394   
00395   dihedralTypes[ndihedral] = type;
00396   return type;
00397 }
00398 
00399 int BaseMolecule::get_dihedraltype(int ndihedral)
00400 {
00401   if ((ndihedral < 0) || (ndihedral >= dihedralTypes.num()))
00402     return -1;
00403 
00404   return dihedralTypes[ndihedral];
00405 }
00406 
00407 int BaseMolecule::add_improper(int a, int b, int c, int d, int type) {
00408   int i, j, n;
00409   // make sure that b < c so that it is easier to find duplicates later.
00410   if (b > c) { i = a; j = b ; a = d ; b = c; d = i; c = j; }
00411   
00412   impropers.append(a); 
00413   impropers.append(b); 
00414   impropers.append(c);
00415   impropers.append(d);
00416 
00417   n = num_impropers()-1;
00418   set_impropertype(n, type);
00419   return n;
00420 }
00421 
00422 
00423 int BaseMolecule::set_impropertype(int nimproper, int type)
00424 {
00425   int i;
00426   
00427   // type -1 is the anonymous type and is handled special w/o storage.
00428   // we also bail out in case we get an illegal improper index.
00429   if ((type < 0) || (nimproper >= num_impropers()))
00430     return -1;
00431 
00432   // fill array if not there
00433   if (improperTypes.num() <= nimproper) {
00434     set_dataset_flag(BaseMolecule::ANGLETYPES);
00435     for (i=improperTypes.num(); i < num_impropers(); i++)
00436       improperTypes.append(-1);
00437   }
00438   
00439   improperTypes[nimproper] = type;
00440   return type;
00441 }
00442 
00443 int BaseMolecule::get_impropertype(int nimproper)
00444 {
00445   if ((nimproper < 0) || (nimproper >= improperTypes.num()))
00446     return -1;
00447 
00448   return improperTypes[nimproper];
00449 }
00450 
00452 
00453 void BaseMolecule::setbondorder(int atom, int bond, float order) {
00454   float *bondOrders = extraflt.data("bondorders");
00455 
00456   // if not already there, add it
00457   if (bondOrders == NULL) {
00458     if (order != 1) {
00459       int i;
00460       extraflt.add_name("bondorders", new float[nAtoms*MAXATOMBONDS]);
00461       bondOrders = extraflt.data("bondorders");
00462       for (i=0; i<nAtoms*MAXATOMBONDS; i++)
00463         bondOrders[i] = 1.0f;    
00464 
00465       bondOrders[atom * MAXATOMBONDS + bond] = order;
00466     } 
00467     return;
00468   }
00469 
00470   bondOrders[atom * MAXATOMBONDS + bond] = order;
00471 }
00472 
00473 float BaseMolecule::getbondorder(int atom, int bond) {
00474   float *bondOrders = extraflt.data("bondorders");
00475 
00476   // if not already there, add it
00477   if (bondOrders == NULL) { 
00478     return 1;
00479   }
00480    
00481   return bondOrders[atom * MAXATOMBONDS + bond];
00482 }
00483 
00484 
00485 void BaseMolecule::setbondtype(int atom, int bond, int type) {
00486   int *bondTypes = extraint.data("bondtypes");
00487 
00488   // if not already there, add it
00489   if (bondTypes == NULL) {
00490     if (type != -1) {
00491       int i;
00492       extraint.add_name("bondtypes", new int[nAtoms*MAXATOMBONDS]);
00493       bondTypes = extraint.data("bondtypes");
00494       for (i=0; i<nAtoms*MAXATOMBONDS; i++)
00495         bondTypes[i] = -1;    
00496 
00497       bondTypes[atom * MAXATOMBONDS + bond] = type;
00498     } 
00499     return;
00500   }
00501 
00502   bondTypes[atom * MAXATOMBONDS + bond] = type;
00503 }
00504 
00505 int BaseMolecule::getbondtype(int atom, int bond) {
00506   int *bondTypes = extraint.data("bondtypes");
00507 
00508   // if not already there, add it
00509   if (bondTypes == NULL) { 
00510     return -1;
00511   }
00512    
00513   return bondTypes[atom * MAXATOMBONDS + bond];
00514 }
00515 
00516 
00517 // return the Nth residue
00518 Residue *BaseMolecule::residue(int n) {
00519   return residueList[n];
00520 }
00521 
00522 
00523 // return the Nth fragment
00524 Fragment *BaseMolecule::fragment(int n) {
00525   return fragList[n];
00526 }
00527 
00528 
00529 // given an atom index, return the residue object for the residue it
00530 // is in.  If it is not in a residue, return NULL.
00531 Residue *BaseMolecule::atom_residue(int n) {
00532   MolAtom *atm = atom(n);
00533   if(atm->uniq_resid < 0)
00534     return NULL;
00535   else
00536     return residue(atm->uniq_resid);
00537 }
00538 
00539 
00540 // given an atom index, return the fragment object for the fragment it
00541 // is in.  If it is not in a fragment, return NULL.
00542 Fragment *BaseMolecule::atom_fragment(int n) {
00543   MolAtom *atm = atom(n);
00544   int frag = residue(atm->uniq_resid)->fragment;
00545   if(frag < 0)
00546     return NULL;
00547   else
00548     return fragment(frag);
00549 }
00550 
00551 // return a 'default' value for a given atom name
00552 float BaseMolecule::default_radius(char *nm) {
00553   float val = 1.5;
00554   // some names start with a number
00555   while (*nm && isdigit(*nm))
00556     nm++;
00557   if(nm) {
00558     switch(toupper(nm[0])) {
00559       // These are similar to the values used by X-PLOR with sigma=0.8
00560       // see page 50 of the X-PLOR 3.1 manual
00561       case 'H' : val = 1.00f; break;
00562       case 'C' : val = 1.50f; break;
00563       case 'N' : val = 1.40f; break;
00564       case 'O' : val = 1.30f; break;
00565       case 'F' : val = 1.20f; break;
00566       case 'S' : val = 1.90f; break;
00567     }
00568   }
00569 
00570   return val;
00571 }
00572 
00573 
00574 // return a 'default' value for a given atom name
00575 float BaseMolecule::default_mass(char *nm) {
00576   float val = 12.0;
00577 
00578   // some names start with a number
00579   while (*nm && isdigit(*nm)) nm++;
00580   if(nm) {
00581     switch(toupper(nm[0])) {
00582       case 'H' : val = 1.00800f; break;
00583       case 'C' : val = 12.01100f; break;
00584       case 'N' : val = 14.00700f; break;
00585       case 'O' : val = 15.99900f; break;
00586       case 'F' : val = 55.84700f; break;
00587       case 'P' : val = 30.97376f; break;
00588       case 'S' : val = 32.06000f; break;
00589     }
00590     if      (toupper(nm[0] == 'C') && toupper(nm[1] == 'L')) val = 35.453f;
00591     else if (toupper(nm[0] == 'N') && toupper(nm[1] == 'A')) val = 22.989770f;
00592     else if (toupper(nm[0] == 'M') && toupper(nm[1] == 'G')) val = 24.3050f;
00593   }
00594 
00595   return val;
00596 }
00597 
00598 
00599 float BaseMolecule::default_charge(char *) {
00600   // return 0 for everything; later, when we put in a more reliable
00601   // system for determining the charge that's user-configurable,
00602   // we can start assigning more realistic charges.
00603   return 0.0f;
00604 }
00605 
00606 
00607 // count the number of unique bonds in the structure
00608 int BaseMolecule::count_bonds(void) {
00609   int i, j;
00610   int count=0;
00611 
00612   for (i=0; i<nAtoms; i++) {
00613     int nbonds = atomList[i].bonds;
00614     const int *bonds = &atomList[i].bondTo[0];
00615 
00616     for (j=0; j<nbonds; j++) {
00617       if (bonds[j] > i)
00618         count++;
00619     }
00620   }
00621 
00622   return count;
00623 }
00624 
00625 
00626 void BaseMolecule::clear_bonds(void) {
00627   int i;
00628   for (i=0; i<nAtoms; i++)
00629     atomList[i].bonds = 0;
00630 }
00631 
00632 
00633 // analyze the molecule for more than just the atom/bond information
00634 // This is here since it is called _after_ the molecule is added to
00635 // the MoleculeList.  Thus, there is a Tcl callback to allow the
00636 // user to update the bond information (or other fields?) before
00637 // the actual search.
00638 void BaseMolecule::analyze(void) {
00639   need_find_bonds = 0; // at this point it's too late
00640 
00641   // I have to let 0 atoms in because I want to be able to read things
00642   // like electron density maps, which have no atoms.
00643   // It is kinda wierd, then to make BaseMolecule be at the top of the
00644   // heirarchy.  Oh well.
00645   if(nAtoms < 1)
00646     return;
00647 
00648   // call routines to find different characteristics of the molecule
00649   msgInfo << "Analyzing structure ..." << sendmsg;
00650   msgInfo << "   Atoms: " << nAtoms << sendmsg;
00651 
00652   // count unique bonds/angles/dihedrals/impropers/cterms
00653   msgInfo << "   Bonds: " << count_bonds() << sendmsg;
00654   msgInfo << "   Angles: " << num_angles()
00655           << "  Dihedrals: " << num_dihedrals()
00656           << "  Impropers: " << num_impropers()
00657           << "  Cross-terms: " << num_cterms()
00658           << sendmsg;
00659 
00660   msgInfo << "   Bondtypes: " << bondTypeNames.num()
00661           << "  Angletypes: " << angleTypeNames.num()
00662           << "  Dihedraltypes: " << dihedralTypeNames.num()
00663           << "  Impropertypes: " << improperTypeNames.num()
00664           << sendmsg;
00665 
00666   // restore residue and fragment lists to pristine state
00667   residueList.clear();
00668   fragList.clear();
00669   pfragList.clear();   
00670   pfragCyclic.clear(); 
00671 #if defined(VMDFASTRIBBONS)
00672   pfragCPList.clear(); 
00673 #endif
00674   nfragList.clear();   
00675   nfragCyclic.clear(); 
00676 #if defined(VMDFASTRIBBONS)
00677   nfragCPList.clear(); 
00678 #endif
00679 
00680   // assign per-atom backbone types
00681   find_backbone();
00682 
00683   // find all the atoms in a resid connected to DNA/RNA/PROTEIN/WATER
00684   // also, assign a unique resid (uniq_resid) to each atom
00685   nResidues = find_residues();
00686   msgInfo << "   Residues: " << nResidues << sendmsg;
00687 
00688   nWaters = find_waters();
00689   msgInfo << "   Waters: " << nWaters << sendmsg;
00690   
00691   // determine which residues are connected to each other
00692   bonderrorcount=0; // reset error count before residue connectivity search
00693   find_connected_residues(nResidues); 
00694 
00695  
00696   nSegments = find_segments(); 
00697   msgInfo << "   Segments: " << nSegments << sendmsg;
00698 
00699   nFragments = find_fragments();
00700   msgInfo << "   Fragments: " << nFragments;
00701 
00702   nProteinFragments = pfragList.num();
00703   msgInfo << "   Protein: " << nProteinFragments;
00704 
00705   nNucleicFragments = nfragList.num();
00706   msgInfo << "   Nucleic: " << nNucleicFragments << sendmsg;
00707   
00708   // NOTE: The current procedure incorrectly identifies some lipid 
00709   // atoms as "ATOMNUCLEICBACK" (but not as "nucleic") as well
00710   // as some single water oxygens as "backbone". Here, we 
00711   // correct this by setting all atoms of non-polymeric residue types
00712   // to be "ATOMNORMAL" (i.e.: not backbone).
00713   MolAtom *a;
00714   int i;
00715   for (i=0; i<nAtoms; i++) {
00716     a = atom(i); 
00717     if ((a->residueType != RESNUCLEIC) && (a->residueType != RESPROTEIN)) 
00718       a->atomType = ATOMNORMAL;
00719   }
00720 
00721   // Search for hydrogens
00722   // XXX Must be done after the rest of the structure finding routines,
00723   // because those routines assume that anything that isn't NORMAL is
00724   // a backbone atom.
00725   // We use the name-based definition used in the IS_HYDROGEN macro
00726   for (i=0; i<nAtoms; i++) {
00727     MolAtom *a = atom(i);
00728     const char *aname = atomNames.name(a->nameindex);
00729     if (aname != NULL && IS_HYDROGEN(aname))
00730       a->atomType = ATOMHYDROGEN;
00731   }
00732 
00733 #if defined(VMDFASTRIBBONS)
00734   calculate_ribbon_controlpoints();
00735 #endif
00736   
00737 }
00738 
00739 
00741 int BaseMolecule::find_backbone(void) {
00742   int i, j, k;
00743 
00744   // Search for the protein backbone
00745   int protypes[4];
00746   protypes[0] = atomNames.typecode((char *) "CA");
00747   protypes[1] = atomNames.typecode((char *) "C");
00748   protypes[2] = atomNames.typecode((char *) "O");
00749   protypes[3] = atomNames.typecode((char *) "N");
00750 
00751   // special case for terminal oxygens that miss the search for O
00752   // by looking for ones connected to a C
00753   int termtypes[5];
00754   termtypes[0] = atomNames.typecode((char *) "OT1"); // standard PDB names
00755   termtypes[1] = atomNames.typecode((char *) "OT2");
00756   termtypes[2] = atomNames.typecode((char *) "OXT"); // synonym for OT2
00757   termtypes[3] = atomNames.typecode((char *) "O1");  // Gromacs force field 
00758   termtypes[4] = atomNames.typecode((char *) "O2");  // atom names
00759 
00760   // search for the DNA/RNA backbone;  the atom names are:
00761   // for the phosphate:  P, O1P, O2P, OP1, OP2
00762   // for the rest: O3', C3', C4', C5', O5'
00763   // (or O3*, C3*, C4*, C5*, O5*)
00764   int nuctypes[15];
00765   nuctypes[ 0] = atomNames.typecode((char *) "P");
00766   nuctypes[ 1] = atomNames.typecode((char *) "O1P"); // old PDB files
00767   nuctypes[ 2] = atomNames.typecode((char *) "O2P"); // old PDB files
00768   nuctypes[ 3] = atomNames.typecode((char *) "OP1"); // new PDB files
00769   nuctypes[ 4] = atomNames.typecode((char *) "OP2"); // new PDB files
00770   nuctypes[ 5] = atomNames.typecode((char *) "C3*");
00771   nuctypes[ 6] = atomNames.typecode((char *) "C3'");
00772   nuctypes[ 7] = atomNames.typecode((char *) "O3*");
00773   nuctypes[ 8] = atomNames.typecode((char *) "O3'");
00774   nuctypes[ 9] = atomNames.typecode((char *) "C4*");
00775   nuctypes[10] = atomNames.typecode((char *) "C4'");
00776   nuctypes[11] = atomNames.typecode((char *) "C5*");
00777   nuctypes[12] = atomNames.typecode((char *) "C5'");
00778   nuctypes[13] = atomNames.typecode((char *) "O5*");
00779   nuctypes[14] = atomNames.typecode((char *) "O5'");
00780 
00781 #if 0
00782   // non-backbone nucleic acid atom names
00783   nuctypes[  ] = atomNames.typecode((char *) "C1*");
00784   nuctypes[  ] = atomNames.typecode((char *) "C1'");
00785   nuctypes[  ] = atomNames.typecode((char *) "C2*");
00786   nuctypes[  ] = atomNames.typecode((char *) "C2'");
00787   nuctypes[  ] = atomNames.typecode((char *) "O2*");
00788   nuctypes[  ] = atomNames.typecode((char *) "O2'");
00789   nuctypes[  ] = atomNames.typecode((char *) "O4*");
00790   nuctypes[  ] = atomNames.typecode((char *) "O4'");
00791 #endif
00792 
00793   // special case for terminal nucleic residues
00794   int nuctermtypes[2];
00795   nuctermtypes[0] = atomNames.typecode((char *) "H5T"); // standard names
00796   nuctermtypes[1] = atomNames.typecode((char *) "H3T");
00797 
00798 
00799   // loop over all atoms assigning atom backbone type flags
00800   for (i=0; i<nAtoms; i++) {
00801     MolAtom *a = atom(i);
00802  
00803     // initialize atom type to non-backbone
00804     a->atomType = ATOMNORMAL;
00805 
00806     // check for protein backbone atom names
00807     for (j=0; j < 4; j++) {
00808       if (a->nameindex == protypes[j]) {
00809         a->atomType = ATOMPROTEINBACK;
00810         break;
00811       }
00812     }
00813 
00814     // check terminal residue names as well
00815     for (j=0; j < 4; j++) {
00816       if (a->nameindex == termtypes[j]) { // check if OT1, OT2
00817         for (k=0; k < a->bonds; k++) {
00818           if (atom(a->bondTo[k])->atomType == ATOMPROTEINBACK) {
00819             a->atomType = ATOMPROTEINBACK;
00820             break;
00821           }
00822         }
00823       }
00824     }
00825   
00826     // check if in nucleic backbone, if not already set
00827     if (!(a->atomType)) {
00828       for (j=0; j < 15; j++) {
00829         if (a->nameindex == nuctypes[j]) {
00830           a->atomType = ATOMNUCLEICBACK;
00831           break;
00832         }
00833       }
00834     }
00835 
00836     // check if nucleic terminal atom names
00837     for (j=0; j < 2; j++) {
00838       if (a->nameindex == nuctermtypes[j]) {
00839         for (k=0; k < a->bonds; k++) {
00840           if (atom(a->bondTo[k])->atomType == ATOMNUCLEICBACK) {
00841             a->atomType = ATOMNUCLEICBACK;
00842             break;
00843           }
00844         }
00845       }
00846     }
00847   }
00848 
00849   return 0; 
00850 }
00851 
00852 
00853 
00854 // find water molecules based on the residue name
00855 // from the documentation for molscript, these are possible
00856 // waters:
00857 // type H2O HH0 OHH HOH OH2 SOL WAT
00858 // as well, I add TIP, TIP2, TIP3, and TIP4
00859 // The count is the number of sets of connected RESWATERS
00860 int BaseMolecule::find_waters(void) {
00861   int i, j;
00862   MolAtom *a;
00863 
00864   int watertypes[12];
00865   watertypes[0] = resNames.typecode((char *) "H2O");
00866   watertypes[1] = resNames.typecode((char *) "HH0");
00867   watertypes[2] = resNames.typecode((char *) "OHH");
00868   watertypes[3] = resNames.typecode((char *) "HOH");
00869   watertypes[4] = resNames.typecode((char *) "OH2");
00870   watertypes[5] = resNames.typecode((char *) "SOL");
00871   watertypes[6] = resNames.typecode((char *) "WAT");
00872   watertypes[7] = resNames.typecode((char *) "TIP");
00873   watertypes[8] = resNames.typecode((char *) "TIP2");
00874   watertypes[9] = resNames.typecode((char *) "TIP3");
00875   watertypes[10] = resNames.typecode((char *) "TIP4");
00876   // this conflicts with a PDB compound:
00877   //   http://minerva.roca.csic.es/hicup/SPC/spc_pdb.txt
00878   //
00879   // XXX we should add a check to make sure that there are only 
00880   //     three atoms in the residue when all is said and done. If there
00881   //     are more, then we should undo the assignment as a water and mark it
00882   //     as protein etc as appropriate.  This is tricky since at this stage
00883   //     we haven't done any connectivity tests etc, and we're only working
00884   //     with individual atoms.  Perhaps its time to re-think this logic.
00885   watertypes[11] = resNames.typecode((char *) "SPC");
00886 
00887   for (i=0; i<nAtoms; i++) {
00888     a = atom(i);
00889     if (a->residueType == RESNOTHING) {  // make sure it isn't named yet
00890       for (j=0; j<12; j++) {
00891         if (watertypes[j] == a->resnameindex) {
00892           a->residueType = RESWATERS;
00893           break;
00894         }
00895       }
00896     }
00897   }
00898  
00899   int count = find_connected_waters2();
00900 
00901   return count;   
00902 }
00903 
00904 
00905 // if this is a RESWATERS with index idx, mark it and find if
00906 // any of its neighbors are RESWATERS
00907 // this does a depth-first search with RECURSION.
00908 void BaseMolecule::find_connected_waters(int i, char *tmp) {
00909   MolAtom *a = atom(i);
00910   int j;
00911   if (a->residueType == RESWATERS && !tmp[i]) {
00912     tmp[i] = TRUE;
00913     for (j=0; j<a->bonds; j++) {
00914       find_connected_waters(a->bondTo[j], tmp);
00915     }
00916   }
00917 }
00918 
00919 
00920 // if this is a RESWATERS with index idx, mark it and find if
00921 // any of its neighbors are RESWATERS
00922 int BaseMolecule::find_connected_waters2(void) {
00923   MolAtom *a;
00924   int count, i;
00925   IntStackHandle s;
00926 
00927   char *tmp = new char[nAtoms];
00928   memset(tmp, 0, nAtoms * sizeof(char));
00929 
00930   s = intstack_create(nAtoms);
00931 
00932   for (count=0, i=0; i<nAtoms; i++) {
00933     if (atom(i)->residueType == RESWATERS && !tmp[i]) {
00934       int nextatom;
00935 
00936       count++;
00937       intstack_push(s, i);
00938     
00939       // find and mark all connected waters 
00940       while (!intstack_pop(s, &nextatom)) { 
00941         int j;
00942 
00943         a = atom(nextatom);
00944         tmp[nextatom] = TRUE;
00945 
00946         for (j=a->bonds - 1; j>=0; j--) {
00947           int bi = a->bondTo[j];
00948           MolAtom *b = atom(bi);
00949           if (b->residueType == RESWATERS && !tmp[bi])
00950             intstack_push(s, bi);
00951         }
00952       }
00953     }
00954   }
00955 
00956   intstack_destroy(s);
00957   delete [] tmp;
00958 
00959   return count;
00960 }
00961 
00962 
00963 // find n backbone atoms connected together with the given residueid
00964 // return the total count
00965 // this assumes that the given atom (atomidx) is correct
00966 int BaseMolecule::find_connected_backbone(IntStackHandle s, int backbone,
00967                          int atomidx, int residueid, int tmpid, int *flgs) {
00968   if (flgs[atomidx] != 0)
00969     return 0; // already done
00970 
00971   MolAtom *x = atom(atomidx);
00972   if (x->atomType != backbone || x->resid != residueid)
00973     return 0; // not a backbone atom, or resid doesn't match
00974 
00975   intstack_popall(s); // just in case
00976   intstack_push(s, atomidx);
00977   int nextatom;
00978   int count = 0;
00979    
00980   // find and mark connected backbone atoms
00981   while (!intstack_pop(s, &nextatom)) {
00982     MolAtom *a = atom(nextatom);
00983     flgs[nextatom] = tmpid;
00984     count++;
00985 
00986     int j;
00987     for (j=a->bonds - 1; j>=0; j--) {
00988       int bi = a->bondTo[j];
00989       if (flgs[bi] == 0) {
00990         MolAtom *b = atom(bi);
00991 
00992         // skip connections to atoms on different chains/segnames
00993         if (a->chainindex != b->chainindex || 
00994             a->segnameindex != b->segnameindex)
00995           continue;
00996 
00997         if (b->atomType == backbone && b->resid == residueid)
00998           intstack_push(s, bi);
00999       }
01000     }
01001   }
01002 
01003   return count;
01004 }
01005 
01006 
01007 // the find_connected_backbone left terms of flgs which need to be cleaned up
01008 void BaseMolecule::clean_up_connection(IntStackHandle s, int i, int tmpid, int *flgs) {
01009   if (flgs[i] != tmpid)  // been here before
01010     return;
01011 
01012   intstack_popall(s); // just in case
01013   intstack_push(s, i);
01014   int nextatom;
01015  
01016   // find and null out non-matching atom flags
01017   while (!intstack_pop(s, &nextatom)) {
01018     flgs[nextatom] = 0;
01019     MolAtom *a = atom(nextatom);
01020     int j;
01021     for (j=a->bonds - 1; j>=0; j--) {
01022       int bi = a->bondTo[j];
01023       if (flgs[bi] == tmpid) {
01024         intstack_push(s, bi);
01025       }
01026     }
01027   }
01028 }
01029 
01030 
01031 
01032 // now that I know this resid is okay, mark it so
01033 void BaseMolecule::find_connected_atoms_in_resid(IntStackHandle s,
01034     int restype, int i, int residueid, int tmpid, int *flgs)
01035 {
01036   if (flgs[i] != 0 || atom(i)->resid != residueid)
01037     return;
01038 
01039   intstack_popall(s); // just in case
01040   intstack_push(s, i);
01041   int nextatom;
01042 
01043   // find and mark all connected residues in the same chain/segname
01044   while (!intstack_pop(s, &nextatom)) {
01045     flgs[nextatom] = tmpid;
01046     MolAtom *a = atom(nextatom);
01047     a->residueType = restype;
01048 
01049     int j;
01050     for (j=a->bonds - 1; j>=0; j--) {
01051       int bi = a->bondTo[j];
01052       MolAtom *b = atom(bi);
01053       if (flgs[bi] == 0 &&
01054           a->chainindex == b->chainindex &&
01055           a->segnameindex == b->segnameindex &&
01056           b->resid == residueid) {
01057         intstack_push(s, bi);
01058       }
01059     }
01060   }
01061 }
01062 
01063 
01064 
01065 // Find connected backbone atoms with the same resid
01066 // if found, find all the atoms with the same resid
01067 // which are connected to those backbone atoms only through
01068 // atoms of the same resid
01069 void BaseMolecule::find_and_mark(int n, int backbone,
01070   int restype, int *tmpid, int *flgs)
01071 {
01072   int i;
01073   MolAtom *a;
01074   int residueid; // the real resid
01075   IntStackHandle s = intstack_create(nAtoms);
01076 
01077   for (i=0; i<nAtoms; i++) {
01078     a = atom(i);   // look for a new backbone atom
01079     if (a->atomType == backbone && flgs[i] == 0) {
01080       residueid = a->resid;
01081       if (find_connected_backbone(s, backbone, i, residueid, *tmpid, flgs) >= n) {
01082         // if find was successful, start all over again
01083         clean_up_connection(s, i, *tmpid, flgs);
01084         // but mark all the Atoms connected to here
01085         find_connected_atoms_in_resid(s, restype, i, residueid, *tmpid, flgs);
01086         // and one more was made
01087         (*tmpid)++;
01088       } else {
01089         // clean things up so I won't have problems later
01090         clean_up_connection(s, i, *tmpid, flgs);
01091       }
01092     }
01093   }
01094 
01095   intstack_destroy(s);
01096 }
01097 
01098 
01099 
01100 // assign a uniq resid (uniq_resid) to each set of connected atoms
01101 // with the same residue id.  There could be many residues with the
01102 // same id, but not connected (the SSN problem - SSNs are not unique
01103 // so don't use them as the primary key)
01104 int BaseMolecule::make_uniq_resids(int *flgs) {
01105   int i;
01106   int num_residues = 0;
01107   IntStackHandle s = intstack_create(nAtoms);
01108 
01109   for (i=0; i<nAtoms; i++) {
01110     if (!flgs[i]) {  // not been numbered
01111       // find connected atoms to i with the same resid and label
01112       // it with the uniq_resid
01113       MolAtom *a = atom(i);
01114       int resid = a->resid;
01115       char *insertion = a->insertionstr;
01116 
01117       intstack_push(s, i);
01118       int nextatom;
01119 
01120       // Loop over all atoms we're bonded to in the same chain/segname
01121       while (!intstack_pop(s, &nextatom)) {
01122         MolAtom *a = atom(nextatom);
01123         a->uniq_resid = num_residues;  // give it the new resid number
01124         flgs[nextatom] = TRUE;         // mark this atom done
01125   
01126         int j;
01127         for (j=a->bonds - 1; j>=0; j--) {
01128           int bi = a->bondTo[j];
01129           if (flgs[bi] == 0) {
01130             MolAtom *b = atom(bi);
01131             if (a->chainindex == b->chainindex && 
01132                 a->segnameindex == b->segnameindex &&
01133                 b->resid == resid && !strcmp(b->insertionstr, insertion))
01134               intstack_push(s, bi);
01135           }
01136         }
01137       }
01138 
01139       num_residues++;
01140     }
01141   }
01142 
01143   intstack_destroy(s);
01144 
01145   return num_residues;
01146 }
01147 
01148 
01149 
01150 int BaseMolecule::find_residues(void) {
01151   int *flgs = new int[nAtoms]; // flags used for connected atom searches
01152   memset(flgs, 0, nAtoms * sizeof(int)); // clear flags array
01153   
01154   // assign a uniq resid (uniq_resid) to each set of connected atoms
01155   // with the same residue id.  There could be many residues with the
01156   // same id, but not connected (the SSN problem - SSNs are not unique
01157   // so don't use them as the primary key)
01158   int num_residues = make_uniq_resids(flgs);
01159    
01160   int back_res_count = 1; // tmp count of number of residues on some backbone
01161   memset(flgs, 0, nAtoms * sizeof(int)); // clear flags array
01162   
01163   //  hunt for the proteins
01164   // there must be 4 PROTEINBACK atoms connected and with the same resid
01165   // then all connected atoms will be marked as PROTEIN atoms
01166   // this gets everything except the terminals
01167   find_and_mark(4, ATOMPROTEINBACK, RESPROTEIN, &back_res_count, flgs);
01168   
01169   // do the same for nucleic acids
01170   // XXX we might not want to check for the phosphate (P and 2 O's).  Here's
01171   // the quick way I can almost do that.  Unfortionately, that
01172   // messes up nfragList, since it needs a P to detect an end
01173   find_and_mark(4, ATOMNUCLEICBACK, RESNUCLEIC, &back_res_count, flgs);
01174   
01175   delete [] flgs;
01176   return num_residues;
01177 }
01178 
01179 int BaseMolecule::find_atom_in_residue(const char *name, int residue) {
01180   int nametype = atomNames.typecode(name);
01181   if (nametype < 0)
01182     return -2;
01183 
01184   return find_atom_in_residue(nametype, residue);
01185 }
01186 
01187 
01188 // find which residues are connected to which
01189 // remember, I already have the uniq_id for each atom
01190 void BaseMolecule::find_connected_residues(int num_residues) {
01191   int i, j;
01192   for (i=0; i<num_residues; i++) {   // init the list to NULLs
01193     residueList.append(NULL);
01194   }
01195   
01196   for (i=nAtoms-1; i>=0; i--) {      // go through all the atoms
01197     j = atom(i)->uniq_resid;
01198     if (residueList[j] == NULL) {    // then init the residue
01199       residueList[j] = new Residue(atom(i)->resid, atom(i)->residueType);
01200     }
01201     // Tell the residue that this atom is in it
01202     residueList[j]->add_atom(i);
01203   }
01204 
01205   // double check that everything was created
01206   for (i=0; i<num_residues; i++) {
01207     if (residueList[i] == NULL) { // no atom was found for this residue
01208       msgErr << "Mysterious residue creation in ";
01209       msgErr << "BaseMolecule::find_connected_residues." << sendmsg;
01210       residueList[i] = new Residue((int) -1, RESNOTHING);
01211     }
01212   }
01213 
01214   // finally, check for unusual connections between residues, e.g. between
01215   // protein and water.
01216   for (i=0; i<num_residues; i++) {
01217     Residue *res = residueList[i];
01218     int bondfromtype = res->residueType;
01219     int numatoms = res->atoms.num();
01220     for (j=0; j<numatoms; j++) {
01221       MolAtom *a = atom(res->atoms[j]);
01222 
01223       // find off-residue bonds to residues of the same chain/segname
01224       int k;
01225       for (k=0; k<a->bonds; k++) {
01226         MolAtom *b = atom(a->bondTo[k]);
01227 
01228         // skip connections to atoms on different chains/segnames
01229         if (a->chainindex != b->chainindex || 
01230             a->segnameindex != b->segnameindex)
01231           continue;
01232          
01233         if (b->uniq_resid != i) {
01234           int bondtotype = residueList[b->uniq_resid]->residueType;
01235 
01236           if (bondfromtype != bondtotype) {
01237             if (i < b->uniq_resid) { // so that we only warn once
01238               msgWarn << "Unusual bond between residues:  ";
01239               msgWarn << residueList[i]->resid;
01240               switch (bondfromtype) {
01241                 case RESPROTEIN: msgWarn << " (protein)"; break;
01242                 case RESNUCLEIC: msgWarn << " (nucleic)"; break;
01243                 case RESWATERS:  msgWarn << " (waters)"; break;
01244                 default:
01245                 case RESNOTHING: msgWarn << " (none)"; break;
01246               }
01247               msgWarn << " and ";
01248               msgWarn << residueList[b->uniq_resid]->resid;
01249               switch (bondtotype) {
01250                 case RESPROTEIN: msgWarn << " (protein)"; break;
01251                 case RESNUCLEIC: msgWarn << " (nucleic)"; break;
01252                 case RESWATERS:  msgWarn << " (waters)"; break;
01253                 default:
01254                 case RESNOTHING: msgWarn << " (none)"; break;
01255               }
01256               msgWarn << sendmsg;
01257             }
01258           }
01259         }
01260       }
01261     }
01262   }
01263 }
01264 
01265 
01266 // find all the residues connected to a specific residue
01267 int BaseMolecule::find_connected_fragments(void) {
01268   int i;
01269   int count = 0;
01270   char *flgs = new char[residueList.num()]; // set up temp space
01271   memset(flgs, 0, residueList.num() * sizeof(char)); // clear flags
01272   IntStackHandle s = intstack_create(residueList.num());
01273 
01274   int atomsg = atomNames.typecode((char *) "SG"); // to find disulfide bonds
01275 
01276   int nextres;
01277   for (i=0; i<residueList.num(); i++) { // find unmarked fragment
01278     if (!flgs[i]) {
01279       fragList.append(new Fragment);
01280       intstack_push(s, i);
01281 
01282       // find and mark all connected residues with the same chain/segname
01283       while (!intstack_pop(s, &nextres)) {
01284         fragList[count]->append(nextres);
01285         Residue *res = residueList[nextres];
01286         res->fragment = count; // store residue's fragment
01287 
01288         int numatoms = res->atoms.num();
01289         int j;
01290         for (j=0; j<numatoms; j++) {
01291           MolAtom *a = atom(res->atoms[j]);
01292 
01293           // find all bonds to residues of the same chain/segname 
01294           int k;
01295           for (k=0; k<a->bonds; k++) {
01296             MolAtom *b = atom(a->bondTo[k]);
01297             int ri = b->uniq_resid;
01298 
01299             // skip connections to residues with different chains/segnames,
01300             // and don't follow disulfide bonds, as we want the order of
01301             // residue traversal to be correct so we can use it to build
01302             // subfragment lists later on
01303             if ((ri != i) &&
01304                 (flgs[ri] == 0) &&
01305                 (a->chainindex == b->chainindex) &&
01306                 (a->segnameindex == b->segnameindex) &&
01307                 ((a->nameindex != atomsg) || (b->nameindex != atomsg))) {
01308               flgs[ri] = TRUE;
01309               intstack_push(s, ri);
01310             }
01311           }
01312         }
01313       }
01314 
01315       count++;
01316     }
01317   }
01318 
01319   intstack_destroy(s);
01320   delete [] flgs;
01321 
01322   return count;
01323 }
01324 
01325 
01326 // find each collection of connected fragments
01327 int BaseMolecule::find_fragments(void) {
01328   int count = find_connected_fragments();  // find and mark its neighbors
01329 
01330 #if 1
01331   // find the protein subfragments
01332   find_subfragments(atomNames.typecode((char *) "N"), 
01333      -1,
01334      -1,
01335      atomNames.typecode((char *) "C"), 
01336      -1,
01337      -1,
01338      -1,
01339      RESPROTEIN, &pfragList);
01340 
01341 #if 0
01342   // find the nucleic acid subfragments
01343   find_subfragments(atomNames.typecode((char *) "P"), 
01344      atomNames.typecode((char *) "H5T"),
01345      -1,
01346      atomNames.typecode((char *) "O3'"),
01347      atomNames.typecode((char *) "O3*"),
01348      atomNames.typecode((char *) "H3T"),
01349      -1,
01350      RESNUCLEIC, &nfragList);
01351 #else
01352   // find the nucleic acid subfragments
01353   find_subfragments_topologically(
01354      RESNUCLEIC, &nfragList,
01355      atomNames.typecode((char *) "O3'"),
01356      atomNames.typecode((char *) "O3*"),
01357      atomNames.typecode((char *) "H3T"),
01358      -1);
01359 #endif
01360 #else
01361   find_subfragments_cyclic(&pfragList, RESPROTEIN);
01362   find_subfragments_cyclic(&nfragList, RESNUCLEIC);
01363 #endif
01364 
01365   // determine whether fragments are cyclic or not
01366   find_cyclic_subfragments(&pfragList, &pfragCyclic);
01367   find_cyclic_subfragments(&nfragList, &nfragCyclic);
01368 
01369   return count;
01370 }
01371 
01372 
01373 void BaseMolecule::find_subfragments_cyclic(ResizeArray<Fragment *> *subfragList, int restype) {
01374   int numfrags = fragList.num();
01375   int i, frag;
01376 
01377   // test each fragment to see if it's a candidate for the subfraglist
01378   for (frag=0; frag<numfrags; frag++) {
01379     int numres = fragList[frag]->num();       // residues in this frag
01380     int match=1; // start true, and falsify
01381 
01382     // check each residue to see they are all the right restype
01383     for (i=0; i<numres; i++) {
01384       int resid = (*fragList[frag])[i];
01385       if (residueList[resid]->residueType != restype) {
01386         match=0;
01387         break;
01388       }
01389     }
01390 
01391     // if we found a matching fragment, add it to the subfraglist
01392     if (match) {
01393       Fragment *frg = new Fragment;
01394 
01395       // add all of the residues for this fragment to the subfraglist
01396       for (i=0; i<numres; i++) {
01397         int resid = (*fragList[frag])[i];
01398         frg->append(resid);
01399       } 
01400 
01401       subfragList->append(frg);
01402     }    
01403   }
01404 }
01405 
01406 
01407 
01408 void BaseMolecule::find_cyclic_subfragments(ResizeArray<Fragment *> *subfragList, ResizeArray<int> *subfragCyclic) {
01409   int i, j, frag;
01410   int numfrags = subfragList->num();
01411 
01412   // check each fragment for cycles
01413   for (frag=0; frag<numfrags; frag++) {
01414     int numres   = (*subfragList)[frag]->num();       // residues in this frag
01415 
01416     // skip testing fragments containing zero residues
01417     if (numres < 1) {
01418       // record that this fragment is not cyclic
01419       subfragCyclic->append(0);
01420       continue;
01421     }
01422 
01423     int startres = (*(*subfragList)[frag])[0];        // first residue
01424     int endres   = (*(*subfragList)[frag])[numres-1]; // last residue
01425     int cyclic   = 0;
01426 
01427     // check for bonds between startres and endres
01428     int numatoms = residueList[endres]->atoms.num();
01429     int done = 0;
01430     for (i=0; (i < numatoms) && (!done); i++) {
01431       MolAtom *a = atom(residueList[endres]->atoms[i]);
01432       int nbonds = a->bonds;
01433       for (j=0; j < nbonds; j++) {
01434         MolAtom *b = atom(a->bondTo[j]);
01435 
01436         if (b->uniq_resid == startres) {
01437           cyclic=1;
01438           done=1;
01439           break;
01440         }
01441       }  
01442     }
01443 
01444     // record whether this fragment is cyclic or not
01445     subfragCyclic->append(cyclic);
01446   }
01447 }
01448 
01449 
01450 // this adds the current residue type to the *subfragList,
01451 // this finds the residue connected to the endatom atom type
01452 // and calls this function recursively on that residue
01453 // this will NOT work across NORMAL bonds
01454 void BaseMolecule::find_connected_subfragment(int resnum, int fragnum, 
01455          char *flgs, int endatom,  int altendatom, 
01456          int alt2endatom, int alt3endatom,
01457          int restype, 
01458          ResizeArray<Fragment *> *subfragList)
01459 {
01460   if (flgs[resnum] || residueList[resnum]->residueType != restype) 
01461       return;  // been here before, or this is no good
01462   (*subfragList)[fragnum]->append(resnum);    // add to the list
01463   flgs[resnum] = TRUE;                        // and prevent repeats
01464 
01465   // find the atom in this residue with the right type
01466   int i, j, nextres;
01467   MolAtom *a;
01468   for (i=residueList[resnum]->atoms.num() - 1; i>=0; i--) {
01469     a = atom(residueList[resnum]->atoms[i]);
01470     if (a->nameindex == endatom ||
01471         a->nameindex == altendatom ||
01472         a->nameindex == alt2endatom ||
01473         a->nameindex == alt3endatom) {   // found the end atom
01474       for (j=a->bonds-1; j>=0; j--) {    // look at the bonds
01475         // I can't look at if the residue "bond" is a PRO-PRO or NUC-NUC, since
01476         // that won't tell me if the atom involved is the endatom atom
01477         // This is important because I need to avoid things like S-S bonds
01478         // (note that I never checked that the end was bonded to a start on
01479         //  the next residue! - c'est la vie, or something like that
01480         if ((!(a->atomType == ATOMNORMAL && atom(a->bondTo[j])->atomType == ATOMNORMAL)) && // not backbone 
01481             (nextres = atom(a->bondTo[j])->uniq_resid) != resnum &&
01482             !flgs[nextres] ) { // found next residue, and unvisited
01483           find_connected_subfragment(nextres, fragnum, flgs, endatom,
01484               altendatom, alt2endatom, alt3endatom, restype, subfragList);
01485           return; // only find one; assume no branching
01486         }
01487       } // end of for
01488     } // end of found correct endtype
01489   } // searching atoms
01490 } // end of finding connected subfragment
01491 
01492 
01493 // find a class of fragments, and add them to the subfragment list
01494 void BaseMolecule::find_subfragments(int startatom, 
01495           int altstartatom, int alt2startatom,
01496           int endatom, int altendatom, int alt2endatom, int alt3endatom,
01497           int restype, ResizeArray<Fragment *> *subfragList)
01498 {
01499   int i, j, k;
01500   MolAtom *a;
01501   char *flgs = new char[residueList.num()];
01502   memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
01503 
01504   // Loop over all residues looking for candidate residues that start
01505   // a fragment.  A fragment starting residue must be an unvisited 
01506   // residue which has an startatom with no off residue bond to 
01507   // the same restype
01508   for (i=residueList.num()-1; i>=0; i--) {
01509     // test for previous visit, and whether it's the restype we want
01510     if (!flgs[i] && residueList[i]->residueType == restype) {
01511       // does this residue have a matching startatom
01512       for (j=residueList[i]->atoms.num()-1; j>=0; j--) { 
01513         int satom = (a=atom(residueList[i]->atoms[j]))->nameindex;
01514         if (satom == startatom || 
01515             satom == altstartatom || 
01516             satom == alt2startatom){
01517           for (k=a->bonds-1; k>=0; k--) {
01518             MolAtom *bondto = atom(a->bondTo[k]);
01519             // are there any off-residue bonds to the same restype
01520             if (bondto->uniq_resid != i && bondto->residueType == restype) {
01521               break; // if so then stop, so that k>=0
01522             }
01523           }
01524 
01525           // if we found a valid fragment start atom, find residues downchain
01526           if (k<0) { 
01527             subfragList->append(new Fragment);
01528             find_connected_subfragment(i, subfragList->num()-1, flgs, 
01529                   endatom, altendatom, alt2endatom, alt3endatom,
01530                   restype, subfragList);
01531           } // found starting residue
01532         } // found startatom
01533       } // going through atoms
01534     } // found restype
01535   } // going through residues
01536 
01537   // found 'em all
01538   delete [] flgs;
01539 } 
01540 
01541 
01542 // find a class of fragments, and add them to the subfragment list
01543 void BaseMolecule::find_subfragments_topologically(int restype, 
01544   ResizeArray<Fragment *> *subfragList, 
01545   int endatom, int altendatom, int alt2endatom, int alt3endatom) {
01546   int i; 
01547   char *flgs = new char[residueList.num()];
01548   memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
01549   int numres = residueList.num();
01550 
01551   // Loop over all residues looking for candidate residues that start
01552   // a fragment.  A fragment starting residue must be an unvisited
01553   // residue which has an startatom with no off residue bond to
01554   // the same restype
01555   for (i=0; i<numres; i++) {
01556     Residue *res = residueList[i];
01557 
01558     // test for previous visit, and whether it's the restype we want
01559     if (!flgs[i] && res->residueType == restype) {
01560       // if this residue only has 1 bond to a residue of the same restype
01561       // it must be a terminal residue
01562       int offresbondcount = 0;
01563       int j, k;
01564       int numatoms = res->atoms.num();
01565       for (j=0; j<numatoms; j++) {
01566         MolAtom *a = atom(res->atoms[j]);
01567 
01568         // find off-residue bonds
01569         for (k=0; k<a->bonds; k++) {
01570           MolAtom *b = atom(a->bondTo[k]);
01571           if (b->uniq_resid != i && 
01572               residueList[b->uniq_resid]->residueType == restype) {
01573             offresbondcount++;
01574           }
01575         }
01576       }
01577 
01578       // if we found a valid fragment start atom, find residues downchain
01579       if (offresbondcount == 1) {
01580         subfragList->append(new Fragment);
01581         find_connected_subfragment(i, subfragList->num()-1, flgs,
01582               endatom, altendatom, alt2endatom, alt3endatom,
01583               restype, subfragList);
01584       }
01585     } // found restype
01586   } // going through residues
01587 
01588   // found 'em all
01589   delete [] flgs;
01590 }
01591 
01592 
01593 #if defined(VMDFASTRIBBONS)
01594 // routine to pre-calculate lists of control point indices to
01595 // be used by the ribbon/cartoon representations
01596 void BaseMolecule::calculate_ribbon_controlpoints() {
01597   int onum, canum, frag, num, res;
01598 
01599   wkf_timerhandle tm = wkf_timer_create();
01600 
01601   msgInfo << "XXX Building pre-computed control point lists" << sendmsg;
01602 
01603   wkf_timer_start(tm);
01604 
01605   // Lookup atom typecodes ahead of time so we can use the most
01606   // efficient variant of find_atom_in_residue() in the main loop.
01607   // If we can't find the atom types we need, bail out immediately
01608   int CAtypecode  = atomNames.typecode("CA");
01609   int Otypecode   = atomNames.typecode("O");
01610   int OT1typecode = atomNames.typecode("OT1");
01611    
01612   // We can't draw a ribbon without CA and O atoms for guidance!
01613   if (CAtypecode < 0) {
01614     msgErr << "This structure has no identifiable CA atoms,"
01615            << "ribbon and cartoon representations will not be usable." 
01616            << sendmsg;
01617     return;
01618   }
01619   if ((Otypecode < 0) && (OT1typecode < 0)) {
01620     msgErr << "This structure has no identifiable Oxygen atoms,"
01621            << "ribbon and cartoon representations will not be usable." 
01622            << sendmsg;
01623     return;
01624   }
01625 
01626   //
01627   // calculate control point lists for the protein fragments
01628   //
01629 
01630   msgInfo << "XXX Building protein control point lists" << sendmsg;
01631 
01632   // go through each protein and find the CA and O atoms which are
01633   // eventually used to construct control points and perpendicular vectors
01634   ResizeArray<int> cpind;
01635   for (frag=0; frag<pfragList.num(); frag++) {
01636     num = pfragList[frag]->num();  // number of residues in this fragment
01637     if (num < 2) {
01638       // with less than two residues, we can't do anything useful, so skip
01639       nfragCPList.append(NULL);
01640       continue;
01641     }
01642 
01643     // check that we have a valid structure before continuing
01644       res = (*pfragList[frag])[0];
01645     canum = find_atom_in_residue(CAtypecode, res);
01646      onum = find_atom_in_residue(Otypecode, res);
01647 
01648     if (onum < 0 && OT1typecode >= 0) {
01649       onum = find_atom_in_residue(OT1typecode, res);
01650     }
01651     if (canum < 0 || onum < 0) {
01652       // can't find 1st CA or O of the protein fragment, so skip
01653       msgErr << "Fragment control point calc unable to identify target atoms" << sendmsg;
01654       nfragCPList.append(NULL);
01655       continue; 
01656     }
01657 
01658     // clear temp arrays before we begin
01659     cpind.clear();
01660 
01661     int loop;
01662     for (loop=0; loop<num; loop++) {
01663       res = (*pfragList[frag])[loop];
01664 
01665       // find next CA atom to be used as a spline control point
01666       canum = find_atom_in_residue(CAtypecode, res);
01667 
01668       // find next O atom to be used for spline orientation vector
01669       onum = find_atom_in_residue(Otypecode, res);
01670       if (onum < 0 && OT1typecode >= 0) {
01671         onum = find_atom_in_residue(OT1typecode, res);
01672       }
01673 
01674       // add the atom indices to the control point list
01675       cpind.append(canum);
01676       cpind.append(onum);
01677     }
01678 
01679     // copy this fragment's control point lists to permanent storage
01680     if (cpind.num() == 2*num) {
01681       int *cpindices = new int[cpind.num()];
01682       memcpy(cpindices, &cpind[0], cpind.num() * sizeof(int));
01683 
01684       // add control point list to the master list
01685       nfragCPList.append(cpindices);
01686     } else {
01687       msgErr << "Unexpected failure in control point calculation" << sendmsg;
01688       nfragCPList.append(NULL);
01689     }
01690   } 
01691 
01692   wkf_timer_stop(tm);
01693 
01694   msgInfo << "XXX Calc time for protein cplist: " 
01695           << wkf_timer_time(tm) << sendmsg;
01696   msgInfo << "XXX done building protein control point lists" << sendmsg;
01697 
01698   //
01699   // calculate control point lists for the nucleic fragments
01700   //
01701 
01702   wkf_timer_destroy(tm);
01703 }
01704 #endif
01705 
01706 #if defined(VMDWITHCARBS)
01707 
01708 // Routines for detecting carbohydrate rings
01709 // contributed by Simon Cross and Michelle Kuttel
01710 // XXX TODO:
01711 //   don't create hashes as pointers, rather just pass pointers for them
01712 //   into functions which need it.
01713 
01714 // find all small rings and links between them
01715 void BaseMolecule::find_small_rings_and_links(int maxpathlength, int maxringsize) {
01716   // skip ring finding if we've already done it
01717   if (maxpathlength == currentMaxPathLength && maxringsize == currentMaxRingSize)
01718     return;
01719   currentMaxPathLength = maxpathlength;
01720   currentMaxRingSize = maxringsize;
01721 
01722   // Clear smallringList, smallringLinkages
01723   smallringList.clear();
01724   smallringLinkages.clear();
01725 
01726   // find groups of atoms bonded into small rings  
01727   find_small_rings(maxringsize);
01728 #if 0
01729   msgInfo << "   Rings: " << smallringList.num() << sendmsg;
01730 #endif
01731   
01732   // orientate the rings if possible
01733   orientate_small_rings(maxringsize);
01734   int nOrientatedRings = 0;
01735   for (int i=0; i<smallringList.num(); i++) {
01736     if (smallringList[i]->orientated) 
01737       nOrientatedRings++;
01738   }
01739 #if 0
01740   msgInfo << "   Rings orientated: " << nOrientatedRings << sendmsg;
01741 #endif  
01742   // find paths between rings
01743   find_orientated_small_ring_linkages(maxpathlength, maxringsize);
01744 #if 0
01745   msgInfo << "   Ring Paths: " << smallringLinkages.paths.num() << sendmsg;
01746 #endif  
01747 }
01748 
01749 // find all loops less than a given size
01750 int BaseMolecule::find_small_rings(int maxringsize) {
01751   int n_back_edges, n_rings;
01752   ResizeArray<int> back_edge_src, back_edge_dest;
01753     
01754   n_back_edges = find_back_edges(back_edge_src, back_edge_dest);
01755 
01756 #if 0
01757   msgInfo << "  BACK EDGES: " << n_back_edges << sendmsg;
01758   for (int i=0; i < n_back_edges; i++) {
01759     msgInfo << "       SRC:" << back_edge_src[i] << ", DST:" << back_edge_dest[i] << sendmsg;
01760   }
01761 #endif
01762 
01763   n_rings = find_small_rings_from_back_edges(maxringsize, back_edge_src, back_edge_dest);
01764 
01765 #if 0
01766   msgInfo << " SMALL RINGS: " << n_rings << sendmsg;
01767   for (int i=0; i < n_rings; i++) {
01768     msgInfo << "    RING: " << *(smallringList[i]) << sendmsg;
01769   }
01770 #endif
01771 
01772   return n_rings; // number of rings found
01773 }
01774 
01775 
01776 #define INTREE_NOT      -1   // not in tree
01777 #define INTREE_NOPARENT -2   // no parent 
01778 
01779 // find the back edges of an arbitrary spanning tree (or set of trees if the molecule is disconnected)
01780 int BaseMolecule::find_back_edges(ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest) {
01781   int i;
01782   int n_back_edges = 0;  
01783   int *intree_parents = new int[nAtoms];
01784   memset(intree_parents, INTREE_NOT, nAtoms * sizeof(int));  // clear parents, -1 = not in tree, -2 = no parent
01785       
01786   for (i=0; i<nAtoms; i++) {
01787     if (intree_parents[i] == INTREE_NOT) {  // not been visited
01788       n_back_edges += find_connected_subgraph_back_edges(i,back_edge_src,back_edge_dest,intree_parents);
01789     }
01790   }
01791 
01792   delete [] intree_parents;
01793  
01794   return n_back_edges;
01795 }
01796 
01797 
01798 // find the back edges of a spanning tree (for a connected portion of the molecule)
01799 int BaseMolecule::find_connected_subgraph_back_edges(int atomid, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest,
01800                                                      int *intree_parents) {
01801   int i, n_new_back_edges, cur_atom_id, child_atom_id, parent_atom_id;
01802   MolAtom *curatom;
01803   ResizeArray<int> node_stack;
01804   
01805   node_stack.append(atomid);
01806   intree_parents[atomid] = INTREE_NOPARENT; // in tree, but doesn't have parent
01807   n_new_back_edges = 0;
01808 
01809   while (node_stack.num() > 0) {
01810 #if 1
01811     cur_atom_id = node_stack.pop();
01812 #else
01813     cur_atom_id = node_stack[node_stack.num()-1];
01814     node_stack.remove(node_stack.num()-1);
01815 #endif
01816     
01817     curatom = atom(cur_atom_id);
01818     parent_atom_id = intree_parents[cur_atom_id];
01819     
01820     for(i=0;i<curatom->bonds;i++) {
01821       child_atom_id = curatom->bondTo[i];
01822       if (intree_parents[child_atom_id] != INTREE_NOT) {
01823         // back-edge found
01824         if ((child_atom_id != parent_atom_id) && (child_atom_id > cur_atom_id)) {
01825             // we ignore edges back to the parent
01826             // and only add each back edge once
01827             // (it'll crop up twice since each bond is listed on both atoms)
01828             back_edge_src.append(cur_atom_id);
01829             back_edge_dest.append(child_atom_id);
01830             n_new_back_edges++;
01831         }
01832       } else {
01833         // extended tree
01834         intree_parents[child_atom_id] = cur_atom_id;
01835         node_stack.append(child_atom_id);
01836       }
01837     }
01838   }
01839 
01840   return n_new_back_edges;
01841 }
01842 
01843 // find rings smaller than maxringsize given list of back edges
01844 int BaseMolecule::find_small_rings_from_back_edges(int maxringsize, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest) {
01845     int i, key, max_rings;
01846     int n_rings = 0;
01847     int n_back_edges = back_edge_src.num();
01848     SmallRing *ring;
01849     inthash_t *used_edges = new inthash_t; // back edges which have been dealt with 
01850     inthash_t *used_atoms = new inthash_t; // atoms (other than the first) which are used in the current path (i.e. possible loop)
01851     inthash_init(used_edges,n_back_edges);
01852     inthash_init(used_atoms,maxringsize);
01853    
01854     // cap the peak number of rings to find based on the size of the
01855     // input structure.  This should help prevent unusual structures 
01856     // with very high connectivity, such as silicon nanodevices from 
01857     // blowing up the ring search code
01858     max_rings = 2000 + (int) (100.0*sqrt((double) nAtoms));
01859  
01860     for(i=0;i<n_back_edges;i++) {
01861       ring = new SmallRing();
01862       ring->append(back_edge_src[i]);
01863       ring->append(back_edge_dest[i]);
01864       
01865       // first atom is not marked used, since we're allowed to re-use it.
01866       inthash_insert(used_atoms,back_edge_dest[i],1);
01867       
01868       n_rings += find_small_rings_from_partial(ring,maxringsize,used_edges,used_atoms);
01869       delete ring;
01870       
01871       // abort if there are too many rings
01872       if (n_rings > max_rings) {
01873         msgWarn << "Maximum number of rings (" << max_rings << ") exceed."
01874                 << " Stopped looking for rings after " << n_rings << " rings found." << sendmsg;
01875         break;
01876       }
01877       
01878       key = get_edge_key(back_edge_src[i],back_edge_dest[i]);
01879       inthash_insert(used_edges,key,1);
01880       
01881       // remove last atom from used_atoms
01882       inthash_delete(used_atoms,back_edge_dest[i]);
01883     }
01884     
01885     inthash_destroy(used_edges);
01886     delete used_edges;
01887     inthash_destroy(used_atoms);
01888     delete used_atoms;
01889 
01890     return n_rings;
01891 }
01892 
01893 
01894 // find rings smaller than maxringsize from the given partial ring (don't reuse used_edges)
01895 int BaseMolecule::find_small_rings_from_partial(SmallRing *ring, int maxringsize, inthash_t *used_edges, inthash_t *used_atoms) {
01896     int i, next_bond_pos, cur_atom_id, child_atom_id, bond_key, barred, closes, do_pop;
01897     int n_rings = 0;
01898     MolAtom *curatom;
01899 
01900     IntStackHandle atom_id_stack = intstack_create(maxringsize);
01901     IntStackHandle bond_pos_stack = intstack_create(maxringsize);
01902     intstack_push(atom_id_stack,ring->last_atom());
01903     intstack_push(bond_pos_stack,0);
01904 
01905     while (!intstack_pop(atom_id_stack,&cur_atom_id)) {
01906       intstack_pop(bond_pos_stack,&next_bond_pos);
01907       curatom = atom(cur_atom_id);
01908       do_pop = 0; // whether we need to pop the current atom
01909 
01910       // if the ring is already maxringsize, just pop the current atom off.
01911       if (ring->num() > maxringsize)
01912          do_pop = 1;
01913 
01914       if (next_bond_pos == 0 && !do_pop) {
01915         // ignore barred rings (by checking for links back to earlier parts of the ring
01916         // before exploring further)
01917         barred = closes = 0;
01918         for(i=0;i<curatom->bonds;i++) {
01919           child_atom_id = curatom->bondTo[i];
01920 
01921           // check that this is not an edge immediately back to the previous atom
01922           if (child_atom_id == ring->atoms[ring->atoms.num()-2]) continue;
01923 
01924           // check that this is not an atom we've included
01925           // (an exception is the first atom, which we're allowed to try add, obviously :)
01926           if (inthash_lookup(used_atoms, child_atom_id) != HASH_FAIL) {
01927             barred = 1;
01928             continue;
01929           }
01930 
01931           // check is this not a back edge which has already been used
01932           bond_key = get_edge_key(cur_atom_id,child_atom_id);
01933           if (inthash_lookup(used_edges, bond_key) != HASH_FAIL) {
01934             if (child_atom_id == ring->first_atom())
01935               barred = 1; // used back-edge which closes the ring counts as barred
01936             continue;
01937           }
01938 
01939           // check whether ring closes
01940           if (child_atom_id == ring->first_atom()) {
01941             closes = 1;
01942             continue;
01943           }
01944         }
01945 
01946         if (closes && !barred) {
01947           smallringList.append(ring->copy());
01948           n_rings += 1;
01949         }
01950         if (closes || barred) {
01951           // adding this atom would create a barred ring, so skip to next atom on stack
01952           do_pop = 1;
01953         }
01954       }
01955 
01956       if (!do_pop) {
01957         for(i=next_bond_pos;i<curatom->bonds;i++) {
01958           child_atom_id = curatom->bondTo[i];
01959 
01960           // check that this is not an edge immediately back to the previous atom
01961           if (child_atom_id == ring->atoms[ring->atoms.num()-2]) continue;
01962                   
01963           // check is this not a back edge which has already been used
01964           bond_key = get_edge_key(cur_atom_id,child_atom_id);
01965           if (inthash_lookup(used_edges, bond_key) != HASH_FAIL) continue;
01966           
01967           // Append child atom and go deeper
01968           ring->append(child_atom_id);
01969           inthash_insert(used_atoms,child_atom_id,1);
01970           
01971           // push current state and new state onto stack and recurse
01972           intstack_push(atom_id_stack,cur_atom_id);
01973           intstack_push(bond_pos_stack,i+1);
01974           intstack_push(atom_id_stack,child_atom_id);
01975           intstack_push(bond_pos_stack,0);
01976           break;     
01977         }
01978 
01979         // we finished the children, pop one parent
01980         if(i>=curatom->bonds)
01981             do_pop = 1;        
01982       }
01983 
01984       if (do_pop && !intstack_empty(atom_id_stack)) {
01985           // finished with cur_atom_id and all its children      
01986           // clean up before returning from recurse
01987           ring->remove_last();
01988           inthash_delete(used_atoms,cur_atom_id);      
01989       }
01990     }
01991     
01992     intstack_destroy(atom_id_stack);
01993     intstack_destroy(bond_pos_stack);
01994     return n_rings;
01995 }
01996 
01997 
01998 // construct edge key
01999 int BaseMolecule::get_edge_key(int edge_src, int edge_dest) {
02000     int t;
02001     if (edge_dest > edge_src) {
02002        t = edge_src;
02003        edge_src = edge_dest;
02004        edge_dest = t;
02005     }
02006     return edge_src * nAtoms + edge_dest;
02007 }
02008 
02009 // Routines for orientating rings
02010 // contributed by Simon Cross and Michelle Kuttel
02011 
02012 void BaseMolecule::orientate_small_rings(int maxringsize) {
02013   for (int i=0;i<smallringList.num();i++)
02014     orientate_small_ring(*smallringList[i],maxringsize);
02015 }
02016 
02017 void BaseMolecule::orientate_small_ring(SmallRing &ring,int maxringsize) {
02018   short int oxygen = -1;
02019   int oxygen_typeindex, c1_nameindex, c1p_nameindex, cu1_nameindex;
02020   int c2_nameindex, c2p_nameindex, cu2_nameindex;
02021   MolAtom *curatom, *atom_before_O, *atom_after_O;
02022 
02023   // lookup atom name indices in advance
02024   oxygen_typeindex = atomTypes.typecode("O");
02025   c1_nameindex = atomNames.typecode("C1");
02026   c1p_nameindex = atomNames.typecode("C1'");
02027   cu1_nameindex = atomNames.typecode("C_1");
02028   c2_nameindex = atomNames.typecode("C2");
02029   c2p_nameindex = atomNames.typecode("C2'");
02030   cu2_nameindex = atomNames.typecode("C_2");
02031     
02032   // Find an oxygen (or something with two bonds)
02033   for (int i=0; i<ring.num(); i++) {
02034     curatom = atom(ring[i]);
02035     if (curatom->atomicnumber == 8 || curatom->typeindex == oxygen_typeindex || curatom->bonds == 2) {
02036       oxygen = i;
02037       break;
02038     }
02039   }
02040 
02041   // leave ring unorientated if no oxygen found
02042   if (oxygen == -1) return;
02043 
02044   // find atoms before and after oxygen (taking into account wrapping)
02045   if (oxygen == 0) atom_before_O = atom(ring.last_atom());
02046   else atom_before_O = atom(ring[oxygen-1]);
02047 
02048   if (oxygen == ring.num()-1) atom_after_O = atom(ring.first_atom());
02049   else atom_after_O = atom(ring[oxygen+1]);
02050     
02051   // ensure C1 carbon is after the oxygen
02052   // leave unorientated if the C1 carbon can't be found
02053   if (atom_before_O->nameindex == c1_nameindex || atom_before_O->nameindex == c1p_nameindex
02054       || atom_before_O->nameindex == cu1_nameindex
02055       || atom_before_O->nameindex == c2_nameindex || atom_before_O->nameindex == c2p_nameindex
02056       || atom_before_O->nameindex == cu2_nameindex) {
02057     ring.reverse();
02058     ring.orientated = 1;
02059   }
02060   else if (atom_after_O->nameindex == c1_nameindex || atom_after_O->nameindex == c1p_nameindex
02061            || atom_after_O->nameindex == cu1_nameindex
02062            || atom_after_O->nameindex == c2_nameindex || atom_after_O->nameindex == c2p_nameindex
02063            || atom_after_O->nameindex == cu2_nameindex) {
02064     ring.orientated = 1;
02065   }
02066 }
02067 
02068 
02069 // Routines for finding links between rings
02070 // contributed by Simon Cross and Michelle Kuttel
02071 
02072 // find all paths shorter than a given length between the small rings
02073 // listed in smallringList
02074 int BaseMolecule::find_orientated_small_ring_linkages(int maxpathlength, int maxringsize) {
02075   SmallRing *sr;
02076   LinkagePath *lp;
02077   int i, j, atom_id;
02078   int n_paths = 0;
02079   inthash_t *atom_to_ring = new inthash_t;
02080   inthash_t *multi_ring_atoms = new inthash_t;
02081   inthash_t *used_atoms = new inthash_t;
02082   inthash_init(atom_to_ring,smallringList.num()*maxringsize/2);
02083   inthash_init(multi_ring_atoms,smallringList.num());
02084   inthash_init(used_atoms,maxpathlength);
02085    
02086   // create lookup from atom id to ring id
02087   for (i=0;i<smallringList.num();i++) {
02088     sr = smallringList[i];
02089     // XXX: Uncomment this if we want to include non-orientated rings
02090     //      in linkage paths
02091     // if (!sr->orientated) continue;
02092     for (j=0;j<sr->num();j++) {
02093       atom_id = (*sr)[j];
02094       if (inthash_lookup(atom_to_ring,atom_id) == HASH_FAIL)
02095         inthash_insert(atom_to_ring,atom_id,i);
02096       else
02097         inthash_insert(multi_ring_atoms,atom_id,1);
02098     }
02099   }
02100 
02101   // look for ring linkage paths starting from each atom
02102   // of each orientated ring
02103   for (i=0;i<smallringList.num();i++) {
02104     sr = smallringList[i];
02105     if (!sr->orientated) continue;
02106     for (j=0;j<sr->num();j++) {
02107       if (inthash_lookup(multi_ring_atoms,(*sr)[j]) != HASH_FAIL) continue;
02108       lp = new LinkagePath();
02109       lp->start_ring = i;
02110       lp->path.append((*sr)[j]);    
02111       n_paths += find_linkages_for_ring_from_partial(*lp,maxpathlength,atom_to_ring,multi_ring_atoms,used_atoms);
02112       delete lp;
02113     }
02114   }
02115   
02116   inthash_destroy(atom_to_ring);
02117   delete atom_to_ring;
02118   inthash_destroy(multi_ring_atoms);
02119   delete multi_ring_atoms;
02120   inthash_destroy(used_atoms);
02121   delete used_atoms;
02122 
02123 #if 0
02124   msgInfo << smallringLinkages << sendmsg;
02125 #endif
02126   
02127   return n_paths; // number of paths found.
02128 }
02129 
02130 int BaseMolecule::find_linkages_for_ring_from_partial(LinkagePath &lp, int maxpathlength, inthash_t *atom_to_ring, inthash_t *multi_ring_atoms, inthash_t *used_atoms) {
02131   int i, cur_atom_id, next_bond_pos, child_atom_id, ringidx;
02132   int n_paths = 0;
02133   MolAtom *curatom;
02134 
02135   IntStackHandle atom_id_stack = intstack_create(maxpathlength);
02136   IntStackHandle bond_pos_stack = intstack_create(maxpathlength);
02137   intstack_push(atom_id_stack,lp.path.last_atom());
02138   intstack_push(bond_pos_stack,0);
02139 
02140   while (!intstack_pop(atom_id_stack,&cur_atom_id)) {
02141     intstack_pop(bond_pos_stack,&next_bond_pos);
02142     curatom = atom(cur_atom_id);
02143 
02144     if (next_bond_pos == 0)
02145       inthash_insert(used_atoms, cur_atom_id, 1);
02146 
02147     for(i=next_bond_pos;i<curatom->bonds;i++) {
02148       child_atom_id = curatom->bondTo[i];
02149 
02150       // check that this isn't an atom that belongs to multiple rings
02151       if (inthash_lookup(multi_ring_atoms,child_atom_id) != HASH_FAIL) continue;
02152 
02153       // check that this is not an edge immediately back to the previous atom
02154       // (when there is only one atom in the path, it can't be a link back)
02155       if (lp.num() > 1 && child_atom_id == lp[lp.num()-2]) continue;
02156       
02157       // check that we haven't arrived at a non-orientated ring
02158       ringidx = inthash_lookup(atom_to_ring, child_atom_id);
02159       if (ringidx != HASH_FAIL && !smallringList[ringidx]->orientated) continue;
02160 
02161       // only store paths from smaller ringidx to larger ringidx (to avoid getting a copy of each orientation of the path)
02162       // ignore paths which return to the same ring
02163       // check that we're leaving the starting ring
02164       if (ringidx != HASH_FAIL && ringidx <= lp.start_ring) continue;
02165       
02166       // see if this takes us to another ring
02167       if (ringidx != HASH_FAIL && (ringidx > lp.start_ring)) {
02168           lp.append(child_atom_id);
02169           lp.end_ring = ringidx;
02170           smallringLinkages.addLinkagePath(*lp.copy());
02171           n_paths++;
02172           lp.end_ring = -1;
02173           lp.remove_last();
02174           continue;
02175       }
02176       
02177       // check that this is not an atom we've included
02178       // (an exception is the first atom, which we're allowed to try add, obviously :)
02179       if (inthash_lookup(used_atoms, child_atom_id) != HASH_FAIL) continue;
02180               
02181       if (lp.num() < maxpathlength) {
02182          lp.append(child_atom_id);
02183 
02184          // push current state and new state onto stack and recurse
02185          intstack_push(atom_id_stack,cur_atom_id);
02186          intstack_push(bond_pos_stack,i+1);
02187          intstack_push(atom_id_stack,child_atom_id);
02188          intstack_push(bond_pos_stack,0);
02189          break;
02190       }
02191     }
02192 
02193     if ((i>=curatom->bonds)&&(!intstack_empty(atom_id_stack))) {      
02194       // clean up before returning from recurse
02195       lp.remove_last();
02196       inthash_delete(used_atoms,cur_atom_id);
02197     }
02198   }
02199   
02200   intstack_destroy(atom_id_stack);
02201   intstack_destroy(bond_pos_stack);
02202   return n_paths;
02203 }
02204 
02205 #endif  // end of carbohydrate related stuff
02206 
02207 
02208 
02209 void BaseMolecule::add_volume_data(const char *name, const float *o,
02210     const float *xa, const float *ya, const float *za, int x, int y, int z,
02211     float *data) {
02212   msgInfo << "Analyzing Volume..." << sendmsg;
02213 
02214   VolumetricData *vdata = new VolumetricData(name, o, xa, ya, za,
02215                                              x, y, z, data);
02216   
02217   // Print out grid size along with memory use for the grid itself plus
02218   // the memory required for the volume gradients (4x the scalar grid memory)
02219   // Color texture maps require another 0.75x the original scalar grid size.
02220   msgInfo << "   Grid size: " << x << "x" << y << "x" << z << "  (" 
02221           << (int) (4.0 * (x*y*z * sizeof(float)) / (1024.0 * 1024.0)) << " MB)" 
02222           << sendmsg;
02223 
02224   msgInfo << "   Total voxels: " << x*y*z << sendmsg;
02225 
02226   msgInfo << "   Min: " << vdata->datamin << "  Max: " << vdata->datamax 
02227           << "  Range: " << (vdata->datamax - vdata->datamin) << sendmsg;
02228 
02229   msgInfo << "   Computing volume gradient map for smooth shading" << sendmsg;
02230 
02231   vdata->compute_volume_gradient(); // calc gradients for smooth vertex normals
02232 
02233   volumeList.append(vdata);
02234 
02235   msgInfo << "Added volume data, name=" << vdata->name << sendmsg;
02236 }
02237 
02238 int BaseMolecule::num_volume_data() {
02239   return volumeList.num();
02240 }
02241 
02242 const VolumetricData *BaseMolecule::get_volume_data(int id) {
02243   if (id >= 0 && id < volumeList.num())
02244     return volumeList[id];
02245   return NULL;
02246 }

Generated on Tue May 21 01:45:58 2013 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002