Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

BaseMolecule.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: BaseMolecule.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.245 $      $Date: 2011/12/05 19:00:21 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * Base class for all molecules, without display-specific information.  This
00020  * portion of a molecule contains the structural data, and all routines to
00021  * find the structure (backbone, residues, etc).  It does NOT contain the
00022  * animation list; that is maintained by Molecule (which is derived from
00023  * this class).
00024  *
00025  ***************************************************************************/
00026 
00027 #include <ctype.h>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include "Inform.h"
00031 #include "utilities.h"
00032 #include "intstack.h"
00033 #include "WKFUtils.h"
00034 
00035 #include "BaseMolecule.h"
00036 #include "VolumetricData.h"
00037 #include "QMData.h"
00038 #include "QMTimestep.h"
00039 
00040 #ifdef VMDWITHCARBS
00041 #include <vector> /* XXX this needs to go, gives trouble with MSVC */
00042 #endif
00043 
00044 #define MAXBONDERRORS 25
00045 
00047 
00048 BaseMolecule::BaseMolecule(int myID) : residueList(10) , fragList(1),
00049 #if defined(VMDWITHCARBS)
00050      pfragList(1), nfragList(1), smallringList(), smallringLinkages(), currentMaxRingSize(-1), currentMaxPathLength(-1), ID(myID) {
00051 #else
00052      pfragList(1), nfragList(1), ID(myID) {
00053 #endif
00054 
00055   // initialize all variables
00056   nAtoms = 0;
00057   cur_atom = 0;
00058   atomList = NULL;
00059   moleculename = NULL;
00060   lastbonderratomid=-1;
00061   bonderrorcount=0;
00062   datasetflags=NODATA;
00063   qm_data = NULL;
00064   radii_minmax_need_update = 1;
00065   radii_min = 0.0f;
00066   radii_max = 0.0f;
00067 }
00068 
00069 
00071 
00072 BaseMolecule::~BaseMolecule(void) {
00073   int i;
00074 
00075 #if 0
00076   // XXX still need to track down ringlist leak(s)
00077   // delete carbohydrate ring data structures
00078   smallringList.clear();
00079   smallringLinkages.clear();
00080 #endif
00081 
00082   // delete structural data
00083   delete [] atomList;
00084   for (i=0; i<residueList.num(); i++) {
00085     delete residueList[i];
00086   }
00087   for (i=0; i<nfragList.num(); i++) {
00088     delete nfragList[i];
00089   }
00090 #if defined(VMDFASTRIBBONS)
00091   for (i=0; i<nfragCPList.num(); i++) {
00092     delete [] nfragCPList[i];
00093   }
00094 #endif
00095   for (i=0; i<pfragList.num(); i++) {
00096     delete pfragList[i];
00097   }
00098 #if defined(VMDFASTRIBBONS)
00099   for (i=0; i<pfragCPList.num(); i++) {
00100     delete [] pfragCPList[i];
00101   }
00102 #endif
00103   for (i=0; i<fragList.num(); i++) {
00104     delete fragList[i];
00105   }
00106   for (i=0; i<volumeList.num(); i++) {
00107     delete volumeList[i];
00108   }
00109   for (i=0; i<extraflt.num(); i++) {
00110     delete [] extraflt.data(i);
00111   }
00112   for (i=0; i<extraint.num(); i++) {
00113     delete [] extraint.data(i);
00114   }
00115   if (qm_data)
00116     delete qm_data;
00117 }
00118 
00119 
00121 
00122 // initialize the atom list ... should be called before adding any atoms
00123 int BaseMolecule::init_atoms(int n) {
00124 
00125   if(n <= 0) {
00126     msgErr << "BaseMolecule: init_atoms called with invalid number of atoms: "
00127            << n << sendmsg;
00128     return FALSE;
00129   }
00130   if (cur_atom != 0 && nAtoms != n) {
00131     msgErr << "BaseMolecule: attempt to init atoms while structure building in progress!" << sendmsg;
00132     return FALSE;
00133   }
00134 
00135   if (!atomList) {
00136     int i;
00137     // first call to init_atoms
00138     nAtoms = n; // only place where nAtoms is set!
00139     atomList = new MolAtom[nAtoms];
00140     memset(atomList, 0, nAtoms*sizeof(MolAtom));
00141 
00142     // initialize NULL extra data field, which is returned when
00143     // querying a non-existent field with extra*.data("fielddoesntexist")
00144     extraflt.add_name("NULL", NULL);
00145     extraint.add_name("NULL", NULL);
00146 
00147     // initialize "default" extra data fields.
00148     extraflt.add_name("beta", new float[nAtoms]);
00149     extraflt.add_name("occupancy", new float[nAtoms]);
00150     extraflt.add_name("charge", new float[nAtoms]);
00151     extraflt.add_name("mass", new float[nAtoms]);
00152     extraflt.add_name("radius", new float[nAtoms]);
00153     for (i=0; i<extraflt.num(); i++) {
00154       void *data = extraflt.data(i);
00155       if (data != NULL) 
00156         memset(data, 0, nAtoms*sizeof(float));
00157     }
00158 
00159     // initialize "default" extra integer data fields.
00160     for (i=0; i<extraint.num(); i++) {
00161       void *data = extraint.data(i);
00162       if (data != NULL)
00163         memset(data, 0, nAtoms*sizeof(int));
00164     }
00165     return TRUE;
00166   }
00167   if (n != nAtoms) {
00168     msgErr << "The number of atoms in this molecule has already been assigned."
00169            << sendmsg;
00170     return FALSE;
00171   }
00172   return TRUE;
00173 }
00174 
00175 // add a new atom; return it's index, or (-1) if error.
00176 int BaseMolecule::add_atom(char *name, char *atomtype, int atomicnumber, 
00177     char *resname, int resid, const char *chain, const char *segname, 
00178     char *insertion, const char *altloc) {
00179   int nameindex, typeindex;
00180   int resnameindex, segnameindex, altlocindex, chainindex;
00181 
00182   if (!atomList || cur_atom >= nAtoms) {
00183     msgErr << "BaseMolecule: Cannot add new atom; currently " << nAtoms;
00184     msgErr << " atoms in structure." << sendmsg;
00185     return (-1);
00186   }
00187 
00188   // create atom  
00189   MolAtom *newatom = atom(cur_atom);
00190   newatom->init(cur_atom, resid, insertion);
00191 
00192   // add names to namelist, and put indices in MolAtom object
00193   nameindex = atomNames.add_name(name, atomNames.num());
00194   typeindex = atomTypes.add_name(atomtype, atomTypes.num());
00195   resnameindex = resNames.add_name(resname, resNames.num());
00196   segnameindex = segNames.add_name(segname, segNames.num());
00197   altlocindex = altlocNames.add_name(altloc, altlocNames.num());
00198 
00199   // use default of 'X' for chain if not given
00200   if(!chain || ! (*chain) || *chain == ' ')
00201     chainindex = chainNames.add_name("X", chainNames.num());
00202   else
00203     chainindex = chainNames.add_name(chain, chainNames.num());
00204 
00205   // set atom member variables
00206   newatom->nameindex = nameindex;
00207   newatom->typeindex = typeindex;
00208   newatom->atomicnumber = atomicnumber;
00209   newatom->resnameindex = resnameindex;
00210   newatom->segnameindex = segnameindex;
00211   newatom->altlocindex = altlocindex;
00212   newatom->chainindex = chainindex;
00213 
00214   // check for integer overflow/wraparound condition, which can occur
00215   // if an evil plugin defines 100,000 unique atom names, for example
00216   if (newatom->nameindex != nameindex ||
00217       newatom->typeindex != typeindex ||
00218       newatom->atomicnumber != atomicnumber ||
00219       newatom->resnameindex != resnameindex ||
00220       newatom->segnameindex != segnameindex ||
00221       newatom->altlocindex != altlocindex ||
00222       newatom->chainindex != chainindex) {
00223     msgErr << "BaseMolecule: Cannot add atom; namelist index value too large." << sendmsg;
00224     msgErr << "Recompile VMD with larger index types." << sendmsg;
00225     msgErr << "Atom namelist index values at time of overflow:" << sendmsg;
00226     msgErr << "  nameindex: " << nameindex << sendmsg;;
00227     msgErr << "  typeindex: " << typeindex << sendmsg;;
00228     msgErr << "  resnameindex: " << resnameindex << sendmsg;;
00229     msgErr << "  segnameindex: " << segnameindex << sendmsg;;
00230     msgErr << "  altlocindex: " << altlocindex << sendmsg;;
00231     msgErr << "  chainindex: " << chainindex << sendmsg;
00232     return -1;
00233   }
00234 
00235   return cur_atom++;
00236 }
00237 
00238 
00239 // add a new bond; return 0 on success, or (-1) if error.
00240 int BaseMolecule::add_bond(int a, int b, float bondorder, 
00241                            int bondtype, int backbonetype) {
00242   if (!nAtoms || a >= nAtoms || b >= nAtoms) {
00243     msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
00244     return (-1);
00245   } 
00246 
00247   if (a == b) {
00248     msgErr << "BaseMolecule: Cannot bond atom " <<a<< " to itself." << sendmsg;
00249     return (-1);
00250   }
00251 
00252   // put the bond in the atom list
00253   if (atom(a)->add_bond(b, backbonetype)) {
00254     if (bonderrorcount < MAXBONDERRORS) {
00255       if (lastbonderratomid != a) {
00256         msgErr << "MolAtom " << a << ": Exceeded maximum number of bonds ("
00257                << atom(a)->bonds << ")." << sendmsg;
00258         lastbonderratomid=a;
00259         bonderrorcount++;
00260       }
00261     } else if (bonderrorcount == MAXBONDERRORS) {
00262       msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
00263       msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
00264       bonderrorcount++;
00265     }
00266     return (-1);
00267   }
00268 
00269   if (atom(b)->add_bond(a, backbonetype)) {
00270     if (bonderrorcount < MAXBONDERRORS) {
00271       if (lastbonderratomid != b) {
00272         msgErr << "MolAtom " << b << ": Exceeded maximum number of bonds ("
00273                << atom(b)->bonds << ")." << sendmsg;
00274         lastbonderratomid=b;
00275         bonderrorcount++;
00276       }
00277     } else if (bonderrorcount == MAXBONDERRORS) {
00278       msgErr << "BaseMolecule: Excessive bonding errors encountered, perhaps atom coordinates are in the wrong units?" << sendmsg;
00279       msgErr << "BaseMolecule: Silencing bonding error messages." << sendmsg;
00280       bonderrorcount++;
00281     }
00282     return (-1);
00283   }
00284 
00285   // store bond orders and types
00286   setbondorder(a, atom(a)->bonds-1, bondorder);
00287   setbondorder(b, atom(b)->bonds-1, bondorder);
00288 
00289   setbondtype(a, atom(a)->bonds-1, bondtype);
00290   setbondtype(b, atom(b)->bonds-1, bondtype);
00291 
00292   return 0;
00293 }
00294 
00295 // Add a bond to a structure but check to make sure there isn't a 
00296 // duplicate, as may be the case when merging bondlists from a file 
00297 // and from a distance-based bond search
00298 int BaseMolecule::add_bond_dupcheck(int a, int b, float bondorder, int bondtype) {
00299   int i;
00300 
00301   if (!nAtoms || a >= nAtoms || b >= nAtoms) {
00302     msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
00303     return (-1);
00304   }
00305 
00306   MolAtom *atm = atom(a);
00307   int nbonds = atm->bonds;
00308   const int *bonds = &atm->bondTo[0];
00309   for (i=0; i<nbonds; i++) {
00310     if (bonds[i] == b) {
00311       return 0; // skip bond that already exists
00312     }
00313   }
00314   add_bond(a, b, bondorder, bondtype); // add it if it doesn't already exist
00315 
00316   return 0;
00317 }
00318 
00319 int BaseMolecule::add_angle(int a, int b, int c, int type) {
00320   int i,n;
00321   // make sure that a < c to make it easier to find duplicates later.
00322   if (a > c) { i = a; a = c; c = i; }
00323   
00324   angles.append(a); 
00325   angles.append(b); 
00326   angles.append(c);
00327 
00328   n = num_angles()-1;
00329   set_angletype(n, type);
00330   return n;
00331 }
00332 
00333 
00334 int BaseMolecule::set_angletype(int nangle, int type)
00335 {
00336   int i;
00337 
00338   // type -1 is the anonymous type and is handled special w/o storage.
00339   // we also bail out in case we get an illegal angle index.
00340   if ((type < 0) || (nangle >= num_angles()))
00341     return -1;
00342 
00343   // fill array if not there
00344   if (angleTypes.num() <= nangle) {
00345     set_dataset_flag(BaseMolecule::ANGLETYPES);
00346     for (i=angleTypes.num(); i < num_angles(); i++)
00347       angleTypes.append(-1);
00348   }
00349   
00350   angleTypes[nangle] = type;
00351   return type;
00352 }
00353 
00354 int BaseMolecule::get_angletype(int nangle)
00355 {
00356   if ((nangle < 0) || (nangle >= angleTypes.num()))
00357     return -1;
00358 
00359   return angleTypes[nangle];
00360 }
00361 
00362 
00363 int BaseMolecule::add_dihedral(int a, int b, int c, int d, int type) {
00364   int i, j, n;
00365   // make sure that b < c so that it is easier to find duplicates later.
00366   if (b > c) { i = a; j = b ; a = d ; b = c; d = i; c = j; }
00367   
00368   dihedrals.append(a); 
00369   dihedrals.append(b); 
00370   dihedrals.append(c);
00371   dihedrals.append(d);
00372 
00373   n = num_dihedrals()-1;
00374   set_dihedraltype(n, type);
00375   return n;
00376 }
00377 
00378 
00379 int BaseMolecule::set_dihedraltype(int ndihedral, int type)
00380 {
00381   int i;
00382   
00383   // type -1 is the anonymous type and is handled special w/o storage.
00384   // we also bail out in case we get an illegal dihedral index.
00385   if ((type < 0) || (ndihedral >= num_dihedrals()))
00386     return -1;
00387 
00388   // fill array if not there
00389   if (dihedralTypes.num() <= ndihedral) {
00390     set_dataset_flag(BaseMolecule::ANGLETYPES);
00391     for (i=dihedralTypes.num(); i < num_dihedrals(); i++)
00392       dihedralTypes.append(-1);
00393   }
00394   
00395   dihedralTypes[ndihedral] = type;
00396   return type;
00397 }
00398 
00399 int BaseMolecule::get_dihedraltype(int ndihedral)
00400 {
00401   if ((ndihedral < 0) || (ndihedral >= dihedralTypes.num()))
00402     return -1;
00403 
00404   return dihedralTypes[ndihedral];
00405 }
00406 
00407 int BaseMolecule::add_improper(int a, int b, int c, int d, int type) {
00408   int i, j, n;
00409   // make sure that b < c so that it is easier to find duplicates later.
00410   if (b > c) { i = a; j = b ; a = d ; b = c; d = i; c = j; }
00411   
00412   impropers.append(a); 
00413   impropers.append(b); 
00414   impropers.append(c);
00415   impropers.append(d);
00416 
00417   n = num_impropers()-1;
00418   set_impropertype(n, type);
00419   return n;
00420 }
00421 
00422 
00423 int BaseMolecule::set_impropertype(int nimproper, int type)
00424 {
00425   int i;
00426   
00427   // type -1 is the anonymous type and is handled special w/o storage.
00428   // we also bail out in case we get an illegal improper index.
00429   if ((type < 0) || (nimproper >= num_impropers()))
00430     return -1;
00431 
00432   // fill array if not there
00433   if (improperTypes.num() <= nimproper) {
00434     set_dataset_flag(BaseMolecule::ANGLETYPES);
00435     for (i=improperTypes.num(); i < num_impropers(); i++)
00436       improperTypes.append(-1);
00437   }
00438   
00439   improperTypes[nimproper] = type;
00440   return type;
00441 }
00442 
00443 int BaseMolecule::get_impropertype(int nimproper)
00444 {
00445   if ((nimproper < 0) || (nimproper >= improperTypes.num()))
00446     return -1;
00447 
00448   return improperTypes[nimproper];
00449 }
00450 
00452 
00453 void BaseMolecule::setbondorder(int atom, int bond, float order) {
00454   float *bondOrders = extraflt.data("bondorders");
00455 
00456   // if not already there, add it
00457   if (bondOrders == NULL) {
00458     if (order != 1) {
00459       int i;
00460       extraflt.add_name("bondorders", new float[nAtoms*MAXATOMBONDS]);
00461       bondOrders = extraflt.data("bondorders");
00462       for (i=0; i<nAtoms*MAXATOMBONDS; i++)
00463         bondOrders[i] = 1.0f;    
00464 
00465       bondOrders[atom * MAXATOMBONDS + bond] = order;
00466     } 
00467     return;
00468   }
00469 
00470   bondOrders[atom * MAXATOMBONDS + bond] = order;
00471 }
00472 
00473 float BaseMolecule::getbondorder(int atom, int bond) {
00474   float *bondOrders = extraflt.data("bondorders");
00475 
00476   // if not already there, add it
00477   if (bondOrders == NULL) { 
00478     return 1;
00479   }
00480    
00481   return bondOrders[atom * MAXATOMBONDS + bond];
00482 }
00483 
00484 
00485 void BaseMolecule::setbondtype(int atom, int bond, int type) {
00486   int *bondTypes = extraint.data("bondtypes");
00487 
00488   // if not already there, add it
00489   if (bondTypes == NULL) {
00490     if (type != -1) {
00491       int i;
00492       extraint.add_name("bondtypes", new int[nAtoms*MAXATOMBONDS]);
00493       bondTypes = extraint.data("bondtypes");
00494       for (i=0; i<nAtoms*MAXATOMBONDS; i++)
00495         bondTypes[i] = -1;    
00496 
00497       bondTypes[atom * MAXATOMBONDS + bond] = type;
00498     } 
00499     return;
00500   }
00501 
00502   bondTypes[atom * MAXATOMBONDS + bond] = type;
00503 }
00504 
00505 int BaseMolecule::getbondtype(int atom, int bond) {
00506   int *bondTypes = extraint.data("bondtypes");
00507 
00508   // if not already there, add it
00509   if (bondTypes == NULL) { 
00510     return -1;
00511   }
00512    
00513   return bondTypes[atom * MAXATOMBONDS + bond];
00514 }
00515 
00516 
00517 // return the Nth residue
00518 Residue *BaseMolecule::residue(int n) {
00519   return residueList[n];
00520 }
00521 
00522 
00523 // return the Nth fragment
00524 Fragment *BaseMolecule::fragment(int n) {
00525   return fragList[n];
00526 }
00527 
00528 
00529 // given an atom index, return the residue object for the residue it
00530 // is in.  If it is not in a residue, return NULL.
00531 Residue *BaseMolecule::atom_residue(int n) {
00532   MolAtom *atm = atom(n);
00533   if(atm->uniq_resid < 0)
00534     return NULL;
00535   else
00536     return residue(atm->uniq_resid);
00537 }
00538 
00539 
00540 // given an atom index, return the fragment object for the fragment it
00541 // is in.  If it is not in a fragment, return NULL.
00542 Fragment *BaseMolecule::atom_fragment(int n) {
00543   MolAtom *atm = atom(n);
00544   int frag = residue(atm->uniq_resid)->fragment;
00545   if(frag < 0)
00546     return NULL;
00547   else
00548     return fragment(frag);
00549 }
00550 
00551 // return a 'default' value for a given atom name
00552 float BaseMolecule::default_radius(char *nm) {
00553   float val = 1.5;
00554   // some names start with a number
00555   while (*nm && isdigit(*nm))
00556     nm++;
00557   if(nm) {
00558     switch(toupper(nm[0])) {
00559       // These are similar to the values used by X-PLOR with sigma=0.8
00560       // see page 50 of the X-PLOR 3.1 manual
00561       case 'H' : val = 1.00f; break;
00562       case 'C' : val = 1.50f; break;
00563       case 'N' : val = 1.40f; break;
00564       case 'O' : val = 1.30f; break;
00565       case 'F' : val = 1.20f; break;
00566       case 'S' : val = 1.90f; break;
00567     }
00568   }
00569 
00570   return val;
00571 }
00572 
00573 
00574 // return a 'default' value for a given atom name
00575 float BaseMolecule::default_mass(char *nm) {
00576   float val = 12.0;
00577 
00578   // some names start with a number
00579   while (*nm && isdigit(*nm)) nm++;
00580   if(nm) {
00581     switch(toupper(nm[0])) {
00582       case 'H' : val = 1.00800f; break;
00583       case 'C' : val = 12.01100f; break;
00584       case 'N' : val = 14.00700f; break;
00585       case 'O' : val = 15.99900f; break;
00586       case 'F' : val = 55.84700f; break;
00587       case 'P' : val = 30.97376f; break;
00588       case 'S' : val = 32.06000f; break;
00589     }
00590     if      (toupper(nm[0] == 'C') && toupper(nm[1] == 'L')) val = 35.453f;
00591     else if (toupper(nm[0] == 'N') && toupper(nm[1] == 'A')) val = 22.989770f;
00592     else if (toupper(nm[0] == 'M') && toupper(nm[1] == 'G')) val = 24.3050f;
00593   }
00594 
00595   return val;
00596 }
00597 
00598 
00599 float BaseMolecule::default_charge(char *) {
00600   // return 0 for everything; later, when we put in a more reliable
00601   // system for determining the charge that's user-configurable,
00602   // we can start assigning more realistic charges.
00603   return 0.0f;
00604 }
00605 
00606 
00607 // count the number of unique bonds in the structure
00608 int BaseMolecule::count_bonds(void) {
00609   int i, j;
00610   int count=0;
00611 
00612   for (i=0; i<nAtoms; i++) {
00613     int nbonds = atomList[i].bonds;
00614     const int *bonds = &atomList[i].bondTo[0];
00615 
00616     for (j=0; j<nbonds; j++) {
00617       if (bonds[j] > i)
00618         count++;
00619     }
00620   }
00621 
00622   return count;
00623 }
00624 
00625 
00626 void BaseMolecule::clear_bonds(void) {
00627   int i;
00628   for (i=0; i<nAtoms; i++)
00629     atomList[i].bonds = 0;
00630 }
00631 
00632 
00633 // analyze the molecule for more than just the atom/bond information
00634 // This is here since it is called _after_ the molecule is added to
00635 // the MoleculeList.  Thus, there is a Tcl callback to allow the
00636 // user to update the bond information (or other fields?) before
00637 // the actual search.
00638 void BaseMolecule::analyze(void) {
00639   need_find_bonds = 0; // at this point it's too late
00640 
00641   // I have to let 0 atoms in because I want to be able to read things
00642   // like electron density maps, which have no atoms.
00643   // It is kinda wierd, then to make BaseMolecule be at the top of the
00644   // heirarchy.  Oh well.
00645   if(nAtoms < 1)
00646     return;
00647 
00648   // call routines to find different characteristics of the molecule
00649   msgInfo << "Analyzing structure ..." << sendmsg;
00650   msgInfo << "   Atoms: " << nAtoms << sendmsg;
00651 
00652   // count unique bonds/angles/dihedrals/impropers/cterms
00653   msgInfo << "   Bonds: " << count_bonds() << sendmsg;
00654   msgInfo << "   Angles: " << num_angles()
00655           << "  Dihedrals: " << num_dihedrals()
00656           << "  Impropers: " << num_impropers()
00657           << "  Cross-terms: " << num_cterms()
00658           << sendmsg;
00659 
00660   msgInfo << "   Bondtypes: " << bondTypeNames.num()
00661           << "  Angletypes: " << angleTypeNames.num()
00662           << "  Dihedraltypes: " << dihedralTypeNames.num()
00663           << "  Impropertypes: " << improperTypeNames.num()
00664           << sendmsg;
00665 
00666   // restore residue and fragment lists to pristine state
00667   residueList.clear();
00668   fragList.clear();
00669   pfragList.clear();   
00670   pfragCyclic.clear(); 
00671 #if defined(VMDFASTRIBBONS)
00672   pfragCPList.clear(); 
00673 #endif
00674   nfragList.clear();   
00675   nfragCyclic.clear(); 
00676 #if defined(VMDFASTRIBBONS)
00677   nfragCPList.clear(); 
00678 #endif
00679 
00680   // assign per-atom backbone types
00681   find_backbone();
00682 
00683   // find all the atoms in a resid connected to DNA/RNA/PROTEIN/WATER
00684   // also, assign a unique resid (uniq_resid) to each atom
00685   nResidues = find_residues();
00686   msgInfo << "   Residues: " << nResidues << sendmsg;
00687 
00688   nWaters = find_waters();
00689   msgInfo << "   Waters: " << nWaters << sendmsg;
00690   
00691   // determine which residues are connected to each other
00692   bonderrorcount=0; // reset error count before residue connectivity search
00693   find_connected_residues(nResidues); 
00694 
00695  
00696   nSegments = find_segments(); 
00697   msgInfo << "   Segments: " << nSegments << sendmsg;
00698 
00699   nFragments = find_fragments();
00700   msgInfo << "   Fragments: " << nFragments;
00701 
00702   nProteinFragments = pfragList.num();
00703   msgInfo << "   Protein: " << nProteinFragments;
00704 
00705   nNucleicFragments = nfragList.num();
00706   msgInfo << "   Nucleic: " << nNucleicFragments << sendmsg;
00707   
00708   // NOTE: The current procedure incorrectly identifies some lipid 
00709   // atoms as "ATOMNUCLEICBACK" (but not as "nucleic") as well
00710   // as some single water oxygens as "backbone". Here, we 
00711   // correct this by setting all atoms of non-polymeric residue types
00712   // to be "ATOMNORMAL" (i.e.: not backbone).
00713   MolAtom *a;
00714   int i;
00715   for (i=0; i<nAtoms; i++) {
00716     a = atom(i); 
00717     if ((a->residueType != RESNUCLEIC) && (a->residueType != RESPROTEIN)) 
00718       a->atomType = ATOMNORMAL;
00719   }
00720 
00721   // Search for hydrogens
00722   // XXX Must be done after the rest of the structure finding routines,
00723   // because those routines assume that anything that isn't NORMAL is
00724   // a backbone atom.
00725   // We use the name-based definition used in the IS_HYDROGEN macro
00726   for (i=0; i<nAtoms; i++) {
00727     MolAtom *a = atom(i);
00728     const char *aname = atomNames.name(a->nameindex);
00729     if (aname != NULL && IS_HYDROGEN(aname))
00730       a->atomType = ATOMHYDROGEN;
00731   }
00732 
00733 #if defined(VMDFASTRIBBONS)
00734   calculate_ribbon_controlpoints();
00735 #endif
00736   
00737 }
00738 
00739 
00741 int BaseMolecule::find_backbone(void) {
00742   int i, j, k;
00743 
00744   // Search for the protein backbone
00745   int protypes[4];
00746   protypes[0] = atomNames.typecode((char *) "CA");
00747   protypes[1] = atomNames.typecode((char *) "C");
00748   protypes[2] = atomNames.typecode((char *) "O");
00749   protypes[3] = atomNames.typecode((char *) "N");
00750 
00751   // special case for terminal oxygens that miss the search for O
00752   // by looking for ones connected to a C
00753   int termtypes[5];
00754   termtypes[0] = atomNames.typecode((char *) "OT1"); // standard PDB names
00755   termtypes[1] = atomNames.typecode((char *) "OT2");
00756   termtypes[2] = atomNames.typecode((char *) "OXT"); // synonym for OT2
00757   termtypes[3] = atomNames.typecode((char *) "O1");  // Gromacs force field 
00758   termtypes[4] = atomNames.typecode((char *) "O2");  // atom names
00759 
00760   // search for the DNA/RNA backbone;  the atom names are:
00761   // for the phosphate:  P, O1P, O2P, OP1, OP2
00762   // for the rest: O3', C3', C4', C5', O5'
00763   // (or O3*, C3*, C4*, C5*, O5*)
00764   int nuctypes[15];
00765   nuctypes[ 0] = atomNames.typecode((char *) "P");
00766   nuctypes[ 1] = atomNames.typecode((char *) "O1P"); // old PDB files
00767   nuctypes[ 2] = atomNames.typecode((char *) "O2P"); // old PDB files
00768   nuctypes[ 3] = atomNames.typecode((char *) "OP1"); // new PDB files
00769   nuctypes[ 4] = atomNames.typecode((char *) "OP2"); // new PDB files
00770   nuctypes[ 5] = atomNames.typecode((char *) "C3*");
00771   nuctypes[ 6] = atomNames.typecode((char *) "C3'");
00772   nuctypes[ 7] = atomNames.typecode((char *) "O3*");
00773   nuctypes[ 8] = atomNames.typecode((char *) "O3'");
00774   nuctypes[ 9] = atomNames.typecode((char *) "C4*");
00775   nuctypes[10] = atomNames.typecode((char *) "C4'");
00776   nuctypes[11] = atomNames.typecode((char *) "C5*");
00777   nuctypes[12] = atomNames.typecode((char *) "C5'");
00778   nuctypes[13] = atomNames.typecode((char *) "O5*");
00779   nuctypes[14] = atomNames.typecode((char *) "O5'");
00780 
00781 #if 0
00782   // non-backbone nucleic acid atom names
00783   nuctypes[  ] = atomNames.typecode((char *) "C1*");
00784   nuctypes[  ] = atomNames.typecode((char *) "C1'");
00785   nuctypes[  ] = atomNames.typecode((char *) "C2*");
00786   nuctypes[  ] = atomNames.typecode((char *) "C2'");
00787   nuctypes[  ] = atomNames.typecode((char *) "O2*");
00788   nuctypes[  ] = atomNames.typecode((char *) "O2'");
00789   nuctypes[  ] = atomNames.typecode((char *) "O4*");
00790   nuctypes[  ] = atomNames.typecode((char *) "O4'");
00791 #endif
00792 
00793   // special case for terminal nucleic residues
00794   int nuctermtypes[2];
00795   nuctermtypes[0] = atomNames.typecode((char *) "H5T"); // standard names
00796   nuctermtypes[1] = atomNames.typecode((char *) "H3T");
00797 
00798 
00799   // loop over all atoms assigning atom backbone type flags
00800   for (i=0; i<nAtoms; i++) {
00801     MolAtom *a = atom(i);
00802  
00803     // initialize atom type to non-backbone
00804     a->atomType = ATOMNORMAL;
00805 
00806     // check for protein backbone atom names
00807     for (j=0; j < 4; j++) {
00808       if (a->nameindex == protypes[j]) {
00809         a->atomType = ATOMPROTEINBACK;
00810         break;
00811       }
00812     }
00813 
00814     // check terminal residue names as well
00815     for (j=0; j < 4; j++) {
00816       if (a->nameindex == termtypes[j]) { // check if OT1, OT2
00817         for (k=0; k < a->bonds; k++) {
00818           if (atom(a->bondTo[k])->atomType == ATOMPROTEINBACK) {
00819             a->atomType = ATOMPROTEINBACK;
00820             break;
00821           }
00822         }
00823       }
00824     }
00825   
00826     // check if in nucleic backbone, if not already set
00827     if (!(a->atomType)) {
00828       for (j=0; j < 15; j++) {
00829         if (a->nameindex == nuctypes[j]) {
00830           a->atomType = ATOMNUCLEICBACK;
00831           break;
00832         }
00833       }
00834     }
00835 
00836     // check if nucleic terminal atom names
00837     for (j=0; j < 2; j++) {
00838       if (a->nameindex == nuctermtypes[j]) {
00839         for (k=0; k < a->bonds; k++) {
00840           if (atom(a->bondTo[k])->atomType == ATOMNUCLEICBACK) {
00841             a->atomType = ATOMNUCLEICBACK;
00842             break;
00843           }
00844         }
00845       }
00846     }
00847   }
00848 
00849   return 0; 
00850 }
00851 
00852 
00853 
00854 // find water molecules based on the residue name
00855 // from the documentation for molscript, these are possible
00856 // waters:
00857 // type H2O HH0 OHH HOH OH2 SOL WAT
00858 // as well, I add TIP, TIP2, TIP3, and TIP4
00859 // The count is the number of sets of connected RESWATERS
00860 int BaseMolecule::find_waters(void) {
00861   int i, j;
00862   MolAtom *a;
00863 
00864   int watertypes[12];
00865   watertypes[0] = resNames.typecode((char *) "H2O");
00866   watertypes[1] = resNames.typecode((char *) "HH0");
00867   watertypes[2] = resNames.typecode((char *) "OHH");
00868   watertypes[3] = resNames.typecode((char *) "HOH");
00869   watertypes[4] = resNames.typecode((char *) "OH2");
00870   watertypes[5] = resNames.typecode((char *) "SOL");
00871   watertypes[6] = resNames.typecode((char *) "WAT");
00872   watertypes[7] = resNames.typecode((char *) "TIP");
00873   watertypes[8] = resNames.typecode((char *) "TIP2");
00874   watertypes[9] = resNames.typecode((char *) "TIP3");
00875   watertypes[10] = resNames.typecode((char *) "TIP4");
00876   // this conflicts with a PDB compound:
00877   //   http://minerva.roca.csic.es/hicup/SPC/spc_pdb.txt
00878   //
00879   // XXX we should add a check to make sure that there are only 
00880   //     three atoms in the residue when all is said and done. If there
00881   //     are more, then we should undo the assignment as a water and mark it
00882   //     as protein etc as appropriate.  This is tricky since at this stage
00883   //     we haven't done any connectivity tests etc, and we're only working
00884   //     with individual atoms.  Perhaps its time to re-think this logic.
00885   watertypes[11] = resNames.typecode((char *) "SPC");
00886 
00887   for (i=0; i<nAtoms; i++) {
00888     a = atom(i);
00889     if (a->residueType == RESNOTHING) {  // make sure it isn't named yet
00890       for (j=0; j<12; j++) {
00891         if (watertypes[j] == a->resnameindex) {
00892           a->residueType = RESWATERS;
00893           break;
00894         }
00895       }
00896     }
00897   }
00898  
00899   int count = find_connected_waters2();
00900 
00901   return count;   
00902 }
00903 
00904 
00905 // if this is a RESWATERS with index idx, mark it and find if
00906 // any of its neighbors are RESWATERS
00907 // this does a depth-first search with RECURSION.
00908 void BaseMolecule::find_connected_waters(int i, char *tmp) {
00909   MolAtom *a = atom(i);
00910   int j;
00911   if (a->residueType == RESWATERS && !tmp[i]) {
00912     tmp[i] = TRUE;
00913     for (j=0; j<a->bonds; j++) {
00914       find_connected_waters(a->bondTo[j], tmp);
00915     }
00916   }
00917 }
00918 
00919 
00920 // if this is a RESWATERS with index idx, mark it and find if
00921 // any of its neighbors are RESWATERS
00922 int BaseMolecule::find_connected_waters2(void) {
00923   MolAtom *a;
00924   int count, i;
00925   IntStackHandle s;
00926 
00927   char *tmp = new char[nAtoms];
00928   memset(tmp, 0, nAtoms * sizeof(char));
00929 
00930   s = intstack_create(nAtoms);
00931 
00932   for (count=0, i=0; i<nAtoms; i++) {
00933     if (atom(i)->residueType == RESWATERS && !tmp[i]) {
00934       int nextatom;
00935 
00936       count++;
00937       intstack_push(s, i);
00938     
00939       // find and mark all connected waters 
00940       while (!intstack_pop(s, &nextatom)) { 
00941         int j;
00942 
00943         a = atom(nextatom);
00944         tmp[nextatom] = TRUE;
00945 
00946         for (j=a->bonds - 1; j>=0; j--) {
00947           int bi = a->bondTo[j];
00948           MolAtom *b = atom(bi);
00949           if (b->residueType == RESWATERS && !tmp[bi])
00950             intstack_push(s, bi);
00951         }
00952       }
00953     }
00954   }
00955 
00956   intstack_destroy(s);
00957   delete [] tmp;
00958 
00959   return count;
00960 }
00961 
00962 
00963 // find n backbone atoms connected together with the given residueid
00964 // return the total count
00965 // this assumes that the given atom (atomidx) is correct
00966 int BaseMolecule::find_connected_backbone(IntStackHandle s, int backbone,
00967                          int atomidx, int residueid, int tmpid, int *flgs) {
00968   if (flgs[atomidx] != 0)
00969     return 0; // already done
00970 
00971   MolAtom *x = atom(atomidx);
00972   if (x->atomType != backbone || x->resid != residueid)
00973     return 0; // not a backbone atom, or resid doesn't match
00974 
00975   intstack_popall(s); // just in case
00976   intstack_push(s, atomidx);
00977   int nextatom;
00978   int count = 0;
00979    
00980   // find and mark connected backbone atoms
00981   while (!intstack_pop(s, &nextatom)) {
00982     MolAtom *a = atom(nextatom);
00983     flgs[nextatom] = tmpid;
00984     count++;
00985 
00986     int j;
00987     for (j=a->bonds - 1; j>=0; j--) {
00988       int bi = a->bondTo[j];
00989       if (flgs[bi] == 0) {
00990         MolAtom *b = atom(bi);
00991 
00992         // skip connections to atoms on different chains/segnames
00993         if (a->chainindex != b->chainindex || 
00994             a->segnameindex != b->segnameindex)
00995           continue;
00996 
00997         if (b->atomType == backbone && b->resid == residueid)
00998           intstack_push(s, bi);
00999       }
01000     }
01001   }
01002 
01003   return count;
01004 }
01005 
01006 
01007 // the find_connected_backbone left terms of flgs which need to be cleaned up
01008 void BaseMolecule::clean_up_connection(IntStackHandle s, int i, int tmpid, int *flgs) {
01009   if (flgs[i] != tmpid)  // been here before
01010     return;
01011 
01012   intstack_popall(s); // just in case
01013   intstack_push(s, i);
01014   int nextatom;
01015  
01016   // find and null out non-matching atom flags
01017   while (!intstack_pop(s, &nextatom)) {
01018     flgs[nextatom] = 0;
01019     MolAtom *a = atom(nextatom);
01020     int j;
01021     for (j=a->bonds - 1; j>=0; j--) {
01022       int bi = a->bondTo[j];
01023       if (flgs[bi] == tmpid) {
01024         intstack_push(s, bi);
01025       }
01026     }
01027   }
01028 }
01029 
01030 
01031 
01032 // now that I know this resid is okay, mark it so
01033 void BaseMolecule::find_connected_atoms_in_resid(IntStackHandle s,
01034     int restype, int i, int residueid, int tmpid, int *flgs)
01035 {
01036   if (flgs[i] != 0 || atom(i)->resid != residueid)
01037     return;
01038 
01039   intstack_popall(s); // just in case
01040   intstack_push(s, i);
01041   int nextatom;
01042 
01043   // find and mark all connected residues in the same chain/segname
01044   while (!intstack_pop(s, &nextatom)) {
01045     flgs[nextatom] = tmpid;
01046     MolAtom *a = atom(nextatom);
01047     a->residueType = restype;
01048 
01049     int j;
01050     for (j=a->bonds - 1; j>=0; j--) {
01051       int bi = a->bondTo[j];
01052       MolAtom *b = atom(bi);
01053       if (flgs[bi] == 0 &&
01054           a->chainindex == b->chainindex &&
01055           a->segnameindex == b->segnameindex &&
01056           b->resid == residueid) {
01057         intstack_push(s, bi);
01058       }
01059     }
01060   }
01061 }
01062 
01063 
01064 
01065 // Find connected backbone atoms with the same resid
01066 // if found, find all the atoms with the same resid
01067 // which are connected to those backbone atoms only through
01068 // atoms of the same resid
01069 void BaseMolecule::find_and_mark(int n, int backbone,
01070   int restype, int *tmpid, int *flgs)
01071 {
01072   int i;
01073   MolAtom *a;
01074   int residueid; // the real resid
01075   IntStackHandle s = intstack_create(nAtoms);
01076 
01077   for (i=0; i<nAtoms; i++) {
01078     a = atom(i);   // look for a new backbone atom
01079     if (a->atomType == backbone && flgs[i] == 0) {
01080       residueid = a->resid;
01081       if (find_connected_backbone(s, backbone, i, residueid, *tmpid, flgs) >= n) {
01082         // if find was successful, start all over again
01083         clean_up_connection(s, i, *tmpid, flgs);
01084         // but mark all the Atoms connected to here
01085         find_connected_atoms_in_resid(s, restype, i, residueid, *tmpid, flgs);
01086         // and one more was made
01087         (*tmpid)++;
01088       } else {
01089         // clean things up so I won't have problems later
01090         clean_up_connection(s, i, *tmpid, flgs);
01091       }
01092     }
01093   }
01094 
01095   intstack_destroy(s);
01096 }
01097 
01098 
01099 
01100 // assign a uniq resid (uniq_resid) to each set of connected atoms
01101 // with the same residue id.  There could be many residues with the
01102 // same id, but not connected (the SSN problem - SSNs are not unique
01103 // so don't use them as the primary key)
01104 int BaseMolecule::make_uniq_resids(int *flgs) {
01105   int i;
01106   int num_residues = 0;
01107   IntStackHandle s = intstack_create(nAtoms);
01108 
01109   for (i=0; i<nAtoms; i++) {
01110     if (!flgs[i]) {  // not been numbered
01111       // find connected atoms to i with the same resid and label
01112       // it with the uniq_resid
01113       MolAtom *a = atom(i);
01114       int resid = a->resid;
01115       char *insertion = a->insertionstr;
01116 
01117       intstack_push(s, i);
01118       int nextatom;
01119 
01120       // Loop over all atoms we're bonded to in the same chain/segname
01121       while (!intstack_pop(s, &nextatom)) {
01122         MolAtom *a = atom(nextatom);
01123         a->uniq_resid = num_residues;  // give it the new resid number
01124         flgs[nextatom] = TRUE;         // mark this atom done
01125   
01126         int j;
01127         for (j=a->bonds - 1; j>=0; j--) {
01128           int bi = a->bondTo[j];
01129           if (flgs[bi] == 0) {
01130             MolAtom *b = atom(bi);
01131             if (a->chainindex == b->chainindex && 
01132                 a->segnameindex == b->segnameindex &&
01133                 b->resid == resid && !strcmp(b->insertionstr, insertion))
01134               intstack_push(s, bi);
01135           }
01136         }
01137       }
01138 
01139       num_residues++;
01140     }
01141   }
01142 
01143   intstack_destroy(s);
01144 
01145   return num_residues;
01146 }
01147 
01148 
01149 
01150 int BaseMolecule::find_residues(void) {
01151   int *flgs = new int[nAtoms]; // flags used for connected atom searches
01152   memset(flgs, 0, nAtoms * sizeof(int)); // clear flags array
01153   
01154   // assign a uniq resid (uniq_resid) to each set of connected atoms
01155   // with the same residue id.  There could be many residues with the
01156   // same id, but not connected (the SSN problem - SSNs are not unique
01157   // so don't use them as the primary key)
01158   int num_residues = make_uniq_resids(flgs);
01159    
01160   int back_res_count = 1; // tmp count of number of residues on some backbone
01161   memset(flgs, 0, nAtoms * sizeof(int)); // clear flags array
01162   
01163   //  hunt for the proteins
01164   // there must be 4 PROTEINBACK atoms connected and with the same resid
01165   // then all connected atoms will be marked as PROTEIN atoms
01166   // this gets everything except the terminals
01167   find_and_mark(4, ATOMPROTEINBACK, RESPROTEIN, &back_res_count, flgs);
01168   
01169   // do the same for nucleic acids
01170   // XXX we might not want to check for the phosphate (P and 2 O's).  Here's
01171   // the quick way I can almost do that.  Unfortionately, that
01172   // messes up nfragList, since it needs a P to detect an end
01173   find_and_mark(4, ATOMNUCLEICBACK, RESNUCLEIC, &back_res_count, flgs);
01174   
01175   delete [] flgs;
01176   return num_residues;
01177 }
01178 
01179 int BaseMolecule::find_atom_in_residue(const char *name, int residue) {
01180   int nametype = atomNames.typecode(name);
01181   if (nametype < 0)
01182     return -2;
01183 
01184   return find_atom_in_residue(nametype, residue);
01185 }
01186 
01187 
01188 // find which residues are connected to which
01189 // remember, I already have the uniq_id for each atom
01190 void BaseMolecule::find_connected_residues(int num_residues) {
01191   int i, j;
01192   for (i=0; i<num_residues; i++) {   // init the list to NULLs
01193     residueList.append(NULL);
01194   }
01195   
01196   for (i=nAtoms-1; i>=0; i--) {      // go through all the atoms
01197     j = atom(i)->uniq_resid;
01198     if (residueList[j] == NULL) {    // then init the residue
01199       residueList[j] = new Residue(atom(i)->resid, atom(i)->residueType);
01200     }
01201     // Tell the residue that this atom is in it
01202     residueList[j]->add_atom(i);
01203   }
01204 
01205   // double check that everything was created
01206   for (i=0; i<num_residues; i++) {
01207     if (residueList[i] == NULL) { // no atom was found for this residue
01208       msgErr << "Mysterious residue creation in ";
01209       msgErr << "BaseMolecule::find_connected_residues." << sendmsg;
01210       residueList[i] = new Residue((int) -1, RESNOTHING);
01211     }
01212   }
01213 
01214   // finally, check for unusual connections between residues, e.g. between
01215   // protein and water.
01216   for (i=0; i<num_residues; i++) {
01217     Residue *res = residueList[i];
01218     int bondfromtype = res->residueType;
01219     int numatoms = res->atoms.num();
01220     for (j=0; j<numatoms; j++) {
01221       MolAtom *a = atom(res->atoms[j]);
01222 
01223       // find off-residue bonds to residues of the same chain/segname
01224       int k;
01225       for (k=0; k<a->bonds; k++) {
01226         MolAtom *b = atom(a->bondTo[k]);
01227 
01228         // skip connections to atoms on different chains/segnames
01229         if (a->chainindex != b->chainindex || 
01230             a->segnameindex != b->segnameindex)
01231           continue;
01232          
01233         if (b->uniq_resid != i) {
01234           int bondtotype = residueList[b->uniq_resid]->residueType;
01235 
01236           if (bondfromtype != bondtotype) {
01237             if (i < b->uniq_resid) { // so that we only warn once
01238               msgWarn << "Unusual bond between residues:  ";
01239               msgWarn << residueList[i]->resid;
01240               switch (bondfromtype) {
01241                 case RESPROTEIN: msgWarn << " (protein)"; break;
01242                 case RESNUCLEIC: msgWarn << " (nucleic)"; break;
01243                 case RESWATERS:  msgWarn << " (waters)"; break;
01244                 default:
01245                 case RESNOTHING: msgWarn << " (none)"; break;
01246               }
01247               msgWarn << " and ";
01248               msgWarn << residueList[b->uniq_resid]->resid;
01249               switch (bondtotype) {
01250                 case RESPROTEIN: msgWarn << " (protein)"; break;
01251                 case RESNUCLEIC: msgWarn << " (nucleic)"; break;
01252                 case RESWATERS:  msgWarn << " (waters)"; break;
01253                 default:
01254                 case RESNOTHING: msgWarn << " (none)"; break;
01255               }
01256               msgWarn << sendmsg;
01257             }
01258           }
01259         }
01260       }
01261     }
01262   }
01263 }
01264 
01265 
01266 // find all the residues connected to a specific residue
01267 int BaseMolecule::find_connected_fragments(void) {
01268   int i;
01269   int count = 0;
01270   char *flgs = new char[residueList.num()]; // set up temp space
01271   memset(flgs, 0, residueList.num() * sizeof(char)); // clear flags
01272   IntStackHandle s = intstack_create(residueList.num());
01273 
01274   int atomsg = atomNames.typecode((char *) "SG"); // to find disulfide bonds
01275 
01276   int nextres;
01277   for (i=0; i<residueList.num(); i++) { // find unmarked fragment
01278     if (!flgs[i]) {
01279       fragList.append(new Fragment);
01280       intstack_push(s, i);
01281 
01282       // find and mark all connected residues with the same chain/segname
01283       while (!intstack_pop(s, &nextres)) {
01284         fragList[count]->append(nextres);
01285         Residue *res = residueList[nextres];
01286         res->fragment = count; // store residue's fragment
01287 
01288         int numatoms = res->atoms.num();
01289         int j;
01290         for (j=0; j<numatoms; j++) {
01291           MolAtom *a = atom(res->atoms[j]);
01292 
01293           // find all bonds to residues of the same chain/segname 
01294           int k;
01295           for (k=0; k<a->bonds; k++) {
01296             MolAtom *b = atom(a->bondTo[k]);
01297             int ri = b->uniq_resid;
01298 
01299             // skip connections to residues with different chains/segnames,
01300             // and don't follow disulfide bonds, as we want the order of
01301             // residue traversal to be correct so we can use it to build
01302             // subfragment lists later on
01303             if ((ri != i) &&
01304                 (flgs[ri] == 0) &&
01305                 (a->chainindex == b->chainindex) &&
01306                 (a->segnameindex == b->segnameindex) &&
01307                 ((a->nameindex != atomsg) || (b->nameindex != atomsg))) {
01308               flgs[ri] = TRUE;
01309               intstack_push(s, ri);
01310             }
01311           }
01312         }
01313       }
01314 
01315       count++;
01316     }
01317   }
01318 
01319   intstack_destroy(s);
01320   delete [] flgs;
01321 
01322   return count;
01323 }
01324 
01325 
01326 // find each collection of connected fragments
01327 int BaseMolecule::find_fragments(void) {
01328   int count = find_connected_fragments();  // find and mark its neighbors
01329 
01330 #if 1
01331   // find the protein subfragments
01332   find_subfragments(atomNames.typecode((char *) "N"), 
01333      -1,
01334      -1,
01335      atomNames.typecode((char *) "C"), 
01336      -1,
01337      -1,
01338      -1,
01339      RESPROTEIN, &pfragList);
01340 
01341 #if 0
01342   // find the nucleic acid subfragments
01343   find_subfragments(atomNames.typecode((char *) "P"), 
01344      atomNames.typecode((char *) "H5T"),
01345      -1,
01346      atomNames.typecode((char *) "O3'"),
01347      atomNames.typecode((char *) "O3*"),
01348      atomNames.typecode((char *) "H3T"),
01349      -1,
01350      RESNUCLEIC, &nfragList);
01351 #else
01352   // find the nucleic acid subfragments
01353   find_subfragments_topologically(
01354      RESNUCLEIC, &nfragList,
01355      atomNames.typecode((char *) "O3'"),
01356      atomNames.typecode((char *) "O3*"),
01357      atomNames.typecode((char *) "H3T"),
01358      -1);
01359 #endif
01360 #else
01361   find_subfragments_cyclic(&pfragList, RESPROTEIN);
01362   find_subfragments_cyclic(&nfragList, RESNUCLEIC);
01363 #endif
01364 
01365   // determine whether fragments are cyclic or not
01366   find_cyclic_subfragments(&pfragList, &pfragCyclic);
01367   find_cyclic_subfragments(&nfragList, &nfragCyclic);
01368 
01369   return count;
01370 }
01371 
01372 
01373 void BaseMolecule::find_subfragments_cyclic(ResizeArray<Fragment *> *subfragList, int restype) {
01374   int numfrags = fragList.num();
01375   int i, frag;
01376 
01377   // test each fragment to see if it's a candidate for the subfraglist
01378   for (frag=0; frag<numfrags; frag++) {
01379     int numres = fragList[frag]->num();       // residues in this frag
01380     int match=1; // start true, and falsify
01381 
01382     // check each residue to see they are all the right restype
01383     for (i=0; i<numres; i++) {
01384       int resid = (*fragList[frag])[i];
01385       if (residueList[resid]->residueType != restype) {
01386         match=0;
01387         break;
01388       }
01389     }
01390 
01391     // if we found a matching fragment, add it to the subfraglist
01392     if (match) {
01393       Fragment *frg = new Fragment;
01394 
01395       // add all of the residues for this fragment to the subfraglist
01396       for (i=0; i<numres; i++) {
01397         int resid = (*fragList[frag])[i];
01398         frg->append(resid);
01399       } 
01400 
01401       subfragList->append(frg);
01402     }    
01403   }
01404 }
01405 
01406 
01407 
01408 void BaseMolecule::find_cyclic_subfragments(ResizeArray<Fragment *> *subfragList, ResizeArray<int> *subfragCyclic) {
01409   int i, j, frag;
01410   int numfrags = subfragList->num();
01411 
01412   // check each fragment for cycles
01413   for (frag=0; frag<numfrags; frag++) {
01414     int numres   = (*subfragList)[frag]->num();       // residues in this frag
01415 
01416     // skip testing fragments containing zero residues
01417     if (numres < 1)
01418       continue;
01419 
01420     int startres = (*(*subfragList)[frag])[0];        // first residue
01421     int endres   = (*(*subfragList)[frag])[numres-1]; // last residue
01422     int cyclic   = 0;
01423 
01424     // check for bonds between startres and endres
01425     int numatoms = residueList[endres]->atoms.num();
01426     int done = 0;
01427     for (i=0; (i < numatoms) && (!done); i++) {
01428       MolAtom *a = atom(residueList[endres]->atoms[i]);
01429       int nbonds = a->bonds;
01430       for (j=0; j < nbonds; j++) {
01431         MolAtom *b = atom(a->bondTo[j]);
01432 
01433         if (b->uniq_resid == startres) {
01434           cyclic=1;
01435           done=1;
01436           break;
01437         }
01438       }  
01439     }
01440 
01441     // record whether this fragment is cyclic or not
01442     subfragCyclic->append(cyclic);
01443   }
01444 }
01445 
01446 
01447 // this adds the current residue type to the *subfragList,
01448 // this finds the residue connected to the endatom atom type
01449 // and calls this function recursively on that residue
01450 // this will NOT work across NORMAL bonds
01451 void BaseMolecule::find_connected_subfragment(int resnum, int fragnum, 
01452          char *flgs, int endatom,  int altendatom, 
01453          int alt2endatom, int alt3endatom,
01454          int restype, 
01455          ResizeArray<Fragment *> *subfragList)
01456 {
01457   if (flgs[resnum] || residueList[resnum]->residueType != restype) 
01458       return;  // been here before, or this is no good
01459   (*subfragList)[fragnum]->append(resnum);    // add to the list
01460   flgs[resnum] = TRUE;                        // and prevent repeats
01461 
01462   // find the atom in this residue with the right type
01463   int i, j, nextres;
01464   MolAtom *a;
01465   for (i=residueList[resnum]->atoms.num() - 1; i>=0; i--) {
01466     a = atom(residueList[resnum]->atoms[i]);
01467     if (a->nameindex == endatom ||
01468         a->nameindex == altendatom ||
01469         a->nameindex == alt2endatom ||
01470         a->nameindex == alt3endatom) {   // found the end atom
01471       for (j=a->bonds-1; j>=0; j--) {    // look at the bonds
01472         // I can't look at if the residue "bond" is a PRO-PRO or NUC-NUC, since
01473         // that won't tell me if the atom involved is the endatom atom
01474         // This is important because I need to avoid things like S-S bonds
01475         // (note that I never checked that the end was bonded to a start on
01476         //  the next residue! - c'est la vie, or something like that
01477         if ((!(a->atomType == ATOMNORMAL && atom(a->bondTo[j])->atomType == ATOMNORMAL)) && // not backbone 
01478             (nextres = atom(a->bondTo[j])->uniq_resid) != resnum &&
01479             !flgs[nextres] ) { // found next residue, and unvisited
01480           find_connected_subfragment(nextres, fragnum, flgs, endatom,
01481               altendatom, alt2endatom, alt3endatom, restype, subfragList);
01482           return; // only find one; assume no branching
01483         }
01484       } // end of for
01485     } // end of found correct endtype
01486   } // searching atoms
01487 } // end of finding connected subfragment
01488 
01489 
01490 // find a class of fragments, and add them to the subfragment list
01491 void BaseMolecule::find_subfragments(int startatom, 
01492           int altstartatom, int alt2startatom,
01493           int endatom, int altendatom, int alt2endatom, int alt3endatom,
01494           int restype, ResizeArray<Fragment *> *subfragList)
01495 {
01496   int i, j, k;
01497   MolAtom *a;
01498   char *flgs = new char[residueList.num()];
01499   memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
01500 
01501   // Loop over all residues looking for candidate residues that start
01502   // a fragment.  A fragment starting residue must be an unvisited 
01503   // residue which has an startatom with no off residue bond to 
01504   // the same restype
01505   for (i=residueList.num()-1; i>=0; i--) {
01506     // test for previous visit, and whether it's the restype we want
01507     if (!flgs[i] && residueList[i]->residueType == restype) {
01508       // does this residue have a matching startatom
01509       for (j=residueList[i]->atoms.num()-1; j>=0; j--) { 
01510         int satom = (a=atom(residueList[i]->atoms[j]))->nameindex;
01511         if (satom == startatom || 
01512             satom == altstartatom || 
01513             satom == alt2startatom){
01514           for (k=a->bonds-1; k>=0; k--) {
01515             MolAtom *bondto = atom(a->bondTo[k]);
01516             // are there any off-residue bonds to the same restype
01517             if (bondto->uniq_resid != i && bondto->residueType == restype) {
01518               break; // if so then stop, so that k>=0
01519             }
01520           }
01521 
01522           // if we found a valid fragment start atom, find residues downchain
01523           if (k<0) { 
01524             subfragList->append(new Fragment);
01525             find_connected_subfragment(i, subfragList->num()-1, flgs, 
01526                   endatom, altendatom, alt2endatom, alt3endatom,
01527                   restype, subfragList);
01528           } // found starting residue
01529         } // found startatom
01530       } // going through atoms
01531     } // found restype
01532   } // going through residues
01533 
01534   // found 'em all
01535   delete [] flgs;
01536 } 
01537 
01538 
01539 // find a class of fragments, and add them to the subfragment list
01540 void BaseMolecule::find_subfragments_topologically(int restype, 
01541   ResizeArray<Fragment *> *subfragList, 
01542   int endatom, int altendatom, int alt2endatom, int alt3endatom) {
01543   int i; 
01544   char *flgs = new char[residueList.num()];
01545   memset(flgs, 0, residueList.num() * sizeof(char));  // clear flags
01546   int numres = residueList.num();
01547 
01548   // Loop over all residues looking for candidate residues that start
01549   // a fragment.  A fragment starting residue must be an unvisited
01550   // residue which has an startatom with no off residue bond to
01551   // the same restype
01552   for (i=0; i<numres; i++) {
01553     Residue *res = residueList[i];
01554 
01555     // test for previous visit, and whether it's the restype we want
01556     if (!flgs[i] && res->residueType == restype) {
01557       // if this residue only has 1 bond to a residue of the same restype
01558       // it must be a terminal residue
01559       int offresbondcount = 0;
01560       int j, k;
01561       int numatoms = res->atoms.num();
01562       for (j=0; j<numatoms; j++) {
01563         MolAtom *a = atom(res->atoms[j]);
01564 
01565         // find off-residue bonds
01566         for (k=0; k<a->bonds; k++) {
01567           MolAtom *b = atom(a->bondTo[k]);
01568           if (b->uniq_resid != i && 
01569               residueList[b->uniq_resid]->residueType == restype) {
01570             offresbondcount++;
01571           }
01572         }
01573       }
01574 
01575       // if we found a valid fragment start atom, find residues downchain
01576       if (offresbondcount == 1) {
01577         subfragList->append(new Fragment);
01578         find_connected_subfragment(i, subfragList->num()-1, flgs,
01579               endatom, altendatom, alt2endatom, alt3endatom,
01580               restype, subfragList);
01581       }
01582     } // found restype
01583   } // going through residues
01584 
01585   // found 'em all
01586   delete [] flgs;
01587 }
01588 
01589 
01590 #if defined(VMDFASTRIBBONS)
01591 // routine to pre-calculate lists of control point indices to
01592 // be used by the ribbon/cartoon representations
01593 void BaseMolecule::calculate_ribbon_controlpoints() {
01594   int onum, canum, frag, num, res;
01595 
01596   wkf_timerhandle tm = wkf_timer_create();
01597 
01598   msgInfo << "XXX Building pre-computed control point lists" << sendmsg;
01599 
01600   wkf_timer_start(tm);
01601 
01602   // Lookup atom typecodes ahead of time so we can use the most
01603   // efficient variant of find_atom_in_residue() in the main loop.
01604   // If we can't find the atom types we need, bail out immediately
01605   int CAtypecode  = atomNames.typecode("CA");
01606   int Otypecode   = atomNames.typecode("O");
01607   int OT1typecode = atomNames.typecode("OT1");
01608    
01609   // We can't draw a ribbon without CA and O atoms for guidance!
01610   if (CAtypecode < 0) {
01611     msgErr << "This structure has no identifiable CA atoms,"
01612            << "ribbon and cartoon representations will not be usable." 
01613            << sendmsg;
01614     return;
01615   }
01616   if ((Otypecode < 0) && (OT1typecode < 0)) {
01617     msgErr << "This structure has no identifiable Oxygen atoms,"
01618            << "ribbon and cartoon representations will not be usable." 
01619            << sendmsg;
01620     return;
01621   }
01622 
01623   //
01624   // calculate control point lists for the protein fragments
01625   //
01626 
01627   msgInfo << "XXX Building protein control point lists" << sendmsg;
01628 
01629   // go through each protein and find the CA and O atoms which are
01630   // eventually used to construct control points and perpendicular vectors
01631   ResizeArray<int> cpind;
01632   for (frag=0; frag<pfragList.num(); frag++) {
01633     num = pfragList[frag]->num();  // number of residues in this fragment
01634     if (num < 2) {
01635       // with less than two residues, we can't do anything useful, so skip
01636       nfragCPList.append(NULL);
01637       continue;
01638     }
01639 
01640     // check that we have a valid structure before continuing
01641       res = (*pfragList[frag])[0];
01642     canum = find_atom_in_residue(CAtypecode, res);
01643      onum = find_atom_in_residue(Otypecode, res);
01644 
01645     if (onum < 0 && OT1typecode >= 0) {
01646       onum = find_atom_in_residue(OT1typecode, res);
01647     }
01648     if (canum < 0 || onum < 0) {
01649       // can't find 1st CA or O of the protein fragment, so skip
01650       msgErr << "Fragment control point calc unable to identify target atoms" << sendmsg;
01651       nfragCPList.append(NULL);
01652       continue; 
01653     }
01654 
01655     // clear temp arrays before we begin
01656     cpind.clear();
01657 
01658     int loop;
01659     for (loop=0; loop<num; loop++) {
01660       res = (*pfragList[frag])[loop];
01661 
01662       // find next CA atom to be used as a spline control point
01663       canum = find_atom_in_residue(CAtypecode, res);
01664 
01665       // find next O atom to be used for spline orientation vector
01666       onum = find_atom_in_residue(Otypecode, res);
01667       if (onum < 0 && OT1typecode >= 0) {
01668         onum = find_atom_in_residue(OT1typecode, res);
01669       }
01670 
01671       // add the atom indices to the control point list
01672       cpind.append(canum);
01673       cpind.append(onum);
01674     }
01675 
01676     // copy this fragment's control point lists to permanent storage
01677     if (cpind.num() == 2*num) {
01678       int *cpindices = new int[cpind.num()];
01679       memcpy(cpindices, &cpind[0], cpind.num() * sizeof(int));
01680 
01681       // add control point list to the master list
01682       nfragCPList.append(cpindices);
01683     } else {
01684       msgErr << "Unexpected failure in control point calculation" << sendmsg;
01685       nfragCPList.append(NULL);
01686     }
01687   } 
01688 
01689   wkf_timer_stop(tm);
01690 
01691   msgInfo << "XXX Calc time for protein cplist: " 
01692           << wkf_timer_time(tm) << sendmsg;
01693   msgInfo << "XXX done building protein control point lists" << sendmsg;
01694 
01695   //
01696   // calculate control point lists for the nucleic fragments
01697   //
01698 
01699   wkf_timer_destroy(tm);
01700 }
01701 #endif
01702 
01703 #if defined(VMDWITHCARBS)
01704 
01705 // Routines for detecting carbohydrate rings
01706 // contributed by Simon Cross and Michelle Kuttel
01707 // XXX TODO:
01708 //   don't create hashes as pointers, rather just pass pointers for them
01709 //   into functions which need it.
01710 
01711 // find all small rings and links between them
01712 void BaseMolecule::find_small_rings_and_links(int maxpathlength, int maxringsize) {
01713   // skip ring finding if we've already done it
01714   if (maxpathlength == currentMaxPathLength && maxringsize == currentMaxRingSize)
01715     return;
01716   currentMaxPathLength = maxpathlength;
01717   currentMaxRingSize = maxringsize;
01718 
01719   // Clear smallringList, smallringLinkages
01720   smallringList.clear();
01721   smallringLinkages.clear();
01722 
01723   // find groups of atoms bonded into small rings  
01724   find_small_rings(maxringsize);
01725 #if 0
01726   msgInfo << "   Rings: " << smallringList.num() << sendmsg;
01727 #endif
01728   
01729   // orientate the rings if possible
01730   orientate_small_rings(maxringsize);
01731   int nOrientatedRings = 0;
01732   for (int i=0; i<smallringList.num(); i++) {
01733     if (smallringList[i]->orientated) 
01734       nOrientatedRings++;
01735   }
01736 #if 0
01737   msgInfo << "   Rings orientated: " << nOrientatedRings << sendmsg;
01738 #endif  
01739   // find paths between rings
01740   find_orientated_small_ring_linkages(maxpathlength, maxringsize);
01741 #if 0
01742   msgInfo << "   Ring Paths: " << smallringLinkages.paths.num() << sendmsg;
01743 #endif  
01744 }
01745 
01746 // find all loops less than a given size
01747 int BaseMolecule::find_small_rings(int maxringsize) {
01748   int n_back_edges, n_rings;
01749   ResizeArray<int> back_edge_src, back_edge_dest;
01750     
01751   n_back_edges = find_back_edges(back_edge_src, back_edge_dest);
01752 
01753 #if 0
01754   msgInfo << "  BACK EDGES: " << n_back_edges << sendmsg;
01755   for (int i=0; i < n_back_edges; i++) {
01756     msgInfo << "       SRC:" << back_edge_src[i] << ", DST:" << back_edge_dest[i] << sendmsg;
01757   }
01758 #endif
01759 
01760   n_rings = find_small_rings_from_back_edges(maxringsize, back_edge_src, back_edge_dest);
01761 
01762 #if 0
01763   msgInfo << " SMALL RINGS: " << n_rings << sendmsg;
01764   for (int i=0; i < n_rings; i++) {
01765     msgInfo << "    RING: " << *(smallringList[i]) << sendmsg;
01766   }
01767 #endif
01768 
01769   return n_rings; // number of rings found
01770 }
01771 
01772 
01773 #define INTREE_NOT      -1   // not in tree
01774 #define INTREE_NOPARENT -2   // no parent 
01775 
01776 // find the back edges of an arbitrary spanning tree (or set of trees if the molecule is disconnected)
01777 int BaseMolecule::find_back_edges(ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest) {
01778   int i;
01779   int n_back_edges = 0;  
01780   int *intree_parents = new int[nAtoms];
01781   memset(intree_parents, INTREE_NOT, nAtoms * sizeof(int));  // clear parents, -1 = not in tree, -2 = no parent
01782       
01783   for (i=0; i<nAtoms; i++) {
01784     if (intree_parents[i] == INTREE_NOT) {  // not been visited
01785       n_back_edges += find_connected_subgraph_back_edges(i,back_edge_src,back_edge_dest,intree_parents);
01786     }
01787   }
01788 
01789   delete [] intree_parents;
01790  
01791   return n_back_edges;
01792 }
01793 
01794 
01795 // find the back edges of a spanning tree (for a connected portion of the molecule)
01796 int BaseMolecule::find_connected_subgraph_back_edges(int atomid, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest,
01797                                                      int *intree_parents) {
01798   int i, n_new_back_edges, cur_atom_id, child_atom_id, parent_atom_id;
01799   MolAtom *curatom;
01800   ResizeArray<int> node_stack;
01801   
01802   node_stack.append(atomid);
01803   intree_parents[atomid] = INTREE_NOPARENT; // in tree, but doesn't have parent
01804   n_new_back_edges = 0;
01805 
01806   while (node_stack.num() > 0) {
01807 #if 1
01808     cur_atom_id = node_stack.pop();
01809 #else
01810     cur_atom_id = node_stack[node_stack.num()-1];
01811     node_stack.remove(node_stack.num()-1);
01812 #endif
01813     
01814     curatom = atom(cur_atom_id);
01815     parent_atom_id = intree_parents[cur_atom_id];
01816     
01817     for(i=0;i<curatom->bonds;i++) {
01818       child_atom_id = curatom->bondTo[i];
01819       if (intree_parents[child_atom_id] != INTREE_NOT) {
01820         // back-edge found
01821         if ((child_atom_id != parent_atom_id) && (child_atom_id > cur_atom_id)) {
01822             // we ignore edges back to the parent
01823             // and only add each back edge once
01824             // (it'll crop up twice since each bond is listed on both atoms)
01825             back_edge_src.append(cur_atom_id);
01826             back_edge_dest.append(child_atom_id);
01827             n_new_back_edges++;
01828         }
01829       } else {
01830         // extended tree
01831         intree_parents[child_atom_id] = cur_atom_id;
01832         node_stack.append(child_atom_id);
01833       }
01834     }
01835   }
01836 
01837   return n_new_back_edges;
01838 }
01839 
01840 // find rings smaller than maxringsize given list of back edges
01841 int BaseMolecule::find_small_rings_from_back_edges(int maxringsize, ResizeArray<int> &back_edge_src, ResizeArray<int> &back_edge_dest) {
01842     int i, key, max_rings;
01843     int n_rings = 0;
01844     int n_back_edges = back_edge_src.num();
01845     SmallRing *ring;
01846     inthash_t *used_edges = new inthash_t; // back edges which have been dealt with 
01847     inthash_t *used_atoms = new inthash_t; // atoms (other than the first) which are used in the current path (i.e. possible loop)
01848     inthash_init(used_edges,n_back_edges);
01849     inthash_init(used_atoms,maxringsize);
01850    
01851     // cap the peak number of rings to find based on the size of the
01852     // input structure.  This should help prevent unusual structures 
01853     // with very high connectivity, such as silicon nanodevices from 
01854     // blowing up the ring search code
01855     max_rings = 2000 + (int) (100.0*sqrt((double) nAtoms));
01856  
01857     for(i=0;i<n_back_edges;i++) {
01858       ring = new SmallRing();
01859       ring->append(back_edge_src[i]);
01860       ring->append(back_edge_dest[i]);
01861       
01862       // first atom is not marked used, since we're allowed to re-use it.
01863       inthash_insert(used_atoms,back_edge_dest[i],1);
01864       
01865       n_rings += find_small_rings_from_partial(ring,maxringsize,used_edges,used_atoms);
01866       delete ring;
01867       
01868       // abort if there are too many rings
01869       if (n_rings > max_rings) {
01870         msgWarn << "Maximum number of rings (" << max_rings << ") exceed."
01871                 << " Stopped looking for rings after " << n_rings << " rings found." << sendmsg;
01872         break;
01873       }
01874       
01875       key = get_edge_key(back_edge_src[i],back_edge_dest[i]);
01876       inthash_insert(used_edges,key,1);
01877       
01878       // remove last atom from used_atoms
01879       inthash_delete(used_atoms,back_edge_dest[i]);
01880     }
01881     
01882     inthash_destroy(used_edges);
01883     delete used_edges;
01884     inthash_destroy(used_atoms);
01885     delete used_atoms;
01886 
01887     return n_rings;
01888 }
01889 
01890 
01891 // find rings smaller than maxringsize from the given partial ring (don't reuse used_edges)
01892 int BaseMolecule::find_small_rings_from_partial(SmallRing *ring, int maxringsize, inthash_t *used_edges, inthash_t *used_atoms) {
01893     int i, next_bond_pos, cur_atom_id, child_atom_id, bond_key, barred, closes, do_pop;
01894     int n_rings = 0;
01895     MolAtom *curatom;
01896 
01897     IntStackHandle atom_id_stack = intstack_create(maxringsize);
01898     IntStackHandle bond_pos_stack = intstack_create(maxringsize);
01899     intstack_push(atom_id_stack,ring->last_atom());
01900     intstack_push(bond_pos_stack,0);
01901 
01902     while (!intstack_pop(atom_id_stack,&cur_atom_id)) {
01903       intstack_pop(bond_pos_stack,&next_bond_pos);
01904       curatom = atom(cur_atom_id);
01905       do_pop = 0; // whether we need to pop the current atom
01906 
01907       // if the ring is already maxringsize, just pop the current atom off.
01908       if (ring->num() > maxringsize)
01909          do_pop = 1;
01910 
01911       if (next_bond_pos == 0 && !do_pop) {
01912         // ignore barred rings (by checking for links back to earlier parts of the ring
01913         // before exploring further)
01914         barred = closes = 0;
01915         for(i=0;i<curatom->bonds;i++) {
01916           child_atom_id = curatom->bondTo[i];
01917 
01918           // check that this is not an edge immediately back to the previous atom
01919           if (child_atom_id == ring->atoms[ring->atoms.num()-2]) continue;
01920 
01921           // check that this is not an atom we've included
01922           // (an exception is the first atom, which we're allowed to try add, obviously :)
01923           if (inthash_lookup(used_atoms, child_atom_id) != HASH_FAIL) {
01924             barred = 1;
01925             continue;
01926           }
01927 
01928           // check is this not a back edge which has already been used
01929           bond_key = get_edge_key(cur_atom_id,child_atom_id);
01930           if (inthash_lookup(used_edges, bond_key) != HASH_FAIL) {
01931             if (child_atom_id == ring->first_atom())
01932               barred = 1; // used back-edge which closes the ring counts as barred
01933             continue;
01934           }
01935 
01936           // check whether ring closes
01937           if (child_atom_id == ring->first_atom()) {
01938             closes = 1;
01939             continue;
01940           }
01941         }
01942 
01943         if (closes && !barred) {
01944           smallringList.append(ring->copy());
01945           n_rings += 1;
01946         }
01947         if (closes || barred) {
01948           // adding this atom would create a barred ring, so skip to next atom on stack
01949           do_pop = 1;
01950         }
01951       }
01952 
01953       if (!do_pop) {
01954         for(i=next_bond_pos;i<curatom->bonds;i++) {
01955           child_atom_id = curatom->bondTo[i];
01956 
01957           // check that this is not an edge immediately back to the previous atom
01958           if (child_atom_id == ring->atoms[ring->atoms.num()-2]) continue;
01959                   
01960           // check is this not a back edge which has already been used
01961           bond_key = get_edge_key(cur_atom_id,child_atom_id);
01962           if (inthash_lookup(used_edges, bond_key) != HASH_FAIL) continue;
01963           
01964           // Append child atom and go deeper
01965           ring->append(child_atom_id);
01966           inthash_insert(used_atoms,child_atom_id,1);
01967           
01968           // push current state and new state onto stack and recurse
01969           intstack_push(atom_id_stack,cur_atom_id);
01970           intstack_push(bond_pos_stack,i+1);
01971           intstack_push(atom_id_stack,child_atom_id);
01972           intstack_push(bond_pos_stack,0);
01973           break;     
01974         }
01975 
01976         // we finished the children, pop one parent
01977         if(i>=curatom->bonds)
01978             do_pop = 1;        
01979       }
01980 
01981       if (do_pop && !intstack_empty(atom_id_stack)) {
01982           // finished with cur_atom_id and all its children      
01983           // clean up before returning from recurse
01984           ring->remove_last();
01985           inthash_delete(used_atoms,cur_atom_id);      
01986       }
01987     }
01988     
01989     intstack_destroy(atom_id_stack);
01990     intstack_destroy(bond_pos_stack);
01991     return n_rings;
01992 }
01993 
01994 
01995 // construct edge key
01996 int BaseMolecule::get_edge_key(int edge_src, int edge_dest) {
01997     int t;
01998     if (edge_dest > edge_src) {
01999        t = edge_src;
02000        edge_src = edge_dest;
02001        edge_dest = t;
02002     }
02003     return edge_src * nAtoms + edge_dest;
02004 }
02005 
02006 // Routines for orientating rings
02007 // contributed by Simon Cross and Michelle Kuttel
02008 
02009 void BaseMolecule::orientate_small_rings(int maxringsize) {
02010   for (int i=0;i<smallringList.num();i++)
02011     orientate_small_ring(*smallringList[i],maxringsize);
02012 }
02013 
02014 void BaseMolecule::orientate_small_ring(SmallRing &ring,int maxringsize) {
02015   short int oxygen = -1;
02016   int oxygen_typeindex, c1_nameindex, c1p_nameindex, cu1_nameindex;
02017   int c2_nameindex, c2p_nameindex, cu2_nameindex;
02018   MolAtom *curatom, *atom_before_O, *atom_after_O;
02019 
02020   // lookup atom name indices in advance
02021   oxygen_typeindex = atomTypes.typecode("O");
02022   c1_nameindex = atomNames.typecode("C1");
02023   c1p_nameindex = atomNames.typecode("C1'");
02024   cu1_nameindex = atomNames.typecode("C_1");
02025   c2_nameindex = atomNames.typecode("C2");
02026   c2p_nameindex = atomNames.typecode("C2'");
02027   cu2_nameindex = atomNames.typecode("C_2");
02028     
02029   // Find an oxygen (or something with two bonds)
02030   for (int i=0; i<ring.num(); i++) {
02031     curatom = atom(ring[i]);
02032     if (curatom->atomicnumber == 8 || curatom->typeindex == oxygen_typeindex || curatom->bonds == 2) {
02033       oxygen = i;
02034       break;
02035     }
02036   }
02037 
02038   // leave ring unorientated if no oxygen found
02039   if (oxygen == -1) return;
02040 
02041   // find atoms before and after oxygen (taking into account wrapping)
02042   if (oxygen == 0) atom_before_O = atom(ring.last_atom());
02043   else atom_before_O = atom(ring[oxygen-1]);
02044 
02045   if (oxygen == ring.num()-1) atom_after_O = atom(ring.first_atom());
02046   else atom_after_O = atom(ring[oxygen+1]);
02047     
02048   // ensure C1 carbon is after the oxygen
02049   // leave unorientated if the C1 carbon can't be found
02050   if (atom_before_O->nameindex == c1_nameindex || atom_before_O->nameindex == c1p_nameindex
02051       || atom_before_O->nameindex == cu1_nameindex
02052       || atom_before_O->nameindex == c2_nameindex || atom_before_O->nameindex == c2p_nameindex
02053       || atom_before_O->nameindex == cu2_nameindex) {
02054     ring.reverse();
02055     ring.orientated = 1;
02056   }
02057   else if (atom_after_O->nameindex == c1_nameindex || atom_after_O->nameindex == c1p_nameindex
02058            || atom_after_O->nameindex == cu1_nameindex
02059            || atom_after_O->nameindex == c2_nameindex || atom_after_O->nameindex == c2p_nameindex
02060            || atom_after_O->nameindex == cu2_nameindex) {
02061     ring.orientated = 1;
02062   }
02063 }
02064 
02065 
02066 // Routines for finding links between rings
02067 // contributed by Simon Cross and Michelle Kuttel
02068 
02069 // find all paths shorter than a given length between the small rings
02070 // listed in smallringList
02071 int BaseMolecule::find_orientated_small_ring_linkages(int maxpathlength, int maxringsize) {
02072   SmallRing *sr;
02073   LinkagePath *lp;
02074   int i, j, atom_id;
02075   int n_paths = 0;
02076   inthash_t *atom_to_ring = new inthash_t;
02077   inthash_t *multi_ring_atoms = new inthash_t;
02078   inthash_t *used_atoms = new inthash_t;
02079   inthash_init(atom_to_ring,smallringList.num()*maxringsize/2);
02080   inthash_init(multi_ring_atoms,smallringList.num());
02081   inthash_init(used_atoms,maxpathlength);
02082    
02083   // create lookup from atom id to ring id
02084   for (i=0;i<smallringList.num();i++) {
02085     sr = smallringList[i];
02086     // XXX: Uncomment this if we want to include non-orientated rings
02087     //      in linkage paths
02088     // if (!sr->orientated) continue;
02089     for (j=0;j<sr->num();j++) {
02090       atom_id = (*sr)[j];
02091       if (inthash_lookup(atom_to_ring,atom_id) == HASH_FAIL)
02092         inthash_insert(atom_to_ring,atom_id,i);
02093       else
02094         inthash_insert(multi_ring_atoms,atom_id,1);
02095     }
02096   }
02097 
02098   // look for ring linkage paths starting from each atom
02099   // of each orientated ring
02100   for (i=0;i<smallringList.num();i++) {
02101     sr = smallringList[i];
02102     if (!sr->orientated) continue;
02103     for (j=0;j<sr->num();j++) {
02104       if (inthash_lookup(multi_ring_atoms,(*sr)[j]) != HASH_FAIL) continue;
02105       lp = new LinkagePath();
02106       lp->start_ring = i;
02107       lp->path.append((*sr)[j]);    
02108       n_paths += find_linkages_for_ring_from_partial(*lp,maxpathlength,atom_to_ring,multi_ring_atoms,used_atoms);
02109       delete lp;
02110     }
02111   }
02112   
02113   inthash_destroy(atom_to_ring);
02114   delete atom_to_ring;
02115   inthash_destroy(multi_ring_atoms);
02116   delete multi_ring_atoms;
02117   inthash_destroy(used_atoms);
02118   delete used_atoms;
02119 
02120 #if 0
02121   msgInfo << smallringLinkages << sendmsg;
02122 #endif
02123   
02124   return n_paths; // number of paths found.
02125 }
02126 
02127 int BaseMolecule::find_linkages_for_ring_from_partial(LinkagePath &lp, int maxpathlength, inthash_t *atom_to_ring, inthash_t *multi_ring_atoms, inthash_t *used_atoms) {
02128   int i, cur_atom_id, next_bond_pos, child_atom_id, ringidx;
02129   int n_paths = 0;
02130   MolAtom *curatom;
02131 
02132   IntStackHandle atom_id_stack = intstack_create(maxpathlength);
02133   IntStackHandle bond_pos_stack = intstack_create(maxpathlength);
02134   intstack_push(atom_id_stack,lp.path.last_atom());
02135   intstack_push(bond_pos_stack,0);
02136 
02137   while (!intstack_pop(atom_id_stack,&cur_atom_id)) {
02138     intstack_pop(bond_pos_stack,&next_bond_pos);
02139     curatom = atom(cur_atom_id);
02140 
02141     if (next_bond_pos == 0)
02142       inthash_insert(used_atoms, cur_atom_id, 1);
02143 
02144     for(i=next_bond_pos;i<curatom->bonds;i++) {
02145       child_atom_id = curatom->bondTo[i];
02146 
02147       // check that this isn't an atom that belongs to multiple rings
02148       if (inthash_lookup(multi_ring_atoms,child_atom_id) != HASH_FAIL) continue;
02149 
02150       // check that this is not an edge immediately back to the previous atom
02151       // (when there is only one atom in the path, it can't be a link back)
02152       if (lp.num() > 1 && child_atom_id == lp[lp.num()-2]) continue;
02153       
02154       // check that we haven't arrived at a non-orientated ring
02155       ringidx = inthash_lookup(atom_to_ring, child_atom_id);
02156       if (ringidx != HASH_FAIL && !smallringList[ringidx]->orientated) continue;
02157 
02158       // only store paths from smaller ringidx to larger ringidx (to avoid getting a copy of each orientation of the path)
02159       // ignore paths which return to the same ring
02160       // check that we're leaving the starting ring
02161       if (ringidx != HASH_FAIL && ringidx <= lp.start_ring) continue;
02162       
02163       // see if this takes us to another ring
02164       if (ringidx != HASH_FAIL && (ringidx > lp.start_ring)) {
02165           lp.append(child_atom_id);
02166           lp.end_ring = ringidx;
02167           smallringLinkages.addLinkagePath(*lp.copy());
02168           n_paths++;
02169           lp.end_ring = -1;
02170           lp.remove_last();
02171           continue;
02172       }
02173       
02174       // check that this is not an atom we've included
02175       // (an exception is the first atom, which we're allowed to try add, obviously :)
02176       if (inthash_lookup(used_atoms, child_atom_id) != HASH_FAIL) continue;
02177               
02178       if (lp.num() < maxpathlength) {
02179          lp.append(child_atom_id);
02180 
02181          // push current state and new state onto stack and recurse
02182          intstack_push(atom_id_stack,cur_atom_id);
02183          intstack_push(bond_pos_stack,i+1);
02184          intstack_push(atom_id_stack,child_atom_id);
02185          intstack_push(bond_pos_stack,0);
02186          break;
02187       }
02188     }
02189 
02190     if ((i>=curatom->bonds)&&(!intstack_empty(atom_id_stack))) {      
02191       // clean up before returning from recurse
02192       lp.remove_last();
02193       inthash_delete(used_atoms,cur_atom_id);
02194     }
02195   }
02196   
02197   intstack_destroy(atom_id_stack);
02198   intstack_destroy(bond_pos_stack);
02199   return n_paths;
02200 }
02201 
02202 #endif  // end of carbohydrate related stuff
02203 
02204 
02205 
02206 void BaseMolecule::add_volume_data(const char *name, const float *o,
02207     const float *xa, const float *ya, const float *za, int x, int y, int z,
02208     float *data) {
02209   msgInfo << "Analyzing Volume..." << sendmsg;
02210 
02211   VolumetricData *vdata = new VolumetricData(name, o, xa, ya, za,
02212                                              x, y, z, data);
02213   
02214   // Print out grid size along with memory use for the grid itself plus
02215   // the memory required for the volume gradients (4x the scalar grid memory)
02216   // Color texture maps require another 0.75x the original scalar grid size.
02217   msgInfo << "   Grid size: " << x << "x" << y << "x" << z << "  (" 
02218           << (int) (4.0 * (x*y*z * sizeof(float)) / (1024.0 * 1024.0)) << " MB)" 
02219           << sendmsg;
02220 
02221   msgInfo << "   Total voxels: " << x*y*z << sendmsg;
02222 
02223   msgInfo << "   Min: " << vdata->datamin << "  Max: " << vdata->datamax 
02224           << "  Range: " << (vdata->datamax - vdata->datamin) << sendmsg;
02225 
02226   msgInfo << "   Computing volume gradient map for smooth shading" << sendmsg;
02227 
02228   vdata->compute_volume_gradient(); // calc gradients for smooth vertex normals
02229 
02230   volumeList.append(vdata);
02231 
02232   msgInfo << "Added volume data, name=" << vdata->name << sendmsg;
02233 }
02234 
02235 int BaseMolecule::num_volume_data() {
02236   return volumeList.num();
02237 }
02238 
02239 const VolumetricData *BaseMolecule::get_volume_data(int id) {
02240   if (id >= 0 && id < volumeList.num())
02241     return volumeList[id];
02242   return NULL;
02243 }

Generated on Sat May 26 01:47:44 2012 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002