Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

PDBData.h

Go to the documentation of this file.
00001 
00007 /*
00008    Read and parse a line of data from a PDB record.  There are many
00009    different types of PDB records.  This version reads only the ATOM and
00010    HETATM records and makes all fields accessible via the appropriate
00011    member function.  In NAMD, this will be called only by the PDB class,
00012    which reads PDB files, and a PDB writer class.
00013 */
00014 
00015 #ifndef _PDBREADER_H_
00016 #define _PDBREADER_H_
00017 
00018 // These are added to the global namespace
00019 //   the class PDBData
00020 //   the class PDBUnknown, derived from PDBData
00021 //   the class PDBAtom, derived from PDBData - contains ATOM and HETATM records
00022 //   the class PDBAtomRecord, derived from PDBAtom - contains only ATOM records
00023 //   the class PDBHetatm, derived from PDBAtom - contains only HETATM records
00024 //   the function new_PDBData, which creates the right pdb class given a string
00025 
00026 #ifndef WIN32
00027 #include <strings.h>
00028 #endif
00029 #include "common.h"
00030 #include <string.h>
00031 
00032 
00033 class PDBData {  // base class: at the basic level, a PDB record only knows its type
00034    public:
00035 
00036   // These data types come from the Protein Data Bank format
00037   // description located via anon. ftp to pdb.pdb.bnl.gov
00038   // in the file /pub/format.desc.ps
00039   //  In addition, I define an extra type, UNKNOWN.  If I can't
00040   // figure out what's going on, I store the complete string
00041   // and return it when asked.
00042    enum PDBType {HEADER, OBSLTE, COMPND, SOURCE, EXPDTA, AUTHOR,
00043      REVDAT, SPRSDE, JRNL, REMARK, SEQRES, FTNOTE, HET, FORMUL,
00044      HELIX, SHEET, TURN, SSBOND, SITE, CRYST1, ORIGX, SCALE,
00045      MTRIX, TVECT, MODEL, ATOM, HETATM, SIGATM, ANISOU, SIGUIJ,
00046      TER, ENDMDL, CONECT, MASTER, END, UNKNOWN};
00047 
00048      // PDBFormatStyle is used to specify if the output should be based
00049      // on columns (FORTRAN style) or fields (C/ awk style).
00050      enum PDBFormatStyle { COLUMNS, FIELDS };
00051 
00052     static const char *PDBNames[UNKNOWN+1]; // one string descriptor per PDBType value
00053     
00054  // See, there are two different types of formats that this program
00055  // understands, one is the basic PDB w/ or w/o the XPLOR extension - these
00056  // are the column based versions.  The other is my own
00057  // field based version - each data element is separated by a blank
00058  // and, if the element is empty, a pound sign ('#') is put in its place.
00059  // This type of record is denoted by a '#' in the first non-blank
00060  // character (hence, it is the first non-blank character of the first
00061  // field).  Basically, I'm a unix/ C/ awk/ yacc ... freak, I like field
00062  // based data rather than column based data.
00063  
00064  
00065  
00066  
00067    private:
00068     PDBType mytype;   // record type, fixed by the constructor
00069 
00070 #ifdef MEM_OPT_VERSION
00071 //for the sake of pruning PDBData
00072 public:
00073 #else
00074    protected:
00075 #endif
00076         // some parsing routines to get info from a line of text; each
00077     static void scan( const char *data, int len, int start, int size, 
00078                          int *ans, int defalt);
00079     static void scan( const char *data, int len, int start, int size,
00080                           BigReal *ans, BigReal defalt);
00081     static void scan( const char *data, int len, int start, int size,
00082                          char *ans);   // overload hands its result back through ans
00083     static void field( const char *data, int fld, char *result);
00084         // some routine to print to a specific column and width
00085     static void sprintcol( char *s, int start, int len, const char *val);
00086     static void sprintcol( char *s, int start, int len, int val);
00087     static void sprintcol( char *s, int start, int len, int prec, BigReal val);
00088     
00089    public:
00090      PDBData(PDBType newtype) {
00091        mytype = newtype;
00092      }
00093      virtual ~PDBData( void) {
00094      }
00095      PDBType type( void) const {   // read-only, so usable on const records too
00096        return mytype;
00097      }
00098      // I know nothing, so I'll fake it and hope it works: writes a fixed
00099      // placeholder REMARK into s (s must have room for it) in either style.
00100      virtual void sprint( char *s, PDBFormatStyle usestyle = COLUMNS) {
00101        (void) usestyle;   // unused here; the cast gets rid of the warning
00102        strcpy(s, "REMARK     (undefined remark - this is a bug)");
00103      }
00104 
00105 };
00106 
00108 class PDBUnknown : public PDBData {   // keeps an unrecognized record verbatim
00109   private:
00110     char *mystr;                        // owned, NUL-terminated copy of the record
00111     static char *copy_string(const char *src) {  // new[] copy of src
00112       char *copy = new char[strlen(src)+1];
00113       if ( copy == NULL )               // only reachable if new[] does not throw
00114         NAMD_die("memory allocation failed in PDBUnknown::PDBUnknown");
00115       return strcpy(copy, src);
00116     }
00117   public:
00118    PDBUnknown(const char *data): PDBData(PDBData::UNKNOWN), mystr(copy_string(data)) {}
00119    // copies are deep, so two objects never delete [] the same buffer
00120    PDBUnknown(const PDBUnknown &o): PDBData(o), mystr(copy_string(o.mystr)) {}
00121    PDBUnknown &operator=(const PDBUnknown &o) {
00122      if (this != &o) { char *copy = copy_string(o.mystr); delete [] mystr; mystr = copy; }
00123      return *this;
00124    }
00125    virtual ~PDBUnknown( void) { delete [] mystr; }
00126    // usestyle is ignored: the stored text goes to s unchanged in both styles
00127    void sprint(char *s, PDBFormatStyle usestyle = COLUMNS) {
00128      (void) usestyle; strcpy(s, mystr);   // s must be large enough for the record
00129    }
00130 };
00131 
00132 
00134 class PDBAtom : public PDBData {   // common data for ATOM and HETATM records
00135 public:
00136     // kept public (extracted from PDBAtom) for the sake of pruning PDBData
00137       // starting location for each record element (1-based: STYPE=1, LTYPE=6, SSERIAL=7)
00138     enum Start {STYPE=1,SSERIAL=7, SNAME=13, SALT=17, SRESNAME=18, SCHAIN=22, 
00139                 SRESSEQ=23, SINSERT=27, SX=31, SY=39, SZ=47,
00140                 SOCC=55, STEMPF=61, SFOOT=68, SSEGNAME=73, SELEMENT=77};
00141       // length of each element, the PREC is the number of digits
00142       // in the output after the decimal
00143 // NOTE: The PDB says the length of the residue name is only 3 characters
00144 //  whereas XPLOR allows 4 character names.  We choose 4 for compatibility
00145 //  with both systems (since we never change the length, what you give us is
00146 //  what we use)
00147     enum Length {LTYPE=6, LSERIAL=5, LNAME=4, LALT=1, LRESNAME=4, LCHAIN=1, 
00148                  LRESSEQ=4, LINSERT=1, LCOOR=8,
00149                  LCOORPREC=3, LOCC=6, LOCCPREC=2, LTEMPF=6, 
00150                  LTEMPFPREC=2, LFOOT=3, LSEGNAME=4, LELEMENT=2};
00151 
00152   public:
00153     static const int default_serial;         // some default values
00154     static const int default_residueseq;     // these are set in the .C file
00155     static const BigReal default_coor;
00156     static const BigReal default_occupancy;
00157     static const BigReal default_temperaturefactor;
00158     static const int no_footnote;
00159 
00160   private:
00161     int myserialnumber;                 // atom serial number
00162     char myname[LNAME+1];               // atom name
00163     char myalternatelocation[LALT+1];   // alternate location identifier
00164     char myresiduename[LRESNAME+1];     // residue name (sized by its own length constant)
00165     char mychain[LCHAIN+1];             // chain identifier
00166     int myresidueseq;                   // residue seq. no.
00167     char myinsertioncode[LINSERT+1];    // code for insertions of residues
00168     BigReal mycoor[3];                     // X, Y, and Z orthogonal A coordinates
00169     BigReal myoccupancy;                   // occupancy
00170     BigReal mytemperaturefactor;           // temperature factor
00171     int myfootnote;                     // footnote number
00172     char mysegmentname[LSEGNAME+1];     // XPLOR-type segment name
00173     char myelement[LELEMENT+1];         // element
00174 
00175     void parse_field_data( const char *data);
00176     void parse_column_data( const char *data);
00177     void sprint_columns( char *outstr);
00178     void sprint_fields( char *outstr);
00179 
00180   protected:
00181     enum PDBPossibleAtoms {USE_ATOM = ATOM, USE_HETATM = HETATM};
00182     PDBAtom( const char *data,
00183            PDBPossibleAtoms whichatom);// parses a line from the PDB data file
00184     PDBAtom( void);        // makes a generic atom
00185     // Accessors below are overloaded pairs: f(void) returns a field, f(new...) takes a new value.
00186   public:
00187     virtual ~PDBAtom( void);
00188     void parse( const char *s);  // reset to new input values
00189     void  sprint( char *s, PDBFormatStyle usestyle = COLUMNS);// write to string
00190     int serialnumber( void);
00191     void serialnumber( int newserialnumber);
00192     
00193     const char*name( void);
00194     void name( const char *newname);
00195     
00196     const char*alternatelocation( void);
00197     void alternatelocation( const char *newalternatelocation);
00198     
00199     const char*residuename( void);
00200     void residuename( const char *newresiduename);
00201     
00202     const char*chain( void);
00203     void chain( const char *newchain);
00204     
00205     int residueseq( void);
00206     void residueseq( int newresidueseq);
00207     
00208     const char*insertioncode( void);
00209     void insertioncode( const char *newinsertioncode);
00210     
00211     BigReal xcoor( void);
00212     void xcoor( BigReal newxcoor);
00213     BigReal ycoor( void);
00214     void ycoor( BigReal newycoor); 
00215     BigReal zcoor( void);
00216     void zcoor( BigReal newzcoor);
00217     
00218     const BigReal *coordinates( void);
00219     void coordinates(const BigReal *newcoordinates);
00220     
00221     BigReal occupancy( void);
00222     void occupancy( BigReal newoccupancy);
00223 
00224     BigReal temperaturefactor( void);
00225     void temperaturefactor( BigReal newtemperaturefactor);
00226 
00227     int footnote( void);
00228     void footnote( int newfootnote);
00229     
00230       // this is not part of the PDB format but is used by XPLOR instead of
00231       // the chain identifier (see XPLOR 3.1 manual, p 104)
00232     const char*segmentname( void);
00233     void segmentname( const char *newsegmentname);
00234 
00235     const char* element( void);
00236     void element( const char *newelement);
00237 };
00238 
00239 // The two sub-classes of PDB Atom
00240 class PDBAtomRecord : public PDBAtom
00241 {
00242   public:
00243     // Hands the line to PDBAtom, tagging it as an ATOM record.
00244     PDBAtomRecord(const char *data) : PDBAtom(data, PDBAtom::USE_ATOM) {}
00245 
00246     virtual ~PDBAtomRecord() {}
00247 };
00248 
00249 class PDBHetatm : public PDBAtom
00250 {
00251   public:
00252     // Hands the line to PDBAtom, tagging it as a HETATM record.
00253     PDBHetatm(const char *data) : PDBAtom(data, PDBAtom::USE_HETATM) {}
00254 
00255     virtual ~PDBHetatm() {}
00256 };
00257 
00258 
00259 #ifdef MEM_OPT_VERSION
00260 struct PDBCoreData
00261 {
00262     // Numeric fields matching PDBAtom's coordinates, occupancy and temperature factor.
00263     BigReal coor[3];       // X, Y, and Z orthogonal A coordinates
00264     BigReal myoccupancy;   // occupancy
00265     BigReal tempfactor;    // temperature factor
00266 
00267     // Getters added so less of the src code has to change when pruning the PDBData.
00268     BigReal xcoor()             { return coor[0]; }
00269     BigReal ycoor()             { return coor[1]; }
00270     BigReal zcoor()             { return coor[2]; }
00271     BigReal occupancy()         { return myoccupancy; }
00272     BigReal temperaturefactor() { return tempfactor; }
00273     void sprint(char *s, PDBData::PDBFormatStyle usestyle = PDBData::COLUMNS); // write to string
00274 };
00275 #endif
00276 
00278 // Factory function: the base class cannot itself figure out which
00279 // derived class should parse a record, so this free function fakes it.
00280 // Give it a string and it will create the correct PDB data type.
00281 // NOTE(review): the result looks heap-allocated and caller-owned - confirm in the .C file.
00282 PDBData *new_PDBData(const char *data);  // nasty
00283 
00284 
00285 #endif
00286 

Generated on Sat Sep 6 04:07:42 2008 for NAMD by  doxygen 1.3.9.1