PDBData.h

Go to the documentation of this file.
00001 
00007 /*
00008    Read and parse a line of data from a PDB record.  There are many
00009    different types of PDB records.  This version reads only the ATOM and
00010    HETATM records and makes all fields accessible via the appropriate
00011    member function.  In NAMD, this will be called only by the PDB class,
00012    which reads PDB files, and a PDB writer class.
00013 */
00014 
00015 #ifndef _PDBREADER_H_
00016 #define _PDBREADER_H_
00017 
00018 // These are added to the global namespace
00019 //   the class PDBData
00020 //   the class PDBUnknown, derived from PDBData
00021 //   the class PDBAtom, derived from PDBData - contains ATOM and HETATM records
00022 //   the class PDBAtomRecord, derived from PDBAtom - contains only ATOM records
00023 //   the class PDBHetatm, derived from PDBAtom - contains only HETATM records
00024 //   the function new_PDBData, which creates the right pdb class given a string
00025 
00026 #ifndef WIN32
00027 #include <strings.h>
00028 #endif
00029 #include "common.h"
00030 #include <string.h>
00031 
00032 
00033 class PDBData {  // at the basic level, a PDB record only knows its type
00034    public:
00035 
00036   // These data types come from the Protein Data Bank format
00037   // description located via anon. ftp to pdb.pdb.bnl.gov
00038   // in the file /pub/format.desc.ps
00039   //  In addition, I define an extra type, UNKNOWN.  If I can't
00040   // figure out what's going on, I store the complete string
00041   // and return it when asked.
00042    enum PDBType {HEADER, OBSLTE, COMPND, SOURCE, EXPDTA, AUTHOR,
00043      REVDAT, SPRSDE, JRNL, REMARK, SEQRES, FTNOTE, HET, FORMUL,
00044      HELIX, SHEET, TURN, SSBOND, SITE, CRYST1, ORIGX, SCALE,
00045      MTRIX, TVECT, MODEL, ATOM, HETATM, SIGATM, ANISOU, SIGUIJ,
00046      TER, ENDMDL, CONECT, MASTER, END, UNKNOWN};
00047 
00048      enum PDBFormatStyle { COLUMNS, FIELDS };  // used to specify if the
00049 
00050 
00051 
00052     static const char *PDBNames[UNKNOWN+1]; // string descriptors for each field
00053     
00054        // output should be based on columns (FORTRAN style) or
00055        // fields (C/ awk style).
00056  // See, there are two different types of formats that this program
00057  // understands, one is the basic PDB w/ or w/o the XLPOR extension - these
00058  // are the column based versions.  The other is my own
00059  // field based version - each data element is seperated by a blank
00060  // and, if the element is empty, a pound sign ('#') is put in its place.
00061  // This type of record is denoted by a '#' in the first non-blank
00062  // character (hence, it is the first non-blank character of the first
00063  // field.  Basically, I'm a unix/ C/ awk/ yacc ... freak, I like field
00064  // based data rather than column based data.
00065  
00066  
00067    private:
00068     PDBType mytype;
00069 
00070 #ifdef MEM_OPT_VERSION
00071 //for the sake of pruning PDBData
00072 public:
00073 #else
00074    protected:
00075 #endif
00076         // some parsing routines to get info from a line of text
00077     static void scan( const char *data, int len, int start, int size, 
00078                          int *ans, int defalt);
00079     static void scan( const char *data, int len, int start, int size,
00080                           BigReal *ans, BigReal defalt);
00081     static void scan( const char *data, int len, int start, int size,
00082                          char *ans);
00083     static void field( const char *data, int fld, char *result);
00084         // some routine to print to a specific column and width
00085     static void sprintcol( char *s, int start, int len, const char *val);
00086     static void sprintcol( char *s, int start, int len, int val);
00087     static void sprintcol( char *s, int start, int len, int prec, BigReal val);
00088     
00089    public:
00090      PDBData(PDBType newtype) {
00091        mytype = newtype;
00092      }
00093      virtual ~PDBData( void) {
00094      }
00095      PDBType type( void) {
00096        return mytype;
00097      }
00098                     // I know nothing, so I'll fake it and hope it works
00099      virtual void sprint( char *s, PDBFormatStyle usestyle = COLUMNS) {
00100        if (usestyle == COLUMNS)     // get rid of warning
00101          strcpy(s, "REMARK     (undefined remark - this is a bug)");
00102         else
00103          strcpy(s, "REMARK     (undefined remark - this is a bug)");
00104      }
00105 };
00106 
00108 class PDBUnknown : public PDBData {
00109   private:
00110     char *mystr;
00111   public:
00112    PDBUnknown(const char *data): PDBData(PDBData::UNKNOWN) {
00113      mystr = new char[strlen(data)+1];
00114      if ( mystr == NULL )
00115      {
00116        NAMD_die("memory allocation failed in PDBUnknown::PDBUnknown");
00117      }
00118      strcpy(mystr, data);
00119    }
00120    virtual ~PDBUnknown( void) {
00121      delete [] mystr;
00122    }  
00123    void sprint(char *s, PDBFormatStyle usestyle) {
00124      strcpy(s, mystr);
00125      if (usestyle == PDBData::COLUMNS)   // they are the same, but I won't
00126        strcpy( s, mystr);                //   get the stupid warning during
00127       else                               //   compilation
00128        strcpy( s, mystr);
00129    }
00130 };
00131 
00132 
00134 class PDBAtom : public PDBData {
00135 public:
00136     //extract them out from PDBAtom for the sake of pruning PDBData
00137       // starting location for each record element
00138     enum Start {STYPE=1,SSERIAL=7, SNAME=13, SALT=17, SRESNAME=18, SCHAIN=22, 
00139                 SRESSEQ=23, SINSERT=27, SX=31, SY=39, SZ=47,
00140                 SOCC=55, STEMPF=61, SFOOT=68, SSEGNAME=73, SELEMENT=77};
00141       // length of each element, the PREC is the number of digits
00142       // in the output after the decimal
00143 // NOTE: The PDB says the length of the residue name is only 3 characters
00144 //  whereas XPLOR allows 4 character names.  We choose 4 for compatability
00145 //  with both systems (since we never change the length, we you give us is
00146 //  what we use)
00147     enum Length {LTYPE=6, LSERIAL=5, LNAME=4, LALT=1, LRESNAME=4, LCHAIN=1, 
00148                  LRESSEQ=4, LINSERT=1, LCOOR=8,
00149                  LCOORPREC=3, LOCC=6, LOCCPREC=2, LTEMPF=6, 
00150                  LTEMPFPREC=2, LFOOT=3, LSEGNAME=4, LELEMENT=2};
00151 
00152   public:
00153     static const int default_serial;         // some default values
00154     static const int default_residueseq;     // these are set in the .C file
00155     static const BigReal default_coor;
00156     static const BigReal default_occupancy;
00157     static const BigReal default_temperaturefactor;
00158     static const int no_footnote;
00159 
00160   private:
00161     int myserialnumber;                 // atom serial number
00162     char myname[LNAME+1];               // atom name
00163     char myalternatelocation[LALT+1];   // alternamte location identifier
00164     char myresiduename[LNAME+1];        // residue name
00165     char mychain[LCHAIN+1];             // chain indentifier
00166     int myresidueseq;                   // residue seq. no.
00167     char myinsertioncode[LINSERT+1];    // code for insertions of residues
00168     BigReal mycoor[3];                     // X, Y, and Z orthogonal A coordinates
00169     BigReal myoccupancy;                   // occupancy
00170     BigReal mytemperaturefactor;           // temperature factor
00171     int myfootnote;                     // footnote number
00172     char mysegmentname[LSEGNAME+1];     // XPLOR-type segment name
00173     char myelement[LELEMENT+1];         // element
00174 
00175     void parse_field_data( const char *data);
00176     void parse_column_data( const char *data);
00177     void sprint_columns( char *outstr);
00178     void sprint_fields( char *outstr);
00179 
00180   protected:
00181     enum PDBPossibleAtoms {USE_ATOM = ATOM, USE_HETATM = HETATM};
00182     PDBAtom( const char *data,
00183            PDBPossibleAtoms whichatom);// parses a line from the PDB data file
00184     //PDBAtom( void);        // makes a generic atom
00185 
00186   public:
00187     PDBAtom( void);        // makes a generic atom
00188     virtual ~PDBAtom( void);
00189     void parse( const char *s);  // reset to new input values
00190     void  sprint( char *s, PDBFormatStyle usestyle = COLUMNS);// write to string
00191     int serialnumber( void);
00192     void serialnumber( int newserialnumber);
00193     
00194     const char*name( void);
00195     void name( const char *newname);
00196     
00197     const char*alternatelocation( void);
00198     void alternatelocation( const char *newalternatelocation);
00199     
00200     const char*residuename( void);
00201     void residuename( const char *newresiduename);
00202     
00203     const char*chain( void);
00204     void chain( const char *newchain);
00205     
00206     int residueseq( void);
00207     void residueseq( int newresidueseq);
00208     
00209     const char*insertioncode( void);
00210     void insertioncode( const char *newinsertioncode);
00211     
00212     BigReal xcoor( void);
00213     void xcoor( BigReal newxcoor);
00214     BigReal ycoor( void);
00215     void ycoor( BigReal newycoor); 
00216     BigReal zcoor( void);
00217     void zcoor( BigReal newzcoor);
00218     
00219     const BigReal *coordinates( void);
00220     void coordinates(const BigReal *newcoordinates);
00221     
00222     BigReal occupancy( void);
00223     void occupancy( BigReal newoccupancy);
00224 
00225     BigReal temperaturefactor( void);
00226     void temperaturefactor( BigReal newtemperaturefactor);
00227 
00228     int footnote( void);
00229     void footnote( int newfootnote);
00230     
00231       // this is not part of the PDB format but is used by XPLOR instead of
00232       // the chain identifier (see XPLOR 3.1 manual, p 104)
00233     const char*segmentname( void);
00234     void segmentname( const char *newsegmentname);
00235 
00236     const char* element( void);
00237     void element( const char *newelement);
00238 };
00239 
00240 // The two sub-classes of PDB Atom
00241 class PDBAtomRecord : public PDBAtom{
00242    public:
00243      PDBAtomRecord( const char *data ) :
00244           PDBAtom( data, PDBAtom::USE_ATOM) {
00245      }
00246      virtual ~PDBAtomRecord( void) {
00247      }
00248 };
00249 
00250 class PDBHetatm : public PDBAtom {
00251   public:
00252     PDBHetatm( const char *data) :
00253          PDBAtom( data, PDBAtom::USE_HETATM) {
00254     }
00255     virtual ~PDBHetatm( void) {
00256     }
00257 };
00258 
00259 
00260 #ifdef MEM_OPT_VERSION
00261 struct PDBCoreData{
00262     BigReal coor[3];              // X, Y, and Z orthogonal A coordinates
00263     BigReal myoccupancy;          // occupancy
00264     BigReal tempfactor;           // temperature factor
00265 
00266     //These functions are added for fewer changes in the src code when
00267     //pruning the PDBData
00268     BigReal occupancy() { return myoccupancy;   }
00269     BigReal xcoor() { return coor[0];  }
00270     BigReal ycoor() { return coor[1];  }
00271     BigReal zcoor() { return coor[2];  }
00272     BigReal temperaturefactor() { return tempfactor; }
00273 
00274     void  sprint( char *s, PDBData::PDBFormatStyle usestyle = PDBData::COLUMNS);// write to string
00275 };
00276 #endif
00277 
00279 // Free-function factory.  The base class has no way to pick which
00280 // derived class should parse a given line, so this function does it
00281 // instead: give it the text of one record and it creates the correct
00282 // PDB data type for it.
00283 PDBData *new_PDBData(const char *data);  // NOTE(review): presumably the caller owns the returned object - confirm in the .C file
00284 
00285 
00286 #endif
00287 

Generated on Wed Nov 22 01:17:16 2017 for NAMD by  doxygen 1.4.7