Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

readpdb.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: readpdb.h,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.43 $       $Date: 2016/11/28 05:01:54 $
00015  *
00016  ***************************************************************************/
00017 
00018 #ifndef READ_PDB_H
00019 #define READ_PDB_H
00020 
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 
00025 #define PDB_RECORD_LENGTH   80   /* actual record size */
00026 #define PDB_BUFFER_LENGTH   83   /* size need to buffer + CR, LF, and NUL */
00027 
00028 #define VMDUSECONECTRECORDS 1
00029 
00030 /*  record type defines */
00031 enum {
00032   PDB_HEADER, PDB_REMARK, PDB_ATOM, PDB_CONECT, PDB_UNKNOWN, PDB_END, PDB_EOF, PDB_CRYST1
00033 };
00034 
00035 /* read the next record from the specified pdb file, and put the string found
00036    in the given string pointer (the caller must provide adequate (81 chars)
00037    buffer space); return the type of record found
00038 */
00039 static int read_pdb_record(FILE *f, char *retStr) {
00040   int ch;
00041   char inbuf[PDB_BUFFER_LENGTH]; /* space for line + cr + lf + NUL */
00042   int recType = PDB_UNKNOWN;
00043  
00044   /* XXX This PDB record reading code breaks with files that use
00045    * Mac or DOS style line breaks with ctrl-M characters.  We need
00046    * to replace the use of fgets() and comparisons against \n with
00047    * code that properly handles the other cases.
00048    */
00049  
00050   /* read the next line, including any ending cr/lf char */
00051   if (inbuf != fgets(inbuf, PDB_RECORD_LENGTH + 2, f)) {
00052     retStr[0] = '\0';
00053     recType = PDB_EOF;
00054   } else {
00055 #if 0
00056     /* XXX disabled this code since \n chars are desirable in remarks */
00057     /* and to make the behavior consistent with webpdbplugin          */
00058 
00059     /* remove the newline character, if there is one */
00060     if (inbuf[strlen(inbuf)-1] == '\n')
00061       inbuf[strlen(inbuf)-1] = '\0';
00062 #endif
00063 
00064     /* atom records are the most common */
00065     if (!strncmp(inbuf, "ATOM ",  5) || !strncmp(inbuf, "HETATM", 6)) {
00066       /* Note that by only comparing 5 chars for "ATOM " rather than 6,     */
00067       /* we allow PDB files containing > 99,999 atoms generated by AMBER    */
00068       /* to load which would otherwise fail.  Not needed for HETATM since   */
00069       /* those aren't going to show up in files produced for/by MD engines. */
00070       recType = PDB_ATOM;
00071     } else if (!strncmp(inbuf, "CONECT", 6)) {
00072       recType = PDB_CONECT;
00073     } else if (!strncmp(inbuf, "REMARK", 6)) {
00074       recType = PDB_REMARK;
00075     } else if (!strncmp(inbuf, "CRYST1", 6)) {
00076       recType = PDB_CRYST1;
00077     } else if (!strncmp(inbuf, "HEADER", 6)) {
00078       recType = PDB_HEADER;
00079     } else if (!strncmp(inbuf, "END", 3)) {  /* very permissive */
00080       /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
00081       /*     since we don't remove trailing '\n' chars                   */
00082 
00083       /* the only two legal END records are "END   " and "ENDMDL" */
00084       recType = PDB_END;
00085     } 
00086 
00087 #if 0
00088     /* XXX disable record type checking for now */
00089     if (recType == PDB_ATOM || 
00090         recType == PDB_CONECT || 
00091         recType == PDB_REMARK || 
00092         recType == PDB_HEADER || 
00093         recType == PDB_CRYST1) {
00094       strcpy(retStr, inbuf);
00095     } else {
00096       retStr[0] = '\0';
00097     }
00098 #else
00099     strcpy(retStr, inbuf);
00100 #endif
00101   }
00102 
00103   /* read the '\r', if there was one */
00104   ch = fgetc(f);
00105   if (ch != '\r')
00106     ungetc(ch, f);
00107   
00108   return recType;
00109 }
00110 
00111 
00112 /* Extract the alpha/beta/gamma a/b/c unit cell info from a CRYST1 record */
00113 static void get_pdb_cryst1(const char *record, 
00114                            float *alpha, float *beta, float *gamma, 
00115                            float *a, float *b, float *c) {
00116   char tmp[PDB_RECORD_LENGTH+3]; /* space for line + cr + lf + NUL */
00117   char ch, *s;
00118   memset(tmp, 0, sizeof(tmp));
00119   strncpy(tmp, record, PDB_RECORD_LENGTH);
00120 
00121   s = tmp+6 ;          ch = tmp[15]; tmp[15] = 0;
00122   *a = (float) atof(s);
00123   s = tmp+15; *s = ch; ch = tmp[24]; tmp[24] = 0;
00124   *b = (float) atof(s);
00125   s = tmp+24; *s = ch; ch = tmp[33]; tmp[33] = 0;
00126   *c = (float) atof(s);
00127   s = tmp+33; *s = ch; ch = tmp[40]; tmp[40] = 0;
00128   *alpha = (float) atof(s);
00129   s = tmp+40; *s = ch; ch = tmp[47]; tmp[47] = 0;
00130   *beta = (float) atof(s);
00131   s = tmp+47; *s = ch; ch = tmp[54]; tmp[54] = 0;
00132   *gamma = (float) atof(s);
00133 }
00134 
00135 
00136 /* Extract the x,y,z coords, occupancy, and beta from an ATOM record */
00137 static void get_pdb_coordinates(const char *record, 
00138                                 float *x, float *y, float *z,
00139                                 float *occup, float *beta) {
00140   char numstr[50]; /* store all fields in one array to save memset calls */
00141   memset(numstr, 0, sizeof(numstr));
00142 
00143   if (x != NULL) {
00144     strncpy(numstr, record + 30, 8);
00145     *x = (float) atof(numstr);
00146   }
00147 
00148   if (y != NULL) {
00149     strncpy(numstr+10, record + 38, 8);
00150     *y = (float) atof(numstr+10);
00151   }
00152 
00153   if (z != NULL) {
00154     strncpy(numstr+20, record + 46, 8);
00155     *z = (float) atof(numstr+20);
00156   }
00157 
00158   if (occup != NULL) {
00159     strncpy(numstr+30, record + 54, 6);
00160     *occup = (float) atof(numstr+30);
00161   }
00162 
00163   if (beta != NULL) {
00164     strncpy(numstr+40, record + 60, 6);
00165     *beta = (float) atof(numstr+40);
00166   }
00167 }
00168 
00169 
00170 /* remove leading and trailing spaces from PDB fields */
00171 static void adjust_pdb_field_string(char *field) {
00172   int i, len;
00173 
00174   len = strlen(field);
00175   while (len > 0 && field[len-1] == ' ') {
00176     field[len-1] = '\0';
00177     len--;
00178   }
00179 
00180   while (len > 0 && field[0] == ' ') {
00181     for (i=0; i < len; i++)
00182       field[i] = field[i+1];
00183     len--;
00184   }
00185 }
00186 
00187 static void get_pdb_header(const char *record, char *pdbcode, char *date,
00188                            char *classification) {
00189   if (date != NULL) {
00190     strncpy(date, record + 50, 9);
00191     date[9] = '\0';
00192   }
00193 
00194   if (classification != NULL) {
00195     strncpy(classification, record + 10, 40);
00196     classification[40] = '\0';
00197   }
00198 
00199   if (pdbcode != NULL) {
00200     strncpy(pdbcode, record + 62, 4);
00201     pdbcode[4] = '\0';
00202     adjust_pdb_field_string(pdbcode); /* remove spaces from accession code */
00203   }
00204 }
00205 
00206 
00207 static void get_pdb_conect(const char *record, int natoms, int *idxmap,
00208                            int *maxbnum, int *nbonds, int **from, int **to) {
00209   int bondto[11], numbonds, i;
00210 
00211   int reclen = strlen(record);
00212   for (numbonds=0, i=0; i<11; i++) {
00213     char bondstr[6];
00214     const int fieldwidth = 5;
00215     int start = 6 + i*fieldwidth;
00216     int end = start + fieldwidth;
00217 
00218     if (end >= reclen)
00219       break;
00220 
00221     memcpy(bondstr, record + start, fieldwidth);
00222     bondstr[5] = '\0';
00223     if (sscanf(bondstr, "%d", &bondto[numbonds]) < 0)
00224       break;
00225     numbonds++; 
00226   }
00227 
00228   for (i=0; i<numbonds; i++) {
00229     /* only add one bond per pair, PDBs list them redundantly */ 
00230     if (bondto[i] > bondto[0]) {
00231       int newnbonds = *nbonds + 1; /* add a new bond */
00232 
00233       /* allocate more bondlist space if necessary */
00234       if (newnbonds >= *maxbnum) {
00235         int newmax;
00236         int *newfromlist, *newtolist;
00237         newmax = (newnbonds + 11) * 1.25;
00238 
00239         newfromlist = (int *) realloc(*from, newmax * sizeof(int));
00240         newtolist = (int *) realloc(*to, newmax * sizeof(int));
00241 
00242         if (newfromlist != NULL || newtolist != NULL) {
00243           *maxbnum = newmax;
00244           *from = newfromlist;
00245           *to = newtolist;
00246         } else {
00247           printf("readpdb) failed to allocate memory for bondlists\n");
00248           return; /* abort */
00249         }
00250       }
00251 
00252       *nbonds = newnbonds;
00253       (*from)[newnbonds-1] = idxmap[bondto[0]] + 1;
00254       (*to)[newnbonds-1] = idxmap[bondto[i]] + 1;
00255     }
00256   }
00257 }
00258 
00259 /* ATOM field format according to PDB standard v2.2
00260   COLUMNS        DATA TYPE       FIELD         DEFINITION
00261 ---------------------------------------------------------------------------------
00262  1 -  6        Record name     "ATOM  "
00263  7 - 11        Integer         serial        Atom serial number.
00264 13 - 16        Atom            name          Atom name.
00265 17             Character       altLoc        Alternate location indicator.
00266 18 - 20        Residue name    resName       Residue name.
00267 22             Character       chainID       Chain identifier.
00268 23 - 26        Integer         resSeq        Residue sequence number.
00269 27             AChar           iCode         Code for insertion of residues.
00270 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in Angstroms.
00271 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in Angstroms.
00272 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in Angstroms.
00273 55 - 60        Real(6.2)       occupancy     Occupancy.
00274 61 - 66        Real(6.2)       tempFactor    Temperature factor.
00275 73 - 76        LString(4)      segID         Segment identifier, left-justified.
00276 77 - 78        LString(2)      element       Element symbol, right-justified.
00277 79 - 80        LString(2)      charge        Charge on the atom.
00278  */
00279 
00280 /* Break a pdb ATOM record into its fields.  The user must provide the
00281    necessary space to store the atom name, residue name, and segment name.
00282    Character strings will be null-terminated.
00283 */
00284 static void get_pdb_fields(const char *record, int reclength, int *serial,
00285                            char *name, char *resname, char *chain, 
00286                            char *segname, char *resid, char *insertion, 
00287                            char *altloc, char *elementsymbol,
00288                            float *x, float *y, float *z, 
00289                            float *occup, float *beta) {
00290   char serialbuf[6];
00291 
00292   /* get atom serial number */
00293   strncpy(serialbuf, record + 6, 5);
00294   serialbuf[5] = '\0';
00295   *serial = 0;
00296   sscanf(serialbuf, "%5d", serial);
00297   
00298   /* get atom name */
00299   strncpy(name, record + 12, 4);
00300   name[4] = '\0';
00301   adjust_pdb_field_string(name); /* remove spaces from the name */
00302 
00303   /* get alternate location identifier */
00304   strncpy(altloc, record + 16, 1);
00305   altloc[1] = '\0';
00306 
00307   /* get residue name */
00308   strncpy(resname, record + 17, 4);
00309   resname[4] = '\0';
00310   adjust_pdb_field_string(resname); /* remove spaces from the resname */
00311 
00312   /* get chain name */
00313   chain[0] = record[21];
00314   chain[1] = '\0';
00315 
00316   /* get residue id number */
00317   strncpy(resid, record + 22, 4);
00318   resid[4] = '\0';
00319   adjust_pdb_field_string(resid); /* remove spaces from the resid */
00320 
00321   /* get the insertion code */
00322   insertion[0] = record[26];
00323   insertion[1] = '\0';
00324 
00325   /* get x, y, and z coordinates */
00326   get_pdb_coordinates(record, x, y, z, occup, beta);
00327 
00328   /* get segment name */
00329   if (reclength >= 73) {
00330     strncpy(segname, record + 72, 4);
00331     segname[4] = '\0';
00332     adjust_pdb_field_string(segname); /* remove spaces from the segname */
00333   } else {
00334     segname[0] = '\0';
00335   }
00336 
00337   /* get the atomic element symbol */
00338   if (reclength >= 77) {
00339     strncpy(elementsymbol, record + 76, 2);
00340     elementsymbol[2] = '\0';
00341   } else {
00342     elementsymbol[0] = '\0';
00343   }
00344 }  
00345 
00346 
00347 /* Write PDB data to given file descriptor; return success. */
00348 static int write_raw_pdb_record(FILE *fd, const char *recordname,
00349     int index,const char *atomname, const char *resname,int resid, 
00350     const char *insertion, const char *altloc, const char *elementsymbol,
00351     float x, float y, float z, float occ, float beta, 
00352     const char *chain, const char *segname) {
00353   int rc;
00354   char indexbuf[32];
00355   char residbuf[32];
00356   char segnamebuf[5];
00357   char resnamebuf[5];
00358   char altlocchar;
00359 
00360   /* XXX                                                          */
00361   /* if the atom or residue indices exceed the legal PDB spec, we */
00362   /* start emitting asterisks or hexadecimal strings rather than  */
00363   /* aborting.  This is not really legal, but is an accepted hack */
00364   /* among various other programs that deal with large PDB files  */
00365   /* If we run out of hexadecimal indices, then we just print     */
00366   /* asterisks.                                                   */
00367   if (index < 100000) {
00368     sprintf(indexbuf, "%5d", index);
00369   } else if (index < 1048576) {
00370     sprintf(indexbuf, "%05x", index);
00371   } else {
00372     sprintf(indexbuf, "*****");
00373   }
00374 
00375   if (resid < 10000) {
00376     sprintf(residbuf, "%4d", resid);
00377   } else if (resid < 65536) {
00378     sprintf(residbuf, "%04x", resid);
00379   } else { 
00380     sprintf(residbuf, "****");
00381   }
00382 
00383   altlocchar = altloc[0];
00384   if (altlocchar == '\0') {
00385     altlocchar = ' ';
00386   }
00387 
00388   /* make sure the segname or resname do not overflow the format */ 
00389   strncpy(segnamebuf,segname,4);
00390   segnamebuf[4] = '\0';
00391   strncpy(resnamebuf,resname,4);
00392   resnamebuf[4] = '\0';
00393 
00394  
00395   rc = fprintf(fd,
00396          "%-6s%5s %4s%c%-4s%c%4s%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %-4s%2s\n",
00397          recordname, indexbuf, atomname, altlocchar, resnamebuf, chain[0], 
00398          residbuf, insertion[0], x, y, z, occ, beta, segnamebuf, elementsymbol);
00399 
00400   return (rc > 0);
00401 }
00402 
00403 #endif

Generated on Tue Jan 21 02:56:22 2020 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002