Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

readpdb.h

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: readpdb.h,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.44 $       $Date: 2024/03/01 02:17:44 $
00015  *
00016  ***************************************************************************/
00017 
00018 #ifndef READ_PDB_H
00019 #define READ_PDB_H
00020 
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 
00025 #define PDB_RECORD_LENGTH   80   /* actual record size */
00026 #define PDB_BUFFER_LENGTH   83   /* size need to buffer + CR, LF, and NUL */
00027 
00028 #define VMDUSECONECTRECORDS 1
00029 
00030 /*  record type defines */
00031 enum {
00032   PDB_HEADER, PDB_REMARK, PDB_ATOM, PDB_CONECT, PDB_UNKNOWN, PDB_END, PDB_EOF, PDB_CRYST1
00033 };
00034 
00035 /* read the next record from the specified pdb file, and put the string found
00036    in the given string pointer (the caller must provide adequate (81 chars)
00037    buffer space); return the type of record found
00038 */
00039 static int read_pdb_record(FILE *f, char *retStr) {
00040   int ch;
00041   char inbuf[PDB_BUFFER_LENGTH]; /* space for line + cr + lf + NUL */
00042   int recType = PDB_UNKNOWN;
00043  
00044   /* XXX This PDB record reading code breaks with files that use
00045    * Mac or DOS style line breaks with ctrl-M characters.  We need
00046    * to replace the use of fgets() and comparisons against \n with
00047    * code that properly handles the other cases.
00048    */
00049  
00050   /* read the next line, including any ending cr/lf char */
00051   if (inbuf != fgets(inbuf, PDB_RECORD_LENGTH + 2, f)) {
00052     retStr[0] = '\0';
00053     recType = PDB_EOF;
00054   } else {
00055 #if 0
00056     /* XXX disabled this code since \n chars are desirable in remarks */
00057     /* and to make the behavior consistent with webpdbplugin          */
00058 
00059     /* remove the newline character, if there is one */
00060     if (inbuf[strlen(inbuf)-1] == '\n')
00061       inbuf[strlen(inbuf)-1] = '\0';
00062 #endif
00063 
00064     /* atom records are the most common */
00065     if (!strncmp(inbuf, "ATOM ",  5) || !strncmp(inbuf, "HETATM", 6)) {
00066       /* Note that by only comparing 5 chars for "ATOM " rather than 6,     */
00067       /* we allow PDB files containing > 99,999 atoms generated by AMBER    */
00068       /* to load which would otherwise fail.  Not needed for HETATM since   */
00069       /* those aren't going to show up in files produced for/by MD engines. */
00070       recType = PDB_ATOM;
00071     } else if (!strncmp(inbuf, "CONECT", 6)) {
00072       recType = PDB_CONECT;
00073     } else if (!strncmp(inbuf, "REMARK", 6)) {
00074       recType = PDB_REMARK;
00075     } else if (!strncmp(inbuf, "CRYST1", 6)) {
00076       recType = PDB_CRYST1;
00077     } else if (!strncmp(inbuf, "HEADER", 6)) {
00078       recType = PDB_HEADER;
00079     } else if (!strncmp(inbuf, "ENDBRANCH", 9)) {  /* PDBQT variant used by AutoDock */
00080       recType = PDB_UNKNOWN;
00081     } else if (!strncmp(inbuf, "ENDROOT", 7)) {    /* PDBQT variant used by AutoDock */
00082       recType = PDB_UNKNOWN;
00083     } else if (!strncmp(inbuf, "END", 3)) {  /* very permissive */
00084       /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
00085       /*     since we don't remove trailing '\n' chars                   */
00086 
00087       /* the only two legal END records are "END   " and "ENDMDL" */
00088       recType = PDB_END;
00089     } 
00090 
00091 #if 0
00092     /* XXX disable record type checking for now */
00093     if (recType == PDB_ATOM || 
00094         recType == PDB_CONECT || 
00095         recType == PDB_REMARK || 
00096         recType == PDB_HEADER || 
00097         recType == PDB_CRYST1) {
00098       strcpy(retStr, inbuf);
00099     } else {
00100       retStr[0] = '\0';
00101     }
00102 #else
00103     strcpy(retStr, inbuf);
00104 #endif
00105   }
00106 
00107   /* read the '\r', if there was one */
00108   ch = fgetc(f);
00109   if (ch != '\r')
00110     ungetc(ch, f);
00111   
00112   return recType;
00113 }
00114 
00115 
00116 /* Extract the alpha/beta/gamma a/b/c unit cell info from a CRYST1 record */
00117 static void get_pdb_cryst1(const char *record, 
00118                            float *alpha, float *beta, float *gamma, 
00119                            float *a, float *b, float *c) {
00120   char tmp[PDB_RECORD_LENGTH+3]; /* space for line + cr + lf + NUL */
00121   char ch, *s;
00122   memset(tmp, 0, sizeof(tmp));
00123   strncpy(tmp, record, PDB_RECORD_LENGTH);
00124 
00125   s = tmp+6 ;          ch = tmp[15]; tmp[15] = 0;
00126   *a = (float) atof(s);
00127   s = tmp+15; *s = ch; ch = tmp[24]; tmp[24] = 0;
00128   *b = (float) atof(s);
00129   s = tmp+24; *s = ch; ch = tmp[33]; tmp[33] = 0;
00130   *c = (float) atof(s);
00131   s = tmp+33; *s = ch; ch = tmp[40]; tmp[40] = 0;
00132   *alpha = (float) atof(s);
00133   s = tmp+40; *s = ch; ch = tmp[47]; tmp[47] = 0;
00134   *beta = (float) atof(s);
00135   s = tmp+47; *s = ch; ch = tmp[54]; tmp[54] = 0;
00136   *gamma = (float) atof(s);
00137 }
00138 
00139 
00140 /* Extract the x,y,z coords, occupancy, and beta from an ATOM record */
00141 static void get_pdb_coordinates(const char *record, 
00142                                 float *x, float *y, float *z,
00143                                 float *occup, float *beta) {
00144   char numstr[50]; /* store all fields in one array to save memset calls */
00145   memset(numstr, 0, sizeof(numstr));
00146 
00147   if (x != NULL) {
00148     strncpy(numstr, record + 30, 8);
00149     *x = (float) atof(numstr);
00150   }
00151 
00152   if (y != NULL) {
00153     strncpy(numstr+10, record + 38, 8);
00154     *y = (float) atof(numstr+10);
00155   }
00156 
00157   if (z != NULL) {
00158     strncpy(numstr+20, record + 46, 8);
00159     *z = (float) atof(numstr+20);
00160   }
00161 
00162   if (occup != NULL) {
00163     strncpy(numstr+30, record + 54, 6);
00164     *occup = (float) atof(numstr+30);
00165   }
00166 
00167   if (beta != NULL) {
00168     strncpy(numstr+40, record + 60, 6);
00169     *beta = (float) atof(numstr+40);
00170   }
00171 }
00172 
00173 
00174 /* remove leading and trailing spaces from PDB fields */
00175 static void adjust_pdb_field_string(char *field) {
00176   int i, len;
00177 
00178   len = strlen(field);
00179   while (len > 0 && field[len-1] == ' ') {
00180     field[len-1] = '\0';
00181     len--;
00182   }
00183 
00184   while (len > 0 && field[0] == ' ') {
00185     for (i=0; i < len; i++)
00186       field[i] = field[i+1];
00187     len--;
00188   }
00189 }
00190 
00191 static void get_pdb_header(const char *record, char *pdbcode, char *date,
00192                            char *classification) {
00193   if (date != NULL) {
00194     strncpy(date, record + 50, 9);
00195     date[9] = '\0';
00196   }
00197 
00198   if (classification != NULL) {
00199     strncpy(classification, record + 10, 40);
00200     classification[40] = '\0';
00201   }
00202 
00203   if (pdbcode != NULL) {
00204     strncpy(pdbcode, record + 62, 4);
00205     pdbcode[4] = '\0';
00206     adjust_pdb_field_string(pdbcode); /* remove spaces from accession code */
00207   }
00208 }
00209 
00210 
00211 static void get_pdb_conect(const char *record, int natoms, int *idxmap,
00212                            int *maxbnum, int *nbonds, int **from, int **to) {
00213   int bondto[11], numbonds, i;
00214 
00215   int reclen = strlen(record);
00216   for (numbonds=0, i=0; i<11; i++) {
00217     char bondstr[6];
00218     const int fieldwidth = 5;
00219     int start = 6 + i*fieldwidth;
00220     int end = start + fieldwidth;
00221 
00222     if (end >= reclen)
00223       break;
00224 
00225     memcpy(bondstr, record + start, fieldwidth);
00226     bondstr[5] = '\0';
00227     if (sscanf(bondstr, "%d", &bondto[numbonds]) < 0)
00228       break;
00229     numbonds++; 
00230   }
00231 
00232   for (i=0; i<numbonds; i++) {
00233     /* only add one bond per pair, PDBs list them redundantly */ 
00234     if (bondto[i] > bondto[0]) {
00235       int newnbonds = *nbonds + 1; /* add a new bond */
00236 
00237       /* allocate more bondlist space if necessary */
00238       if (newnbonds >= *maxbnum) {
00239         int newmax;
00240         int *newfromlist, *newtolist;
00241         newmax = (newnbonds + 11) * 1.25;
00242 
00243         newfromlist = (int *) realloc(*from, newmax * sizeof(int));
00244         newtolist = (int *) realloc(*to, newmax * sizeof(int));
00245 
00246         if (newfromlist != NULL || newtolist != NULL) {
00247           *maxbnum = newmax;
00248           *from = newfromlist;
00249           *to = newtolist;
00250         } else {
00251           printf("readpdb) failed to allocate memory for bondlists\n");
00252           return; /* abort */
00253         }
00254       }
00255 
00256       *nbonds = newnbonds;
00257       (*from)[newnbonds-1] = idxmap[bondto[0]] + 1;
00258       (*to)[newnbonds-1] = idxmap[bondto[i]] + 1;
00259     }
00260   }
00261 }
00262 
00263 /* ATOM field format according to PDB standard v2.2
00264   COLUMNS        DATA TYPE       FIELD         DEFINITION
00265 ---------------------------------------------------------------------------------
00266  1 -  6        Record name     "ATOM  "
00267  7 - 11        Integer         serial        Atom serial number.
00268 13 - 16        Atom            name          Atom name.
00269 17             Character       altLoc        Alternate location indicator.
00270 18 - 20        Residue name    resName       Residue name.
00271 22             Character       chainID       Chain identifier.
00272 23 - 26        Integer         resSeq        Residue sequence number.
00273 27             AChar           iCode         Code for insertion of residues.
00274 31 - 38        Real(8.3)       x             Orthogonal coordinates for X in Angstroms.
00275 39 - 46        Real(8.3)       y             Orthogonal coordinates for Y in Angstroms.
00276 47 - 54        Real(8.3)       z             Orthogonal coordinates for Z in Angstroms.
00277 55 - 60        Real(6.2)       occupancy     Occupancy.
00278 61 - 66        Real(6.2)       tempFactor    Temperature factor.
00279 73 - 76        LString(4)      segID         Segment identifier, left-justified.
00280 77 - 78        LString(2)      element       Element symbol, right-justified.
00281 79 - 80        LString(2)      charge        Charge on the atom.
00282  */
00283 
00284 /* Break a pdb ATOM record into its fields.  The user must provide the
00285    necessary space to store the atom name, residue name, and segment name.
00286    Character strings will be null-terminated.
00287 */
00288 static void get_pdb_fields(const char *record, int reclength, int *serial,
00289                            char *name, char *resname, char *chain, 
00290                            char *segname, char *resid, char *insertion, 
00291                            char *altloc, char *elementsymbol,
00292                            float *x, float *y, float *z, 
00293                            float *occup, float *beta) {
00294   char serialbuf[6];
00295 
00296   /* get atom serial number */
00297   strncpy(serialbuf, record + 6, 5);
00298   serialbuf[5] = '\0';
00299   *serial = 0;
00300   sscanf(serialbuf, "%5d", serial);
00301   
00302   /* get atom name */
00303   strncpy(name, record + 12, 4);
00304   name[4] = '\0';
00305   adjust_pdb_field_string(name); /* remove spaces from the name */
00306 
00307   /* get alternate location identifier */
00308   strncpy(altloc, record + 16, 1);
00309   altloc[1] = '\0';
00310 
00311   /* get residue name */
00312   strncpy(resname, record + 17, 4);
00313   resname[4] = '\0';
00314   adjust_pdb_field_string(resname); /* remove spaces from the resname */
00315 
00316   /* get chain name */
00317   chain[0] = record[21];
00318   chain[1] = '\0';
00319 
00320   /* get residue id number */
00321   strncpy(resid, record + 22, 4);
00322   resid[4] = '\0';
00323   adjust_pdb_field_string(resid); /* remove spaces from the resid */
00324 
00325   /* get the insertion code */
00326   insertion[0] = record[26];
00327   insertion[1] = '\0';
00328 
00329   /* get x, y, and z coordinates */
00330   get_pdb_coordinates(record, x, y, z, occup, beta);
00331 
00332   /* get segment name */
00333   if (reclength >= 73) {
00334     strncpy(segname, record + 72, 4);
00335     segname[4] = '\0';
00336     adjust_pdb_field_string(segname); /* remove spaces from the segname */
00337   } else {
00338     segname[0] = '\0';
00339   }
00340 
00341   /* get the atomic element symbol */
00342   if (reclength >= 77) {
00343     strncpy(elementsymbol, record + 76, 2);
00344     elementsymbol[2] = '\0';
00345   } else {
00346     elementsymbol[0] = '\0';
00347   }
00348 }  
00349 
00350 
00351 /* Write PDB data to given file descriptor; return success. */
00352 static int write_raw_pdb_record(FILE *fd, const char *recordname,
00353     int index,const char *atomname, const char *resname,int resid, 
00354     const char *insertion, const char *altloc, const char *elementsymbol,
00355     float x, float y, float z, float occ, float beta, 
00356     const char *chain, const char *segname) {
00357   int rc;
00358   char indexbuf[32];
00359   char residbuf[32];
00360   char segnamebuf[5];
00361   char resnamebuf[5];
00362   char altlocchar;
00363 
00364   /* XXX                                                          */
00365   /* if the atom or residue indices exceed the legal PDB spec, we */
00366   /* start emitting asterisks or hexadecimal strings rather than  */
00367   /* aborting.  This is not really legal, but is an accepted hack */
00368   /* among various other programs that deal with large PDB files  */
00369   /* If we run out of hexadecimal indices, then we just print     */
00370   /* asterisks.                                                   */
00371   if (index < 100000) {
00372     sprintf(indexbuf, "%5d", index);
00373   } else if (index < 1048576) {
00374     sprintf(indexbuf, "%05x", index);
00375   } else {
00376     sprintf(indexbuf, "*****");
00377   }
00378 
00379   if (resid < 10000) {
00380     sprintf(residbuf, "%4d", resid);
00381   } else if (resid < 65536) {
00382     sprintf(residbuf, "%04x", resid);
00383   } else { 
00384     sprintf(residbuf, "****");
00385   }
00386 
00387   altlocchar = altloc[0];
00388   if (altlocchar == '\0') {
00389     altlocchar = ' ';
00390   }
00391 
00392   /* make sure the segname or resname do not overflow the format */ 
00393   strncpy(segnamebuf,segname,4);
00394   segnamebuf[4] = '\0';
00395   strncpy(resnamebuf,resname,4);
00396   resnamebuf[4] = '\0';
00397 
00398  
00399   rc = fprintf(fd,
00400          "%-6s%5s %4s%c%-4s%c%4s%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %-4s%2s\n",
00401          recordname, indexbuf, atomname, altlocchar, resnamebuf, chain[0], 
00402          residbuf, insertion[0], x, y, z, occ, beta, segnamebuf, elementsymbol);
00403 
00404   return (rc > 0);
00405 }
00406 
00407 #endif

Generated on Thu Oct 10 03:07:16 2024 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002