Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

webpdbplugin.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: webpdbplugin.c,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.55 $       $Date: 2017/06/23 20:20:27 $
00015  *
00016  ***************************************************************************/
00017 
00018 #include <tcl.h>
00019 #include <stdio.h>
00020 #include <stdlib.h>
00021 #include <string.h>
00022 #include "molfile_plugin.h"
00023 #include "readpdb.h"
00024 #include "periodic_table.h"
00025 
00026 /*
00027  * Load pdb from the RCSB
00028  * Uses Tcl
00029  */
00030 
00031 /*
00032  * Need my own read_pdb_record because the one in readpdb takes a FILE*.
00033  * This one will be better anyway since I don't recopy the string ;-)
00034  * Read the given pdb string.  On returning, pos will point to the start of
00035  * the next read. 
00036  */ 
00037 static int my_read_pdb_record(const char *pdb, char **pos) {
00038   int recType = PDB_UNKNOWN;
00039   char *nlpos;  /* newline position */
00040 
00041   nlpos = strchr(pdb, '\n'); /* XXX segv occurs on x86_64 linux */
00042                              /* loading '1epw' or '1sft'        */ 
00043 
00044   if (!nlpos) {
00045     return PDB_EOF;
00046   } 
00047 
00048   /* set the next position to the first char after the newline */
00049   *pos = nlpos + 1;
00050 
00051   /* atom records are the most common */
00052   if (!strncmp(pdb, "ATOM ",  5) || !strncmp(pdb, "HETATM", 6)) {
00053     /* Note that by only comparing 5 chars for "ATOM " rather than 6,     */
00054     /* we allow PDB files containing > 99,999 atoms generated by AMBER    */
00055     /* to load which would otherwise fail.  Not needed for HETATM since   */
00056     /* those aren't going to show up in files produced for/by MD engines. */
00057     recType = PDB_ATOM;
00058   } else if (!strncmp(pdb, "REMARK", 6)) {
00059     recType = PDB_REMARK;
00060   } else if (!strncmp(pdb, "CRYST1", 6)) {
00061     recType = PDB_CRYST1;
00062   } else if (!strncmp(pdb, "HEADER", 6)) {
00063     recType = PDB_HEADER;
00064   } else if (!strncmp(pdb, "END", 3)) {  /* very permissive */
00065     /* XXX we treat any "ENDxxx" record as an end, to simplify testing */
00066     /*     since we don't remove trailing '\n' chars                   */
00067 
00068     /* the only two legal END records are "END   " and "ENDMDL" */
00069     recType = PDB_END;
00070   } 
00071 
00072   return recType;
00073 }
00074 
00075  
00076 typedef struct {
00077   char *pdbstr; 
00078   char *pos;
00079   int natoms;
00080   molfile_metadata_t *meta;
00081   int nconect;
00082   int nbonds, maxbnum;
00083   int *from, *to, *idxmap;
00084 } pdbdata;
00085 
00086 
00087 static void *pdb_read(char *pdbstr, int *natoms) {
00088   pdbdata *pdb;
00089   int indx, nconect;
00090   char *pos = pdbstr;
00091   char *next;
00092 
00093   if (!pdbstr) return NULL;
00094 
00095   pdb = (pdbdata *)malloc(sizeof(pdbdata));
00096   pdb->meta = (molfile_metadata_t *) malloc(sizeof(molfile_metadata_t));
00097   memset(pdb->meta, 0, sizeof(molfile_metadata_t));
00098 
00099   pdb->meta->remarklen = 0;
00100   pdb->meta->remarks = NULL;
00101 
00102   *natoms=0;
00103   nconect=0;
00104   do {
00105     indx = my_read_pdb_record(pos, &next);
00106     if (indx == PDB_ATOM) {
00107       *natoms += 1;
00108     } else if (indx == PDB_CONECT) {
00109       nconect++;
00110     } else if (indx == PDB_HEADER) {
00111       get_pdb_header(pos, pdb->meta->accession, pdb->meta->date, NULL);
00112       if (strlen(pdb->meta->accession) > 0)
00113         strcpy(pdb->meta->database, "PDB");
00114     } else if (indx == PDB_REMARK || indx == PDB_UNKNOWN) {
00115       int len = next - pos;
00116       int newlen = len + pdb->meta->remarklen;
00117 
00118       char *newstr=realloc(pdb->meta->remarks, newlen + 1);
00119       if (newstr != NULL) {
00120         pdb->meta->remarks = newstr;
00121         pdb->meta->remarks[pdb->meta->remarklen] = '\0';
00122         memcpy(pdb->meta->remarks + pdb->meta->remarklen, pos, len);
00123         pdb->meta->remarks[newlen] = '\0';
00124         pdb->meta->remarklen = newlen;
00125       }
00126     }
00127 
00128     pos = next;
00129   } while (indx != PDB_END && indx != PDB_EOF);
00130 
00131   pdb->pdbstr = pdbstr;
00132   pdb->pos =    pdbstr;
00133 
00134   pdb->natoms = *natoms;
00135   pdb->nconect = nconect;
00136   pdb->nbonds = 0;
00137   pdb->maxbnum = 0;
00138   pdb->from = NULL;
00139   pdb->to = NULL;
00140   pdb->idxmap = NULL;
00141 
00142 #if defined(VMDUSECONECTRECORDS)
00143   /* allocate atom index translation table if we have 99,999 atoms or less */
00144   /* and we have conect records to process                                 */
00145   if (pdb->natoms < 100000 && pdb->nconect > 0) {
00146     pdb->idxmap = (int *) malloc(100000 * sizeof(int));
00147     memset(pdb->idxmap, 0, 100000 * sizeof(int));
00148   }
00149 #endif
00150 
00151   return pdb;
00152 }
00153 
00154 static const char *rcsbmsg[] = {
00155   "  The PDB is supported by RCSB, the NSF, US PHS, NIH, NCRP, NIGMS, NLM,",
00156   "and US DoE, who are not liable for the data.  PDB files shall not be",
00157   "sold.  See ftp://ftp.rcsb.org/advisory.doc for full details."
00158 };
00159 
00160 static int show_msg = 1;
00161 
00162 static void *open_file_read(const char *filename, const char *filetype,
00163     int *natoms) {
00164 
00165   Tcl_Interp *interp;
00166   char url[300];
00167   char cmd[300]; 
00168   char *pdbfile;
00169   const char *result;
00170   void *v;
00171 
00172   /*
00173    * Create and initialize the interpreter
00174    */
00175   interp = Tcl_CreateInterp();
00176   if (!interp) {
00177     fprintf(stderr, "Could not create new Tcl Interp\n");
00178     return NULL; 
00179   }
00180   if (Tcl_Init(interp) != TCL_OK) {
00181     fprintf(stderr, "Warning, could not create initialize Tcl Interp\n");
00182   }
00183   if (!Tcl_PkgRequire(interp, (char *)"http", (char *)"2.0", 0)) {
00184     fprintf(stderr, "Could not load http package\n");
00185     Tcl_DeleteInterp(interp);
00186     return NULL;
00187   }
00188 
00189   if (strlen(filename) != 4) {
00190     fprintf(stderr, "PDB code %s is invalid; PDB accession codes have four letters.\n", filename);
00191     Tcl_DeleteInterp(interp);
00192     return NULL;
00193   }
00194 
00195   if (show_msg) {
00196     int i;
00197     show_msg = 0;
00198     for (i=0; i<3; i++) printf("%s\n", rcsbmsg[i]);
00199   }
00200 
00201 #if 1
00202   /* PDB website layout, changed on 6/23/2017                       */
00203   /* http://www.rcsb.org/pdb/static.do?p=download/http/index.html   */
00204   sprintf(url, "http://files.rcsb.org/download/%s.pdb", filename);
00205 #else
00206   /* PDB website layout, changed on 1/1/2006 */
00207   sprintf(url, "http://www.rcsb.org/pdb/downloadFile.do?fileFormat=pdb&compression=NO&structureId=%s", filename);
00208 #endif
00209   sprintf(cmd, "set token [::http::geturl \"%s\"]", url);
00210   if (Tcl_Eval(interp, cmd) != TCL_OK) {
00211     fprintf(stderr, "Error loading PDB: %s\n", Tcl_GetStringResult(interp));
00212     Tcl_DeleteInterp(interp);
00213     return NULL;
00214   } 
00215   sprintf(cmd, "upvar #0 $token state");
00216   Tcl_Eval(interp, cmd); 
00217   
00218   result = Tcl_GetVar2(interp, (char *)"state", "body", TCL_GLOBAL_ONLY); 
00219   if (!result) {
00220     fprintf(stderr, "Error loading PDB: %s\n", Tcl_GetStringResult(interp));
00221     Tcl_DeleteInterp(interp);
00222     return NULL;
00223   } 
00224   pdbfile = strdup(result);
00225   Tcl_DeleteInterp(interp);
00226 
00227   /* XXX this code needs updating still */
00228   /* pdbfile will be free'd by close_pdb() */
00229   v = pdb_read(pdbfile, natoms); 
00230   return v;
00231 }
00232    
00233 static int read_pdb_structure(void *mydata, int *optflags, 
00234     molfile_atom_t *atoms) {
00235 
00236   pdbdata *pdb = (pdbdata *)mydata;
00237   char *pos = pdb->pdbstr;
00238   char *next;
00239   int i, rectype, atomserial, pteidx;
00240   char ridstr[8];
00241   char elementsymbol[3];
00242   molfile_atom_t *atom;
00243   int badptecount = 0;
00244   elementsymbol[2]=0;
00245 
00246   *optflags = MOLFILE_INSERTION | MOLFILE_OCCUPANCY | MOLFILE_BFACTOR | 
00247               MOLFILE_ALTLOC | MOLFILE_ATOMICNUMBER | MOLFILE_BONDSSPECIAL;
00248 
00249   i=0; /* Count atoms */
00250   do {
00251     rectype = my_read_pdb_record(pos, &next);
00252     switch (rectype) {
00253     case PDB_ATOM:
00254       atom = atoms+i;
00255       get_pdb_fields(pos, next-pos, &atomserial,
00256           atom->name, atom->resname, atom->chain, atom->segid, 
00257           ridstr, atom->insertion, atom->altloc, elementsymbol,
00258           NULL, NULL, NULL, &atom->occupancy, &atom->bfactor);
00259 
00260       if (pdb->idxmap != NULL && atomserial < 100000) {
00261         pdb->idxmap[atomserial] = i; /* record new serial number translation */
00262       }
00263 
00264       atom->resid = atoi(ridstr);
00265 
00266       /* determine atomic number from the element symbol */
00267       pteidx = get_pte_idx_from_string(elementsymbol);
00268       atom->atomicnumber = pteidx;
00269       if (pteidx != 0) {
00270         atom->mass = get_pte_mass(pteidx);
00271         atom->radius = get_pte_vdw_radius(pteidx);
00272       } else {
00273         badptecount++; /* unrecognized element */
00274       }
00275 
00276       strcpy(atom->type, atom->name);
00277       i++;
00278       break;
00279 
00280     case PDB_CONECT:
00281       /* only read CONECT records for structures where we know they can */
00282       /* be valid for all of the atoms in the structure                 */
00283       if (pdb->idxmap != NULL) {
00284         char cbuf[PDB_BUFFER_LENGTH];
00285         int len = next-pos;
00286 
00287         if (len > PDB_BUFFER_LENGTH) 
00288           len = PDB_BUFFER_LENGTH;
00289         strncpy(cbuf, pos, len);
00290         get_pdb_conect(cbuf, pdb->natoms, pdb->idxmap,
00291                        &pdb->maxbnum, &pdb->nbonds, &pdb->from, &pdb->to);
00292       }
00293       break;
00294 
00295     default:
00296       /* other record types are ignored in the structure callback */
00297       /* and are dealt with in the timestep callback or elsewhere */
00298       break;
00299     }
00300     pos = next;
00301   } while (rectype != PDB_END && rectype != PDB_EOF);
00302 
00303   /* if all atoms are recognized, set the mass and radius flags too,  */
00304   /* otherwise let VMD guess these for itself using it's own methods  */
00305   if (badptecount == 0) {
00306     *optflags |= MOLFILE_MASS | MOLFILE_RADIUS;
00307   }
00308 
00309   return MOLFILE_SUCCESS;
00310 }
00311 
00312 
00313 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr, 
00314                       float **bondorder, int **bondtype, 
00315                       int *nbondtypes, char ***bondtypename) {
00316   pdbdata *pdb = (pdbdata *)v;
00317  
00318   *nbonds = 0;
00319   *fromptr = NULL;
00320   *toptr = NULL;
00321   *bondorder = NULL; /* PDB files don't have bond order information */
00322   *bondtype = NULL;
00323   *nbondtypes = 0;
00324   *bondtypename = NULL;
00325 
00326 /* The newest plugin API allows us to return CONECT records as
00327  * additional bonds above and beyond what the distance search returns.
00328  * Without that feature, we otherwise have to check completeness and
00329  * ignore them if they don't look to be fully specified for this molecule */
00330 #if !defined(MOLFILE_BONDSSPECIAL)
00331   if (pdb->natoms >= 100000) {
00332     printf("webpdbplugin) Warning: more than 99,999 atoms, ignored CONECT records\n");
00333     return MOLFILE_SUCCESS;
00334   } else if (((float) pdb->nconect / (float) pdb->natoms) <= 0.85) {
00335     printf("webpdbplugin) Warning: Probable incomplete bond structure specified,\n");
00336     printf("webpdbplugin)          ignoring CONECT records\n");
00337     return MOLFILE_SUCCESS;
00338   } else if (pdb->nconect == 0) {
00339     return MOLFILE_SUCCESS;
00340   }
00341 #endif
00342 
00343   *nbonds = pdb->nbonds;
00344   *fromptr = pdb->from;
00345   *toptr = pdb->to;
00346 
00347   return MOLFILE_SUCCESS;
00348 }
00349 
00350 
00351 static int read_next_timestep(void *v, int natoms, molfile_timestep_t *ts) {
00352   pdbdata *pdb = (pdbdata *)v;
00353   char *pos = pdb->pos;
00354   char *next;
00355   float *x, *y, *z;
00356   float occup, bfac;
00357   int indx, i = 0;
00358 
00359   if (ts) {
00360     x = ts->coords;
00361     y = x+1;
00362     z = x+2;
00363   } else {
00364     x = y = z = 0;
00365   }
00366   do {
00367     indx = my_read_pdb_record(pos, &next);
00368     if((indx == PDB_END || indx == PDB_EOF) && (i < pdb->natoms)) {
00369       return MOLFILE_ERROR;
00370     } else if(indx == PDB_ATOM) {
00371       if(i++ >= pdb->natoms) {
00372         break;
00373       }
00374       /* just get the coordinates, and store them */
00375       if (ts) {
00376         get_pdb_coordinates(pos, x, y, z, &occup, &bfac);
00377         x += 3;
00378         y += 3;
00379         z += 3;
00380       }
00381     } else if (indx == PDB_CRYST1) {
00382       if (ts) {
00383         get_pdb_cryst1(pos, &ts->alpha, &ts->beta, &ts->gamma,
00384                                &ts->A, &ts->B, &ts->C);
00385       }
00386     }
00387     pos = next;
00388   } while(!(indx == PDB_END || indx == PDB_EOF));
00389   pdb->pos = pos;
00390 
00391   return MOLFILE_SUCCESS;
00392 }
00393 
00394 static void close_pdb_read(void *v) {
00395   pdbdata *pdb = (pdbdata *)v;
00396   if (!pdb) return;
00397   free(pdb->pdbstr);
00398   if (pdb->idxmap != NULL)
00399     free(pdb->idxmap);
00400   if (pdb->meta->remarks != NULL)
00401     free(pdb->meta->remarks);
00402   if (pdb->meta != NULL)
00403     free(pdb->meta);
00404   free(pdb);
00405 }
00406 
00407 
00408 static int read_molecule_metadata(void *v, molfile_metadata_t **metadata) {
00409   pdbdata *pdb = (pdbdata *)v;
00410   *metadata = pdb->meta;
00411   return MOLFILE_SUCCESS;
00412 }
00413 
00414 /* 
00415  * Registration stuff
00416  */
00417 
00418 static molfile_plugin_t plugin;
00419 
00420 VMDPLUGIN_API int VMDPLUGIN_init() {
00421   memset(&plugin, 0, sizeof(molfile_plugin_t));
00422   plugin.abiversion = vmdplugin_ABIVERSION;
00423   plugin.type = MOLFILE_PLUGIN_TYPE;
00424   plugin.name = "webpdb";
00425   plugin.prettyname = "Web PDB Download";
00426   plugin.author = "Justin Gullingsrud, John Stone";
00427   plugin.majorv = 1;
00428   plugin.minorv = 17;
00429   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
00430   plugin.filename_extension = "";
00431   plugin.open_file_read = open_file_read;
00432   plugin.read_structure = read_pdb_structure;
00433   plugin.read_bonds = read_bonds;
00434   plugin.read_next_timestep = read_next_timestep;
00435   plugin.close_file_read = close_pdb_read;
00436   plugin.read_molecule_metadata = read_molecule_metadata;
00437   return VMDPLUGIN_SUCCESS;
00438 }
00439 
00440 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00441   (*cb)(v, (vmdplugin_t *)&plugin);
00442   return VMDPLUGIN_SUCCESS;
00443 }
00444 
00445 VMDPLUGIN_API int VMDPLUGIN_fini() {
00446   return VMDPLUGIN_SUCCESS;
00447 }
00448 
00449 
00450 #ifdef TEST_WEBPDB_PLUGIN
00451 
00452 int main(int argc, char *argv[]) {
00453   char *file;
00454   if (argc < 2) {
00455     fprintf(stderr, "Usage: %s <pdbcode>\n", argv[0]);
00456     return -1;
00457   }
00458   file = (char *)open_file_read(argv[1], "webpdb",  NULL);
00459   printf("%s\n", file);
00460   free(file);
00461   return 0;
00462 }
00463 
00464 #endif

Generated on Tue Aug 11 03:06:44 2020 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002