Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

mdfplugin.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: mdfplugin.C,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.21 $       $Date: 2016/11/28 05:01:54 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * Molecular data file (.mdf) reader
00020  * Insight II, Discover, etc. structure and bond information. This plugin
00021  * reads only the topology section, ignoring the optional symmetry and
00022  * atomset sections.
00023  *
00024  * Format specification can be found at:
00025  * http://instinct.v24.uthscsa.edu/~hincklab/html/soft_packs/msi_docs/insight980/formats980/File_Formats_1998.html#484257
00026  *
00027  * TODO: The current code reads the file *four* times -- once on open, once
00028  * to read the structure, and twice to read the bonds. Perhaps these could
00029  * be consolidated, e.g. by counting the bonds and populating the hash
00030  * tables during open or read_structure.
00031  *
00032  */
00033 
00034 #include "molfile_plugin.h"
00035 
00036 #define THISPLUGIN plugin
00037 #include "vmdconio.h"
00038 
00039 #define VMDPLUGIN_STATIC
00040 #include "hash.h"
00041 
00042 #include <stdlib.h>
00043 #include <stdio.h>
00044 #include <string.h>
00045 #include <ctype.h>
00046 
00047 #if defined(_AIX)
00048 #include <strings.h>
00049 #endif
00050 
00051 #define LINESIZE 256
00052 #define NAMESIZE 32
00053 
00054 typedef struct {
00055   FILE *file;
00056   int natoms, nmols, *from, *to;
00057   long mol_data_location;
00058 } mdfdata;
00059 
00060 // Read a line of atom data and store the values in the atom structure
00061 // Return 1 on success, 0 on error
00062 static int read_mdf_structure_line(molfile_atom_t *atom, const char *line) {
00063   // Read pertinent structure information from the line
00064   if ( sscanf(line, "%[^:]:%s %s %*s %*s %*d %*s %f %*d %*d %*d %f",
00065               atom->resname, atom->name, atom->type, 
00066               &atom->charge, &atom->occupancy) != 5 ) {
00067     return 0;
00068   }
00069 
00070   // Get the resid from the resname
00071   if ( sscanf(atom->resname, "%*[^_]_%d", &atom->resid) != 1 ) {
00072     return 0;
00073   }
00074 
00075   // Provide defaults for missing values
00076   atom->chain[0] = '\0';
00077   atom->segid[0] = '\0';
00078 
00079   return 1;
00080 }
00081 
00082 // Read the atom info from src and copy the connectivity record to dest.
00083 // Convert each record to resname_resnumber:atom form
00084 // Return 1 on success, 0 on error
00085 static int get_mdf_bonds(char *dest, const char *src) {
00086   char resinfo[NAMESIZE], bond_records[LINESIZE], *curr, *next, *tmp;
00087   int retval;
00088 
00089   // Get the connectivity records
00090   retval = sscanf(src, "%[^:]:%*s %*s %*s %*s %*d %*s %*f %*d %*d %*d %*f %*f %255c", resinfo, bond_records);
00091   if (retval < 1) {
00092     // parse error.
00093     return -1;
00094     // no bonds for this atom
00095   } else if (retval == 1) return 0;
00096   
00097   // Append the bonds to the destination string, converting then to the
00098   // correct format along the way.
00099   dest[0] = '\0';
00100   for ( curr = bond_records; (next = strchr(curr, ' ')) != NULL;
00101         curr = next + 1 ) {
00102     *next = '\0';
00103 
00104     // Prepend the resname and resid to the destination atom name if it's
00105     // not already present.
00106     if ( strchr(curr, ':') == NULL ) {
00107       strcat(dest, resinfo);
00108       strcat(dest, ":");
00109     }
00110 
00111     // Remove cell/sympop/bondorder information from the bond
00112     if ( ((tmp = strchr(curr, '%')) != NULL) ||
00113          ((tmp = strchr(curr, '#')) != NULL) ||
00114          ((tmp = strchr(curr, '/')) != NULL) ||
00115          ((tmp = strchr(curr, '\n')) != NULL) ) {
00116       *tmp = '\0';
00117     }
00118     strcat(dest, curr);
00119     strcat(dest, " ");
00120   }
00121 
00122   return 1;
00123 }
00124 
00125 // Return the number of bond records on a line
00126 static int count_mdf_bonds(const char *line) {
00127   char bond_records[LINESIZE];
00128   int bonds = 0;
00129   char *tmp;
00130   
00131   // no bonds or parse error
00132   if ( get_mdf_bonds(bond_records, line) < 1) {
00133     return 0;
00134   }
00135   
00136   for ( tmp = bond_records; (tmp = strchr(tmp, ' ')) != NULL;
00137         tmp++ ) {
00138     bonds++;
00139   }
00140 
00141   return bonds;
00142 }
00143 
00144 // Open the file and create the mdf struct used to pass data to the other
00145 // functions.
00146 static void *open_mdf_read(const char *path, const char *filetype, 
00147     int *natoms) {
00148   FILE *fd;
00149   mdfdata *mdf;
00150   long mol_data_location;
00151   char line[LINESIZE]; 
00152   int nmols = 0;
00153 
00154   fd = fopen(path, "r");
00155   if (!fd)
00156     return NULL;
00157   
00158   // Find the first molecule record
00159   do {
00160     fgets(line, LINESIZE, fd);
00161     if ( ferror(fd) || feof(fd) ) {
00162       vmdcon_printf(VMDCON_ERROR, "mdfplugin) No molecule record found in file.\n");
00163       return NULL;
00164     }
00165   } while ( strncmp(line, "@molecule", 9) );
00166 
00167   // Remember the location of the beginning of the molecule data
00168   mol_data_location = ftell(fd);
00169 
00170   // Count the atoms in each molecule
00171   while ( line[0] != '#' ) {
00172     fgets(line, LINESIZE, fd);
00173 
00174     // Count atoms until a new molecule or the end of the section is reached
00175     while ( (line[0] != '@') && (line[0] != '#') ) {
00176       // Ignore blank and comment lines
00177       if ( !isspace(line[0]) && (line[0] != '!') )
00178         *natoms = *natoms + 1;
00179       fgets(line, LINESIZE, fd);
00180       if ( ferror(fd) || feof(fd) ) {
00181         vmdcon_printf(VMDCON_ERROR, "mdfplugin) Error while counting atoms.\n");
00182         return NULL;
00183       }
00184     }
00185     nmols++;
00186   }
00187 
00188   // Allocate and initialize the mdf structure
00189   vmdcon_printf(VMDCON_INFO, "mdfplugin) %d molecule records found in file.\n", nmols);
00190   mdf = new mdfdata;
00191   mdf->file = fd;
00192   mdf->natoms = *natoms;
00193   mdf->nmols = nmols;
00194   mdf->from = NULL;
00195   mdf->to = NULL;
00196   mdf->mol_data_location = mol_data_location; 
00197 
00198   return mdf;
00199 }
00200 
00201 // Read the atom information for each molecule, but not bonds.
00202 // XXX - this ignores the column records, which may cause the atom records
00203 // to be read incorrectly.
00204 static int read_mdf_structure(void *v, int *optflags, molfile_atom_t *atoms) {
00205   mdfdata *mdf = (mdfdata *)v;
00206   char line[LINESIZE];
00207   int mol_num;
00208   molfile_atom_t *atom = atoms;
00209 
00210   *optflags = MOLFILE_OCCUPANCY | MOLFILE_CHARGE;
00211 
00212   // Seek to the first molecule record
00213   fseek(mdf->file, mdf->mol_data_location, SEEK_SET);
00214   line[0] = '\0';
00215 
00216   // Read the atom structure for each molecule
00217   mol_num = 0;
00218   while ( line[0] != '#' ) {
00219     fgets(line, LINESIZE, mdf->file);
00220 
00221     // Read atom structure for the current molecule
00222     while ( (line[0] != '@') && (line[0] != '#') ) {
00223       // Ignore blank and comment lines
00224       if ( !isspace(line[0]) && (line[0] != '!') ) {
00225         if ( !read_mdf_structure_line(atom, line) ) {
00226           vmdcon_printf(VMDCON_ERROR, "mdfplugin) Improperly formatted atom record encountered while reading structure.\n");
00227           return MOLFILE_ERROR;
00228         }
00229 
00230         // use the chain name to flag different molecules by using
00231         // characters A-Z. NOTE: chain can only hold 1 char.
00232         sprintf(atom->chain, "%c", ('A'+(mol_num % 26)));
00233 
00234         atom++;
00235       }
00236 
00237       fgets(line, LINESIZE, mdf->file);
00238       if ( ferror(mdf->file) || feof(mdf->file) ) {
00239         vmdcon_printf(VMDCON_ERROR, "mdfplugin) File error while reading structure.\n");
00240         return MOLFILE_ERROR;
00241       }
00242     }
00243     mol_num++;
00244   }
00245 
00246   return MOLFILE_SUCCESS;
00247 }
00248 
00249 // Create arrays of one-based bond indicies.
00250 static int read_mdf_bonds(void *v, int *nbonds, int **from_data, int **to_data, 
00251                           float **bondorderptr, int **bondtype, 
00252                           int *nbondtypes, char ***bondtypename) {
00253   mdfdata *mdf = (mdfdata *)v;
00254   int mol, atom, bond_count, *fromptr, *toptr, tmp_to;
00255   char *curr, *next, line[LINESIZE], bond_records[LINESIZE];
00256   char (*atomnames)[NAMESIZE]; // Dynamic array of cstrings
00257   hash_t *hasharray;           // Array of hash tables
00258 
00259   // Allocate and initialize the hash table for each molecule.
00260   hasharray = new hash_t[mdf->nmols];
00261   for (mol = 0; mol < mdf->nmols; mol++) {
00262     hash_init(&hasharray[mol], 256);
00263   }
00264   atomnames = new char[mdf->natoms][NAMESIZE];
00265 
00266   // Populate the hash table; key: atom name; value: one-based atom index.
00267   // Count the bonds, each bond is counted twice.
00268   fseek(mdf->file, mdf->mol_data_location, SEEK_SET);
00269   line[0] = '\0';
00270   atom = 1;
00271   mol = 0;
00272   bond_count = 0;
00273   while ( line[0] != '#' ) {
00274     fgets(line, LINESIZE, mdf->file);
00275 
00276     // Read the atom names
00277     while ( (line[0] != '@') && (line[0] != '#') ) {
00278       // Ignore blank and comment lines
00279       if ( !isspace(line[0]) && (line[0] != '!') ) {
00280         if ( sscanf(line, "%s %*s", atomnames[atom-1]) != 1 ) {
00281           vmdcon_printf(VMDCON_ERROR, "mdfplugin) Improperly formatted atom record encountered while reading bonds.\n");
00282           return MOLFILE_ERROR;
00283         }
00284         if ( hash_insert(&hasharray[mol], atomnames[atom-1], atom) != HASH_FAIL ) {
00285           vmdcon_printf(VMDCON_ERROR, "mdfplugin) Could not add atom to hash table.\n");
00286           return MOLFILE_ERROR;
00287         }
00288 
00289         bond_count += count_mdf_bonds(line);
00290         atom++;
00291       }
00292 
00293       fgets(line, LINESIZE, mdf->file);
00294       if ( ferror(mdf->file) || feof(mdf->file) ) {
00295         vmdcon_printf(VMDCON_ERROR, "mdfplugin) File error while reading bonds.\n");
00296         return MOLFILE_ERROR;
00297       }
00298     }
00299 
00300     mol++;
00301   }
00302 
00303   bond_count /= 2;
00304   mdf->from = new int[bond_count];
00305   mdf->to = new int[bond_count];
00306   fromptr = mdf->from;
00307   toptr = mdf->to;
00308 
00309   // Read the molecules, storing the bond-indicies in fromptr and toprt
00310   fseek(mdf->file, mdf->mol_data_location, SEEK_SET);
00311   line[0] = '\0';
00312   atom = 1;
00313   mol = 0;
00314   while ( line[0] != '#' ) {
00315     fgets(line, LINESIZE, mdf->file);
00316 
00317     // Read the bonds
00318     while ( (line[0] != '@') && (line[0] != '#') ) {
00319       int retval;
00320 
00321       // Ignore blank and comment lines
00322       if ( !isspace(line[0]) && (line[0] != '!') ) {
00323         retval = get_mdf_bonds(bond_records, line);
00324         if (retval < 0) {
00325           vmdcon_printf(VMDCON_ERROR, "mdfplugin) Error reading bonds from atom data.\n");
00326           return MOLFILE_ERROR;
00327         }
00328 
00329         if (retval > 0) {
00330           // Read each bond in the line
00331           for ( curr = bond_records; (next = strchr(curr, ' ')) != NULL; 
00332                 curr = next+1 ) {
00333             *next = '\0';
00334             tmp_to = hash_lookup(&hasharray[mol], curr);
00335             if (tmp_to == HASH_FAIL) {
00336               vmdcon_printf(VMDCON_ERROR, "mdfplugin) Could not find atom '%s' in hash table.\n", curr);
00337               return MOLFILE_ERROR;
00338             }
00339             else if (tmp_to > atom) {
00340               // Only count bonds to atoms greater than the current one, since
00341               // each bond is listed twice
00342               *fromptr = atom;
00343               *toptr = tmp_to;
00344               fromptr++;
00345               toptr++;
00346             }
00347           }
00348         }
00349 
00350         atom++;
00351       }
00352 
00353       fgets(line, LINESIZE, mdf->file);
00354       if ( ferror(mdf->file) || feof(mdf->file) ) {
00355         vmdcon_printf(VMDCON_ERROR, "mdfplugin) File error while reading bonds.\n");
00356         return MOLFILE_ERROR;
00357       }
00358     }
00359 
00360     mol++;
00361   }
00362 
00363   for (mol = 0; mol < mdf->nmols; mol++) {
00364     hash_destroy(&hasharray[mol]);
00365   }
00366   delete [] hasharray;
00367   delete [] atomnames;
00368 
00369   *nbonds = bond_count;
00370   *from_data = mdf->from;
00371   *to_data = mdf->to;
00372   *bondorderptr = NULL; // not implemented yet
00373   *bondtype = NULL;
00374   *nbondtypes = 0;
00375   *bondtypename = NULL;
00376 
00377   return MOLFILE_SUCCESS;
00378 }
00379 
00380 // Free the memory used by the mdf structure
00381 static void close_mdf_read(void *v) {
00382   mdfdata *mdf = (mdfdata *)v;
00383   if (mdf) {
00384     if (mdf->file) fclose(mdf->file);
00385     if (mdf->from) delete [] mdf->from;
00386     if (mdf->to)   delete [] mdf->to;
00387     delete mdf;
00388   }
00389 }
00390 
00391 // Plugin Initialization
00392 
00393 VMDPLUGIN_API int VMDPLUGIN_init(void) { 
00394   memset(&plugin, 0, sizeof(molfile_plugin_t));
00395   plugin.abiversion = vmdplugin_ABIVERSION;
00396   plugin.type = MOLFILE_PLUGIN_TYPE;
00397   plugin.name = "mdf";
00398   plugin.prettyname = "InsightII MDF";
00399   plugin.author = "Eamon Caddigan, Axel Kohlmeyer";
00400   plugin.majorv = 0;
00401   plugin.minorv = 6;
00402   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
00403   plugin.filename_extension = "mdf";
00404   plugin.open_file_read = open_mdf_read;
00405   plugin.read_structure = read_mdf_structure;
00406   plugin.read_bonds = read_mdf_bonds;
00407   plugin.close_file_read = close_mdf_read;
00408   return VMDPLUGIN_SUCCESS;
00409 }
00410 
00411 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00412   (*cb)(v, (vmdplugin_t *)&plugin);
00413   return VMDPLUGIN_SUCCESS;
00414 }
00415 
00416 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
00417 

Generated on Tue Aug 11 03:06:36 2020 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002