Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

mol2plugin.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: mol2plugin.C,v $
00013  *      $Author: petefred $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.31 $       $Date: 2008/02/20 15:46:16 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * mol2 file reader
00020  * More information on this format is available at
00021  *   http://www.tripos.com/data/support/mol2.pdf
00022  *   http://www.tripos.com/mol2/
00023  *
00024  *   DOCK mol2 page: 
00025  *     http://www.csb.yale.edu/userguides/datamanip/dock/DOCK_4.0.1/html/Manual.41.html
00026  *
00027  * This plugin currently reads the following record types:
00028  *  MOLECULE
00029  *  ATOM
00030  *  BOND
00031  *
00032  */
00033 
00034 #include "molfile_plugin.h"
00035 
00036 #include <stdlib.h>
00037 #include <stdio.h>
00038 #include <string.h>
00039 
00040 #if defined(_AIX)
00041 #include <strings.h>
00042 #endif
00043 
00044 #define LINESIZE 256
00045 
00046 typedef struct {
00047   FILE *file;
00048   molfile_atom_t *atomlist;
00049   int natoms, nbonds, optflags, coords_read;
00050   int *from, *to;
00051   float *bondorder;
00052 } mol2data;
00053 
00054 // Open the file and create the mol2 struct used to pass data to the other
00055 // functions.
00056 static void *open_mol2_read(const char *path, const char *filetype, 
00057     int *natoms) {
00058   FILE *fd;
00059   mol2data *mol2;
00060   char line[LINESIZE]; 
00061   int match, nbonds, optflags;
00062 
00063   fd = fopen(path, "r");
00064   if (!fd)
00065     return NULL;
00066   
00067   // Find and read the MOLECULE record
00068   do {
00069     fgets(line, LINESIZE, fd);
00070     if ( ferror(fd) || feof(fd) ) {
00071       fprintf(stderr, "mol2plugin) No molecule record found in file.\n");
00072       return NULL;
00073     }
00074   } while ( strncmp(line, "@<TRIPOS>MOLECULE", 17) );
00075 
00076   fgets(line, LINESIZE, fd);  // Read and ignore the mol_name
00077   fgets(line, LINESIZE, fd);  // Read the molecule info
00078   match = sscanf(line, " %d %d", natoms, &nbonds);
00079   if (match == 1) {
00080     nbonds = 0;
00081   }
00082   else if (match != 2) {
00083     fprintf(stderr, "mol2plugin) Cannot determine the number of atoms.\n");
00084     return NULL;
00085   }
00086   fgets(line, LINESIZE, fd);  // Read and ignore the mol_type
00087   fgets(line, LINESIZE, fd);  // Read the charge_type
00088   if ( strncmp(line, "NO_CHARGES", 10) == 0 ) {
00089     optflags = MOLFILE_NOOPTIONS;
00090   }
00091   else {
00092     optflags = MOLFILE_CHARGE;
00093   }
00094 
00095   // Allocate and initialize the mol2 structure
00096   mol2 = new mol2data;
00097   mol2->file = fd;
00098   mol2->natoms = *natoms;
00099   mol2->nbonds = nbonds;
00100   mol2->optflags = optflags;
00101   mol2->coords_read = 0;
00102   mol2->from = NULL;
00103   mol2->to = NULL;
00104   mol2->bondorder = NULL;
00105 
00106   return mol2;
00107 }
00108 
00109 // Read atom information, but not coordinates.
00110 static int read_mol2(void *v, int *optflags, molfile_atom_t *atoms) {
00111   mol2data *mol2 = (mol2data *)v;
00112   char line[LINESIZE]; 
00113   int i, match;
00114   molfile_atom_t *atom;
00115 
00116   *optflags = mol2->optflags;
00117 
00118   // Find and read the ATOM record
00119   rewind(mol2->file);
00120   do {
00121     fgets(line, LINESIZE, mol2->file);
00122     if ( ferror(mol2->file) || feof(mol2->file) ) {
00123       fprintf(stderr, "mol2plugin) No atom record found in file.\n");
00124       return MOLFILE_ERROR;
00125     }
00126   } while ( strncmp(line, "@<TRIPOS>ATOM", 13) );
00127 
00128   // Read the atoms
00129   for (i = 0; i < mol2->natoms; i++) {
00130     atom = atoms+i;
00131 
00132     fgets(line, LINESIZE, mol2->file);
00133     if ( ferror(mol2->file) || feof(mol2->file) ) {
00134       fprintf(stderr, "mol2plugin) Error occurred reading atom record.\n");
00135       return MOLFILE_ERROR;
00136     }
00137 
00138     match = sscanf(line, " %*d %s %*f %*f %*f %s %d %s %f", 
00139       atom->name, atom->type, &atom->resid, atom->resname, &atom->charge);
00140 
00141     // The last three records are optional for mol2 files, supply values if
00142     // any are missing. Note that these cases are meant to fall through.
00143     switch (match) {
00144       case 0: 
00145         fprintf(stderr, "mol2plugin) Improperly formatted atom record.\n");
00146         return MOLFILE_ERROR;
00147 
00148       case 1:
00149         atom->resid = 0;
00150 
00151       case 2:
00152         sprintf(atom->resname, "%d", atom->resid);
00153 
00154       case 3:
00155         atom->charge = 0.0;
00156 
00157       default:
00158         break;
00159     }
00160 
00161     // Leave these blank when not provided by the file.
00162     atom->chain[0] = '\0';
00163     atom->segid[0] = '\0';
00164   }
00165 
00166   rewind(mol2->file);
00167 
00168   return MOLFILE_SUCCESS;
00169 }
00170 
00171 
00172 
00173 // Create arrays of one-based bond indicies.
00174 static int read_mol2_bonds(void *v, int *nbonds, int **fromptr, int **toptr, float **bondorderptr) {
00175   mol2data *mol2 = (mol2data *)v;
00176   char line[LINESIZE], bond_type[16]; 
00177   int i, match, bond_from, bond_to, bond_index, current_nbonds;
00178   float curr_order;
00179 
00180   if (mol2->nbonds == 0) {
00181     *nbonds = 0;
00182     *fromptr = NULL;
00183     *toptr = NULL;
00184     return MOLFILE_SUCCESS;
00185   }
00186 
00187   current_nbonds = mol2->nbonds;
00188 
00189   // Find and read the BOND record
00190   rewind(mol2->file);
00191   do {
00192     fgets(line, LINESIZE, mol2->file);
00193     if ( ferror(mol2->file) || feof(mol2->file) ) {
00194       fprintf(stderr, "mol2plugin) No atom record found in file.\n");
00195       return MOLFILE_ERROR;
00196     }
00197   } while ( strncmp(line, "@<TRIPOS>BOND", 13) );
00198 
00199   // Read the bonds
00200   bond_index = 0;
00201   for (i = 0; i < mol2->nbonds; i++) {
00202     fgets(line, LINESIZE, mol2->file);
00203     if ( ferror(mol2->file) || feof(mol2->file) ) {
00204       fprintf(stderr, "mol2plugin) Error occurred reading atom record.\n");
00205       return MOLFILE_ERROR;
00206     }
00207 
00208     //Move on if the next line is a header
00209     if (strncmp(line, "@", 1) == 0) {
00210       //Then the bonds are over
00211       break;
00212     }
00213 
00214     match = sscanf(line, " %*d %d %d %s", &bond_from, &bond_to, bond_type);
00215     if (match < 3) {
00216       fprintf(stderr, "mol2plugin) Improperly formatted bond record.\n");
00217       continue;
00218     }
00219     if ( strncmp(bond_type, "nc", 2) == 0 ) {
00220       // Not an actual bond, don't add it to the list
00221       current_nbonds--;
00222     } else if ( strncmp(bond_type, "ar", 2) == 0) {
00223        // Aromatic; consider it order 1.5
00224        curr_order = 1.5;
00225        mol2->from[bond_index] = bond_from;
00226        mol2->to[bond_index] = bond_to;
00227        mol2->bondorder[bond_index]=curr_order;
00228        bond_index++;
00229      } else {
00230       // Add the bond to the list
00231       curr_order=strtod(bond_type,NULL);
00232       if (curr_order<1.0 || curr_order>4.0) curr_order=1;
00233       //fprintf(stdout,"mol2plugin: Bond from %d to %d of order %f\n", bond_from, bond_to, curr_order);
00234       fflush(stdout);
00235       mol2->from[bond_index] = bond_from;
00236       mol2->to[bond_index] = bond_to;
00237       mol2->bondorder[bond_index]=curr_order;
00238       bond_index++;
00239     }
00240   }
00241   if (bond_index > 0) {
00242     *nbonds = current_nbonds;
00243     *fromptr = mol2->from;
00244     *toptr = mol2->to;
00245     *bondorderptr = mol2->bondorder; 
00246   } else {
00247     printf("mol2plugin) WARNING: no bonds defined in mol2 file\n");
00248     *nbonds = 0;
00249     *fromptr = NULL;
00250     *toptr = NULL;
00251     *bondorderptr = NULL; 
00252   }
00253     
00254   rewind(mol2->file);
00255   return MOLFILE_SUCCESS;
00256 }
00257 
00258 
00259 // Read atom coordinates
00260 static int read_mol2_timestep(void *v, int natoms, molfile_timestep_t *ts) {
00261   mol2data *mol2 = (mol2data *)v;
00262   char line[LINESIZE];
00263   int i, match;
00264   float x, y, z;
00265 
00266   // Find and read the ATOM record
00267   do {
00268     fgets(line, LINESIZE, mol2->file);
00269     if ( ferror(mol2->file) || feof(mol2->file) ) {
00270 
00271       //This is a problem, unless we've read at least one timestep
00272       if (mol2->coords_read == 0) {
00273         fprintf(stderr, "mol2plugin) No atom record found in file.\n");
00274       }
00275 
00276       return MOLFILE_ERROR;
00277     }
00278   } while ( strncmp(line, "@<TRIPOS>ATOM", 13) );
00279 
00280   // Read the atoms
00281   for (i = 0; i < mol2->natoms; i++) {
00282     fgets(line, LINESIZE, mol2->file);
00283     if ( ferror(mol2->file) || feof(mol2->file) ) {
00284       fprintf(stderr, "mol2plugin) Error occurred reading atom coordinates.\n");
00285       return MOLFILE_ERROR;
00286     }
00287 
00288 
00289     match = sscanf(line, " %*d %*s %f %f %f", &x, &y, &z);
00290     if (match < 3) {
00291       fprintf(stderr, "mol2plugin) Improperly formatted atom coordinates.\n");
00292       return MOLFILE_ERROR;
00293     }
00294 
00295     if (ts) {
00296       ts->coords[3*i  ] = x;
00297       ts->coords[3*i+1] = y;
00298       ts->coords[3*i+2] = z;
00299     }
00300   }
00301 
00302   mol2->coords_read = 1;
00303   return MOLFILE_SUCCESS;
00304 }
00305 
00306 
00307 static void *open_mol2_write(const char *filename, const char *filetype, 
00308                            int natoms) {
00309   FILE *fd;
00310   mol2data *data;
00311 
00312   fd = fopen(filename, "w");
00313   if (!fd) { 
00314     fprintf(stderr, "mol2plugin) Error: unable to open mol2 file %s for writing\n",
00315             filename);
00316     return NULL;
00317   }
00318   
00319   data = new mol2data;
00320   data->natoms = natoms;
00321   data->file = fd;
00322 //  data->file_name = strdup(filename);
00323   return data;
00324 }
00325 
00326 
00327 static int write_mol2_structure(void *mydata, int optflags, 
00328                                const molfile_atom_t *atoms) {
00329   mol2data *data = (mol2data *)mydata;
00330   data->atomlist = (molfile_atom_t *)malloc(data->natoms*sizeof(molfile_atom_t));
00331   memcpy(data->atomlist, atoms, data->natoms*sizeof(molfile_atom_t));
00332   return MOLFILE_SUCCESS;
00333 }
00334 
00335 /*
00336 void getmol2ff(char* outputtype, const char* psftype) {
00337 //fprintf(stdout,"Doing ff typing on %s\n",psftype);
00338   if (strncmp(psftype,"H",1)==0) {
00339     //It's a hydrogen
00340     strncpy(outputtype, "H   ",4);
00341     return;
00342   } else if (strncmp(psftype,"C",1)==0) {
00343     //It's a carbon... probably
00344     if (strncmp(psftype,"C ",2)==0 || strncmp(psftype,"CA ",3)==0 || strncmp(psftype,"CPH",3)==0 || strncmp(psftype,"CPT",3)==0 || strncmp(psftype,"CC ",3)==0 || strncmp(psftype,"CD ",3)==0 || strncmp(psftype,"CN1",3)==0 || strncmp(psftype,"CN2",3)==0 || strncmp(psftype,"CN3",3)==0 || strncmp(psftype,"CN4",3)==0 || strncmp(psftype,"CN5",3)==0 || strncmp(psftype,"CNA",3)==0) {
00345           strncpy(outputtype, "C.2 ",4);
00346           return;
00347     } else {
00348           strncpy(outputtype, "C.3 ",4);
00349           return;
00350     }  
00351   } else if (strncmp(psftype,"N",1)==0) {
00352      //It"s probably nitrogen
00353      if (strncmp(psftype,"NR",2)==0 || strncmp(psftype,"NH1",3)==0 || strncmp(psftype,"NH2",3)==0 || strncmp(psftype,"NC2",3)==0 || strncmp(psftype,"NY",2)==0 || (strncmp(psftype,"NN",2)==0 && strncmp(psftype,"NN6",3)!=0)) {
00354        strncpy(outputtype, "N.am",4);
00355        return;
00356        } else {
00357        strncpy(outputtype, "N.3 ",4);
00358        return;
00359        }
00360   } else if (strncmp(psftype,"O",1)==0) {
00361      //Probably an oxygen
00362      if (strncmp(psftype,"OH1",3)==0 || strncmp(psftype,"OS",2)==0 || strncmp(psftype,"OT ",3)==0 || strncmp(psftype,"ON4",3)==0 || strncmp(psftype,"ON5",3)==0 || strncmp(psftype,"ON6",3)==0) {
00363         strncpy(outputtype, "O.3 ",4);
00364         return;
00365      } else {
00366         strncpy(outputtype, "O.2 ",4);
00367         return;
00368      } 
00369   } else if (strncmp(psftype,"S",1)==0) {
00370      strncpy(outputtype, "S.3 ",4);
00371      return;
00372   } else if (strncmp(psftype,"P",1)==0) {
00373      strncpy(outputtype, "P.3 ",4);
00374      return;
00375   } else {
00376      strncpy(outputtype, "X.  ",4);
00377      return;
00378   }
00379 }
00380 */
00381 
00382 
00383 
00384 
00385 
00386 
00387 static int write_mol2_timestep(void *mydata, const molfile_timestep_t *ts) {
00388   mol2data *data = (mol2data *)mydata; 
00389   const molfile_atom_t *atom;
00390   const float *pos;
00391   float chrgsq;
00392   int i;
00393 
00394   // try to guess whether we have charge information.
00395   chrgsq=0.0;
00396   atom = data->atomlist;
00397   for (i = 0; i < data->natoms; i++) {
00398       chrgsq += atom->charge*atom->charge;
00399       ++atom;
00400   }
00401 
00402   //print header block
00403   fprintf(data->file, "@<TRIPOS>MOLECULE\n");
00404   fprintf(data->file, "generated by VMD\n");
00405   fprintf(data->file, " %4d %4d    1    0    0\n", data->natoms, data->nbonds);
00406   fprintf(data->file, "SMALL\n");
00407   // educated guess
00408   if (chrgsq > 0.0001) {
00409       fprintf(data->file, "USER_CHARGES\n");
00410   } else {
00411       fprintf(data->file, "NO_CHARGES\n");
00412   }
00413   fprintf(data->file, "****\n");
00414   fprintf(data->file, "Energy = 0\n\n");
00415   
00416   //print atoms block
00417   fprintf(data->file, "@<TRIPOS>ATOM\n");
00418   atom = data->atomlist;
00419   pos = ts->coords;
00420   //char mol2fftype[5];
00421   //mol2fftype[4] = '\0';
00422   for (i = 0; i < data->natoms; i++) {
00423     //getmol2ff(mol2fftype, atom->type);
00424 //    fprintf(data->file, "%7d %-4s      %8.4f  %8.4f  %8.4f %4s %4d  %3s        %8.6f\n", i+1, atom->name, pos[0], pos[1], pos[2], mol2fftype, atom->resid, atom->resname, atom->charge);
00425     fprintf(data->file, "%7d %-4s      %8.4f  %8.4f  %8.4f %4s %4d  %3s        %8.6f\n", i+1, atom->name, pos[0], pos[1], pos[2], atom->type, atom->resid, atom->resname, atom->charge);
00426     ++atom; 
00427     pos += 3;
00428   }
00429 
00430   //print bond info
00431 
00432   int l=1; //number of bond record
00433   printf("mol2plugin) numbonds: %d\n", data->nbonds);
00434   if (data->nbonds>0) fprintf(data->file, "@<TRIPOS>BOND\n");
00435   for (i=0; i<data->nbonds; i++) {
00436     // For mol2, only write bonds for fromptr[i]<toptr[i]
00437     // bondorder is either 1, 2, 3 or a textual representation: am,ar,du,un,nc
00438     // we don't have the info for the text, so we truncate to integer.
00439     fprintf(data->file, "%5d %5d %5d %2d\n", l ,data->from[i], data->to[i],
00440             (int)data->bondorder[i]);
00441     l++;
00442   } 
00443 
00444   // Print out substructure info to keep some programs sane
00445   fprintf(data->file,"\n@<TRIPOS>SUBSTRUCTURE\n");
00446   fprintf(data->file,"1 ****        1 TEMP                        ");
00447   fprintf(data->file,"0 ****  **** 0 ROOT\n");
00448 
00449   return MOLFILE_SUCCESS;
00450 }
00451 
00452 static int write_bonds(void *v, int nbonds, int *fromptr, int *toptr, float *bondorderptr) {
00453   mol2data *data = (mol2data *)v;
00454   data->from = (int *) malloc(nbonds * sizeof(int));
00455   data->to = (int *) malloc(nbonds * sizeof(int));
00456   data->bondorder = (float *) malloc(nbonds * sizeof(float));
00457   //set the pointers for use later
00458   for (int i=0;i<nbonds;i++) {
00459           data->from[i]=fromptr[i];
00460           data->to[i]=toptr[i];
00461           data->bondorder[i]=bondorderptr[i];
00462   }
00463   data->nbonds = nbonds;
00464   return MOLFILE_SUCCESS;
00465 }
00466 
00467 
00468 static void close_mol2_write(void *mydata) {
00469   mol2data *data = (mol2data *)mydata;
00470   if (data) {
00471     if (data->file) fclose(data->file);
00472     if (data->from != NULL) free(data->from);
00473     if (data->to != NULL)   free(data->to);
00474     if (data->bondorder != NULL)   free(data->bondorder);
00475     if (data->atomlist != NULL) free(data->atomlist);
00476     delete data;
00477   }
00478 }
00479 
00480 //
00481 // Free the memory used by the mol2 structure
00482 static void close_mol2_read(void *v) {
00483   mol2data *mol2 = (mol2data *)v;
00484   if (mol2) {
00485     if (mol2->file) fclose(mol2->file);
00486     if (mol2->from != NULL) free(mol2->from);
00487     if (mol2->to != NULL)   free(mol2->to);
00488     if (mol2->bondorder != NULL)   free(mol2->bondorder);
00489     delete mol2;
00490   }
00491 }
00492 
00493 
00494 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr, float **bondorderptr) {
00495   mol2data *mol2 = (mol2data *)v;
00496 
00497   /* now read bond data */
00498 //  *nbonds = start_psf_bonds(psf->fp);
00499 
00500   if (mol2->nbonds > 0) {
00501     mol2->from = (int *) malloc(mol2->nbonds*sizeof(int));
00502     mol2->to = (int *) malloc(mol2->nbonds*sizeof(int));
00503     mol2->bondorder = (float *) malloc(mol2->nbonds*sizeof(float));
00504     if (mol2->from == NULL || mol2->to == NULL || mol2->bondorder == NULL) {
00505       fprintf(stderr, "mol2plugin) ERROR: Failed to allocate memory for bonds\n");
00506       fclose(mol2->file);
00507       mol2->file = NULL;
00508       return MOLFILE_ERROR;
00509     }
00510     if ((read_mol2_bonds(mol2, nbonds, &(mol2->from), &(mol2->to), &(mol2->bondorder))) != MOLFILE_SUCCESS) {
00511       fclose(mol2->file);
00512       mol2->file = NULL;
00513       return MOLFILE_ERROR;
00514     }
00515     *fromptr = mol2->from;
00516     *toptr = mol2->to;
00517     *bondorderptr = mol2->bondorder; 
00518   } else {
00519     printf("mol2plugin) WARNING: no bonds defined in mol2 file.\n");
00520     *fromptr=NULL;
00521     *toptr=NULL;
00522     *bondorderptr=NULL;
00523   }
00524   return MOLFILE_SUCCESS;
00525 }
00526 
00527 static molfile_plugin_t plugin;
00528 
00529 VMDPLUGIN_API int VMDPLUGIN_init() {
00530   memset(&plugin, 0, sizeof(molfile_plugin_t));
00531   plugin.abiversion = vmdplugin_ABIVERSION;
00532   plugin.type = MOLFILE_PLUGIN_TYPE;
00533   plugin.name = "mol2";
00534   plugin.prettyname = "MDL mol2";
00535   plugin.author = "Peter Freddolino, Eamon Caddigan";
00536   plugin.majorv = 0;
00537   plugin.minorv = 12;
00538   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
00539   plugin.filename_extension = "mol2";
00540   plugin.open_file_read = open_mol2_read;
00541   plugin.read_structure = read_mol2;
00542   plugin.read_bonds = read_bonds;
00543   plugin.read_next_timestep = read_mol2_timestep;
00544   plugin.close_file_read = close_mol2_read;
00545   plugin.open_file_write = open_mol2_write;
00546   plugin.write_structure = write_mol2_structure;
00547   plugin.write_timestep = write_mol2_timestep;
00548   plugin.close_file_write = close_mol2_write;
00549   plugin.write_bonds = write_bonds;
00550   return VMDPLUGIN_SUCCESS;
00551 }
00552 
00553 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00554   (*cb)(v, (vmdplugin_t *)&plugin);
00555   return VMDPLUGIN_SUCCESS;
00556 }
00557 
00558 VMDPLUGIN_API int VMDPLUGIN_fini() {
00559   return VMDPLUGIN_SUCCESS;
00560 }
00561 

Generated on Mon Sep 8 01:39:41 2008 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002