Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

corplugin.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: corplugin.c,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.33 $       $Date: 2016/11/28 05:01:53 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * This plugin reads molecular coordinate data stored in 
00020  * CHARMM CARD Cartesian Coordinate format (ascii text format, not binary).
00021  *    http://www.charmm.org/
00022  *    http://www.charmm.org/documentation/c32b2/io.html       
00023  *
00024  * Normal format for less than 100000 atoms and PSF IDs with less than
00025  * five characters
00026  * TITLE (lines starting with "*")
00027  * NATOM (I5)
00028  * ATOMNO RESNO   RES  TYPE  X     Y     Z   SEGID RESID Weighting
00029  *   I5    I5  1X A4 1X A4 F10.5 F10.5 F10.5 1X A4 1X A4 F10.5
00030  *
00031  * Expanded format for more than 100000 atoms (upto 10**10) and with
00032  * upto 8 character PSF IDs. (versions c31a1 and later)
00033  * TITLE (lines starting with "*")
00034  * NATOM (I10)
00035  * ATOMNO RESNO   RES  TYPE  X     Y     Z   SEGID RESID Weighting
00036  *   I10   I10 2X A8 2X A8       3F20.10     2X A8 2X A8 F20.10
00037  *
00038  */
00039 
00040 #include "molfile_plugin.h"
00041 
00042 #include <stdio.h>
00043 #include <string.h>
00044 #include <stdlib.h>
00045 
00046 #define COR_RECORD_LENGTH   141 /* 80 */
00047 
00048 /* Remove leading and trailing whitespace from the string str of size n */
00049 static void strip_whitespace(char *str, int n) {
00050   char *beg, *end;
00051 
00052   beg = str;
00053   end = str + (n-2); /* Point to the last non-null character in the string */
00054 
00055   /* Remove leading whitespace */
00056   while(beg <= end && *beg == ' ') {
00057     beg++;
00058   }
00059 
00060   /* Remove trailing whitespace */
00061   while(end >= str && *end == ' ') {
00062     end--;
00063   }
00064 
00065   if (beg < end) {
00066     /* Shift the string */
00067     *(end+1) = '\0';
00068     memmove(str, beg, (end - beg + 2));
00069   } else {
00070     str[0] = '\0';
00071   }
00072 
00073   return;
00074 }
00075 
00076 /* Get a string from a stream, printing any errors that occur */
00077 static char *corgets(char *s, int n, FILE *stream) {
00078   char *returnVal;
00079 
00080   if (feof(stream)) {
00081     printf("corplugin) Unexpected end-of-file.\n");
00082     returnVal = NULL;
00083   } else if (ferror(stream)) {
00084     printf("corplugin) Error reading file.\n");
00085     return NULL;
00086   } else {
00087     returnVal = fgets(s, n, stream);
00088     if (returnVal == NULL) {
00089       printf("corplugin) Error reading line.\n");
00090     }
00091   }
00092 
00093   return returnVal;
00094 }
00095 
00096 
00097 /* Open the .cor file and skip past the remarks to the first data section.
00098  * Returns the file pointer, or NULL if error.  Also puts the number of
00099  * atoms in the molecule into the given integer.  
00100  */
00101 static FILE *open_cor_file(const char *fname, int *natom, int *iofoext) {
00102   char inbuf[COR_RECORD_LENGTH+2], header[11];
00103   FILE *f;
00104 
00105   *natom = MOLFILE_NUMATOMS_NONE;
00106 
00107   if (!fname) {
00108     printf("corplugin) Error opening file: no filename given.\n");
00109     return NULL;
00110   }
00111 
00112   if ((f = fopen(fname, "r")) == NULL) {
00113     printf("corplugin) Error opening file.\n");
00114     return NULL;
00115   }
00116 
00117   /* Read and discard the header */
00118   do {
00119     if (fgets(inbuf, COR_RECORD_LENGTH+1, f) == NULL) {
00120       fclose(f);
00121       printf("corplugin) Error opening file: cannot read line.\n");
00122       return NULL;
00123     }
00124 
00125     if (sscanf(inbuf, "%10c", header) != 1) {
00126       fclose(f);
00127       printf("corplugin) Error opening file: improperly formatted line.\n");
00128       return NULL;
00129     }
00130 
00131   } while (header[0]=='*');
00132 
00133   /* check for EXT keyword */
00134   *iofoext = 0 ;
00135   if (strstr(inbuf,"EXT") != NULL) 
00136     *iofoext = 1;
00137 
00138   /* check atom count */
00139   header[10] = '\0';
00140   *natom = atoi(header);
00141   if (*natom > 99999) 
00142     *iofoext = 1;
00143 
00144   if (*iofoext == 1)
00145     printf("corplugin) Using EXTended CHARMM coordinates file\n");
00146 
00147   return f;
00148 }
00149 
00150 /* Read in the next atom info into the given storage areas; this assumes
00151    that file has already been moved to the beginning of the atom records.
00152    Returns the serial number of the atom. If there is an error, returns -1.*/
00153 static int get_cor_atom(FILE *f, char *atomName, char *atomType, char
00154     *resName, char *segName, int *resId, int ioext) {
00155   char inbuf[COR_RECORD_LENGTH+2], numAtomStr[11], resNStr[11], resIdStr[11];
00156   char atomNameStr[11], segNameStr[11], resNameStr[11];
00157   int numAtom;
00158 
00159   memset(inbuf, 0, sizeof(inbuf));
00160 
00161   if (corgets(inbuf, COR_RECORD_LENGTH+1, f) == NULL) {
00162     return -1;
00163   }
00164 
00165   if (strlen(inbuf) < 60) {
00166     printf("corplugin) Line too short: \n%s\n", inbuf);
00167     return -1;
00168   }
00169 
00170   memset(numAtomStr, 0, sizeof(numAtomStr));
00171   memset(resNStr, 0, sizeof(resNStr));
00172   memset(resIdStr, 0, sizeof(resIdStr));
00173   memset(resNameStr, 0, sizeof(resNameStr));
00174   memset(segNameStr, 0, sizeof(segNameStr));
00175   memset(atomNameStr, 0, sizeof(atomNameStr));
00176 
00177   /*
00178 
00179     CHARMM has a variety of input formats for the
00180        read coor card
00181     command. In order to support all of them the simplest would be to replace 
00182 
00183     if (sscanf(inbuf, "%5c%5c%5c%5c%*10c%*10c%*10c%5c", 
00184 
00185     with
00186 
00187     if (sscanf(inbuf, "%s %s %s %s %*s %*s %*s %s", 
00188 
00189     However this solution has 2 problems:
00190      1. buffer overruns
00191      2. it doesn't handle the cases where X,Y,Z values have no spaces in-between
00192 
00193     In this fix we handle only two most important cases, depending on
00194      the value of IOFOrmat command (either EXTEnded or NOEXtended):
00195      EXT:    2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10
00196      NOEXT:  2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5
00197 
00198     This implementation introduces new flag iofoext in cordata
00199     structure, which can choose between the 2 formats. Note that
00200     CHARMM adds EXT keyword in the coordinate and psf files in the
00201     case of IOFO EXTE command in the input script, or automatically
00202     when there are 100,000 or more atoms in the system!
00203 
00204   */
00205 
00206   if (ioext == 1) {
00207     if (sscanf(inbuf, "%10c%10c%10c%10c%*20c%*20c%*20c%10c%10c",
00208                numAtomStr, resNStr, resNameStr, atomNameStr, segNameStr, resIdStr) != 6) {
00209       printf("corplugin) Improperly formatted line: \n%s\n", inbuf);
00210       return -1;
00211     }
00212     strip_whitespace(resName, sizeof(resName));  /* strip from long original */
00213     strip_whitespace(atomName, sizeof(atomName));
00214     strip_whitespace(segName, sizeof(segName));
00215     memcpy(resName, resNameStr, 7);              /* XXX truncate extra chars */
00216     memcpy(atomName, atomNameStr, 7);
00217     memcpy(segName, segNameStr, 7);
00218     resName[7] = '\0';                           /* NUL terminate strings.. */
00219     atomName[7] = '\0';
00220     segName[7] = '\0';
00221   } else {
00222     if (sscanf(inbuf, "%5c%5c%5c%5c%*10c%*10c%*10c%5c%5c",
00223                numAtomStr, resNStr, resName, atomName, segName, resIdStr) != 6) {
00224       printf("corplugin) Improperly formatted line: \n%s\n", inbuf);
00225       return -1;
00226     }
00227     strip_whitespace(resName, 8);
00228     strip_whitespace(atomName, 8);
00229     strip_whitespace(segName, 8);
00230   }
00231 
00232   numAtom = atoi(numAtomStr);
00233   *resId = atoi(resIdStr);
00234   strcpy(atomType, atomName);
00235 
00236   return numAtom;
00237 }
00238 
00239 
00240 /*
00241  * API functions
00242  */
00243 
00244 typedef struct {
00245   FILE *file;
00246   int numatoms;
00247   int iofoext;      /* flag for extended CHARMM c31 version support */
00248 } cordata;
00249 
00250 static void *open_cor_read(const char *path, const char *filetype, 
00251     int *natoms) {
00252   int ioext ;
00253   FILE *fd;
00254   cordata *cor;
00255 
00256   if (!(fd = open_cor_file(path, natoms, &ioext))) {
00257     return NULL;
00258   } 
00259   cor = (cordata *) malloc(sizeof(cordata));
00260   memset(cor, 0, sizeof(cordata));
00261   cor->numatoms = *natoms;
00262   cor->file = fd;
00263   cor->iofoext = ioext ;
00264   return cor;
00265 }
00266 
00267 static int read_cor_structure(void *v, int *optflags, molfile_atom_t *atoms) {
00268   cordata *data = (cordata *)v;
00269   int i;
00270   
00271   /* we don't read any optional data */
00272   *optflags = MOLFILE_NOOPTIONS;
00273 
00274   for (i=0; i<data->numatoms; i++) {
00275     molfile_atom_t *atom = atoms+i;
00276     if (get_cor_atom(data->file, atom->name, atom->type, 
00277                      atom->resname, atom->segid, 
00278                      &atom->resid, data->iofoext) < 0) {
00279       printf("corplugin) couldn't read atom %d\n", i);
00280       return MOLFILE_ERROR;
00281     }
00282     atom->chain[0] = atom->segid[0];
00283     atom->chain[1] = '\0';
00284   }
00285 
00286   rewind(data->file);
00287   return MOLFILE_SUCCESS;
00288 }
00289 
00290 static int read_cor_timestep(void *v, int natoms, molfile_timestep_t *ts) {
00291   cordata *cor = (cordata *)v;
00292   char inbuf[COR_RECORD_LENGTH+2], header[6];
00293   char xStr[21], yStr[21], zStr[21];
00294   int i;
00295 
00296   memset(inbuf, 0, sizeof(inbuf));
00297   memset(header, 0, sizeof(header));
00298   memset(xStr, 0, sizeof(xStr));
00299   memset(yStr, 0, sizeof(yStr));
00300   memset(zStr, 0, sizeof(zStr));
00301 
00302   /* Skip the header */
00303   do {
00304     /* Return -1 on EOF */
00305     if (feof(cor->file) || ferror(cor->file) || 
00306         (fgets(inbuf, COR_RECORD_LENGTH+1, cor->file) == NULL)) {
00307       return MOLFILE_ERROR;
00308     }
00309 
00310     if (sscanf(inbuf, " %5c", header) != 1) {
00311       printf("corplugin) Improperly formatted line.\n");
00312       return MOLFILE_ERROR;
00313     }
00314 
00315   } while (header[0]=='*');
00316 
00317 
00318   /* read the coordinates */
00319   for (i = 0; i < natoms; i++) {
00320     if (corgets(inbuf, COR_RECORD_LENGTH+1, cor->file) == NULL) {
00321       return MOLFILE_ERROR;
00322     }
00323     
00324     if (cor->iofoext == 1 ) {
00325       if (sscanf(inbuf, "%*10c%*10c%*10c%*10c%20c%20c%20c%*10c", 
00326                  xStr, yStr, zStr) != 3) {
00327         printf("corplugin) Error reading coordinates on line %d.\n%s\n", i, inbuf);
00328         return MOLFILE_ERROR;
00329       } else if (ts != NULL) {
00330         /* We have a timestep -- save the coordinates */
00331         ts->coords[3*i  ] = atof(xStr);
00332         ts->coords[3*i+1] = atof(yStr);
00333         ts->coords[3*i+2] = atof(zStr);
00334       }
00335     } else {
00336       if (sscanf(inbuf, "%*5c%*5c%*5c%*5c%10c%10c%10c%*5c", 
00337                  xStr, yStr, zStr) != 3) {
00338         printf("corplugin) Error reading coordinates on line %d.\n%s\n", i, inbuf);
00339         return MOLFILE_ERROR;
00340       } else if (ts != NULL) {
00341         /* We have a timestep -- save the coordinates */
00342         ts->coords[3*i  ] = atof(xStr);
00343         ts->coords[3*i+1] = atof(yStr);
00344         ts->coords[3*i+2] = atof(zStr);
00345       }
00346     }
00347   }
00348 
00349   return MOLFILE_SUCCESS;
00350 }
00351 
00352 static void close_cor_read(void *mydata) {
00353   cordata *data = (cordata *)mydata;
00354   if (data) {
00355     if (data->file) fclose(data->file);
00356     free(data);
00357   }
00358 }  
00359 
00360 /*
00361  * Initialization stuff down here
00362  */
00363 
00364 static molfile_plugin_t plugin;
00365 
00366 VMDPLUGIN_API int VMDPLUGIN_init() {
00367   memset(&plugin, 0, sizeof(molfile_plugin_t));
00368   plugin.abiversion = vmdplugin_ABIVERSION;
00369   plugin.type = MOLFILE_PLUGIN_TYPE;
00370   plugin.name = "cor";
00371   plugin.prettyname = "CHARMM Coordinates";
00372   plugin.author = "Eamon Caddigan, John Stone";
00373   plugin.majorv = 0;
00374   plugin.minorv = 9;
00375   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
00376   plugin.filename_extension = "cor";
00377   plugin.open_file_read = open_cor_read;
00378   plugin.read_structure = read_cor_structure;
00379   plugin.read_next_timestep = read_cor_timestep;
00380   plugin.close_file_read = close_cor_read;
00381   return VMDPLUGIN_SUCCESS;
00382 }
00383 
00384 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
00385   (*cb)(v, (vmdplugin_t *)&plugin);
00386   return VMDPLUGIN_SUCCESS;
00387 }
00388 
00389 VMDPLUGIN_API int VMDPLUGIN_fini() {
00390   return VMDPLUGIN_SUCCESS;
00391 }
00392 

Generated on Tue Dec 3 03:09:57 2024 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002