Main Page   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

netcdfplugin.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2006 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: netcdfplugin.c,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.23 $       $Date: 2006/06/19 18:19:45 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * NetCDF based trajectories, used by AMBER 9, MMTK, etc.
00020  */
00021 
00022 #include <netcdf.h>
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #include <ctype.h>
00027 #include "molfile_plugin.h"
00028 
00029 #define CDF_TYPE_UNKNOWN 0
00030 #define CDF_TYPE_AMBER   1
00031 #define CDF_TYPE_MMTK    2
00032 
00033 #define CDF_SUCCESS      0
00034 #define CDF_ERR         -1
00035 
00036 typedef struct {
00037   int trajectorytype;
00038   int step_numberdimid;
00039   size_t step_numberdim;
00040   int minor_step_numberdimid;
00041   size_t minor_step_numberdim;
00042   int atom_numberdimid;
00043   size_t atom_numberdim;
00044   int xyzdimid;
00045   size_t xyzdim;
00046   int box_size_lengthdimid;
00047   size_t box_size_lengthdim;
00048   int description_lengthdimid;
00049   size_t description_lengthdim;
00050   char *description;
00051   int description_id;
00052   int step_id;
00053   int time_id;
00054   int box_size_id;
00055   int configuration_id;
00056   int has_box;
00057   char *comment;
00058 } mmtkdata;
00059 
00060 typedef struct {
00061   int has_box;
00062   int atomdimid;
00063   size_t atomdim;
00064   int spatialdimid;
00065   size_t spatialdim;
00066   int framedimid;
00067   size_t framedim;
00068   char *conventionversion;
00069   char *title;
00070   char *application;
00071   char *program;
00072   char *programversion;
00073   int spatial_id;                  /* "xyz" */
00074   int cell_spatial_id;             /* "abc" */
00075   int cell_angular_id;             /* "alpha, beta, gamma" */
00076   int time_id;                     /* frame time in picoseconds */
00077   int coordinates_id;              /* coords in angstroms */
00078   char *coordinates_units;         /* coordinates units */
00079   float coordinates_scalefactor;   /* coordinates scaling factor */
00080   int cell_lengths_id;             /* cell lengths in angstroms */
00081   char *cell_lengths_units;        /* cell lengths units */
00082   float cell_lengths_scalefactor;  /* cell lengths scaling factor */
00083   int cell_angles_id;              /* cell angles in degrees */
00084   char *cell_angles_units;         /* cell angles units */
00085   float cell_angles_scalefactor;   /* cell angles scaling factor */
00086   int velocities_id;               /* velocities in angstroms/picosecond */
00087 } amberdata;
00088 
00089 
00090 typedef struct {
00091   /* sub-format independent data */
00092   int ncid;
00093   int type;
00094   int natoms; 
00095   int curframe;
00096   char *conventions;
00097 
00098   /* stuff used by AMBER */
00099   amberdata amber;
00100 
00101   /* stuff used by MMTK */
00102   mmtkdata mmtk;
00103 
00104 } cdfdata;
00105 
00106 
00107 static void close_cdf_read(void *mydata) {
00108   cdfdata *cdf = (cdfdata *)mydata;
00109 
00110   nc_close(cdf->ncid);
00111 
00112   /* AMBER stuff */
00113   if (cdf->amber.title)
00114     free(cdf->amber.title);
00115 
00116   if (cdf->amber.application)
00117     free(cdf->amber.application);
00118 
00119   if (cdf->amber.program)
00120     free(cdf->amber.program);
00121 
00122   if (cdf->amber.programversion)
00123     free(cdf->amber.programversion);
00124 
00125   if (cdf->amber.conventionversion)
00126     free(cdf->amber.conventionversion);
00127 
00128   if (cdf->amber.coordinates_units)
00129     free(cdf->amber.coordinates_units);
00130 
00131   if (cdf->amber.cell_lengths_units)
00132     free(cdf->amber.cell_lengths_units);
00133 
00134   /* MMTK stuff */
00135   if (cdf->mmtk.comment)
00136     free(cdf->mmtk.comment);
00137 
00138   /* format independent stuff */
00139   if (cdf->conventions)
00140     free(cdf->conventions);
00141 
00142   free(cdf);
00143 }
00144 
00145 
00146 
00147 static int open_amber_cdf_read(cdfdata *cdf) {
00148   int rc;
00149   size_t len; 
00150   amberdata *amber = &cdf->amber;
00151 
00152   /* global attrib: "ConventionVersion" -- required */
00153   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "ConventionVersion", &len);
00154   if (rc == NC_NOERR && len > 0) {
00155     amber->conventionversion = (char *) malloc((len+1) * sizeof(char));
00156     nc_get_att_text(cdf->ncid, NC_GLOBAL, "ConventionVersion", amber->conventionversion);
00157     amber->conventionversion[len] = '\0';
00158     printf("netcdfplugin) trajectory follows AMBER conventions version '%s'\n", amber->conventionversion);
00159   } else {
00160     return CDF_ERR;
00161   }
00162 
00163   /* at this point we know that this is an AMBER trajectory */
00164   cdf->type = CDF_TYPE_AMBER;
00165 
00166   /* initialize default scaling factors so they are always set to a sane */
00167   /* value even if problems occur later */
00168   amber->coordinates_scalefactor = 1.0;
00169   amber->cell_lengths_scalefactor = 1.0;
00170   amber->cell_angles_scalefactor = 1.0;
00171 
00172 
00173   /* global attrib: "program" -- required */
00174   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "program", &len);
00175   if (rc == NC_NOERR && len > 0) {
00176     amber->program = (char *) malloc((len+1) * sizeof(char));
00177     nc_get_att_text(cdf->ncid, NC_GLOBAL, "program", amber->program);
00178     amber->program[len] = '\0';
00179     printf("netcdfplugin) AMBER: program '%s'\n", amber->program);
00180   } else {
00181     printf("netcdfplugin) AMBER: Missing required 'program' global attribute, corrupt file?\n");
00182   }
00183 
00184 
00185   /* global attrib: "programVersion" -- required */
00186   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "programVersion", &len);
00187   if (rc == NC_NOERR && len > 0) {
00188     amber->programversion = (char *) malloc((len+1) * sizeof(char));
00189     nc_get_att_text(cdf->ncid, NC_GLOBAL, "programVersion", amber->programversion);
00190     amber->programversion[len] = '\0';
00191     printf("netcdfplugin) AMBER: program version '%s'\n", amber->programversion);
00192   } else {
00193     printf("netcdfplugin) AMBER: Missing required 'programVersion' global attribute, corrupt file?\n");
00194   }
00195 
00196 
00197   /* global attrib: "title" -- optional */
00198   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "title", &len);
00199   if (rc == NC_NOERR && len > 0) {
00200     amber->title = (char *) malloc((len+1) * sizeof(char));
00201     nc_get_att_text(cdf->ncid, NC_GLOBAL, "title", amber->title);
00202     amber->title[len] = '\0';
00203     printf("netcdfplugin) AMBER: title '%s'\n", amber->title);
00204   } 
00205 
00206 
00207   /* global attrib: "application" -- optional */
00208   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "application", &len);
00209   if (rc == NC_NOERR && len > 0) {
00210     amber->application = (char *) malloc((len+1) * sizeof(char));
00211     nc_get_att_text(cdf->ncid, NC_GLOBAL, "application", amber->application);
00212     amber->application[len] = '\0';
00213     printf("netcdfplugin) AMBER: application '%s'\n", amber->application);
00214   } 
00215 
00216 
00217 /* XXX lots of additional error checking is needed below... */
00218 
00219   /* read in spatial dimension */
00220   rc = nc_inq_dimid(cdf->ncid, "spatial", &amber->spatialdimid);
00221   if (rc == NC_NOERR) {    
00222     rc = nc_inq_dimlen(cdf->ncid, amber->spatialdimid, &amber->spatialdim);
00223     if (rc == NC_NOERR) {
00224       printf("netcdfplugin) AMBER: spatial dimension: %ld\n", (long)amber->spatialdim);
00225     } else {
00226       printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
00227       printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
00228       amber->spatialdim = 3;
00229     }
00230   } else {
00231     printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
00232     printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
00233     amber->spatialdim = 3;
00234   }
00235  
00236   /* read in atom dimension */
00237   rc = nc_inq_dimid(cdf->ncid, "atom", &amber->atomdimid);
00238   if (rc == NC_NOERR) {    
00239     rc = nc_inq_dimlen(cdf->ncid, amber->atomdimid, &amber->atomdim);
00240     if (rc == NC_NOERR) {
00241       printf("netcdfplugin) AMBER: atom dimension: %ld\n", (long)amber->atomdim);
00242       cdf->natoms = amber->atomdim; /* copy to format independent part */
00243     } else  {
00244       printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
00245       return CDF_ERR;
00246     }
00247   } else {
00248     printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
00249     return CDF_ERR;
00250   }
00251  
00252 
00253   /* read in frame dimension */
00254   rc = nc_inq_dimid(cdf->ncid, "frame", &amber->framedimid);
00255   if (rc == NC_NOERR) {    
00256     rc = nc_inq_dimlen(cdf->ncid, amber->framedimid, &amber->framedim);
00257     if (rc == NC_NOERR) {
00258       printf("netcdfplugin) AMBER: frame dimension: %ld\n", (long)amber->framedim);
00259     } else {
00260       printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
00261       return CDF_ERR;
00262     }
00263   } else {
00264     printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
00265     return CDF_ERR;
00266   }
00267 
00268   /* 
00269    * get ID values for all of the variables we're interested in 
00270    */
00271 #if 0
00272   /* VMD can live without the various human readable label variables. */
00273   rc = nc_inq_varid(cdf->ncid, "spatial", &amber->spatial_id);
00274   if (rc != NC_NOERR)
00275     return CDF_ERR;
00276 
00277   rc = nc_inq_varid(cdf->ncid, "cell_spatial", &amber->cell_spatial_id);
00278   if (rc != NC_NOERR)
00279     return CDF_ERR;
00280 
00281   rc = nc_inq_varid(cdf->ncid, "cell_angular", &amber->cell_angular_id);
00282   if (rc != NC_NOERR)
00283     return CDF_ERR;
00284 #endif
00285 
00286   /* VMD requires coordinates at a minimum */
00287   rc = nc_inq_varid(cdf->ncid, "coordinates", &amber->coordinates_id);
00288   if (rc != NC_NOERR) {
00289     printf("netcdfplugin) AMBER: no coordinates variable, nothing to load\n");
00290     return CDF_ERR;
00291   }
00292 
00293   /* Coordinate units */
00294   rc = nc_inq_attlen(cdf->ncid, amber->coordinates_id, "units", &len);
00295   if (rc == NC_NOERR && len > 0) {
00296     amber->coordinates_units = (char *) malloc((len+1) * sizeof(char));
00297     nc_get_att_text(cdf->ncid, amber->coordinates_id, "units", amber->coordinates_units);
00298     amber->coordinates_units[len] = '\0';
00299     printf("netcdfplugin) AMBER: coordinates units: '%s'\n", amber->coordinates_units);
00300   } else {
00301     printf("netcdfplugin) AMBER: no coordinates units attribute, Angstroms assumed\n");
00302   }
00303 
00304   /* Coordinate scaling factor to get to Angstroms */
00305   rc = nc_get_att_float(cdf->ncid, amber->coordinates_id, "scale_factor", &amber->coordinates_scalefactor);
00306   if (rc == NC_NOERR) {
00307     printf("netcdfplugin) AMBER: coordinates scalefactor: %f\n", amber->coordinates_scalefactor);
00308   } else {
00309     amber->coordinates_scalefactor = 1.0;
00310   }
00311 
00312 #if 0
00313   /* we don't need velocities at this time */
00314   rc = nc_inq_varid(cdf->ncid, "velocities", &amber->velocities_id);
00315   if (rc != NC_NOERR) {
00316     printf("netcdfplugin) AMBER: missing velocities variable, aborting\n");
00317     return CDF_ERR;
00318   }
00319 #endif
00320 
00321   /* optional periodic cell info */
00322   rc = nc_inq_varid(cdf->ncid, "cell_lengths", &amber->cell_lengths_id);
00323   if (rc == NC_NOERR) {
00324     rc = nc_inq_varid(cdf->ncid, "cell_angles", &amber->cell_angles_id);
00325     if (rc == NC_NOERR) {
00326       printf("netcdfplugin) AMBER trajectory contains periodic cell information\n");
00327       amber->has_box = 1;
00328 
00329       /* Cell lengths units */
00330       rc = nc_inq_attlen(cdf->ncid, amber->cell_lengths_id, "units", &len);
00331       if (rc == NC_NOERR && len > 0) {
00332         amber->cell_lengths_units = (char *) malloc((len+1) * sizeof(char));
00333         nc_get_att_text(cdf->ncid, amber->cell_lengths_id, "units", amber->cell_lengths_units);
00334         amber->cell_lengths_units[len] = '\0';
00335         printf("netcdfplugin) AMBER: cell lengths units: '%s'\n", amber->cell_lengths_units);
00336       } else {
00337         printf("netcdfplugin) AMBER: no cell lengths units attribute, Angstroms assumed\n");
00338       }
00339 
00340       /* Cell lengths scaling factor to get to Angstroms */
00341       rc = nc_get_att_float(cdf->ncid, amber->cell_lengths_id, "scale_factor", &amber->cell_lengths_scalefactor);
00342       if (rc == NC_NOERR) {
00343         printf("netcdfplugin) AMBER: cell lengths scalefactor: %f\n", amber->cell_lengths_scalefactor);
00344       } else {
00345         amber->cell_lengths_scalefactor = 1.0;
00346       }
00347 
00348       /* Cell angles units */
00349       rc = nc_inq_attlen(cdf->ncid, amber->cell_angles_id, "units", &len);
00350       if (rc == NC_NOERR && len > 0) {
00351         amber->cell_angles_units = (char *) malloc((len+1) * sizeof(char));
00352         nc_get_att_text(cdf->ncid, amber->cell_angles_id, "units", amber->cell_angles_units);
00353         amber->cell_angles_units[len] = '\0';
00354         printf("netcdfplugin) AMBER: cell angles units: '%s'\n", amber->cell_angles_units);
00355       } else {
00356         printf("netcdfplugin) AMBER: no cell angles units attribute, Degrees assumed\n");
00357       }
00358 
00359       /* Cell angles scaling factor to get to degrees */
00360       rc = nc_get_att_float(cdf->ncid, amber->cell_angles_id, "scale_factor", &amber->cell_angles_scalefactor);
00361       if (rc == NC_NOERR) {
00362         printf("netcdfplugin) AMBER: cell angles scalefactor: %f\n", amber->cell_angles_scalefactor);
00363       } else {
00364         amber->cell_angles_scalefactor = 1.0;
00365       }
00366     }
00367   }
00368 
00369   return CDF_SUCCESS;
00370 }
00371 
00372 
00373 static int open_mmtk_cdf_read(cdfdata *cdf, int conventionsknown) {
00374   int rc;
00375   size_t len; 
00376   mmtkdata *mmtk = &cdf->mmtk;
00377 
00378   /* If conventions specify MMTK then we're safe to continue */
00379   /* and we know what we're dealing with */
00380   if (conventionsknown) {
00381     cdf->type = CDF_TYPE_MMTK;
00382   }
00383 
00384   /* global attrib: "trajectory_type" (new format) */
00385   nc_get_att_int(cdf->ncid, NC_GLOBAL, "trajectory_type", &mmtk->trajectorytype);
00386   if (rc == NC_NOERR) {
00387     printf("netcdfplugin) MMTK trajectory type: %d\n", mmtk->trajectorytype);
00388   } else {
00389     printf("netcdfplugin) Assuming MMTK trajectory type: %d\n", mmtk->trajectorytype);
00390     mmtk->trajectorytype = 0;
00391   }
00392 
00393   /* read in spatial dimension */
00394   rc = nc_inq_dimid(cdf->ncid, "xyz", &mmtk->xyzdimid);
00395   if (rc == NC_NOERR) {
00396     rc = nc_inq_dimlen(cdf->ncid, mmtk->xyzdimid, &mmtk->xyzdim);
00397     if (rc == NC_NOERR)
00398       printf("netcdfplugin) MMTK: xyz dimension: %ld\n", (long)mmtk->xyzdim);
00399     else 
00400       return CDF_ERR;
00401   } else {
00402     return CDF_ERR;
00403   }
00404 
00405   /* read in atom dimension */
00406   rc = nc_inq_dimid(cdf->ncid, "atom_number", &mmtk->atom_numberdimid); 
00407   if (rc == NC_NOERR) {
00408     rc = nc_inq_dimlen(cdf->ncid, mmtk->atom_numberdimid, &mmtk->atom_numberdim);
00409     if (rc == NC_NOERR) {
00410       printf("netcdfplugin) MMTK: atom_number dimension: %ld\n", (long)mmtk->atom_numberdim);
00411       cdf->natoms = mmtk->atom_numberdim; /* copy to format independent part */
00412     } else {
00413       return CDF_ERR;
00414     }
00415   } else {
00416     return CDF_ERR;
00417   }
00418 
00419 
00420   /* read in frame dimension */
00421   rc = nc_inq_dimid(cdf->ncid, "step_number", &mmtk->step_numberdimid);
00422   if (rc == NC_NOERR) {
00423     rc = nc_inq_dimlen(cdf->ncid, mmtk->step_numberdimid, &mmtk->step_numberdim);
00424     if (rc == NC_NOERR)
00425       printf("netcdfplugin) MMTK: step_number dimension: %ld\n", (long)mmtk->step_numberdim);
00426     else 
00427       return CDF_ERR;
00428   } else {
00429     return CDF_ERR;
00430   }
00431 
00432 
00433   /* read in minor step number dimension */
00434   rc = nc_inq_dimid(cdf->ncid, "minor_step_number", &mmtk->minor_step_numberdimid);
00435   if (rc == NC_NOERR) {
00436     rc = nc_inq_dimlen(cdf->ncid, mmtk->minor_step_numberdimid, &mmtk->minor_step_numberdim);
00437     if (rc == NC_NOERR)
00438       printf("netcdfplugin) MMTK: minor_step_number dimension: %ld\n", (long)mmtk->minor_step_numberdim);
00439     else 
00440       return CDF_ERR;
00441   } else if (rc == NC_EBADDIM) {
00442     printf("netcdfplugin) MMTK: no minor_step_number dimension\n");
00443     mmtk->minor_step_numberdim = 0;
00444   } else {
00445     return CDF_ERR;
00446   }
00447 
00448 
00449   /* read in description_length dimension */
00450   rc = nc_inq_dimid(cdf->ncid, "description_length", &mmtk->description_lengthdimid); 
00451   if (rc == NC_NOERR) {
00452     rc = nc_inq_dimlen(cdf->ncid, mmtk->description_lengthdimid, &mmtk->description_lengthdim);
00453     if (rc == NC_NOERR)
00454       printf("netcdfplugin) MMTK: description_length dimension: %ld\n", (long)mmtk->description_lengthdim);
00455     else
00456       return CDF_ERR;
00457   } else {
00458     return CDF_ERR;
00459   }
00460 
00461 
00462   /* get ID values for all of the variables we're interested in */
00463   rc = nc_inq_varid(cdf->ncid, "configuration", &mmtk->configuration_id);
00464   if (rc != NC_NOERR)
00465     return CDF_ERR;
00466 
00467   rc = nc_inq_varid(cdf->ncid, "description", &mmtk->description_id);
00468   if (rc != NC_NOERR)
00469     return CDF_ERR;
00470 
00471   /* check for PBC */
00472   rc = nc_inq_varid(cdf->ncid, "box_size", &mmtk->box_size_id);
00473   if (rc == NC_NOERR) {
00474     mmtk->has_box = 1;
00475     printf("netcdfplugin) MMTK: system has periodic boundary conditions\n");
00476   }
00477   else if (rc == NC_ENOTVAR)
00478     mmtk->has_box = 0;
00479   else
00480     return CDF_ERR;
00481 
00482 
00483   /* global attrib: "comment" -- optional */
00484   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "comment", &len);
00485   if (rc == NC_NOERR && len > 0) {
00486     mmtk->comment = (char *) malloc((len+1) * sizeof(char));
00487     nc_get_att_text(cdf->ncid, NC_GLOBAL, "comment", mmtk->comment);
00488     mmtk->comment[len] = '\0';
00489     printf("netcdfplugin) MMTK: comment '%s'\n", mmtk->comment);
00490   } 
00491 
00492   /* at this point we know that this is an MMTK trajectory */
00493   if (!conventionsknown) {
00494     printf("netcdfplugin) File is an old format MMTK trajectory without conventions\n");    
00495     cdf->type = CDF_TYPE_MMTK;
00496   }
00497 
00498   return CDF_SUCCESS;
00499 }
00500 
00501  
00502 static void *open_cdf_read(const char *filename, const char *filetype, 
00503                            int *natoms) {
00504   int ncid, rc;
00505   size_t len;
00506   cdfdata *cdf;
00507  
00508   rc = nc_open(filename, NC_NOWRITE, &ncid);
00509   if (rc != NC_NOERR) return NULL;
00510 
00511   cdf = (cdfdata *) malloc(sizeof(cdfdata));
00512   memset(cdf, 0, sizeof(cdfdata));
00513 
00514   cdf->ncid = ncid;
00515   cdf->type = CDF_TYPE_UNKNOWN;
00516 
00517   /* Determine what NetCDF conventions apply to this data, if any */
00518   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "Conventions", &len);
00519   if (rc == NC_NOERR && len > 0) {
00520     cdf->conventions = (char *) malloc((len+1) * sizeof(char));
00521     nc_get_att_text(cdf->ncid, NC_GLOBAL, "Conventions", cdf->conventions);
00522     cdf->conventions[len] = '\0';
00523     printf("netcdfplugin) conventions: '%s'\n", cdf->conventions);
00524   } 
00525 
00526   if (cdf->conventions != NULL) {
00527     /* Check if this is a file generated by AMBER */
00528     if (strstr(cdf->conventions, "AMBER") != NULL) {
00529       if (!open_amber_cdf_read(cdf)) {
00530         *natoms = cdf->natoms;
00531         return cdf;
00532       }
00533     } 
00534 
00535     /* Check if this is a file generated by MMTK */
00536     if (strstr(cdf->conventions, "MMTK") != NULL) {
00537       if (!open_mmtk_cdf_read(cdf, 1)) {
00538         *natoms = cdf->natoms;
00539         return cdf;
00540       }
00541     } 
00542   } 
00543 
00544   printf("netcdfplugin) Missing or unrecognized conventions attribute\n");
00545   printf("netcdfplugin) checking for old format MMTK NetCDF file...\n");
00546 
00547   /* If no conventions are specified, then maybe it's from MMTK */
00548   if (!open_mmtk_cdf_read(cdf, 0)) {
00549     *natoms = cdf->natoms;
00550     return cdf;
00551   } 
00552 
00553   /* if no conventions are recognized, then we free everything */
00554   /* and return failure                                        */
00555   close_cdf_read(cdf);
00556 
00557   return NULL; 
00558 }
00559 
00560 /* A very basic bracket counter. It assumes that the expression
00561    is syntactically correct. */
00562 static char *find_closing_bracket(char *s) {
00563   int count = 1;
00564   while (*s && count > 0) {
00565     if (*s == '(' || *s == '[')
00566       count++;
00567     if (*s == ')' || *s == ']')
00568       count--;
00569     s++;
00570   }
00571   return s;
00572 }
00573 
00574 /* Simple string replacement routine for fixing atom names. */
00575 static void atom_name_replace(char *name, char *substring, char letter) {
00576   char *s = strstr(name, substring);
00577   if (s != NULL) {
00578     *s = letter;
00579     strcpy(s+1, s+strlen(substring));
00580   }
00581 }
00582 
00583 static void atom_name_remove_underscores(char *name) {
00584   char *s = name;
00585   while (1) {
00586     s = strchr(s, '_');
00587     if (s == NULL)
00588       break;
00589     strcpy(s, s+1);
00590   }
00591 }
00592 
00593 /* Set chainid, resname, and resnum for a range of atoms
00594    and fix atom names. */
00595 static void set_atom_attributes(molfile_atom_t *atoms, int natoms,
00596                                 char **atom_pointers, char chain_id,
00597                                 char *resname, int resnum,
00598                                 char *start, char *end,
00599                                 int name_correction_type) {
00600   int i;
00601   for (i=0; i<natoms; i++)
00602     if (atom_pointers[i] > start && atom_pointers[i] < end) {
00603       molfile_atom_t *atom = atoms + i;
00604       atom->chain[0] = chain_id;      
00605       atom->chain[1] = '\0';      
00606       strcpy(atom->resname, resname);
00607       atom->resid = resnum;
00608       if (name_correction_type == 1 /* proteins */) {
00609         atom_name_replace(atom->name, "_alpha", 'A');
00610         atom_name_replace(atom->name, "_beta", 'B');
00611         atom_name_replace(atom->name, "_gamma", 'G');
00612         atom_name_replace(atom->name, "_delta", 'D');
00613         atom_name_replace(atom->name, "_epsilon", 'E');
00614         atom_name_replace(atom->name, "_zeta", 'Z');
00615         atom_name_replace(atom->name, "_eta", 'H');
00616         atom_name_remove_underscores(atom->name);
00617       }
00618       else if (name_correction_type == 2 /* nucleic acids */) {
00619         if (strcmp(atom->name, "O_1") == 0)
00620           strcpy(atom->name, "O1P");
00621         else if (strcmp(atom->name, "O_2") == 0)
00622           strcpy(atom->name, "O2P");
00623         else if (strcmp(atom->name, "C_1") == 0)
00624           strcpy(atom->name, "C1'");
00625         else if (strcmp(atom->name, "C_2") == 0)
00626           strcpy(atom->name, "C2'");
00627         else if (strcmp(atom->name, "C_3") == 0)
00628           strcpy(atom->name, "C3'");
00629         else if (strcmp(atom->name, "O_3") == 0)
00630           strcpy(atom->name, "O3'");
00631         else if (strcmp(atom->name, "C_4") == 0)
00632           strcpy(atom->name, "C4'");
00633         else if (strcmp(atom->name, "O_4") == 0)
00634           strcpy(atom->name, "O4'");
00635         else if (strcmp(atom->name, "C_5") == 0)
00636           strcpy(atom->name, "C5'");
00637         else if (strcmp(atom->name, "O_5") == 0)
00638           strcpy(atom->name, "O5'");
00639         else
00640           atom_name_remove_underscores(atom->name);
00641       }
00642     }
00643 }
00644 
00645 /* Get structure from an MMTK trajectory file */
00646 static int read_mmtk_cdf_structure(void *mydata, int *optflags,
00647                                    molfile_atom_t *atoms) {
00648   int i, rc;
00649   molfile_atom_t *atom;
00650   cdfdata *cdf = (cdfdata *) mydata;
00651   mmtkdata *mmtk = &cdf->mmtk;
00652   size_t start[3], count[3];
00653   char *dstr;
00654   char **atom_pointers;
00655   int resnum;
00656   char resname[8];
00657 
00658   *optflags = MOLFILE_NOOPTIONS;
00659 
00660   mmtk->description = (char *) malloc((mmtk->description_lengthdim + 1) * sizeof(char));
00661   if (mmtk->description == NULL) 
00662     return MOLFILE_ERROR;
00663 
00664   start[0] = cdf->curframe; /* frame */
00665   count[0] = mmtk->description_lengthdim;
00666 
00667   rc = nc_get_vara_text(cdf->ncid, mmtk->description_id,
00668                         start, count, mmtk->description);
00669   if (rc != NC_NOERR)
00670     return MOLFILE_ERROR;
00671 
00672   /* initialize all atoms with name "X" to start with */
00673   /* indicating unknown atom types etc..              */
00674   for (i=0; i<cdf->natoms; i++) {
00675     atom = atoms + i;
00676     strncpy(atom->name, "X", sizeof(atom->name)-1);
00677     atom->name[sizeof(atom->name)] = '\0';
00678     strncpy(atom->type, atom->name, sizeof(atom->type)-1);
00679     atom->type[sizeof(atom->type)] = '\0';
00680     atom->resname[0] = '\0';
00681     atom->resid = 1;
00682     atom->chain[0] = '\0';
00683     atom->segid[0] = '\0';
00684   }
00685 
00686   /* Allocate a pointer array that will hold each atom's location in
00687      the description string. This will be used in a second pass through
00688      the description string in which residue names and indices will
00689      be assigned. */
00690   atom_pointers = (char **) malloc(cdf->natoms * sizeof(char *));
00691   if (atom_pointers == NULL)
00692     return MOLFILE_ERROR;
00693 
00694   /* First pass: look only at atoms */
00695   dstr = mmtk->description;
00696   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00697     char *atomstr;
00698     atomstr = strstr(dstr, "A('");
00699 
00700     if (atomstr != NULL) {
00701       char name[1024];
00702       char *nmstart = NULL;
00703       char *nmend = NULL;
00704       char *indstart = NULL;
00705       char *endp = NULL;
00706       int index, len;
00707 
00708       endp = strchr(atomstr, ')');
00709       nmstart = strchr(atomstr, '\'');
00710       if (nmstart != NULL)
00711         nmend = strchr(nmstart+1, '\'');
00712       indstart = strchr(atomstr, ',');
00713       if (endp == NULL || nmstart == NULL || nmend == NULL || indstart == NULL) {
00714         printf("netcdfplugin) mmtk_read_structure(): unable to parse atom tag\n");
00715         break; /* something went wrong */
00716       }
00717 
00718       len = nmend - nmstart - 1;
00719       if (len > sizeof(name)) {
00720         printf("netcdfplugin) mmtk_read_structure(): bad length: %d\n", len);
00721         break; /* something went wrong */
00722       }
00723       memcpy(name, nmstart+1, len); 
00724       name[len] = '\0';
00725 
00726       index = -1;
00727       sscanf(indstart, ",%d)", &index);
00728       atom_pointers[index] = atomstr;
00729 
00730       if (index >= 0 && index < cdf->natoms) {
00731         atom = atoms + index;
00732         strncpy(atom->name, name, sizeof(atom->name)-1);
00733         atom->name[sizeof(atom->name)] = '\0';
00734         strncpy(atom->type, atom->name, sizeof(atom->type)-1);
00735         atom->type[sizeof(atom->type)] = '\0';
00736       }
00737 
00738       dstr = atomstr+1;
00739     } else {
00740       break; /* no more atom records found */
00741     }
00742   }
00743 
00744   /* Second pass: peptide chains */
00745   dstr = mmtk->description;
00746   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00747     char *peptide, *pend;
00748     char *group, *gend;
00749     char *nmstart, *nmend;
00750     char chain_id = 'A';
00751     char *s;
00752 
00753     peptide = strstr(dstr, "S('");
00754     if (peptide == NULL)
00755       break;
00756     pend = find_closing_bracket(peptide+2);
00757 
00758     resnum = 1;
00759     group = peptide;
00760     while (1) {
00761       group = strstr(group, "G('");
00762       if (group == NULL || group >= pend)
00763         break;
00764       gend = find_closing_bracket(group+2);
00765       nmstart = strchr(group, '\'') + 1;
00766       nmend = strchr(nmstart, '\'');
00767       while (nmend > nmstart && isdigit(*(nmend-1)))
00768         nmend--;
00769       if (nmend-nmstart > 7)
00770         nmend = nmstart+7;
00771       strncpy(resname, nmstart, nmend-nmstart);
00772       resname[nmend-nmstart] = '\0';
00773       s = resname;
00774       while (*s) {
00775         *s = toupper(*s);
00776         s++;
00777       }
00778       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00779                           chain_id, resname, resnum, group, gend, 1);
00780       group = gend;
00781       resnum++;
00782     }
00783 
00784     if (chain_id == 'Z')
00785       chain_id = 'A';
00786     else
00787         chain_id++;
00788     dstr = pend;
00789   }
00790 
00791   /* Third pass: nucleic acid chains */
00792   dstr = mmtk->description;
00793   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00794     char *nacid, *nend;
00795     char *group, *gend;
00796     char *nmstart, *nmend;
00797     char chain_id = 'a';
00798     char *s;
00799 
00800     nacid = strstr(dstr, "N('");
00801     if (nacid == NULL)
00802       break;
00803     nend = find_closing_bracket(nacid+2);
00804 
00805     resnum = 1;
00806     group = nacid;
00807     while (1) {
00808       group = strstr(group, "G('");
00809       if (group == NULL || group >= nend)
00810         break;
00811       gend = find_closing_bracket(group+2);
00812       nmstart = strchr(group, '\'') + 1;
00813       nmend = strchr(nmstart, '\'');
00814       while (nmend > nmstart && isdigit(*(nmend-1)))
00815         nmend--;
00816       if (nmend > nmstart && nmend[-1] == '_')
00817         nmend--;
00818       if (nmend-nmstart > 7)
00819         nmend = nmstart+7;
00820       strncpy(resname, nmstart, nmend-nmstart);
00821       resname[nmend-nmstart] = '\0';
00822       s = resname;
00823       while (*s) {
00824         *s = toupper(*s);
00825         s++;
00826       }
00827       if (resname[0] == 'R' || resname[0] == 'D') {
00828         switch (resname[1]) {
00829         case 'A':
00830           strcpy(resname, "ADE");
00831           break;
00832         case 'C':
00833           strcpy(resname, "CYT");
00834           break;
00835         case 'G':
00836           strcpy(resname, "GUA");
00837           break;
00838         case 'T':
00839           strcpy(resname, "THY");
00840           break;
00841         case 'U':
00842           strcpy(resname, "URA");
00843           break;
00844         }
00845       }
00846       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00847                           chain_id, resname, resnum, group, gend, 2);
00848       group = gend;
00849       resnum++;
00850     }
00851 
00852     if (chain_id == 'z')
00853       chain_id = 'a';
00854     else
00855         chain_id++;
00856     dstr = nend;
00857   }
00858 
00859   /* Fourth pass: non-chain molecules */
00860   resnum = 1;
00861   dstr = mmtk->description;
00862   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00863     char *molecule, *mend;
00864     char *nmstart, *nmend;
00865 
00866     molecule = strstr(dstr, "M('");
00867     if (molecule == NULL)
00868       break;
00869     mend = find_closing_bracket(molecule+2);
00870     nmstart = strchr(molecule, '\'') + 1;
00871     nmend = strchr(nmstart, '\'');
00872     if (strncmp(nmstart, "water", 5) == 0)
00873       strcpy(resname, "HOH");
00874     else {
00875       if (nmend-nmstart > 7)
00876         nmend = nmstart+7;
00877       strncpy(resname, nmstart, nmend-nmstart);
00878       resname[nmend-nmstart] = '\0';
00879     }
00880     set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00881                         '_', resname, resnum, molecule, mend, 0);
00882     resnum++;
00883     dstr = mend;
00884   }
00885 
00886   free(atom_pointers);
00887 
00888   return MOLFILE_SUCCESS;
00889 }
00890 
00891 
00892 static int read_cdf_structure(void *mydata, int *optflags,
00893                                    molfile_atom_t *atoms) {
00894   cdfdata *cdf = (cdfdata *)mydata;
00895 
00896   switch (cdf->type) {
00897     case CDF_TYPE_AMBER:
00898       return MOLFILE_NOSTRUCTUREDATA; /* not an error, just no data */
00899 
00900     case CDF_TYPE_MMTK:
00901       return read_mmtk_cdf_structure(mydata, optflags, atoms);
00902   }
00903 
00904   return MOLFILE_ERROR;
00905 }
00906 
00907 
00908 static int read_amber_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
00909   cdfdata *cdf = (cdfdata *)mydata;
00910   amberdata *amber = &cdf->amber;
00911   int rc;
00912 
00913   /* Read in the atom coordinates and unit cell information */
00914   /* only save coords if we're given a valid ts pointer     */ 
00915   /* otherwise VMD wants us to skip it.                     */
00916   if (ts != NULL) {
00917     size_t start[3], count[3];
00918 
00919     start[0] = cdf->curframe; /* frame */
00920     start[1] = 0;             /* atom */
00921     start[2] = 0;             /* spatial */
00922 
00923     count[0] = 1;
00924     count[1] = amber->atomdim;
00925     count[2] = amber->spatialdim;
00926 
00927     rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id, 
00928                            start, count, ts->coords);
00929     if (rc != NC_NOERR) 
00930       return MOLFILE_ERROR;
00931 
00932     /* apply coordinate scaling factor if not 1.0 */
00933     if (amber->coordinates_scalefactor != 1.0) {
00934       int i;
00935       float s = amber->coordinates_scalefactor;
00936       for (i=0; i<natoms*3; i++) {
00937         ts->coords[i] *= s;
00938       }
00939     }
00940 
00941     /* Read the PBC box info. */
00942     if (amber->has_box) {
00943       double lengths[3];
00944       double angles[3];
00945 
00946       start[0] = cdf->curframe; /* frame */
00947       start[1] = 0;             /* spatial */
00948 
00949       count[0] = 1;
00950       count[1] = amber->spatialdim;
00951 
00952       rc = nc_get_vara_double(cdf->ncid, amber->cell_lengths_id, 
00953                               start, count, lengths);
00954       if (rc != NC_NOERR) 
00955         return MOLFILE_ERROR;
00956 
00957       rc = nc_get_vara_double(cdf->ncid, amber->cell_angles_id, 
00958                               start, count, angles);
00959       if (rc != NC_NOERR) 
00960         return MOLFILE_ERROR;
00961 
00962       ts->A = lengths[0] * amber->cell_lengths_scalefactor;
00963       ts->B = lengths[1] * amber->cell_lengths_scalefactor;
00964       ts->C = lengths[2] * amber->cell_lengths_scalefactor;
00965 
00966       ts->alpha = angles[0] * amber->cell_angles_scalefactor;
00967       ts->beta  = angles[1] * amber->cell_angles_scalefactor;
00968       ts->gamma = angles[2] * amber->cell_angles_scalefactor;
00969     }
00970   }
00971 
00972   cdf->curframe++;
00973   return MOLFILE_SUCCESS;
00974 }
00975 
00976 
00977 static int read_mmtk_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
00978   cdfdata *cdf = (cdfdata *)mydata;
00979   mmtkdata *mmtk = &cdf->mmtk;
00980   int rc;
00981 
00982   /* Read in the atom coordinates and unit cell information */
00983   /* only save coords if we're given a valid ts pointer     */ 
00984   /* otherwise VMD wants us to skip it.                     */
00985   if (ts != NULL) {
00986     size_t start[4], count[4];
00987     int i;
00988 
00989     if (mmtk->minor_step_numberdim == 0) {
00990       start[0] = cdf->curframe; /* step */
00991       start[1] = 0;             /* atom */
00992       start[2] = 0;             /* spatial */
00993       start[3] = 0;             /* minor step */
00994     }
00995     else {
00996       start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
00997       start[1] = 0;             /* atom */
00998       start[2] = 0;             /* spatial */
00999       start[3] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
01000     }
01001 
01002     count[0] = 1;
01003     count[1] = mmtk->atom_numberdim;
01004     count[2] = mmtk->xyzdim;
01005     count[3] = 1;             /* only want one minor step, regardless */
01006 
01007     rc = nc_get_vara_float(cdf->ncid, mmtk->configuration_id, 
01008                            start, count, ts->coords);
01009     if (rc != NC_NOERR) 
01010       return MOLFILE_ERROR;
01011 
01012     /* check for allocated but not yet used frame */
01013     if (ts->coords[0] == NC_FILL_FLOAT)
01014       return MOLFILE_ERROR;
01015 
01016     /* scale coordinates from nanometers to angstroms */
01017     for (i=0; i<(3 * mmtk->atom_numberdim); i++) {
01018       ts->coords[i] *= 10.0f;
01019     }
01020 
01021     /* Read the PBC box info. */
01022     if (mmtk->has_box) {
01023       float lengths[3];
01024 
01025       if (mmtk->minor_step_numberdim == 0) {
01026         start[0] = cdf->curframe; /* step */
01027         start[1] = 0;             /* box_size */
01028         start[2] = 0;             /* minor step */
01029       }
01030       else {
01031         start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
01032         start[1] = 0;             /* box_size */
01033         start[2] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
01034       }
01035 
01036       count[0] = 1;
01037       count[1] = 3;
01038       count[2] = 1;
01039 
01040       rc = nc_get_vara_float(cdf->ncid, mmtk->box_size_id,
01041                              start, count, lengths);
01042       if (rc != NC_NOERR) 
01043         return MOLFILE_ERROR;
01044 
01045       ts->A = 10.*lengths[0];
01046       ts->B = 10.*lengths[1];
01047       ts->C = 10.*lengths[2];
01048 
01049       ts->alpha = 90.;
01050       ts->beta  = 90.;
01051       ts->gamma = 90.;
01052     }
01053   }
01054 
01055   cdf->curframe++;
01056   return MOLFILE_SUCCESS;
01057 }
01058 
01059 
01060 
01061 static int read_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
01062   cdfdata *cdf = (cdfdata *)mydata;
01063 
01064   switch (cdf->type) {
01065     case CDF_TYPE_AMBER: 
01066       return read_amber_cdf_timestep(mydata, natoms, ts); 
01067 
01068     case CDF_TYPE_MMTK:
01069       return read_mmtk_cdf_timestep(mydata, natoms, ts); 
01070   }
01071 
01072   return MOLFILE_ERROR;
01073 }
01074 
01075 
01076 static molfile_plugin_t plugin;
01077 
01078 VMDPLUGIN_API int VMDPLUGIN_init(void) {
01079   memset(&plugin, 0, sizeof(molfile_plugin_t));
01080   plugin.abiversion = vmdplugin_ABIVERSION;
01081   plugin.type = MOLFILE_PLUGIN_TYPE;
01082   plugin.name = "netcdf";
01083   plugin.prettyname = "NetCDF (AMBER, MMTK)";
01084   plugin.author = "Konrad Hinsen, John Stone";
01085   plugin.majorv = 0;
01086   plugin.minorv = 9;
01087   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
01088   plugin.filename_extension = "nc";
01089   plugin.open_file_read = open_cdf_read;
01090   plugin.read_structure = read_cdf_structure;
01091   plugin.read_next_timestep = read_cdf_timestep;
01092   plugin.close_file_read = close_cdf_read;
01093   return VMDPLUGIN_SUCCESS; 
01094 }
01095 
01096 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
01097   (*cb)(v, (vmdplugin_t *)&plugin);
01098   return VMDPLUGIN_SUCCESS;
01099 }
01100 
01101 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
01102 

Generated on Mon Sep 8 01:39:42 2008 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002