Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

netcdfplugin.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: netcdfplugin.c,v $
00013  *      $Author: johns $       $Locker:  $             $State: Exp $
00014  *      $Revision: 1.31 $       $Date: 2019/12/02 16:05:57 $
00015  *
00016  ***************************************************************************/
00017 
00018 /*
00019  * NetCDF based trajectories, used by AMBER 9, MMTK, etc.
00020  */
00021 
00022 #include <netcdf.h>
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #include <ctype.h>
00027 #include "molfile_plugin.h"
00028 
00029 #define CDF_TYPE_UNKNOWN 0
00030 #define CDF_TYPE_AMBER   1
00031 #define CDF_TYPE_MMTK    2
00032 
00033 #define CDF_SUCCESS      0
00034 #define CDF_ERR         -1
00035 
00036 typedef struct {
00037   int trajectorytype;
00038   int step_numberdimid;
00039   size_t step_numberdim;
00040   int minor_step_numberdimid;
00041   size_t minor_step_numberdim;
00042   int atom_numberdimid;
00043   size_t atom_numberdim;
00044   int xyzdimid;
00045   size_t xyzdim;
00046   int box_size_lengthdimid;
00047   size_t box_size_lengthdim;
00048   int description_lengthdimid;
00049   size_t description_lengthdim;
00050   char *description;
00051   int description_id;
00052   int step_id;
00053   int time_id;
00054   int box_size_id;
00055   int configuration_id;
00056   int has_box;
00057   char *comment;
00058 } mmtkdata;
00059 
00060 typedef struct {
00061   int is_restart;
00062   int is_trajectory;
00063   int has_box;
00064   int atomdimid;
00065   size_t atomdim;
00066   int spatialdimid;
00067   size_t spatialdim;
00068   int framedimid;
00069   size_t framedim;
00070   char *conventionversion;
00071   char *title;
00072   char *application;
00073   char *program;
00074   char *programversion;
00075   int spatial_id;                  /* "xyz" */
00076   int cell_spatial_id;             /* "abc" */
00077   int cell_angular_id;             /* "alpha, beta, gamma" */
00078   int time_id;                     /* frame time in picoseconds */
00079   int coordinates_id;              /* coords in angstroms */
00080   char *coordinates_units;         /* coordinates units */
00081   float coordinates_scalefactor;   /* coordinates scaling factor */
00082   int cell_lengths_id;             /* cell lengths in angstroms */
00083   char *cell_lengths_units;        /* cell lengths units */
00084   float cell_lengths_scalefactor;  /* cell lengths scaling factor */
00085   int cell_angles_id;              /* cell angles in degrees */
00086   char *cell_angles_units;         /* cell angles units */
00087   float cell_angles_scalefactor;   /* cell angles scaling factor */
00088   int velocities_id;               /* velocities in angstroms/picosecond */
00089 } amberdata;
00090 
00091 
00092 typedef struct {
00093   /* sub-format independent data */
00094   int ncid;
00095   int type;
00096   int natoms; 
00097   int curframe;
00098   char *conventions;
00099 
00100   /* stuff used by AMBER */
00101   amberdata amber;
00102 
00103   /* stuff used by MMTK */
00104   mmtkdata mmtk;
00105 
00106 } cdfdata;
00107 
00108 
00109 static void close_cdf_read(void *mydata) {
00110   cdfdata *cdf = (cdfdata *)mydata;
00111 
00112   nc_close(cdf->ncid);
00113 
00114   /* AMBER stuff */
00115   if (cdf->amber.title)
00116     free(cdf->amber.title);
00117 
00118   if (cdf->amber.application)
00119     free(cdf->amber.application);
00120 
00121   if (cdf->amber.program)
00122     free(cdf->amber.program);
00123 
00124   if (cdf->amber.programversion)
00125     free(cdf->amber.programversion);
00126 
00127   if (cdf->amber.conventionversion)
00128     free(cdf->amber.conventionversion);
00129 
00130   if (cdf->amber.coordinates_units)
00131     free(cdf->amber.coordinates_units);
00132 
00133   if (cdf->amber.cell_lengths_units)
00134     free(cdf->amber.cell_lengths_units);
00135 
00136   if (cdf->amber.cell_angles_units)
00137     free(cdf->amber.cell_angles_units);
00138 
00139   /* MMTK stuff */
00140   if (cdf->mmtk.comment)
00141     free(cdf->mmtk.comment);
00142 
00143   /* format independent stuff */
00144   if (cdf->conventions)
00145     free(cdf->conventions);
00146 
00147   free(cdf);
00148 }
00149 
00150 
00151 
00152 static int open_amber_cdf_read(cdfdata *cdf) {
00153   int rc;
00154   size_t len; 
00155   amberdata *amber = &cdf->amber;
00156 
00157   /* check if this is a restart file or not */
00158   if (!strcmp(cdf->conventions, "AMBERRESTART")) {
00159     amber->is_restart = 1;
00160   } else {
00161     amber->is_trajectory = 1;
00162   }
00163 
00164   /* global attrib: "ConventionVersion" -- required */
00165   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "ConventionVersion", &len);
00166   if (rc == NC_NOERR && len > 0) {
00167     amber->conventionversion = (char *) malloc((len+1) * sizeof(char));
00168     nc_get_att_text(cdf->ncid, NC_GLOBAL, "ConventionVersion", amber->conventionversion);
00169     amber->conventionversion[len] = '\0';
00170     printf("netcdfplugin) %s follows AMBER conventions version '%s'\n", 
00171            (amber->is_restart) ? "restart file" : "trajectory",
00172            amber->conventionversion);
00173   } else {
00174     return CDF_ERR;
00175   }
00176 
00177   /* at this point we know that this is an AMBER trajectory or restart file */
00178   cdf->type = CDF_TYPE_AMBER;
00179 
00180   /* initialize default scaling factors so they are always set to a sane */
00181   /* value even if problems occur later */
00182   amber->coordinates_scalefactor = 1.0;
00183   amber->cell_lengths_scalefactor = 1.0;
00184   amber->cell_angles_scalefactor = 1.0;
00185 
00186   /* global attrib: "program" -- required */
00187   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "program", &len);
00188   if (rc == NC_NOERR && len > 0) {
00189     amber->program = (char *) malloc((len+1) * sizeof(char));
00190     nc_get_att_text(cdf->ncid, NC_GLOBAL, "program", amber->program);
00191     amber->program[len] = '\0';
00192     printf("netcdfplugin) AMBER: program '%s'\n", amber->program);
00193   } else {
00194     printf("netcdfplugin) AMBER: Missing required 'program' global attribute, corrupt file?\n");
00195   }
00196 
00197 
00198   /* global attrib: "programVersion" -- required */
00199   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "programVersion", &len);
00200   if (rc == NC_NOERR && len > 0) {
00201     amber->programversion = (char *) malloc((len+1) * sizeof(char));
00202     nc_get_att_text(cdf->ncid, NC_GLOBAL, "programVersion", amber->programversion);
00203     amber->programversion[len] = '\0';
00204     printf("netcdfplugin) AMBER: program version '%s'\n", amber->programversion);
00205   } else {
00206     printf("netcdfplugin) AMBER: Missing required 'programVersion' global attribute, corrupt file?\n");
00207   }
00208 
00209 
00210   /* global attrib: "title" -- optional */
00211   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "title", &len);
00212   if (rc == NC_NOERR && len > 0) {
00213     amber->title = (char *) malloc((len+1) * sizeof(char));
00214     nc_get_att_text(cdf->ncid, NC_GLOBAL, "title", amber->title);
00215     amber->title[len] = '\0';
00216     printf("netcdfplugin) AMBER: title '%s'\n", amber->title);
00217   } 
00218 
00219 
00220   /* global attrib: "application" -- optional */
00221   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "application", &len);
00222   if (rc == NC_NOERR && len > 0) {
00223     amber->application = (char *) malloc((len+1) * sizeof(char));
00224     nc_get_att_text(cdf->ncid, NC_GLOBAL, "application", amber->application);
00225     amber->application[len] = '\0';
00226     printf("netcdfplugin) AMBER: application '%s'\n", amber->application);
00227   } 
00228 
00229 
00230 /* XXX lots of additional error checking is needed below... */
00231 
00232   /* read in spatial dimension */
00233   rc = nc_inq_dimid(cdf->ncid, "spatial", &amber->spatialdimid);
00234   if (rc == NC_NOERR) {    
00235     rc = nc_inq_dimlen(cdf->ncid, amber->spatialdimid, &amber->spatialdim);
00236     if (rc == NC_NOERR) {
00237       printf("netcdfplugin) AMBER: spatial dimension: %ld\n", (long)amber->spatialdim);
00238     } else {
00239       printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
00240       printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
00241       amber->spatialdim = 3;
00242     }
00243   } else {
00244     printf("netcdfplugin) AMBER: Missing spatial dimension, corrupt file?\n");
00245     printf("netcdfplugin) AMBER: Fixing by guessing spatialdim as '3'\n");
00246     amber->spatialdim = 3;
00247   }
00248  
00249   /* read in atom dimension */
00250   rc = nc_inq_dimid(cdf->ncid, "atom", &amber->atomdimid);
00251   if (rc == NC_NOERR) {    
00252     rc = nc_inq_dimlen(cdf->ncid, amber->atomdimid, &amber->atomdim);
00253     if (rc == NC_NOERR) {
00254       printf("netcdfplugin) AMBER: atom dimension: %ld\n", (long)amber->atomdim);
00255       cdf->natoms = amber->atomdim; /* copy to format independent part */
00256     } else  {
00257       printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
00258       return CDF_ERR;
00259     }
00260   } else {
00261     printf("netcdfplugin) AMBER: missing atom dimension, aborting\n");
00262     return CDF_ERR;
00263   }
00264  
00265   /* if this is a trajectory, read in frame dimension */
00266   if (amber->is_trajectory) {
00267     rc = nc_inq_dimid(cdf->ncid, "frame", &amber->framedimid);
00268     if (rc == NC_NOERR) {    
00269       rc = nc_inq_dimlen(cdf->ncid, amber->framedimid, &amber->framedim);
00270       if (rc == NC_NOERR) {
00271         printf("netcdfplugin) AMBER: frame dimension: %ld\n", (long)amber->framedim);
00272       } else {
00273         printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
00274         return CDF_ERR;
00275       }
00276     } else {
00277       printf("netcdfplugin) AMBER: missing frame dimension, aborting\n");
00278       return CDF_ERR;
00279     }
00280   }
00281 
00282 
00283   /* 
00284    * get ID values for all of the variables we're interested in 
00285    */
00286 #if 0
00287   /* VMD can live without the various human readable label variables. */
00288   rc = nc_inq_varid(cdf->ncid, "spatial", &amber->spatial_id);
00289   if (rc != NC_NOERR)
00290     return CDF_ERR;
00291 
00292   rc = nc_inq_varid(cdf->ncid, "cell_spatial", &amber->cell_spatial_id);
00293   if (rc != NC_NOERR)
00294     return CDF_ERR;
00295 
00296   rc = nc_inq_varid(cdf->ncid, "cell_angular", &amber->cell_angular_id);
00297   if (rc != NC_NOERR)
00298     return CDF_ERR;
00299 #endif
00300 
00301   /* VMD requires coordinates at a minimum */
00302   rc = nc_inq_varid(cdf->ncid, "coordinates", &amber->coordinates_id);
00303   if (rc != NC_NOERR) {
00304     printf("netcdfplugin) AMBER: no coordinates variable, nothing to load\n");
00305     return CDF_ERR;
00306   }
00307 
00308   /* Coordinate units */
00309   rc = nc_inq_attlen(cdf->ncid, amber->coordinates_id, "units", &len);
00310   if (rc == NC_NOERR && len > 0) {
00311     amber->coordinates_units = (char *) malloc((len+1) * sizeof(char));
00312     nc_get_att_text(cdf->ncid, amber->coordinates_id, "units", amber->coordinates_units);
00313     amber->coordinates_units[len] = '\0';
00314     printf("netcdfplugin) AMBER: coordinates units: '%s'\n", amber->coordinates_units);
00315   } else {
00316     printf("netcdfplugin) AMBER: no coordinates units attribute, Angstroms assumed\n");
00317   }
00318 
00319   /* Coordinate scaling factor to get to Angstroms */
00320   if (nc_get_att_float(cdf->ncid, amber->coordinates_id, "scale_factor", &amber->coordinates_scalefactor) != NC_NOERR) {
00321     printf("netcdfplugin) AMBER: no coordinates scalefactor attribute, 1.0 assumed\n");
00322   }
00323   printf("netcdfplugin) AMBER: coordinates scalefactor: %f\n", amber->coordinates_scalefactor);
00324 
00325 #if 0
00326   /* we don't need velocities at this time */
00327   rc = nc_inq_varid(cdf->ncid, "velocities", &amber->velocities_id);
00328   if (rc != NC_NOERR) {
00329     printf("netcdfplugin) AMBER: missing velocities variable, aborting\n");
00330     return CDF_ERR;
00331   }
00332 #endif
00333 
00334   /* optional periodic cell info */
00335   rc = nc_inq_varid(cdf->ncid, "cell_lengths", &amber->cell_lengths_id);
00336   if (rc == NC_NOERR) {
00337     rc = nc_inq_varid(cdf->ncid, "cell_angles", &amber->cell_angles_id);
00338     if (rc == NC_NOERR) {
00339       printf("netcdfplugin) AMBER trajectory contains periodic cell information\n");
00340       amber->has_box = 1;
00341 
00342       /* Cell lengths units */
00343       rc = nc_inq_attlen(cdf->ncid, amber->cell_lengths_id, "units", &len);
00344       if (rc == NC_NOERR && len > 0) {
00345         amber->cell_lengths_units = (char *) malloc((len+1) * sizeof(char));
00346         nc_get_att_text(cdf->ncid, amber->cell_lengths_id, "units", amber->cell_lengths_units);
00347         amber->cell_lengths_units[len] = '\0';
00348         printf("netcdfplugin) AMBER: cell lengths units: '%s'\n", amber->cell_lengths_units);
00349       } else {
00350         printf("netcdfplugin) AMBER: no cell lengths units attribute, Angstroms assumed\n");
00351       }
00352 
00353       /* Cell lengths scaling factor to get to Angstroms */
00354       if (nc_get_att_float(cdf->ncid, amber->cell_lengths_id, "scale_factor", &amber->cell_lengths_scalefactor) != NC_NOERR) {
00355         printf("netcdfplugin) AMBER: no cell lengths scalefactor attribute, 1.0 assumed\n");
00356       }
00357       printf("netcdfplugin) AMBER: cell lengths scalefactor: %f\n", amber->cell_lengths_scalefactor);
00358 
00359       /* Cell angles units */
00360       rc = nc_inq_attlen(cdf->ncid, amber->cell_angles_id, "units", &len);
00361       if (rc == NC_NOERR && len > 0) {
00362         amber->cell_angles_units = (char *) malloc((len+1) * sizeof(char));
00363         nc_get_att_text(cdf->ncid, amber->cell_angles_id, "units", amber->cell_angles_units);
00364         amber->cell_angles_units[len] = '\0';
00365         printf("netcdfplugin) AMBER: cell angles units: '%s'\n", amber->cell_angles_units);
00366       } else {
00367         printf("netcdfplugin) AMBER: no cell angles units attribute, Degrees assumed\n");
00368       }
00369 
00370       /* Cell angles scaling factor to get to degrees */
00371       if (nc_get_att_float(cdf->ncid, amber->cell_angles_id, "scale_factor", &amber->cell_angles_scalefactor) != NC_NOERR) {
00372         printf("netcdfplugin) AMBER: no cell angles scalefactor attribute, 1.0 assumed\n");
00373       }
00374       printf("netcdfplugin) AMBER: cell angles scalefactor: %f\n", amber->cell_angles_scalefactor);
00375     }
00376   }
00377 
00378   return CDF_SUCCESS;
00379 }
00380 
00381 
00382 static int open_mmtk_cdf_read(cdfdata *cdf, int conventionsknown) {
00383   int rc;
00384   size_t len; 
00385   mmtkdata *mmtk = &cdf->mmtk;
00386 
00387   /* If conventions specify MMTK then we're safe to continue */
00388   /* and we know what we're dealing with */
00389   if (conventionsknown) {
00390     cdf->type = CDF_TYPE_MMTK;
00391   }
00392 
00393   /* global attrib: "trajectory_type" (new format) */
00394   rc = nc_get_att_int(cdf->ncid, NC_GLOBAL, "trajectory_type", &mmtk->trajectorytype);
00395   if (rc == NC_NOERR) {
00396     printf("netcdfplugin) MMTK trajectory type: %d\n", mmtk->trajectorytype);
00397   } else {
00398     printf("netcdfplugin) Assuming MMTK trajectory type: %d\n", mmtk->trajectorytype);
00399     mmtk->trajectorytype = 0;
00400   }
00401 
00402   /* read in spatial dimension */
00403   rc = nc_inq_dimid(cdf->ncid, "xyz", &mmtk->xyzdimid);
00404   if (rc == NC_NOERR) {
00405     rc = nc_inq_dimlen(cdf->ncid, mmtk->xyzdimid, &mmtk->xyzdim);
00406     if (rc == NC_NOERR)
00407       printf("netcdfplugin) MMTK: xyz dimension: %ld\n", (long)mmtk->xyzdim);
00408     else 
00409       return CDF_ERR;
00410   } else {
00411     return CDF_ERR;
00412   }
00413 
00414   /* read in atom dimension */
00415   rc = nc_inq_dimid(cdf->ncid, "atom_number", &mmtk->atom_numberdimid); 
00416   if (rc == NC_NOERR) {
00417     rc = nc_inq_dimlen(cdf->ncid, mmtk->atom_numberdimid, &mmtk->atom_numberdim);
00418     if (rc == NC_NOERR) {
00419       printf("netcdfplugin) MMTK: atom_number dimension: %ld\n", (long)mmtk->atom_numberdim);
00420       cdf->natoms = mmtk->atom_numberdim; /* copy to format independent part */
00421     } else {
00422       return CDF_ERR;
00423     }
00424   } else {
00425     return CDF_ERR;
00426   }
00427 
00428 
00429   /* read in frame dimension */
00430   rc = nc_inq_dimid(cdf->ncid, "step_number", &mmtk->step_numberdimid);
00431   if (rc == NC_NOERR) {
00432     rc = nc_inq_dimlen(cdf->ncid, mmtk->step_numberdimid, &mmtk->step_numberdim);
00433     if (rc == NC_NOERR)
00434       printf("netcdfplugin) MMTK: step_number dimension: %ld\n", (long)mmtk->step_numberdim);
00435     else 
00436       return CDF_ERR;
00437   } else {
00438     return CDF_ERR;
00439   }
00440 
00441 
00442   /* read in minor step number dimension */
00443   rc = nc_inq_dimid(cdf->ncid, "minor_step_number", &mmtk->minor_step_numberdimid);
00444   if (rc == NC_NOERR) {
00445     rc = nc_inq_dimlen(cdf->ncid, mmtk->minor_step_numberdimid, &mmtk->minor_step_numberdim);
00446     if (rc == NC_NOERR)
00447       printf("netcdfplugin) MMTK: minor_step_number dimension: %ld\n", (long)mmtk->minor_step_numberdim);
00448     else 
00449       return CDF_ERR;
00450   } else if (rc == NC_EBADDIM) {
00451     printf("netcdfplugin) MMTK: no minor_step_number dimension\n");
00452     mmtk->minor_step_numberdim = 0;
00453   } else {
00454     return CDF_ERR;
00455   }
00456 
00457 
00458   /* read in description_length dimension */
00459   rc = nc_inq_dimid(cdf->ncid, "description_length", &mmtk->description_lengthdimid); 
00460   if (rc == NC_NOERR) {
00461     rc = nc_inq_dimlen(cdf->ncid, mmtk->description_lengthdimid, &mmtk->description_lengthdim);
00462     if (rc == NC_NOERR)
00463       printf("netcdfplugin) MMTK: description_length dimension: %ld\n", (long)mmtk->description_lengthdim);
00464     else
00465       return CDF_ERR;
00466   } else {
00467     return CDF_ERR;
00468   }
00469 
00470 
00471   /* get ID values for all of the variables we're interested in */
00472   rc = nc_inq_varid(cdf->ncid, "configuration", &mmtk->configuration_id);
00473   if (rc != NC_NOERR)
00474     return CDF_ERR;
00475 
00476   rc = nc_inq_varid(cdf->ncid, "description", &mmtk->description_id);
00477   if (rc != NC_NOERR)
00478     return CDF_ERR;
00479 
00480   /* check for PBC */
00481   rc = nc_inq_varid(cdf->ncid, "box_size", &mmtk->box_size_id);
00482   if (rc == NC_NOERR) {
00483     mmtk->has_box = 1;
00484     printf("netcdfplugin) MMTK: system has periodic boundary conditions\n");
00485   }
00486   else if (rc == NC_ENOTVAR)
00487     mmtk->has_box = 0;
00488   else
00489     return CDF_ERR;
00490 
00491 
00492   /* global attrib: "comment" -- optional */
00493   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "comment", &len);
00494   if (rc == NC_NOERR && len > 0) {
00495     mmtk->comment = (char *) malloc((len+1) * sizeof(char));
00496     nc_get_att_text(cdf->ncid, NC_GLOBAL, "comment", mmtk->comment);
00497     mmtk->comment[len] = '\0';
00498     printf("netcdfplugin) MMTK: comment '%s'\n", mmtk->comment);
00499   } 
00500 
00501   /* at this point we know that this is an MMTK trajectory */
00502   if (!conventionsknown) {
00503     printf("netcdfplugin) File is an old format MMTK trajectory without conventions\n");    
00504     cdf->type = CDF_TYPE_MMTK;
00505   }
00506 
00507   return CDF_SUCCESS;
00508 }
00509 
00510  
00511 static void *open_cdf_read(const char *filename, const char *filetype, 
00512                            int *natoms) {
00513   int ncid, rc;
00514   size_t len;
00515   cdfdata *cdf;
00516  
00517   rc = nc_open(filename, NC_NOWRITE, &ncid);
00518   if (rc != NC_NOERR) return NULL;
00519 
00520   cdf = (cdfdata *) malloc(sizeof(cdfdata));
00521   memset(cdf, 0, sizeof(cdfdata));
00522 
00523   cdf->ncid = ncid;
00524   cdf->type = CDF_TYPE_UNKNOWN;
00525 
00526   /* Determine what NetCDF conventions apply to this data, if any */
00527   rc = nc_inq_attlen(cdf->ncid, NC_GLOBAL, "Conventions", &len);
00528   if (rc == NC_NOERR && len > 0) {
00529     cdf->conventions = (char *) malloc((len+1) * sizeof(char));
00530     nc_get_att_text(cdf->ncid, NC_GLOBAL, "Conventions", cdf->conventions);
00531     cdf->conventions[len] = '\0';
00532     printf("netcdfplugin) conventions: '%s'\n", cdf->conventions);
00533   } 
00534 
00535   if (cdf->conventions != NULL) {
00536     /* Check if this is a file generated by AMBER */
00537     if (strstr(cdf->conventions, "AMBER") != NULL) {
00538       if (!open_amber_cdf_read(cdf)) {
00539         *natoms = cdf->natoms;
00540         return cdf;
00541       }
00542     } 
00543 
00544     /* Check if this is a file generated by MMTK */
00545     if (strstr(cdf->conventions, "MMTK") != NULL) {
00546       if (!open_mmtk_cdf_read(cdf, 1)) {
00547         *natoms = cdf->natoms;
00548         return cdf;
00549       }
00550     } 
00551   } 
00552 
00553   printf("netcdfplugin) Missing or unrecognized conventions attribute\n");
00554   printf("netcdfplugin) checking for old format MMTK NetCDF file...\n");
00555 
00556   /* If no conventions are specified, then maybe it's from MMTK */
00557   if (!open_mmtk_cdf_read(cdf, 0)) {
00558     *natoms = cdf->natoms;
00559     return cdf;
00560   } 
00561 
00562   /* if no conventions are recognized, then we free everything */
00563   /* and return failure                                        */
00564   close_cdf_read(cdf);
00565 
00566   return NULL; 
00567 }
00568 
00569 /* A very basic bracket counter. It assumes that the expression
00570    is syntactically correct. */
00571 static char *find_closing_bracket(char *s) {
00572   int count = 1;
00573   while (*s && count > 0) {
00574     if (*s == '(' || *s == '[')
00575       count++;
00576     if (*s == ')' || *s == ']')
00577       count--;
00578     s++;
00579   }
00580   return s;
00581 }
00582 
00583 /* Simple string replacement routine for fixing atom names. */
00584 static void atom_name_replace(char *name, char *substring, char letter) {
00585   char *s = strstr(name, substring);
00586   if (s != NULL) {
00587     *s = letter;
00588     strcpy(s+1, s+strlen(substring));
00589   }
00590 }
00591 
00592 static void atom_name_remove_underscores(char *name) {
00593   char *s = name;
00594   while (1) {
00595     s = strchr(s, '_');
00596     if (s == NULL)
00597       break;
00598     strcpy(s, s+1);
00599   }
00600 }
00601 
00602 /* Set chainid, resname, and resnum for a range of atoms
00603    and fix atom names. */
00604 static void set_atom_attributes(molfile_atom_t *atoms, int natoms,
00605                                 char **atom_pointers, char chain_id,
00606                                 char *resname, int resnum,
00607                                 char *start, char *end,
00608                                 int name_correction_type) {
00609   int i;
00610   for (i=0; i<natoms; i++)
00611     if (atom_pointers[i] > start && atom_pointers[i] < end) {
00612       molfile_atom_t *atom = atoms + i;
00613       atom->chain[0] = chain_id;      
00614       atom->chain[1] = '\0';      
00615       strcpy(atom->resname, resname);
00616       atom->resid = resnum;
00617       if (name_correction_type == 1 /* proteins */) {
00618         atom_name_replace(atom->name, "_alpha", 'A');
00619         atom_name_replace(atom->name, "_beta", 'B');
00620         atom_name_replace(atom->name, "_gamma", 'G');
00621         atom_name_replace(atom->name, "_delta", 'D');
00622         atom_name_replace(atom->name, "_epsilon", 'E');
00623         atom_name_replace(atom->name, "_zeta", 'Z');
00624         atom_name_replace(atom->name, "_eta", 'H');
00625         atom_name_remove_underscores(atom->name);
00626       }
00627       else if (name_correction_type == 2 /* nucleic acids */) {
00628         if (strcmp(atom->name, "O_1") == 0)
00629           strcpy(atom->name, "O1P");
00630         else if (strcmp(atom->name, "O_2") == 0)
00631           strcpy(atom->name, "O2P");
00632         else if (strcmp(atom->name, "C_1") == 0)
00633           strcpy(atom->name, "C1'");
00634         else if (strcmp(atom->name, "C_2") == 0)
00635           strcpy(atom->name, "C2'");
00636         else if (strcmp(atom->name, "C_3") == 0)
00637           strcpy(atom->name, "C3'");
00638         else if (strcmp(atom->name, "O_3") == 0)
00639           strcpy(atom->name, "O3'");
00640         else if (strcmp(atom->name, "C_4") == 0)
00641           strcpy(atom->name, "C4'");
00642         else if (strcmp(atom->name, "O_4") == 0)
00643           strcpy(atom->name, "O4'");
00644         else if (strcmp(atom->name, "C_5") == 0)
00645           strcpy(atom->name, "C5'");
00646         else if (strcmp(atom->name, "O_5") == 0)
00647           strcpy(atom->name, "O5'");
00648         else
00649           atom_name_remove_underscores(atom->name);
00650       }
00651     }
00652 }
00653 
00654 /* Get structure from an MMTK trajectory file */
00655 static int read_mmtk_cdf_structure(void *mydata, int *optflags,
00656                                    molfile_atom_t *atoms) {
00657   int i, rc;
00658   molfile_atom_t *atom;
00659   cdfdata *cdf = (cdfdata *) mydata;
00660   mmtkdata *mmtk = &cdf->mmtk;
00661   size_t start[3], count[3];
00662   char *dstr;
00663   char **atom_pointers;
00664   int resnum;
00665   char resname[8];
00666 
00667   *optflags = MOLFILE_NOOPTIONS;
00668 
00669   mmtk->description = (char *) malloc((mmtk->description_lengthdim + 1) * sizeof(char));
00670   if (mmtk->description == NULL) 
00671     return MOLFILE_ERROR;
00672 
00673   start[0] = cdf->curframe; /* frame */
00674   count[0] = mmtk->description_lengthdim;
00675 
00676   rc = nc_get_vara_text(cdf->ncid, mmtk->description_id,
00677                         start, count, mmtk->description);
00678   if (rc != NC_NOERR)
00679     return MOLFILE_ERROR;
00680 
00681   /* initialize all atoms with name "X" to start with */
00682   /* indicating unknown atom types etc..              */
00683   for (i=0; i<cdf->natoms; i++) {
00684     atom = atoms + i;
00685     strncpy(atom->name, "X", sizeof(atom->name)-1);
00686     atom->name[sizeof(atom->name)-1] = '\0';
00687     strncpy(atom->type, atom->name, sizeof(atom->type)-1);
00688     atom->type[sizeof(atom->type)-1] = '\0';
00689     atom->resname[0] = '\0';
00690     atom->resid = 1;
00691     atom->chain[0] = '\0';
00692     atom->segid[0] = '\0';
00693   }
00694 
00695   /* Allocate a pointer array that will hold each atom's location in
00696      the description string. This will be used in a second pass through
00697      the description string in which residue names and indices will
00698      be assigned. */
00699   atom_pointers = (char **) malloc(cdf->natoms * sizeof(char *));
00700   if (atom_pointers == NULL)
00701     return MOLFILE_ERROR;
00702 
00703   /* First pass: look only at atoms */
00704   dstr = mmtk->description;
00705   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00706     char *atomstr;
00707     atomstr = strstr(dstr, "A('");
00708 
00709     if (atomstr != NULL) {
00710       char name[1024];
00711       char *nmstart = NULL;
00712       char *nmend = NULL;
00713       char *indstart = NULL;
00714       char *endp = NULL;
00715       int index, len;
00716 
00717       endp = strchr(atomstr, ')');
00718       nmstart = strchr(atomstr, '\'');
00719       if (nmstart != NULL)
00720         nmend = strchr(nmstart+1, '\'');
00721       indstart = strchr(atomstr, ',');
00722       if (endp == NULL || nmstart == NULL || nmend == NULL || indstart == NULL) {
00723         printf("netcdfplugin) mmtk_read_structure(): unable to parse atom tag\n");
00724         break; /* something went wrong */
00725       }
00726 
00727       len = nmend - nmstart - 1;
00728       if (len > sizeof(name)) {
00729         printf("netcdfplugin) mmtk_read_structure(): bad length: %d\n", len);
00730         break; /* something went wrong */
00731       }
00732       memcpy(name, nmstart+1, len); 
00733       name[len] = '\0';
00734 
00735       index = -1;
00736       sscanf(indstart, ",%d)", &index);
00737       atom_pointers[index] = atomstr;
00738 
00739       if (index >= 0 && index < cdf->natoms) {
00740         atom = atoms + index;
00741         strncpy(atom->name, name, sizeof(atom->name)-1);
00742         atom->name[sizeof(atom->name)-1] = '\0';
00743         strncpy(atom->type, atom->name, sizeof(atom->type)-1);
00744         atom->type[sizeof(atom->type)-1] = '\0';
00745       }
00746 
00747       dstr = atomstr+1;
00748     } else {
00749       break; /* no more atom records found */
00750     }
00751   }
00752 
00753   /* Second pass: peptide chains */
00754   dstr = mmtk->description;
00755   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00756     char *peptide, *pend;
00757     char *group, *gend;
00758     char *nmstart, *nmend;
00759     char chain_id = 'A';
00760     char *s;
00761 
00762     peptide = strstr(dstr, "S('");
00763     if (peptide == NULL)
00764       break;
00765     pend = find_closing_bracket(peptide+2);
00766 
00767     resnum = 1;
00768     group = peptide;
00769     while (1) {
00770       group = strstr(group, "G('");
00771       if (group == NULL || group >= pend)
00772         break;
00773       gend = find_closing_bracket(group+2);
00774       nmstart = strchr(group, '\'') + 1;
00775       nmend = strchr(nmstart, '\'');
00776       while (nmend > nmstart && isdigit(*(nmend-1)))
00777         nmend--;
00778       if (nmend-nmstart > 7)
00779         nmend = nmstart+7;
00780       strncpy(resname, nmstart, nmend-nmstart);
00781       resname[nmend-nmstart] = '\0';
00782       s = resname;
00783       while (*s) {
00784         *s = toupper(*s);
00785         s++;
00786       }
00787       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00788                           chain_id, resname, resnum, group, gend, 1);
00789       group = gend;
00790       resnum++;
00791     }
00792 
00793     if (chain_id == 'Z')
00794       chain_id = 'A';
00795     else
00796         chain_id++;
00797     dstr = pend;
00798   }
00799 
00800   /* Third pass: nucleic acid chains */
00801   dstr = mmtk->description;
00802   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00803     char *nacid, *nend;
00804     char *group, *gend;
00805     char *nmstart, *nmend;
00806     char chain_id = 'a';
00807     char *s;
00808 
00809     nacid = strstr(dstr, "N('");
00810     if (nacid == NULL)
00811       break;
00812     nend = find_closing_bracket(nacid+2);
00813 
00814     resnum = 1;
00815     group = nacid;
00816     while (1) {
00817       group = strstr(group, "G('");
00818       if (group == NULL || group >= nend)
00819         break;
00820       gend = find_closing_bracket(group+2);
00821       nmstart = strchr(group, '\'') + 1;
00822       nmend = strchr(nmstart, '\'');
00823       while (nmend > nmstart && isdigit(*(nmend-1)))
00824         nmend--;
00825       if (nmend > nmstart && nmend[-1] == '_')
00826         nmend--;
00827       if (nmend-nmstart > 7)
00828         nmend = nmstart+7;
00829       strncpy(resname, nmstart, nmend-nmstart);
00830       resname[nmend-nmstart] = '\0';
00831       s = resname;
00832       while (*s) {
00833         *s = toupper(*s);
00834         s++;
00835       }
00836       if (resname[0] == 'R' || resname[0] == 'D') {
00837         switch (resname[1]) {
00838         case 'A':
00839           strcpy(resname, "ADE");
00840           break;
00841         case 'C':
00842           strcpy(resname, "CYT");
00843           break;
00844         case 'G':
00845           strcpy(resname, "GUA");
00846           break;
00847         case 'T':
00848           strcpy(resname, "THY");
00849           break;
00850         case 'U':
00851           strcpy(resname, "URA");
00852           break;
00853         }
00854       }
00855       set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00856                           chain_id, resname, resnum, group, gend, 2);
00857       group = gend;
00858       resnum++;
00859     }
00860 
00861     if (chain_id == 'z')
00862       chain_id = 'a';
00863     else
00864         chain_id++;
00865     dstr = nend;
00866   }
00867 
00868   /* Fourth pass: non-chain molecules */
00869   resnum = 1;
00870   dstr = mmtk->description;
00871   while (dstr < (mmtk->description + mmtk->description_lengthdim)) {
00872     char *molecule, *mend;
00873     char *nmstart, *nmend;
00874 
00875     molecule = strstr(dstr, "M('");
00876     if (molecule == NULL)
00877       break;
00878     mend = find_closing_bracket(molecule+2);
00879     nmstart = strchr(molecule, '\'') + 1;
00880     nmend = strchr(nmstart, '\'');
00881     if (strncmp(nmstart, "water", 5) == 0)
00882       strcpy(resname, "HOH");
00883     else {
00884       if (nmend-nmstart > 7)
00885         nmend = nmstart+7;
00886       strncpy(resname, nmstart, nmend-nmstart);
00887       resname[nmend-nmstart] = '\0';
00888     }
00889     set_atom_attributes(atoms, cdf->natoms, atom_pointers,
00890                         '_', resname, resnum, molecule, mend, 0);
00891     resnum++;
00892     dstr = mend;
00893   }
00894 
00895   free(atom_pointers);
00896 
00897   return MOLFILE_SUCCESS;
00898 }
00899 
00900 
00901 static int read_cdf_structure(void *mydata, int *optflags,
00902                                    molfile_atom_t *atoms) {
00903   cdfdata *cdf = (cdfdata *)mydata;
00904 
00905   switch (cdf->type) {
00906     case CDF_TYPE_AMBER:
00907       return MOLFILE_NOSTRUCTUREDATA; /* not an error, just no data */
00908 
00909     case CDF_TYPE_MMTK:
00910       return read_mmtk_cdf_structure(mydata, optflags, atoms);
00911   }
00912 
00913   return MOLFILE_ERROR;
00914 }
00915 
00916 
00917 static int read_amber_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
00918   size_t start[3], count[3];
00919   cdfdata *cdf = (cdfdata *)mydata;
00920   amberdata *amber = &cdf->amber;
00921   int rc;
00922 
00923   /* Read in the atom coordinates and unit cell information */
00924   /* only save coords if we're given a valid ts pointer     */ 
00925   /* otherwise VMD wants us to skip it.                     */
00926   if (ts != NULL) {
00927     if (amber->is_restart) {
00928       // restart files only contain one frame, so return if we get called again
00929       if (cdf->curframe > 0)
00930         return MOLFILE_ERROR;
00931 
00932       start[0] = 0;             /* atom */
00933       start[1] = 0;             /* spatial */
00934       start[2] = 0;
00935 
00936       count[0] = amber->atomdim;
00937       count[1] = amber->spatialdim;
00938       count[2] = 0;
00939 
00940       rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id, 
00941                              start, count, ts->coords);
00942       if (rc != NC_NOERR) { 
00943         printf("netcdfplugin) AMBER: failed to parse restart file coordinates!\n");
00944       }
00945     } else {
00946       start[0] = cdf->curframe; /* frame */
00947       start[1] = 0;             /* atom */
00948       start[2] = 0;             /* spatial */
00949 
00950       count[0] = 1;
00951       count[1] = amber->atomdim;
00952       count[2] = amber->spatialdim;
00953 
00954       /* parse trajectory timestep */
00955       rc = nc_get_vara_float(cdf->ncid, amber->coordinates_id, 
00956                              start, count, ts->coords);
00957     }
00958 
00959     if (rc != NC_NOERR) 
00960       return MOLFILE_ERROR;
00961 
00962     /* apply coordinate scaling factor if not 1.0 */
00963     if (amber->coordinates_scalefactor != 1.0) {
00964       int i;
00965       float s = amber->coordinates_scalefactor;
00966       for (i=0; i<natoms*3; i++) {
00967         ts->coords[i] *= s;
00968       }
00969     }
00970 
00971     /* Read the PBC box info. */
00972     if (amber->has_box) {
00973       size_t start[3], count[3];
00974       double lengths[3];
00975       double angles[3];
00976 
00977       if (amber->is_restart) {
00978         start[0] = 0;             /* spatial */
00979         start[1] = 0;
00980         start[2] = 0;
00981         count[0] = amber->spatialdim;
00982         count[1] = 0;
00983         count[2] = 0;
00984       } else {
00985         start[0] = cdf->curframe; /* frame */
00986         start[1] = 0;             /* spatial */
00987         start[2] = 0;
00988         count[0] = 1;
00989         count[1] = amber->spatialdim;
00990         count[2] = 0;
00991       }
00992 
00993       rc = nc_get_vara_double(cdf->ncid, amber->cell_lengths_id, 
00994                               start, count, lengths);
00995       if (rc != NC_NOERR) 
00996         return MOLFILE_ERROR;
00997 
00998       rc = nc_get_vara_double(cdf->ncid, amber->cell_angles_id, 
00999                               start, count, angles);
01000       if (rc != NC_NOERR) 
01001         return MOLFILE_ERROR;
01002 
01003       ts->A = lengths[0] * amber->cell_lengths_scalefactor;
01004       ts->B = lengths[1] * amber->cell_lengths_scalefactor;
01005       ts->C = lengths[2] * amber->cell_lengths_scalefactor;
01006 
01007       ts->alpha = angles[0] * amber->cell_angles_scalefactor;
01008       ts->beta  = angles[1] * amber->cell_angles_scalefactor;
01009       ts->gamma = angles[2] * amber->cell_angles_scalefactor;
01010     }
01011   }
01012 
01013   cdf->curframe++;
01014   return MOLFILE_SUCCESS;
01015 }
01016 
01017 
01018 static int read_mmtk_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
01019   cdfdata *cdf = (cdfdata *)mydata;
01020   mmtkdata *mmtk = &cdf->mmtk;
01021   int rc;
01022 
01023   /* Read in the atom coordinates and unit cell information */
01024   /* only save coords if we're given a valid ts pointer     */ 
01025   /* otherwise VMD wants us to skip it.                     */
01026   if (ts != NULL) {
01027     size_t start[4], count[4];
01028     int i;
01029 
01030     if (mmtk->minor_step_numberdim == 0) {
01031       start[0] = cdf->curframe; /* step */
01032       start[1] = 0;             /* atom */
01033       start[2] = 0;             /* spatial */
01034       start[3] = 0;             /* minor step */
01035     }
01036     else {
01037       start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
01038       start[1] = 0;             /* atom */
01039       start[2] = 0;             /* spatial */
01040       start[3] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
01041     }
01042 
01043     count[0] = 1;
01044     count[1] = mmtk->atom_numberdim;
01045     count[2] = mmtk->xyzdim;
01046     count[3] = 1;             /* only want one minor step, regardless */
01047 
01048     rc = nc_get_vara_float(cdf->ncid, mmtk->configuration_id, 
01049                            start, count, ts->coords);
01050     if (rc != NC_NOERR) 
01051       return MOLFILE_ERROR;
01052 
01053     /* check for allocated but not yet used frame */
01054     if (ts->coords[0] == NC_FILL_FLOAT)
01055       return MOLFILE_ERROR;
01056 
01057     /* scale coordinates from nanometers to angstroms */
01058     for (i=0; i<(3 * mmtk->atom_numberdim); i++) {
01059       ts->coords[i] *= 10.0f;
01060     }
01061 
01062     /* Read the PBC box info. */
01063     if (mmtk->has_box) {
01064       float lengths[3];
01065 
01066       if (mmtk->minor_step_numberdim == 0) {
01067         start[0] = cdf->curframe; /* step */
01068         start[1] = 0;             /* box_size */
01069         start[2] = 0;             /* minor step */
01070       }
01071       else {
01072         start[0] = cdf->curframe/mmtk->minor_step_numberdim;   /* step */
01073         start[1] = 0;             /* box_size */
01074         start[2] = cdf->curframe % mmtk->minor_step_numberdim; /* minor step */
01075       }
01076 
01077       count[0] = 1;
01078       count[1] = 3;
01079       count[2] = 1;
01080 
01081       rc = nc_get_vara_float(cdf->ncid, mmtk->box_size_id,
01082                              start, count, lengths);
01083       if (rc != NC_NOERR) 
01084         return MOLFILE_ERROR;
01085 
01086       ts->A = 10.*lengths[0];
01087       ts->B = 10.*lengths[1];
01088       ts->C = 10.*lengths[2];
01089 
01090       ts->alpha = 90.;
01091       ts->beta  = 90.;
01092       ts->gamma = 90.;
01093     }
01094   }
01095 
01096   cdf->curframe++;
01097   return MOLFILE_SUCCESS;
01098 }
01099 
01100 
01101 
01102 static int read_cdf_timestep(void *mydata, int natoms, molfile_timestep_t *ts) {
01103   cdfdata *cdf = (cdfdata *)mydata;
01104 
01105   switch (cdf->type) {
01106     case CDF_TYPE_AMBER: 
01107       return read_amber_cdf_timestep(mydata, natoms, ts); 
01108 
01109     case CDF_TYPE_MMTK:
01110       return read_mmtk_cdf_timestep(mydata, natoms, ts); 
01111   }
01112 
01113   return MOLFILE_ERROR;
01114 }
01115 
01116 
01117 static molfile_plugin_t plugin;
01118 
01119 VMDPLUGIN_API int VMDPLUGIN_init(void) {
01120   memset(&plugin, 0, sizeof(molfile_plugin_t));
01121   plugin.abiversion = vmdplugin_ABIVERSION;
01122   plugin.type = MOLFILE_PLUGIN_TYPE;
01123   plugin.name = "netcdf";
01124   plugin.prettyname = "NetCDF (AMBER, MMTK)";
01125   plugin.author = "Konrad Hinsen, John Stone";
01126   plugin.majorv = 1;
01127   plugin.minorv = 2;
01128   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
01129   plugin.filename_extension = "nc,ncrst";
01130   plugin.open_file_read = open_cdf_read;
01131   plugin.read_structure = read_cdf_structure;
01132   plugin.read_next_timestep = read_cdf_timestep;
01133   plugin.close_file_read = close_cdf_read;
01134   return VMDPLUGIN_SUCCESS; 
01135 }
01136 
01137 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
01138   (*cb)(v, (vmdplugin_t *)&plugin);
01139   return VMDPLUGIN_SUCCESS;
01140 }
01141 
01142 VMDPLUGIN_API int VMDPLUGIN_fini(void) { return VMDPLUGIN_SUCCESS; }
01143 

Generated on Fri Jan 24 02:56:24 2020 for VMD Plugins (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002