/*************************************************************************** *cr *cr (C) Copyright 1995-2005 The Board of Trustees of the *cr University of Illinois *cr All Rights Reserved *cr ***************************************************************************/ /*************************************************************************** * RCS INFORMATION: * * $RCSfile: dcdplugin.c,v $ * $Author: johns $ $Locker: $ $State: Exp $ * $Revision: 1.53 $ $Date: 2005/06/09 20:55:09 $ * *************************************************************************** * DESCRIPTION: * Code for reading and writing Charmm, NAMD, and X-PLOR format * molecular dynamic trajectory files. * * TODO: * Integrate improvements from the NAMD source tree * - NAMD's writer code has better type-correctness for the sizes * of "int". NAMD uses "int32" explicitly, which is required on * machines like the T3E. VMD's version of the code doesn't do that * presently. * * Try various alternative I/O API options: * - use mmap(), with read-once flags * - use O_DIRECT open mode on new revs of Linux kernel * - use directio() call on a file descriptor to enable on Solaris * - use aio_open()/read()/write() * - use readv/writev() etc. * * Standalone test binary compilation flags: * cc -fast -xarch=v9a -I../../include -DTEST_DCDPLUGIN dcdplugin.c \ * -o ~/bin/readdcd -lm * * Profiling flags: * cc -xpg -fast -xarch=v9a -g -I../../include -DTEST_DCDPLUGIN dcdplugin.c \ * -o ~/bin/readdcd -lm * ***************************************************************************/ #if defined(_AIX) /* Define to enable large file extensions on AIX */ #define _LARGE_FILE #else /* Defines which enable LFS I/O interfaces for large (>2GB) file support * on 32-bit machines. These must be defined before inclusion of any * system headers. */ #define _LARGEFILE_SOURCE #define _FILE_OFFSET_BITS 64 #endif #include #include #include #include #include #include #include #include "endianswap.h" #include "fastio.h" #include "molfile_plugin.h" #ifndef M_PI_2 #define M_PI_2 1.57079632679489661922 #endif typedef struct { fio_fd fd; int natoms; int nsets; int setsread; int istart; int nsavc; double delta; int nfixed; float *x, *y, *z; int *freeind; float *fixedcoords; int reverse; int charmm; int first; int with_unitcell; } dcdhandle; /* Define error codes that may be returned by the DCD routines */ #define DCD_SUCCESS 0 /* No problems */ #define DCD_EOF -1 /* Normal EOF */ #define DCD_DNE -2 /* DCD file does not exist */ #define DCD_OPENFAILED -3 /* Open of DCD file failed */ #define DCD_BADREAD -4 /* read call on DCD file failed */ #define DCD_BADEOF -5 /* premature EOF found in DCD file */ #define DCD_BADFORMAT -6 /* format of DCD file is wrong */ #define DCD_FILEEXISTS -7 /* output file already exists */ #define DCD_BADMALLOC -8 /* malloc failed */ #define DCD_BADWRITE -9 /* write call on DCD file failed */ /* Define feature flags for this DCD file */ #define DCD_IS_CHARMM 0x01 #define DCD_HAS_4DIMS 0x02 #define DCD_HAS_EXTRA_BLOCK 0x04 /* defines used by write_dcdstep */ #define NFILE_POS 8L #define NSTEP_POS 20L /* READ Macro to make porting easier */ #define READ(fd, buf, size) fio_fread(((void *) buf), (size), 1, (fd)) /* WRITE Macro to make porting easier */ #define WRITE(fd, buf, size) fio_fwrite(((void *) buf), (size), 1, (fd)) /* XXX This is broken - fread never returns -1 */ #define CHECK_FREAD(X, msg) if (X==-1) { return(DCD_BADREAD); } #define CHECK_FEOF(X, msg) if (X==0) { return(DCD_BADEOF); } /* * Read the header information from a dcd file. * Input: fd - a file struct opened for binary reading. * Output: 0 on success, negative error code on failure. * Side effects: *natoms set to number of atoms per frame * *nsets set to number of frames in dcd file * *istart set to starting timestep of dcd file * *nsavc set to timesteps between dcd saves * *delta set to value of trajectory timestep * *nfixed set to number of fixed atoms * *freeind may be set to heap-allocated space * *reverse set to one if reverse-endian, zero if not. * *charmm set to internal code for handling charmm data. */ static int read_dcdheader(fio_fd fd, int *N, int *NSET, int *ISTART, int *NSAVC, double *DELTA, int *NAMNF, int **FREEINDEXES, float **fixedcoords, int *reverseEndian, int *charmm) { int input_integer; /* buffer space */ int i, ret_val; char hdrbuf[84]; /* char buffer used to store header */ int NTITLE; /* First thing in the file should be an 84 */ ret_val = READ(fd, &input_integer, sizeof(int)); CHECK_FREAD(ret_val, "reading first int from dcd file"); CHECK_FEOF(ret_val, "reading first int from dcd file"); /* Check magic number in file header and determine byte order*/ if (input_integer != 84) { /* check to see if its merely reversed endianism */ /* rather than a totally incorrect file magic number */ swap4_aligned(&input_integer, 1); if (input_integer == 84) { *reverseEndian=1; } else { /* not simply reversed endianism, but something rather more evil */ return DCD_BADFORMAT; } } else { *reverseEndian=0; } /* Buffer the entire header for random access */ ret_val = READ(fd, hdrbuf, 84); CHECK_FREAD(ret_val, "buffering header"); CHECK_FEOF(ret_val, "buffering header"); /* Check for the ID string "COORD" */ if (hdrbuf[0] != 'C' || hdrbuf[1] != 'O' || hdrbuf[2] != 'R' || hdrbuf[3] != 'D') { return DCD_BADFORMAT; } /* CHARMm-genereate DCD files set the last integer in the */ /* header, which is unused by X-PLOR, to its version number. */ /* Checking if this is nonzero tells us this is a CHARMm file */ /* and to look for other CHARMm flags. */ if (*((int *) (hdrbuf + 80)) != 0) { (*charmm) = DCD_IS_CHARMM; if (*((int *) (hdrbuf + 44)) != 0) (*charmm) |= DCD_HAS_EXTRA_BLOCK; if (*((int *) (hdrbuf + 48)) == 1) (*charmm) |= DCD_HAS_4DIMS; } else { (*charmm) = 0; } /* Store the number of sets of coordinates (NSET) */ (*NSET) = *((int *) (hdrbuf + 4)); if (*reverseEndian) swap4_unaligned(NSET, 1); /* Store ISTART, the starting timestep */ (*ISTART) = *((int *) (hdrbuf + 8)); if (*reverseEndian) swap4_unaligned(ISTART, 1); /* Store NSAVC, the number of timesteps between dcd saves */ (*NSAVC) = *((int *) (hdrbuf + 12)); if (*reverseEndian) swap4_unaligned(NSAVC, 1); /* Store NAMNF, the number of fixed atoms */ (*NAMNF) = *((int *) (hdrbuf + 36)); if (*reverseEndian) swap4_unaligned(NAMNF, 1); /* Read in the timestep, DELTA */ /* Note: DELTA is stored as a double with X-PLOR but as a float with CHARMm */ if ((*charmm) & DCD_IS_CHARMM) { float ftmp; ftmp = *((float *)(hdrbuf+40)); /* is this safe on Alpha? */ if (*reverseEndian) swap4_aligned(&ftmp, 1); *DELTA = (double)ftmp; } else { (*DELTA) = *((double *)(hdrbuf + 40)); if (*reverseEndian) swap8_unaligned(DELTA, 1); } /* Get the end size of the first block */ ret_val = READ(fd, &input_integer, sizeof(int)); CHECK_FREAD(ret_val, "reading second 84 from dcd file"); CHECK_FEOF(ret_val, "reading second 84 from dcd file"); if (*reverseEndian) swap4_aligned(&input_integer, 1); if (input_integer != 84) { return DCD_BADFORMAT; } /* Read in the size of the next block */ ret_val = READ(fd, &input_integer, sizeof(int)); CHECK_FREAD(ret_val, "reading size of title block"); CHECK_FEOF(ret_val, "reading size of title block"); if (*reverseEndian) swap4_aligned(&input_integer, 1); if (((input_integer-4) % 80) == 0) { /* Read NTITLE, the number of 80 character title strings there are */ ret_val = READ(fd, &NTITLE, sizeof(int)); CHECK_FREAD(ret_val, "reading NTITLE"); CHECK_FEOF(ret_val, "reading NTITLE"); if (*reverseEndian) swap4_aligned(&NTITLE, 1); for (i=0; ifd = fd; if ((rc = read_dcdheader(dcd->fd, &dcd->natoms, &dcd->nsets, &dcd->istart, &dcd->nsavc, &dcd->delta, &dcd->nfixed, &dcd->freeind, &dcd->fixedcoords, &dcd->reverse, &dcd->charmm))) { fprintf(stderr, "read_dcdheader returned %d\n", rc); fio_fclose(dcd->fd); free(dcd); return NULL; } /* * Check that the file is big enough to really hold the number of sets * it claims to have. Then we'll use nsets to keep track of where EOF * should be. */ { off_t ndims, firstframesize, framesize, extrablocksize; off_t filesize; extrablocksize = dcd->charmm & DCD_HAS_EXTRA_BLOCK ? 48 + 8 : 0; ndims = dcd->charmm & DCD_HAS_4DIMS ? 4 : 3; firstframesize = (dcd->natoms+2) * ndims * sizeof(float) + extrablocksize; framesize = (dcd->natoms-dcd->nfixed+2) * ndims * sizeof(float) + extrablocksize; /* * It's safe to use ftell, even though ftell returns a long, because the * header size is < 4GB. */ filesize = stbuf.st_size - fio_ftell(dcd->fd) - firstframesize; if (filesize < 0) { fprintf(stderr, "DCD file '%s' appears to contain no timesteps.\n", path); fio_fclose(dcd->fd); free(dcd); return NULL; } dcd->nsets = filesize / framesize + 1; dcd->setsread = 0; } dcd->first = 1; dcd->x = (float *)malloc(dcd->natoms * sizeof(float)); dcd->y = (float *)malloc(dcd->natoms * sizeof(float)); dcd->z = (float *)malloc(dcd->natoms * sizeof(float)); if (!dcd->x || !dcd->y || !dcd->z) { fprintf(stderr, "Unable to allocate space for %d atoms.\n", dcd->natoms); free(dcd->x); free(dcd->y); free(dcd->z); fio_fclose(dcd->fd); free(dcd); return NULL; } *natoms = dcd->natoms; return dcd; } static int read_next_timestep(void *v, int natoms, molfile_timestep_t *ts) { dcdhandle *dcd; int i, j, rc; float unitcell[6]; unitcell[0] = unitcell[2] = unitcell[5] = 1.0f; unitcell[1] = unitcell[3] = unitcell[4] = 90.0f; dcd = (dcdhandle *)v; /* Check for EOF here; that way all EOF's encountered later must be errors */ if (dcd->setsread == dcd->nsets) return MOLFILE_EOF; dcd->setsread++; if (!ts) { if (dcd->first && dcd->nfixed) { /* We can't just skip it because we need the fixed atom coordinates */ rc = read_dcdstep(dcd->fd, dcd->natoms, dcd->x, dcd->y, dcd->z, unitcell, dcd->nfixed, dcd->first, dcd->freeind, dcd->fixedcoords, dcd->reverse, dcd->charmm); dcd->first = 0; return rc; /* XXX this needs to be updated */ } dcd->first = 0; /* XXX this needs to be changed */ return skip_dcdstep(dcd->fd, dcd->natoms, dcd->nfixed, dcd->charmm); } rc = read_dcdstep(dcd->fd, dcd->natoms, dcd->x, dcd->y, dcd->z, unitcell, dcd->nfixed, dcd->first, dcd->freeind, dcd->fixedcoords, dcd->reverse, dcd->charmm); dcd->first = 0; if (rc < 0) { fprintf(stderr, "read_dcdstep returned %d\n", rc); return MOLFILE_ERROR; } /* copy timestep data from plugin-local buffers to VMD's buffer */ /* XXX * This code is still the root of all evil. Just doing this extra copy * cuts the I/O rate of the DCD reader from 728 MB/sec down to * 394 MB/sec when reading from a ram filesystem. * For a physical disk filesystem, the I/O rate goes from * 187 MB/sec down to 122 MB/sec. Clearly this extra copy has to go. */ { int natoms = dcd->natoms; float *nts = ts->coords; const float *bufx = dcd->x; const float *bufy = dcd->y; const float *bufz = dcd->z; for (i=0, j=0; iA = unitcell[0]; ts->B = unitcell[2]; ts->C = unitcell[5]; if (unitcell[1] >= -1.0 && unitcell[1] <= 1.0 && unitcell[3] >= -1.0 && unitcell[3] <= 1.0 && unitcell[4] >= -1.0 && unitcell[4] <= 1.0) { /* This file was generated by Charmm, or by NAMD > 2.5, with the angle */ /* cosines of the periodic cell angles written to the DCD file. */ /* This formulation improves rounding behavior for orthogonal cells */ /* so that the angles end up at precisely 90 degrees, unlike acos(). */ ts->alpha = 90.0 - asin(unitcell[1]) * 90.0 / M_PI_2; ts->beta = 90.0 - asin(unitcell[3]) * 90.0 / M_PI_2; ts->gamma = 90.0 - asin(unitcell[4]) * 90.0 / M_PI_2; } else { /* This file was likely generated by NAMD 2.5 and the periodic cell */ /* angles are specified in degrees rather than angle cosines. */ ts->alpha = unitcell[1]; ts->beta = unitcell[3]; ts->gamma = unitcell[4]; } return MOLFILE_SUCCESS; } static void close_file_read(void *v) { dcdhandle *dcd = (dcdhandle *)v; close_dcd_read(dcd->freeind, dcd->fixedcoords); fio_fclose(dcd->fd); free(dcd->x); free(dcd->y); free(dcd->z); free(dcd); } static void *open_dcd_write(const char *path, const char *filetype, int natoms) { dcdhandle *dcd; fio_fd fd; int rc; int istart, nsavc; double delta; int with_unitcell; int charmm; if (fio_open(path, FIO_WRITE, &fd) < 0) { fprintf(stderr, "Could not open file %s for writing\n", path); return NULL; } dcd = (dcdhandle *)malloc(sizeof(dcdhandle)); memset(dcd, 0, sizeof(dcdhandle)); dcd->fd = fd; istart = 0; /* starting timestep of DCD file */ nsavc = 1; /* number of timesteps between written DCD frames */ delta = 1.0; /* length of a timestep */ with_unitcell = 1; /* contains unit cell information (charmm format) */ charmm = DCD_IS_CHARMM; /* charmm-formatted DCD file */ if (with_unitcell) charmm |= DCD_HAS_EXTRA_BLOCK; rc = write_dcdheader(dcd->fd, "Created by DCD plugin", natoms, istart, nsavc, delta, with_unitcell, charmm); if (rc < 0) { fprintf(stderr, "write_dcdheader returned %d\n", rc); fio_fclose(dcd->fd); free(dcd); return NULL; } dcd->natoms = natoms; dcd->nsets = 0; dcd->istart = istart; dcd->nsavc = nsavc; dcd->with_unitcell = with_unitcell; dcd->charmm = charmm; dcd->x = (float *)malloc(natoms * sizeof(float)); dcd->y = (float *)malloc(natoms * sizeof(float)); dcd->z = (float *)malloc(natoms * sizeof(float)); return dcd; } static int write_timestep(void *v, const molfile_timestep_t *ts) { dcdhandle *dcd = (dcdhandle *)v; int i, rc, curstep; float *pos = ts->coords; double unitcell[6]; unitcell[0] = unitcell[2] = unitcell[5] = 1.0f; unitcell[1] = unitcell[3] = unitcell[4] = 90.0f; /* copy atom coords into separate X/Y/Z arrays for writing */ for (i=0; inatoms; i++) { dcd->x[i] = *(pos++); dcd->y[i] = *(pos++); dcd->z[i] = *(pos++); } dcd->nsets++; curstep = dcd->istart + dcd->nsets * dcd->nsavc; unitcell[0] = ts->A; unitcell[2] = ts->B; unitcell[5] = ts->C; unitcell[1] = sin((M_PI_2 / 90.0) * (90.0 - ts->alpha)); unitcell[3] = sin((M_PI_2 / 90.0) * (90.0 - ts->beta)); unitcell[4] = sin((M_PI_2 / 90.0) * (90.0 - ts->gamma)); rc = write_dcdstep(dcd->fd, dcd->nsets, curstep, dcd->natoms, dcd->x, dcd->y, dcd->z, dcd->with_unitcell ? unitcell : NULL, dcd->charmm); if (rc < 0) { fprintf(stderr, "write_dcdstep returned %d\n", rc); return MOLFILE_ERROR; } return MOLFILE_SUCCESS; } static void close_file_write(void *v) { dcdhandle *dcd = (dcdhandle *)v; fio_fclose(dcd->fd); free(dcd->x); free(dcd->y); free(dcd->z); free(dcd); } /* * Initialization stuff here */ static molfile_plugin_t dcdplugin = { vmdplugin_ABIVERSION, /* ABI version */ MOLFILE_PLUGIN_TYPE, /* type */ "dcd", /* name */ "Justin Gullingsrud, John Stone", /* author */ 1, /* major version */ 0, /* minor version */ VMDPLUGIN_THREADSAFE, /* is reentrant */ "dcd", /* filename extension */ open_dcd_read, 0, 0, read_next_timestep, close_file_read, open_dcd_write, 0, write_timestep, close_file_write }; int VMDPLUGIN_init() { return VMDPLUGIN_SUCCESS; } int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) { (*cb)(v, (vmdplugin_t *)&dcdplugin); return VMDPLUGIN_SUCCESS; } int VMDPLUGIN_fini() { return VMDPLUGIN_SUCCESS; } #ifdef TEST_DCDPLUGIN #include /* get the time of day from the system clock, and store it (in seconds) */ double time_of_day(void) { #if defined(_MSC_VER) double t; t = GetTickCount(); t = t / 1000.0; return t; #else struct timeval tm; struct timezone tz; gettimeofday(&tm, &tz); return((double)(tm.tv_sec) + (double)(tm.tv_usec)/1000000.0); #endif } int main(int argc, char *argv[]) { molfile_timestep_t timestep; void *v; dcdhandle *dcd; int i, natoms; float sizeMB =0.0, totalMB = 0.0; double starttime, endtime, totaltime = 0.0; while (--argc) { ++argv; natoms = 0; v = open_dcd_read(*argv, "dcd", &natoms); if (!v) { fprintf(stderr, "open_dcd_read failed for file %s\n", *argv); return 1; } dcd = (dcdhandle *)v; sizeMB = ((natoms * 3.0) * dcd->nsets * 4.0) / (1024.0 * 1024.0); totalMB += sizeMB; printf("file: %s\n", *argv); printf(" %d atoms, %d frames, size: %6.1fMB\n", natoms, dcd->nsets, sizeMB); starttime = time_of_day(); timestep.coords = (float *)malloc(3*sizeof(float)*natoms); for (i=0; insets; i++) { int rc = read_next_timestep(v, natoms, ×tep); if (rc) { fprintf(stderr, "error in read_next_timestep on frame %d\n", i); return 1; } } endtime = time_of_day(); close_file_read(v); totaltime += endtime - starttime; printf(" Time: %5.1f seconds\n", endtime - starttime); printf(" Speed: %5.1f MB/sec, %5.1f timesteps/sec\n", sizeMB / (endtime - starttime), (dcd->nsets / (endtime - starttime))); } printf("Overall Size: %6.1f MB\n", totalMB); printf("Overall Time: %6.1f seconds\n", totaltime); printf("Overall Speed: %5.1f MB/sec\n", totalMB / totaltime); return 0; } #endif