/* * convert a series of gromos trajectory files into a dcd file. * * Copyright (c) 2002 by axel.kohlmeyer@ruhr-uni-bochum.de */ #include /* for getopt(3) */ #include /* for malloc(3), atoi(3) */ #include /* for buffered i/o */ #include /* for strrchr(3), strcmp(3) */ static const char *myname; /* some compile time constants */ #define BUFSZ 255 void trimstring(char *str, int len) { char *end; end = str + (len-1); while (*end == ' ' || *end == '\n') --end; ++end; *end = '\0'; } /* read title from gromos trajectory */ const char *read_gromos_title(FILE *in) { int intitle=0; char buf[BUFSZ]; char *tptr=NULL; while (fgets(buf, BUFSZ, in)) { if (buf[0] == '#') continue; trimstring(buf, strlen(buf)); if (0 == strcmp(buf, "TITLE") ) { intitle = 1; tptr = (char *)malloc(161); *tptr = '\0'; continue; } if (intitle > 0) { if (0 == strcmp(buf, "END") ) { return tptr; } else { strncat(tptr, buf, 160-strlen(tptr)); } } else { rewind(in); return tptr; } } return tptr; } const int gromos2dcd(FILE *in, FILE *out, const int natoms, const float scale) { int nstep=0, count=0, inpos=0; float x, y, z, bx, by, bz; static float *xp=NULL, *yp=NULL, *zp=NULL; char buf[BUFSZ]; /* allocate memory when called for the first time */ if (xp == NULL) { xp = (float *) malloc(natoms * sizeof(float)); yp = (float *) malloc(natoms * sizeof(float)); zp = (float *) malloc(natoms * sizeof(float)); } if (zp == NULL) { perror("could not allocate memory for coordinates"); return 0; } while (fgets(buf, BUFSZ, in)) { if (buf[0] == '#') continue; trimstring(buf, strlen(buf)); if (inpos != 0) { if (0 == strcmp(buf, "END")) { inpos = 0; if (count != natoms) { fprintf(stderr, "number of coordinates " "does not match the reference " "file. skipping rest of the file.\n"); return nstep; } count = 0; } else { sscanf(buf," %f %f %f", &x, &y, &z); xp[count] = x; yp[count] = y; zp[count] = z; ++count; } continue; } if (0 == strcmp(buf, "POSITIONRED")) { inpos = 1; count = 0; continue; } if (0 == strcmp(buf, "BOX")) { int dummy, i; /* read next line with the box dimensions */ fgets(buf, BUFSZ, in); sscanf(buf, " %f %f %f", &bx, &by, &bz); /* skip over 'END' marker */ fgets(buf, BUFSZ, in); /* convert coordinates to angstrom */ for (i = 0; i < natoms; ++i) { #if 0 xp[i] *= bx * scale; yp[i] *= by * scale; zp[i] *= bz * scale; #else xp[i] *= scale; yp[i] *= scale; zp[i] *= scale; #endif } /* write current frame to dcd */ dummy = 4 * natoms; fwrite((char *) &dummy, 4, 1, out); fwrite((char *) xp, dummy, 1, out); fwrite((char *) &dummy, 4, 1, out); fwrite((char *) &dummy, 4, 1, out); fwrite((char *) yp, dummy, 1, out); fwrite((char *) &dummy, 4, 1, out); fwrite((char *) &dummy, 4, 1, out); fwrite((char *) zp, dummy, 1, out); fwrite((char *) &dummy, 4, 1, out); ++ nstep; continue; } } fprintf(stdout, "added %d steps to dcd file.\n", nstep); return nstep; } /* read in reference coordinates and create a pdb file from that */ int reference2pdb(const char *refname, const char *basename, const float scale) { int atoms=0; int intitle=0, inpos=0; char *pdbname, buf[BUFSZ]; FILE *in, *out; in = fopen(refname, "r"); if (in == NULL) { perror("could not open g96 reference file for reading"); return 0; } fprintf(stdout, "reading molecule description from: %s\n", refname); pdbname = (char *) malloc(strlen(basename) + 4); if (pdbname == NULL) { perror("could not allocate memory for pdb file string"); return 0; } strcpy(pdbname, basename); strcat(pdbname, ".pdb"); out = fopen(pdbname, "w"); if (out == NULL) { perror("could not open pdb file for writing"); return 0; } fprintf(stdout, "writing pdb data to file: %s\n", pdbname); fprintf(out, "HEADER PROTEIN\n"); fprintf(out, "COMPND %s\n", refname); fprintf(out, "AUTHOR generated by %s\n", myname); while (fgets(buf, BUFSZ, in)) { if (buf[0] == '#') continue; trimstring(buf, strlen(buf)); if (intitle > 0) { if (0 == strcmp(buf, "END") ) { intitle = 0; } else { /* copy title section to pdb (with continuations) */ if (intitle > 1) { fprintf(out, "TITLE %-2d %s\n", intitle, buf); } else { fprintf(out, "TITLE %s\n", buf); } ++intitle; } } if (inpos > 0) { if (0 == strcmp(buf, "END") ) { fprintf(out, "TER\n"); inpos = 0; } else { int resnr, atomnr; char resname[10], atomlabel[10]; float x, y, z; static int oldresnr=0; sscanf(buf, "%d %s %s %d %f %f %f", &resnr, resname, atomlabel, &atomnr, &x, &y, &z); ++ atoms; /* convert to angstroms (or whatever) */ x *= scale; y *= scale; z *= scale; /* handle known non-protein residues differently */ if ( (0 == strcmp(resname, "ZUND")) || (0 == strcmp(resname, "H2O")) || (0 == strcmp(resname, "POPC")) || (0 == strcmp(resname, "SOLV"))) { if (oldresnr != resnr) { fprintf(out, "TER\n"); } resname[3] = '\0'; atomlabel[4] = '\0'; fprintf(out, "HETATM%5d %-4s %-3s %4d " "%8.3f%8.3f%8.3f%6.2f%6.2f\n", atomnr, atomlabel, resname, resnr, x, y, z, 0.0, 0.0); } else { resname[3] = '\0'; atomlabel[4] = '\0'; fprintf(out, "ATOM %5d %-4s %-3s %4d " "%8.3f%8.3f%8.3f%6.2f%6.2f\n", atomnr, atomlabel, resname, resnr, x, y, z, 0.0, 0.0); } oldresnr = resnr; } } if (0 == strcmp(buf, "TITLE") ) { intitle = 1; } if (0 == strcmp(buf, "POSITION") ) { inpos = 1; } } fclose(in); fclose(out); fprintf(stdout, "writing pdb done.\n"); return atoms; } /* write an dummy dcd header */ FILE *write_dcd_header(const char *basename) { int i, j; char c, *dcdname; double d = 1; FILE *out; /* sanity checks */ if (sizeof(int) != 4) { fprintf(stderr, "the size of an int is not 4 bytes " "on this platform.\n" "the dcd file is probably unreadable.\n"); return NULL; } if (sizeof(double) != 8) { fprintf(stderr, "the size of an double is not 8 bytes " "on this platform.\n" "the dcd file is probably unreadable.\n"); return NULL; } dcdname = (char *) malloc(strlen(basename) + 4); if (dcdname == NULL) { perror("could not allocate memory for dcd file string"); return NULL; } strcpy(dcdname, basename); strcat(dcdname, ".dcd"); out = fopen(dcdname, "w"); if (out == NULL) { perror("could not open dcd file for writing"); return 0; } fprintf(stdout, "writing dcd data to file: %s\n", dcdname); rewind(out); i = 84; fwrite((char *) &i, 4, 1, out); fwrite("CORD", 4, 1, out); i = 0; fwrite((char *) &i, 4, 1, out); fwrite((char *) &i, 4, 1, out); fwrite((char *) &i, 4, 1, out); for (j = 0; j < 6; ++j) fwrite((char *) &i, 4, 1, out); fwrite((char *) &d, 8, 1, out); for (j = 0; j < 9; ++j) fwrite((char *) &i, 4, 1, out); i = 84; fwrite((char *) &i, 4, 1, out); i = 164; fwrite((char *) &i, 4, 1, out); i = 2; fwrite((char *) &i, 4, 1, out); c = ' '; for (j = 0; j < 160; ++j) fwrite(&c, 1, 1, out); i = 164; fwrite((char *) &i, 4, 1, out); i = 4; fwrite((char *) &i, 4, 1, out); i = 0; fwrite((char *) &i, 4, 1, out); i = 4; fwrite((char *) &i, 4, 1, out); fflush(out); return out; } /* write missing information to dcd header */ void update_dcd_header(FILE *out, const char *title, const int natoms, const int nset, const int istart, const int ndelta, const double delta) { fseek(out, 8, SEEK_SET); fwrite((const char *) &nset, 4, 1, out); fwrite((const char *) &istart, 4, 1, out); fwrite((const char *) &ndelta, 4, 1, out); fseek(out, 24, SEEK_CUR); fwrite((const char *) &delta, 8, 1, out); fseek(out, 100, SEEK_SET); fwrite(title, 160, 1, out); fseek(out, 8, SEEK_CUR); fwrite((const char *) &natoms, 4, 1, out); return; } /* strip filename from the full path (similar to basename(1)) * we accept '/' or '\' as a path separator. a bit ugly, but anybody * who uses '/' or '\' in a regular filename deserves to be shot. */ const char *my_basename(const char *path) { const char *ptr; ptr = strrchr(path, '/'); if (! ptr) { /* no '/' path separator */ ptr = strrchr(path, '\\'); } if (! ptr) { /* no '\' path separator */ ptr = path; } else { ++ ptr; /* skip over separator */ } return ptr; } /* print a short help message to stderr */ void print_help(void) { fprintf(stderr, "\nusage: %s [options] [--] " " [ ...]\n\n" "Options:\n" " -o : write dcd/pdb to given files (.dcd/.pdb will be appended).\n" " this option is REQUIRED.\n" " -r : give path to reference .g96 file.\n" " this option is REQUIRED.\n" " -s : scale coordinates by (default is 10.0).\n" " -c : write only steps to dcdfile (default is all).\n" "\n" ,myname); } /* */ int main(int argc, char **argv) { char option; int i, optc=1, /* count processed argv options. */ maxstep=0, /* steps per dcd file limit. */ nstep=0, /* count of processed timesteps */ natoms=0, /* count of atoms in system */ ndelta=1; /* timesteps between frames */ double timestep=0.5; /* length of timestep (in fs??) */ float crdscale=10.0; const char *outbase=NULL; const char *refname=NULL; const char *titlestring = NULL; FILE *output=NULL; myname = my_basename(argv[0]); while (EOF != (option = getopt(argc, argv, "ho:r:s:c:"))) { switch(option) { case 'h': /* print usage message */ print_help(); return 1; break; case 'o': /* set output file basename */ outbase = optarg; optc += 2; break; case 'r': /* set reference coordinate file name */ refname = optarg; optc += 2; break; case 's': /* set scaling factor for coordinates */ crdscale = atof(optarg); if (crdscale <= 0.0) { fprintf(stderr, "invalid value to flag: " "'-%c %s'\n", option, optarg); print_help(); return 30; } optc += 2; break; case 'c': /* limit max number of configurations */ maxstep = atoi(optarg); if (maxstep <= 0) { fprintf(stderr, "invalid value to flag: " "'-%c %s'\n", option, optarg); print_help(); return 20; } optc += 2; break; case '?': /* fallthrough */ case ':': print_help(); return 2; break; default: /* ignore everything else */ break; } } /* the output name option is required (we need a searchable file) */ if (outbase == NULL) { fprintf(stderr, "\nFATAL ERROR: no output file basename given.\n"); print_help(); return 100; } if (refname == NULL) { fprintf(stderr, "\nFATAL ERROR: no reference coordinate file name given.\n"); print_help(); return 100; } /* trim command line to keep only the filenames */ argc -= optc; argv += optc; /* skip over '--', as it could have been used to stop * option processing */ if (0 == strcmp(argv[0], "--")) { --argc; ++argv; } /* be paranoid */ if (argc == 0) { fprintf(stderr, "no gromos files given to process.\n"); return 255; } /* first we read the reference coordinates and write the pdb file */ natoms = reference2pdb(refname, outbase, crdscale); if (natoms == 0) { fprintf(stderr, "could not read gromos reference file."); return 150; } /* write an (empty) dcd header */ output = write_dcd_header(outbase); if (output == NULL) { perror("could not open dcd file for writing"); return 200; } /* loop over gromos data files */ for (i=0; i < argc; ++i) { FILE *input; input = fopen(argv[i], "r"); if (input == NULL) { perror("could not open file for reading"); fprintf(stderr, "skipping over file '%s'\n", argv[i]); } fprintf(stdout, "adding contents of file '%s' to dcd.\n", argv[i]); if (!titlestring) titlestring = read_gromos_title(input); nstep += gromos2dcd(input, output, natoms, crdscale); if (input) fclose(input); } /* fill in the voids in the dcd header */ fprintf(stdout, "updating dcd file header.\n"); update_dcd_header(output, titlestring, natoms, nstep, 0, ndelta, timestep); fclose(output); fprintf(stdout, "writing dcd done.\n"); return 0; } /* * Local Variables: * mode: c * compile-command: "gcc -O -Wall -W -o gromos2dcd gromos2dcd.c" * c-basic-offset: 8 * End: */