/*
 * Copyright (C) 2004-2005 by David J. Hardy.  All rights reserved.
 *
 * pdbcoord.c - read and write PDB coordinate file
 */

#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mdio/pdbcoord.h"
#include "debug/debug.h"

/* prototypes for internal (file-scope) functions */
static int pdb_read(mdio_Pdbcoord *p, int nexpect);
static int pdb_write(mdio_Pdbcoord *p);
static int decode_atom(MD_Dvec *v, mdio_Pdbatom *a, const char *buf,
    mdio_File *f);
static int encode_atom(char *buf, const MD_Dvec *v, const mdio_Pdbatom *a,
    mdio_File *f);


/*
 * Allocate a Pdbcoord object with malloc() and initialize it.
 *
 * Returns the new object, or NULL if the allocation fails (reported
 * through ERRMSG) or if mdio_initializePdbcoord() fails (in which case
 * the storage is freed again).
 */
mdio_Pdbcoord *mdio_createPdbcoord(void)
{
  mdio_Pdbcoord *obj = (mdio_Pdbcoord *) malloc(sizeof *obj);

  if (obj == NULL) {
    ERRMSG("out of memory");
  }
  else if (mdio_initializePdbcoord(obj) != 0) {
    /* initialization failed, give the raw storage back */
    free(obj);
    obj = NULL;
  }
  return obj;
}


/*
 * Prepare caller-supplied storage for use as a Pdbcoord object.
 *
 * The structure is zeroed, then its file object and its "dvec" and "atom"
 * lists are initialized (the lists with element count 0 and no data).
 * Returns 0 on success and MDIO_ERROR otherwise; when a list cannot be
 * initialized, an MDIO_ERROR_NOMEM message is recorded on the file object.
 */
int mdio_initializePdbcoord(mdio_Pdbcoord *p)
{
  const char *errmsg = NULL;

  ASSERT(p != NULL);
  memset(p, 0, sizeof *p);   /* zero memory */

  if (mdio_initializeFile(&(p->file)) != 0) {
    return MDIO_ERROR;
  }

  /* the first list that fails determines the message */
  if (adt_initializeList(&(p->dvec), sizeof(MD_Dvec), 0, NULL) != 0) {
    errmsg = "cannot initialize \"dvec\" array";
  }
  else if (adt_initializeList(&(p->atom), sizeof(mdio_Pdbatom), 0, NULL)
      != 0) {
    errmsg = "cannot initialize \"atom\" array";
  }

  if (errmsg != NULL) {
    mdio_setErrorMessageFile(&(p->file), MDIO_ERROR_NOMEM, errmsg);
    return MDIO_ERROR;
  }
  return 0;
}


/*
 * Tear down a Pdbcoord object and release its storage.
 *
 * The object is cleaned up with mdio_cleanupPdbcoord() and then passed to
 * free(), so p must point to heap storage such as mdio_createPdbcoord()
 * returns.  p must not be NULL and must not be used afterwards.
 */
void mdio_destroyPdbcoord(mdio_Pdbcoord *p)
{
  ASSERT(p != NULL);
  mdio_cleanupPdbcoord(p);
  free(p);
}


/*
 * Clean up the members of a Pdbcoord object: the "atom" list, the "dvec"
 * list, and finally the file object.  The storage for p itself is not
 * freed here.  p must not be NULL.
 */
void mdio_cleanupPdbcoord(mdio_Pdbcoord *p)
{
  ASSERT(p != NULL);
  adt_cleanupList(&(p->atom));
  adt_cleanupList(&(p->dvec));
  mdio_cleanupFile(&(p->file));
}


/*
 * Read atom records from the PDB file "name" into the object's lists.
 *
 * n_expect is the number of atoms the caller expects (must be >= 0);
 * it is handed on to pdb_read(), which treats 0 as "not known".
 * The file is opened in text/read mode, parsed, and closed again.
 * Returns 0 on success or MDIO_ERROR if opening, parsing, or closing
 * fails.
 */
int mdio_readPdbcoord(mdio_Pdbcoord *p, const char *name, int n_expect)
{
  mdio_File *file;
  int failed;

  ASSERT(p != NULL);
  ASSERT(name != NULL);
  ASSERT(n_expect >= 0);
  file = &(p->file);

  if (mdio_openFile(file, name, MDIO_FILE_TEXT | MDIO_FILE_READ)) {
    return MDIO_ERROR;
  }

  failed = (pdb_read(p, n_expect) != 0);

  /* the file is closed whether or not parsing succeeded */
  if (mdio_closeFile(file) != 0) {
    failed = 1;
  }
  return failed ? MDIO_ERROR : 0;
}


/*
 * Give access to the coordinate array held in the "dvec" list.
 *
 * Stores the list length in *nelems and returns the list's data pointer
 * cast to MD_Dvec *.  Neither p nor nelems may be NULL.
 */
MD_Dvec *mdio_getPdbcoord(mdio_Pdbcoord *p, int *nelems)
{
  MD_Dvec *coords;

  ASSERT(p != NULL);
  ASSERT(nelems != NULL);

  *nelems = adt_getLengthList(&(p->dvec));
  coords = (MD_Dvec *) adt_getDataList(&(p->dvec));
  return coords;
}


/*
 * Give access to the atom record array held in the "atom" list.
 *
 * Stores the list length in *nelems and returns the list's data pointer
 * cast to mdio_Pdbatom *.  Neither p nor nelems may be NULL.
 */
mdio_Pdbatom *mdio_getAtomPdbcoord(mdio_Pdbcoord *p, int *nelems)
{
  mdio_Pdbatom *atoms;

  ASSERT(p != NULL);
  ASSERT(nelems != NULL);

  *nelems = adt_getLengthList(&(p->atom));
  atoms = (mdio_Pdbatom *) adt_getDataList(&(p->atom));
  return atoms;
}


/*
 * Replace the object's coordinate list with the caller's array "dvec"
 * of "nelems" elements.
 *
 * The current "dvec" list is cleaned up and re-initialized from the
 * arguments.  Passing back the pointer that the list currently exposes
 * is flagged with BUG().  Returns 0 on success; on failure records
 * MDIO_ERROR_NOMEM on the file object and returns MDIO_ERROR.
 * NOTE(review): whether the list copies or merely references "dvec" is
 * decided by adt_initializeList() - confirm before freeing the array.
 */
int mdio_setPdbcoord(mdio_Pdbcoord *p, MD_Dvec *dvec, int nelems)
{
  ASSERT(p != NULL);

  /* guard against handing the object its own array */
  if (dvec == adt_getDataList(&(p->dvec))) {
    BUG("cannot set coordinates using memory that Pdbcoord object owns");
  }

  /* throw away the old list, then rebuild it around the new data */
  adt_cleanupList(&(p->dvec));
  if (adt_initializeList(&(p->dvec), sizeof(MD_Dvec), nelems, dvec) == 0) {
    return 0;
  }

  mdio_setErrorFile(&(p->file), MDIO_ERROR_NOMEM);
  return MDIO_ERROR;
}


/*
 * Replace the object's atom record list with the caller's array "atom"
 * of "nelems" elements.
 *
 * The current "atom" list is cleaned up and re-initialized from the
 * arguments.  Passing back the pointer that the list currently exposes
 * is flagged with BUG().  Returns 0 on success; on failure records
 * MDIO_ERROR_NOMEM on the file object and returns MDIO_ERROR.
 * NOTE(review): whether the list copies or merely references "atom" is
 * decided by adt_initializeList() - confirm before freeing the array.
 */
int mdio_setAtomPdbcoord(mdio_Pdbcoord *p, mdio_Pdbatom *atom, int nelems)
{
  ASSERT(p != NULL);

  /* trap for an easy bug */
  if (adt_getDataList(&(p->atom)) == atom) {
    /* message names the atom array (it used to say "coordinates") */
    BUG("cannot set atoms using memory that Pdbcoord object owns");
  }

  /* destroy existing array and reconstruct with data provided */
  adt_cleanupList(&(p->atom));
  if (adt_initializeList(&(p->atom), sizeof(mdio_Pdbatom), nelems, atom)) {
    mdio_setErrorFile(&(p->file), MDIO_ERROR_NOMEM);
    return MDIO_ERROR;
  }
  return 0;
}


/*
 * Write the object's atoms and coordinates to the PDB file "name".
 *
 * The "atom" and "dvec" lists must have the same length, since every
 * output record combines one element of each.  A mismatch is reported
 * as an error before the file is opened, so an existing file is not
 * touched and the writer never indexes past the shorter array.
 *
 * An empty atom list is deliberately not filled in with placeholder
 * records: zeroed Pdbatom entries would not work, because some fields
 * need to be initialized to reasonable values.
 *
 * Returns 0 on success or MDIO_ERROR if the lists disagree or if
 * opening, writing, or closing the file fails.
 */
int mdio_writePdbcoord(mdio_Pdbcoord *p, const char *name)
{
  mdio_File *file;
  int status;

  ASSERT(p != NULL);
  ASSERT(name != NULL);
  file = &(p->file);

  /*
   * check that arrays are same length; reported with the same error
   * code that encode_atom() uses for data that cannot be written
   */
  if (adt_getLengthList(&(p->atom)) != adt_getLengthList(&(p->dvec))) {
    mdio_setErrorMessageFile(file, MDIO_ERROR_SYNTAX,
        "\"atom\" and \"dvec\" arrays must have same length");
    return MDIO_ERROR;
  }

  /* open file */
  if (mdio_openFile(file, name, MDIO_FILE_TEXT | MDIO_FILE_WRITE)) {
    return MDIO_ERROR;
  }

  /* write file, then close it whether or not writing succeeded */
  status = pdb_write(p);
  if (mdio_closeFile(file) != 0 || status != 0) {
    return MDIO_ERROR;
  }
  return 0;
}


/******************************************************************************
 *
 * internal routines to perform actual file reading and writing
 *
 ******************************************************************************/

int pdb_read(mdio_Pdbcoord *p, int nexpect)
{
  mdio_File *f;
  int len;
  char line[84];

  ASSERT(p != NULL);
  f = &(p->file);

  /* use nexpect to pre-allocate lists for efficiency */
  if (adt_resizeList(&(p->dvec), nexpect)) {
    mdio_setErrorFile(f, MDIO_ERROR_NOMEM);
    return MDIO_ERROR;
  }
  if (adt_resizeList(&(p->atom), nexpect)) {
    mdio_setErrorFile(f, MDIO_ERROR_NOMEM);
    return MDIO_ERROR;
  }

  if (nexpect > 0) {
    /* store no more than nexpect atoms */
    int k = 0;
    MD_Dvec *vec_data = adt_getDataList(&(p->dvec));
    mdio_Pdbatom *atom_data = adt_getDataList(&(p->atom));

    /* loop to read through file */
    while ((len = mdio_readTextFile(f, line, sizeof(line))) > 0) {

      /* validate line length */
      if (len > 81) {
        COND(len > 81);
        mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX,
            "line in PDB file is too long");
        return MDIO_ERROR;
      }

      /* continue if record does not provide atom coordinates */
      if (strncmp(line, "ATOM  ", 6) != 0 && strncmp(line, "HETATM", 6) != 0) {
        continue;
      }
      else if (k == nexpect) {
        char s[80];
        snprintf(s, sizeof(s), "more atoms than number %d expected", nexpect);
        mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, s);
        return MDIO_ERROR;
      }

      /* make sure line is exactly 80 characters long */
      len--;               /* remove trailing newline */
      while (len < 80) {
        line[len] = ' ';   /* pad short lines with spaces */
        len++;
      }
      line[len] = '\0';    /* nil-terminate string */

      /* parse line, update array elements in place */
      if (decode_atom(&vec_data[k], &atom_data[k], line, f)) return MDIO_ERROR; 
      k++;
    }
  }

  else {
    /* nexpect == 0, grow list arbitrarily large by using append */
    MD_Dvec vec;
    mdio_Pdbatom atom;

    /* loop to read through file */
    while ((len = mdio_readTextFile(f, line, sizeof(line))) > 0) {

      /* validate line length */
      if (len > 81) {
        COND(len > 81);
        mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX,
            "line in PDB file is too long");
        return MDIO_ERROR;
      }

      /* continue if record does not provide atom coordinates */
      if (strncmp(line, "ATOM  ", 6) != 0 && strncmp(line, "HETATM", 6) != 0) {
        continue;
      }

      /* make sure line is exactly 80 characters long */
      len--;               /* remove trailing newline */
      while (len < 80) {
        line[len] = ' ';   /* pad short lines with spaces */
        len++;
      }
      line[len] = '\0';    /* nil-terminate string */

      /* parse line, update array elements in place */
      if (decode_atom(&vec, &atom, line, f)) return MDIO_ERROR; 

      /* append to coordinate and Pdbatom arrays */
      if (adt_appendList(&(p->dvec), &vec)) {
        mdio_setErrorFile(f, MDIO_ERROR_NOMEM);
        return MDIO_ERROR;
      }
      if (adt_appendList(&(p->atom), &atom)) {
        mdio_setErrorFile(f, MDIO_ERROR_NOMEM);
        return MDIO_ERROR;
      }
    }
  }
  ASSERT(len == 0 || len == MDIO_ERROR);
  return len;  /* return error status */
}


/*
 * Write the contents of p to its open text file as a PDB file:
 *
 *   1. a REMARK record naming the file, provided the name (or, failing
 *      that, the part after its last '/') is at most 62 characters,
 *   2. a REMARK record with the creation time, or a generic one when
 *      the local time cannot be obtained or formatted,
 *   3. one record per atom, produced by encode_atom(),
 *   4. an END record.
 *
 * The "atom" and "dvec" lists are expected to have equal length; the
 * number of records written is the length of "dvec".
 * Returns 0 on success or MDIO_ERROR as soon as encoding or writing
 * fails.
 */
int pdb_write(mdio_Pdbcoord *p)
{
  mdio_File *file;
  const mdio_Pdbatom *atoms;
  const MD_Dvec *coords;
  const char *fname;
  struct tm *now;
  time_t stamp;
  int count, i, nchars;
  char line[84];

  ASSERT(p != NULL);
  file = &(p->file);

  /* choose a name short enough for the REMARK FILENAME record */
  fname = mdio_getNameFile(file);
  if (strlen(fname) > 62) {
    /* too long, fall back on the part after the last path separator */
    fname = strrchr(fname, '/');
    if (fname != NULL) {
      fname++;
      if (strlen(fname) > 62) fname = NULL;
    }
  }
  if (fname != NULL) {
    nchars = snprintf(line, sizeof(line), "REMARK FILENAME=\"%s\"\n", fname);
    if (nchars > 0 && nchars <= 81
        && mdio_writeTextFile(file, line) < 0) {
      return MDIO_ERROR;
    }
  }

  /* REMARK record about creation, time-stamped whenever possible */
  stamp = time(NULL);
  now = localtime(&stamp);
  nchars = 0;
  if (now != NULL) {
    nchars = strftime(line, sizeof(line), "REMARK Created %d %b %Y "
        "at %H:%M by MDIO PDB coordinate file writer\n", now);
  }
  if (nchars <= 0 || nchars > 81) {
    /* no usable time stamp, use the generic remark instead */
    sprintf(line, "REMARK Created by MDIO PDB coordinate file writer\n");
  }
  if (mdio_writeTextFile(file, line) < 0) return MDIO_ERROR;

  ASSERT(adt_getLengthList(&(p->atom)) == adt_getLengthList(&(p->dvec)));

  /* one ATOM (or HETATM) record for each coordinate */
  count = adt_getLengthList(&(p->dvec));
  atoms = (mdio_Pdbatom *) adt_getDataList(&(p->atom));
  coords = (MD_Dvec *) adt_getDataList(&(p->dvec));
  for (i = 0;  i < count;  i++) {
    if (encode_atom(line, &coords[i], &atoms[i], file)) return MDIO_ERROR;
    if (mdio_writeTextFile(file, line) < 0) return MDIO_ERROR;
  }

  /* terminating END record */
  if (mdio_writeTextFile(file, "END\n") < 0) return MDIO_ERROR;

  return 0;
}


/******************************************************************************
 *
 * low level parsing of ATOM and HETATM records from PDB file
 *
 ******************************************************************************/

/* column range occupied by one field of a record (1-based, inclusive) */
typedef struct Cols_t {
  int scol;  /* first column of the field */
  int ecol;  /* last column of the field */
} Cols;

/*
 * Layout of the ATOM and HETATM records, as given by
 * http://www.rcsb.org/pdb/docs/format/pdbguide2.2/guide2.2_frame.html
 *
 * The entries appear in record order and the code that parses and
 * checks records steps through them by position, so the order of this
 * table must not be changed.
 */
static const Cols atomCols[] = {
  {  1,  6 },  /* record name: "ATOM  " (or "HETATM")                 */
  {  7, 11 },  /* serial:      Integer, atom serial number            */
  { 13, 16 },  /* name:        Atom, atom name                        */
  { 17, 17 },  /* altLoc:      Character, alternate location          */
  { 18, 20 },  /* resName:     Residue name                           */
  { 22, 22 },  /* chainID:     Character, chain identifier            */
  { 23, 26 },  /* resSeq:      Integer, residue sequence number       */
  { 27, 27 },  /* iCode:       AChar, code for insertion of residues  */
  { 31, 38 },  /* x:           Real(8.3), orthogonal X coordinate     */
  { 39, 46 },  /* y:           Real(8.3), orthogonal Y coordinate     */
  { 47, 54 },  /* z:           Real(8.3), orthogonal Z coordinate     */
  { 55, 60 },  /* occupancy:   Real(6.2)                              */
  { 61, 66 },  /* tempFactor:  Real(6.2), temperature factor          */
  { 73, 76 },  /* segID:       LString(4), segment id, left-justified */
  { 77, 78 },  /* element:     LString(2), symbol, right-justified    */
  { 79, 80 }   /* charge:      LString(2), charge on the atom         */
};


/* used when reading to parse line */
int decode_atom(MD_Dvec *v, mdio_Pdbatom *a, const char *buf, mdio_File *f)
{
  int start, len;
  const Cols *c;
  char s_x[12], s_y[12], s_z[12];
  char s_occupancy[8], s_tempFactor[8];
  char ch;
  char msg[80];

  ASSERT(strlen(buf) == 80);

  c = &atomCols[0];
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->record) > len);
  strncpy(a->record, &buf[start], len);
  a->record[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->serial) > len);
  strncpy(a->serial, &buf[start], len);
  a->serial[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->name) > len);
  strncpy(a->name, &buf[start], len);
  a->name[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->altLoc) > len);
  strncpy(a->altLoc, &buf[start], len);
  a->altLoc[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->resName) > len);
  strncpy(a->resName, &buf[start], len);
  a->resName[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->chainID) > len);
  strncpy(a->chainID, &buf[start], len);
  a->chainID[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->resSeq) > len);
  strncpy(a->resSeq, &buf[start], len);
  a->resSeq[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->iCode) > len);
  strncpy(a->iCode, &buf[start], len);
  a->iCode[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(s_x) > len);
  strncpy(s_x, &buf[start], len);
  s_x[len] = '\0';
  if (sscanf(s_x, "%lg%c", &(v->x), &ch) != 1) {
    snprintf(msg, sizeof(msg),
        "failed to find x-coordinate while parsing %s%s",
        a->record, a->serial);
    mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
    return MDIO_ERROR;
  }

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(s_y) > len);
  strncpy(s_y, &buf[start], len);
  s_y[len] = '\0';
  if (sscanf(s_y, "%lg%c", &(v->y), &ch) != 1) {
    snprintf(msg, sizeof(msg),
        "failed to find y-coordinate while parsing %s%s",
        a->record, a->serial);
    mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
    return MDIO_ERROR;
  }

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(s_z) > len);
  strncpy(s_z, &buf[start], len);
  s_z[len] = '\0';
  if (sscanf(s_z, "%lg%c", &(v->z), &ch) != 1) {
    snprintf(msg, sizeof(msg),
        "failed to find z-coordinate while parsing %s%s",
        a->record, a->serial);
    mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
    return MDIO_ERROR;
  }

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(s_occupancy) > len);
  strncpy(s_occupancy, &buf[start], len);
  s_occupancy[len] = '\0';
  if (sscanf(s_occupancy, "%g%c", &(a->occupancy), &ch) != 1) {
    snprintf(msg, sizeof(msg),
        "failed to find occupancy while parsing %s%s",
        a->record, a->serial);
    mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
    return MDIO_ERROR;
  }

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(s_tempFactor) > len);
  strncpy(s_tempFactor, &buf[start], len);
  s_tempFactor[len] = '\0';
  if (sscanf(s_tempFactor, "%g%c", &(a->tempFactor), &ch) != 1) {
    snprintf(msg, sizeof(msg),
        "failed to find temperature factor while parsing %s%s",
        a->record, a->serial);
    mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
    return MDIO_ERROR;
  }

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->segID) > len);
  strncpy(a->segID, &buf[start], len);
  a->segID[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->element) > len);
  strncpy(a->element, &buf[start], len);
  a->element[len] = '\0';

  c++;
  start = c->scol - 1;
  len = c->ecol - c->scol + 1;
  ASSERT(sizeof(a->charge) > len);
  strncpy(a->charge, &buf[start], len);
  a->charge[len] = '\0';

  return 0;
}


/* used when writing to formulate line */
/*
 * Nonzero if lo <= val <= hi.  Written with positive comparisons so
 * that a NaN, which fails every comparison, counts as out of range.
 */
static int pdb_in_range(double val, double lo, double hi)
{
  return (val >= lo && val <= hi);
}


/*
 * Record on f that the numeric field "what" of atom a does not fit its
 * columns.  Always returns MDIO_ERROR so callers can return it directly.
 */
static int pdb_range_failure(mdio_File *f, const mdio_Pdbatom *a,
    const char *what)
{
  char msg[80];

  snprintf(msg, sizeof(msg), "%s out-of-range while writing %s%s",
      what, a->record, a->serial);
  mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
  return MDIO_ERROR;
}


/*
 * used when writing to formulate line
 *
 * Formats one ATOM/HETATM record from the coordinates in v and the
 * fields in a.  On success "buf" holds 80 characters followed by a
 * newline and a nil terminator, so it must have room for 82 bytes.
 *
 * The text fields of a are expected to have exactly the widths listed
 * in atomCols (asserted when DEBUG_SUPPORT is defined).  Coordinates
 * must lie in [-999.999, 9999.999] and the occupancy and temperature
 * factor in [-99.99, 999.99]; a value outside its range, including a
 * NaN, is reported as an error rather than written into the fixed-width
 * numeric columns.
 *
 * Returns 0 on success; otherwise records a syntax error on f and
 * returns MDIO_ERROR.
 */
int encode_atom(char *buf, const MD_Dvec *v, const mdio_Pdbatom *a,
    mdio_File *f)
{
  int n;
#ifdef DEBUG_SUPPORT
  const Cols *c = &atomCols[0];
  ASSERT(strlen(a->record) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->serial) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->name) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->altLoc) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->resName) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->chainID) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->resSeq) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->iCode) == c->ecol - c->scol + 1);
  c += 6;  /* skip x, y, z, occupancy, tempFactor to reach segID */
  ASSERT(strlen(a->segID) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->element) == c->ecol - c->scol + 1);
  c++;
  ASSERT(strlen(a->charge) == c->ecol - c->scol + 1);
#endif

  /* make sure numeric ranges are correct */
  if (!pdb_in_range(v->x, -999.999, 9999.999)) {
    return pdb_range_failure(f, a, "x-coordinate");
  }
  if (!pdb_in_range(v->y, -999.999, 9999.999)) {
    return pdb_range_failure(f, a, "y-coordinate");
  }
  if (!pdb_in_range(v->z, -999.999, 9999.999)) {
    return pdb_range_failure(f, a, "z-coordinate");
  }
  if (!pdb_in_range(a->occupancy, -99.99, 999.99)) {
    return pdb_range_failure(f, a, "occupancy");
  }
  if (!pdb_in_range(a->tempFactor, -99.99, 999.99)) {
    return pdb_range_failure(f, a, "temperature factor");
  }

  /* write fields into buffer, make sure line length is correct */
  n = snprintf(buf, 82,
      "%s%s %s%s%s %s%s%s   %8.3f%8.3f%8.3f%6.2f%6.2f      %s%s%s\n",
      a->record, a->serial, a->name, a->altLoc, a->resName, a->chainID,
      a->resSeq, a->iCode, v->x, v->y, v->z, (double) a->occupancy,
      (double) a->tempFactor, a->segID, a->element, a->charge);
  if (n != 81 || buf[80] != '\n') {
    /* some field did not have its expected width */
    char msg[80];
    COND(n != 81);
    COND(buf[80] != '\n');
    snprintf(msg, sizeof(msg),
        "incorrect line length for record while writing %s%s",
        a->record, a->serial);
    mdio_setErrorMessageFile(f, MDIO_ERROR_SYNTAX, msg);
    return MDIO_ERROR;
  }
  ASSERT(strlen(buf) == 81);
  return 0;
}


#if 0
/*
 * Disabled ad hoc test driver: decodes one sample ATOM record, prints
 * the coordinates, and encodes the atom again.
 *
 * NOTE(review): this no longer matches the functions in this file.  It
 * passes an MDIO_Error to decode_atom() and encode_atom(), which take
 * an mdio_File * as their last argument, so it has to be updated before
 * it can be enabled.
 */
int main()
{
  char buf[84] = "ATOM      1  CA  ACE     1      -2.184   0.591   0.910  1.00  7.00      MAIN    ";
  MD_Dvec v;
  mdio_Pdbatom a;
  MDIO_Error e;

  MDIO_error_init(&e);
  printf("%s\n", buf);
  decode_atom(&v, &a, buf, &e);
  memset(buf, ' ', 80);
  buf[80] = '\0';
  printf("pos = %g %g %g\n", v.x, v.y, v.z);
  encode_atom(buf, &v, &a, &e);
  printf("%s\n", buf);
  return 0;
}
#endif
