pdbxplugin.C Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2016 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 /***************************************************************************
00009  * RCS INFORMATION:
00010  *
00011  *      $RCSfile: pdbxplugin.C,v $
00012  *      $Author: johns $       $Locker:  $             $State: Exp $
00013  *      $Revision: 1.26 $       $Date: 2019/02/14 04:00:30 $
00014  ***************************************************************************
00015  * DESCRIPTION:
00016  *  A plugin for parsing and generating molecular model files that adhere 
00017  *  to the RCSB Protein Data Bank "PDBx" variant of the mmCIF file format, 
00018  *  as described here:
00019  *    http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Index/
00020  *
00021  *  The plugin also incorporates experimental read-only support for 
00022  *  Integrative Hybrid Modeling (IHM) structure data in enhanced PDBx files:
00023  *    https://github.com/ihmwg/IHM-dictionary/blob/master/dictionary_documentation/documentation.md
00024  *
00025  ***************************************************************************/
00026 
00027 #include "largefiles.h"   /* platform dependent 64-bit file I/O defines */
00028 
00029 #include "molfile_plugin.h"
00030 #include "periodic_table.h"
00031 #include <stdio.h>
00032 #include <stdlib.h>
00033 #include <string.h>
00034 
00035 #include <stdio.h>
00036 #include <stdlib.h>
00037 #include <string.h>
00038 
00039 #if !(defined(WIN32) || defined(WIN64))
00040 #include <sys/time.h>
00041 #endif
00042 
00043 #define VMDPLUGIN_STATIC
00044 #include "inthash.h"
00045 
00046 //#define PDBX_DEBUG 1
00047 
00048 //used for reading author defined values needed when reading bond info
00049 #define CHAIN_SIZE 4
00050 #define TYPE_SIZE 8
00051 #define BUFFER_SIZE 4096
00052 #define COLUMN_BUFFER_SIZE 1024
00053 #define MAX_COLUMNS 32
00054 
00055 struct list_node { 
00056   unsigned int next;
00057   unsigned int index;
00058 };
00059 
00060 // class definition
00061 typedef struct pdbxParser {
00062   FILE *file;
00063   int* resid_auth;
00064   char * chain_auth;
00065   char * type_auth;
00066   float* xyz;
00067   int* bondsTo;
00068   int* bondsFrom;
00069   molfile_graphics_t* g_data;
00070   list_node * hashMem;
00071   inthash_t bondHash;
00072   int table[64];
00073   unsigned int tableSize;
00074   int natoms;
00075   int nbonds;
00076   int n_graphics_elems;
00077   bool error;
00078   bool pdb_dev;
00079 } pdbxParser;
00080 
00081 // XXX yuck, this needs to go!
00082 static unsigned char charToNum[128];
00083 
00084 enum TableColums {
00085   COLUMN_NUMBER = 0,
00086   COLUMN_NAME,
00087   COLUMN_TYPE,
00088   COLUMN_TYPE_AUTH,
00089   COLUMN_RESNAME,
00090   COLUMN_RESID,
00091   COLUMN_RESID_AUTH,
00092   COLUMN_INSERTION,
00093   COLUMN_X,
00094   COLUMN_Y,
00095   COLUMN_Z,
00096   COLUMN_OCCUPANCY,
00097   COLUMN_BFACTOR,
00098   COLUMN_CHARGE,
00099   COLUMN_CHAIN,
00100   COLUMN_CHAIN_AUTH,
00101   COLUMN_MODEL_NUM,
00102   COLUMN_JUNK
00103 };
00104 
00105 
00106 // Opens the file, finds the number of atoms, and allocates arrays 
00107 // Reads in and stores information from the file 
00108 static pdbxParser* create_pdbxParser(const char* filepath);
00109 
00110 // Reads through the file and stores data 
00111 static int parseStructure(molfile_atom_t * atoms, int * optflags, pdbxParser* parser);
00112 
00113 static bool readRMSDBonds(molfile_atom_t * atoms, pdbxParser* parser);
00114 static bool readAngleBonds(molfile_atom_t * atoms, pdbxParser* parser);
00115 static bool readBonds(molfile_atom_t * atoms, pdbxParser* parser);
00116 
00117 // Parse through file and return the total number of atoms
00118 // Will rewind the file to the start 
00119 // Returns -1 if the number of atoms cannot be found
00120 static int parseNumberAtoms(pdbxParser* parser);
00121 
00122 // returns true if str starts with "_atom_site."
00123 static inline bool isAtomSite(char * str);
00124 
00125 static inline bool isValidateRMSDBond(char * str);
00126 
00127 // Assumes that str contains a single floating point number and 
00128 // returns it as a float. NO ERROR CHECKING 
00129 // Must be passed a null terminating string 
00130 // Wrote specifically to parse strings returned from getNextWord()
00131 static float stringToFloat(char * str);
00132 
00133 // Takes a string str and finds the next word starting from pos
00134 // word must be allocated and suffiently large, does NO ERROR CHECKING 
00135 // After returning, word will contain the next word and pos will be updated 
00136 // to point to the current position in str 
00137 static void getNextWord(char * str, void * word, int& pos, int maxstrlen, int maxwordlen);
00138 
00139 // Takes a string str and finds the next word starting from pos
00140 // word must be allocated and suffiently large, does NO ERROR CHECKING 
00141 // After returning, word will contain the next word and pos will be updated 
00142 // to point to the current position in str */
00143 static void skipNextWord(char * str, void * word, int& pos, int maxlen);
00144 
00145 // Returns a unique int id for an atom based on the chain and resid 
00146 static inline int getUniqueResID(char * chainstr, int resid);
00147 
00148 static void initCharToNum();
00149 
00150 #define WB_SIZE 1024
00151 
00152 #if 0 
00153 static const char atomSiteHeader[] =
00154   "loop_\n"
00155   "_atom_site.group_PDB\n"
00156   "_atom_site.id\n"
00157   "_atom_site.type_symbol\n"
00158   "_atom_site.label_atom_id\n"
00159   "_atom_site.label_alt_id\n"
00160   "_atom_site.label_comp_id\n"
00161   "_atom_site.label_asym_id\n"
00162   "_atom_site.label_entity_id\n"
00163   "_atom_site.label_seq_id\n"
00164   "_atom_site.pdbx_PDB_ins_code\n"
00165   "_atom_site.Cartn_x\n"
00166   "_atom_site.Cartn_y\n"
00167   "_atom_site.Cartn_z\n"
00168   "_atom_site.occupancy\n"
00169   "_atom_site.B_iso_or_equiv\n"
00170   "_atom_site.Cartn_x_esd\n"
00171   "_atom_site.Cartn_y_esd\n"
00172   "_atom_site.Cartn_z_esd\n"
00173   "_atom_site.occupancy_esd\n"
00174   "_atom_site.B_iso_or_equiv_esd\n"
00175   "_atom_site.pdbx_formal_charge\n"
00176   "_atom_site.auth_seq_id\n"
00177   "_atom_site.auth_comp_id\n"
00178   "_atom_site.auth_asym_id\n"
00179   "_atom_site.auth_atom_id\n"
00180   "_atom_site.pdbx_PDB_model_num\n";
00181 #endif
00182 
00183 
00184 static const char atomSiteHeader[] =
00185   "loop_\n"
00186   "_atom_site.group_PDB\n"
00187   "_atom_site.id\n"
00188   "_atom_site.type_symbol\n"
00189   "_atom_site.label_atom_id\n"
00190   "_atom_site.label_alt_id\n"
00191   "_atom_site.label_comp_id\n"
00192   "_atom_site.label_asym_id\n"
00193   "_atom_site.label_entity_id\n"
00194   "_atom_site.label_seq_id\n"
00195   "_atom_site.pdbx_PDB_ins_code\n"
00196   "_atom_site.Cartn_x\n"
00197   "_atom_site.Cartn_y\n"
00198   "_atom_site.Cartn_z\n"
00199   "_atom_site.occupancy\n"
00200   "_atom_site.pdbx_formal_charge\n"
00201   "_atom_site.auth_asym_id\n";
00202 
00203 
00204 typedef struct pdbxWriter {
00205   FILE* fd;
00206   char writeBuf[WB_SIZE];
00207   char pdbName[256];
00208   int bufferCount;
00209   molfile_atom_t* atoms;
00210   const float* coordinates;
00211   int numatoms;
00212 } pdbxWriter;
00213 
00214 
00215 static void writeBuffer(pdbxWriter* writer);
00216 static void writeIntro(pdbxWriter* writer);
00217 static void write(const char* str, pdbxWriter* writer);
00218 static void writeAtomSite(pdbxWriter* writer);
00219 static void close(pdbxWriter* writer);
00220 static pdbxWriter* create_pdbxWriter(const char* filename, int numAtoms);
00221 static void addAtoms(const molfile_atom_t* atoms, int optflags, pdbxWriter* writer);
00222 static void addCoordinates(const float* coords, pdbxWriter* writer);
00223 static void writeFile(pdbxWriter* writer);
00224 
00225 
00226 //
00227 // class implementation 
00228 //
00229 static pdbxParser* create_pdbxParser(const char* filepath) {
00230   pdbxParser* parser = new pdbxParser;
00231   char buffer[BUFFER_SIZE];
00232   int numberAtoms;
00233   parser->xyz = NULL;
00234   parser->nbonds = 0;
00235   parser->hashMem = NULL;
00236   parser->chain_auth = NULL;
00237   parser->resid_auth = NULL;
00238   parser->type_auth = NULL;
00239   parser->error = false;
00240   parser->g_data = NULL;
00241   parser->bondsTo = NULL;
00242   parser->bondsFrom = NULL;
00243   parser->file = fopen(filepath, "r");
00244   parser->pdb_dev = false;
00245   parser->n_graphics_elems = 0;
00246   memset(buffer, 0, sizeof(buffer));
00247   if (!parser->file) {
00248     printf("pdbxplugin) cannot open file %s\n", filepath);
00249     parser->error = true;
00250     return parser;
00251   }
00252   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
00253     printf("pdbxplugin) cannot read file %s\n", filepath);
00254     parser->error = true;
00255     return parser;
00256   }
00257 
00258   // Find the number of atoms
00259   parser->natoms = parseNumberAtoms(parser);
00260   numberAtoms = parser->natoms;
00261   if (!parser->pdb_dev && parser->natoms <= 0) {
00262     printf("pdbxplugin) Could not get atom number\n");
00263     parser->error = true;
00264     return parser;
00265   }
00266   if (parser->natoms != 0) {
00267     initCharToNum();
00268     parser->xyz = new float[numberAtoms*3];
00269     parser->hashMem = new list_node[numberAtoms+1];
00270     parser->chain_auth = new char[numberAtoms*CHAIN_SIZE];
00271     parser->resid_auth = new int [numberAtoms];
00272     parser->type_auth = new char[numberAtoms * TYPE_SIZE];
00273   }
00274   return parser;
00275 }
00276 
00277 
00278 static void delete_pdbxParser(pdbxParser* parser) {
00279   fclose(parser->file);
00280   if (parser->xyz != NULL) {
00281     delete [] parser->xyz;
00282     parser->xyz = NULL;
00283   }
00284   if (parser->hashMem != NULL) {
00285     delete [] parser->hashMem;
00286     parser->hashMem = NULL;
00287   }
00288   if (parser->chain_auth != NULL) {
00289     delete [] parser->chain_auth;
00290     parser->chain_auth = NULL;
00291   }
00292   if (parser->resid_auth != NULL) {
00293     delete [] parser->resid_auth;
00294     parser->resid_auth = NULL;
00295   }
00296   if (parser->type_auth != NULL) {
00297     delete [] parser->type_auth;
00298     parser->type_auth = NULL;
00299   }
00300   if (parser->g_data != NULL) {
00301     delete [] parser->g_data;
00302     parser->g_data = NULL;
00303   }
00304   if (parser->bondsTo != NULL) {
00305     free(parser->bondsTo);
00306     parser->bondsTo = NULL;
00307   }
00308   if (parser->bondsFrom != NULL) {
00309     free(parser->bondsFrom);
00310     parser->bondsFrom = NULL;
00311   }
00312   if (parser->natoms != 0) {
00313     inthash_destroy(&parser->bondHash);
00314   }
00315   delete parser;
00316 }
00317 
00318 
00319 static void skipNextWord(char * str, void * word, int& pos, int maxlen) {
00320   // Handle case if we start at end of line
00321   if (pos > maxlen) {
00322     return;
00323   }
00324   if (str[pos] == '\0' || str[pos] == '\n') {
00325     return;
00326   }
00327   // move forward until we hit non-whitespace
00328   while(str[pos] == ' ' || str[pos] == '\t') {
00329     ++pos;
00330     if (pos > maxlen) {
00331       return;
00332     }
00333   }
00334   // increment pos until we hit a whitespace
00335   while (str[pos] != ' ' && str[pos] != '\t') {
00336     pos++;
00337     if (pos > maxlen) {
00338       return;
00339     }
00340   }
00341 }
00342 
00343 
00344 static bool isPDB_DevFile(pdbxParser* parser) {
00345   char buffer[BUFFER_SIZE];
00346   int count = 0;
00347   rewind(parser->file);
00348   while (NULL != fgets(buffer, BUFFER_SIZE, parser->file)) {
00349     if (NULL != strstr(buffer,"_ihm_")) {
00350       ++count;
00351     }
00352     if (count > 5) {
00353       // arbitrarly chosen number of occurences
00354       // chosen so one or two random _ihm_ strings in a regular pdb
00355       // doesn't cause us to interpret the file as a PDB-Dev.
00356       // Although even if we do, parsing should continue without issues.
00357       return true;
00358     }
00359   }
00360   rewind(parser->file);
00361   return false;
00362 }
00363 
00364 
00365 static void getNextWord(char * str, void * word, int& pos, int maxstrlen, int maxwordlen) {
00366   char * w = (char*) word;
00367   int wordpos = 0;
00368   w[0] = '\0';
00369   if (pos >= maxstrlen) {
00370     return;
00371   }
00372 
00373   // Handle case if we start at end of line
00374   if (str[pos] == '\0' || str[pos] == '\n') {
00375     return;
00376   }
00377 
00378   // move forward until we hit non-whitespace
00379   while (str[pos] == ' ' || str[pos] == '\t') {
00380     if (++pos >= maxstrlen) {
00381       return;
00382     }
00383   }
00384 
00385   // XXX records can be broken across lines, which breaks all of the
00386   //     assumptions made within this parser, we're going to have to 
00387   //     completely re-tool how parsing is done for the more general
00388   //     variants of mmCIF such as the IHM models
00389 
00390   // 
00391   // XXX experimental handling of string columns
00392   // 
00393 #if 0
00394   // The parser must handle mmCIF files that have
00395   // rows/records containing strings that are delimited by 
00396   // the '\'' or '"' character, where the whitespace skipping
00397   // approach breaks.  When we encounter the start of a string,
00398   // we need to then walk to the end of the string rather than
00399   // proceeding normally with whitespace skipping.
00400   if (str[pos] == '\'') {
00401     int start = pos; // record index of first '\'' char...
00402     pos++; // advance to next char 
00403 
00404     // loop until we find the second '\'' char, or hit the end of the line
00405     while ((pos < maxstrlen) && (str[pos] != '\'')) {
00406       pos++;
00407     }
00408     int end = pos; // record index of second '\'' char...
00409     int len = end-start+1;
00410 
00411     char colbuf[1024];
00412     memset(colbuf, 0, sizeof(colbuf));
00413     memcpy(colbuf, str+start, len);
00414 //printf("getNextWord(): s: %d e: %d len: %d, \"%s\"\n", start, end, len, colbuf);
00415     strncpy(w, colbuf, maxwordlen-1);
00416     
00417     ++pos; // Increment pos to point to first char that has not been read
00418     return;
00419   }
00420 #endif
00421 
00422   // increment pos until we hit a whitespace
00423   while (str[pos] != ' ' && str[pos] != '\t') {
00424     w[wordpos++] = str[pos++];
00425     if (wordpos >= maxwordlen) {
00426       w[maxwordlen-1] = '\0';
00427       return;
00428     }
00429     if (pos >= maxstrlen) {
00430       w[wordpos] = '\0';
00431     }
00432   }
00433 
00434   w[wordpos] = '\0';
00435   ++pos; // Increment pos to point to first char that has not been read
00436 }
00437 
00438 
00439 static float stringToFloat(char * str) {
00440   bool neg = (str[0] == '-');
00441   unsigned int total = 0;
00442   unsigned pos = neg ? 1 : 0;
00443   unsigned int num = 0;
00444   unsigned int denom = 1;
00445   float retval;
00446 
00447   // calculate integer before the decimal 
00448   while (str[pos] != '.') {
00449     total = (total*10) + str[pos] - '0';
00450     ++pos;
00451   }
00452   ++pos;
00453 
00454   // Find the fraction representing the decimal 
00455   while (str[pos] != '\0') {
00456     num = (num * 10) + str[pos] - '0';
00457     denom *= 10;
00458     ++pos;
00459   }
00460 
00461   retval = (float)total + (double)num/(double)denom;
00462   if (neg)
00463     retval *= -1;
00464   return retval;
00465 }
00466 
00467 
00468 static bool isStructureDataHeader(const char* strbuf) {
00469   if (strstr(strbuf, "_atom_site.") != NULL) {
00470     return true;
00471   }
00472   if (strstr(strbuf, "_ihm_starting_model_coord.") != NULL) {
00473     return true;
00474   }
00475   return false;
00476 }
00477 
00478 
00479 static int parseNumberAtoms(pdbxParser* parser) {
00480   char buffer[BUFFER_SIZE];
00481   char wordbuffer[BUFFER_SIZE];
00482   int numatoms = 0;
00483   int i;
00484   int tableSize = 0;
00485   bool valid_mmcif = true;
00486   bool column_exists[MAX_COLUMNS];
00487   for (i=0; i<MAX_COLUMNS; i++) 
00488     column_exists[i] = false;
00489 
00490   int base_word_size = 0; // position of first char of column name
00491                           // ex: base_name.column_name
00492                           //               ^
00493 
00494   if (isPDB_DevFile(parser)) {
00495     printf("pdbxplugin) WARNING: this appears to be a PDB-Dev file. PDB-Dev file reading is experimental.\n");
00496     parser->pdb_dev = true;
00497   }
00498 
00499   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file))
00500     return 0;
00501 
00502   // skip past junk at start of file, stop when we get to atomSite data
00503   while (!isStructureDataHeader(buffer)) {
00504     // if this is true then we couldnt find the header. Maybe it has a different name in newer PDBx definitions?
00505     // When this was first written _atom_site was the only allowed name, which apparently is no longer true
00506     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file))
00507       return 0;
00508   }
00509 
00510   base_word_size = (char*)memchr(buffer, '.', sizeof(buffer)) - buffer + 1;
00511 
00512   while (isStructureDataHeader(buffer)) {
00513     sscanf(buffer+base_word_size, "%s", wordbuffer); // table is used in parseStructure too
00514     // assign integer values to each column 
00515     if (0 == strcmp(wordbuffer, "id")) {
00516       parser->table[tableSize] = COLUMN_NUMBER;
00517       column_exists[COLUMN_NUMBER] = true;
00518 
00519     } else if (0 == strcmp(wordbuffer, "type_symbol")) {
00520       parser->table[tableSize] = COLUMN_NAME;
00521       column_exists[COLUMN_NAME] = true;
00522 
00523     } else if (0 == strcmp(wordbuffer, "label_comp_id")
00524                || (parser->pdb_dev && 0 == strcmp(wordbuffer, "comp_id"))) {
00525       parser->table[tableSize] = COLUMN_RESNAME;
00526       column_exists[COLUMN_RESNAME] = true;
00527 
00528     } else if (0 == strcmp(wordbuffer, "label_asym_id")
00529                || (parser->pdb_dev && 0 == strcmp(wordbuffer, "asym_id"))) {
00530       parser->table[tableSize] = COLUMN_CHAIN;
00531       column_exists[COLUMN_CHAIN] = true;
00532 
00533     } else if (0 == strcmp(wordbuffer, "auth_asym_id")) {
00534       parser->table[tableSize] = COLUMN_CHAIN_AUTH;
00535       column_exists[COLUMN_CHAIN_AUTH] = true;
00536 
00537     } else if (0 == strcmp(wordbuffer, "Cartn_x")) {
00538       parser->table[tableSize] = COLUMN_X;
00539       column_exists[COLUMN_X] = true;
00540 
00541     } else if (0 == strcmp(wordbuffer, "Cartn_y")) {
00542       parser->table[tableSize] = COLUMN_Y;
00543       column_exists[COLUMN_Y] = true;
00544 
00545     } else if (0 == strcmp(wordbuffer, "Cartn_z")) {
00546       parser->table[tableSize] = COLUMN_Z;
00547       column_exists[COLUMN_Z] = true;
00548 
00549     } else if (0 == strcmp(wordbuffer, "label_seq_id")
00550                || (parser->pdb_dev && 0 == strcmp(wordbuffer, "seq_id"))) {
00551       parser->table[tableSize] = COLUMN_RESID;
00552       column_exists[COLUMN_RESID] = true;
00553 
00554     } else if (0 == strcmp(wordbuffer, "auth_seq_id")) {
00555       parser->table[tableSize] = COLUMN_RESID_AUTH;
00556       column_exists[COLUMN_RESID_AUTH] = true;
00557 
00558     } else if (0 == strcmp(wordbuffer, "pdbx_PDB_ins_code")) {
00559       parser->table[tableSize] = COLUMN_INSERTION;
00560       column_exists[COLUMN_INSERTION] = true;
00561 
00562     } else if (0 == strcmp(wordbuffer, "B_iso_or_equiv")) {
00563       parser->table[tableSize] = COLUMN_BFACTOR;
00564       column_exists[COLUMN_BFACTOR] = true;
00565 
00566     } else if (0 == strcmp(wordbuffer, "occupancy")) {
00567       parser->table[tableSize] = COLUMN_OCCUPANCY;
00568       column_exists[COLUMN_OCCUPANCY] = true;
00569 
00570     } else if (0 == strcmp(wordbuffer, "label_atom_id")
00571                || (parser->pdb_dev && 0 == strcmp(wordbuffer, "atom_id"))) {
00572       parser->table[tableSize] = COLUMN_TYPE;
00573       column_exists[COLUMN_TYPE] = true;
00574 
00575     } else if (0 == strcmp(wordbuffer, "auth_atom_id")) {
00576       parser->table[tableSize] = COLUMN_TYPE_AUTH;
00577       column_exists[COLUMN_TYPE_AUTH] = true;
00578 
00579     } else if (0 == strcmp(wordbuffer, "pdbx_formal_charge")) {
00580       parser->table[tableSize] = COLUMN_CHARGE;
00581       column_exists[COLUMN_CHARGE] = true;
00582 
00583     } else if (0 == strcmp(wordbuffer, "pdbx_PDB_model_num")) {
00584       parser->table[tableSize] = COLUMN_MODEL_NUM;
00585       column_exists[COLUMN_MODEL_NUM] = true;
00586 
00587     } else {
00588       parser->table[tableSize] = COLUMN_JUNK;
00589     }
00590 
00591     // if this is true then we couldnt find the numatoms
00592     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file))
00593       return 0;
00594 
00595     tableSize++;
00596   }
00597 
00598   // increment numatoms until we get to the end of the file
00599   while (buffer[0] != '#') {
00600     // if this is true then we couldnt find the numatoms
00601     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file))
00602       return 0;
00603     ++numatoms;
00604   }
00605 
00606   rewind(parser->file);
00607   // Cut off any junk columns from the end of table
00608 
00609   i = tableSize;
00610   while(parser->table[--i] == COLUMN_JUNK){}
00611   tableSize = i+1;
00612   parser->tableSize = tableSize;
00613 
00614   if (numatoms == 0) {
00615     printf("pdbxplugin) Could not parse atom number from file\n");
00616     return 0;
00617   }
00618 
00619   if (!column_exists[COLUMN_NUMBER]) {
00620     printf("pdbxplugin) WARNING: missing 'id' field.\n");
00621     valid_mmcif = false;
00622   }
00623   if (!column_exists[COLUMN_CHAIN_AUTH] && !parser->pdb_dev) {
00624     // PDB-Dev doesn't include any *auth* fields, so its not missing.
00625     printf("pdbxplugin) WARNING: missing 'auth_asym_id' field.\n");
00626     valid_mmcif = false;
00627   }
00628   if (!column_exists[COLUMN_CHAIN]) {
00629     printf("pdbxplugin) WARNING: missing 'label_asym_id' field.\n");
00630     valid_mmcif = false;
00631   }
00632   if (!column_exists[COLUMN_TYPE]) {
00633     printf("pdbxplugin) WARNING: missing 'label_atom_id' field.\n");
00634     valid_mmcif = false;
00635   }
00636   if (!column_exists[COLUMN_RESNAME]) {
00637     printf("pdbxplugin) WARNING: missing 'label_comp_id' field.\n");
00638     valid_mmcif = false;
00639   }
00640   if (!column_exists[COLUMN_RESID]) {
00641     printf("pdbxplugin) WARNING: missing 'label_seq_id' field.\n");
00642     valid_mmcif = false;
00643   }
00644   if (!column_exists[COLUMN_NAME]) {
00645     printf("pdbxplugin) WARNING: missing 'type_symbol' field.\n");
00646     valid_mmcif = false;
00647   }
00648 
00649   if (!column_exists[COLUMN_X] || !column_exists[COLUMN_Y] || !column_exists[COLUMN_Z]) {
00650     // This is still technically a valid mmCIF file, although at the time of this comment
00651     // the PDB reports 100% of PDBx files contain these fields.
00652     // Not sure what to do if they don't exist.
00653     printf("pdbxplugin) WARNING: coordinate fields not found.\n");
00654   }
00655 
00656   if (!valid_mmcif && !parser->pdb_dev) {
00657     // The documentation on pdb_dev requirements is not clear, but for mmCIF/PDBx files
00658     // there are clearly defined "required fields" for atom_site tables.
00659     // The checks we perform for those are not exhaustive, but the fields we don't check
00660     // are not currently read by this parser.
00661     printf("pdbxplugin) WARNING: this is not a valid PDBx/mmCIF file.\n");
00662   }
00663 
00664   return numatoms;
00665 }
00666 
00667 
00668 static void initCharToNum() {
00669   int i;
00670   int j = 1;
00671 
00672   for (i=0; i<128; i++)
00673     charToNum[i] = -1;
00674 
00675   i = 'A';
00676   while (i <= 'Z')
00677     charToNum[i++] = j++;
00678   i = 'a';
00679   while (i <= 'z')
00680     charToNum[i++] = j++;
00681   i = '0';
00682   while (i <= '9')
00683     charToNum[i++] = j++;
00684 }
00685 
00686 
00687 // This attempts to generate a unique id based off the chain name and resid...
00688 static inline int getUniqueResID(char * chainstr, int resid) {
00689   int uid;
00690   int length = strlen(chainstr);
00691   // Assuming max length of chainstr is 3 chars
00692   //Each char can be respresented by <= 6 bits since only a-z, A-Z, and 0-9 are valid values (62 possible values)
00693   uid = 1 + charToNum[(int)chainstr[0]];
00694   uid <<= 6;
00695 
00696   if (length == 1) {
00697     uid <<= 12;
00698   } else if (length == 2) {
00699     uid += charToNum[(int)chainstr[1]];
00700     uid <<= 12;
00701   } else if (length == 3) {
00702     uid += charToNum[(int)chainstr[1]];
00703     uid = (uid << 6) + charToNum[(int) chainstr[2]];
00704     uid <<= 6;
00705   }
00706 
00707   // First 18 bits of uid dedicated to 3 letters of chainstr
00708   uid <<= 12;
00709   uid += (0xFFF & resid); //add 12 least significant bits of resid to fill the remaining 10 bits of uid
00710 
00711   return uid;
00712 }
00713 
00714 
00715 #define ATOM_TYPE      0
00716 #define ATOM_RESNAME   1
00717 #define ATOM_INSERTION 2
00718 #define ATOM_CHAIN     3
00719 #define MAX_OPTIONAL_AUTH_FIELDS  2
00720 
00721 #define FLAG_CHAIN_LENGTH 0x01
00722 #define FLAG_CHARGE       0x02
00723 #define FLAG_INSERTION    0x04
00724 #define FLAG_BFACTOR      0x08
00725 #define FLAG_OCCUPANCY    0x10
00726 #define FLAG_MODEL_NUM    0x20
00727 
00728 static int parseStructure(molfile_atom_t * atoms, int * optflags, pdbxParser* parser) {
00729   int i, count, atomdata, pos, idx, xyzcount;
00730 
00731   // VMD will use the PDBx atom "type" field rather than the "name" field,
00732   // unless we're directed to do otherwise, so that we get the expected 
00733   // PDB-specific atom nomenclature in the name field, as needed by STRIDE
00734   // and other tools. 
00735   int vmdatomnamefrompdbxname = (getenv("VMDATOMNAMEFROMPDBXNAME") != NULL);
00736 
00737   char buffer[BUFFER_SIZE];
00738 
00739   char namebuffer[COLUMN_BUFFER_SIZE];
00740   char occupancybuffer[COLUMN_BUFFER_SIZE];
00741   char bfactorbuffer[COLUMN_BUFFER_SIZE];
00742   char chargebuffer[COLUMN_BUFFER_SIZE];
00743   char residbuffer[COLUMN_BUFFER_SIZE];
00744   char residAuthbuffer[COLUMN_BUFFER_SIZE];
00745   char chainbuffer[COLUMN_BUFFER_SIZE];
00746   char trash[COLUMN_BUFFER_SIZE];
00747   char xbuffer[COLUMN_BUFFER_SIZE];
00748   char ybuffer[COLUMN_BUFFER_SIZE];
00749   char zbuffer[COLUMN_BUFFER_SIZE];
00750   char model_num_buf[COLUMN_BUFFER_SIZE];
00751   void * columns[MAX_COLUMNS];
00752 
00753   memset(buffer, 0, sizeof(buffer));
00754   memset(namebuffer, 0, sizeof(namebuffer));
00755   memset(occupancybuffer, 0, sizeof(occupancybuffer));
00756   memset(bfactorbuffer, 0, sizeof(bfactorbuffer));
00757   memset(chargebuffer, 0, sizeof(chargebuffer));
00758   memset(residbuffer, 0, sizeof(residbuffer));
00759   memset(residAuthbuffer, 0, sizeof(residAuthbuffer));
00760   memset(chainbuffer, 0, sizeof(chainbuffer));
00761   memset(trash, 0, sizeof(trash));
00762   memset(xbuffer, 0, sizeof(xbuffer));
00763   memset(ybuffer, 0, sizeof(ybuffer));
00764   memset(zbuffer, 0, sizeof(zbuffer));
00765   memset(model_num_buf, 0, sizeof(model_num_buf));
00766   memset(columns, 0, sizeof(columns));
00767 
00768   molfile_atom_t * atom = NULL;
00769   int badptecount = 0;
00770   int chargecount = 0;
00771   int occupancycount = 0;
00772   int bfactorcount = 0;
00773   unsigned char parseFlags = 0;
00774   chainbuffer[1] = '\0';
00775   chainbuffer[2] = '\0';
00776   int hashTemp = 0;
00777   int hashCount = 1;
00778   int head = 0;
00779   int chainAuthIdx = MAX_COLUMNS-1, typeIdx = MAX_COLUMNS-1, resnameIdx = MAX_COLUMNS-1;
00780   int insertionIdx = MAX_COLUMNS-1, typeAuthIdx = MAX_COLUMNS-1;
00781 #if (vmdplugin_ABIVERSION >= 20)
00782   int chainIdx = MAX_COLUMNS-1;
00783 #endif
00784   char * chainAuth = parser->chain_auth;
00785   char * typeAuth = parser->type_auth;
00786   int tableSize = parser->tableSize;
00787   int* table = parser->table;
00788   unsigned char doBonds = 0;
00789 
00790   // Initialize hash table used later when reading the special bonds
00791   // This is necessary because PDBx files don't provide atom numbers
00792   // for the special bonds and instead just give atom type/chain/resid
00793   // It provides a mapping from the resid+chain -> linked list of all atoms
00794   // that have that resid and chain
00795   inthash_init(&parser->bondHash, parser->natoms);
00796 
00797   for (i=0; i<tableSize; i++) {
00798     switch (table[i]) {
00799       case COLUMN_NUMBER:
00800         columns[i] = trash;
00801         break;
00802 
00803       case COLUMN_NAME:
00804         columns[i] = namebuffer;
00805         break;
00806 
00807       case COLUMN_TYPE:
00808         columns[i] = atoms->type;
00809         typeIdx = i;
00810         break;
00811 
00812       case COLUMN_TYPE_AUTH:
00813         columns[i] = typeAuth;
00814         typeAuthIdx = i;
00815         break;
00816 
00817       case COLUMN_RESNAME:
00818         columns[i] = atoms->resname;
00819         resnameIdx = i;
00820         break;
00821 
00822       case COLUMN_RESID:
00823         columns[i] = residbuffer;
00824         break;
00825 
00826       case COLUMN_RESID_AUTH:
00827         columns[i] = residAuthbuffer;
00828         doBonds++;
00829         break;
00830 
00831       case COLUMN_INSERTION:
00832         columns[i] = atoms->insertion;
00833         insertionIdx = i;
00834         parseFlags |= FLAG_INSERTION;
00835         break;
00836 
00837       case COLUMN_X:
00838         columns[i] = xbuffer;
00839         break;
00840 
00841       case COLUMN_Y:
00842         columns[i] = ybuffer;
00843         break;
00844 
00845       case COLUMN_Z:
00846         columns[i] = zbuffer;
00847         break;
00848 
00849       case COLUMN_OCCUPANCY:
00850         columns[i] = occupancybuffer;
00851         break;
00852 
00853       case COLUMN_BFACTOR:
00854         columns[i] = bfactorbuffer;
00855         break;
00856 
00857       case COLUMN_CHARGE:
00858         columns[i] = chargebuffer;
00859         break;
00860 
00861       case COLUMN_CHAIN:
00862 #if (vmdplugin_ABIVERSION < 20)
00863         columns[i] = chainbuffer;
00864 #else
00865         columns[i] = atoms->chain;
00866         chainIdx = i;
00867 #endif
00868         break;
00869 
00870       case COLUMN_CHAIN_AUTH:
00871         columns[i] = chainAuth;
00872         chainAuthIdx = i;
00873         doBonds++;
00874         break;
00875 
00876       case COLUMN_MODEL_NUM:
00877         columns[i] = model_num_buf;
00878         parseFlags |= FLAG_MODEL_NUM;
00879         break;
00880 
00881       default:
00882         columns[i] = trash;
00883         break;
00884     }
00885   }
00886 
00887   // If the two optional auth fields aren't present, don't look for extra bonds
00888   if (doBonds != MAX_OPTIONAL_AUTH_FIELDS) {
00889     doBonds = 0;
00890   }
00891 
00892   // Start parsing, Skip through junk
00893   atomdata = 0;
00894   while (!atomdata) {
00895     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
00896       printf("pdbxplugin) failure while reading file.\n");
00897       parser->error = true;
00898       return -1;
00899     }
00900 
00901     if (isStructureDataHeader(buffer)) {
00902       atomdata = 1;
00903     }
00904   }
00905 
00906   // Skip through the atomdata table declaration
00907   while (atomdata) {
00908     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
00909       printf("pdbxplugin) failure while reading file\n");
00910       parser->error = true;
00911       return -1;
00912     }
00913 
00914     if (!isStructureDataHeader(buffer)) {
00915       atomdata = 0;
00916     }
00917   }
00918 
00919   count = 0;
00920   atom = atoms;
00921   do {
00922     if (count >= parser->natoms) {
00923       printf("pdbxplugin) ERROR: number of atoms is larger than predicted. Exiting...\n");
00924       return -1;
00925     }
00926 
00927     pos = 0;
00928     for (i=0; i<tableSize; ++i) {
00929       if (table[i] == COLUMN_JUNK) {
00930         // if we don't want this column, update pos to point to the next column
00931         skipNextWord(buffer, buffer, pos, sizeof(buffer));
00932       } else {
00933         // will copy each column string into the atom struct
00934         // or save the string if we need to convert it
00935         getNextWord(buffer, columns[i], pos, sizeof(buffer), COLUMN_BUFFER_SIZE);
00936       }
00937     }
00938 
00939     // Coordinates must be saved until timestep is called 
00940     xyzcount = count*3;
00941 
00942     // replacing atof with stringToFloat will increase performance 
00943     parser->xyz[xyzcount  ] = atof(xbuffer);
00944     parser->xyz[xyzcount+1] = atof(ybuffer);
00945     parser->xyz[xyzcount+2] = atof(zbuffer);
00946 
00947     atom->resid = atoi(residbuffer);
00948     if (doBonds && residAuthbuffer[0] != '.' && residAuthbuffer[0] != '?') {
00949       parser->resid_auth[count] = atoi(residAuthbuffer);
00950 
00951       // add atom to hash table
00952       // This attempts to generate a "unique id" based off the chain and resid
00953       hashTemp = getUniqueResID(chainAuth, parser->resid_auth[count]);
00954 
00955       if (-1 != (head = inthash_insert(&parser->bondHash, hashTemp, hashCount))) {
00956         // key already exists, so we have to "add" a node to the linked list 
00957         // for this residue.  Since we can't change the pointer in the 
00958         // hash table, we insert the node at the second position in the list
00959         parser->hashMem[hashCount].next = parser->hashMem[head].next;
00960         parser->hashMem[head].next = hashCount;
00961       }
00962 
00963       // "add" node to list
00964       parser->hashMem[hashCount++].index = count;
00965     }
00966 
00967     // XXX replace '?' or '.' insertion codes with a NUL char 
00968     // indicating an empty insertion code.
00969     if (insertionIdx == MAX_COLUMNS-1 || atom->insertion[0] == '?'
00970                                       || atom->insertion[0] == '.') {
00971       atom->insertion[0] = '\0';
00972       if (parseFlags & FLAG_INSERTION) {
00973         parseFlags ^= FLAG_INSERTION;
00974       }
00975     }
00976 
00977 // TODO: figure out what this conditional should be
00978 #if (vmdplugin_ABIVERSION < 20)
00979     /* check to see if the chain length is greater than 2 */
00980     if (chainbuffer[2] != '\0' && chainbuffer[1] != '\0') {
00981       chainbuffer[2] = '\0';
00982       parseFlags |= FLAG_CHAIN_LENGTH;
00983     }
00984     atom->chain[0] = chainbuffer[0];
00985     atom->chain[1] = chainbuffer[1];
00986 #endif
00987 
00988     // Assign these to the pdbx_data struct 
00989     if (bfactorbuffer[0] != '.' && bfactorbuffer[0] != '.') {
00990       atom->bfactor = atof(bfactorbuffer);
00991       ++bfactorcount;
00992       parseFlags |= FLAG_BFACTOR;
00993     } else {
00994       atom->bfactor = 0.0;
00995     }
00996 
00997     if (occupancybuffer[0] != '.' && occupancybuffer[0] != '?') {
00998       atom->occupancy = atof(occupancybuffer);
00999       ++occupancycount;
01000       parseFlags |= FLAG_OCCUPANCY;
01001     } else {
01002       atom->occupancy = 0.0;
01003     }
01004 
01005     if (chargebuffer[0] != '.' && chargebuffer[0] != '?') {
01006       atom->charge = atof(chargebuffer);
01007       ++chargecount;
01008       parseFlags |= FLAG_CHARGE;
01009     } else {
01010       atom->charge = 0.0;
01011     }
01012 
01013     idx = get_pte_idx_from_string(namebuffer);
01014 
01015     // check for parenthesis in atom type
01016     if (atom->type[0] == '"') {
01017       // only save what is inside the parenthesis
01018       i = 1;
01019       while (atom->type[i] != '"') {
01020         atom->type[i-1] = atom->type[i];
01021         ++i;
01022       }
01023       atom->type[i-1] = '\0';
01024     }
01025 
01026     if ((!vmdatomnamefrompdbxname) ||
01027         (strlen(namebuffer) == 0 || strlen(namebuffer) > 3)) {
01028       // atom->name and atom-> are the same
01029       strcpy(atom->name, atom->type);
01030     } else {
01031       strcpy(atom->name, namebuffer);
01032     }
01033 
01034     // Set periodic table values 
01035     if (idx != 0) {
01036       atom->atomicnumber = idx;
01037       atom->mass = get_pte_mass(idx);
01038       atom->radius = get_pte_vdw_radius(idx);
01039     } else {
01040       ++badptecount;
01041     }
01042 
01043     if (parseFlags & FLAG_MODEL_NUM) {
01044       if (model_num_buf[0] != '\0') {
01045         atom->altloc[0] = model_num_buf[0];
01046       }
01047       if (model_num_buf[1] != '\0') {
01048         atom->altloc[1] = model_num_buf[1];
01049       }
01050     }
01051 
01052     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01053       printf("pdbxplugin) failure while reading file\n");
01054       parser->error = true;
01055       return -1;
01056     }
01057 
01058     ++count;
01059     ++atom;
01060     typeAuth += TYPE_SIZE;
01061     if (doBonds) {
01062       chainAuth += CHAIN_SIZE;
01063       columns[chainAuthIdx] = chainAuth;
01064     }
01065     columns[typeAuthIdx] = typeAuth;
01066     columns[typeIdx] = atom->type;
01067     columns[resnameIdx] = atom->resname;
01068     columns[insertionIdx] = atom->insertion;
01069 #if (vmdplugin_ABIVERSION >= 20)
01070     columns[chainIdx] = atom->chain;
01071 #endif
01072   } while (buffer[0] != '#'); //do until all the atoms have been read
01073 
01074   // after we finish parsing, set optflags 
01075 #if (vmdplugin_ABIVERSION < 20)
01076   if (parseFlags & FLAG_CHAIN_LENGTH) {
01077     printf("pdbxplugin) WARNING: This plugin ABI does not support chain names longer than two characters. Some chain names have been truncated.\n");
01078   }
01079 #endif
01080 
01081   if (badptecount == 0) {
01082     *optflags |= MOLFILE_MASS | MOLFILE_RADIUS | MOLFILE_ATOMICNUMBER;
01083   }
01084 
01085   if (parseFlags & FLAG_INSERTION) {
01086     *optflags |= MOLFILE_INSERTION;
01087   }
01088 
01089   if (parseFlags & FLAG_CHARGE) {
01090     *optflags |= MOLFILE_CHARGE;
01091   }
01092 
01093   if (parseFlags & FLAG_BFACTOR) {
01094     *optflags |= MOLFILE_BFACTOR;
01095   }
01096 
01097   if (parseFlags & FLAG_OCCUPANCY) {
01098     *optflags |= MOLFILE_OCCUPANCY;
01099   }
01100 
01101   if (parseFlags & FLAG_MODEL_NUM) {
01102     *optflags |= MOLFILE_ALTLOC;
01103   }
01104 
01105   if (badptecount > 0) {
01106     printf("pdbxplugin) encountered %d bad element indices!\n", badptecount);
01107     return -1;
01108   }
01109 
01110   return 0;
01111 }
01112 
01113 
01114 #define BOND_JUNK      0
01115 #define BOND_NAME_1    1
01116 #define BOND_CHAIN_1   2
01117 #define BOND_RESNAME_1 3
01118 #define BOND_RESID_1   4
01119 #define BOND_NAME_2    5
01120 #define BOND_CHAIN_2   6
01121 #define BOND_RESNAME_2 7
01122 #define BOND_RESID_2   8
01123 
01124 static bool readAngleBonds(molfile_atom_t * atoms, pdbxParser* parser) {
01125   char buffer[BUFFER_SIZE];
01126   char* columns[MAX_COLUMNS];
01127   int bondTableSize = 0;
01128   int bnum = 0;
01129   int i, pos, k;
01130   int* newBondsTo, *newBondsFrom;
01131   fpos_t filePos;
01132   char junk[COLUMN_BUFFER_SIZE];
01133   char name1[COLUMN_BUFFER_SIZE];
01134   char name2[COLUMN_BUFFER_SIZE];
01135   char chain1[COLUMN_BUFFER_SIZE];
01136   char chain2[COLUMN_BUFFER_SIZE];
01137   char resid1buffer[COLUMN_BUFFER_SIZE];
01138   char resid2buffer[COLUMN_BUFFER_SIZE];
01139   int resid1, resid2;
01140   int uid1, uid2;
01141   int aIdx1, aIdx2;
01142   int n_angle_bonds = 0;
01143 
01144   rewind(parser->file);
01145 
01146   // skip through the file until we find the angle/bond information
01147   do {
01148     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01149       return false;
01150     }
01151   } while (NULL == strstr(buffer,"_pdbx_validate_rmsd_angle."));
01152 
01153   fgetpos(parser->file, &filePos);
01154 
01155   // Parse table header data 
01156   while (NULL != strstr(buffer,"_pdbx_validate_rmsd_angle.")) {
01157     // assign integer values to each column 
01158     if (NULL != strstr(buffer+26, "auth_atom_id_1")) {
01159       columns[bondTableSize] = (char*)name1;
01160     }
01161     else if (NULL != strstr(buffer+26, "auth_asym_id_1")) {
01162       columns[bondTableSize] = (char*)chain1;
01163     }
01164     else if (NULL != strstr(buffer+26, "auth_seq_id_1")) {
01165       columns[bondTableSize] = (char*)resid1buffer;
01166     }
01167     else if (NULL != strstr(buffer+26, "auth_atom_id_2")) {
01168       columns[bondTableSize] = (char*)name2;
01169     }
01170     else if (NULL != strstr(buffer+26, "auth_asym_id_2")) {
01171       columns[bondTableSize] = (char*)chain2;
01172     }
01173     else if (NULL != strstr(buffer+26, "auth_seq_id_2")) {
01174       columns[bondTableSize] = (char*)resid2buffer;
01175     }
01176     else {
01177       columns[bondTableSize] = (char*)junk;
01178     }
01179     ++bondTableSize;
01180 
01181     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01182       printf("pdbxplugin) could not read bond information.\n");
01183       return false;
01184     }
01185 
01186   }
01187 
01188   // figure out how many bonds are being defined 
01189   while (buffer[0] != '#') {
01190     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01191       printf("pdbxplugin) could not read bond information.\n");
01192       return false;
01193     }
01194     ++bnum;
01195   }
01196 
01197   n_angle_bonds = bnum; 
01198   if ((newBondsTo = (int*)realloc((void*)parser->bondsTo,
01199                                   (parser->nbonds + bnum) * sizeof(int))) == NULL) {
01200     printf("pdbxplugin) ERROR: could not reallocate bonds array.\n");
01201     return false;
01202   }
01203   if ((newBondsFrom = (int*)realloc((void*)parser->bondsFrom,
01204                                     (parser->nbonds + bnum) * sizeof(int))) == NULL) {
01205     printf("pdbxplugin) ERROR: could not reallocate bonds array.\n");
01206     return false;
01207   }
01208   parser->bondsTo = newBondsTo;
01209   parser->bondsFrom = newBondsFrom;
01210 
01211   // Skip back to the start of the bond info 
01212   fsetpos(parser->file, &filePos);
01213   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01214     printf("pdbxplugin) could not read bond information.\n");
01215     return false;
01216   }
01217 
01218   // Skip through the header
01219   while (NULL != strstr(buffer,"_pdbx_validate_rmsd_angle.")) {
01220     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01221       printf("pdbxplugin) could not read bond information.\n");
01222       return false;
01223     }
01224   }
01225 
01226   bnum = 0;
01227   while (buffer[0] != '#') {
01228     pos = 0;
01229     // copy each column of the table into the appropriate columns index 
01230     for (i=0; i<bondTableSize; ++i) {
01231       getNextWord(buffer, columns[i], pos, sizeof(buffer), COLUMN_BUFFER_SIZE);
01232     }
01233     resid1 = atoi(resid1buffer);
01234     resid2 = atoi(resid2buffer);
01235 
01236     // get unique res ID for hash table lookup 
01237     uid1 = getUniqueResID(chain1, resid1);
01238     uid2 = getUniqueResID(chain2, resid2);
01239     k = 0;
01240 
01241     // find the atoms in the hash table 
01242     if (((uid1 = inthash_lookup(&parser->bondHash, uid1)) != -1)
01243         && ((uid2 = inthash_lookup(&parser->bondHash, uid2)) != -1) ) {
01244       // because the hashtable is residue specifc, loop through 
01245       // all atoms in the residue to find the correct one
01246       // Find atom 1 
01247       do {
01248         aIdx1 = parser->hashMem[uid1].index;
01249         if (strcmp(name1, parser->type_auth + aIdx1 * TYPE_SIZE) == 0 && 
01250             parser->resid_auth[aIdx1] == resid1 &&
01251             strcmp(chain1, parser->chain_auth + aIdx1 * CHAIN_SIZE) == 0) {
01252           k++;
01253           break;
01254         } else {
01255           uid1 = parser->hashMem[uid1].next;
01256         }
01257       } while (uid1 != 0); //0 indicates end of "list"
01258 
01259       // Find atom 2
01260       do {
01261         aIdx2 = parser->hashMem[uid2].index;
01262         if (strcmp(name2, parser->type_auth + aIdx2 * TYPE_SIZE) == 0 && 
01263             parser->resid_auth[aIdx2] == resid2 &&
01264             strcmp(chain2, parser->chain_auth + aIdx2 * CHAIN_SIZE) == 0) {
01265           k++;
01266           break;
01267         } else {
01268           uid2 = parser->hashMem[uid2].next;
01269         }
01270       } while (uid2 != 0); // 0 indicates end of "list"
01271 
01272       if (k == 2) {
01273         // vmd doesn't use 0 based index for bond info?
01274         parser->bondsFrom[parser->nbonds + bnum] = aIdx1+1; 
01275         parser->bondsTo[parser->nbonds + bnum] = aIdx2+1;
01276         ++bnum;
01277       }
01278     }
01279 #ifdef PDBX_DEBUG
01280     else {
01281       printf("pdbxplugin) WARNING: Could not locate bond in hash table. %s %d\n", chain1, resid1);
01282       // This could occur if the number of chains/resids is large,
01283       // which could cause collisions in the bonds hash table.
01284       // Due to structore of pdbx special bonds, I'm not sure how we can get around this.
01285       if (uid1 == 0)
01286         printf("1 ");
01287       if (uid2 == 0)
01288         printf("2");
01289       printf("\n");
01290     }
01291 #endif
01292 
01293     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01294       printf("pdbxplugin) could not read RMSD bond deviation information.\n");
01295       return false;
01296     }
01297   }
01298 
01299   if (bnum != n_angle_bonds) {
01300     printf("pdbxplugin) ERROR: number of angle bonds does not match number of predicted bonds.\n");
01301   }
01302   parser->nbonds += bnum;
01303 
01304 #ifdef PDBX_DEBUG
01305   printf("pdbxplugin) nbonds defined: %d\n", parser->nbonds);
01306 #endif
01307   return bnum > 0;
01308 }
01309 
01310  
01311 static bool readRMSDBonds(molfile_atom_t * atoms, pdbxParser* parser) {
01312   char buffer[BUFFER_SIZE];
01313   char* columns[64];
01314   int bondTableSize = 0;
01315   int bnum = 0;
01316   int i, k;
01317   fpos_t filePos;
01318   char junk[COLUMN_BUFFER_SIZE];
01319   char name1[COLUMN_BUFFER_SIZE];
01320   char name2[COLUMN_BUFFER_SIZE];
01321   char chain1[COLUMN_BUFFER_SIZE];
01322   char chain2[COLUMN_BUFFER_SIZE];
01323   char resid1buffer[COLUMN_BUFFER_SIZE];
01324   char resid2buffer[COLUMN_BUFFER_SIZE];
01325   int resid1, resid2;
01326   int uid1, uid2;
01327   int aIdx1, aIdx2;
01328 
01329   // skip through the file until we find the RMSD/bond information 
01330   do {
01331     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01332       parser->nbonds = 0;
01333       return false;
01334     }
01335   } while (!isValidateRMSDBond(buffer));
01336 
01337   fgetpos(parser->file, &filePos);
01338 
01339   // if (sscanf(  return if two words in one table definition line
01340 
01341   // Parse table header data
01342   while (isValidateRMSDBond(buffer)) {
01343     // assign integer values to each column 
01344     // 25 is length validateRMSDbond header name
01345     if (NULL != strstr(buffer+25, "auth_atom_id_1")) {
01346       columns[bondTableSize] = (char*)name1;
01347     }
01348     else if (NULL != strstr(buffer+25, "auth_asym_id_1")) {
01349       columns[bondTableSize] = (char*)chain1;
01350     }
01351     else if (NULL != strstr(buffer+25, "auth_seq_id_1")) {
01352       columns[bondTableSize] = (char*)resid1buffer;
01353     }
01354     else if (NULL != strstr(buffer+25, "auth_atom_id_2")) {
01355       columns[bondTableSize] = (char*)name2;
01356     }
01357     else if (NULL != strstr(buffer+25, "auth_asym_id_2")) {
01358       columns[bondTableSize] = (char*)chain2;
01359     }
01360     else if (NULL != strstr(buffer+25, "auth_seq_id_2")) {
01361       columns[bondTableSize] = (char*)resid2buffer;
01362     }
01363     else {
01364       columns[bondTableSize] = (char*)junk;
01365     }
01366     ++bondTableSize;
01367 
01368     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01369       printf("pdbxplugin) could not read bond information.\n");
01370       return false;
01371     }
01372   }
01373 
01374   // figure out how many bonds are being defined 
01375   while (buffer[0] != '#') {
01376     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01377       printf("pdbxplugin) could not read bond information.\n");
01378       return false;
01379     }
01380     ++bnum;
01381   }
01382 
01383   parser->nbonds = bnum;
01384   parser->bondsTo = (int*)malloc(bnum * sizeof(int));
01385   parser->bondsFrom = (int*)malloc(bnum * sizeof(int));
01386 
01387   // Skip back to the start of the bond info 
01388   fsetpos(parser->file, &filePos);
01389   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01390     printf("pdbxplugin) could not read bond information.\n");
01391     return false;
01392   }
01393 
01394   // Skip through the header 
01395   while (isValidateRMSDBond(buffer)) {
01396     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01397       printf("pdbxplugin) could not read bond information.\n");
01398       return false;
01399     }
01400   }
01401 
01402   bnum = 0;
01403   while (buffer[0] != '#') {
01404     int pos = 0;
01405     // copy each column of the table into the appropriate columns index 
01406     for (i=0; i<bondTableSize; ++i) {
01407         getNextWord(buffer, columns[i], pos, sizeof(buffer), COLUMN_BUFFER_SIZE);
01408     }
01409     resid1 = atoi(resid1buffer);
01410     resid2 = atoi(resid2buffer);
01411 
01412     // get unique res ID for hash table lookup 
01413     uid1 = getUniqueResID(chain1, resid1);
01414     uid2 = getUniqueResID(chain2, resid2);
01415     k = 0;
01416 
01417     // Find the atoms in the hash table.
01418     // Because the hashtable is residue specifc, loop through all
01419     // atoms in the residue to find the correct one
01420     if ( ((uid1 = inthash_lookup(&parser->bondHash, uid1)) != -1)
01421         && ((uid2 = inthash_lookup(&parser->bondHash, uid2)) != -1) ) {
01422       // Find atom 1
01423       do {
01424         aIdx1 = parser->hashMem[uid1].index;
01425         if (strcmp(name1, parser->type_auth + aIdx1 * TYPE_SIZE) == 0
01426             && parser->resid_auth[aIdx1] == resid1
01427             && strcmp(chain1, parser->chain_auth + aIdx1 * CHAIN_SIZE) == 0){
01428           k++;
01429           break;
01430         } else {
01431           uid1 = parser->hashMem[uid1].next;
01432         }
01433       } while (uid1 != 0);  // 0 indicates end of "list"
01434 
01435       // Find atom 2 
01436       do {
01437         aIdx2 = parser->hashMem[uid2].index;
01438         if (strcmp(name2, parser->type_auth + aIdx2 * TYPE_SIZE) == 0
01439             && parser->resid_auth[aIdx2] == resid2
01440             && strcmp(chain2, parser->chain_auth + aIdx2 * CHAIN_SIZE) == 0) {
01441           k++;
01442           break;
01443         } else {
01444           uid2 = parser->hashMem[uid2].next;
01445         }
01446       } while (uid2 != 0); // 0 indicates end of "list"
01447 
01448       // If we found both atoms add them to the bonds list
01449       if (k == 2) {
01450         parser->bondsFrom[bnum] = aIdx1+1;  // vmd doesn't use 0 based index for bond info?
01451         parser->bondsTo[bnum] = aIdx2+1;
01452         ++bnum;
01453       }
01454     }
01455 #ifdef PDBX_DEBUG
01456     else {
01457       printf("^^^^Could locate bond^^^^^, %s %d\n", chain1, resid1);
01458       printf("Error finding atom ");
01459       if (uid1 == 0)
01460         printf("1 ");
01461       if (uid2 == 0)
01462         printf("2");
01463       printf("\n");
01464     }
01465 #endif
01466 
01467     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01468       printf("pdbxplugin) could not read RMSD bond deviation information.\n");
01469       return false;
01470     }
01471   }
01472 
01473   if (parser->nbonds != bnum) {
01474     printf("pdbxplugin: ERROR: mismatch in number of bonds.\n");
01475   }
01476 
01477 #ifdef PDBX_DEBUG
01478   printf("pdbxplugin) nbonds defined: %d\n", parser->nbonds);
01479 #endif
01480 
01481   return (bnum > 0);
01482 }
01483 
01484 
01485 //
01486 // Experimental support for Integrative Hybrid Modeling (IHM) structure data
01487 // in enhanced PDBx files:
01488 //   https://github.com/ihmwg/IHM-dictionary/blob/master/dictionary_documentation/documentation.md
01489 //   https://pdb-dev.wwpdb.org/
01490 //   https://python-ihm.readthedocs.io/en/latest/introduction.html
01491 //
01492 // At present the PDBx plugin makes an attempt to parse the
01493 // following IHM record types:
01494 //   _ihm_sphere_obj_site
01495 //
01496 static bool parse_pdbx_ihm_sphere_data(pdbxParser* parser) {
01497   char buffer[BUFFER_SIZE];
01498   char* columns[32];
01499   char xbuffer[COLUMN_BUFFER_SIZE];
01500   char ybuffer[COLUMN_BUFFER_SIZE];
01501   char zbuffer[COLUMN_BUFFER_SIZE];
01502   char radbuffer[COLUMN_BUFFER_SIZE];
01503   int catlen = 0;
01504   int tableSize = 0;
01505   int nelems = 0;
01506   int i;
01507   fpos_t filePos;
01508   char junk[32];
01509 
01510   rewind(parser->file);
01511 
01512   // skip through the file until we find IHM sphere information
01513   do {
01514     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01515       parser->nbonds = 0;
01516       return false;
01517     }
01518   } while (NULL == strstr(buffer, "_ihm_sphere_obj_site."));
01519 
01520   // record location of the start of IHM sphere info
01521   fgetpos(parser->file, &filePos);
01522 
01523   // Parse table header data
01524   catlen = strlen("_ihm_sphere_obj_site.");
01525   tableSize = 0;
01526   while (NULL != strstr(buffer, "_ihm_sphere_obj_site.")) {
01527     // assign data fields to columns
01528     const char *fieldbuf = buffer+catlen;
01529 
01530     if (NULL != strstr(fieldbuf, "object_radius")) {
01531       columns[tableSize] = (char*)radbuffer;
01532     } else if (NULL != strstr(fieldbuf, "Cartn_x")) {
01533       columns[tableSize] = (char*)xbuffer;
01534     } else if (NULL != strstr(fieldbuf, "Cartn_y")) {
01535       columns[tableSize] = (char*)ybuffer;
01536     } else if (NULL != strstr(fieldbuf, "Cartn_z")) {
01537       columns[tableSize] = (char*)zbuffer;
01538     } else {
01539       columns[tableSize] = (char*)junk;
01540     }
01541     ++tableSize;
01542 
01543     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01544       printf("pdbxplugin) could not read IHM '_ihm_sphere_obj_site' information.\n");
01545       return false;
01546     }
01547   }
01548 
01549   // figure out how many elems are being defined
01550   nelems = 0;
01551   while (buffer[0] != '#') {
01552     ++nelems;
01553     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01554       break;
01555     }
01556   }
01557 
01558   // Skip back to the start of the IHM sphere info
01559   fsetpos(parser->file, &filePos);
01560   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01561     printf("pdbxplugin) could not read IHM '_ihm_sphere_obj_site' information.\n");
01562     return false;
01563   }
01564 
01565   // Skip through the header
01566   while (NULL != strstr(buffer, "_ihm_sphere_obj_site.")) {
01567     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01568       printf("pdbxplugin) could not read IHM '_ihm_sphere_obj_site' information.\n");
01569       return false;
01570     }
01571   }
01572 
01573   // allocate graphics buffer sized to sphere count
01574   parser->n_graphics_elems = nelems;
01575   parser->g_data = new molfile_graphics_t[nelems];
01576 
01577   nelems = 0; // reset counter so we can use it again
01578   while (buffer[0] != '#') {
01579     int pos = 0;
01580     // copy each column of the table into the appropriate columns index
01581     for (i=0; i<tableSize; ++i) {
01582       getNextWord(buffer, columns[i], pos, sizeof(buffer), COLUMN_BUFFER_SIZE);
01583     }
01584     parser->g_data[nelems].type = MOLFILE_SPHERE;
01585     parser->g_data[nelems].style = 12;
01586     parser->g_data[nelems].size = atof(radbuffer);
01587     parser->g_data[nelems].data[0] = atof(xbuffer);
01588     parser->g_data[nelems].data[1] = atof(ybuffer);
01589     parser->g_data[nelems].data[2] = atof(zbuffer);
01590 
01591     nelems++;
01592     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01593       break;
01594     }
01595   }
01596 
01597   return nelems > 0;
01598 }
01599 
01600 
01601 static bool parse_pdbx_ihm_restraints_data(pdbxParser* parser) {
01602   char buffer[BUFFER_SIZE];
01603   char* columns[32];
01604   char groupbuffer[COLUMN_BUFFER_SIZE];
01605   char entity_description_1[COLUMN_BUFFER_SIZE];
01606   char entity_id_1[COLUMN_BUFFER_SIZE];
01607   char seq_id_1[COLUMN_BUFFER_SIZE];
01608   char comp_id_1[COLUMN_BUFFER_SIZE];
01609   char entity_description_2[COLUMN_BUFFER_SIZE];
01610   char entity_id_2[COLUMN_BUFFER_SIZE];
01611   char seq_id_2[COLUMN_BUFFER_SIZE];
01612   char comp_id_2[COLUMN_BUFFER_SIZE];
01613   char crosslink_type[COLUMN_BUFFER_SIZE];
01614   char asym_id_1[COLUMN_BUFFER_SIZE];
01615   char asym_id_2[COLUMN_BUFFER_SIZE];
01616   int catlen = 0;
01617   int tableSize = 0;
01618   int nelems = 0;
01619   int i;
01620   fpos_t crosslinkPos, crosslinkrestraintPos;
01621   char junk[32];
01622 
01623   int verbose = (getenv("VMDPDBXVERBOSE") != NULL);
01624 
01625   // 
01626   // parse crosslink records
01627   // 
01628   rewind(parser->file);
01629 
01630   // skip through the file until we find IHM cross link information
01631   do {
01632     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01633       parser->nbonds = 0;
01634       return false;
01635     }
01636   } while (NULL == strstr(buffer, "_ihm_cross_link_list."));
01637 
01638   // record location of the start of IHM sphere info
01639   fgetpos(parser->file, &crosslinkPos);
01640 
01641   // Parse table header data
01642   catlen = strlen("_ihm_cross_link_list.");
01643   tableSize = 0;
01644   memset(columns, 0, sizeof(columns));
01645   while (NULL != strstr(buffer, "_ihm_cross_link_list.")) {
01646     // assign data fields to columns
01647     const char *fieldbuf = buffer+catlen;
01648     if (NULL != strstr(fieldbuf, "group_id")) {
01649       columns[tableSize] = (char*)groupbuffer;
01650     } else if (NULL != strstr(fieldbuf, "entity_description_1")) {
01651       columns[tableSize] = (char*)entity_description_1;
01652     } else if (NULL != strstr(fieldbuf, "entity_id_1")) {
01653       columns[tableSize] = (char*)entity_id_1;
01654     } else if (NULL != strstr(fieldbuf, "seq_id_1")) {
01655       columns[tableSize] = (char*)seq_id_1;
01656     } else if (NULL != strstr(fieldbuf, "comp_id_1")) {
01657       columns[tableSize] = (char*)comp_id_1;
01658     } else if (NULL != strstr(fieldbuf, "entity_description_2")) {
01659       columns[tableSize] = (char*)entity_description_2;
01660     } else if (NULL != strstr(fieldbuf, "entity_id_2")) {
01661       columns[tableSize] = (char*)entity_id_2;
01662     } else if (NULL != strstr(fieldbuf, "seq_id_2")) {
01663       columns[tableSize] = (char*)seq_id_2;
01664     } else if (NULL != strstr(fieldbuf, "comp_id_2")) {
01665       columns[tableSize] = (char*)comp_id_2;
01666     } else if (NULL != strstr(fieldbuf, "type")) {
01667       columns[tableSize] = (char*)crosslink_type;
01668     } else {
01669       columns[tableSize] = (char*)junk;
01670     }
01671     ++tableSize;
01672 
01673     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01674       printf("pdbxplugin) could not read IHM '_ihm_cross_link_list' information.\n");
01675       return false;
01676     }
01677   }
01678 
01679   // figure out how many elems are being defined
01680   nelems = 0;
01681   while (buffer[0] != '#') {
01682     ++nelems;
01683     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01684       break;
01685     }
01686   }
01687 
01688   // Skip back to the start of the IHM crosslink info
01689   fsetpos(parser->file, &crosslinkPos);
01690   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01691     printf("pdbxplugin) could not read IHM '_ihm_cross_link_list' information.\n");
01692     return false;
01693   }
01694 
01695   // Skip through the header
01696   while (NULL != strstr(buffer, "_ihm_cross_link_list.")) {
01697     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01698       printf("pdbxplugin) could not read IHM '_ihm_cross_link_list' information.\n");
01699       return false;
01700     }
01701   }
01702 
01703   // XXX use nelems to allocate storage for output
01704 
01705   if (nelems > 0) {
01706     printf("Found %d IHM crosslinks:\n", nelems);
01707     nelems = 0; // reset counter so we can use it again
01708     while (buffer[0] != '#') {
01709       int pos = 0;
01710       // copy each column of the table into the appropriate columns index
01711       for (i=0; i<tableSize; ++i) {
01712         getNextWord(buffer, columns[i], pos, sizeof(buffer), COLUMN_BUFFER_SIZE);
01713       }
01714 
01715       if (verbose) {
01716         printf("[%d] %s, 1:%s %s %s %s, 2:%s %s %s %s, %s\n",
01717                nelems, groupbuffer, 
01718                entity_description_1, entity_id_1, seq_id_1, comp_id_1,
01719                entity_description_2, entity_id_2, seq_id_2, comp_id_2,
01720                crosslink_type);
01721       }
01722 
01723       nelems++;
01724 
01725       if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01726         break;
01727       }
01728     }
01729   }
01730 
01731   // 
01732   // parse crosslink restraints
01733   // 
01734 
01735   // If we don't have any crosslink records, there's no value
01736   // to trying to parse restraint records that depend on them
01737   if (nelems < 1) {
01738     rewind(parser->file);
01739     return 0;
01740   }
01741 
01742   // skip through the file until we find IHM cross link information
01743   do {
01744     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01745       parser->nbonds = 0;
01746       return false;
01747     }
01748   } while (NULL == strstr(buffer, "_ihm_cross_link_restraint."));
01749 
01750   // record location of the start of IHM sphere info
01751   fgetpos(parser->file, &crosslinkrestraintPos);
01752 
01753   // Parse table header data
01754   tableSize = 0;
01755   memset(columns, 0, sizeof(columns));
01756   catlen = strlen("_ihm_cross_link_restraint.");
01757   while (NULL != strstr(buffer, "_ihm_cross_link_restraint.")) {
01758     // assign data fields to columns
01759     const char *fieldbuf = buffer+catlen;
01760     if (NULL != strstr(fieldbuf, "group_id")) {
01761       columns[tableSize] = (char*)groupbuffer;
01762     } else if (NULL != strstr(fieldbuf, "entity_id_1")) {
01763       columns[tableSize] = (char*)entity_id_1;
01764     } else if (NULL != strstr(fieldbuf, "asym_id_1")) {
01765       columns[tableSize] = (char*)asym_id_1;
01766     } else if (NULL != strstr(fieldbuf, "seq_id_1")) {
01767       columns[tableSize] = (char*)seq_id_1;
01768     } else if (NULL != strstr(fieldbuf, "comp_id_1")) {
01769       columns[tableSize] = (char*)comp_id_1;
01770     } else if (NULL != strstr(fieldbuf, "entity_id_2")) {
01771       columns[tableSize] = (char*)entity_id_2;
01772     } else if (NULL != strstr(fieldbuf, "asym_id_2")) {
01773       columns[tableSize] = (char*)asym_id_2;
01774     } else if (NULL != strstr(fieldbuf, "seq_id_2")) {
01775       columns[tableSize] = (char*)seq_id_2;
01776     } else if (NULL != strstr(fieldbuf, "comp_id_2")) {
01777       columns[tableSize] = (char*)comp_id_2;
01778     } else if (NULL != strstr(fieldbuf, "type")) {
01779       columns[tableSize] = (char*)crosslink_type;
01780     } else {
01781       columns[tableSize] = (char*)junk;
01782     }
01783     ++tableSize;
01784 
01785     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01786       printf("pdbxplugin) could not read IHM '_ihm_cross_link_restraint' information.\n");
01787       return false;
01788     }
01789   }
01790 
01791   // figure out how many elems are being defined
01792   nelems = 0;
01793   while (buffer[0] != '#') {
01794     ++nelems;
01795     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01796       break;
01797     }
01798   }
01799 
01800   // Skip back to the start of the IHM crosslink info
01801   fsetpos(parser->file, &crosslinkrestraintPos);
01802   if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01803     printf("pdbxplugin) could not read IHM '_ihm_cross_link_restraint' information.\n");
01804     return false;
01805   }
01806 
01807   // Skip through the header
01808   while (NULL != strstr(buffer, "_ihm_cross_link_restraint.")) {
01809     if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01810       printf("pdbxplugin) could not read IHM '_ihm_cross_link_restraint' information.\n");
01811       return false;
01812     }
01813   }
01814 
01815   // XXX use nelems to allocate storage for output
01816 
01817   if (nelems > 0) {
01818     printf("Found %d IHM crosslink restraints:\n", nelems);
01819 
01820     nelems = 0; // reset counter so we can use it again
01821     while (buffer[0] != '#') {
01822       int pos = 0;
01823       // copy each column of the table into the appropriate columns index
01824       for (i=0; i<tableSize; ++i) {
01825         getNextWord(buffer, columns[i], pos, sizeof(buffer), COLUMN_BUFFER_SIZE);
01826       }
01827 
01828       if (verbose) {
01829         printf("[%d] %s, 1:%s %s %s %s, 2:%s %s %s %s, %s\n",
01830                nelems, groupbuffer,
01831                entity_id_1, asym_id_1, seq_id_1, comp_id_1,
01832                entity_id_2, asym_id_2, seq_id_2, comp_id_2,
01833                crosslink_type);
01834       }
01835 
01836       nelems++;
01837 
01838       if (NULL == fgets(buffer, BUFFER_SIZE, parser->file)) {
01839         break;
01840       }
01841     }
01842   }
01843 
01844   return nelems > 0;
01845 }
01846 
01847 
01848 
01849 // Parse any graphics objects that are not otherwise associated
01850 // with atomic structure fields in the molfile plugin API.
01851 static bool parse_pdbx_graphics_data(pdbxParser* parser) {
01852   bool rc = false;
01853   // XXX Integrated Hybrid Modeling (IHM) structure information
01854   //      are presently represented only as VMD graphics objects,
01855   //      until we add the necessary APIs and user interfaces to
01856   //      make direct use of the data for other purposes.
01857   rc = parse_pdbx_ihm_sphere_data(parser);
01858   if (!rc)
01859     printf("pdbxplugin) No IHM sphere graphics objects defined.\n");
01860 
01861 #if 0
01862   rc = parse_pdbx_ihm_restraints_data(parser);
01863   if (!rc)
01864     printf("pdbxplugin) No IHM crosslinks found.\n");
01865 #endif
01866 
01867   return rc;
01868 }
01869 
01870 
01871 static bool readBonds(molfile_atom_t * atoms, pdbxParser* parser) {
01872   bool retval = readRMSDBonds(atoms, parser);
01873   retval = readAngleBonds(atoms, parser) || retval;
01874   return retval;
01875 }
01876 
01877 
01878 static inline bool isValidateRMSDBond(char * str) {
01879   // return str[0-24] == "_pdbx_validate_rmsd_bond." 
01880   return (str[0] == '_' && str[1] == 'p' && str[2] == 'd' && str[3] == 'b' &&
01881           str[4] == 'x' && str[5] == '_' && str[6] == 'v' && str[7] == 'a' &&
01882           str[8] == 'l' && str[9] == 'i' && str[10] == 'd' && str[11] == 'a' &&
01883           str[12] == 't' && str[13] == 'e' && str[14] == '_' && str[15] == 'r' &&
01884           str[16] == 'm' && str[17] == 's' && str[18] == 'd' && str[19] == '_' &&
01885           str[20] == 'b' && str[21] == 'o' && str[22] == 'n' && str[23] == 'd' &&
01886           str[24] == '.');
01887 }
01888 
01889 
01890 static inline bool isAtomSite(char * str) {
01891   return (str[0]=='_' && str[1]=='a' && str[2]=='t' && str[3]=='o' && str[4]=='m' &&
01892           str[5]=='_' && str[6]=='s' && str[7]=='i' && str[8]=='t' && str[9]=='e' &&
01893           str[10]=='.');
01894 }
01895 
01896 
01897 
01898 //
01899 // start of pdbxWriter implementation 
01900 //
01901 static pdbxWriter* create_pdbxWriter(const char* filename, int numAtoms) {
01902   pdbxWriter* writer = new pdbxWriter;
01903   memset(writer, 0, sizeof(pdbxWriter));
01904   int length = strlen(filename);
01905   int start = 0;
01906   int end = length;
01907   int i;
01908   writer->numatoms = numAtoms;
01909   writer->bufferCount = 0;
01910   writer->fd = fopen(filename, "w");
01911 
01912   // get name of pdb file 
01913   for (i=0; i<length; ++i) {
01914     if (filename[i] == '/' || filename[i] == '\\') {
01915       if (i+1 < length)
01916         start = i+1;
01917     }
01918 
01919     if (filename[i] == '.')
01920       end = i;
01921   }
01922 
01923   strncpy(writer->pdbName, filename + start, end - start);
01924   writer->pdbName[end-start] = '\0';
01925   return writer;
01926 }
01927 
01928 
01929 static void addCoordinates(const float* coords, pdbxWriter* writer) {
01930   writer->coordinates = coords;
01931 }
01932 
01933 
01934 static void addAtoms(const molfile_atom_t* atomlist, int optflags, pdbxWriter* writer) {
01935   int i;
01936   writer->atoms = new molfile_atom_t[writer->numatoms];
01937   molfile_atom_t* atoms = writer->atoms;
01938   
01939   memcpy(atoms, atomlist, writer->numatoms * sizeof(molfile_atom_t));
01940 
01941   // If occ, bfactor, and insertion aren't given, we assign defaultvalues.
01942   if (!(optflags & MOLFILE_OCCUPANCY)) {
01943     for (i=0; i<writer->numatoms; i++)
01944       atoms[i].occupancy = 0.0f;
01945   }
01946 
01947   if (!(optflags & MOLFILE_BFACTOR)) {
01948     for (i=0; i<writer->numatoms; i++)
01949       atoms[i].bfactor= 0.0f;
01950   }
01951 
01952   if (!(optflags & MOLFILE_INSERTION)) {
01953     for (i=0; i<writer->numatoms; i++) {
01954       atoms[i].insertion[0] =' ';
01955       atoms[i].insertion[1] ='\0';
01956     }
01957   }
01958 
01959   if (!(optflags & MOLFILE_ALTLOC)) {
01960     for (i=0; i<writer->numatoms; i++) {
01961       atoms[i].altloc[0]=' ';
01962       atoms[i].altloc[1]='\0';
01963     }
01964   }
01965 
01966   if (!(optflags & MOLFILE_ATOMICNUMBER)) {
01967     for (i=0; i<writer->numatoms; i++)
01968       atoms[i].atomicnumber = 0;
01969   }
01970 }
01971 
01972 
01973 static void writeAtomSite(pdbxWriter* writer) {
01974   char lineBuffer[BUFFER_SIZE];
01975   int i;
01976   const float* x, *y, *z;
01977   molfile_atom_t* atoms = writer->atoms;
01978   memset(lineBuffer, 0, sizeof(lineBuffer));
01979   x = writer->coordinates;
01980   y = x+1;
01981   z = x+2;
01982 
01983   for (i=0; i<writer->numatoms; ++i) {
01984     sprintf(lineBuffer,"ATOM %d %s %s . %s %s . %d ? %f %f %f %f %f %s\n",
01985             i+1, atoms[i].name, atoms[i].type, atoms[i].resname, atoms[i].chain,
01986             atoms[i].resid, *x, *y, *z, atoms[i].occupancy,
01987             atoms[i].charge, atoms[i].chain);
01988     x += 3;
01989     y += 3;
01990     z += 3;
01991     write(lineBuffer, writer);
01992   }
01993 }
01994 
01995 
01996 static void writeFile(pdbxWriter* writer) {
01997   // write PDBx header
01998   writeIntro(writer);
01999   write(atomSiteHeader, writer);
02000   writeAtomSite(writer);
02001   write("#\n", writer);
02002   close(writer);
02003 }
02004 
02005 
02006 static void writeIntro(pdbxWriter* writer) {
02007   write("data_", writer);
02008   write(writer->pdbName, writer);
02009   write("\n", writer);
02010 }
02011 
02012 
02013 static void close(pdbxWriter* writer) {
02014   writeBuffer(writer);
02015   fclose(writer->fd);
02016 }
02017 
02018 
02019 static void write(const char* str, pdbxWriter* writer) {
02020   int length = strlen(str);
02021   int copy_size;
02022   int num_copied = 0;
02023 
02024   if (length + writer->bufferCount < WB_SIZE) {
02025     memcpy(writer->writeBuf + writer->bufferCount, str, length);
02026     writer->bufferCount += length;
02027   }
02028   else do {
02029     copy_size = WB_SIZE - writer->bufferCount;
02030     if (copy_size + num_copied > length) {
02031       copy_size = length - num_copied;
02032     }
02033     memcpy(writer->writeBuf + writer->bufferCount, str + num_copied, copy_size);
02034     writer->bufferCount += copy_size;
02035     num_copied += copy_size;
02036     if (writer->bufferCount == WB_SIZE) {
02037       writeBuffer(writer);
02038     }
02039   } while (num_copied < length);
02040 }
02041 
02042 
02043 static void writeBuffer(pdbxWriter* writer) {
02044   if (writer->bufferCount == 0)
02045     return;
02046   fwrite(writer->writeBuf, sizeof(char), writer->bufferCount, writer->fd);
02047   writer->bufferCount = 0;
02048 }
02049 
02050 
02051 //
02052 // API functions start here
02053 //
02054 typedef struct {
02055   pdbxParser * parser;
02056   pdbxWriter * writer;
02057   int natoms;
02058   molfile_atom_t *atomlist;
02059   molfile_metadata_t *meta;
02060   int readTS;
02061 } pdbx_data;
02062 
02063 
02064 static void * open_pdbx_read(const char *filepath, const char *filetype,
02065                              int *natoms) {
02066   pdbx_data *data;
02067   data = new pdbx_data;
02068   data->readTS = 0;
02069   data->parser = create_pdbxParser(filepath);
02070   data->natoms = data->parser->natoms;
02071   *natoms = data->natoms;
02072   if (data->parser->error) {
02073     printf("pdbxplugin) error opening file.\n");
02074     return NULL;
02075   }
02076   return data;
02077 
02078 }
02079 
02080 
02081 static int read_pdbx_structure(void * mydata, int *optflags, molfile_atom_t *atoms) {
02082   pdbx_data * data = (pdbx_data *)mydata;
02083   *optflags = MOLFILE_NOOPTIONS;
02084 
02085   if (data->parser->natoms == 0) {
02086     printf("pdbxplugin) No atoms found.\n");
02087     if (data->parser->pdb_dev) {
02088       return MOLFILE_NOSTRUCTUREDATA;
02089     }
02090     return MOLFILE_ERROR;
02091   }
02092 
02093   if (parseStructure(atoms, optflags, data->parser)) {
02094     printf("pdbxplugin) Error while trying to parse pdbx structure\n");
02095     return MOLFILE_ERROR;
02096   }
02097 
02098   if (readBonds(atoms, data->parser)) {
02099     *optflags |= MOLFILE_BONDSSPECIAL;
02100   }
02101   return MOLFILE_SUCCESS;
02102 }
02103 
02104 
02105 static int read_bonds(void *v, int *nbonds, int **fromptr, int **toptr,
02106                       float ** bondorder,int **bondtype,
02107                       int *nbondtypes, char ***bondtypename) {
02108   pdbx_data * data = (pdbx_data *)v;
02109   if (data->parser->nbonds == 0) {
02110     *nbonds = 0;
02111     *fromptr = NULL;
02112     *toptr = NULL;
02113   } else {
02114     *nbonds = data->parser->nbonds;
02115     printf("pdbxplugin) Found %d 'special bonds' in the PDBx file.\n", *nbonds);
02116     *fromptr = data->parser->bondsFrom;
02117     *toptr = data->parser->bondsTo;
02118   }
02119   *bondorder = NULL;
02120   *bondtype = NULL;
02121   *nbondtypes = 0;
02122   *bondtypename = NULL;
02123 
02124   return MOLFILE_SUCCESS;
02125 }
02126 
02127 
02128 static int read_pdbx_timestep(void * mydata, int natoms, molfile_timestep_t *ts) {
02129   pdbx_data * data = (pdbx_data *)mydata;
02130   if (data->readTS) {
02131     return MOLFILE_ERROR;
02132   }
02133   data->readTS = 1;
02134   memcpy(ts->coords, data->parser->xyz, natoms * 3 * sizeof(float));
02135 
02136   return MOLFILE_SUCCESS;
02137 }
02138 
02139 
02140 static int read_rawgraphics(void* v, int *nelem, const molfile_graphics_t **g_data) {
02141   pdbx_data* data = (pdbx_data *)v;
02142 
02143   if (data->parser->pdb_dev && parse_pdbx_graphics_data(data->parser)) {
02144     *nelem = data->parser->n_graphics_elems;
02145     *g_data = data->parser->g_data;
02146   } else {
02147     *nelem = 0;
02148     *g_data = NULL;
02149   }
02150 
02151   return MOLFILE_SUCCESS;
02152 }
02153 
02154 
02155 static void close_pdbx_read(void *v) {
02156   pdbx_data * data = (pdbx_data *)v;
02157   delete_pdbxParser(data->parser);
02158   delete data;
02159 }
02160 
02161 
02162 static void* open_file_write(const char *path, const char *filetypye, int natoms) {
02163   pdbx_data * data = new pdbx_data;
02164   data->writer = create_pdbxWriter(path, natoms);
02165   return data;
02166 }
02167 
02168 
02169 static int write_structure(void* v, int optflags, const molfile_atom_t *atoms) {
02170   pdbx_data * data = (pdbx_data *)v;
02171   addAtoms(atoms, optflags, data->writer);
02172   return MOLFILE_SUCCESS;
02173 }
02174 
02175 
02176 static int write_timestep(void *v, const molfile_timestep_t* ts) {
02177   pdbx_data * data = (pdbx_data *)v;
02178   addCoordinates(ts->coords, data->writer);
02179   writeFile(data->writer);
02180   return MOLFILE_SUCCESS;
02181 }
02182 
02183 
02184 static void close_file_write(void* v) {
02185   pdbx_data* data = (pdbx_data*)v;
02186   delete [] data->writer->atoms;
02187   delete data->writer;
02188   delete data;
02189 }
02190 
02191 
02192 
02193 //
02194 // Plugin initialization fctns and structures
02195 //
02196 static molfile_plugin_t plugin;
02197 
02198 VMDPLUGIN_API int VMDPLUGIN_init() {
02199   memset(&plugin, 0, sizeof(molfile_plugin_t));
02200   plugin.abiversion = vmdplugin_ABIVERSION;
02201   plugin.type = MOLFILE_PLUGIN_TYPE;
02202   plugin.name = "pdbx";
02203   plugin.prettyname = "mmCIF/PDBX";
02204   plugin.author = "Brendan McMorrow, John Stone";
02205   plugin.majorv = 0;
02206   plugin.minorv = 14;
02207   plugin.is_reentrant = VMDPLUGIN_THREADSAFE;
02208   plugin.filename_extension = "cif";
02209   plugin.open_file_read = open_pdbx_read;
02210   plugin.read_structure = read_pdbx_structure;
02211   plugin.read_next_timestep = read_pdbx_timestep;
02212   plugin.read_rawgraphics = read_rawgraphics;
02213   plugin.read_bonds = read_bonds;
02214   plugin.open_file_write = open_file_write;
02215   plugin.write_structure = write_structure;
02216   plugin.write_timestep = write_timestep;
02217   plugin.close_file_write = close_file_write;
02218   plugin.close_file_read = close_pdbx_read;
02219   return VMDPLUGIN_SUCCESS;
02220 }
02221 
02222 VMDPLUGIN_API int VMDPLUGIN_register(void *v, vmdplugin_register_cb cb) {
02223   (*cb)(v, (vmdplugin_t *)&plugin);
02224   return VMDPLUGIN_SUCCESS;
02225 }
02226 
02227 VMDPLUGIN_API int VMDPLUGIN_fini() {
02228   return VMDPLUGIN_SUCCESS;
02229 }
02230 
02231 
02232 
02233 #ifdef TEST_PLUGIN
02234 
02235 int main(int argc, char *argv[]) {
02236   molfile_timestep_t timestep;
02237   pdbx_data *v;
02238   int natoms;
02239   int set;
02240 
02241 //  while (--argc) {
02242 
02243   struct timeval  tot1, tot2;
02244   gettimeofday(&tot1, NULL);
02245   if (*argv != NULL) {
02246     v = (pdbx_data*)open_pdbx_read(argv[1], "pdbx", &natoms);
02247   } else {
02248  //   v = (pdbx_data*)open_pdbx_read("/Users/Brendan/pdbx/3j3q.cif", "pdbx", &natoms);
02249     v = (pdbx_data*)open_pdbx_read("/home/brendanbc1/Downloads/3j3q.cif", "pdbx", &natoms);
02250   }
02251   if (!v) {
02252     fprintf(stderr, "main) open_pdbx_read failed for file %s\n", argv[1]);
02253     return 1;
02254   }
02255 #ifdef PDBX_DEBUG
02256   fprintf(stderr, "main) open_pdbx_read succeeded for file %s\n", argv[1]);
02257   fprintf(stderr, "main) number of atoms: %d\n", natoms);
02258 #endif
02259 
02260   set = 0;
02261   molfile_atom_t * atoms = new molfile_atom_t[natoms];
02262   int rc = read_pdbx_structure(v, &set, atoms);
02263   if (rc != MOLFILE_ERROR) {
02264 #ifdef PDBX_DEBUG
02265     printf("xyz structure successfully read.\n");
02266 #endif
02267   } else {
02268     fprintf(stderr, "main) error reading pdbx file\n");
02269     return -1;
02270   }
02271 
02272   timestep.coords = new float[3 * natoms];
02273   if (!read_pdbx_timestep(v, natoms, &timestep)) {
02274     fprintf(stderr, "main) open_pdbx_read succeeded for file %s\n", argv[1]);
02275   } else {
02276     fprintf(stderr, "main) Failed to read timestep\n");
02277   }
02278   int nbonds, nbondtypes;
02279   int* fromptr, *toptr, *bondtype;
02280   float* bondorder;
02281   char** bondtypename;
02282   read_bonds(v, &nbonds, &fromptr, &toptr,
02283              &bondorder, &bondtype,
02284              &nbondtypes, &bondtypename);
02285 
02286   const molfile_graphics_t* g_data;
02287   int n_graphics_elems;
02288   read_rawgraphics(v, &n_graphics_elems, &g_data);
02289 
02290   gettimeofday(&tot2, NULL);
02291   printf ("Total time to read file: %f seconds\n",
02292          (double) (tot2.tv_usec - tot1.tv_usec) / 1000000 +
02293          (double) (tot2.tv_sec - tot1.tv_sec));
02294   close_pdbx_read(v);
02295 
02296   printf("\nWriting file...\n\n");
02297   v = (pdbx_data*)open_file_write("/tmp/test.cif", 0, natoms);
02298 
02299 #ifdef PDBX_DEBUG
02300   printf("File opened for writing...\n");
02301 #endif
02302   write_structure(v, set, (const molfile_atom_t*)atoms);
02303 #ifdef PDBX_DEBUG
02304   printf("Structure information gathered...\n");
02305 #endif
02306   write_timestep(v, &timestep);
02307 #ifdef PDBX_DEBUG
02308   printf("File written...\n");
02309 #endif
02310   close_file_write(v);
02311 #ifdef PDBX_DEBUG
02312   printf("File closed.\n");
02313 #endif
02314   delete [] atoms;
02315   delete [] timestep.coords;
02316 
02317 #if 0
02318   printf("Writing pdbx.txt\n");
02319   x = timestep.coords; y = x+1;
02320   z = x+2;
02321   FILE *f;
02322   f = fopen("pdbx.txt", "w");
02323   for(i=0; i<natoms; i++) {
02324     fprintf(f, "%i %d %s %s %s %f %f %f\n", atoms[i].atomicnumber, atoms[i].resid, atoms[i].chain, atoms[i].resname, atoms[i].type, *x,*y,*z);
02325     //fprintf(stderr, "%i\t%s  %s\t%s  %s  %i  %f\t%f\t%f\t%f\t%f\t%f\n", i+1, atoms[i].name, atoms[i].type,
02326       //      atoms[i].chain, atoms[i].resname, atoms[i].resid, *x, *y, *z, atoms[i].occupancy, atoms[i].bfactor, atoms[i].charge);
02327     x+=3;
02328     y+=3;
02329     z+=3;
02330   }
02331   fclose(f);
02332   printf("main) pdbx.txt written\n");
02333 #endif
02334 
02335   return 0;
02336 }
02337 #endif