Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

Stride.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: Stride.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.51 $       $Date: 2021/11/18 07:33:43 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *  Stride interface class.
00019  *  STRIDE source code:
00020  *    http://webclu.bio.wzw.tum.de/stride/
00021  *    http://webclu.bio.wzw.tum.de/stride/stride.tar.gz
00022  *    https://en.wikipedia.org/wiki/STRIDE
00023  * 
00024  *  DSSP/xDSSP/HSSP family; 
00025  *    https://swift.cmbi.umcn.nl/gv/dssp/DSSP_3.html
00026  *    https://github.com/cmbi/dssp
00027  *    https://github.com/cmbi/hssp/releases
00028  *
00029  *  Paper on evaluation of secondary structure assignment methods:
00030  *    https://pubmed.ncbi.nlm.nih.gov/16164759/
00031  *
00032  *  Secondary structure assignment for conformationally irregular peptides:
00033  *    https://pubmed.ncbi.nlm.nih.gov/25424660/
00034  * 
00035  ***************************************************************************/
00036 
00037 #include <stdio.h>      // for tmpfile
00038 #include "DrawMolecule.h"
00039 #include "Timestep.h"
00040 #include "Residue.h"
00041 #include "Inform.h"
00042 #include "Stride.h"
00043 #include "utilities.h"  // for vmd_delete_file
00044 #include "VMDDir.h"
00045 #include "PeriodicTable.h"
00046 
00047 // 
00048 // Despite prolific compile-time and link-time warnings about
00049 // the use of APIs such as tempnam(), we need to use this approach
00050 // because VMD itself doesn't create the STRIDE output, so temp file
00051 // API variants that return an open file handle aren't useful to us,
00052 // and the potential race condition they're meant to protect against
00053 // is inherent when using system() or exec() calls to run an external
00054 // process on input/output files.
00055 //
00056 // Note: the caller must free the returned C string.
00057 //
00058 char * get_temporary_filename(void) {
00059   char * filename = NULL;
00060 #if defined(ARCH_MACOSXARM64) || defined(ARCH_MACOSXX86) || defined(ARCH_MACOSXX86_64)
00061   char *tmpstr = tmpnam(NULL);
00062   if (tmpstr !=NULL)
00063     filename = strdup(tmpstr);
00064 #else
00065   filename  = tempnam(NULL, NULL);
00066 #endif
00067   return filename;
00068 }
00069 
00070 
00071 // Write a PDB of the given timestep, stripping out all of the non-protein 
00072 // atoms since they are not used for secondary structure assignment.
00073 // The PDB input can be used with STRIDE, DSSP, and likely other SS programs.
00074 // Write the uniq_resid of the residues written to the stride input file
00075 // into the residues argument.
00076 int write_ss_input_pdb(DrawMolecule *mol, const char *inputfilename,
00077                        ResizeArray<int>& residues) {
00078   const float *pos = mol->current()->pos;
00079   char name[6], resname[5], atomicelement[4], chain[2];
00080  
00081   residues.clear();
00082 
00083   FILE *inputfile = fopen(inputfilename,"w");
00084   if (!inputfile) {
00085     msgErr << "Unable to open input file '" << inputfilename 
00086            << "' for writing." << sendmsg;
00087     return 1;
00088   }
00089 
00090   int prev = -1; // previous uniq_resid
00091   int atomcount = 0;
00092 
00093   for (int i=0; i < mol->nAtoms; i++) {
00094     MolAtom *atom = mol->atom(i);
00095     // skip if this atom isn't part of protein
00096     if (atom->residueType != RESPROTEIN)
00097       continue;
00098 
00099     strncpy(name, (mol->atomNames).name(atom->nameindex), 4);
00100     name[4]='\0';
00101 
00102     strncpy(resname,(mol->resNames).name(atom->resnameindex),4);  
00103     resname[4] = '\0';
00104 
00105     strncpy(atomicelement, get_pte_label(atom->atomicnumber), 4);  
00106     atomicelement[1] = ' ';
00107     atomicelement[2] = ' ';
00108     atomicelement[3] = '\0';
00109 
00110     chain[0] = (mol->chainNames).name(atom->chainindex)[0];
00111     chain[1] = '\0';
00112 
00113     if (atom->uniq_resid != prev) {
00114       prev = atom->uniq_resid;
00115       residues.append(prev);
00116     }
00117 
00118     const float *loc=pos + 3L*i;
00119     const char *emptycols = "                       ";
00120     if (fprintf(inputfile, "ATOM  %5d %-4s %-4s%c%4ld    %8.3f%8.3f%8.3f%s%3s\n",
00121                 ++atomcount, name, resname, chain[0], long(residues.num()-1),
00122                 loc[0], loc[1], loc[2], emptycols, atomicelement) < 0) {
00123       msgErr << "Error writing line in Stride/DSSP input file for atom " << i << sendmsg;
00124       return 1;
00125     }
00126   }
00127   fprintf(inputfile, "TER                                                                             \n");
00128 
00129   fclose(inputfile);
00130   return 0;
00131 }
00132 
00133 
00134 //
00135 // Here's the sprintf command stride uses on output in report.c:
00136 //    sprintf(Tmp,"ASG  %3s %c %4s %4d    %c   %11s   %7.2f   %7.2f   %7.1f",
00137 //            p->ResType,SpaceToDash(Chain[Cn]->Id),p->PDB_ResNumb,i+1,
00138 //            p->Prop->Asn,Translate(p->Prop->Asn),p->Prop->Phi,
00139 //            p->Prop->Psi,p->Prop->Solv);
00140 //
00141 //   Here's a typical line:
00142 //ASG  THR -    9    9    B        Bridge   -113.85    157.34      21.9      ~~~~
00143 // 
00144 
00145 static int read_stride_record( DrawMolecule *mol, 
00146                                const char *stridefile,
00147                                const ResizeArray<int> &residues ) { 
00148   FILE *in = fopen(stridefile, "rt");
00149   if (!in) {
00150     msgErr << "Unable to find Stride output file: "
00151            << stridefile << sendmsg;
00152     return 1;
00153   }
00154 
00155   const int BUF_LEN = 90;
00156   char buf[BUF_LEN], resname[4], chain[1], uniq_residstr[5], ss;
00157   int resid=0;
00158 
00159   while (fgets(buf, BUF_LEN, in)) {
00160     if (strncmp(buf, "ASG", 3))
00161       continue;
00162 
00163     sscanf(buf,"ASG  %3s %c %4s %4d    %c",
00164            resname, chain, uniq_residstr, &resid, &ss);
00165 
00166     int index = atoi(uniq_residstr);
00167     if (index < 0 || index >= residues.num()) {
00168       msgErr << "invalid resid found in stride output file!" << sendmsg;
00169       msgErr << "Error found in the following line: " << sendmsg;
00170       msgErr << buf << sendmsg;
00171       fclose(in);
00172       return -1;
00173     }
00174 
00175     int uniq_resid = residues[index];
00176     switch (ss) {
00177       case 'H': // H: helix
00178         mol->residueList[uniq_resid]->sstruct = SS_HELIX_ALPHA;
00179         break;
00180 
00181       case 'G': // G: 3-10 helix
00182         mol->residueList[uniq_resid]->sstruct = SS_HELIX_3_10;
00183         break; 
00184 
00185       case 'I': // I: PI helix
00186         mol->residueList[uniq_resid]->sstruct = SS_HELIX_PI;
00187         break;
00188 
00189       case 'E': // E: Beta sheet
00190         mol->residueList[uniq_resid]->sstruct = SS_BETA;
00191         break;
00192 
00193       case 'B': // B: bridge
00194       case 'b': // b: bridge
00195         mol->residueList[uniq_resid]->sstruct = SS_BRIDGE;
00196         break;
00197 
00198       case 'T': // T: turn
00199         mol->residueList[uniq_resid]->sstruct = SS_TURN;
00200         break;
00201 
00202       default:
00203         // msgErr << "Internal error in read_stride_record\n" << sendmsg;
00204         mol->residueList[uniq_resid]->sstruct = SS_COIL;
00205     }
00206   }
00207 
00208   fclose(in);
00209   return 0;
00210 }  
00211 
00212 
00213 int ss_from_stride(DrawMolecule *mol) {
00214   int rc = 0;
00215   
00216   char *stridebin = getenv("STRIDE_BIN");
00217   if (!stridebin) {
00218     msgErr << "No STRIDE binary found; please set STRIDE_BIN environment variable" << sendmsg;
00219     msgErr << "to the location of the STRIDE binary." << sendmsg;
00220     return 1;
00221   }
00222 
00223   // check to see if the executable exists
00224   if (!vmd_file_is_executable(stridebin)) {
00225     msgErr << "STRIDE binary " << stridebin << " cannot be run; check permissions." << sendmsg;
00226     return 1;
00227   }
00228 
00229   char *infilename  = get_temporary_filename();
00230   char *outfilename = get_temporary_filename();
00231   if (infilename == NULL || outfilename == NULL) {
00232     msgErr << "Unable to create temporary files for STRIDE." << sendmsg;
00233     return 1;
00234   }
00235 
00236   char *stridecall = new char[strlen(stridebin)
00237                               +strlen(infilename)
00238                               +strlen(outfilename)
00239                               +16];
00240 
00241   sprintf(stridecall,"\"%s\" %s -f%s", stridebin, infilename, outfilename);
00242 
00243   ResizeArray<int> residues;
00244 
00245   if (write_ss_input_pdb(mol,infilename,residues)) {
00246     msgErr << "write_ss_input_pdb(): unable "
00247            << "to write input file for Stride\n" << sendmsg;
00248     rc = 1;
00249   }
00250 
00251   if (!rc) {
00252     vmd_system(stridecall);
00253 
00254     if (read_stride_record(mol,outfilename,residues)) {
00255       msgErr << "Stride::read_stride_record: unable "
00256              << "to read output file from Stride\n" << sendmsg;
00257       rc = 1;
00258     }
00259   }
00260 
00261   delete [] stridecall;
00262   vmd_delete_file(outfilename);
00263   vmd_delete_file(infilename);
00264   free(outfilename);
00265   free(infilename);
00266 
00267   return rc;
00268 } 
00269 
00270 
00271 
00272 //
00273 // DSSP implementation
00274 //
00275 
00276 /* 
00277   Dssp output format:
00278 return (kDSSPResidueLine % residue.GetNumber() % ca.mResSeq % ca.mICode % chainChar %     code % ss % helix[0] % helix[1] % helix[2] % bend % chirality % bridgelabel[0] %   bridgelabel[1] % bp[0] % bp[1] % sheet % floor(residue.Accessibility() + 0.5) %     NHO[0] % ONH[0] % NHO[1] % ONH[1] % residue.TCO() % residue.Kappa() % alpha %     residue.Phi() % residue.Psi() % ca.mLoc.mX % ca.mLoc.mY % ca.mLoc.mZ %       long_ChainID1 % long_ChainID2).str();
00279 
00280   This is the header line for the residue lines in a DSSP file:
00281 
00282   #  RESIDUE AA STRUCTURE BP1 BP2  ACC     N-H-->O    O-->H-N    N-H-->O    O-->H-N    TCO  KAPPA ALPHA  PHI   PSI    X-CA   Y-CA   Z-CA           CHAIN AUTHCHAIN
00283 */
00284 
00285 
00286 
00287 static int read_dssp_record(DrawMolecule *mol, 
00288                             const char *dsspfile,
00289                             const ResizeArray<int> &residues ) { 
00290   FILE *in = fopen(dsspfile, "rt");
00291   if (!in) {
00292     msgErr << "Unable to find DSSP output file: "
00293            << dsspfile << sendmsg;
00294     return 1;
00295   }
00296 
00297   const int BUF_LEN = 1024;
00298   char buf[BUF_LEN], atom_num[6], chain[2], uniq_residstr[5], ss;
00299   bool start_flag = false;
00300 
00301   while (fgets(buf, BUF_LEN, in)) {
00302     // check if the header line is reached
00303     if (!start_flag){
00304       if (strncmp(buf, "  #  RESIDUE", 12)) {
00305         continue;
00306       } else {
00307         if (fgets(buf, BUF_LEN, in) == NULL) {
00308           fclose(in);
00309           return -1; // failed 
00310         }
00311         start_flag = true;
00312       }
00313     }
00314 
00315     sscanf(buf,"%5s %4s %1s %c",
00316            atom_num, uniq_residstr, chain, &ss);
00317 
00318     ss = buf[16];
00319  
00320     int index = atoi(uniq_residstr);
00321     if (!index) 
00322       continue; // for the gap between chains in DSSP output
00323 
00324     if (index < 0 || index >= residues.num()) {
00325       msgErr << "invalid resid found in DSSP output file!" << sendmsg;
00326       msgErr << "Error found in the following line: " << sendmsg;
00327       msgErr << buf << sendmsg;
00328       fclose(in);
00329       return -1;
00330     }
00331 
00332     int uniq_resid = residues[index];
00333 #if 0
00334     msgErr << atom_num << " " << uniq_residstr << " " << chain <<  " " << ss << sendmsg;
00335     msgErr << buf << sendmsg;
00336 #endif
00337 
00338     switch (ss) {
00339       case 'H': // H
00340         mol->residueList[uniq_resid]->sstruct = SS_HELIX_ALPHA;
00341         break;
00342 
00343       case 'B': // B
00344         mol->residueList[uniq_resid]->sstruct = SS_BRIDGE;
00345         break; 
00346 
00347       case 'E': // E
00348         mol->residueList[uniq_resid]->sstruct = SS_BETA;
00349         break;
00350 
00351       case 'G': // G
00352         mol->residueList[uniq_resid]->sstruct = SS_HELIX_3_10;
00353         break;
00354 
00355       case 'I': // I
00356         mol->residueList[uniq_resid]->sstruct = SS_HELIX_PI;
00357         break;
00358 
00359       case 'T': // T
00360         mol->residueList[uniq_resid]->sstruct = SS_TURN;
00361         break;
00362 
00363       case 'S': // S
00364                 // this is the case where vmd doesn't cover
00365                 // structure code "bent" assigned by DSSP  
00366 
00367       default:
00368         msgErr << "Internal error in read_dssp_record\n" << sendmsg;
00369 //        printf("DSSP uniq_resid: %d  SS: %c\n", uniq_resid, ss);
00370 //        printf("DSSP buf: '%s'\n\n", buf);
00371         mol->residueList[uniq_resid]->sstruct = SS_COIL;
00372     }
00373   }
00374 
00375   fclose(in);
00376   return 0;
00377 }  
00378 
00379 
00380 
00381 
00382 int ss_from_dssp(DrawMolecule *mol) {
00383   int rc = 0;
00384   
00385   char *dsspbin = getenv("DSSP_BIN");
00386   if (!dsspbin) {
00387     msgErr << "No DSSP/XDSSP binary found; please set DSSP_BIN environment variable" << sendmsg;
00388     msgErr << "to the location of the DSSP binary." << sendmsg;
00389     return 1;
00390   }
00391 
00392   // check to see if the executable exists
00393   if (!vmd_file_is_executable(dsspbin)) {
00394     msgErr << "DSSP binary " << dsspbin << " cannot be run; check permissions." << sendmsg;
00395     return 1;
00396   }
00397 
00398   char *infilename  = get_temporary_filename();
00399   char *outfilename = get_temporary_filename();
00400   if (infilename == NULL || outfilename == NULL) {
00401     msgErr << "Unable to create temporary files for DSSP." << sendmsg;
00402     return 1;
00403   }
00404 
00405   char *dsspcall = new char[strlen(dsspbin)
00406                             +strlen(infilename)
00407                             +strlen(outfilename)
00408                             +16];
00409 
00410   sprintf(dsspcall,"\"%s\" -i %s -o %s -v", dsspbin, infilename, outfilename);
00411 
00412   ResizeArray<int> residues;
00413 
00414   if (write_ss_input_pdb(mol,infilename,residues)) {
00415     msgErr << "write_ss_input_pdb(): unable "
00416            << "to write input file for DSSP\n" << sendmsg;
00417     rc = 1;
00418   }
00419 
00420   if (!rc) {
00421     vmd_system(dsspcall);
00422 
00423     if (read_dssp_record(mol,outfilename,residues)) {
00424       msgErr << "Dssp::read_dssp_record: unable "
00425              << "to read output file from DSSP\n" << sendmsg;
00426       rc = 1;
00427     }
00428   }
00429 
00430   delete [] dsspcall;
00431   vmd_delete_file(outfilename);
00432   vmd_delete_file(infilename);
00433   free(outfilename);
00434   free(infilename);
00435   return rc;
00436 } 
00437 
00438 
00439 

Generated on Wed Oct 9 02:43:33 2024 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002