#include "alignment.h" #include "typeConvert.h" //#include "aaTools.h" // default constructor Alignment::Alignment() { if(DB) printf("entering default Alignment constructor\n"); name = new char[30]; strcpy(name, "default"); nSequences = 0; nProteins = 0; maximumSequenceLength = 0; sequences = 0; if(DB) printf("leaving default Alignment constructor\n"); } // constructor, assigns Alignment name to char* s Alignment::Alignment(char *s) { if(DB) printf("entering Alignment char* constructor\n"); if(strlen(s)>30) { printf("error in Alignment::Alignment(char*): Alignment::name too long\n"); return; } name = new char[30]; strcpy(name, s); nSequences = 0; nProteins = 0; maximumSequenceLength = 0; sequences = 0; if(DB) printf("leaving Alignment char* constructor\n"); } // Destructor Alignment::~Alignment() { if(DB) printf("entering Alignment destructor\n"); if(name!=0) delete[] name; if(sequences!=0) delete[] sequences; if(DB) printf("leaving Alignment destructor\n"); } // copy constructor Alignment::Alignment(const Alignment& m) { if(DB) printf("entering Alignment copy constructor\n"); if(m.name==0) name=0; else { name = new char[strlen(m.name)+1]; strcpy(name, m.name); } nSequences = m.nSequences; nProteins = m.nProteins; maximumSequenceLength = m.maximumSequenceLength; if(m.sequences==0) sequences=0; else { sequences = new Sequence[nSequences]; for(int i=0; i tempF"); system(temp); delete[] temp; fp = fopen("tempF", "r"); do { fgets(temp1, 200, fp); } while(temp1[2] != '#'); k = 0; for(int j=0; j%s\n", sequences[j].name); //printf(">%s\n", sequences[j].name, temp); for(int i = 0; i < maximumSequenceLength/60+1; i++) { strncpy(temp, sequences[j].residues+i*60,60); temp[60]='\0'; if(strlen(temp)>0) printf("%s\n", temp); } if(strlen(temp)>0) printf("\n"); } delete[] temp; } void Alignment::printMsa() { char *temp = new char[maximumSequenceLength+1]; for(int i = 0; i < maximumSequenceLength/60+1; i++) { for(int j = 0; j < nSequences; j++) { strncpy(temp, sequences[j].residues+i*60,60); temp[60]='\0'; if(strlen(temp)>0) printf("%s: %s\n", sequences[j].name, temp); } if(strlen(temp)>0) printf("\n"); } delete[] temp; } void Alignment::printPwa() { if(DB) printf("entering Alignment::printPwa\n"); char *temp= new char[maximumSequenceLength+1]; for(int k = 0; k < nSequences/2; k++) { for(int i = 0; i < sequences[2*k].length/60+1; i++) { for(int j = 0; j < 2; j++) { strncpy(temp, sequences[2*k+j].residues+i*60,60); temp[60]='\0'; if(strlen(temp)>0) printf("%s: %s\n", sequences[2*k+j].name, temp); } if(strlen(temp)>0) printf("\n"); } printf("\n"); } delete[] temp; if(DB) printf("leaving Alignment::printPwa\n"); } // Reads in information from a PairWise or Multiple Sequence Alignment // Expects to find sequences written in FASTA format void Alignment::readFromFile(char *s) { if(DB) printf("entering Alignment::readFromFile\n"); // clear old data first nSequences = 0; nProteins = 0; maximumSequenceLength = 0; sequences = 0; //std::ifstream inFile; //inFile.open(s,std::ios::in); FILE *inFile = fopen(s, "r"); int count=0; // int maxSeqLines=0; char *temp = new char[MSL+1]; char *temp2 = new char[MSL+1]; //while(inFile.getline(temp, MSL)) { while ( fgets(temp,MSL,inFile) ) { if (strstr(temp,"space")!=0) break; if (temp[0] == '>') { nSequences++; count=0; printf("%d\n",nSequences); } else count++; //if(count>maxSeqLines) // maxSeqLines = count; } fclose(inFile); //maximumSequenceLength = maxSeqLines*60; sequences = new Sequence[nSequences]; //inFile.seekg(0, std::ios::beg); //inFile.clear(); //inFile.seekg(0, std::ios::beg); //inFile.getline(temp,60+1); inFile = fopen(s, "r"); fgets(temp,60+1,inFile); for (int i=0; i0) break; } while (temp[0] != '>'); sequences[i].setResidues(temp2); strcpy(temp2,"\0"); } fclose(inFile); // for(int i=0; imaximumSequenceLength) maximumSequenceLength=sequences[i].length; // default for msa nProteins = nSequences; // check if Pwa, then nProteins is number of unique proteins if(nSequences>2) if(sequences[1].length!=sequences[2].length) nProteins = (int)floor(.1+.5*(1+sqrt((float)1+4*nSequences))); delete[] temp; delete[] temp2; if(DB) printf("leaving Alignment::readFromFile\n"); } /* // Reads in information from a PairWise or Multiple Sequence Alignment // Expects to find sequences written in FASTA format void Alignment::readFromFile(char *s) { if(DB) printf("entering Alignment::readFromFile\n"); // clear old data first nSequences = 0; nProteins = 0; maximumSequenceLength = 0; sequences = 0; std::ifstream inFile; inFile.open(s,std::ios::in); int count=0, maxSeqLines=0; char *temp = new char[MSL+1]; char *temp2 = new char[MSL+1]; while(inFile.getline(temp, MSL)) { if(strstr(temp,"space")!=0) break; if(temp[0] == '>') { nSequences++; count=0; //printf("%d\n",nSequences); } else count++; if(count>maxSeqLines) maxSeqLines = count; } maximumSequenceLength = maxSeqLines*60; sequences = new Sequence[nSequences]; inFile.seekg(0, std::ios::beg); inFile.clear(); inFile.seekg(0, std::ios::beg); inFile.getline(temp,60+1); for(int i=0; imaximumSequenceLength) maximumSequenceLength=sequences[i].length; // default for msa nProteins = nSequences; // check if Pwa, then nProteins is number of unique proteins if(nSequences>2) if(sequences[1].length!=sequences[2].length) nProteins = (int)floor(.1+.5*(1+sqrt((float)1+4*nSequences))); delete[] temp; delete[] temp2; if(DB) printf("leaving Alignment::readFromFile\n"); } */ // Read and parse pdb file void Alignment::readCaCoordinates() { //FILE *fpError = fopen("pdbError","w"); //fprintf(fpError,""); //fclose(fpError); for(int i=0;i-1 && isGap[i]==true; i--) ; if(DB) printf("leaving Sequence::getLeft\n"); if(i==-1) return -1; // return residuesIndexToResiduesWithoutGapsIndex[i]; return i; } // Returns the index in residuesWithoutGaps of the residue // immediately following the gapped position pos int Sequence::getRight(int pos) { if(DB) printf("entering Sequence::getRight\n"); if(residues==0) return -1; int i; for(i=pos; i0 && num-numL!=1) { r = sqrt( pow(tempCaCoordinates[i][0] - tempCaCoordinates[i-1][0],2) + pow(tempCaCoordinates[i][1] - tempCaCoordinates[i-1][1],2) + pow(tempCaCoordinates[i][2] - tempCaCoordinates[i-1][2],2) ); fprintf(fpError,"WARNING: In file %s, CA neighbors %d and %d\n",tName,numL,num); if(r>3.000) { fprintf(fpError,"CA distance = %1.3f > 3.000, so assuming consecutive residues\n",r); } else { fprintf(fpError,"CA distance = %1.3f < 3.000, so assuming duplicate residues\n",r); i--; } fprintf(fpError,"%s%s",tempL,temp); } i++; numL=num; strcpy(tempL,temp); } length = i; } fclose(fp); // now reallocate memory correctly; wasteful - should be fixed caCoordinates = new float*[length]; for(i=0;i