#include "aaTools.h" // Calculates the avg CA dist betw residues globally numbered 'start' // and 'end' across all the sequences in the Alignment float getAvgCaDist(Alignment msa, int start, int end) { float dist=0; for(int i=0; i0; i--) { if(block[i-1]!=0 && block[i]!=0) block[i-1] = block[i]; if(block[i]>maxBlockLen) maxBlockLen = block[i]; } int* blockCount = new int[maxBlockLen+1]; for(i=0+1; i0 && block[i]!=block[i-1]) blockCount[block[i]]++; for(i=0+1; i0 && block[i]!=block[i-1]) { start = i; i++; while(block[i]==block[i-1]) i++; end = i-1; // printf("%d to %d\n", start, end); dist = getAvgCaDist(msa,start,end); printf("%d %f\n", block[i-1], dist); } } delete[] block; delete[] blockCount; return; } // Counts the number and length of ungapped blocks // in the original protein Alignment void getBlockStatsPwa(Alignment pw) { int i, k, n, flag; // int j,A,B; if(DB) printf("ENTERED getBlockStatsPwa\n"); int length = pw.maximumSequenceLength; int* blockCount = new int[length]; for(int i=0; i0; i--) { if(block[i-1]!=0 && block[i]!=0) block[i-1] = block[i]; if(block[i]>maxBlockLen) maxBlockLen = block[i]; } // now determine frequency of blocks of each length for(i=0; i0 && block[i]!=block[i-1]) blockCount[block[i]]++; for(i=0; i0 && block[i]!=block[i-1]) { start = i; i++; while(block[i]==block[i-1]) i++; end = i-1; // printf("%d to %d\n", start, end); if((start-end)*(start-end)>1) { k = 2*n; dist =.5*sqrt( pow( pw.sequences[k].structure.caCoordinates[start][0] - pw.sequences[k].structure.caCoordinates[end][0] , 2) + pow( pw.sequences[k].structure.caCoordinates[start][1] - pw.sequences[k].structure.caCoordinates[end][1] , 2) + pow( pw.sequences[k].structure.caCoordinates[start][2] - pw.sequences[k].structure.caCoordinates[end][2] , 2) ); k = 2*n+1; dist+=.5*sqrt( pow( pw.sequences[k].structure.caCoordinates[start][0] - pw.sequences[k].structure.caCoordinates[end][0] , 2) + pow( pw.sequences[k].structure.caCoordinates[start][1] - pw.sequences[k].structure.caCoordinates[end][1] , 2) + pow( pw.sequences[k].structure.caCoordinates[start][2] - pw.sequences[k].structure.caCoordinates[end][2] , 2) ); if(dist<30) globBDCount[(int)floor(dist*2)]++; } /* for(int m=0; m0; j--) { if(gap[i][j-1]!=0 && gap[i][j]!=0) gap[i][j-1] = gap[i][j]; if(gap[i][j]>maxGapLen) maxGapLen = gap[i][j]; } int* gapCount = new int[maxGapLen+1]; for(i=0; i0 && gap[i][j]!=gap[i][j-1]) gapCount[gap[i][j]]++; for(i=0+1; i0 && gap[i][j]!=gap[i][j-1]) { start = j; j++; while(gap[i][j]==gap[i][j-1]) j++; end = j-1; // printf("%d to %d\n", start, end); if(endmaxDist) maxDist = dist; printf("%d %f\n", gap[i][j-1], dist); } } } } for(i=0; i<20*10; i++) printf("%d.%d %d\n", i/10, i%10, distStats[i]); delete[] gap; delete[] gapCount; delete[] distStats; return; } // Converts a string (temp) to a float float charToFloat(char *temp) { int flag = 0; float ans = 0; int i, dec=0; for(i=0;i<(int)strlen(temp);i++) if(temp[i]=='.') dec = i; for(i=0;i<(int)strlen(temp);i++) { if(temp[i]==45) flag=1; if(temp[i]>=48 && temp[i]<=57) ans += pow((float)10,dec-i-1*(i=48 && temp[i]<=57) ans += (int)pow((float)10,(float)strlen(temp)-1-i)*(temp[i]-48); } if(flag) ans*=-1; return ans; } // Converts and integer num to a string of length len char* intToString(int num, int len) { int negative = 0; //int ans = 0; int i,j; float k; char *temp = new char[len+1]; if(num<0) negative=1; if(num<0) num*=-1; for(i=0; i=65 && s[i]<=90) s[i]+=32; } return s; } // pairToMult(Alignment pw, Alignment* msa) // - parameters : pw - name of the pairwise Alignment structure to be // converted // *msa - pointer to the msa structure which we will write to // - Gets data from pw. Uses the first sequence as the "master", and uses // the pairwise Alignments of the "master" with each "slave" to // construct a multiple sequence Alignment. // void pairToMult(Alignment pw, Alignment* msa) { int i,j,k,count,len,cFS; // int n; char **tempmsa; char *masterSeq = new char[10*pw.maximumSequenceLength]; char *masterSeqNoGap = new char[10*pw.maximumSequenceLength]; char *tempSeq = new char[10*pw.maximumSequenceLength]; int *nGaps= new int[10*pw.maximumSequenceLength]; int *tempnGaps= new int[10*pw.maximumSequenceLength]; msa->nSequences = pw.nProteins; if(msa->name!=NULL) delete[] msa->name; msa->name = new char[strlen(pw.name)+1]; // Construct master sequence w/o gaps strcpy(tempSeq,pw.sequences[0].residues); count = 0; for(i=0; i<(int)strlen(tempSeq); i++) if(tempSeq[i] != '-' && tempSeq[i] != '?') masterSeqNoGap[count++] = tempSeq[i]; masterSeqNoGap[count] = '\0'; // Figure out how many gaps the master seq should have // // Read one sequence at a time for(k=0; knGaps[i]) nGaps[i] = j-count; } } } // Now add all these gaps into the master sequence for(i=0; i<(int)strlen(masterSeqNoGap)+1; i++) { for(j=0; jmaximumSequenceLength = strlen(masterSeq); msa->sequences = new Sequence[msa->nSequences]; // initialize some other dynamic variables len = strlen(masterSeq); tempmsa = new char*[pw.nProteins]; for(i=0;isequences[0].setResidues(masterSeq); // Set name of 'master' seq here for msa msa->sequences[0].setName(pw.sequences[0].name); // Set name of other unique seqs here for msa for(i=1; isequences[i].setName(pw.sequences[2*i-1].name); // Read through each pairwise Alignment // Go through and align each slave sequence to the overall master // using it's local master for(i=1; isequences[i].residues,"-"); //BUG tempSeq[0] = '\0'; for(k=0; ksequences[i].residues,tempSeq); } } delete[] tempmsa; delete[] masterSeq; delete[] masterSeqNoGap; delete[] tempSeq; delete[] nGaps; delete[] tempnGaps; return; } // multToPair(Alignment msa, Alignment *pw) // - parameters : msa - name of the msa structure to be // converted // *pw - pointer to the PW structure which we will write to // - Gets data from msa. Uses the first sequence as the "master", and uses // the pairwise Alignments of the "master" with each "slave" to // construct a multiple sequence Alignment. FIX // void multToPair(Alignment msa, Alignment *pw) { if(DB) printf("entering multToPair\n"); //int oldNSequences = pw->nSequences; //int oldMaxSeqLen = pw->maximumSequenceLength; pw->nProteins = msa.nSequences; pw->nSequences = msa.nSequences*(msa.nSequences-1); pw->maximumSequenceLength = msa.maximumSequenceLength; if(pw->name!=NULL) delete[] pw->name; pw->name = new char[strlen(msa.name)+1]; strcpy(pw->name, msa.name); if(pw->sequences!=NULL) delete[] pw->sequences; pw->sequences = new Sequence[pw->nSequences]; int count=0; for(int i=0; isequences[count].setName(msa.sequences[i].name); pw->sequences[count+1].setName(msa.sequences[j].name); for(int k=0, r=0; ksequences[count].residues[r]=msa.sequences[i].residues[k]; pw->sequences[count+1].residues[r]=msa.sequences[j].residues[k]; r++; } } } if(DB) printf("leaving multToPair\n"); } // Accepts a character, returns a 2 character string containing // the accepted character and the NULL character char * charToString(char i) { char *temp = new char[2]; temp[0] = i; temp[1] = '\0'; return temp; } float corr2(Matrix m1, Matrix m2) { int N=m1.N; float A=0,B=0; for(int i=0; iqScore[i]) qScore[i] = qMat[j][k]; printf("%d %f %f\n",i,jScore[i], qScore[i]); temp.nSequences--; } }