/***************************************************************************** * * Copyright (C) 2005 The Board of Trustees of the * University of Illinois * All Rights Reserved * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License located in the COPYING file for more * details. * ******************************************************************************/ /***************************************************************************** * RCS INFORMATION: * * $RCSfile: seqqr.cpp,v $ * $Author: erobert3 $ $Locker: $ $State: Exp $ * $Revision: 1.1.2.8 $ $Date: 2005/03/02 22:23:23 $ * ******************************************************************************/ // $Id: seqqr.cpp,v 1.1.2.8 2005/03/02 22:23:23 erobert3 Exp $ #include #include #include "symbol.h" #include "alphabet.h" #include "alphabetBuilder.h" #include "sequence.h" #include "alignedSequence.h" #include "sequenceAlignment.h" #include "fastaReader.h" #include "fastaWriter.h" #include "sequenceQR.h" #include "seqqr.h" /** * The name of the FASTA file that contains the alignment of which to get an evolutionary proifle. */ char *inputFilename = NULL; /** * The name of the file to which the evolutionary profile will be written. */ char *outputFilename = NULL; /** * The percent identity at which to stop including sequences in the profile. */ float identityCutoff = 0.4; /** * The number of sequences whose ordering should be preserved at the beginning of the matrix. */ int preserveCount = 0; /** * If gap scaling should be performed. */ int performGapScaling = 1; /** * The gap scale parameter used in scaling the gap values. */ float gapScaleParameter = 1.0; /** * Entry point for the seqqr program. This program calculates an evolutionary profile given a file * containing aligned sequences in FASTA format. * * @param argc The argument count. * @param argv The argument values. */ int main(int argc, char** argv) { //Print the copyright notice. printCopyright(argc, argv); //Parse the arguments. if (!parseArgs(argc, argv)) { //If the arguments were not valid, print the usage and exit. printUsage(argc, argv); exit(1); } //Create an alphabet for reading the sequences. AlphabetBuilder* alphabetBuilder = new AlphabetBuilder(); Alphabet* alphabet = alphabetBuilder->getProteinAlphabet(); //Set the file to be read. FASTAReader* reader = new FASTAReader(alphabet); reader->setPath(""); if (reader->setFilename(inputFilename) == 0) { printf("Error: file %s was not found.\n", inputFilename); exit(1); } //Read the sequences. SequenceAlignment* alignment = reader->getSequenceAlignment(); if (alignment == NULL) { printf("Error: file %s was not a valid FASTA file.\n", inputFilename); exit(1); } printf("Read alignment from '%s': %d sequences of length %d\n", inputFilename, alignment->getSequenceCount(), alignment->getLength()); //Make sure all of the sequences are of the same length. int length = -1; if (alignment->getSequenceCount() > 0) length = alignment->getSequence(0)->getLength(); for (int i=1; igetSequenceCount(); i++) { if (alignment->getSequence(i)->getLength() != length) { printf("Error: file %s did not contain a sequence alignment.\n", inputFilename); exit(1); } } //Calculate the evolutionary profile. SequenceQR* sequenceQR = new SequenceQR(alignment, identityCutoff, preserveCount, performGapScaling, gapScaleParameter); SequenceAlignment* profile = sequenceQR->qr(); //Write out the profile. FASTAWriter writer; writer.writeSequenceAlignment(outputFilename, profile, 60); printf("An evolutionary profile has been created in '%s' with %d sequences\n", outputFilename, profile->getSequenceCount()); //Free any used memory. delete alphabetBuilder; delete alphabet; delete reader; delete profile; } /** * This functions parses the arguments passed in to the seqqr program. It stores the parsed values * in the appropriate global variables and returns whether the parsing was successful. * * @param argc The argument count. * @param argv The argument values. * @return 1 if the parsing was successful, otherwise 0. */ int parseArgs(int argc, char** argv) { //Parse any arguments. for (int i=1; i 100.0) { printf("Error: identity cutoff value %0.2f is not valid. Value must be between 1\n", newIdentityCutoff); printf("and 100.\n"); exit(0); } identityCutoff = newIdentityCutoff/100.0; } else { return 0; } } else if (strcmp(option, "-p") == 0 || strcmp(option, "--preserve") == 0) { if (i < argc-1){ preserveCount = atoi(argv[++i]); if (preserveCount < 0) return 0; } else { return 0; } } else if (strcmp(option, "-n") == 0 || strcmp(option, "--no-scaling") == 0) { performGapScaling = 0; } //If this is the next to last argument and it is not an option, it must be the input filename. else if (i == argc-2) { inputFilename = option; } //If this is the last argument and it is not an option, it must be the output filename. else if (i == argc-1) { outputFilename = option; } //This must be an invalid option. else { return 0; } } //Make sure we have a valid state for all of the variables. if (inputFilename != NULL && outputFilename != NULL) return 1; //Something must be invalid, return 0; return 0; } /** * This function prints the copyright notice. */ void printCopyright(int argc, char** argv) { printf("%s v%s\n", argv[0], VERSION); printf("Copyright (C) 2003-2005 The Board of Trustees of the University of Illinois.\n"); printf("When publishing a work based in whole or in part on this program, please\n"); printf("reference: Sethi, A., O'Donoghue, P. & Luthey-Schulten, Z. (2005) 'Evolutionary\n"); printf("profiles from the QR factorization of multiple sequence alignments' Proc. Natl.\n"); printf("Acad. Sci. USA 102, 4045-4050.\n"); printf("\n"); } /** * This function prints the usage for the seqqr program. */ void printUsage(int argc, char** argv) { printf("usage: %s [OPTIONS] input_file output_file\n", argv[0]); printf("\n"); printf(" input_file A FASTA formatted file containing the aligned\n"); printf(" sequences on which to perform the evolutionary\n"); printf(" profiling.\n"); printf(" output_file The file to which the evolutionary profile\n"); printf(" should be written. The file will be in FASTA\n"); printf(" format.\n"); printf("\n"); printf(" Where OPTIONS is one or more of the following:\n"); printf(" -i value --id-cutoff value The percent identity cutoff. (default=40)\n"); printf(" -p value --preserve value Preserves the ordering of the first [value]\n"); printf(" sequences.\n"); printf(" -s value --gap-scale value Sets the gap scaling parameter. (default=1.0)\n"); printf(" -n --no-scaling Turns off gap scaling. Gaps will be value 1.\n"); printf(" -h --help Print this message.\n"); printf(" -v --version Print the version and reference information.\n"); printf("\n"); }