/*****************************************************************************
*
*            Copyright (C) 2005 The Board of Trustees of the
*                        University of Illinois
*                         All Rights Reserved
*
*    This program is free software; you can redistribute it and/or
*    modify it under the terms of the GNU Lesser General Public
*    License as published by the Free Software Foundation; either
*    version 2.1 of the License, or (at your option) any later version.
*
*    This library is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*    Lesser General Public License located in the COPYING file for more
*    details.
*
******************************************************************************/

/*****************************************************************************
* RCS INFORMATION:
*
*       $RCSfile: seqqr.cpp,v $
*       $Author: erobert3 $        $Locker:  $             $State: Exp $
*       $Revision: 1.1.2.8 $           $Date: 2005/03/02 22:23:23 $
*
******************************************************************************/

// $Id: seqqr.cpp,v 1.1.2.8 2005/03/02 22:23:23 erobert3 Exp $

#include <stdlib.h>
#include <stdio.h>
#include "symbol.h"
#include "alphabet.h"
#include "alphabetBuilder.h"
#include "sequence.h"
#include "alignedSequence.h"
#include "sequenceAlignment.h"
#include "fastaReader.h"
#include "fastaWriter.h"
#include "sequenceQR.h"
#include "seqqr.h"

/**
 * The name of the FASTA file that contains the alignment of which to get an evolutionary proifle.
 */
char *inputFilename = NULL;

/**
 * The name of the file to which the evolutionary profile will be written.
 */
char *outputFilename = NULL;

/**
 * The percent identity at which to stop including sequences in the profile.
 */
float identityCutoff = 0.4;

/**
 * The number of sequences whose ordering should be preserved at the beginning of the matrix.
 */
int preserveCount = 0;

/**
 * If gap scaling should be performed.
 */
int performGapScaling = 1;

/**
 * The gap scale parameter used in scaling the gap values.
 */
float gapScaleParameter = 1.0;


/**
 * Entry point for the seqqr program. This program calculates an evolutionary profile given a file
 * containing aligned sequences in FASTA format.
 *
 * @param   argc    The argument count.
 * @param   argv    The argument values.
 */
int main(int argc, char** argv) {
    
    //Print the copyright notice.
    printCopyright(argc, argv);
    
    //Parse the arguments.
    if (!parseArgs(argc, argv)) {
        
        //If the arguments were not valid, print the usage and exit.
        printUsage(argc, argv);
        exit(1);
    }

    //Create an alphabet for reading the sequences.
    AlphabetBuilder* alphabetBuilder = new AlphabetBuilder();
    Alphabet* alphabet = alphabetBuilder->getProteinAlphabet();

    //Set the file to be read.    
    FASTAReader* reader = new FASTAReader(alphabet);
    reader->setPath("");
    if (reader->setFilename(inputFilename) == 0) {
        printf("Error: file %s was not found.\n", inputFilename);
        exit(1);
    }
    
    //Read the sequences.
    SequenceAlignment* alignment = reader->getSequenceAlignment();
    if (alignment == NULL) {
        printf("Error: file %s was not a valid FASTA file.\n", inputFilename);
        exit(1);
    }
    printf("Read alignment from '%s': %d sequences of length %d\n", inputFilename, alignment->getSequenceCount(), alignment->getLength());
    
    //Make sure all of the sequences are of the same length.
    int length = -1;
    if (alignment->getSequenceCount() > 0) length = alignment->getSequence(0)->getLength();
    for (int i=1; i<alignment->getSequenceCount(); i++) {
        if (alignment->getSequence(i)->getLength() != length) {
            printf("Error: file %s did not contain a sequence alignment.\n", inputFilename);
            exit(1);            
        }
    }
    
    //Calculate the evolutionary profile.
    SequenceQR* sequenceQR = new SequenceQR(alignment, identityCutoff, preserveCount, performGapScaling, gapScaleParameter);
    SequenceAlignment* profile = sequenceQR->qr();
    
    //Write out the profile.
    FASTAWriter writer;
    writer.writeSequenceAlignment(outputFilename, profile, 60);
    printf("An evolutionary profile has been created in '%s' with %d sequences\n", outputFilename, profile->getSequenceCount());

    //Free any used memory.
    delete alphabetBuilder;
    delete alphabet;
    delete reader;
    delete profile;
}

/**
 * This functions parses the arguments passed in to the seqqr program. It stores the parsed values
 * in the appropriate global variables and returns whether the parsing was successful.
 *
 * @param   argc    The argument count.
 * @param   argv    The argument values.
 * @return  1 if the parsing was successful, otherwise 0.
 */
int parseArgs(int argc, char** argv) {
    
    //Parse any arguments.        
    for (int i=1; i<argc; i++) {
        
        char *option = argv[i];
        
        //See if the user is trying to get help.
        if (strcmp(option, "-h") == 0 || strcmp(option, "--help") == 0) {
            return 0;
        }
        else if (strcmp(option, "-v") == 0 || strcmp(option, "--version") == 0) {
            exit(0);
        }
        else if (strcmp(option, "-s") == 0 || strcmp(option, "--gap-scale") == 0) {
            if (i < argc-1){
                gapScaleParameter = atof(argv[++i]);
                if (gapScaleParameter == 0.0) return 0;
            } else {
                return 0;
            }
        }
        else if (strcmp(option, "-i") == 0 || strcmp(option, "--id-cutoff") == 0) {
            if (i < argc-1){
                float newIdentityCutoff = atof(argv[++i]);
                if (newIdentityCutoff < 1.0 || newIdentityCutoff > 100.0) {
                    printf("Error: identity cutoff value %0.2f is not valid. Value must be between 1\n", newIdentityCutoff);
                    printf("and 100.\n");
                    exit(0);
                }
                identityCutoff = newIdentityCutoff/100.0;
            } else {
                return 0;
            }
        }
        else if (strcmp(option, "-p") == 0 || strcmp(option, "--preserve") == 0) {
            if (i < argc-1){
                preserveCount = atoi(argv[++i]);
                if (preserveCount < 0) return 0;
            } else {
                return 0;
            }
        }
        else if (strcmp(option, "-n") == 0 || strcmp(option, "--no-scaling") == 0) {
            performGapScaling = 0;
        }
            
        //If this is the next to last argument and it is not an option, it must be the input filename.
        else if (i == argc-2) {
            inputFilename = option;
        }
        
        //If this is the last argument and it is not an option, it must be the output filename.
        else if (i == argc-1) {
            outputFilename = option;
        }
            
        //This must be an invalid option.
        else {
            return 0;
        }
    }
    
    //Make sure we have a valid state for all of the variables.
    if (inputFilename != NULL && outputFilename != NULL)
        return 1;
    
    //Something must be invalid, return 0;
    return 0;
}

/**
 * This function prints the copyright notice.
 */
void printCopyright(int argc, char** argv) {
    printf("%s v%s\n", argv[0], VERSION);
    printf("Copyright (C) 2003-2005 The Board of Trustees of the University of Illinois.\n");
    printf("When publishing a work based in whole or in part on this program, please\n");
    printf("reference: Sethi, A., O'Donoghue, P. & Luthey-Schulten, Z. (2005) 'Evolutionary\n");
    printf("profiles from the QR factorization of multiple sequence alignments' Proc. Natl.\n");
    printf("Acad. Sci. USA 102, 4045-4050.\n");
    printf("\n");
}


/**
 * This function prints the usage for the seqqr program.
 */
void printUsage(int argc, char** argv) {
    printf("usage: %s [OPTIONS] input_file output_file\n", argv[0]);
    printf("\n");
    printf(" input_file       A FASTA formatted file containing the aligned\n");
    printf("                  sequences on which to perform the evolutionary\n");
    printf("                  profiling.\n");
    printf(" output_file      The file to which the evolutionary profile\n");
    printf("                  should be written. The file will be in FASTA\n");
    printf("                  format.\n");
    printf("\n");
    printf(" Where OPTIONS is one or more of the following:\n");
    printf("   -i value  --id-cutoff value   The percent identity cutoff. (default=40)\n");
    printf("   -p value  --preserve value    Preserves the ordering of the first [value]\n");
    printf("                                 sequences.\n");
    printf("   -s value  --gap-scale value   Sets the gap scaling parameter. (default=1.0)\n");
    printf("   -n        --no-scaling        Turns off gap scaling. Gaps will be value 1.\n");
    printf("   -h --help                     Print this message.\n");
    printf("   -v --version                  Print the version and reference information.\n");
    printf("\n");
}

