/*	
 *	Relative Reaction Rate Estimator (rrre)
 *	Version 1.0 beta
 *	Last modified 3/23/2013 by Mahmoud Moradi
 *  	This program analyzes the Steered Molecular Dynamics (SMD) trajectories.
 *  	It uses the Hummer-Szabo estimator to measure free energy (perturbed and unperturbed).
 *  	Both uni- and bidirectional estimators are implemented.
 *  	If the trajectories can be classified into a limited number of paths, this program can measure their relative rates/probabilities.
 *  	Usage: ./rrre ParameterFile < input > output
*/

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <cassert>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>

// Boltzmann constant kB (kcal/(mol*Kelvin))
#define boltzman 0.001987191683

using namespace std;

namespace {

    // ---- Run settings -------------------------------------------------------
    // Defaults used when the parameter file does not override them (see loadparm).
    double temperature = 300.0; // in Kelvin
    int paths = 1; // number of pathway classes
    double x0 = 0; // initial value of collective variable (colvar)
    double x1 = 1; // final value of colvar
    double dx = 0.1; // bin size for the colvar histogram
    double kernel = 0.0; // kernel width for the colvar histogram (0: no kernel smoothing)
    int kWIDTH = 0; // kernel width for the colvar histogram scaled by 1/dx (i.e. in bins)
    double kHEIGHT = 1.0; // kernel height for the colvar histogram (to normalize the sum)
    int S = 10; // number of snapshots stored for each run
    double harmonic = 1; // harmonic constant in (kcal/mol)/X^2 (X: colvar unit)
    double ddf = 0.1; // precision of free energy to determine convergence; also the step of the df scan in BAR
    int iteration = 100; // maximum number of iterations for self-consistent algorithm
    int R = 0; // number of iterations per report in the self-consistent algorithm (R=0: no report except for the final one)
    int method = 0; // unidirectional/bidirectional 0/1
    int adj = 0;  // adjustment method: 0 forward, 1 reverse, 2 average, 3 optimized (to be implemented)
    int cx = 1; // column in which colvar is stored in the input file
    int cw = 2; // column in which work is stored in the input file
    int Nmax = 10000; // Maximum number of runs to be considered for the calculations; later overwritten with the number actually loaded
    double* scale; // (d\phi_i/d\lambda)^2 vector assuming it is constant along the path; indexed by pathway class
    double* sigma; // enhancement factor inverse; indexed by pathway class
    // The three values below are derived from the settings above and are
    // recomputed at the end of loadparm once the parameter file has been read.
    double beta = 1/(boltzman*temperature); // inverse temperature
    int L = int((x1-x0)/dx); // number of bins
    double k = harmonic * dx * dx; // scaled harmonic constant

    // ---- Data arrays (allocated in loadparm/main) ---------------------------
    double* w; // all work measurements; snapshot j of run i is w[i*S+j]
    int* x; // all colvar measurements (scaled to integer values, i.e. bin indices); same layout as w
    int* m; // direction of the run (0: forward, nonzero: reverse)
    int* p; // pathway of the run (class label, 1..paths)
    int* N; // number of runs for each direction/path: N[0]/N[1] totals forward/reverse, N[2*P]/N[2*P+1] for class P
    double* W; // total work values of the class being processed: forward runs first, then reverse runs (see Work)
    double* F; // Perturbed free energy, stored as exp(-beta f(t)); S entries
    double* G; // Unperturbed free energy, stored as exp(-beta g(x)); L entries

    // loading data from a file
    //
    // Reads up to Nmax whitespace-separated integers from myfile.
    // Returns a new[]-allocated array of Nmax ints that the caller owns, and
    // shrinks Nmax to the number of values actually read.
    // Exits the program with an error message if the file cannot be opened.
    int* loadfile (const char* myfile) {
        std::ifstream f (myfile);
        if (!f) {
            std::cerr << "Error: Could not open " << myfile << " file!\n";
            exit(EXIT_FAILURE);
        }
        int* I = new int[Nmax];
        int i = 0;
        // Loop on the extraction itself: testing eof() before reading counts
        // one extra, never-read entry whenever the file ends with whitespace.
        while (i<Nmax && (f >> I[i]))
            i++;
        Nmax=i;
        return I;
    }

    // reading the parameter file
    void loadparm (char* parmfile) {
	ifstream parm (parmfile);
	if (parm) {
	    string key,key1,key2,key3;
	    int n;
	    std::cout << "# Reading parameter file: " << parmfile << "\n";
	    while(!parm.eof()) {
		parm >> key;
		if (key=="/temperature") {
		    parm >> temperature;
		} else if (key=="/runs") {
		    parm >> Nmax;
		} else if (key=="/protocol") {
		    parm >> key1;
		    method=(key1=="bidirectional");
		    if (method) {
			m = new int[Nmax];
			parm >> key2;
			if (key2=="file") {
			    parm >> key3;
			    const char* myfile = key3.c_str();
			    m=loadfile(myfile);
			} else if (key2=="alternate")
			    for (int i=0; i<Nmax; i++) m[i]=i%2;
			else if (key2=="sorted") {
			    parm >> n;
			    for (int i=0; i<n; i++) m[i]=0;
			    for (int i=n; i<Nmax; i++) m[i]=1;
			}
		    }
		} else if (key=="/pathways") {
		    parm >> paths;
		    if (paths>1) {
    			scale = new double[paths+1];
			for (int P=0; P<=paths; P++) scale[P]=1;
			p = new int[Nmax];
			parm >> key1;
			// reading them from a file
			if (key1=="file") {
			    parm >> key2;
			    const char* myfile = key2.c_str();
			    p=loadfile(myfile);
			// the trajectories are sorted based on their classes (last class omitted since it is Nmax-sum_of_the_others)
			} else if (key1=="sorted") {
			    int n_=0;
			    for (int P=1; P<=paths; P++) {
				if (P<paths)
				    parm >> n;
				else
				    n=Nmax;
				for (int i=n_; i<n; i++)
				    p[i]=P;
			    }
			// same as "sorted" but separate for each direction (two sets of numbers need to be given)
			// this has to come after the direction otherwise will be ignored
			// the order: NF_1 NF_2 ... NR_1 NR_2 ...
			} else if (key1=="separate") {
			    int i=0;
			    for (int P=1; P<=paths; P++) {
				if (P<paths)
				    parm >> n;
				else
				    n=Nmax;
				int j=0;
				while(i<Nmax&&j<n) {
				    if (m[i]==0) {
					p[i]=P;
					j++;
				    }
				    i++;
				}
			    }
			    if (method) {
    				i=0;
				for (int P=1; P<=paths; P++) {
				    if (P<paths)
					parm >> n;
				    else
					n=Nmax;
				    int j=0;
			    	    while(i<Nmax&&j<n) {
					if (m[i]==0) {
					    p[i]=P;
				    	    j++;
					}
					i++;
			    	    }
			    	}
			    }
			}
		    }
    		} else if (key=="/initial") {
		    parm >> x0;
		} else if (key=="/final") {
		    parm >> x1;
		} else if (key=="/bin") {
		    parm >> dx;
		} else if (key=="/kernel") {
		    parm >> kernel;
		} else if (key=="/snapshots") {
		    parm >> S;
		} else if (key=="/harmonic") {
		    parm >> harmonic;
		} else if (key=="/convergence") {
		    parm >> ddf;
		} else if (key=="/iteration") {
		    parm >> iteration;
		} else if (key=="/report") {
		    parm >> R;
		// must come after pathways
		} else if (key=="/metric") {
		    for (int P=1;P<=paths;P++)
			parm >> scale[P];
		} else if (key=="/column_x") {
		    parm >> cx;
		} else if (key=="/column_w") {
		    parm >> cw;
		} else if (key=="/adjust") {
		    parm >> key1;
		    if (key1=="reverse")
			adj=1;
		    else if (key1=="average")
			adj=2;
		    else if (key1=="optimized")
			adj=3;
		}
	    }
	    parm.close();
	    // update the parameters
	    beta = 1/(boltzman*temperature);
	    L = int((x1-x0)/dx);
	    k = harmonic * dx * dx;
	    if (kernel>0) {
		kWIDTH=int(kernel/dx);
		kHEIGHT=1/(kWIDTH*sqrt(8*atan(1.0)));
	    } else {
		kernel=0;
	    }
	    // report
	    std::cout << "# Done with loading the parameters!\n";
	} else {
	    std::cerr << "Error: Could not open " << parmfile << " file!\n";
	    exit(EXIT_FAILURE);
	}
    }

    // Echoes the current settings to standard output, one '#'-prefixed line
    // per setting, so that they are recorded at the top of the results.
    void reportparm () {
        std::ostream& out = std::cout;
        out << "# Reporting the parameters:\n"
            << "# Temperature: " << temperature << " Kelvin\n"
            << "# Declared number of runs: " << Nmax << "\n"
            << "# Protocol: " << (method ? "Bidirectional" : "Unidirectional") << "\n"
            << "# Number of pathway classes: " << paths << "\n"
            << "# Colvar (initial,target) value: (" << x0 << "," << x1 << ")\n"
            << "# Colvar bin size: " << dx << " (" << L << " bins)\n";
        // the kernel line only appears when smoothing is switched on
        if (kernel>0) {
            out << "# Using a Gaussian Kernel of width: " << kernel << " for the histograms.\n";
        }
        out << "# Number of snapshots: " << S << " per run\n"
            << "# Force constant: " << harmonic << " kcal/mol * (1/ColvarUnit)^2\n"
            << "# Convergence precision (for self-consistent solution of free energies): " << ddf << " kcal/mol\n"
            << "# Maximum number of iterations if not converged: " << iteration << "\n"
            << "# Results to be reported every: " << R << " iterations\n"
            << "# Metric:";
        int cls = 1;
        while (cls<=paths) {
            out << " " << scale[cls];
            ++cls;
        }
        out << "\n"
            << "# Colvar/work column: " << cx << "/" << cw << "\n"
            << "# Adjustment method: " << adj << "\n"
            << "# Done with reporting the parameters!\n";
    }

    // reading the data from the standard input
    bool loaddata () {
	double colvar,work;
	int i = 0;
	int c;
	string key,nothing;
	while(i<Nmax&&!std::cin.eof()) {
	    int j=0;
	    while (j<S) {
		if (std::cin.peek()!='#') {
    		    c=1;
    		    while(c<=cx||c<=cw) {
    			if (c==cx) {
    			    std::cin >> colvar;
    			} else if (c==cw) {
			    std::cin >> work;
        		} else {
    			    std::cin >> nothing;
    			}
    			c++;
    		    }
		    x[i*S+j] = int((colvar-x0)/dx);
		    w[i*S+j] = work;
		    j++;
		}
    		std::cin.ignore(INT_MAX,'\n');
	    }
	    i++;
	}

	Nmax=i;
	std::cout << "# Done With Loading " << i << " Trajectories\n" ;
	// number of trajectories in each class/direction
	for (int P=0; P<2*paths+2; P++)
	    N[P]=0;
	for (int j=0;j<Nmax;j++) {
	    N[m[j]]++;
	    N[m[j]+2*p[j]]++;
	}
	if (method) std::cout << "# Forward:" << N[0] << "Trajectories.\n" ;
	for (int P=1; P<=paths; P++)
	    std::cout << "# Class " << P << ": " << N[2*P] << " Trajectories\n" ;
	if (method) {
	    std::cout << "# Reverse:" << N[0] << "Trajectories.\n" ;
	    for (int P=1; P<=paths; P++)
		std::cout << "# Class " << P << " : " << N[2*P+1] << " Trajectories\n" ;
	}
	return (Nmax>0);
    }

    // Gaussian weight between two bin indices: kHEIGHT times
    // exp(-(x1-x2)^2 / (2*kWIDTH^2)), with kWIDTH measured in bins.
    double Kernel(int x1, int x2) {
        const int gap = x1-x2;
        const int widthSquared = kWIDTH*kWIDTH;
        const double exponent = -0.5*gap*gap/widthSquared;
        return kHEIGHT*exp(exponent);
    }
    // Collects the total work (last snapshot minus first snapshot) of every run
    // in pathway class P; P==0 selects all runs.
    // The returned new[]-allocated array lists the forward runs first
    // (N[2*P] entries) followed by the reverse runs (N[2*P+1] entries), each
    // group in run order.  The caller owns the array.
    double* Work (int P) {
        const int forwardCount = N[2*P];
        double* totals = new double[forwardCount+N[2*P+1]];
        int nextForward = 0;
        int nextReverse = 0;
        for (int run=0; run<Nmax; run++) {
            // skip runs that belong to another class
            if (P!=0 && p[run]!=P)
                continue;
            const double net = w[run*S+(S-1)]-w[run*S];
            if (m[run])
                totals[forwardCount+nextReverse++] = net;
            else
                totals[nextForward++] = net;
        }
        return totals;
    }

    // estimating free energy difference df using BAR (only for the bidirectional case)
    //
    // P   : pathway class whose total works are currently stored in W
    // Wfa : average forward work, Wra: average reverse work
    // Scans df upwards from min(Wfa,-Wra) in steps of ddf and returns the first
    // df at which the BAR residual changes sign, or the first scanned value
    // that is >= max(Wfa,-Wra) if it never does.
    double BAR (int P,double Wfa, double Wra) {
        const double dfmin = (Wfa+Wra<0)?Wfa:-Wra;
        const double dfmax = (Wfa+Wra<0)?-Wra:Wfa;
        // The population ratios must be real-valued; dividing the int counts
        // directly would truncate them (e.g. 3/5 -> 0).
        const double ratioFR = double(N[0])/N[1];
        const double ratioRF = double(N[1])/N[0];
        double df=dfmin;
        double bar = 0;
        double bar_ = 0;
        while (df<dfmax && bar*bar_>=0) {
            df+=ddf;
            bar_ = bar;
            bar = 0;
            for (int i=0; i<N[2*P]; i++) {
                bar += 1/(1+ratioFR*exp(beta*(W[i]-df)));
            }
            for (int i=0; i<N[2*P+1]; i++) {
                bar -= 1/(1+ratioRF*exp(beta*(W[N[2*P]+i]+df)));
            }
        }
        return df;
    }

    // initial guess for the perturbed free energy; F(t)=exp(-beta f(t))
    // in the uni-/bidirectional case it uses Jarzynski/Minh-Adib approach
    bool JAR (int P, double df) {
        for (int j=0; j<S; j++) {
	    F[j] = 0;
    	    int i0=0;
    	    int i1=0;
    	    for (int i=0; i<Nmax; i++) {
        	if (p[i]==P||!P) {
        	    if (m[i]) {
        		i1++;
			F[j]+=(N[1]/N[2*P+1])*exp(beta*(W[N[2*P]+i1]-w[i*S+(S-j-1)]))/(N[0]+N[1]*exp(beta*(W[N[2*P]+i1]+df)));
        	    } else {
        		i0++;
    			F[j]+=(N[0]/N[2*P])*exp(-beta*(w[i*S+j]))/(N[0]+N[1]*exp(-beta*(W[i0]-df)));
        	    }
    		}
    	    }
	}
	return true;
    }

    // estimating unperturbed free energy; G(x)=exp(-beta g(x)) from F(t)
    // in the uni-/bidirectional case it uses original/Minh-Adib variation of Hummer-Szabo estimator
    bool PMF (int P, double df) {
	double* A = new double[L];
        for (int l=0; l<L; l++) {
    	    G[l] = 0;
	    A[l] = 0;
	    for (int j=0; j<S; j++) {
		A[l]+=exp(-beta*0.5*k*(l-j*L*1.0/S)*(l-j*L*1.0/S))/F[j];
	    }
	}
	int i0=0;
	int i1=0;
	int xx0,xx1;
	for (int i=0; i<Nmax; i++) {
    	    if (p[i]==P||!P) {
    	        if (m[i]) {
    	    	    i1++;
		    for (int j=0; j<S; j++)
			if (kernel==0)
			    G[x[i*S+j]]+=(N[1]/N[2*P+1])*exp(beta*(W[N[2*P]+i1]-w[i*S+j]))/(N[0]+N[1]*exp(beta*(W[N[2*P]+i1]+df)))/F[S-j-1];
			else {
			    xx0=(x[i*S+j]>9*kWIDTH)?(x[i*S+j]-9*kWIDTH):(0);
			    xx1=(x[i*S+j]+9*kWIDTH<L)?(x[i*S+j]+9*kWIDTH):(L-1);
			    for (int xx=xx0; xx<=xx1; xx++)
				G[xx]+=Kernel(xx,x[i*S+j])*(N[1]/N[2*P+1])*exp(beta*(W[N[2*P]+i1]-w[i*S+j]))/(N[0]+N[1]*exp(beta*(W[N[2*P]+i1]+df)))/F[S-j-1];
			}
        	} else {
        	    i0++;
		    for (int j=0; j<S; j++)
			if (kernel==0)
			    G[x[i*S+j]]+=(N[0]/N[2*P])*exp(-beta*(w[i*S+j]))/(N[0]+N[1]*exp(-beta*(W[i0]-df)))/F[j];
			else {
			    xx0=(x[i*S+j]>9*kWIDTH)?(x[i*S+j]-9*kWIDTH):(0);
			    xx1=(x[i*S+j]+9*kWIDTH<L)?(x[i*S+j]+9*kWIDTH):(L-1);
			    for (int xx=xx0; xx<=xx1; xx++)
				G[xx]+=Kernel(xx,x[i*S+j])*(N[0]/N[2*P])*exp(-beta*(w[i*S+j]))/(N[0]+N[1]*exp(-beta*(W[i0]-df)))/F[j];

			}
		}
	    }
        }
        for (int l=0; l<L; l++) {
	    G[l]/=A[l];
        }
	delete[] A;
	return true;
    }

    // estimating F(t) from G(x); used iteratively along PMF
    bool FE () {
	double B=0;
	for (int l=0; l<L; l++)
	    B+=exp(-beta*0.5*k*l*l)*G[l];
	for (int j=0; j<S; j++) {
	    F[j] = 0;
	    for (int l=0; l<L; l++) {
	        F[j]+=exp(-beta*0.5*k*(l-j*L*1.0/S)*(l-j*L*1.0/S))*G[l];
	    }
	    F[j]/=B;
	}
	return true;
    }

    // reports f(t) and g(x) with no offset
    //
    // it : iteration number, appended to the PFE_/UFE_ line tags
    // P  : pathway class being reported
    // Prints -log(F[j])/beta for every snapshot and -log(G[l])/beta at every
    // bin centre.  Always returns true.
    bool report(int it, int P) {
        std::cout << "# Perturbed Free Eneregy:\n# _iteration path time FreeEnergy\n";
        for (int j=0; j<S; j++) std::cout << "PFE_" << it << " " << P << " " << j << " " << -log(F[j])/beta << "\n";
        std::cout << "# Unperturbed Free Eneregy:\n# _iteration path colvar FreeEnergy\n";
        for (int l=0; l<L; l++) std::cout << "UFE_" << it << " " << P << " " << x0+(l+0.5)*dx << " " << -log(G[l])/beta << "\n";
        // the function is declared bool; falling off the end is undefined behaviour
        return true;
    }

    // Contribution of bin l to sigma of pathway class P, using the reference
    // selected by adj: 2 = geometric mean of both end bins, 1 = last bin
    // (reverse), anything else = first bin (forward).  The reverse and average
    // forms are only used for bidirectional runs.
    double sigmaTerm (int P, int l) {
        if (adj==2&&method)
            // double() keeps the product of the two counts out of int arithmetic
            return scale[P]*sqrt(G[0]*G[L-1]*N[0]*N[1]/(double(N[2*P])*N[2*P+1]))/G[l];
        if (adj==1&&method)
            return scale[P]*G[L-1]*N[1]/(G[l]*N[2*P+1]);
        return scale[P]*G[0]*N[0]/(G[l]*N[2*P]);
    }

    // reports final f(t), g(x) with an offset, and committor function
    //
    // P : pathway class being reported
    // Prints the perturbed free energy per snapshot, then per bin the offset
    // unperturbed free energy and the running sum of sigma terms divided by
    // their total.  Leaves the total in sigma[P].  Always returns true.
    bool reportf(int P) {
        std::cout << "# Pathway Class: " << P << "\n";
        std::cout << "# Perturbed Free Eneregy:\n# path time FreeEnergy\n";
        for (int j=0; j<S; j++) {
            std::cout << "PFE " << P << " " << j << " " << -log(F[j])/beta << "\n";
        }
        std::cout << "# Unperturbed Free Eneregy and Committor Function:\n# path colvar FreeEenergy CommittorFunction\n";
        // first pass: total over all bins, needed to normalise the running sum
        double SIGMA=0;
        for (int l=0; l<L; l++)
            SIGMA+=sigmaTerm(P,l);
        // second pass: running sum and offset free energy per bin
        sigma[P]=0;
        for (int l=0; l<L; l++) {
            std::cout << "UFE " << P << " " << x0+(l+0.5)*dx << " ";
            sigma[P]+=sigmaTerm(P,l);
            if (adj==2&&method) {
                std::cout << -log(G[l]*sqrt(double(N[2*P])*N[2*P+1]/(G[0]*G[L-1]*N[0]*N[1])))/beta;
            } else if (adj==1&&method) {
                std::cout << -log(G[l]*N[2*P+1]/(G[L-1]*N[1]))/beta;
            } else {
                std::cout << -log(G[l]*N[2*P]/(G[0]*N[0]))/beta;
            }
            std::cout << " " << sigma[P]/SIGMA << "\n";
        }
        // the function is declared bool; falling off the end is undefined behaviour
        return true;
    }
}

// Program entry point.
// Usage: rrre ParameterFile < InputFile > OutputFile
// Loads the parameters and the trajectories, then for every pathway class
// (class 0 = all runs together) solves F and G self-consistently, reports
// them, and finally prints the relative rates and pathway probabilities.
int main(int argc,char *argv[]) {

    // reading the parameters
    if (argc<2) {
        std::cerr << "Error: No parameter file!\n" << "Usage: " << argv[0] << " ParameterFile < InputFile > OutputFile\n";
        exit(EXIT_FAILURE);
    }
    loadparm(argv[1]);

    // every array below is sized by these values, so reject unusable ones early
    if (S<1 || L<1 || paths<1) {
        std::cerr << "Error: The number of snapshots, bins and pathway classes must all be positive!\n";
        exit(EXIT_FAILURE);
    }

    // using default values for unspecified parameters
    if (!method) {
        m = new int[Nmax];
        for (int i=0; i<Nmax; i++) m[i]=0;
    }
    if (paths==1) {
        // keep a metric table that already exists instead of replacing it
        if (!scale) {
            scale = new double[2];
            scale[0]=1;
            scale[1]=1;
        }
        delete[] p;
        p = new int[Nmax];
        for (int i=0; i<Nmax; i++) p[i]=1;
    }

    // reporting the parameters to be used
    reportparm();

    // constructing the histograms
    w = new double[Nmax*S]; // all work measurements
    x = new int[Nmax*S]; // all colvar measurements (scaled to integer values)
    N = new int[2*paths+2]; // number of runs for each direction/path
    if (!loaddata()) {
        std::cerr << "Error: Could not load the trajectories!\n";
        exit(EXIT_FAILURE);
    }

    // defining the free energies and sigma's.
    F = new double[S];
    G = new double[L];
    double* G_ = new double[L]; // G of the previous iteration, for the convergence test
    sigma = new double[paths+1];

    // finding free energies for each method
    for (int P=0; P<=paths; P++) {
        // extracting the total W's and df
        // Work allocates the array itself; allocating one here as well would leak it
        W = Work(P);
        double Wfa = 0;
        double Wra = 0;
        for (int i=0; i<N[2*P]; i++) Wfa+=W[i]/N[2*P];
        double df=0;
        if (method) {
            for (int i=0; i<N[2*P+1]; i++) Wra+=W[N[2*P]+i]/N[2*P+1];
            df=BAR(P,Wfa,Wra);
        }

        // initiate the solution of F and G
        JAR(P,df);
        PMF(P,df);
        for (int l=0; l<L; l++) {
            G_[l]=G[l];
        }

        // self-consistent solution of F and G
        int it=1;
        double epsilon = 1e8;
        double epsilon_;
        while( it<iteration && epsilon>ddf ) {
            FE();
            PMF(P,df);
            // epsilon: largest change of g(x) over all bins since the last iteration
            epsilon = 0;
            for (int l=0; l<L; l++) {
                epsilon_=std::fabs(log(G[l]/G_[l])/beta);
                if (epsilon_>epsilon) epsilon=epsilon_;
                G_[l]=G[l];
            }
            // reporting
            if (R>0&&it%R==0) {
                std::cout << "# Iteration: " << it << "; converged to the precision " << epsilon << " kcal/mol\n";
                std::cout << "# Pathway Class: " << P << "\n";
                report(it,P);
            }
            it++;
        }

        // final results for path P
        std::cout << "# Final Iteration: " << it << "; converged to the precision " << epsilon << " kcal/mol\n";
        reportf(P);
        delete[] W;
    }

    // report the final probabilities
    double SIGMA=0;
    for (int P=1; P<=paths; P++) SIGMA+=1/sigma[P];
    std::cout << "# Relative Reaction Rates (Equilibrium)\n";
    std::cout << "# For each pair of pathways specified in rows and columns below,\n";
    std::cout << "# P_row/P_column (relative reaction rate) is:\n";
    std::cout << "#\t";
    for (int P=1; P<=paths; P++) std::cout << "\t(" << P << ")";
    std::cout << "\n";
    for (int P1=1; P1<=paths; P1++) {
        std::cout << "#\t(" << P1 << ")";
        for (int P2=1; P2<=paths; P2++) std::cout << std::setprecision(2) << std::fixed << "\t" << sigma[P2]/sigma[P1];
        std::cout << "\n";
    }
    if (method) {
        std::cout << "# Probability of Pathways\n" << "#\tPATH\tP_eq\tP_F\tP_R\n";
        for (int P=1; P<=paths; P++)
            std::cout << "#\t(" << P << ")\t" << 1/(SIGMA*sigma[P]) << "\t" << N[2*P]*1.0/N[0] << "\t" << N[2*P+1]*1.0/N[1] << "\n";
    } else {
        std::cout << "#\tProbability of Pathways\n" << "#\tPATH\tP_eq\tP_dr\n";
        for (int P=1; P<=paths; P++)
            std::cout << "#\t(" << P << ")\t" << 1/(SIGMA*sigma[P]) << "\t" << N[2*P]*1.0/N[0] << "\n";
    }

    // clearing the memory
    delete[] F;
    delete[] G;
    delete[] G_;
    delete[] w;
    delete[] x;
    delete[] N;
    delete[] p;
    delete[] m;
    delete[] sigma;
    delete[] scale;
    return 0;
}
