
colvar_neuralnetworkcompute.C

// -*- Mode:c++; c-basic-offset: 4; -*-

// This file is part of the Collective Variables module (Colvars).
// The original version of Colvars and its updates are located at:
// https://github.com/Colvars/colvars
// Please update all Colvars source files before making any changes.
// If you wish to distribute your changes, please submit them to the
// Colvars repository at GitHub.

#include <iostream>
#include <fstream>

#if (__cplusplus >= 201103L)
#include "colvar_neuralnetworkcompute.h"
#include "colvarparse.h"
#include "colvarproxy.h"

namespace neuralnetworkCV {
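// Built-in activation functions and their analytic derivatives, keyed by
// name; each map entry stores the pair {f, df/dx}.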
std::map<std::string, std::pair<std::function<double(double)>, std::function<double(double)>>> activation_function_map
{
    {"tanh",     {[](double x){return std::tanh(x);},
                  [](double x){return 1.0 - std::tanh(x) * std::tanh(x);}}},
    {"sigmoid",  {[](double x){return 1.0 / (1.0 + std::exp(-x));},
                  [](double x){return std::exp(-x) / ((1.0 + std::exp(-x)) * (1.0 + std::exp(-x)));}}},
    {"linear",   {[](double x){return x;},
                  [](double /*x*/){return 1.0;}}},
    {"relu",     {[](double x){return x < 0. ? 0. : x;},
                  [](double x){return x < 0. ? 0. : 1.;}}},
    {"lrelu100", {[](double x){return x < 0. ? 0.01 * x : x;},
                  [](double x){return x < 0. ? 0.01     : 1.;}}},
    {"elu",      {[](double x){return x < 0. ? std::exp(x)-1. : x;},
                  [](double x){return x < 0. ? std::exp(x)    : 1.;}}}
};
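// Illustrative lookup (not part of the original source): a named activation
// and its derivative can be evaluated through the map, e.g.
//   const auto& tanh_pair = activation_function_map.at("tanh");
//   const double y  = tanh_pair.first(0.5);   // tanh(0.5)
//   const double dy = tanh_pair.second(0.5);  // 1 - tanh(0.5)^2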

#ifdef LEPTON
customActivationFunction::customActivationFunction():
expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
input_reference(nullptr), derivative_reference(nullptr) {}

customActivationFunction::customActivationFunction(const std::string& expression_string):
expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
input_reference(nullptr), derivative_reference(nullptr) {
    setExpression(expression_string);
}

customActivationFunction::customActivationFunction(const customActivationFunction& source):
expression(), value_evaluator(nullptr), gradient_evaluator(nullptr),
input_reference(nullptr), derivative_reference(nullptr) {
    // check if the source object is initialized
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    }
}

customActivationFunction& customActivationFunction::operator=(const customActivationFunction& source) {
    if (source.value_evaluator != nullptr) {
        this->setExpression(source.expression);
    } else {
        expression = std::string();
        value_evaluator = nullptr;
        gradient_evaluator = nullptr;
        input_reference = nullptr;
        derivative_reference = nullptr;
    }
    return *this;
}

void customActivationFunction::setExpression(const std::string& expression_string) {
    expression = expression_string;
    Lepton::ParsedExpression parsed_expression;
    // the variable must be "x" for the input of an activation function
    const std::string activation_input_variable{"x"};
    // parse the expression
    try {
        parsed_expression = Lepton::Parser::parse(expression);
    } catch (...) {
        cvm::error("Error parsing or compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // compile the expression
    try {
        value_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.createCompiledExpression()));
    } catch (...) {
        cvm::error("Error compiling expression \"" + expression + "\".\n", COLVARS_INPUT_ERROR);
    }
    // create a compiled expression for the derivative
    try {
        gradient_evaluator = std::unique_ptr<Lepton::CompiledExpression>(new Lepton::CompiledExpression(parsed_expression.differentiate(activation_input_variable).createCompiledExpression()));
    } catch (...) {
        cvm::error("Error creating compiled expression for variable \"" + activation_input_variable + "\".\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled expression
    try {
        input_reference = &(value_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled expression.\n", COLVARS_INPUT_ERROR);
    }
    // get the reference to the input variable in the compiled derivative expression
    try {
        derivative_reference = &(gradient_evaluator->getVariableReference(activation_input_variable));
    } catch (...) {
        cvm::error("Error on getting the reference to variable \"" + activation_input_variable + "\" in the compiled derivative expression.\n", COLVARS_INPUT_ERROR);
    }
}

std::string customActivationFunction::getExpression() const {
    return expression;
}

double customActivationFunction::evaluate(double x) const {
    *input_reference = x;
    return value_evaluator->evaluate();
}

double customActivationFunction::derivative(double x) const {
    *derivative_reference = x;
    return gradient_evaluator->evaluate();
}
#endif

denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::function<double(double)>& f, const std::function<double(double)>& df): m_activation_function(f), m_activation_function_derivative(df) {
#ifdef LEPTON
    m_use_custom_activation = false;
#endif
    readFromFile(weights_file, biases_file);
}

#ifdef LEPTON
denseLayer::denseLayer(const std::string& weights_file, const std::string& biases_file, const std::string& custom_activation_expression) {
    m_use_custom_activation = true;
    m_custom_activation_function = customActivationFunction(custom_activation_expression);
    readFromFile(weights_file, biases_file);
}
#endif

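// Read the layer parameters from two plain-text files: each line of the
// weights file holds the whitespace-separated weights of one output neuron
// (one row of the weight matrix), and each line of the biases file holds a
// single bias value. The layer sizes are deduced from the parsed matrix.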
void denseLayer::readFromFile(const std::string& weights_file, const std::string& biases_file) {
    // parse weights file
    m_weights.clear();
    m_biases.clear();
    std::string line;
    colvarproxy *proxy = cvm::main()->proxy;
    auto &ifs_weights = proxy->input_stream(weights_file, "weights file");
    while (std::getline(ifs_weights, line)) {
        if (!ifs_weights) {
            throw std::runtime_error("I/O error while reading " + weights_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            std::vector<double> weights_tmp(splitted_data.size());
            for (size_t i = 0; i < splitted_data.size(); ++i) {
                try {
                    weights_tmp[i] = std::stod(splitted_data[i]);
                } catch (...) {
                    throw std::runtime_error("Cannot convert " + splitted_data[i] + " to a number while reading file " + weights_file);
                }
            }
            m_weights.push_back(weights_tmp);
        }
    }
    proxy->close_input_stream(weights_file);

    // parse biases file
    auto &ifs_biases = proxy->input_stream(biases_file, "biases file");
    while (std::getline(ifs_biases, line)) {
        if (!ifs_biases) {
            throw std::runtime_error("I/O error while reading " + biases_file);
        }
        std::vector<std::string> splitted_data;
        colvarparse::split_string(line, std::string{" "}, splitted_data);
        if (splitted_data.size() > 0) {
            double bias = 0;
            try {
                bias = std::stod(splitted_data[0]);
            } catch (...) {
                throw std::runtime_error("Cannot convert " + splitted_data[0] + " to a number while reading file " + biases_file);
            }
            m_biases.push_back(bias);
        }
    }
    proxy->close_input_stream(biases_file);

    m_input_size = m_weights[0].size();
    m_output_size = m_weights.size();
}

void denseLayer::setActivationFunction(const std::function<double(double)>& f, const std::function<double(double)>& df) {
    m_activation_function = f;
    m_activation_function_derivative = df;
}

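// Forward pass of a single dense layer:
//   output[i] = f( sum_j m_weights[i][j] * input[j] + m_biases[i] )
// where f is either the built-in activation function or, if LEPTON is
// enabled and requested, the custom Lepton expression.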
void denseLayer::compute(const std::vector<double>& input, std::vector<double>& output) const {
    for (size_t i = 0; i < m_output_size; ++i) {
        output[i] = 0;
        for (size_t j = 0; j < m_input_size; ++j) {
            output[i] += input[j] * m_weights[i][j];
        }
        output[i] += m_biases[i];
#ifdef LEPTON
        if (m_use_custom_activation) {
            output[i] = m_custom_activation_function.evaluate(output[i]);
        } else {
#endif
            output[i] = m_activation_function(output[i]);
#ifdef LEPTON
        }
#endif
    }
}

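// One element of the layer Jacobian, obtained from the chain rule:
//   d output[i] / d input[j] = f'( sum_k m_weights[i][k] * input[k] + m_biases[i] ) * m_weights[i][j]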
double denseLayer::computeGradientElement(const std::vector<double>& input, const size_t i, const size_t j) const {
    double sum_with_bias = 0;
    for (size_t j_in = 0; j_in < m_input_size; ++j_in) {
        sum_with_bias += input[j_in] * m_weights[i][j_in];
    }
    sum_with_bias += m_biases[i];
#ifdef LEPTON
    if (m_use_custom_activation) {
        const double grad_ij = m_custom_activation_function.derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
    } else {
#endif
        const double grad_ij = m_activation_function_derivative(sum_with_bias) * m_weights[i][j];
        return grad_ij;
#ifdef LEPTON
    }
#endif
}

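// Fill the layer's full Jacobian (m_output_size rows by m_input_size
// columns) by evaluating computeGradientElement() for every (i, j) pair.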
void denseLayer::computeGradient(const std::vector<double>& input, std::vector<std::vector<double>>& output_grad) const {
    for (size_t j = 0; j < m_input_size; ++j) {
        for (size_t i = 0; i < m_output_size; ++i) {
            output_grad[i][j] = computeGradientElement(input, i, j);
        }
    }
}

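// Allocate per-layer scratch storage: one output vector and one Jacobian
// matrix per dense layer, sized from the layers' input/output dimensions.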
neuralNetworkCompute::neuralNetworkCompute(const std::vector<denseLayer>& dense_layers): m_dense_layers(dense_layers) {
    m_layers_output.resize(m_dense_layers.size());
    m_grads_tmp.resize(m_dense_layers.size());
    for (size_t i_layer = 0; i_layer < m_layers_output.size(); ++i_layer) {
        m_layers_output[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), 0);
        m_grads_tmp[i_layer].assign(m_dense_layers[i_layer].getOutputSize(), std::vector<double>(m_dense_layers[i_layer].getInputSize(), 0));
    }
}

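// Append a layer to the network: the layer is accepted only if the network
// is still empty or its input size matches the output size of the current
// last layer; the per-layer scratch buffers are extended accordingly.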
bool neuralNetworkCompute::addDenseLayer(const denseLayer& layer) {
    if (m_dense_layers.empty()) {
        // add the layer directly if m_dense_layers is empty
        m_dense_layers.push_back(layer);
        m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
        m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
        return true;
    } else {
        // otherwise, check that the output size of the last layer in m_dense_layers matches the input size of the layer to be added
        if (m_dense_layers.back().getOutputSize() == layer.getInputSize()) {
            m_dense_layers.push_back(layer);
            m_layers_output.push_back(std::vector<double>(layer.getOutputSize()));
            m_grads_tmp.push_back(std::vector<std::vector<double>>(layer.getOutputSize(), std::vector<double>(layer.getInputSize(), 0)));
            return true;
        } else {
            return false;
        }
    }
}

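// Plain triple-loop product of an (m x n) matrix A and an (n x t) matrix B,
// used in compute() to chain the per-layer Jacobians.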
std::vector<std::vector<double>> neuralNetworkCompute::multiply_matrix(const std::vector<std::vector<double>>& A, const std::vector<std::vector<double>>& B) {
    const size_t m = A.size();
    const size_t n = B.size();
    if (A[0].size() != n) {
        std::cerr << "Error on multiplying matrices!\n";
    }
    const size_t t = B[0].size();
    std::vector<std::vector<double>> C(m, std::vector<double>(t, 0.0));
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < t; ++j) {
            for (size_t k = 0; k < n; ++k) {
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }
    return C;
}

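// Run the whole network: forward pass through every layer, per-layer
// Jacobians, and finally the total Jacobian of the outputs with respect to
// the inputs, accumulated by left-multiplying each layer's Jacobian onto
// the running product (chain rule).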
void neuralNetworkCompute::compute() {
    if (m_dense_layers.empty()) {
        return;
    }
    size_t i_layer;
    m_dense_layers[0].compute(m_input, m_layers_output[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].compute(m_layers_output[i_layer - 1], m_layers_output[i_layer]);
    }
    // gradients of each layer
    m_dense_layers[0].computeGradient(m_input, m_grads_tmp[0]);
    for (i_layer = 1; i_layer < m_dense_layers.size(); ++i_layer) {
        m_dense_layers[i_layer].computeGradient(m_layers_output[i_layer - 1], m_grads_tmp[i_layer]);
    }
    // chain rule
    if (m_dense_layers.size() > 1) {
        m_chained_grad = multiply_matrix(m_grads_tmp[1], m_grads_tmp[0]);
        for (i_layer = 2; i_layer < m_dense_layers.size(); ++i_layer) {
            m_chained_grad = multiply_matrix(m_grads_tmp[i_layer], m_chained_grad);
        }
    } else {
        m_chained_grad = m_grads_tmp[0];
    }
}
} // namespace neuralnetworkCV

#endif // (__cplusplus >= 201103L)
