fix: error handling in opening of external files (neural_network.cpp). (#1044)
* Error Handling of Files
* exit -> std::exit
* Improved Overall Error handling and reporting
* test commit
added spaces in include to try to trigger GitHub Actions
* updating DIRECTORY.md
* clang-format and clang-tidy fixes for 51e943d0
Co-authored-by: Krishna Vedala <7001608+kvedala@users.noreply.github.com>
Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
parent c4f586b6ab · commit 44f4f4d6c3
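In substance, the change wraps every file stream the network opens — the CSV loader `get_XY_from_csv`, `save_model`, and `load_model` — in an `is_open()` check that names the reporting function via `__func__` and the offending file, then exits with `std::exit(EXIT_FAILURE)` instead of reading or writing a dead stream. A minimal, self-contained sketch of the pattern (illustrative only, not code from the repository; the file name is hypothetical):

```cpp
#include <cstdlib>   // std::exit, EXIT_FAILURE
#include <fstream>   // std::ifstream
#include <iostream>  // std::cerr
#include <string>

int main() {
    const std::string file_name = "iris.csv";  // hypothetical input file
    std::ifstream in_file(file_name);
    // Opening can fail (missing file, bad permissions, ...): check before use.
    if (!in_file.is_open()) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr << "Unable to open file: " << file_name << std::endl;
        std::exit(EXIT_FAILURE);
    }
    // ... safe to read from in_file here ...
    return 0;
}
```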
neural_network.cpp
@@ -2,143 +2,134 @@
 * @file
 * @author [Deep Raval](https://github.com/imdeep2905)
 *
-* @brief Implementation of [Multilayer Perceptron] (https://en.wikipedia.org/wiki/Multilayer_perceptron).
+* @brief Implementation of [Multilayer Perceptron]
+* (https://en.wikipedia.org/wiki/Multilayer_perceptron).
 *
 * @details
-* A multilayer perceptron (MLP) is a class of feedforward artificial neural network (ANN). The term MLP is used ambiguously,
-* sometimes loosely to any feedforward ANN, sometimes strictly to refer to networks composed of multiple layers of perceptrons
-* (with threshold activation). Multilayer perceptrons are sometimes colloquially referred to as "vanilla" neural networks,
-* especially when they have a single hidden layer.
+* A multilayer perceptron (MLP) is a class of feedforward artificial neural
+* network (ANN). The term MLP is used ambiguously, sometimes loosely to any
+* feedforward ANN, sometimes strictly to refer to networks composed of multiple
+* layers of perceptrons (with threshold activation). Multilayer perceptrons are
+* sometimes colloquially referred to as "vanilla" neural networks, especially
+* when they have a single hidden layer.
 *
-* An MLP consists of at least three layers of nodes: an input layer, a hidden layer and an output layer. Except for the
-* input nodes, each node is a neuron that uses a nonlinear activation function. MLP utilizes a supervised learning technique
-* called backpropagation for training. Its multiple layers and non-linear activation distinguish MLP from a linear
-* perceptron. It can distinguish data that is not linearly separable.
+* An MLP consists of at least three layers of nodes: an input layer, a hidden
+* layer and an output layer. Except for the input nodes, each node is a neuron
+* that uses a nonlinear activation function. MLP utilizes a supervised learning
+* technique called backpropagation for training. Its multiple layers and
+* non-linear activation distinguish MLP from a linear perceptron. It can
+* distinguish data that is not linearly separable.
 *
-* See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training algorithm.
+* See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for
+* training algorithm.
 *
-* \note This implementation uses mini-batch gradient descent as optimizer and MSE as loss function. Bias is also not included.
+* \note This implementation uses mini-batch gradient descent as optimizer and
+* MSE as loss function. Bias is also not included.
 */

-#include "vector_ops.hpp" // Custom header file for vector operations
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cmath>
+#include <fstream>
 #include <iostream>
+#include <sstream>
+#include <string>
 #include <valarray>
 #include <vector>
-#include <cmath>
-#include <algorithm>
-#include <chrono>
-#include <string>
-#include <fstream>
-#include <sstream>
-#include <cassert>
+#include "vector_ops.hpp" // Custom header file for vector operations

 /** \namespace machine_learning
  * \brief Machine learning algorithms
  */
 namespace machine_learning {
 /** \namespace neural_network
  * \brief Neural Network or Multilayer Perceptron
  */
 namespace neural_network {
 /** \namespace activations
  * \brief Various activation functions used in Neural network
  */
 namespace activations {
 /**
  * Sigmoid function
  * @param X Value
  * @return Returns sigmoid(x)
  */
-double sigmoid (const double &x) {
-    return 1.0 / (1.0 + std::exp(-x));
-}
+double sigmoid(const double &x) { return 1.0 / (1.0 + std::exp(-x)); }

 /**
  * Derivative of sigmoid function
  * @param X Value
  * @return Returns derivative of sigmoid(x)
  */
-double dsigmoid (const double &x) {
-    return x * (1 - x);
-}
+double dsigmoid(const double &x) { return x * (1 - x); }

 /**
  * Relu function
  * @param X Value
  * @returns relu(x)
  */
-double relu (const double &x) {
-    return std::max(0.0, x);
-}
+double relu(const double &x) { return std::max(0.0, x); }

 /**
  * Derivative of relu function
  * @param X Value
  * @returns derivative of relu(x)
  */
-double drelu (const double &x) {
-    return x >= 0.0 ? 1.0 : 0.0;
-}
+double drelu(const double &x) { return x >= 0.0 ? 1.0 : 0.0; }

 /**
  * Tanh function
  * @param X Value
  * @return Returns tanh(x)
  */
-double tanh (const double &x) {
-    return 2 / (1 + std::exp(-2 * x)) - 1;
-}
+double tanh(const double &x) { return 2 / (1 + std::exp(-2 * x)) - 1; }

 /**
  * Derivative of Sigmoid function
  * @param X Value
  * @return Returns derivative of tanh(x)
  */
-double dtanh (const double &x) {
-    return 1 - x * x;
-}
+double dtanh(const double &x) { return 1 - x * x; }
 }  // namespace activations
 /** \namespace util_functions
  * \brief Various utility functions used in Neural network
  */
 namespace util_functions {
 /**
  * Square function
  * @param X Value
  * @return Returns x * x
  */
-double square(const double &x) {
-    return x * x;
-}
+double square(const double &x) { return x * x; }
 /**
  * Identity function
  * @param X Value
  * @return Returns x
  */
-double identity_function(const double &x) {
-    return x;
-}
+double identity_function(const double &x) { return x; }
 }  // namespace util_functions
 /** \namespace layers
  * \brief This namespace contains layers used
  * in MLP.
  */
 namespace layers {
 /**
- * neural_network::layers::DenseLayer class is used to store all necessary information about
- * the layers (i.e. neurons, activation and kernal). This class
- * is used by NeuralNetwork class to store layers.
+ * neural_network::layers::DenseLayer class is used to store all necessary
+ * information about the layers (i.e. neurons, activation and kernal). This
+ * class is used by NeuralNetwork class to store layers.
  *
  */
 class DenseLayer {
  public:
     // To store activation function and it's derivative
     double (*activation_function)(const double &);
     double (*dactivation_function)(const double &);
     int neurons;             // To store number of neurons (used in summary)
     std::string activation;  // To store activation name (used in summary)
-    std::vector <std::valarray <double>> kernal; // To store kernal (aka weights)
+    std::vector<std::valarray<double>> kernal;  // To store kernal (aka weights)

     /**
      * Constructor for neural_network::layers::DenseLayer class
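A note for readers on the one-line derivative helpers above: `dsigmoid` and `dtanh` take the activation's *output*, not its input. For s = σ(x) and t = tanh(x),

```latex
\sigma'(x) = \sigma(x)\bigl(1 - \sigma(x)\bigr) = s(1 - s), \qquad
\tanh'(x) = 1 - \tanh^{2}(x) = 1 - t^{2}
```

which is exactly `dsigmoid(s) = s * (1 - s)` and `dtanh(t) = 1 - t * t`; the training loop later in this diff relies on that convention by passing the stored post-activation values `activations[j + 1]` to `dactivation_function`. (The Doxygen line above `dtanh` still reads "Derivative of Sigmoid function" in both versions — a copy-paste leftover this commit does not touch.)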
@@ -147,42 +138,39 @@ namespace machine_learning {
      * @param kernal_shape shape of kernal
      * @param random_kernal flag for whether to intialize kernal randomly
      */
-    DenseLayer(const int &neurons,
-               const std::string &activation,
+    DenseLayer(const int &neurons, const std::string &activation,
                const std::pair<size_t, size_t> &kernal_shape,
                const bool &random_kernal) {
         // Choosing activation (and it's derivative)
         if (activation == "sigmoid") {
             activation_function = neural_network::activations::sigmoid;
             dactivation_function = neural_network::activations::sigmoid;
-        }
-        else if (activation == "relu") {
+        } else if (activation == "relu") {
             activation_function = neural_network::activations::relu;
             dactivation_function = neural_network::activations::drelu;
-        }
-        else if (activation == "tanh") {
+        } else if (activation == "tanh") {
             activation_function = neural_network::activations::tanh;
             dactivation_function = neural_network::activations::dtanh;
-        }
-        else if (activation == "none") {
+        } else if (activation == "none") {
             // Set identity function in casse of none is supplied
-            activation_function = neural_network::util_functions::identity_function;
-            dactivation_function = neural_network::util_functions::identity_function;
-        }
-        else {
+            activation_function =
+                neural_network::util_functions::identity_function;
+            dactivation_function =
+                neural_network::util_functions::identity_function;
+        } else {
             // If supplied activation is invalid
-            std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, ";
-            std::cerr << "Expected from {none, sigmoid, relu, tanh} got ";
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Invalid argument. Expected {none, sigmoid, relu, "
+                         "tanh} got ";
             std::cerr << activation << std::endl;
             std::exit(EXIT_FAILURE);
         }
-        this -> activation = activation; // Setting activation name
-        this -> neurons = neurons; // Setting number of neurons
+        this->activation = activation;  // Setting activation name
+        this->neurons = neurons;        // Setting number of neurons
         // Initialize kernal according to flag
-        if(random_kernal) {
+        if (random_kernal) {
             uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0);
-        }
-        else {
+        } else {
             unit_matrix_initialization(kernal, kernal_shape);
         }
     }
@@ -192,37 +180,35 @@ namespace machine_learning {
      * @param activation activation function for layer
      * @param kernal values of kernal (useful in loading model)
      */
-    DenseLayer (const int &neurons,
-                const std::string &activation,
-                const std::vector <std::valarray<double>> &kernal) {
+    DenseLayer(const int &neurons, const std::string &activation,
+               const std::vector<std::valarray<double>> &kernal) {
         // Choosing activation (and it's derivative)
         if (activation == "sigmoid") {
             activation_function = neural_network::activations::sigmoid;
             dactivation_function = neural_network::activations::sigmoid;
-        }
-        else if (activation == "relu") {
+        } else if (activation == "relu") {
             activation_function = neural_network::activations::relu;
             dactivation_function = neural_network::activations::drelu;
-        }
-        else if (activation == "tanh") {
+        } else if (activation == "tanh") {
             activation_function = neural_network::activations::tanh;
             dactivation_function = neural_network::activations::dtanh;
-        }
-        else if (activation == "none") {
+        } else if (activation == "none") {
             // Set identity function in casse of none is supplied
-            activation_function = neural_network::util_functions::identity_function;
-            dactivation_function = neural_network::util_functions::identity_function;
-        }
-        else {
+            activation_function =
+                neural_network::util_functions::identity_function;
+            dactivation_function =
+                neural_network::util_functions::identity_function;
+        } else {
             // If supplied activation is invalid
-            std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, ";
-            std::cerr << "Expected from {none, sigmoid, relu, tanh} got ";
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Invalid argument. Expected {none, sigmoid, relu, "
+                         "tanh} got ";
             std::cerr << activation << std::endl;
             std::exit(EXIT_FAILURE);
         }
-        this -> activation = activation; // Setting activation name
-        this -> neurons = neurons; // Setting number of neurons
-        this -> kernal = kernal; // Setting supplied kernal values
+        this->activation = activation;  // Setting activation name
+        this->neurons = neurons;        // Setting number of neurons
+        this->kernal = kernal;          // Setting supplied kernal values
     }

     /**
@@ -240,7 +226,7 @@ namespace machine_learning {
     /**
      * Copy assignment operator for class DenseLayer
      */
-    DenseLayer& operator = (const DenseLayer &layer) = default;
+    DenseLayer &operator=(const DenseLayer &layer) = default;

     /**
      * Move constructor for class DenseLayer
@@ -250,42 +236,46 @@ namespace machine_learning {
     /**
      * Move assignment operator for class DenseLayer
      */
-    DenseLayer& operator = (DenseLayer &&) = default;
+    DenseLayer &operator=(DenseLayer &&) = default;
 };
 }  // namespace layers
 /**
  * NeuralNetwork class is implements MLP. This class is
  * used by actual user to create and train networks.
  *
  */
 class NeuralNetwork {
  private:
-    std::vector <neural_network::layers::DenseLayer> layers; // To store layers
+    std::vector<neural_network::layers::DenseLayer> layers;  // To store layers
     /**
      * Private Constructor for class NeuralNetwork. This constructor
      * is used internally to load model.
      * @param config vector containing pair (neurons, activation)
      * @param kernals vector containing all pretrained kernals
      */
-    NeuralNetwork(const std::vector <std::pair<int, std::string>> &config,
-                  const std::vector <std::vector<std::valarray<double>>> &kernals) {
+    NeuralNetwork(
+        const std::vector<std::pair<int, std::string>> &config,
+        const std::vector<std::vector<std::valarray<double>>> &kernals) {
         // First layer should not have activation
-        if(config.begin() -> second != "none") {
-            std::cerr << "ERROR: First layer can't have activation other than none";
+        if (config.begin()->second != "none") {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr
+                << "First layer can't have activation other than none got "
+                << config.begin()->second;
             std::cerr << std::endl;
             std::exit(EXIT_FAILURE);
         }
         // Network should have atleast two layers
-        if(config.size() <= 1) {
-            std::cerr << "ERROR: Invalid size of network, ";
+        if (config.size() <= 1) {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Invalid size of network, ";
             std::cerr << "Atleast two layers are required";
             std::exit(EXIT_FAILURE);
         }
         // Reconstructing all pretrained layers
-        for(size_t i = 0; i < config.size(); i++) {
-            layers.emplace_back(neural_network::layers::DenseLayer(config[i].first,
-                                                                   config[i].second,
-                                                                   kernals[i]));
+        for (size_t i = 0; i < config.size(); i++) {
+            layers.emplace_back(neural_network::layers::DenseLayer(
+                config[i].first, config[i].second, kernals[i]));
         }
         std::cout << "INFO: Network constructed successfully" << std::endl;
     }
@@ -295,18 +285,19 @@ namespace machine_learning {
      * backpropagation, single predict and batch predict.
      * @param X input vector
      */
-    std::vector<std::vector<std::valarray <double>>>
-    __detailed_single_prediction (const std::vector<std::valarray <double>> &X) {
-        std::vector<std::vector < std::valarray <double> >> details;
-        std::vector < std::valarray <double> > current_pass = X;
+    std::vector<std::vector<std::valarray<double>>>
+    __detailed_single_prediction(const std::vector<std::valarray<double>> &X) {
+        std::vector<std::vector<std::valarray<double>>> details;
+        std::vector<std::valarray<double>> current_pass = X;
         details.emplace_back(X);
-        for(const auto &l : layers) {
+        for (const auto &l : layers) {
             current_pass = multiply(current_pass, l.kernal);
             current_pass = apply_function(current_pass, l.activation_function);
             details.emplace_back(current_pass);
         }
         return details;
     }

  public:
     /**
      * Default Constructor for class NeuralNetwork. This constructor
@@ -319,31 +310,34 @@ namespace machine_learning {
      * is used by user.
      * @param config vector containing pair (neurons, activation)
      */
-    explicit NeuralNetwork(const std::vector <std::pair<int, std::string>> &config) {
+    explicit NeuralNetwork(
+        const std::vector<std::pair<int, std::string>> &config) {
         // First layer should not have activation
-        if(config.begin() -> second != "none") {
-            std::cerr << "ERROR: First layer can't have activation other than none";
+        if (config.begin()->second != "none") {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr
+                << "First layer can't have activation other than none got "
+                << config.begin()->second;
             std::cerr << std::endl;
             std::exit(EXIT_FAILURE);
         }
         // Network should have atleast two layers
-        if(config.size() <= 1) {
-            std::cerr << "ERROR: Invalid size of network, ";
+        if (config.size() <= 1) {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Invalid size of network, ";
             std::cerr << "Atleast two layers are required";
             std::exit(EXIT_FAILURE);
         }
         // Separately creating first layer so it can have unit matrix
         // as kernal.
-        layers.push_back(neural_network::layers::DenseLayer(config[0].first,
-                                                            config[0].second,
-                                                            {config[0].first, config[0].first},
-                                                            false));
+        layers.push_back(neural_network::layers::DenseLayer(
+            config[0].first, config[0].second,
+            {config[0].first, config[0].first}, false));
         // Creating remaining layers
-        for(size_t i = 1; i < config.size(); i++) {
-            layers.push_back(neural_network::layers::DenseLayer(config[i].first,
-                                                                config[i].second,
-                                                                {config[i - 1].first, config[i].first},
-                                                                true));
+        for (size_t i = 1; i < config.size(); i++) {
+            layers.push_back(neural_network::layers::DenseLayer(
+                config[i].first, config[i].second,
+                {config[i - 1].first, config[i].first}, true));
         }
         std::cout << "INFO: Network constructed successfully" << std::endl;
     }
@@ -363,7 +357,7 @@ namespace machine_learning {
     /**
      * Copy assignment operator for class NeuralNetwork
      */
-    NeuralNetwork& operator = (const NeuralNetwork &model) = default;
+    NeuralNetwork &operator=(const NeuralNetwork &model) = default;

     /**
      * Move constructor for class NeuralNetwork
@@ -373,7 +367,7 @@ namespace machine_learning {
     /**
      * Move assignment operator for class NeuralNetwork
      */
-    NeuralNetwork& operator = (NeuralNetwork &&) = default;
+    NeuralNetwork &operator=(NeuralNetwork &&) = default;

     /**
      * Function to get X and Y from csv file (where X = data, Y = label)
@@ -383,34 +377,40 @@ namespace machine_learning {
      * @param slip_lines number of lines to skip
      * @return returns pair of X and Y
      */
-    std::pair<std::vector<std::vector<std::valarray<double>>>, std::vector<std::vector<std::valarray<double>>>>
-    get_XY_from_csv(const std::string &file_name,
-                    const bool &last_label,
-                    const bool &normalize,
-                    const int &slip_lines = 1) {
+    std::pair<std::vector<std::vector<std::valarray<double>>>,
+              std::vector<std::vector<std::valarray<double>>>>
+    get_XY_from_csv(const std::string &file_name, const bool &last_label,
+                    const bool &normalize, const int &slip_lines = 1) {
         std::ifstream in_file;                          // Ifstream to read file
         in_file.open(file_name.c_str(), std::ios::in);  // Open file
-        std::vector <std::vector<std::valarray<double>>> X, Y; // To store X and Y
+        // If there is any problem in opening file
+        if (!in_file.is_open()) {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Unable to open file: " << file_name << std::endl;
+            std::exit(EXIT_FAILURE);
+        }
+        std::vector<std::vector<std::valarray<double>>> X,
+            Y;             // To store X and Y
         std::string line;  // To store each line
         // Skip lines
-        for(int i = 0; i < slip_lines; i ++) {
+        for (int i = 0; i < slip_lines; i++) {
             std::getline(in_file, line, '\n');  // Ignore line
         }
         // While file has information
-        while(!in_file.eof() && std::getline(in_file, line, '\n'))
-        {
-            std::valarray <double> x_data, y_data; // To store single sample and label
+        while (!in_file.eof() && std::getline(in_file, line, '\n')) {
+            std::valarray<double> x_data,
+                y_data;  // To store single sample and label
             std::stringstream ss(line);  // Constructing stringstream from line
             std::string token;  // To store each token in line (seprated by ',')
-            while(std::getline(ss, token, ',')) { // For each token
+            while (std::getline(ss, token, ',')) {  // For each token
                 // Insert numerical value of token in x_data
                 x_data = insert_element(x_data, std::stod(token));
             }
             // If label is in last column
-            if(last_label) {
-                y_data.resize(this -> layers.back().neurons);
+            if (last_label) {
+                y_data.resize(this->layers.back().neurons);
                 // If task is classification
-                if(y_data.size() > 1) {
+                if (y_data.size() > 1) {
                     y_data[x_data[x_data.size() - 1]] = 1;
                 }
                 // If task is regrssion (of single value)
@@ -418,11 +418,10 @@
                     y_data[0] = x_data[x_data.size() - 1];
                 }
                 x_data = pop_back(x_data);  // Remove label from x_data
-            }
-            else {
-                y_data.resize(this -> layers.back().neurons);
+            } else {
+                y_data.resize(this->layers.back().neurons);
                 // If task is classification
-                if(y_data.size() > 1) {
+                if (y_data.size() > 1) {
                     y_data[x_data[x_data.size() - 1]] = 1;
                 }
                 // If task is regrssion (of single value)
@@ -435,12 +434,12 @@
             X.push_back({x_data});
             Y.push_back({y_data});
         }
-        in_file.close();
         // Normalize training data if flag is set
-        if(normalize) {
+        if (normalize) {
             // Scale data between 0 and 1 using min-max scaler
             X = minmax_scaler(X, 0.01, 1.0);
         }
+        in_file.close();  // Closing file
         return make_pair(X, Y);  // Return pair of X and Y
     }

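To make the loader's label handling concrete, take a hypothetical row (not one from the diff): with `last_label = true` and a three-neuron output layer, the line `5.1,3.5,1.4,0.2,0` is first parsed entirely into `x_data`; the trailing `0` is then read as a class index and one-hot encoded into `y_data = {1, 0, 0}`, and `pop_back` strips it from `x_data`, leaving the four features. With a single output neuron the trailing value is instead copied verbatim into `y_data[0]` — the regression branch.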
@@ -449,10 +448,10 @@
      * @param X array of feature vectors
      * @return returns predictions as vector
      */
-    std::vector<std::valarray <double>>
-    single_predict (const std::vector<std::valarray <double>> &X) {
+    std::vector<std::valarray<double>> single_predict(
+        const std::vector<std::valarray<double>> &X) {
         // Get activations of all layers
-        auto activations = this -> __detailed_single_prediction(X);
+        auto activations = this->__detailed_single_prediction(X);
         // Return activations of last layer (actual predicted values)
         return activations.back();
     }
@@ -462,13 +461,14 @@
      * @param X array of feature vectors
      * @return returns predicted values as vector
      */
-    std::vector < std::vector <std::valarray<double>>>
-    batch_predict (const std::vector <std::vector <std::valarray <double>>> &X) {
+    std::vector<std::vector<std::valarray<double>>> batch_predict(
+        const std::vector<std::vector<std::valarray<double>>> &X) {
         // Store predicted values
-        std::vector < std::vector <std::valarray<double>>> predicted_batch(X.size());
-        for(size_t i = 0; i < X.size(); i++) { // For every sample
+        std::vector<std::vector<std::valarray<double>>> predicted_batch(
+            X.size());
+        for (size_t i = 0; i < X.size(); i++) {  // For every sample
             // Push predicted values
-            predicted_batch[i] = this -> single_predict(X[i]);
+            predicted_batch[i] = this->single_predict(X[i]);
         }
         return predicted_batch;  // Return predicted values
     }
@@ -482,71 +482,83 @@
      * @param batch_size batch size for gradient descent (default = 32)
      * @param shuffle flag for whether to shuffle data (default = true)
      */
-    void fit(const std::vector < std::vector <std::valarray<double>>> &X_,
-             const std::vector < std::vector <std::valarray<double>>> &Y_,
-             const int &epochs = 100,
-             const double &learning_rate = 0.01,
-             const size_t &batch_size = 32,
-             const bool &shuffle = true) {
-        std::vector < std::vector <std::valarray<double>>> X = X_, Y = Y_;
+    void fit(const std::vector<std::vector<std::valarray<double>>> &X_,
+             const std::vector<std::vector<std::valarray<double>>> &Y_,
+             const int &epochs = 100, const double &learning_rate = 0.01,
+             const size_t &batch_size = 32, const bool &shuffle = true) {
+        std::vector<std::vector<std::valarray<double>>> X = X_, Y = Y_;
         // Both label and input data should have same size
         if (X.size() != Y.size()) {
-            std::cerr << "ERROR : X and Y in fit have different sizes" << std::endl;
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "X and Y in fit have different sizes" << std::endl;
             std::exit(EXIT_FAILURE);
         }
         std::cout << "INFO: Training Started" << std::endl;
         for (int epoch = 1; epoch <= epochs; epoch++) {  // For every epoch
             // Shuffle X and Y if flag is set
-            if(shuffle) {
+            if (shuffle) {
                 equal_shuffle(X, Y);
             }
-            auto start = std::chrono::high_resolution_clock::now(); // Start clock
-            double loss = 0, acc = 0; // Intialize performance metrics with zero
+            auto start =
+                std::chrono::high_resolution_clock::now();  // Start clock
+            double loss = 0,
+                   acc = 0;  // Intialize performance metrics with zero
             // For each starting index of batch
-            for(size_t batch_start = 0; batch_start < X.size(); batch_start += batch_size) {
-                for(size_t i = batch_start; i < std::min(X.size(), batch_start + batch_size); i++) {
-                    std::vector <std::valarray<double>> grad, cur_error, predicted;
-                    auto activations = this -> __detailed_single_prediction(X[i]);
+            for (size_t batch_start = 0; batch_start < X.size();
+                 batch_start += batch_size) {
+                for (size_t i = batch_start;
+                     i < std::min(X.size(), batch_start + batch_size); i++) {
+                    std::vector<std::valarray<double>> grad, cur_error,
+                        predicted;
+                    auto activations = this->__detailed_single_prediction(X[i]);
                     // Gradients vector to store gradients for all layers
                     // They will be averaged and applied to kernal
                     std::vector<std::vector<std::valarray<double>>> gradients;
-                    gradients.resize(this -> layers.size());
+                    gradients.resize(this->layers.size());
                     // First intialize gradients to zero
-                    for(size_t i = 0; i < gradients.size(); i++) {
-                        zeroes_initialization(gradients[i], get_shape(this -> layers[i].kernal));
+                    for (size_t i = 0; i < gradients.size(); i++) {
+                        zeroes_initialization(
+                            gradients[i], get_shape(this->layers[i].kernal));
                     }
                     predicted = activations.back();  // Predicted vector
                     cur_error = predicted - Y[i];    // Absoulute error
                     // Calculating loss with MSE
-                    loss += sum(apply_function(cur_error, neural_network::util_functions::square));
+                    loss += sum(apply_function(
+                        cur_error, neural_network::util_functions::square));
                     // If prediction is correct
-                    if(argmax(predicted) == argmax(Y[i])) {
+                    if (argmax(predicted) == argmax(Y[i])) {
                         acc += 1;
                     }
                     // For every layer (except first) starting from last one
-                    for(size_t j = this -> layers.size() - 1; j >= 1; j--) {
+                    for (size_t j = this->layers.size() - 1; j >= 1; j--) {
                         // Backpropogating errors
-                        cur_error = hadamard_product(cur_error,
-                                                     apply_function(activations[j + 1],
-                                                                    this -> layers[j].dactivation_function));
+                        cur_error = hadamard_product(
+                            cur_error,
+                            apply_function(
+                                activations[j + 1],
+                                this->layers[j].dactivation_function));
                         // Calculating gradient for current layer
                         grad = multiply(transpose(activations[j]), cur_error);
                         // Change error according to current kernal values
-                        cur_error = multiply(cur_error, transpose(this -> layers[j].kernal));
+                        cur_error = multiply(cur_error,
+                                             transpose(this->layers[j].kernal));
                         // Adding gradient values to collection of gradients
                         gradients[j] = gradients[j] + grad / double(batch_size);
                     }
                     // Applying gradients
-                    for(size_t j = this -> layers.size() - 1; j >= 1; j--) {
+                    for (size_t j = this->layers.size() - 1; j >= 1; j--) {
                         // Updating kernal (aka weights)
-                        this -> layers[j].kernal = this -> layers[j].kernal -
-                                                   gradients[j] * learning_rate;
+                        this->layers[j].kernal = this->layers[j].kernal -
+                                                 gradients[j] * learning_rate;
                    }
                }
            }
-            auto stop = std::chrono::high_resolution_clock::now(); // Stoping the clock
+            auto stop =
+                std::chrono::high_resolution_clock::now();  // Stoping the clock
            // Calculate time taken by epoch
-            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
+            auto duration =
+                std::chrono::duration_cast<std::chrono::microseconds>(stop -
+                                                                      start);
            loss /= X.size();        // Averaging loss
            acc /= X.size();         // Averaging accuracy
            std::cout.precision(4);  // set output precision to 4
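Restating the training loop above as equations (my notation, inferred from the code rather than stated by the commit): let o_0 = x be the input row, o_j the output of layer j, and e the running error initialised to e = ŷ − y. For each layer j from the last down to the first hidden one,

```latex
\delta_j = e \odot f_j'(o_j), \qquad
\nabla W_j = o_{j-1}^{\top}\,\delta_j, \qquad
e \leftarrow \delta_j\,W_j^{\top}
```

and each sample's gradient, scaled by 1/`batch_size` through the running average in `gradients`, is immediately subtracted from the kernal: W_j ← W_j − (η/B)∇W_j, with η the `learning_rate` and B the batch size.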
@@ -554,7 +566,8 @@
             std::cout << "Training: Epoch " << epoch << '/' << epochs;
             std::cout << ", Loss: " << loss;
             std::cout << ", Accuracy: " << acc;
-            std::cout << ", Taken time: " << duration.count() / 1e6 << " seconds";
+            std::cout << ", Taken time: " << duration.count() / 1e6
+                      << " seconds";
             std::cout << std::endl;
         }
         return;
@@ -571,18 +584,17 @@
      * @param batch_size batch size for gradient descent (default = 32)
      * @param shuffle flag for whether to shuffle data (default = true)
      */
-    void fit_from_csv (const std::string &file_name,
-                       const bool &last_label,
-                       const int &epochs,
-                       const double &learning_rate,
-                       const bool &normalize,
-                       const int &slip_lines = 1,
+    void fit_from_csv(const std::string &file_name, const bool &last_label,
+                      const int &epochs, const double &learning_rate,
+                      const bool &normalize, const int &slip_lines = 1,
                       const size_t &batch_size = 32,
                       const bool &shuffle = true) {
         // Getting training data from csv file
-        auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines);
+        auto data =
+            this->get_XY_from_csv(file_name, last_label, normalize, slip_lines);
         // Fit the model on training data
-        this -> fit(data.first, data.second, epochs, learning_rate, batch_size, shuffle);
+        this->fit(data.first, data.second, epochs, learning_rate, batch_size,
+                  shuffle);
         return;
     }

@@ -591,20 +603,22 @@
      * @param X array of feature vectors (input data)
      * @param Y array of target values (label)
      */
-    void evaluate(const std::vector< std::vector <std::valarray <double>>> &X,
-                  const std::vector< std::vector <std::valarray <double>>> &Y) {
+    void evaluate(const std::vector<std::vector<std::valarray<double>>> &X,
+                  const std::vector<std::vector<std::valarray<double>>> &Y) {
         std::cout << "INFO: Evaluation Started" << std::endl;
         double acc = 0, loss = 0;  // intialize performance metrics with zero
-        for(size_t i = 0; i < X.size(); i++) { // For every sample in input
+        for (size_t i = 0; i < X.size(); i++) {  // For every sample in input
             // Get predictions
-            std::vector<std::valarray<double>> pred = this -> single_predict(X[i]);
+            std::vector<std::valarray<double>> pred =
+                this->single_predict(X[i]);
             // If predicted class is correct
-            if(argmax(pred) == argmax(Y[i])) {
+            if (argmax(pred) == argmax(Y[i])) {
                 acc += 1;  // Increment accuracy
             }
             // Calculating loss - Mean Squared Error
             loss += sum(apply_function((Y[i] - pred),
-                                       neural_network::util_functions::square) * 0.5);
+                                       neural_network::util_functions::square) *
+                        0.5);
         }
         acc /= X.size();   // Averaging accuracy
         loss /= X.size();  // Averaging loss
@@ -621,14 +635,13 @@
      * @param normalize flag for whether to normalize data
      * @param slip_lines number of lines to skip
      */
-    void evaluate_from_csv (const std::string &file_name,
-                            const bool &last_label,
-                            const bool &normalize,
-                            const int &slip_lines = 1) {
+    void evaluate_from_csv(const std::string &file_name, const bool &last_label,
+                           const bool &normalize, const int &slip_lines = 1) {
         // Getting training data from csv file
-        auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines);
+        auto data =
+            this->get_XY_from_csv(file_name, last_label, normalize, slip_lines);
         // Evaluating model
-        this -> evaluate(data.first, data.second);
+        this->evaluate(data.first, data.second);
         return;
     }

@@ -636,28 +649,35 @@
      * Function to save current model.
      * @param file_name file name to save model (*.model)
      */
-    void save_model (const std::string &_file_name) {
+    void save_model(const std::string &_file_name) {
         std::string file_name = _file_name;
         // Adding ".model" extension if it is not already there in name
-        if(file_name.find(".model") == file_name.npos) {
+        if (file_name.find(".model") == file_name.npos) {
             file_name += ".model";
         }
         std::ofstream out_file;  // Ofstream to write in file
         // Open file in out|trunc mode
-        out_file.open(file_name.c_str(), std::ofstream::out | std::ofstream::trunc);
+        out_file.open(file_name.c_str(),
+                      std::ofstream::out | std::ofstream::trunc);
+        // If there is any problem in opening file
+        if (!out_file.is_open()) {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Unable to open file: " << file_name << std::endl;
+            std::exit(EXIT_FAILURE);
+        }
         /**
             Format in which model is saved:

             total_layers
-            neurons(1st neural_network::layers::DenseLayer) activation_name(1st neural_network::layers::DenseLayer)
-            kernal_shape(1st neural_network::layers::DenseLayer)
-            kernal_values
+            neurons(1st neural_network::layers::DenseLayer) activation_name(1st
+            neural_network::layers::DenseLayer) kernal_shape(1st
+            neural_network::layers::DenseLayer) kernal_values
             .
             .
             .
-            neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth neural_network::layers::DenseLayer)
-            kernal_shape(Nth neural_network::layers::DenseLayer)
-            kernal_value
+            neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth
+            neural_network::layers::DenseLayer) kernal_shape(Nth
+            neural_network::layers::DenseLayer) kernal_value

             For Example, pretrained model with 3 layers:
             <pre>
@@ -687,12 +707,12 @@
         // Saving model in the same format
         out_file << layers.size();
         out_file << std::endl;
-        for(const auto &layer : this -> layers) {
+        for (const auto &layer : this->layers) {
             out_file << layer.neurons << ' ' << layer.activation << std::endl;
             const auto shape = get_shape(layer.kernal);
             out_file << shape.first << ' ' << shape.second << std::endl;
-            for(const auto &row : layer.kernal) {
-                for(const auto &val : row) {
+            for (const auto &row : layer.kernal) {
+                for (const auto &val : row) {
                     out_file << val << ' ';
                 }
                 out_file << std::endl;
@@ -700,6 +720,7 @@
         }
         std::cout << "INFO: Model saved successfully with name : ";
         std::cout << file_name << std::endl;
+        out_file.close();  // Closing file
         return;
     }

@@ -708,55 +729,73 @@
      * @param file_name file from which model will be loaded (*.model)
      * @return instance of NeuralNetwork class with pretrained weights
      */
-    NeuralNetwork load_model (const std::string &file_name) {
+    NeuralNetwork load_model(const std::string &file_name) {
         std::ifstream in_file;            // Ifstream to read file
         in_file.open(file_name.c_str());  // Openinig file
-        std::vector <std::pair<int, std::string>> config; // To store config
-        std::vector <std::vector<std::valarray<double>>> kernals; // To store pretrained kernals
+        // If there is any problem in opening file
+        if (!in_file.is_open()) {
+            std::cerr << "ERROR (" << __func__ << ") : ";
+            std::cerr << "Unable to open file: " << file_name << std::endl;
+            std::exit(EXIT_FAILURE);
+        }
+        std::vector<std::pair<int, std::string>> config;  // To store config
+        std::vector<std::vector<std::valarray<double>>>
+            kernals;  // To store pretrained kernals
         // Loading model from saved file format
         size_t total_layers = 0;
         in_file >> total_layers;
-        for(size_t i = 0; i < total_layers; i++) {
+        for (size_t i = 0; i < total_layers; i++) {
             int neurons = 0;
             std::string activation;
             size_t shape_a = 0, shape_b = 0;
             std::vector<std::valarray<double>> kernal;
             in_file >> neurons >> activation >> shape_a >> shape_b;
-            for(size_t r = 0; r < shape_a; r++) {
+            for (size_t r = 0; r < shape_a; r++) {
                 std::valarray<double> row(shape_b);
-                for(size_t c = 0; c < shape_b; c++) {
+                for (size_t c = 0; c < shape_b; c++) {
                     in_file >> row[c];
                 }
                 kernal.push_back(row);
             }
-            config.emplace_back(make_pair(neurons, activation));;
+            config.emplace_back(make_pair(neurons, activation));
+            ;
             kernals.emplace_back(kernal);
         }
         std::cout << "INFO: Model loaded successfully" << std::endl;
-        return NeuralNetwork(config, kernals); // Return instance of NeuralNetwork class
+        in_file.close();  // Closing file
+        return NeuralNetwork(
+            config, kernals);  // Return instance of NeuralNetwork class
     }

     /**
      * Function to print summary of the network.
      */
-    void summary () {
+    void summary() {
         // Printing Summary
-        std::cout << "===============================================================" << std::endl;
+        std::cout
+            << "==============================================================="
+            << std::endl;
         std::cout << "\t\t+ MODEL SUMMARY +\t\t\n";
-        std::cout << "===============================================================" << std::endl;
-        for(size_t i = 1; i <= layers.size(); i++) { // For every layer
+        std::cout
+            << "==============================================================="
+            << std::endl;
+        for (size_t i = 1; i <= layers.size(); i++) {  // For every layer
             std::cout << i << ")";
-            std::cout << " Neurons : " << layers[i - 1].neurons; // number of neurons
-            std::cout << ", Activation : " << layers[i - 1].activation; // activation
-            std::cout << ", Kernal Shape : " << get_shape(layers[i - 1].kernal); // kernal shape
+            std::cout << " Neurons : "
+                      << layers[i - 1].neurons;  // number of neurons
+            std::cout << ", Activation : "
+                      << layers[i - 1].activation;  // activation
+            std::cout << ", Kernal Shape : "
+                      << get_shape(layers[i - 1].kernal);  // kernal shape
             std::cout << std::endl;
         }
-        std::cout << "===============================================================" << std::endl;
+        std::cout
+            << "==============================================================="
+            << std::endl;
         return;
     }
 };
 }  // namespace neural_network

 }  // namespace machine_learning

 /**
@@ -768,17 +807,22 @@ static void test() {
     machine_learning::neural_network::NeuralNetwork myNN =
         machine_learning::neural_network::NeuralNetwork({
             {4, "none"},  // First layer with 3 neurons and "none" as activation
-            {6, "relu"},  // Second layer with 6 neurons and "relu" as activation
-            {3, "sigmoid"}  // Third layer with 3 neurons and "sigmoid" as activation
+            {6,
+             "relu"},  // Second layer with 6 neurons and "relu" as activation
+            {3, "sigmoid"}  // Third layer with 3 neurons and "sigmoid" as
+                            // activation
         });
     // Printing summary of model
     myNN.summary();
     // Training Model
     myNN.fit_from_csv("iris.csv", true, 100, 0.3, false, 2, 32, true);
     // Testing predictions of model
-    assert(machine_learning::argmax(myNN.single_predict({{5,3.4,1.6,0.4}})) == 0);
-    assert(machine_learning::argmax(myNN.single_predict({{6.4,2.9,4.3,1.3}})) == 1);
-    assert(machine_learning::argmax(myNN.single_predict({{6.2,3.4,5.4,2.3}})) == 2);
+    assert(machine_learning::argmax(
+               myNN.single_predict({{5, 3.4, 1.6, 0.4}})) == 0);
+    assert(machine_learning::argmax(
+               myNN.single_predict({{6.4, 2.9, 4.3, 1.3}})) == 1);
+    assert(machine_learning::argmax(
+               myNN.single_predict({{6.2, 3.4, 5.4, 2.3}})) == 2);
     return;
 }

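The companion header `vector_ops.hpp` (diff below) reroutes its printing helpers from `std::cerr` to `std::cout`. Even after the change, though, the `out` parameter only has its precision set and is returned, never written to. A stream-agnostic variant — my sketch of an alternative, not part of this commit — would write to `out` itself, so one overload serves `std::cout`, files, and string streams alike:

```cpp
#include <iostream>
#include <valarray>

// Sketch (illustrative only): print to whichever stream the caller supplies
// by writing to `out` rather than to a hard-coded global stream.
template <typename T>
std::ostream &operator<<(std::ostream &out, const std::valarray<T> &A) {
    out.precision(4);          // same 4-digit precision the header sets
    for (const auto &a : A) {  // for every element in the valarray
        out << a << ' ';
    }
    out << '\n';
    return out;
}

int main() {
    std::valarray<double> v = {1.0, 2.5, 3.25};
    std::cout << v;  // prints: 1 2.5 3.25
    return 0;
}
```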
vector_ops.hpp
@@ -2,19 +2,20 @@
 * @file vector_ops.hpp
 * @author [Deep Raval](https://github.com/imdeep2905)
 *
-* @brief Various functions for vectors associated with [NeuralNetwork (aka Multilayer Perceptron)]
+* @brief Various functions for vectors associated with [NeuralNetwork (aka
+* Multilayer Perceptron)]
 * (https://en.wikipedia.org/wiki/Multilayer_perceptron).
 *
 */
 #ifndef VECTOR_OPS_FOR_NN
 #define VECTOR_OPS_FOR_NN

-#include <iostream>
 #include <algorithm>
-#include <vector>
-#include <valarray>
 #include <chrono>
+#include <iostream>
 #include <random>
+#include <valarray>
+#include <vector>

 /**
  * @namespace machine_learning
@@ -32,11 +33,11 @@ std::ostream &operator<<(std::ostream &out,
                         std::vector<std::valarray<T>> const &A) {
    // Setting output precision to 4 in case of floating point numbers
    out.precision(4);
    for (const auto &a : A) {      // For each row in A
        for (const auto &x : a) {  // For each element in row
            std::cout << x << ' ';  // print element
        }
        std::cout << std::endl;
    }
    return out;
}
@@ -52,7 +53,7 @@ std::ostream &operator<<(std::ostream &out, const std::pair<T, T> &A) {
    // Setting output precision to 4 in case of floating point numbers
    out.precision(4);
    // printing pair in the form (p, q)
    std::cout << "(" << A.first << ", " << A.second << ")";
    return out;
}

@@ -66,10 +67,10 @@ template <typename T>
std::ostream &operator<<(std::ostream &out, const std::valarray<T> &A) {
    // Setting output precision to 4 in case of floating point numbers
    out.precision(4);
    for (const auto &a : A) {   // For every element in the vector.
        std::cout << a << ' ';  // Print element
    }
    std::cout << std::endl;
    return out;
}

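A minimal usage sketch for the stream-printing overloads above (illustrative only, not part of this commit); it assumes "vector_ops.hpp" is on the include path:

    #include "vector_ops.hpp"

    using namespace machine_learning;  // bring the operator<< overloads into scope

    int main() {
        std::vector<std::valarray<double>> A = {{1.5, 2.5}, {3.5, 4.5}};
        std::cout << A;  // prints the 2D vector row by row, precision 4
        return 0;
    }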
@@ -81,10 +82,10 @@ std::ostream &operator<<(std::ostream &out, const std::valarray<T> &A) {
 * @return new resultant vector
 */
template <typename T>
std::valarray<T> insert_element(const std::valarray<T> &A, const T &ele) {
    std::valarray<T> B;      // New 1D vector to store resultant vector
    B.resize(A.size() + 1);  // Resizing it accordingly
    for (size_t i = 0; i < A.size(); i++) {  // For every element in A
        B[i] = A[i];                         // Copy element in B
    }
    B[B.size() - 1] = ele;  // Inserting new element in last position
@@ -98,10 +99,11 @@ std::valarray<T> insert_element(const std::valarray <T> &A, const T &ele) {
 * @return new resultant vector
 */
template <typename T>
std::valarray<T> pop_front(const std::valarray<T> &A) {
    std::valarray<T> B;      // New 1D vector to store resultant vector
    B.resize(A.size() - 1);  // Resizing it accordingly
    for (size_t i = 1; i < A.size();
         i++) {           // For every (except first) element in A
        B[i - 1] = A[i];  // Copy element in B with left shifted position
    }
    return B;  // Return resultant vector
@@ -114,10 +116,11 @@ std::valarray <T> pop_front(const std::valarray<T> &A) {
 * @return new resultant vector
 */
template <typename T>
std::valarray<T> pop_back(const std::valarray<T> &A) {
    std::valarray<T> B;      // New 1D vector to store resultant vector
    B.resize(A.size() - 1);  // Resizing it accordingly
    for (size_t i = 0; i < A.size() - 1;
         i++) {       // For every (except last) element in A
        B[i] = A[i];  // Copy element in B
    }
    return B;  // Return resultant vector
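A quick sketch of the three valarray helpers above (hypothetical usage, not from the commit), assuming "vector_ops.hpp" is available:

    #include <cassert>

    #include "vector_ops.hpp"

    int main() {
        std::valarray<double> a = {1.0, 2.0, 3.0};
        a = machine_learning::insert_element(a, 4.0);  // {1, 2, 3, 4}
        a = machine_learning::pop_front(a);            // {2, 3, 4}
        a = machine_learning::pop_back(a);             // {2, 3}
        assert(a.size() == 2 && a[0] == 2.0 && a[1] == 3.0);
        return 0;
    }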
@@ -130,16 +133,17 @@ std::valarray <T> pop_back(const std::valarray<T> &A) {
 * @param B Second 3D vector
 */
template <typename T>
void equal_shuffle(std::vector<std::vector<std::valarray<T>>> &A,
                   std::vector<std::vector<std::valarray<T>>> &B) {
    // If two vectors have different sizes
    if (A.size() != B.size()) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr
            << "Can not equally shuffle two vectors with different sizes: ";
        std::cerr << A.size() << " and " << B.size() << std::endl;
        std::exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < A.size(); i++) {  // For every element in A and B
        // Generating random index < size of A and B
        std::srand(std::chrono::system_clock::now().time_since_epoch().count());
        size_t random_index = std::rand() % A.size();
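To see the paired-shuffle contract, here is a small sketch (sample data is made up; assumes "vector_ops.hpp" is on the include path):

    #include "vector_ops.hpp"

    int main() {
        // Three (1 x 2) samples with their three (1 x 1) labels.
        std::vector<std::vector<std::valarray<double>>> X = {
            {{1.0, 1.0}}, {{2.0, 2.0}}, {{3.0, 3.0}}};
        std::vector<std::vector<std::valarray<double>>> Y = {
            {{1.0}}, {{2.0}}, {{3.0}}};
        machine_learning::equal_shuffle(X, Y);  // X[i] and Y[i] remain paired
        return 0;
    }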
@@ -161,16 +165,17 @@ void equal_shuffle(std::vector < std::vector <std::valarray<T>> > &A,
template <typename T>
void uniform_random_initialization(std::vector<std::valarray<T>> &A,
                                   const std::pair<size_t, size_t> &shape,
                                   const T &low, const T &high) {
    A.clear();  // Making A empty
    // Uniform distribution in range [low, high]
    std::default_random_engine generator(
        std::chrono::system_clock::now().time_since_epoch().count());
    std::uniform_real_distribution<T> distribution(low, high);
    for (size_t i = 0; i < shape.first; i++) {  // For every row
        std::valarray<T>
            row;  // Making empty row which will be inserted in vector
        row.resize(shape.second);
        for (auto &r : row) {             // For every element in row
            r = distribution(generator);  // copy random number
        }
        A.push_back(row);  // Insert new row in vector
@@ -178,7 +183,6 @@ void uniform_random_initialization(std::vector<std::valarray<T>> &A,
    return;
}

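A sketch of initializing a weight matrix with this function (illustrative; the shape and range are arbitrary, not from the commit):

    #include "vector_ops.hpp"

    using namespace machine_learning;  // for the pair operator<< below

    int main() {
        std::vector<std::valarray<double>> W;
        // Fill W with shape (3, 4), values drawn uniformly from [-1, 1].
        uniform_random_initialization(W, {3, 4}, -1.0, 1.0);
        std::cout << get_shape(W) << std::endl;  // prints (3, 4)
        return 0;
    }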
/**
 * Function to Initialize 2D vector as unit matrix
 * @tparam T typename of the vector
@@ -187,11 +191,11 @@ void uniform_random_initialization(std::vector<std::valarray<T>> &A,
 */
template <typename T>
void unit_matrix_initialization(std::vector<std::valarray<T>> &A,
                                const std::pair<size_t, size_t> &shape) {
    A.clear();  // Making A empty
    for (size_t i = 0; i < shape.first; i++) {
        std::valarray<T>
            row;  // Making empty row which will be inserted in vector
        row.resize(shape.second);
        row[i] = T(1);     // Insert 1 at ith position
        A.push_back(row);  // Insert new row in vector
@@ -207,11 +211,11 @@ void unit_matrix_initialization(std::vector<std::valarray<T>> &A,
 */
template <typename T>
void zeroes_initialization(std::vector<std::valarray<T>> &A,
                           const std::pair<size_t, size_t> &shape) {
    A.clear();  // Making A empty
    for (size_t i = 0; i < shape.first; i++) {
        std::valarray<T>
            row;  // Making empty row which will be inserted in vector
        row.resize(shape.second);  // By default all elements are zero
        A.push_back(row);          // Insert new row in vector
    }
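The two initializers above can be exercised as follows (a sketch, not commit code; assumes "vector_ops.hpp" is available):

    #include "vector_ops.hpp"

    int main() {
        std::vector<std::valarray<double>> I, Z;
        machine_learning::unit_matrix_initialization(I, {3, 3});  // 3 x 3 identity
        machine_learning::zeroes_initialization(Z, {2, 3});       // 2 x 3 zeros
        return 0;
    }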
@@ -227,7 +231,7 @@ void zeroes_initialization(std::vector<std::valarray<T>> &A,
template <typename T>
T sum(const std::vector<std::valarray<T>> &A) {
    T cur_sum = 0;             // Initially sum is zero
    for (const auto &a : A) {  // For every row in A
        cur_sum += a.sum();    // Add sum of that row to current sum
    }
    return cur_sum;  // Return sum
@@ -242,10 +246,11 @@ T sum(const std::vector<std::valarray<T>> &A) {
template <typename T>
std::pair<size_t, size_t> get_shape(const std::vector<std::valarray<T>> &A) {
    const size_t sub_size = (*A.begin()).size();
    for (const auto &a : A) {
        // If the supplied vector doesn't have the same shape in all rows
        if (a.size() != sub_size) {
            std::cerr << "ERROR (" << __func__ << ") : ";
            std::cerr << "Supplied vector is not 2D Matrix" << std::endl;
            std::exit(EXIT_FAILURE);
        }
    }
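A short sanity check for sum and get_shape (hypothetical values, not commit code):

    #include <cassert>
    #include <utility>

    #include "vector_ops.hpp"

    int main() {
        std::vector<std::valarray<double>> A = {{1.0, 2.0}, {3.0, 4.0}};
        assert(machine_learning::sum(A) == 10.0);  // 1 + 2 + 3 + 4
        assert(machine_learning::get_shape(A) ==
               std::make_pair(size_t(2), size_t(2)));
        return 0;
    }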
@@ -261,26 +266,32 @@ std::pair<size_t, size_t> get_shape(const std::vector<std::valarray<T>> &A) {
 * @return new scaled 3D vector
 */
template <typename T>
std::vector<std::vector<std::valarray<T>>> minmax_scaler(
    const std::vector<std::vector<std::valarray<T>>> &A, const T &low,
    const T &high) {
    std::vector<std::vector<std::valarray<T>>> B =
        A;  // Copying into new vector B
    const auto shape = get_shape(B[0]);  // Storing shape of B's every element
    // As this function is used for scaling training data, the vector should
    // be of shape (1, X)
    if (shape.first != 1) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr
            << "Supplied vector is not supported for minmax scaling, shape: ";
        std::cerr << shape << std::endl;
        std::exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < shape.second; i++) {
        T min = B[0][0][i], max = B[0][0][i];
        for (size_t j = 0; j < B.size(); j++) {
            // Updating minimum and maximum values
            min = std::min(min, B[j][0][i]);
            max = std::max(max, B[j][0][i]);
        }
        for (size_t j = 0; j < B.size(); j++) {
            // Applying min-max scaler formula
            B[j][0][i] =
                ((B[j][0][i] - min) / (max - min)) * (high - low) + low;
        }
    }
    return B;  // Return new resultant 3D vector
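A worked example of the scaler (sample data is made up): each feature column is mapped linearly so its minimum lands on low and its maximum on high.

    #include "vector_ops.hpp"

    int main() {
        // Three samples, each of shape (1, 2), as minmax_scaler expects.
        std::vector<std::vector<std::valarray<double>>> X = {
            {{2.0, 10.0}}, {{4.0, 20.0}}, {{6.0, 30.0}}};
        auto scaled = machine_learning::minmax_scaler(X, 0.0, 1.0);
        // Feature 0 was {2, 4, 6}; it becomes {0, 0.5, 1}.
        return 0;
    }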
@@ -295,13 +306,16 @@ minmax_scaler(const std::vector<std::vector<std::valarray<T>>> &A, const T &low,
template <typename T>
size_t argmax(const std::vector<std::valarray<T>> &A) {
    const auto shape = get_shape(A);
    // As this function is used on predicted (or target) vector, shape should
    // be (1, X)
    if (shape.first != 1) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr << "Supplied vector is ineligible for argmax" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    // Return distance of max element from first element (i.e. index)
    return std::distance(std::begin(A[0]),
                         std::max_element(std::begin(A[0]), std::end(A[0])));
}

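A one-line check of argmax on a (1, X) score vector (illustrative values):

    #include <cassert>

    #include "vector_ops.hpp"

    int main() {
        std::vector<std::valarray<double>> scores = {{0.1, 0.7, 0.2}};
        assert(machine_learning::argmax(scores) == 1);  // largest score at index 1
        return 0;
    }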
/**
@@ -312,10 +326,11 @@ size_t argmax(const std::vector<std::valarray<T>> &A) {
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> apply_function(
    const std::vector<std::valarray<T>> &A, T (*func)(const T &)) {
    std::vector<std::valarray<double>> B =
        A;                  // New vector to store resultant vector
    for (auto &b : B) {     // For every row in vector
        b = b.apply(func);  // Apply function to that row
    }
    return B;  // Return new resultant 2D vector
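A sketch applying a plain function pointer elementwise (the square function here is only an example, not part of the commit):

    #include "vector_ops.hpp"

    double square(const double &x) { return x * x; }

    int main() {
        std::vector<std::valarray<double>> A = {{1.0, 2.0}, {3.0, 4.0}};
        auto B = machine_learning::apply_function(A, square);  // {{1, 4}, {9, 16}}
        return 0;
    }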
@@ -329,9 +344,11 @@ std::vector <std::valarray <T>> apply_function(const std::vector <std::valarray
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> operator*(const std::vector<std::valarray<T>> &A,
                                        const T &val) {
    std::vector<std::valarray<double>> B =
        A;               // New vector to store resultant vector
    for (auto &b : B) {  // For every row in vector
        b = b * val;     // Multiply row with scalar
    }
    return B;  // Return new resultant 2D vector
@@ -345,9 +362,11 @@ std::vector <std::valarray <T> > operator * (const std::vector<std::valarray<T>>
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> operator/(const std::vector<std::valarray<T>> &A,
                                        const T &val) {
    std::vector<std::valarray<double>> B =
        A;               // New vector to store resultant vector
    for (auto &b : B) {  // For every row in vector
        b = b / val;     // Divide row with scalar
    }
    return B;  // Return new resultant 2D vector
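Both scalar operators in one sketch (values are arbitrary); the using-directive is needed so these operators are found for std types:

    #include "vector_ops.hpp"

    using namespace machine_learning;

    int main() {
        std::vector<std::valarray<double>> A = {{2.0, 4.0}, {6.0, 8.0}};
        auto doubled = A * 2.0;  // {{4, 8}, {12, 16}}
        auto halved = A / 2.0;   // {{1, 2}, {3, 4}}
        return 0;
    }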
@@ -360,14 +379,15 @@ std::vector <std::valarray <T> > operator / (const std::vector<std::valarray<T>>
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> transpose(
    const std::vector<std::valarray<T>> &A) {
    const auto shape = get_shape(A);  // Current shape of vector
    std::vector<std::valarray<T>> B;  // New vector to store result
    // Storing transpose values of A in B
    for (size_t j = 0; j < shape.second; j++) {
        std::valarray<T> row;
        row.resize(shape.first);
        for (size_t i = 0; i < shape.first; i++) {
            row[i] = A[i][j];
        }
        B.push_back(row);
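A shape-level sketch of transpose (the sample matrix is made up):

    #include "vector_ops.hpp"

    int main() {
        std::vector<std::valarray<double>> A = {{1.0, 2.0, 3.0},
                                                {4.0, 5.0, 6.0}};  // shape (2, 3)
        auto At = machine_learning::transpose(A);  // shape (3, 2); At[0] is {1, 4}
        return 0;
    }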
@@ -383,17 +403,20 @@ std::vector <std::valarray <T>> transpose(const std::vector<std::valarray<T>> &A
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> operator+(
    const std::vector<std::valarray<T>> &A,
    const std::vector<std::valarray<T>> &B) {
    const auto shape_a = get_shape(A);
    const auto shape_b = get_shape(B);
    // If vectors don't have equal shape
    if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr << "Supplied vectors have different shapes ";
        std::cerr << shape_a << " and " << shape_b << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<std::valarray<T>> C;
    for (size_t i = 0; i < A.size(); i++) {  // For every row
        C.push_back(A[i] + B[i]);            // Elementwise addition
    }
    return C;  // Return new resultant 2D vector
@@ -407,17 +430,20 @@ std::vector <std::valarray <T> > operator + (const std::vector<std::valarray<T>>
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> operator-(
    const std::vector<std::valarray<T>> &A,
    const std::vector<std::valarray<T>> &B) {
    const auto shape_a = get_shape(A);
    const auto shape_b = get_shape(B);
    // If vectors don't have equal shape
    if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr << "Supplied vectors have different shapes ";
        std::cerr << shape_a << " and " << shape_b << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<std::valarray<T>> C;             // Vector to store result
    for (size_t i = 0; i < A.size(); i++) {      // For every row
        C.push_back(A[i] - B[i]);                // Elementwise subtraction
    }
    return C;  // Return new resultant 2D vector
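Elementwise addition and subtraction in one sketch (again with a using-directive so the operators apply to std types; values are illustrative):

    #include "vector_ops.hpp"

    using namespace machine_learning;

    int main() {
        std::vector<std::valarray<double>> A = {{1.0, 2.0}, {3.0, 4.0}};
        std::vector<std::valarray<double>> B = {{4.0, 3.0}, {2.0, 1.0}};
        auto C = A + B;  // {{5, 5}, {5, 5}}
        auto D = A - B;  // {{-3, -1}, {1, 3}}
        return 0;
    }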
@@ -431,12 +457,14 @@ std::vector <std::valarray <T>> operator - (const std::vector<std::valarray<T>>
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> multiply(const std::vector<std::valarray<T>> &A,
                                       const std::vector<std::valarray<T>> &B) {
    const auto shape_a = get_shape(A);
    const auto shape_b = get_shape(B);
    // If vectors are not eligible for multiplication
    if (shape_a.second != shape_b.first) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr << "Vectors are not eligible for multiplication ";
        std::cerr << shape_a << " and " << shape_b << std::endl;
        std::exit(EXIT_FAILURE);
    }
@@ -445,8 +473,8 @@ std::vector <std::valarray <T>> multiply(const std::vector<std::valarray<T>> &A,
    for (size_t i = 0; i < shape_a.first; i++) {
        std::valarray<T> row;
        row.resize(shape_b.second);
        for (size_t j = 0; j < shape_b.second; j++) {
            for (size_t k = 0; k < shape_a.second; k++) {
                row[j] += A[i][k] * B[k][j];
            }
        }
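A minimal matrix-product sketch: a (1, 2) matrix times a (2, 1) matrix yields a (1, 1) result (values are illustrative):

    #include "vector_ops.hpp"

    int main() {
        std::vector<std::valarray<double>> A = {{1.0, 2.0}};    // shape (1, 2)
        std::vector<std::valarray<double>> B = {{3.0}, {4.0}};  // shape (2, 1)
        auto C = machine_learning::multiply(A, B);  // {{11}}: 1*3 + 2*4
        return 0;
    }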
@@ -463,22 +491,24 @@ std::vector <std::valarray <T>> multiply(const std::vector<std::valarray<T>> &A,
 * @return new resultant vector
 */
template <typename T>
std::vector<std::valarray<T>> hadamard_product(
    const std::vector<std::valarray<T>> &A,
    const std::vector<std::valarray<T>> &B) {
    const auto shape_a = get_shape(A);
    const auto shape_b = get_shape(B);
    // If vectors are not eligible for hadamard product
    if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) {
        std::cerr << "ERROR (" << __func__ << ") : ";
        std::cerr << "Vectors have different shapes ";
        std::cerr << shape_a << " and " << shape_b << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<std::valarray<T>> C;  // Vector to store result
    for (size_t i = 0; i < A.size(); i++) {
        C.push_back(A[i] * B[i]);  // Elementwise multiplication
    }
    return C;  // Return new resultant 2D vector
}
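And a sketch of the Hadamard (elementwise) product on two equally shaped matrices (sample values only, not from the commit):

    #include "vector_ops.hpp"

    int main() {
        std::vector<std::valarray<double>> A = {{1.0, 2.0}, {3.0, 4.0}};
        std::vector<std::valarray<double>> B = {{2.0, 2.0}, {2.0, 2.0}};
        auto C = machine_learning::hadamard_product(A, B);  // {{2, 4}, {6, 8}}
        return 0;
    }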
}  // namespace machine_learning

#endif