feat: Add Neural Network (Multilayer Perceptron) (#1025)

* Completed NN

* Made changes

* Added return in identity function

* Added <random> and fixed namespace naming

* clang-tidy changes

* Update machine_learning/neural_network.cpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* Update machine_learning/neural_network.cpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* Update machine_learning/neural_network.cpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* Update machine_learning/vector_ops.hpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* Update machine_learning/vector_ops.hpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* Update machine_learning/neural_network.cpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* Update machine_learning/neural_network.cpp

Co-authored-by: David Leal <halfpacho@gmail.com>

* added std::cerr and changed argmax's namespace

* Applied suggested changes

* Fixed a comment

* clang-tidy fixes

Co-authored-by: David Leal <halfpacho@gmail.com>
Deep Raval authored on 2020-08-20 00:55:32 +05:30, committed via GitHub
commit 4a34bec125, parent dfe5bd7638
3 changed files with 1426 additions and 0 deletions

machine_learning/iris.csv Normal file

@@ -0,0 +1,152 @@
https://archive.ics.uci.edu/ml/datasets/iris
sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class
5.1,3.5,1.4,.2,0
4.9,3,1.4,.2,0
4.7,3.2,1.3,.2,0
4.6,3.1,1.5,.2,0
5,3.6,1.4,.2,0
5.4,3.9,1.7,.4,0
4.6,3.4,1.4,.3,0
5,3.4,1.5,.2,0
4.4,2.9,1.4,.2,0
4.9,3.1,1.5,.1,0
5.4,3.7,1.5,.2,0
4.8,3.4,1.6,.2,0
4.8,3,1.4,.1,0
4.3,3,1.1,.1,0
5.8,4,1.2,.2,0
5.7,4.4,1.5,.4,0
5.4,3.9,1.3,.4,0
5.1,3.5,1.4,.3,0
5.7,3.8,1.7,.3,0
5.1,3.8,1.5,.3,0
5.4,3.4,1.7,.2,0
5.1,3.7,1.5,.4,0
4.6,3.6,1,.2,0
5.1,3.3,1.7,.5,0
4.8,3.4,1.9,.2,0
5,3,1.6,.2,0
5,3.4,1.6,.4,0
5.2,3.5,1.5,.2,0
5.2,3.4,1.4,.2,0
4.7,3.2,1.6,.2,0
4.8,3.1,1.6,.2,0
5.4,3.4,1.5,.4,0
5.2,4.1,1.5,.1,0
5.5,4.2,1.4,.2,0
4.9,3.1,1.5,.2,0
5,3.2,1.2,.2,0
5.5,3.5,1.3,.2,0
4.9,3.6,1.4,.1,0
4.4,3,1.3,.2,0
5.1,3.4,1.5,.2,0
5,3.5,1.3,.3,0
4.5,2.3,1.3,.3,0
4.4,3.2,1.3,.2,0
5,3.5,1.6,.6,0
5.1,3.8,1.9,.4,0
4.8,3,1.4,.3,0
5.1,3.8,1.6,.2,0
4.6,3.2,1.4,.2,0
5.3,3.7,1.5,.2,0
5,3.3,1.4,.2,0
7,3.2,4.7,1.4,1
6.4,3.2,4.5,1.5,1
6.9,3.1,4.9,1.5,1
5.5,2.3,4,1.3,1
6.5,2.8,4.6,1.5,1
5.7,2.8,4.5,1.3,1
6.3,3.3,4.7,1.6,1
4.9,2.4,3.3,1,1
6.6,2.9,4.6,1.3,1
5.2,2.7,3.9,1.4,1
5,2,3.5,1,1
5.9,3,4.2,1.5,1
6,2.2,4,1,1
6.1,2.9,4.7,1.4,1
5.6,2.9,3.6,1.3,1
6.7,3.1,4.4,1.4,1
5.6,3,4.5,1.5,1
5.8,2.7,4.1,1,1
6.2,2.2,4.5,1.5,1
5.6,2.5,3.9,1.1,1
5.9,3.2,4.8,1.8,1
6.1,2.8,4,1.3,1
6.3,2.5,4.9,1.5,1
6.1,2.8,4.7,1.2,1
6.4,2.9,4.3,1.3,1
6.6,3,4.4,1.4,1
6.8,2.8,4.8,1.4,1
6.7,3,5,1.7,1
6,2.9,4.5,1.5,1
5.7,2.6,3.5,1,1
5.5,2.4,3.8,1.1,1
5.5,2.4,3.7,1,1
5.8,2.7,3.9,1.2,1
6,2.7,5.1,1.6,1
5.4,3,4.5,1.5,1
6,3.4,4.5,1.6,1
6.7,3.1,4.7,1.5,1
6.3,2.3,4.4,1.3,1
5.6,3,4.1,1.3,1
5.5,2.5,4,1.3,1
5.5,2.6,4.4,1.2,1
6.1,3,4.6,1.4,1
5.8,2.6,4,1.2,1
5,2.3,3.3,1,1
5.6,2.7,4.2,1.3,1
5.7,3,4.2,1.2,1
5.7,2.9,4.2,1.3,1
6.2,2.9,4.3,1.3,1
5.1,2.5,3,1.1,1
5.7,2.8,4.1,1.3,1
6.3,3.3,6,2.5,2
5.8,2.7,5.1,1.9,2
7.1,3,5.9,2.1,2
6.3,2.9,5.6,1.8,2
6.5,3,5.8,2.2,2
7.6,3,6.6,2.1,2
4.9,2.5,4.5,1.7,2
7.3,2.9,6.3,1.8,2
6.7,2.5,5.8,1.8,2
7.2,3.6,6.1,2.5,2
6.5,3.2,5.1,2,2
6.4,2.7,5.3,1.9,2
6.8,3,5.5,2.1,2
5.7,2.5,5,2,2
5.8,2.8,5.1,2.4,2
6.4,3.2,5.3,2.3,2
6.5,3,5.5,1.8,2
7.7,3.8,6.7,2.2,2
7.7,2.6,6.9,2.3,2
6,2.2,5,1.5,2
6.9,3.2,5.7,2.3,2
5.6,2.8,4.9,2,2
7.7,2.8,6.7,2,2
6.3,2.7,4.9,1.8,2
6.7,3.3,5.7,2.1,2
7.2,3.2,6,1.8,2
6.2,2.8,4.8,1.8,2
6.1,3,4.9,1.8,2
6.4,2.8,5.6,2.1,2
7.2,3,5.8,1.6,2
7.4,2.8,6.1,1.9,2
7.9,3.8,6.4,2,2
6.4,2.8,5.6,2.2,2
6.3,2.8,5.1,1.5,2
6.1,2.6,5.6,1.4,2
7.7,3,6.1,2.3,2
6.3,3.4,5.6,2.4,2
6.4,3.1,5.5,1.8,2
6,3,4.8,1.8,2
6.9,3.1,5.4,2.1,2
6.7,3.1,5.6,2.4,2
6.9,3.1,5.1,2.3,2
5.8,2.7,5.1,1.9,2
6.8,3.2,5.9,2.3,2
6.7,3.3,5.7,2.5,2
6.7,3,5.2,2.3,2
6.3,2.5,5,1.9,2
6.5,3,5.2,2,2
6.2,3.4,5.4,2.3,2
5.9,3,5.1,1.8,2

machine_learning/neural_network.cpp Normal file

@@ -0,0 +1,790 @@
/**
* @file
* @author [Deep Raval](https://github.com/imdeep2905)
*
* @brief Implementation of [Multilayer Perceptron](https://en.wikipedia.org/wiki/Multilayer_perceptron).
*
* @details
* A multilayer perceptron (MLP) is a class of feedforward artificial neural network (ANN). The term MLP is used ambiguously,
* sometimes loosely to any feedforward ANN, sometimes strictly to refer to networks composed of multiple layers of perceptrons
* (with threshold activation). Multilayer perceptrons are sometimes colloquially referred to as "vanilla" neural networks,
* especially when they have a single hidden layer.
*
* An MLP consists of at least three layers of nodes: an input layer, a hidden layer and an output layer. Except for the
* input nodes, each node is a neuron that uses a nonlinear activation function. MLP utilizes a supervised learning technique
* called backpropagation for training. Its multiple layers and non-linear activation distinguish MLP from a linear
* perceptron. It can distinguish data that is not linearly separable.
*
* See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for the training algorithm.
*
* \note This implementation uses mini-batch gradient descent as the optimizer and MSE as the loss function. Bias terms are not included.
*/
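/*
* Quick reference (a sketch; the notation here is assumed, not taken from the
* file): for a mini-batch of m samples with prediction y_hat and target y,
* training below uses
*     loss = (1/m) * sum_i (y_hat_i - y_i)^2        (MSE)
*     W <- W - learning_rate * dLoss/dW             (gradient descent step)
* where dLoss/dW is accumulated per sample and averaged over the batch.
*/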
#include "vector_ops.hpp" // Custom header file for vector operations
#include <iostream>
#include <valarray>
#include <vector>
#include <cmath>
#include <algorithm>
#include <chrono>
#include <string>
#include <fstream>
#include <sstream>
#include <cassert>
/** \namespace machine_learning
* \brief Machine learning algorithms
*/
namespace machine_learning {
/** \namespace neural_network
* \brief Neural Network or Multilayer Perceptron
*/
namespace neural_network {
/** \namespace activations
* \brief Various activation functions used in Neural network
*/
namespace activations {
/**
* Sigmoid function
* @param x Value
* @return Returns sigmoid(x)
*/
double sigmoid (const double &x) {
return 1.0 / (1.0 + std::exp(-x));
}
/**
 * Derivative of sigmoid function
 * \note Expects the already-activated value, i.e. x = sigmoid(z)
 * @param x Value
 * @return Returns derivative of sigmoid(x)
 */
double dsigmoid (const double &x) {
return x * (1 - x);
}
/**
* Relu function
* @param x Value
* @returns relu(x)
*/
double relu (const double &x) {
return std::max(0.0, x);
}
/**
 * Derivative of relu function
 * \note Expects the already-activated value, i.e. x = relu(z)
 * @param x Value
 * @returns derivative of relu(x)
 */
double drelu (const double &x) {
return x > 0.0 ? 1.0 : 0.0; // with x = relu(z), z <= 0 gives x == 0, so the gradient there is 0
}
/**
* Tanh function
* @param x Value
* @return Returns tanh(x)
*/
double tanh (const double &x) {
return 2 / (1 + std::exp(-2 * x)) - 1;
}
/**
 * Derivative of tanh function
 * \note Expects the already-activated value, i.e. x = tanh(z)
 * @param x Value
 * @return Returns derivative of tanh(x)
 */
double dtanh (const double &x) {
return 1 - x * x;
}
} // namespace activations
/** \namespace util_functions
* \brief Various utility functions used in Neural network
*/
namespace util_functions {
/**
* Square function
* @param x Value
* @return Returns x * x
*/
double square(const double &x) {
return x * x;
}
/**
* Identity function
* @param x Value
* @return Returns x
*/
double identity_function(const double &x) {
return x;
}
} // namespace util_functions
/** \namespace layers
* \brief This namespace contains layers used
* in MLP.
*/
namespace layers {
/**
* neural_network::layers::DenseLayer class is used to store all necessary information about
* the layers (i.e. neurons, activation and kernal). This class
* is used by NeuralNetwork class to store layers.
*
*/
class DenseLayer {
public:
// To store activation function and it's derivative
double (*activation_function)(const double &);
double (*dactivation_function)(const double &);
int neurons; // To store number of neurons (used in summary)
std::string activation; // To store activation name (used in summary)
std::vector <std::valarray <double>> kernal; // To store kernal (aka weights)
/**
* Constructor for neural_network::layers::DenseLayer class
* @param neurons number of neurons
* @param activation activation function for layer
* @param kernal_shape shape of kernal
* @param random_kernal flag for whether to intialize kernal randomly
*/
DenseLayer(const int &neurons,
const std::string &activation,
const std::pair<size_t, size_t> &kernal_shape,
const bool &random_kernal) {
// Choosing activation (and it's derivative)
if (activation == "sigmoid") {
activation_function = neural_network::activations::sigmoid;
dactivation_function = neural_network::activations::dsigmoid;
}
else if (activation == "relu") {
activation_function = neural_network::activations::relu;
dactivation_function = neural_network::activations::drelu;
}
else if (activation == "tanh") {
activation_function = neural_network::activations::tanh;
dactivation_function = neural_network::activations::dtanh;
}
else if (activation == "none") {
// Set identity function in case "none" is supplied
activation_function = neural_network::util_functions::identity_function;
dactivation_function = neural_network::util_functions::identity_function;
}
else {
// If supplied activation is invalid
std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, ";
std::cerr << "Expected from {none, sigmoid, relu, tanh} got ";
std::cerr << activation << std::endl;
std::exit(EXIT_FAILURE);
}
this -> activation = activation; // Setting activation name
this -> neurons = neurons; // Setting number of neurons
// Initialize kernal according to flag
if(random_kernal) {
uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0);
}
else {
unit_matrix_initialization(kernal, kernal_shape);
}
}
/**
* Constructor for neural_network::layers::DenseLayer class
* @param neurons number of neurons
* @param activation activation function for layer
* @param kernal values of kernal (useful in loading model)
*/
DenseLayer (const int &neurons,
const std::string &activation,
const std::vector <std::valarray<double>> &kernal) {
// Choosing activation (and it's derivative)
if (activation == "sigmoid") {
activation_function = neural_network::activations::sigmoid;
dactivation_function = neural_network::activations::dsigmoid;
}
else if (activation == "relu") {
activation_function = neural_network::activations::relu;
dactivation_function = neural_network::activations::drelu;
}
else if (activation == "tanh") {
activation_function = neural_network::activations::tanh;
dactivation_function = neural_network::activations::dtanh;
}
else if (activation == "none") {
// Set identity function in case "none" is supplied
activation_function = neural_network::util_functions::identity_function;
dactivation_function = neural_network::util_functions::identity_function;
}
else {
// If supplied activation is invalid
std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, ";
std::cerr << "Expected from {none, sigmoid, relu, tanh} got ";
std::cerr << activation << std::endl;
std::exit(EXIT_FAILURE);
}
this -> activation = activation; // Setting activation name
this -> neurons = neurons; // Setting number of neurons
this -> kernal = kernal; // Setting supplied kernal values
}
/**
* Copy Constructor for class DenseLayer.
*
* @param layer instance of class to be copied.
*/
DenseLayer(const DenseLayer &layer) = default;
/**
* Destructor for class DenseLayer.
*/
~DenseLayer() = default;
/**
* Copy assignment operator for class DenseLayer
*/
DenseLayer& operator = (const DenseLayer &layer) = default;
/**
* Move constructor for class DenseLayer
*/
DenseLayer(DenseLayer &&) = default;
/**
* Move assignment operator for class DenseLayer
*/
DenseLayer& operator = (DenseLayer &&) = default;
};
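/*
* Usage sketch (hypothetical values, not part of the original file): a hidden
* layer with 6 neurons, relu activation and a randomly initialized 4x6 kernal
* could be built as:
*     DenseLayer hidden(6, "relu", {4, 6}, true);
*/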
} // namespace layers
/**
* NeuralNetwork class implements the MLP. This class is
* used by the actual user to create and train networks.
*
*/
class NeuralNetwork {
private:
std::vector <neural_network::layers::DenseLayer> layers; // To store layers
/**
* Private Constructor for class NeuralNetwork. This constructor
* is used internally to load model.
* @param config vector containing pair (neurons, activation)
* @param kernals vector containing all pretrained kernals
*/
NeuralNetwork(const std::vector <std::pair<int, std::string>> &config,
const std::vector <std::vector<std::valarray<double>>> &kernals) {
// First layer should not have activation
if(config.begin() -> second != "none") {
std::cerr << "ERROR: First layer can't have activation other than none";
std::cerr << std::endl;
std::exit(EXIT_FAILURE);
}
// Network should have atleast two layers
if(config.size() <= 1) {
std::cerr << "ERROR: Invalid size of network, ";
std::cerr << "Atleast two layers are required";
std::exit(EXIT_FAILURE);
}
// Reconstructing all pretrained layers
for(size_t i = 0; i < config.size(); i++) {
layers.emplace_back(neural_network::layers::DenseLayer(config[i].first,
config[i].second,
kernals[i]));
}
std::cout << "INFO: Network constructed successfully" << std::endl;
}
/**
* Private function to get detailed predictions (i.e.
* activated neuron values). This function is used in
* backpropagation, single predict and batch predict.
* @param X input vector
*/
std::vector<std::vector<std::valarray <double>>>
__detailed_single_prediction (const std::vector<std::valarray <double>> &X) {
std::vector<std::vector < std::valarray <double> >> details;
std::vector < std::valarray <double> > current_pass = X;
details.emplace_back(X);
for(const auto &l : layers) {
current_pass = multiply(current_pass, l.kernal);
current_pass = apply_function(current_pass, l.activation_function);
details.emplace_back(current_pass);
}
return details;
}
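/*
* Note on the returned structure (my reading of the code above): for a
* network with L layers the result holds L + 1 entries, where details[0] is
* the input X and details[k] (k >= 1) is the activated output of layer k.
* fit() indexes these as activations[j] and activations[j + 1] during
* backpropagation.
*/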
public:
/**
* Default Constructor for class NeuralNetwork. This constructor
* is used to create empty variable of type NeuralNetwork class.
*/
NeuralNetwork() = default;
/**
* Constructor for class NeuralNetwork. This constructor
* is used by user.
* @param config vector containing pair (neurons, activation)
*/
explicit NeuralNetwork(const std::vector <std::pair<int, std::string>> &config) {
// First layer should not have activation
if(config.begin() -> second != "none") {
std::cerr << "ERROR: First layer can't have activation other than none";
std::cerr << std::endl;
std::exit(EXIT_FAILURE);
}
// Network should have atleast two layers
if(config.size() <= 1) {
std::cerr << "ERROR: Invalid size of network, ";
std::cerr << "Atleast two layers are required";
std::exit(EXIT_FAILURE);
}
// Separately creating first layer so it can have unit matrix
// as kernal.
layers.push_back(neural_network::layers::DenseLayer(config[0].first,
config[0].second,
{config[0].first, config[0].first},
false));
// Creating remaining layers
for(size_t i = 1; i < config.size(); i++) {
layers.push_back(neural_network::layers::DenseLayer(config[i].first,
config[i].second,
{config[i - 1].first, config[i].first},
true));
}
std::cout << "INFO: Network constructed successfully" << std::endl;
}
/**
* Copy Constructor for class NeuralNetwork.
*
* @param model instance of class to be copied.
*/
NeuralNetwork(const NeuralNetwork &model) = default;
/**
* Destructor for class NeuralNetwork.
*/
~NeuralNetwork() = default;
/**
* Copy assignment operator for class NeuralNetwork
*/
NeuralNetwork& operator = (const NeuralNetwork &model) = default;
/**
* Move constructor for class NeuralNetwork
*/
NeuralNetwork(NeuralNetwork &&) = default;
/**
* Move assignment operator for class NeuralNetwork
*/
NeuralNetwork& operator = (NeuralNetwork &&) = default;
/**
* Function to get X and Y from csv file (where X = data, Y = label)
* @param file_name csv file name
* @param last_label flag for whether label is in first or last column
* @param normalize flag for whether to normalize data
* @param slip_lines number of lines to skip
* @return returns pair of X and Y
*/
std::pair<std::vector<std::vector<std::valarray<double>>>, std::vector<std::vector<std::valarray<double>>>>
get_XY_from_csv(const std::string &file_name,
const bool &last_label,
const bool &normalize,
const int &slip_lines = 1) {
std::ifstream in_file; // Ifstream to read file
in_file.open(file_name.c_str(), std::ios::in); // Open file
std::vector <std::vector<std::valarray<double>>> X, Y; // To store X and Y
std::string line; // To store each line
// Skip lines
for(int i = 0; i < slip_lines; i ++) {
std::getline(in_file, line, '\n'); // Ignore line
}
// While file has more lines to read
while(std::getline(in_file, line, '\n'))
{
std::valarray <double> x_data, y_data; // To store single sample and label
std::stringstream ss(line); // Constructing stringstream from line
std::string token; // To store each token in line (seprated by ',')
while(std::getline(ss, token, ',')) { // For each token
// Insert numerical value of token in x_data
x_data = insert_element(x_data, std::stod(token));
}
// If label is in last column
if(last_label) {
y_data.resize(this -> layers.back().neurons);
// If task is classification
if(y_data.size() > 1) {
y_data[x_data[x_data.size() - 1]] = 1;
}
// If task is regression (of single value)
else {
y_data[0] = x_data[x_data.size() - 1];
}
x_data = pop_back(x_data); // Remove label from x_data
}
else {
y_data.resize(this -> layers.back().neurons);
// If task is classification
if(y_data.size() > 1) {
y_data[x_data[x_data.size() - 1]] = 1;
}
// If task is regression (of single value)
else {
y_data[0] = x_data[x_data.size() - 1];
}
x_data = pop_front(x_data); // Remove label from x_data
}
// Push collected X_data and y_data in X and Y
X.push_back({x_data});
Y.push_back({y_data});
}
in_file.close();
// Normalize training data if flag is set
if(normalize) {
// Scale data between 0 and 1 using min-max scaler
X = minmax_scaler(X, 0.01, 1.0);
}
return make_pair(X, Y); // Return pair of X and Y
}
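/*
* Worked example (a sketch based on iris.csv above, assuming a 3-neuron
* output layer and last_label = true): the row "5,3.4,1.5,.2,0" becomes
*     X sample: {5, 3.4, 1.5, .2}
*     Y sample: {1, 0, 0}   (one-hot encoding of class 0)
*/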
/**
* Function to get prediction of model on single sample.
* @param X array of feature vectors
* @return returns predictions as vector
*/
std::vector<std::valarray <double>>
single_predict (const std::vector<std::valarray <double>> &X) {
// Get activations of all layers
auto activations = this -> __detailed_single_prediction(X);
// Return activations of last layer (actual predicted values)
return activations.back();
}
/**
* Function to get prediction of model on batch
* @param X array of feature vectors
* @return returns predicted values as vector
*/
std::vector < std::vector <std::valarray<double>>>
batch_predict (const std::vector <std::vector <std::valarray <double>>> &X) {
// Store predicted values
std::vector < std::vector <std::valarray<double>>> predicted_batch(X.size());
for(size_t i = 0; i < X.size(); i++) { // For every sample
// Push predicted values
predicted_batch[i] = this -> single_predict(X[i]);
}
return predicted_batch; // Return predicted values
}
/**
* Function to fit model on supplied data
* @param X array of feature vectors
* @param Y array of target values
* @param epochs number of epochs (default = 100)
* @param learning_rate learning rate (default = 0.01)
* @param batch_size batch size for gradient descent (default = 32)
* @param shuffle flag for whether to shuffle data (default = true)
*/
void fit(const std::vector < std::vector <std::valarray<double>>> &X_,
const std::vector < std::vector <std::valarray<double>>> &Y_,
const int &epochs = 100,
const double &learning_rate = 0.01,
const size_t &batch_size = 32,
const bool &shuffle = true) {
std::vector < std::vector <std::valarray<double>>> X = X_, Y = Y_;
// Both label and input data should have same size
if (X.size() != Y.size()) {
std::cerr << "ERROR : X and Y in fit have different sizes" << std::endl;
std::exit(EXIT_FAILURE);
}
std::cout << "INFO: Training Started" << std::endl;
for (int epoch = 1; epoch <= epochs; epoch++) { // For every epoch
// Shuffle X and Y if flag is set
if(shuffle) {
equal_shuffle(X, Y);
}
auto start = std::chrono::high_resolution_clock::now(); // Start clock
double loss = 0, acc = 0; // Initialize performance metrics with zero
// For each starting index of batch
for(size_t batch_start = 0; batch_start < X.size(); batch_start += batch_size) {
for(size_t i = batch_start; i < std::min(X.size(), batch_start + batch_size); i++) {
std::vector <std::valarray<double>> grad, cur_error, predicted;
auto activations = this -> __detailed_single_prediction(X[i]);
// Gradients vector to store gradients for all layers
// They will be averaged and applied to kernal
std::vector<std::vector<std::valarray<double>>> gradients;
gradients.resize(this -> layers.size());
// First initialize gradients to zero
for(size_t k = 0; k < gradients.size(); k++) {
zeroes_initialization(gradients[k], get_shape(this -> layers[k].kernal));
}
predicted = activations.back(); // Predicted vector
cur_error = predicted - Y[i]; // Error vector (predicted - actual)
// Calculating loss with MSE
loss += sum(apply_function(cur_error, neural_network::util_functions::square));
// If prediction is correct
if(argmax(predicted) == argmax(Y[i])) {
acc += 1;
}
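// The loop below implements the backpropagation recurrence (notation
// assumed here): with a_j = activations[j] and W_j = layers[j].kernal,
//     delta_j = delta_(j+1) (.) f'(a_(j+1))    ((.) = elementwise product)
//     grad_j  = transpose(a_j) * delta_j
// and the error is propagated back as delta_j * transpose(W_j).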
// For every layer (except first) starting from last one
for(size_t j = this -> layers.size() - 1; j >= 1; j--) {
// Backpropagating errors
cur_error = hadamard_product(cur_error,
apply_function(activations[j + 1],
this -> layers[j].dactivation_function));
// Calculating gradient for current layer
grad = multiply(transpose(activations[j]), cur_error);
// Change error according to current kernal values
cur_error = multiply(cur_error, transpose(this -> layers[j].kernal));
// Adding gradient values to collection of gradients
gradients[j] = gradients[j] + grad / double(batch_size);
}
// Applying gradients
for(size_t j = this -> layers.size() - 1; j >= 1; j--) {
// Updating kernal (aka weights)
this -> layers[j].kernal = this -> layers[j].kernal -
gradients[j] * learning_rate;
}
}
}
auto stop = std::chrono::high_resolution_clock::now(); // Stopping the clock
// Calculate time taken by epoch
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
loss /= X.size(); // Averaging loss
acc /= X.size(); // Averaging accuracy
std::cout.precision(4); // set output precision to 4
// Printing training stats
std::cout << "Training: Epoch " << epoch << '/' << epochs;
std::cout << ", Loss: " << loss;
std::cout << ", Accuracy: " << acc;
std::cout << ", Taken time: " << duration.count() / 1e6 << " seconds";
std::cout << std::endl;
}
return;
}
/**
* Function to fit model on data stored in csv file
* @param file_name csv file name
* @param last_label flag for whether label is in first or last column
* @param epochs number of epochs
* @param learning_rate learning rate
* @param normalize flag for whether to normalize data
* @param slip_lines number of lines to skip
* @param batch_size batch size for gradient descent (default = 32)
* @param shuffle flag for whether to shuffle data (default = true)
*/
void fit_from_csv (const std::string &file_name,
const bool &last_label,
const int &epochs,
const double &learning_rate,
const bool &normalize,
const int &slip_lines = 1,
const size_t &batch_size = 32,
const bool &shuffle = true) {
// Getting training data from csv file
auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines);
// Fit the model on training data
this -> fit(data.first, data.second, epochs, learning_rate, batch_size, shuffle);
return;
}
/**
* Function to evaluate model on supplied data
* @param X array of feature vectors (input data)
* @param Y array of target values (label)
*/
void evaluate(const std::vector< std::vector <std::valarray <double>>> &X,
const std::vector< std::vector <std::valarray <double>>> &Y) {
std::cout << "INFO: Evaluation Started" << std::endl;
double acc = 0, loss = 0; // Initialize performance metrics with zero
for(size_t i = 0; i < X.size(); i++) { // For every sample in input
// Get predictions
std::vector<std::valarray<double>> pred = this -> single_predict(X[i]);
// If predicted class is correct
if(argmax(pred) == argmax(Y[i])) {
acc += 1; // Increment accuracy
}
// Calculating loss - Mean Squared Error
loss += sum(apply_function((Y[i] - pred),
neural_network::util_functions::square) * 0.5);
}
acc /= X.size(); // Averaging accuracy
loss /= X.size(); // Averaging loss
// Printing performance of the model
std::cout << "Evaluation: Loss: " << loss;
std::cout << ", Accuracy: " << acc << std::endl;
return;
}
/**
* Function to evaluate model on data stored in csv file
* @param file_name csv file name
* @param last_label flag for whether label is in first or last column
* @param normalize flag for whether to normalize data
* @param slip_lines number of lines to skip
*/
void evaluate_from_csv (const std::string &file_name,
const bool &last_label,
const bool &normalize,
const int &slip_lines = 1) {
// Getting training data from csv file
auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines);
// Evaluating model
this -> evaluate(data.first, data.second);
return;
}
/**
* Function to save current model.
* @param file_name file name to save model (*.model)
*/
void save_model (const std::string &_file_name) {
std::string file_name = _file_name;
// Adding ".model" extension if it is not already there in name
if(file_name.find(".model") == file_name.npos) {
file_name += ".model";
}
std::ofstream out_file; // Ofstream to write in file
// Open file in out|trunc mode
out_file.open(file_name.c_str(), std::ofstream::out | std::ofstream::trunc);
/**
Format in which model is saved:
total_layers
neurons(1st neural_network::layers::DenseLayer) activation_name(1st neural_network::layers::DenseLayer)
kernal_shape(1st neural_network::layers::DenseLayer)
kernal_values
.
.
.
neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth neural_network::layers::DenseLayer)
kernal_shape(Nth neural_network::layers::DenseLayer)
kernal_value
For Example, pretrained model with 3 layers:
<pre>
3
4 none
4 4
1 0 0 0
0 1 0 0
0 0 1 0
0 0 0 1
6 relu
4 6
-1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653
-0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202
-2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049
-0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741
3 sigmoid
6 3
0.390267 -0.391703 -0.0989607
0.499234 -0.564539 -0.28097
0.553386 -0.153974 -1.92493
-2.01336 -0.0219682 1.44145
1.72853 -0.465264 -0.705373
-0.908409 -0.740547 0.376416
</pre>
*/
// Saving model in the same format
out_file << layers.size();
out_file << std::endl;
for(const auto &layer : this -> layers) {
out_file << layer.neurons << ' ' << layer.activation << std::endl;
const auto shape = get_shape(layer.kernal);
out_file << shape.first << ' ' << shape.second << std::endl;
for(const auto &row : layer.kernal) {
for(const auto &val : row) {
out_file << val << ' ';
}
out_file << std::endl;
}
}
std::cout << "INFO: Model saved successfully with name : ";
std::cout << file_name << std::endl;
return;
}
/**
* Function to load earlier saved model.
* @param file_name file from which model will be loaded (*.model)
* @return instance of NeuralNetwork class with pretrained weights
*/
NeuralNetwork load_model (const std::string &file_name) {
std::ifstream in_file; // Ifstream to read file
in_file.open(file_name.c_str()); // Opening file
std::vector <std::pair<int, std::string>> config; // To store config
std::vector <std::vector<std::valarray<double>>> kernals; // To store pretrained kernals
// Loading model from saved file format
size_t total_layers = 0;
in_file >> total_layers;
for(size_t i = 0; i < total_layers; i++) {
int neurons = 0;
std::string activation;
size_t shape_a = 0, shape_b = 0;
std::vector<std::valarray<double>> kernal;
in_file >> neurons >> activation >> shape_a >> shape_b;
for(size_t r = 0; r < shape_a; r++) {
std::valarray<double> row(shape_b);
for(size_t c = 0; c < shape_b; c++) {
in_file >> row[c];
}
kernal.push_back(row);
}
config.emplace_back(make_pair(neurons, activation));
kernals.emplace_back(kernal);
}
std::cout << "INFO: Model loaded successfully" << std::endl;
return NeuralNetwork(config, kernals); // Return instance of NeuralNetwork class
}
/**
* Function to print summary of the network.
*/
void summary () {
// Printing Summary
std::cout << "===============================================================" << std::endl;
std::cout << "\t\t+ MODEL SUMMARY +\t\t\n";
std::cout << "===============================================================" << std::endl;
for(size_t i = 1; i <= layers.size(); i++) { // For every layer
std::cout << i << ")";
std::cout << " Neurons : " << layers[i - 1].neurons; // number of neurons
std::cout << ", Activation : " << layers[i - 1].activation; // activation
std::cout << ", Kernal Shape : " << get_shape(layers[i - 1].kernal); // kernal shape
std::cout << std::endl;
}
std::cout << "===============================================================" << std::endl;
return;
}
};
} // namespace neural_network
} // namespace machine_learning
/**
* Function to test neural network
* @returns none
*/
static void test() {
// Creating network with 3 layers for "iris.csv"
machine_learning::neural_network::NeuralNetwork myNN =
machine_learning::neural_network::NeuralNetwork({
{4, "none"}, // First layer with 3 neurons and "none" as activation
{6, "relu"}, // Second layer with 6 neurons and "relu" as activation
{3, "sigmoid"} // Third layer with 3 neurons and "sigmoid" as activation
});
// Printing summary of model
myNN.summary();
// Training Model
myNN.fit_from_csv("iris.csv", true, 100, 0.3, false, 2, 32, true);
// Testing predictions of model
assert(machine_learning::argmax(myNN.single_predict({{5,3.4,1.6,0.4}})) == 0);
assert(machine_learning::argmax(myNN.single_predict({{6.4,2.9,4.3,1.3}})) == 1);
assert(machine_learning::argmax(myNN.single_predict({{6.2,3.4,5.4,2.3}})) == 2);
return;
}
/** Driver Code */
int main() {
// Testing
test();
return 0;
}

machine_learning/vector_ops.hpp Normal file

@@ -0,0 +1,484 @@
/**
* @file vector_ops.hpp
* @author [Deep Raval](https://github.com/imdeep2905)
*
* @brief Various functions for vectors associated with [NeuralNetwork (aka Multilayer Perceptron)](https://en.wikipedia.org/wiki/Multilayer_perceptron).
*
*/
#ifndef VECTOR_OPS_FOR_NN
#define VECTOR_OPS_FOR_NN
#include <iostream>
#include <algorithm>
#include <vector>
#include <valarray>
#include <chrono>
#include <random>
/**
* @namespace machine_learning
* @brief Machine Learning algorithms
*/
namespace machine_learning {
/**
* Overloaded operator "<<" to print 2D vector
* @tparam T typename of the vector
* @param out std::ostream to output
* @param A 2D vector to be printed
*/
template <typename T>
std::ostream &operator<<(std::ostream &out,
std::vector<std::valarray<T>> const &A) {
// Setting output precision to 4 in case of floating point numbers
out.precision(4);
for(const auto &a : A) { // For each row in A
for(const auto &x : a) { // For each element in row
out << x << ' '; // print element
}
out << std::endl;
}
return out;
}
/**
* Overloaded operator "<<" to print a pair
* @tparam T typename of the pair
* @param out std::ostream to output
* @param A Pair to be printed
*/
template <typename T>
std::ostream &operator<<(std::ostream &out, const std::pair<T, T> &A) {
// Setting output precision to 4 in case of floating point numbers
out.precision(4);
// printing pair in the form (p, q)
std::cerr << "(" << A.first << ", " << A.second << ")";
return out;
}
/**
* Overloaded operator "<<" to print a 1D vector
* @tparam T typename of the vector
* @param out std::ostream to output
* @param A 1D vector to be printed
*/
template <typename T>
std::ostream &operator<<(std::ostream &out, const std::valarray<T> &A) {
// Setting output precision to 4 in case of floating point numbers
out.precision(4);
for(const auto &a : A) { // For every element in the vector.
out << a << ' '; // Print element
}
out << std::endl;
return out;
}
/**
* Function to insert element into 1D vector
* @tparam T typename of the 1D vector and the element
* @param A 1D vector in which element will to be inserted
* @param ele element to be inserted
* @return new resultant vector
*/
template <typename T>
std::valarray<T> insert_element(const std::valarray <T> &A, const T &ele) {
std::valarray <T> B; // New 1D vector to store resultant vector
B.resize(A.size() + 1); // Resizing it accordingly
for(size_t i = 0; i < A.size(); i++) { // For every element in A
B[i] = A[i]; // Copy element in B
}
B[B.size() - 1] = ele; // Inserting new element in last position
return B; // Return resultant vector
}
/**
* Function to remove first element from 1D vector
* @tparam T typename of the vector
* @param A 1D vector from which first element will be removed
* @return new resultant vector
*/
template <typename T>
std::valarray <T> pop_front(const std::valarray<T> &A) {
std::valarray <T> B; // New 1D vector to store resultant vector
B.resize(A.size() - 1); // Resizing it accordingly
for(size_t i = 1; i < A.size(); i ++) { // For every (except first) element in A
B[i - 1] = A[i]; // Copy element in B with left shifted position
}
return B; // Return resultant vector
}
/**
* Function to remove last element from 1D vector
* @tparam T typename of the vector
* @param A 1D vector from which last element will be removed
* @return new resultant vector
*/
template <typename T>
std::valarray <T> pop_back(const std::valarray<T> &A) {
std::valarray <T> B; // New 1D vector to store resultant vector
B.resize(A.size() - 1); // Resizing it accordingly
for(size_t i = 0; i < A.size() - 1; i ++) { // For every (except last) element in A
B[i] = A[i]; // Copy element in B
}
return B; // Return resultant vector
}
/**
* Function to equally shuffle two 3D vectors (used for shuffling training data)
* @tparam T typename of the vector
* @param A First 3D vector
* @param B Second 3D vector
*/
template <typename T>
void equal_shuffle(std::vector < std::vector <std::valarray<T>> > &A,
std::vector < std::vector <std::valarray<T>> > &B) {
// If two vectors have different sizes
if(A.size() != B.size())
{
std::cerr << "ERROR : Can not equally shuffle two vectors with different sizes: ";
std::cerr << A.size() << " and " << B.size() << std::endl;
std::exit(EXIT_FAILURE);
}
// Seed the RNG once; re-seeding on every iteration would weaken the shuffle
std::srand(std::chrono::system_clock::now().time_since_epoch().count());
for(size_t i = 0; i < A.size(); i++) { // For every element in A and B
// Generating random index < size of A and B
size_t random_index = std::rand() % A.size();
// Swap elements in both A and B with same random index
std::swap(A[i], A[random_index]);
std::swap(B[i], B[random_index]);
}
return;
}
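/*
* A terser alternative (a sketch, not part of the original API): shuffle both
* vectors with two std::mt19937 engines seeded identically so the pairing is
* preserved; <random> and <algorithm> are already included above.
*     unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
*     std::mt19937 g1(seed), g2(seed);
*     std::shuffle(A.begin(), A.end(), g1);
*     std::shuffle(B.begin(), B.end(), g2);
*/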
/**
* Function to initialize given 2D vector using uniform random initialization
* @tparam T typename of the vector
* @param A 2D vector to be initialized
* @param shape required shape
* @param low lower limit on value
* @param high upper limit on value
*/
template <typename T>
void uniform_random_initialization(std::vector<std::valarray<T>> &A,
const std::pair<size_t, size_t> &shape,
const T &low,
const T &high) {
A.clear(); // Making A empty
// Uniform distribution in range [low, high]
std::default_random_engine generator(std::chrono::system_clock::now().time_since_epoch().count());
std::uniform_real_distribution <T> distribution(low, high);
for(size_t i = 0; i < shape.first; i++) { // For every row
std::valarray <T> row; // Making empty row which will be inserted in vector
row.resize(shape.second);
for(auto &r : row) { // For every element in row
r = distribution(generator); // copy random number
}
A.push_back(row); // Insert new row in vector
}
return;
}
/**
* Function to Initialize 2D vector as unit matrix
* @tparam T typename of the vector
* @param A 2D vector to be initialized
* @param shape required shape
*/
template <typename T>
void unit_matrix_initialization(std::vector<std::valarray<T>> &A,
const std::pair<size_t, size_t> &shape
) {
A.clear(); // Making A empty
for(size_t i = 0; i < shape.first; i++) {
std::valarray <T> row; // Making empty row which will be inserted in vector
row.resize(shape.second);
row[i] = T(1); // Insert 1 at ith position (assumes shape.first <= shape.second)
A.push_back(row); // Insert new row in vector
}
return;
}
/**
* Function to Initialize 2D vector as zeroes
* @tparam T typename of the vector
* @param A 2D vector to be initialized
* @param shape required shape
*/
template <typename T>
void zeroes_initialization(std::vector<std::valarray<T>> &A,
const std::pair<size_t, size_t> &shape
) {
A.clear(); // Making A empty
for(size_t i = 0; i < shape.first; i++) {
std::valarray <T> row; // Making empty row which will be inserted in vector
row.resize(shape.second); // By default all elements are zero
A.push_back(row); // Insert new row in vector
}
return;
}
/**
* Function to get sum of all elements in 2D vector
* @tparam T typename of the vector
* @param A 2D vector for which sum is required
* @return returns sum of all elements of 2D vector
*/
template <typename T>
T sum(const std::vector<std::valarray<T>> &A) {
T cur_sum = 0; // Initially sum is zero
for(const auto &a : A) { // For every row in A
cur_sum += a.sum(); // Add sum of that row to current sum
}
return cur_sum; // Return sum
}
/**
* Function to get shape of given 2D vector
* @tparam T typename of the vector
* @param A 2D vector for which shape is required
* @return shape as pair
*/
template <typename T>
std::pair<size_t, size_t> get_shape(const std::vector<std::valarray<T>> &A) {
const size_t sub_size = (*A.begin()).size();
for(const auto &a : A) {
// If supplied vector doesn't have the same shape in all rows
if(a.size() != sub_size) {
std::cerr << "ERROR: (get_shape) Supplied vector is not 2D Matrix" << std::endl;
std::exit(EXIT_FAILURE);
}
}
return std::make_pair(A.size(), sub_size); // Return shape as pair
}
/**
* Function to scale given 3D vector using min-max scaler
* @tparam T typename of the vector
* @param A 3D vector which will be scaled
* @param low new minimum value
* @param high new maximum value
* @return new scaled 3D vector
*/
template <typename T>
std::vector<std::vector<std::valarray<T>>>
minmax_scaler(const std::vector<std::vector<std::valarray<T>>> &A, const T &low, const T &high) {
std::vector<std::vector<std::valarray<T>>> B = A; // Copying into new vector B
const auto shape = get_shape(B[0]); // Storing shape of B's every element
// As this function is used for scaling training data, each sample should have shape (1, X)
if(shape.first != 1) {
std::cerr << "ERROR: (MinMax Scaling) Supplied vector is not supported for minmax scaling, shape: ";
std::cerr << shape << std::endl;
std::exit(EXIT_FAILURE);
}
for(size_t i = 0; i < shape.second; i++) {
T min = B[0][0][i], max = B[0][0][i];
for(size_t j = 0; j < B.size(); j++) {
// Updating minimum and maximum values
min = std::min(min, B[j][0][i]);
max = std::max(max, B[j][0][i]);
}
for(size_t j = 0; j < B.size(); j++) {
// Applying min-max scaler formula
B[j][0][i] = ((B[j][0][i] - min) / (max - min)) * (high - low) + low;
}
}
return B; // Return new resultant 3D vector
}
/**
* Function to get index of maximum element in 2D vector
* @tparam T typename of the vector
* @param A 2D vector for which maximum index is required
* @return index of maximum element
*/
template <typename T>
size_t argmax(const std::vector<std::valarray<T>> &A) {
const auto shape = get_shape(A);
// As this function is used on predicted (or target) vector, shape should be (1, X)
if(shape.first != 1) {
std::cerr << "ERROR: (argmax) Supplied vector is ineligible for argmax" << std::endl;
std::exit(EXIT_FAILURE);
}
// Return distance of max element from first element (i.e. index)
return std::distance(std::begin(A[0]), std::max_element(std::begin(A[0]), std::end(A[0])));
}
/**
* Function which applies the supplied function to every element of 2D vector
* @tparam T typename of the vector
* @param A 2D vector on which function will be applied
* @param func Function to be applied
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T>> apply_function(const std::vector <std::valarray <T>> &A,
T (*func) (const T &)) {
std::vector<std::valarray<T>> B = A; // New vector to store resultant vector
for(auto &b : B) { // For every row in vector
b = b.apply(func); // Apply function to that row
}
return B; // Return new resultant 2D vector
}
/**
* Overloaded operator "*" to multiply given 2D vector with scaler
* @tparam T typename of both vector and the scaler
* @param A 2D vector to which scaler will be multiplied
* @param val Scaler value which will be multiplied
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T> > operator * (const std::vector<std::valarray<T>> &A, const T& val) {
std::vector<std::valarray<T>> B = A; // New vector to store resultant vector
for(auto &b : B) { // For every row in vector
b = b * val; // Multiply row with scalar
}
return B; // Return new resultant 2D vector
}
/**
* Overloaded operator "/" to divide given 2D vector with scaler
* @tparam T typename of the vector and the scaler
* @param A 2D vector to which scaler will be divided
* @param val Scaler value which will be divided
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T> > operator / (const std::vector<std::valarray<T>> &A, const T& val) {
std::vector<std::valarray<T>> B = A; // New vector to store resultant vector
for(auto &b : B) { // For every row in vector
b = b / val; // Divide row by scalar
}
return B; // Return new resultant 2D vector
}
/**
* Function to get transpose of 2D vector
* @tparam T typename of the vector
* @param A 2D vector which will be transposed
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T>> transpose(const std::vector<std::valarray<T>> &A) {
const auto shape = get_shape(A); // Current shape of vector
std::vector <std::valarray <T> > B; // New vector to store result
// Storing transpose values of A in B
for(size_t j = 0; j < shape.second; j++) {
std::valarray <T> row;
row.resize(shape.first);
for(size_t i = 0; i < shape.first; i++) {
row[i] = A[i][j];
}
B.push_back(row);
}
return B; // Return new resultant 2D vector
}
/**
* Overloaded operator "+" to add two 2D vectors
* @tparam T typename of the vector
* @param A First 2D vector
* @param B Second 2D vector
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T> > operator + (const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
const auto shape_a = get_shape(A);
const auto shape_b = get_shape(B);
// If vectors don't have equal shape
if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) {
std::cerr << "ERROR: (vector addition) Supplied vectors have different shapes ";
std::cerr << shape_a << " and " << shape_b << std::endl;
std::exit(EXIT_FAILURE);
}
std::vector<std::valarray <T>> C;
for(size_t i = 0; i < A.size(); i++) { // For every row
C.push_back(A[i] + B[i]); // Elementwise addition
}
return C; // Return new resultant 2D vector
}
/**
* Overloaded operator "-" to add subtract 2D vectors
* @tparam T typename of the vector
* @param A First 2D vector
* @param B Second 2D vector
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T>> operator - (const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
const auto shape_a = get_shape(A);
const auto shape_b = get_shape(B);
// If vectors don't have equal shape
if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) {
std::cerr << "ERROR: (vector subtraction) Supplied vectors have different shapes ";
std::cerr << shape_a << " and " << shape_b << std::endl;
std::exit(EXIT_FAILURE);
}
std::vector<std::valarray<T>> C; // Vector to store result
for(size_t i = 0; i < A.size(); i++) { // For every row
C.push_back(A[i] - B[i]); // Elementwise subtraction
}
return C; // Return new resultant 2D vector
}
/**
* Function to multiply two 2D vectors
* @tparam T typename of the vector
* @param A First 2D vector
* @param B Second 2D vector
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T>> multiply(const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
const auto shape_a = get_shape(A);
const auto shape_b = get_shape(B);
// If vectors are not eligible for multiplication
if(shape_a.second != shape_b.first ) {
std::cerr << "ERROR: (multiply) Supplied vectors are not eligible for multiplication ";
std::cerr << shape_a << " and " << shape_b << std::endl;
std::exit(EXIT_FAILURE);
}
std::vector<std::valarray<T>> C; // Vector to store result
// Normal matrix multiplication
for (size_t i = 0; i < shape_a.first; i++) {
std::valarray<T> row;
row.resize(shape_b.second);
for(size_t j = 0; j < shape_b.second; j++) {
for(size_t k = 0; k < shape_a.second; k++) {
row[j] += A[i][k] * B[k][j];
}
}
C.push_back(row);
}
return C; // Return new resultant 2D vector
}
/**
* Function to get the Hadamard product of two 2D vectors
* @tparam T typename of the vector
* @param A First 2D vector
* @param B Second 2D vector
* @return new resultant vector
*/
template <typename T>
std::vector <std::valarray <T>> hadamard_product(const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
const auto shape_a = get_shape(A);
const auto shape_b = get_shape(B);
// If vectors are not eligible for hadamard product
if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) {
std::cerr << "ERROR: (hadamard_product) Supplied vectors have different shapes ";
std::cerr << shape_a << " and " << shape_b << std::endl;
std::exit(EXIT_FAILURE);
}
std::vector<std::valarray<T>> C; // Vector to store result
for(size_t i = 0; i < A.size(); i++) {
C.push_back(A[i] * B[i]); // Elementwise multiplication
}
return C; // Return new resultant 2D vector
}
} // namespace machine_learning
#endif