feat: Add Neural Network (Multilayer Perceptron) (#1025)

* Completed NN * Made changes * Added return in identity function * Added <random> and fixed namespace naming * clang-tidy changes * Update machine_learning/neural_network.cpp Co-authored-by: David Leal <halfpacho@gmail.com> * Update machine_learning/neural_network.cpp Co-authored-by: David Leal <halfpacho@gmail.com> * Update machine_learning/neural_network.cpp Co-authored-by: David Leal <halfpacho@gmail.com> * Update machine_learning/vector_ops.hpp Co-authored-by: David Leal <halfpacho@gmail.com> * Update machine_learning/vector_ops.hpp Co-authored-by: David Leal <halfpacho@gmail.com> * Update machine_learning/neural_network.cpp Co-authored-by: David Leal <halfpacho@gmail.com> * Update machine_learning/neural_network.cpp Co-authored-by: David Leal <halfpacho@gmail.com> * added std::cerr and changed argmax's namespace * Done suggested changes * Fixed a comment * clang-tidy fixes Co-authored-by: David Leal <halfpacho@gmail.com>
2023-10-11 13:05:55 +08:00 · 2020-08-20 00:55:32 +05:30 · 2020-08-20 00:55:32 +05:30 · 4a34bec125
commit 4a34bec125
parent dfe5bd7638
3 changed files with 1426 additions and 0 deletions
--- a/machine_learning/iris.csv
+++ b/machine_learning/iris.csv
@ -0,0 +1,152 @@
 https://archive.ics.uci.edu/ml/datasets/iris
 sepal length in cm,sepal width in cm,petal length in cm,petal width in cm
 5.1,3.5,1.4,.2,0
 4.9,3,1.4,.2,0
 4.7,3.2,1.3,.2,0
 4.6,3.1,1.5,.2,0
 5,3.6,1.4,.2,0
 5.4,3.9,1.7,.4,0
 4.6,3.4,1.4,.3,0
 5,3.4,1.5,.2,0
 4.4,2.9,1.4,.2,0
 4.9,3.1,1.5,.1,0
 5.4,3.7,1.5,.2,0
 4.8,3.4,1.6,.2,0
 4.8,3,1.4,.1,0
 4.3,3,1.1,.1,0
 5.8,4,1.2,.2,0
 5.7,4.4,1.5,.4,0
 5.4,3.9,1.3,.4,0
 5.1,3.5,1.4,.3,0
 5.7,3.8,1.7,.3,0
 5.1,3.8,1.5,.3,0
 5.4,3.4,1.7,.2,0
 5.1,3.7,1.5,.4,0
 4.6,3.6,1,.2,0
 5.1,3.3,1.7,.5,0
 4.8,3.4,1.9,.2,0
 5,3,1.6,.2,0
 5,3.4,1.6,.4,0
 5.2,3.5,1.5,.2,0
 5.2,3.4,1.4,.2,0
 4.7,3.2,1.6,.2,0
 4.8,3.1,1.6,.2,0
 5.4,3.4,1.5,.4,0
 5.2,4.1,1.5,.1,0
 5.5,4.2,1.4,.2,0
 4.9,3.1,1.5,.2,0
 5,3.2,1.2,.2,0
 5.5,3.5,1.3,.2,0
 4.9,3.6,1.4,.1,0
 4.4,3,1.3,.2,0
 5.1,3.4,1.5,.2,0
 5,3.5,1.3,.3,0
 4.5,2.3,1.3,.3,0
 4.4,3.2,1.3,.2,0
 5,3.5,1.6,.6,0
 5.1,3.8,1.9,.4,0
 4.8,3,1.4,.3,0
 5.1,3.8,1.6,.2,0
 4.6,3.2,1.4,.2,0
 5.3,3.7,1.5,.2,0
 5,3.3,1.4,.2,0
 7,3.2,4.7,1.4,1
 6.4,3.2,4.5,1.5,1
 6.9,3.1,4.9,1.5,1
 5.5,2.3,4,1.3,1
 6.5,2.8,4.6,1.5,1
 5.7,2.8,4.5,1.3,1
 6.3,3.3,4.7,1.6,1
 4.9,2.4,3.3,1,1
 6.6,2.9,4.6,1.3,1
 5.2,2.7,3.9,1.4,1
 5,2,3.5,1,1
 5.9,3,4.2,1.5,1
 6,2.2,4,1,1
 6.1,2.9,4.7,1.4,1
 5.6,2.9,3.6,1.3,1
 6.7,3.1,4.4,1.4,1
 5.6,3,4.5,1.5,1
 5.8,2.7,4.1,1,1
 6.2,2.2,4.5,1.5,1
 5.6,2.5,3.9,1.1,1
 5.9,3.2,4.8,1.8,1
 6.1,2.8,4,1.3,1
 6.3,2.5,4.9,1.5,1
 6.1,2.8,4.7,1.2,1
 6.4,2.9,4.3,1.3,1
 6.6,3,4.4,1.4,1
 6.8,2.8,4.8,1.4,1
 6.7,3,5,1.7,1
 6,2.9,4.5,1.5,1
 5.7,2.6,3.5,1,1
 5.5,2.4,3.8,1.1,1
 5.5,2.4,3.7,1,1
 5.8,2.7,3.9,1.2,1
 6,2.7,5.1,1.6,1
 5.4,3,4.5,1.5,1
 6,3.4,4.5,1.6,1
 6.7,3.1,4.7,1.5,1
 6.3,2.3,4.4,1.3,1
 5.6,3,4.1,1.3,1
 5.5,2.5,4,1.3,1
 5.5,2.6,4.4,1.2,1
 6.1,3,4.6,1.4,1
 5.8,2.6,4,1.2,1
 5,2.3,3.3,1,1
 5.6,2.7,4.2,1.3,1
 5.7,3,4.2,1.2,1
 5.7,2.9,4.2,1.3,1
 6.2,2.9,4.3,1.3,1
 5.1,2.5,3,1.1,1
 5.7,2.8,4.1,1.3,1
 6.3,3.3,6,2.5,2
 5.8,2.7,5.1,1.9,2
 7.1,3,5.9,2.1,2
 6.3,2.9,5.6,1.8,2
 6.5,3,5.8,2.2,2
 7.6,3,6.6,2.1,2
 4.9,2.5,4.5,1.7,2
 7.3,2.9,6.3,1.8,2
 6.7,2.5,5.8,1.8,2
 7.2,3.6,6.1,2.5,2
 6.5,3.2,5.1,2,2
 6.4,2.7,5.3,1.9,2
 6.8,3,5.5,2.1,2
 5.7,2.5,5,2,2
 5.8,2.8,5.1,2.4,2
 6.4,3.2,5.3,2.3,2
 6.5,3,5.5,1.8,2
 7.7,3.8,6.7,2.2,2
 7.7,2.6,6.9,2.3,2
 6,2.2,5,1.5,2
 6.9,3.2,5.7,2.3,2
 5.6,2.8,4.9,2,2
 7.7,2.8,6.7,2,2
 6.3,2.7,4.9,1.8,2
 6.7,3.3,5.7,2.1,2
 7.2,3.2,6,1.8,2
 6.2,2.8,4.8,1.8,2
 6.1,3,4.9,1.8,2
 6.4,2.8,5.6,2.1,2
 7.2,3,5.8,1.6,2
 7.4,2.8,6.1,1.9,2
 7.9,3.8,6.4,2,2
 6.4,2.8,5.6,2.2,2
 6.3,2.8,5.1,1.5,2
 6.1,2.6,5.6,1.4,2
 7.7,3,6.1,2.3,2
 6.3,3.4,5.6,2.4,2
 6.4,3.1,5.5,1.8,2
 6,3,4.8,1.8,2
 6.9,3.1,5.4,2.1,2
 6.7,3.1,5.6,2.4,2
 6.9,3.1,5.1,2.3,2
 5.8,2.7,5.1,1.9,2
 6.8,3.2,5.9,2.3,2
 6.7,3.3,5.7,2.5,2
 6.7,3,5.2,2.3,2
 6.3,2.5,5,1.9,2
 6.5,3,5.2,2,2
 6.2,3.4,5.4,2.3,2
 5.9,3,5.1,1.8,2
--- a/machine_learning/neural_network.cpp
+++ b/machine_learning/neural_network.cpp
@ -0,0 +1,790 @@
 /**
 * @file 
 * @author [Deep Raval](https://github.com/imdeep2905)
 * 
 * @brief Implementation of [Multilayer Perceptron](https://en.wikipedia.org/wiki/Multilayer_perceptron).
 * 
 * @details
 * A multilayer perceptron (MLP) is a class of feedforward artificial neural network (ANN). The term MLP is used ambiguously, 
 * sometimes loosely to any feedforward ANN, sometimes strictly to refer to networks composed of multiple layers of perceptrons 
 * (with threshold activation). Multilayer perceptrons are sometimes colloquially referred to as "vanilla" neural networks, 
 * especially when they have a single hidden layer.
 * 
 * An MLP consists of at least three layers of nodes: an input layer, a hidden layer and an output layer. Except for the 
 * input nodes, each node is a neuron that uses a nonlinear activation function. MLP utilizes a supervised learning technique 
 * called backpropagation for training. Its multiple layers and non-linear activation distinguish MLP from a linear 
 * perceptron. It can distinguish data that is not linearly separable.
 * 
 * See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training algorithm.
 * 
 * \note This implementation uses mini-batch gradient descent as optimizer and MSE as loss function. Bias is also not included.
 */
 #include "vector_ops.hpp" // Custom header file for vector operations
 #include <iostream>
 #include <valarray>
 #include <vector>
 #include <cmath>
 #include <algorithm>
 #include <chrono>
 #include <string>
 #include <fstream>
 #include <sstream>
 #include <cassert>
 /** \namespace machine_learning
 * \brief Machine learning algorithms
 */
 namespace machine_learning {
    /** \namespace neural_network
     * \brief Neural Network or Multilayer Perceptron
     */   
    namespace neural_network {
        /** \namespace activations
         * \brief Various activation functions used in Neural network
         */         
        namespace activations {
            /**
             * Sigmoid function
             * @param X Value 
             * @return Returns sigmoid(x)
             */  
            double sigmoid (const double &x) {
                return 1.0 / (1.0 + std::exp(-x));
            }
            /**
             * Derivative of sigmoid function
             * @param X Value 
             * @return Returns derivative of sigmoid(x)
             */  
            double dsigmoid (const double &x) {
                return x * (1 - x);
            }
            /**
             * Relu function
             * @param X Value 
             * @returns relu(x)
             */  
            double relu (const double &x) {
                return std::max(0.0, x);
            }
            /**
             * Derivative of relu function
             * @param X Value 
             * @returns derivative of relu(x)
             */  
            double drelu (const double &x) {
                return x >= 0.0 ? 1.0 : 0.0;
            }
            /**
             * Tanh function
             * @param X Value 
             * @return Returns tanh(x)
             */  
            double tanh (const double &x) {
                return 2 / (1 + std::exp(-2 * x)) - 1;
            }
            /**
             * Derivative of Sigmoid function
             * @param X Value 
             * @return Returns derivative of tanh(x)
             */  
            double dtanh (const double &x) {
                return 1 - x * x;
            }
        } // namespace activations
        /** \namespace util_functions
         * \brief Various utility functions used in Neural network
         */      
        namespace util_functions {
            /**
             * Square function
             * @param X Value 
             * @return Returns x * x
             */  
            double square(const double &x) {
                return x * x;
            }
            /**
             * Identity function
             * @param X Value 
             * @return Returns x 
             */  
            double identity_function(const double &x) {
                return x;
            }
        } // namespace util_functions
        /** \namespace layers
         * \brief This namespace contains layers used 
         * in MLP.
         */   
        namespace layers {
            /**
             * neural_network::layers::DenseLayer class is used to store all necessary information about 
             * the layers (i.e. neurons, activation and kernal). This class 
             * is used by NeuralNetwork class to store layers.
             * 
             */
            class DenseLayer {
                public:
                    // To store activation function and it's derivative
                    double (*activation_function)(const double &); 
                    double (*dactivation_function)(const double &);
                    int neurons; // To store number of neurons (used in summary)
                    std::string activation; // To store activation name (used in summary)
                    std::vector <std::valarray <double>> kernal; // To store kernal (aka weights)
                    /**
                     * Constructor for neural_network::layers::DenseLayer class
                     * @param neurons number of neurons
                     * @param activation activation function for layer
                     * @param kernal_shape shape of kernal 
                     * @param random_kernal flag for whether to intialize kernal randomly
                     */
                    DenseLayer(const int &neurons, 
                          const std::string &activation,
                          const std::pair<size_t, size_t> &kernal_shape,
                          const bool &random_kernal) {
                        // Choosing activation (and it's derivative)
                        if (activation == "sigmoid") {
                            activation_function = neural_network::activations::sigmoid;
                            dactivation_function = neural_network::activations::sigmoid;
                        }
                        else if (activation == "relu") {
                            activation_function = neural_network::activations::relu;
                            dactivation_function = neural_network::activations::drelu;
                        }
                        else if (activation == "tanh") {
                            activation_function = neural_network::activations::tanh;
                            dactivation_function = neural_network::activations::dtanh;
                        }
                        else if (activation == "none") {
                            // Set identity function in casse of none is supplied
                            activation_function = neural_network::util_functions::identity_function;
                            dactivation_function = neural_network::util_functions::identity_function;
                        }
                        else {
                            // If supplied activation is invalid
                            std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, ";
                            std::cerr << "Expected from {none, sigmoid, relu, tanh} got ";
                            std::cerr << activation << std::endl;
                            std::exit(EXIT_FAILURE);
                        }
                        this -> activation = activation; // Setting activation name
                        this -> neurons = neurons; // Setting number of neurons
                        // Initialize kernal according to flag
                        if(random_kernal) {
                            uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0);
                        }
                        else {
                            unit_matrix_initialization(kernal, kernal_shape);
                        }
                    }
                    /**
                     * Constructor for neural_network::layers::DenseLayer class
                     * @param neurons number of neurons
                     * @param activation activation function for layer
                     * @param kernal values of kernal (useful in loading model)
                     */
                    DenseLayer (const int &neurons, 
                           const std::string &activation, 
                           const std::vector <std::valarray<double>> &kernal) {
                        // Choosing activation (and it's derivative)
                        if (activation == "sigmoid") {
                            activation_function = neural_network::activations::sigmoid;
                            dactivation_function = neural_network::activations::sigmoid;
                        }
                        else if (activation == "relu") {
                            activation_function = neural_network::activations::relu;
                            dactivation_function = neural_network::activations::drelu;
                        }
                        else if (activation == "tanh") {
                            activation_function = neural_network::activations::tanh;
                            dactivation_function = neural_network::activations::dtanh;
                        }
                        else if (activation == "none") {
                            // Set identity function in casse of none is supplied
                            activation_function = neural_network::util_functions::identity_function;
                            dactivation_function = neural_network::util_functions::identity_function;
                        }
                        else {
                            // If supplied activation is invalid
                            std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, ";
                            std::cerr << "Expected from {none, sigmoid, relu, tanh} got ";
                            std::cerr << activation << std::endl;
                            std::exit(EXIT_FAILURE);
                        }
                        this -> activation = activation; // Setting activation name
                        this -> neurons = neurons; // Setting number of neurons
                        this -> kernal = kernal;  // Setting supplied kernal values                 
                    }
                /**
                 * Copy Constructor for class DenseLayer. 
                 * 
                 * @param model instance of class to be copied.
                 */
                DenseLayer(const DenseLayer &layer) = default;   
                /**
                 * Destructor for class DenseLayer. 
                 */                   
                ~DenseLayer() = default;
                /**
                 * Copy assignment operator for class DenseLayer
                 */
                DenseLayer& operator = (const DenseLayer &layer) = default;
                /**
                 * Move constructor for class DenseLayer
                 */
                DenseLayer(DenseLayer &&) = default;
                /**
                 * Move assignment operator for class DenseLayer
                 */
                DenseLayer& operator = (DenseLayer &&) = default;
            };
        } // namespace layers
         /**
          * The NeuralNetwork class implements an MLP. This class is
          * used by the end user to create and train networks.
          * 
          */
        class NeuralNetwork {
            private:
                std::vector <neural_network::layers::DenseLayer> layers; // To store layers
                /**
                 * Private Constructor for class NeuralNetwork. This constructor
                 * is used internally to load model.
                 * @param config vector containing pair (neurons, activation)
                 * @param kernals vector containing all pretrained kernals
                 */                
                NeuralNetwork(const std::vector <std::pair<int, std::string>> &config,
                              const std::vector <std::vector<std::valarray<double>>> &kernals) {
                    // First layer should not have activation
                    if(config.begin() -> second != "none") {
                        std::cerr << "ERROR: First layer can't have activation other than none";
                        std::cerr << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    // Network should have atleast two layers
                    if(config.size() <= 1) {
                        std::cerr << "ERROR: Invalid size of network, ";
                        std::cerr << "Atleast two layers are required";
                        std::exit(EXIT_FAILURE);
                    }
                    // Reconstructing all pretrained layers
                    for(size_t i = 0; i < config.size(); i++) {
                        layers.emplace_back(neural_network::layers::DenseLayer(config[i].first, 
                                               config[i].second,
                                               kernals[i])); 
                    } 
                    std::cout << "INFO: Network constructed successfully" << std::endl;                  
                }
                /**
                 * Private function to get detailed predictions (i.e.
                 * activated neuron values). This function is used in
                 * backpropagation, single predict and batch predict.
                 * @param X input vector
                 */
                std::vector<std::vector<std::valarray <double>>> 
                __detailed_single_prediction (const std::vector<std::valarray <double>> &X) {
                    std::vector<std::vector < std::valarray <double> >> details;
                    std::vector < std::valarray <double> > current_pass = X;
                    details.emplace_back(X);
                    for(const auto &l : layers) {
                        current_pass = multiply(current_pass, l.kernal);
                        current_pass = apply_function(current_pass, l.activation_function);
                        details.emplace_back(current_pass);
                    }
                    return details;
                }
            public:
                 /**
                  * Default Constructor for class NeuralNetwork. This constructor
                  * is used to create an empty variable of type NeuralNetwork,
                  * i.e. a network whose layer list starts out empty.
                  */   
                 NeuralNetwork() = default;
                /**
                 * Constructor for class NeuralNetwork. This constructor
                 * is used by user.
                 * @param config vector containing pair (neurons, activation)
                 */   
                explicit NeuralNetwork(const std::vector <std::pair<int, std::string>> &config) {
                    // First layer should not have activation
                    if(config.begin() -> second != "none") {
                        std::cerr << "ERROR: First layer can't have activation other than none";
                        std::cerr << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    // Network should have atleast two layers
                    if(config.size() <= 1) {
                        std::cerr << "ERROR: Invalid size of network, ";
                        std::cerr << "Atleast two layers are required";
                        std::exit(EXIT_FAILURE);
                    }
                    // Separately creating first layer so it can have unit matrix 
                    // as kernal.
                    layers.push_back(neural_network::layers::DenseLayer(config[0].first, 
                                           config[0].second, 
                                           {config[0].first, config[0].first},
                                           false));
                    // Creating remaining layers
                    for(size_t i = 1; i < config.size(); i++) {
                        layers.push_back(neural_network::layers::DenseLayer(config[i].first, 
                                               config[i].second,
                                               {config[i - 1].first, config[i].first},
                                               true));
                    }
                    std::cout << "INFO: Network constructed successfully" << std::endl;
                }
                 /**
                  * Copy Constructor for class NeuralNetwork (compiler generated).
                  * 
                  * @param model instance of class to be copied.
                  */
                 NeuralNetwork(const NeuralNetwork &model) = default;   
                 /**
                  * Destructor for class NeuralNetwork (compiler generated).
                  */                   
                 ~NeuralNetwork() = default;
                 /**
                  * Copy assignment operator for class NeuralNetwork (compiler generated).
                  *
                  * @param model instance of class to be copied.
                  */
                 NeuralNetwork& operator = (const NeuralNetwork &model) = default;
                 /**
                  * Move constructor for class NeuralNetwork (compiler generated).
                  */
                 NeuralNetwork(NeuralNetwork &&) = default;
                 /**
                  * Move assignment operator for class NeuralNetwork (compiler generated).
                  */
                 NeuralNetwork& operator = (NeuralNetwork &&) = default;
                /**
                 * Function to get X and Y from csv file (where X = data, Y = label)
                 * @param file_name csv file name
                 * @param last_label flag for whether label is in first or last column
                 * @param normalize flag for whether to normalize data 
                 * @param slip_lines number of lines to skip
                 * @return returns pair of X and Y
                 */  
                std::pair<std::vector<std::vector<std::valarray<double>>>, std::vector<std::vector<std::valarray<double>>>>
                get_XY_from_csv(const std::string &file_name, 
                                     const bool &last_label, 
                                     const bool &normalize,
                                     const int &slip_lines = 1) {
                    std::ifstream in_file; // Ifstream to read file
                    in_file.open(file_name.c_str(), std::ios::in); // Open file
                    std::vector <std::vector<std::valarray<double>>> X, Y; // To store X and Y
                    std::string line; // To store each line
                    // Skip lines
                    for(int i = 0; i < slip_lines; i ++) {
                        std::getline(in_file, line, '\n'); // Ignore line
                    }
                    // While file has information
                    while(!in_file.eof() && std::getline(in_file, line, '\n'))
                    {
                        std::valarray <double> x_data, y_data; // To store single sample and label
                        std::stringstream ss(line); // Constructing stringstream from line
                        std::string token; // To store each token in line (seprated by ',')
                        while(std::getline(ss, token, ',')) { // For each token
                            // Insert numerical value of token in x_data
                            x_data = insert_element(x_data, std::stod(token));
                        } 
                        // If label is in last column
                        if(last_label) {
                            y_data.resize(this -> layers.back().neurons);
                            // If task is classification
                            if(y_data.size() > 1) {
                                y_data[x_data[x_data.size() - 1]] = 1;
                            }
                            // If task is regrssion (of single value)
                            else {
                                y_data[0] = x_data[x_data.size() - 1];
                            } 
                            x_data = pop_back(x_data); // Remove label from x_data
                        }
                        else {
                            y_data.resize(this -> layers.back().neurons);
                            // If task is classification
                            if(y_data.size() > 1) {
                                y_data[x_data[x_data.size() - 1]] = 1;
                            }
                            // If task is regrssion (of single value)
                            else {
                                y_data[0] = x_data[x_data.size() - 1];
                            } 
                            x_data = pop_front(x_data); // Remove label from x_data
                        }
                        // Push collected X_data and y_data in X and Y
                        X.push_back({x_data});
                        Y.push_back({y_data});
                    }
                    in_file.close();
                    // Normalize training data if flag is set
                    if(normalize) {
                        // Scale data between 0 and 1 using min-max scaler
                        X = minmax_scaler(X, 0.01, 1.0);
                    }
                    return make_pair(X, Y); // Return pair of X and Y
                }
                /**
                 * Function to get prediction of model on single sample.
                 * @param X array of feature vectors
                 * @return returns predictions as vector
                 */  
                std::vector<std::valarray <double>> 
                single_predict (const std::vector<std::valarray <double>> &X) {
                    // Get activations of all layers
                    auto activations = this -> __detailed_single_prediction(X);
                    // Return activations of last layer (actual predicted values)
                    return activations.back();
                }
                /**
                 * Function to get prediction of model on batch
                 * @param X array of feature vectors
                 * @return returns predicted values as vector
                 */  
                std::vector < std::vector <std::valarray<double>>>
                batch_predict (const std::vector <std::vector <std::valarray <double>>> &X) {
                    // Store predicted values
                    std::vector < std::vector <std::valarray<double>>> predicted_batch(X.size());
                    for(size_t i = 0; i < X.size(); i++) { // For every sample
                        // Push predicted values
                        predicted_batch[i] = this -> single_predict(X[i]);
                    }
                    return predicted_batch; // Return predicted values
                }
                /**
                 * Function to fit model on supplied data
                 * @param X array of feature vectors
                 * @param Y array of target values
                 * @param epochs number of epochs (default = 100)
                 * @param learning_rate learning rate (default = 0.01)
                 * @param batch_size batch size for gradient descent (default = 32)
                 * @param shuffle flag for whether to shuffle data (default = true)
                 */  
                void fit(const std::vector < std::vector <std::valarray<double>>>  &X_, 
                         const std::vector < std::vector <std::valarray<double>>>  &Y_, 
                         const int &epochs = 100, 
                         const double &learning_rate = 0.01,
                         const size_t &batch_size = 32,
                         const bool &shuffle = true) {
                    std::vector < std::vector <std::valarray<double>>> X = X_, Y = Y_;
                    // Both label and input data should have same size
                    if (X.size() != Y.size()) {
                        std::cerr << "ERROR : X and Y in fit have different sizes" << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    std::cout << "INFO: Training Started" << std::endl;
                    for (int epoch = 1; epoch <= epochs; epoch++) { // For every epoch
                        // Shuffle X and Y if flag is set
                        if(shuffle) {
                            equal_shuffle(X, Y);
                        }
                        auto start = std::chrono::high_resolution_clock::now(); // Start clock
                        double loss = 0, acc = 0; // Intialize performance metrics with zero
                        // For each starting index of batch
                        for(size_t batch_start = 0; batch_start < X.size(); batch_start += batch_size) {
                            for(size_t i = batch_start; i < std::min(X.size(), batch_start + batch_size); i++) {
                                    std::vector <std::valarray<double>> grad, cur_error, predicted;
                                    auto activations = this -> __detailed_single_prediction(X[i]);
                                    // Gradients vector to store gradients for all layers
                                    // They will be averaged and applied to kernal
                                    std::vector<std::vector<std::valarray<double>>> gradients;
                                    gradients.resize(this -> layers.size());
                                    // First intialize gradients to zero
                                    for(size_t i = 0; i < gradients.size(); i++) {
                                        zeroes_initialization(gradients[i], get_shape(this -> layers[i].kernal));
                                    }
                                    predicted = activations.back(); // Predicted vector
                                    cur_error = predicted - Y[i]; // Absoulute error
                                    // Calculating loss with MSE
                                    loss += sum(apply_function(cur_error, neural_network::util_functions::square));
                                    // If prediction is correct
                                    if(argmax(predicted) == argmax(Y[i])) {
                                        acc += 1;
                                    }
                                    // For every layer (except first) starting from last one
                                    for(size_t j = this -> layers.size() - 1; j >= 1; j--) {
                                        // Backpropogating errors
                                        cur_error = hadamard_product(cur_error, 
                                                                    apply_function(activations[j + 1], 
                                                                    this -> layers[j].dactivation_function));
                                        // Calculating gradient for current layer
                                        grad = multiply(transpose(activations[j]), cur_error);
                                        // Change error according to current kernal values
                                        cur_error = multiply(cur_error, transpose(this -> layers[j].kernal));
                                        // Adding gradient values to collection of gradients
                                        gradients[j] = gradients[j] + grad / double(batch_size);
                                    }
                                    // Applying gradients
                                    for(size_t j = this -> layers.size() - 1; j >= 1; j--) {
                                        // Updating kernal (aka weights)
                                        this -> layers[j].kernal = this -> layers[j].kernal -
                                                            gradients[j] * learning_rate;
                                    }
                                }
                        }
                        auto stop = std::chrono::high_resolution_clock::now(); // Stoping the clock
                        // Calculate time taken by epoch
                        auto duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
                        loss /= X.size(); // Averaging loss
                        acc /= X.size(); // Averaging accuracy
                        std::cout.precision(4); // set output precision to 4 
                        // Printing training stats
                        std::cout << "Training: Epoch " << epoch << '/' << epochs; 
                        std::cout << ", Loss: " << loss;
                        std::cout << ", Accuracy: " << acc;
                        std::cout << ", Taken time: " << duration.count() / 1e6 << " seconds";
                        std::cout << std::endl;
                    }
                    return;
                }
                /**
                 * Function to fit model on data stored in csv file.
                 * Reads the data set with get_XY_from_csv and hands it over to fit.
                 * @param file_name csv file name
                 * @param last_label flag for whether label is in first or last column
                 * @param epochs number of epochs
                 * @param learning_rate learning rate
                 * @param normalize flag for whether to normalize data
                 * @param slip_lines number of lines to skip (default = 1)
                 * @param batch_size batch size for gradient descent (default = 32)
                 * @param shuffle flag for whether to shuffle data (default = true)
                 */
                void fit_from_csv (const std::string &file_name,
                                     const bool &last_label,
                                     const int &epochs,
                                     const double &learning_rate,
                                     const bool &normalize,
                                     const int &slip_lines = 1,
                                     const size_t &batch_size = 32,
                                     const bool &shuffle = true) {
                    // Pair returned by the csv reader: first member is passed as X, second as Y
                    auto training_set = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines);
                    auto &inputs = training_set.first;
                    auto &targets = training_set.second;
                    // Train on the freshly loaded data
                    this -> fit(inputs, targets, epochs, learning_rate, batch_size, shuffle);
                }
                /**
                 * Function to evaluate model on supplied data.
                 * Prints the average loss and the accuracy to std::cout. The loss of a
                 * sample is half of its summed squared error. The program is terminated
                 * with an error message when X and Y do not have the same number of samples.
                 * Note that an empty X divides 0 by 0, so both metrics print as NaN.
                 * @param X array of feature vectors (input data)
                 * @param Y array of target values (label), one entry per entry of X
                 */
                void evaluate(const std::vector< std::vector <std::valarray <double>>> &X, 
                                const std::vector< std::vector <std::valarray <double>>> &Y) {
                    // Every sample needs its own target, otherwise Y[i] below would be read out of bounds
                    if(X.size() != Y.size()) {
                        std::cerr << "ERROR: (evaluate) X and Y have different number of samples: ";
                        std::cerr << X.size() << " and " << Y.size() << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    std::cout << "INFO: Evaluation Started" << std::endl;
                    double acc = 0, loss = 0; // intialize performance metrics with zero
                    for(size_t i = 0; i < X.size(); i++) { // For every sample in input
                        // Get predictions
                        std::vector<std::valarray<double>> pred = this -> single_predict(X[i]);
                        // If predicted class is correct 
                        if(argmax(pred) == argmax(Y[i])) {
                            acc += 1; // Increment accuracy
                        }
                        // Calculating loss - half of the summed squared error of this sample
                        loss += sum(apply_function((Y[i] - pred), 
                                    neural_network::util_functions::square) * 0.5);
                    }
                    acc /= X.size(); // Averaging accuracy
                    loss /= X.size(); // Averaging loss
                    // Printing performance of the model
                    std::cout << "Evaluation: Loss: " << loss;
                    std::cout << ", Accuracy: " << acc << std::endl;
                    return;
                }
                /**
                 * Function to evaluate model on data stored in csv file.
                 * Reads the data set with get_XY_from_csv and hands it over to evaluate.
                 * @param file_name csv file name
                 * @param last_label flag for whether label is in first or last column
                 * @param normalize flag for whether to normalize data
                 * @param slip_lines number of lines to skip (default = 1)
                 */
                void evaluate_from_csv (const std::string &file_name,
                                     const bool &last_label,
                                     const bool &normalize,
                                     const int &slip_lines = 1) {
                    // Pair returned by the csv reader: first member is passed as X, second as Y
                    auto eval_set = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines);
                    auto &inputs = eval_set.first;
                    auto &targets = eval_set.second;
                    // Report loss and accuracy on the loaded data
                    this -> evaluate(inputs, targets);
                }
                /**
                 * Function to save current model.
                 * The program is terminated with an error message when the file can not
                 * be opened or when writing to it fails.
                 * @param _file_name file name to save model (*.model); ".model" is appended
                 * when the supplied name does not contain it
                 */
                void save_model (const std::string &_file_name) {
                    std::string file_name = _file_name;
                    // Adding ".model" extension if it is not already there in name
                    if(file_name.find(".model") == file_name.npos) {
                        file_name += ".model";
                    }
                    // Open file in out|trunc mode
                    std::ofstream out_file(file_name.c_str(), std::ofstream::out | std::ofstream::trunc);
                    // A failed open would otherwise be reported as a successful save
                    if(!out_file.is_open()) {
                        std::cerr << "ERROR: (save_model) Unable to open file: " << file_name << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    /**
                        Format in which model is saved:
                        total_layers
                        neurons(1st neural_network::layers::DenseLayer) activation_name(1st neural_network::layers::DenseLayer)
                        kernal_shape(1st neural_network::layers::DenseLayer)
                        kernal_values
                        .
                        .
                        .
                        neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth neural_network::layers::DenseLayer)
                        kernal_shape(Nth neural_network::layers::DenseLayer)
                        kernal_value
                        For Example, pretrained model with 3 layers:
                        <pre>
                        3
                        4 none
                        4 4
                        1 0 0 0 
                        0 1 0 0 
                        0 0 1 0 
                        0 0 0 1 
                        6 relu
                        4 6
                        -1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653 
                        -0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202 
                        -2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049 
                        -0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741 
                        3 sigmoid
                        6 3
                        0.390267 -0.391703 -0.0989607 
                        0.499234 -0.564539 -0.28097 
                        0.553386 -0.153974 -1.92493 
                        -2.01336 -0.0219682 1.44145 
                        1.72853 -0.465264 -0.705373 
                        -0.908409 -0.740547 0.376416 
                        </pre>
                    */
                    // Saving model in the same format
                    out_file << layers.size() << '\n';
                    for(const auto &layer : this -> layers) {
                        out_file << layer.neurons << ' ' << layer.activation << '\n';
                        const auto shape = get_shape(layer.kernal);
                        out_file << shape.first << ' ' << shape.second << '\n';
                        for(const auto &row : layer.kernal) {
                            for(const auto &val : row) {
                                out_file << val << ' ';
                            }
                            out_file << '\n';
                        }
                    }
                    // Closing flushes the buffer, so write errors show up in the stream state
                    out_file.close();
                    if(out_file.fail()) {
                        std::cerr << "ERROR: (save_model) Unable to write file: " << file_name << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    std::cout << "INFO: Model saved successfully with name : ";
                    std::cout << file_name << std::endl;
                    return;
                }
                /**
                 * Function to load earlier saved model.
                 * Expects the layout written by save_model: the number of layers, then for
                 * each layer its neuron count, activation name, kernal shape and kernal values.
                 * The program is terminated with an error message when the file can not be
                 * opened or when its content can not be parsed.
                 * @param file_name file from which model will be loaded (*.model)
                 * @return instance of NeuralNetwork class with pretrained weights
                 */
                NeuralNetwork load_model (const std::string &file_name) {
                    std::ifstream in_file(file_name.c_str()); // Ifstream to read file
                    // A missing file would otherwise be reported as a successful load
                    if(!in_file.is_open()) {
                        std::cerr << "ERROR: (load_model) Unable to open file: " << file_name << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    std::vector <std::pair<int, std::string>> config; // To store config
                    std::vector <std::vector<std::valarray<double>>> kernals; // To store pretrained kernals
                    // Loading model from saved file format
                    size_t total_layers = 0; 
                    in_file >> total_layers;
                    if(in_file.fail()) {
                        std::cerr << "ERROR: (load_model) Unable to read number of layers from: " << file_name << std::endl;
                        std::exit(EXIT_FAILURE);
                    }
                    for(size_t i = 0; i < total_layers; i++) {
                        int neurons = 0;
                        std::string activation;
                        size_t shape_a = 0, shape_b = 0;
                        std::vector<std::valarray<double>> kernal;
                        in_file >> neurons >> activation >> shape_a >> shape_b;
                        for(size_t r = 0; r < shape_a; r++) {
                            std::valarray<double> row(shape_b);
                            for(size_t c = 0; c < shape_b; c++) {
                                in_file >> row[c];
                            }
                            kernal.push_back(row);
                        }
                        // A failed extraction means the file is truncated or malformed
                        if(in_file.fail()) {
                            std::cerr << "ERROR: (load_model) Unable to read layer " << (i + 1);
                            std::cerr << " from: " << file_name << std::endl;
                            std::exit(EXIT_FAILURE);
                        }
                        config.emplace_back(neurons, activation);
                        kernals.emplace_back(kernal);
                    }
                    std::cout << "INFO: Model loaded successfully" << std::endl;
                    return NeuralNetwork(config, kernals); // Return instance of NeuralNetwork class
                }
                /**
                 * Function to print summary of the network.
                 */
                void summary () {
                    // Printing Summary 
                    std::cout << "===============================================================" << std::endl;
                    std::cout << "\t\t+ MODEL SUMMARY +\t\t\n";
                    std::cout << "===============================================================" << std::endl;
                    for(size_t i = 1; i <= layers.size(); i++) { // For every layer
                        std::cout << i << ")";
                        std::cout << " Neurons : " << layers[i - 1].neurons; // number of neurons
                        std::cout << ", Activation : " << layers[i - 1].activation; // activation
                        std::cout << ", Kernal Shape : " << get_shape(layers[i - 1].kernal); // kernal shape
                        std::cout << std::endl;
                    }
                    std::cout << "===============================================================" << std::endl;
                    return;
                }
        };
    } // namespace neural_network
 } // namespace machine_learning
 /**
 * Function to test neural network
 * @returns none
 */
 static void test() {
    // Creating network with 3 layers for "iris.csv"
    machine_learning::neural_network::NeuralNetwork myNN =
    machine_learning::neural_network::NeuralNetwork({
        {4, "none"}, // First layer with 3 neurons and "none" as activation
        {6, "relu"}, // Second layer with 6 neurons and "relu" as activation
        {3, "sigmoid"} // Third layer with 3 neurons and "sigmoid" as activation
    });
    // Printing summary of model
    myNN.summary();
    // Training Model
    myNN.fit_from_csv("iris.csv", true, 100, 0.3, false, 2, 32, true);
    // Testing predictions of model
    assert(machine_learning::argmax(myNN.single_predict({{5,3.4,1.6,0.4}})) == 0);
    assert(machine_learning::argmax(myNN.single_predict({{6.4,2.9,4.3,1.3}})) == 1);
    assert(machine_learning::argmax(myNN.single_predict({{6.2,3.4,5.4,2.3}})) == 2);
    return;
 }
 /**
 * Driver Code
 * @returns 0 on exit
 */
 int main() {
    test();  // Run the self-test
    return 0;
 }
--- a/machine_learning/vector_ops.hpp
+++ b/machine_learning/vector_ops.hpp
@ -0,0 +1,484 @@
 /**
 * @file vector_ops.hpp
 * @author [Deep Raval](https://github.com/imdeep2905)
 * 
 * @brief Various functions for vectors associated with
 * [NeuralNetwork (aka Multilayer Perceptron)](https://en.wikipedia.org/wiki/Multilayer_perceptron).
 * 
 */
 #ifndef VECTOR_OPS_FOR_NN
 #define VECTOR_OPS_FOR_NN
 #include <iostream>
 #include <algorithm>
 #include <vector>
 #include <valarray>
 #include <chrono>
 #include <random>
 /**
 * @namespace machine_learning
 * @brief Machine Learning algorithms
 */
 namespace machine_learning {
 /**
 * Overloaded operator "<<" to print 2D vector.
 * Every row is written on its own line, each element followed by a space.
 * The precision of the supplied stream is set to 4 and stays that way.
 * @tparam T typename of the vector
 * @param out std::ostream to output
 * @param A 2D vector to be printed
 * @return the stream that was written to
 */
 template <typename T>
 std::ostream &operator<<(std::ostream &out,
                         std::vector<std::valarray<T>> const &A) {
    // Setting output precision to 4 in case of floating point numbers
    out.precision(4);
    for(const auto &a : A) { // For each row in A
        for(const auto &x : a) { // For each element in row
            out << x << ' '; // Print element to the requested stream
        }
        out << '\n';
    }
    return out;
 }
 /**
 * Overloaded operator "<<" to print a pair in the form (p, q).
 * The precision of the supplied stream is set to 4 and stays that way.
 * @tparam T typename of the pair
 * @param out std::ostream to output
 * @param A Pair to be printed
 * @return the stream that was written to
 */
 template <typename T>
 std::ostream &operator<<(std::ostream &out, const std::pair<T, T> &A) {
    // Setting output precision to 4 in case of floating point numbers
    out.precision(4);
    // Printing pair in the form (p, q) to the requested stream
    out << "(" << A.first << ", " << A.second << ")";
    return out;
 }
 /**
 * Overloaded operator "<<" to print a 1D vector.
 * Each element is followed by a space and the line is ended with a newline.
 * The precision of the supplied stream is set to 4 and stays that way.
 * @tparam T typename of the vector
 * @param out std::ostream to output
 * @param A 1D vector to be printed
 * @return the stream that was written to
 */
 template <typename T>
 std::ostream &operator<<(std::ostream &out, const std::valarray<T> &A) {
    // Setting output precision to 4 in case of floating point numbers
    out.precision(4);
    for(const auto &a : A) { // For every element in the vector.
        out << a << ' '; // Print element to the requested stream
    }
    out << '\n';
    return out;
 }
 /**
 * Function to append an element to a 1D vector
 * @tparam T typename of the 1D vector and the element
 * @param A 1D vector whose elements are copied (left untouched)
 * @param ele element to be placed after the last element of A
 * @return new vector holding the elements of A followed by ele
 */
 template <typename T>
 std::valarray<T> insert_element(const std::valarray <T> &A, const T &ele) {
    const size_t old_size = A.size();
    std::valarray<T> extended(old_size + 1); // One slot more than A
    for(size_t pos = 0; pos != old_size; ++pos) {
        extended[pos] = A[pos]; // Keep existing elements in place
    }
    extended[old_size] = ele; // New element goes to the last slot
    return extended;
 }
 /**
 * Function to remove first element from 1D vector
 * @tparam T typename of the vector
 * @param A 1D vector from which first element will be removed
 * @return new resultant vector (empty when A is empty)
 */
 template <typename T>
 std::valarray <T> pop_front(const std::valarray<T> &A) {
    // Nothing to remove; also keeps A.size() - 1 below from wrapping around
    if(A.size() == 0) {
        return std::valarray<T>();
    }
    std::valarray <T> B(A.size() - 1); // New 1D vector to store resultant vector
    for(size_t i = 1; i < A.size(); i++) { // For every (except first) element in A
        B[i - 1] = A[i]; // Copy element in B with left shifted position
    }
    return B; // Return resultant vector
 }
 /**
 * Function to remove last element from 1D vector
 * @tparam T typename of the vector
 * @param A 1D vector from which last element will be removed
 * @return new resultant vector (empty when A is empty)
 */
 template <typename T>
 std::valarray <T> pop_back(const std::valarray<T> &A) {
    // Nothing to remove; also keeps A.size() - 1 below from wrapping around
    if(A.size() == 0) {
        return std::valarray<T>();
    }
    const size_t new_size = A.size() - 1;
    std::valarray <T> B(new_size); // New 1D vector to store resultant vector
    for(size_t i = 0; i < new_size; i++) { // For every (except last) element in A
        B[i] = A[i]; // Copy element in B
    }
    return B; // Return resultant vector
 }
 /**
 * Function to equally shuffle two 3D vectors (used for shuffling training data).
 * Both vectors receive the same permutation, so A[i] and B[i] stay paired.
 * The program is terminated with an error message when the sizes differ.
 * @tparam T typename of the vector
 * @param A First 3D vector
 * @param B Second 3D vector
 */
 template <typename T>
 void equal_shuffle(std::vector < std::vector <std::valarray<T>> >  &A, 
                   std::vector < std::vector <std::valarray<T>> >  &B) {
    // If two vectors have different sizes
    if(A.size() != B.size())
    {
        std::cerr << "ERROR : Can not equally shuffle two vectors with different sizes: ";
        std::cerr << A.size() << " and " << B.size() << std::endl;
        std::exit(EXIT_FAILURE);
    }
    // Seed the generator once per call; reseeding inside the loop can repeat
    // the same index whenever the clock has not advanced between iterations
    std::mt19937 generator(static_cast<std::mt19937::result_type>(
        std::chrono::system_clock::now().time_since_epoch().count()));
    // Fisher-Yates shuffle: position i - 1 swaps with a random position in [0, i - 1]
    for(size_t i = A.size(); i > 1; i--) {
        std::uniform_int_distribution<size_t> pick(0, i - 1);
        const size_t random_index = pick(generator);
        // Swap elements in both A and B with same random index
        std::swap(A[i - 1], A[random_index]);
        std::swap(B[i - 1], B[random_index]);
    }
    return;
 }
 /**
 * Function to initialize given 2D vector using uniform random initialization.
 * Previous content of A is discarded. The generator is seeded from the system clock.
 * @tparam T typename of the vector (a floating point type, as required by
 * std::uniform_real_distribution)
 * @param A 2D vector to be initialized
 * @param shape required shape as (rows, columns)
 * @param low lower limit on value (inclusive)
 * @param high upper limit on value (exclusive)
 */
 template <typename T>
 void uniform_random_initialization(std::vector<std::valarray<T>> &A, 
                   const std::pair<size_t, size_t> &shape, 
                   const T &low, 
                   const T &high) {
    A.clear(); // Drop whatever A held before
    // Uniform distribution in range [low, high)
    std::default_random_engine engine(std::chrono::system_clock::now().time_since_epoch().count());
    std::uniform_real_distribution <T> sampler(low, high);
    for(size_t r = 0; r != shape.first; ++r) {
        std::valarray <T> fresh_row(shape.second); // Row of the requested width
        for(size_t c = 0; c != shape.second; ++c) {
            fresh_row[c] = sampler(engine); // Draw values left to right
        }
        A.push_back(fresh_row);
    }
 }
 /**
 * Function to Initialize 2D vector as unit matrix.
 * Previous content of A is discarded. For a non-square shape the ones are
 * placed on the main diagonal and the remaining rows or columns stay zero.
 * @tparam T typename of the vector
 * @param A 2D vector to be initialized
 * @param shape required shape as (rows, columns)
 */
 template <typename T>
 void unit_matrix_initialization(std::vector<std::valarray<T>> &A, 
                   const std::pair<size_t, size_t> &shape
                   ) {
    A.clear(); // Making A empty 
    for(size_t i = 0; i < shape.first; i++) {
        std::valarray <T> row; // Making empty row which will be inserted in vector
        row.resize(shape.second); // By default all elements are zero
        // Rows below the last column have no diagonal element; writing row[i]
        // there would be out of bounds
        if(i < shape.second) {
            row[i] = T(1); // Insert 1 at ith position 
        }
        A.push_back(row); // Insert new row in vector
    }
    return;
 }
 /**
 * Function to Initialize 2D vector as zeroes.
 * Previous content of A is discarded.
 * @tparam T typename of the vector
 * @param A 2D vector to be initialized
 * @param shape required shape as (rows, columns)
 */
 template <typename T>
 void zeroes_initialization(std::vector<std::valarray<T>> &A, 
                   const std::pair<size_t, size_t> &shape
                   ) {
    // A value-initialized row of the requested width holds only zeroes
    const std::valarray<T> zero_row(shape.second);
    // Replace the content of A by the requested number of such rows
    A.assign(shape.first, zero_row);
 }
 /**
 * Function to get sum of all elements in 2D vector
 * @tparam T typename of the vector
 * @param A 2D vector for which sum is required
 * @return sum of all elements of 2D vector (zero when A has no rows)
 */
 template <typename T>
 T sum(const std::vector<std::valarray<T>> &A) {
    T total = 0;
    // Accumulate the per-row sums from first row to last
    for(size_t r = 0; r < A.size(); ++r) {
        total += A[r].sum();
    }
    return total;
 }
 /**
 * Function to get shape of given 2D vector.
 * The program is terminated with an error message when the rows do not all
 * have the same size.
 * @tparam T typename of the vector
 * @param A 2D vector for which shape is required
 * @return shape as pair (rows, columns); (0, 0) when A has no rows
 */
 template <typename T>
 std::pair<size_t, size_t> get_shape(const std::vector<std::valarray<T>> &A) {
    // An empty vector has no first row to take the width from
    if(A.empty()) {
        return std::make_pair(size_t(0), size_t(0));
    }
    const size_t sub_size = A.front().size();
    for(const auto &a : A) {
        // If supplied vector don't have same shape in all rows
        if(a.size() != sub_size) {
            std::cerr << "ERROR: (get_shape) Supplied vector is not 2D Matrix" << std::endl;
            std::exit(EXIT_FAILURE);
        }
    }
    return std::make_pair(A.size(), sub_size); // Return shape as pair
 }
 /**
 * Function to scale given 3D vector using min-max scaler.
 * Every element of A is expected to have shape (1, X); each of the X columns
 * is scaled independently over all elements of A. A column whose values are
 * all equal is mapped to low. The program is terminated with an error message
 * when the first element of A does not have exactly one row.
 * @tparam T typename of the vector
 * @param A 3D vector which will be scaled
 * @param low new minimum value 
 * @param high new maximum value
 * @return new scaled 3D vector (empty when A is empty)
 */
 template <typename T>
 std::vector<std::vector<std::valarray<T>>>
 minmax_scaler(const std::vector<std::vector<std::valarray<T>>> &A, const T &low, const T &high) {
    std::vector<std::vector<std::valarray<T>>> B = A; // Copying into new vector B
    // Nothing to scale; also avoids reading B[0] of an empty vector
    if(B.empty()) {
        return B;
    }
    const auto shape = get_shape(B[0]); // Storing shape of B's every element
    // As this function is used for scaling training data vector should be of shape (1, X)
    if(shape.first != 1) {
        std::cerr << "ERROR: (MinMax Scaling) Supplied vector is not supported for minmax scaling, shape: ";
        std::cerr << shape << std::endl;
        std::exit(EXIT_FAILURE);
    }
    for(size_t i = 0; i < shape.second; i++) {
        T col_min = B[0][0][i], col_max = B[0][0][i];
        for(size_t j = 0; j < B.size(); j++) {
            // Updating minimum and maximum values
            col_min = std::min(col_min, B[j][0][i]);
            col_max = std::max(col_max, B[j][0][i]);
        }
        const T range = col_max - col_min;
        for(size_t j = 0; j < B.size(); j++) {
            if(range == T(0)) {
                // Constant column: the formula would divide zero by zero
                B[j][0][i] = low;
            } else {
                // Applying min-max scaler formula
                B[j][0][i] = ((B[j][0][i] - col_min) / range) * (high - low) + low;
            }
        }
    }
    return B; // Return new resultant 3D vector
 }
 /**
 * Function to get index of maximum element in 2D vector.
 * The vector must have exactly one row; otherwise the program is terminated
 * with an error message.
 * @tparam T typename of the vector
 * @param A 2D vector of shape (1, X) for which maximum index is required
 * @return index of maximum element within the single row
 */
 template <typename T>
 size_t argmax(const std::vector<std::valarray<T>> &A) {
    // As this function is used on predicted (or target) vector, shape should be (1, X)
    const auto dims = get_shape(A);
    if(dims.first != 1) {
        std::cerr << "ERROR: (argmax) Supplied vector is ineligible for argmax" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    const auto &only_row = A[0];
    const auto first = std::begin(only_row);
    const auto largest = std::max_element(first, std::end(only_row));
    // Offset of the largest element from the start of the row is its index
    return std::distance(first, largest);
 }
 /**
 * Function which applies supplied function to every element of 2D vector
 * @tparam T typename of the vector
 * @param A 2D vector on which function will be applied (left untouched)
 * @param func Function to be applied
 * @return new resultant vector
 */
 template <typename T> 
 std::vector <std::valarray <T>> apply_function(const std::vector <std::valarray <T>> &A, 
                                               T (*func) (const T &)) {
    // Element type follows T so that the template also works for non-double vectors
    std::vector<std::valarray<T>> B = A; // New vector to store resultant vector
    for(auto &b : B) { // For every row in vector
        b = b.apply(func); // Apply function to that row
    }
    return B; // Return new resultant 2D vector
 }
 /**
 * Overloaded operator "*" to multiply given 2D vector with scaler 
 * @tparam T typename of both vector and the scaler
 * @param A 2D vector to which scaler will be multiplied (left untouched)
 * @param val Scaler value which will be multiplied
 * @return new resultant vector
 */
 template <typename T>
 std::vector <std::valarray <T> > operator * (const std::vector<std::valarray<T>> &A, const T& val) {
    // Element type follows T so that the template also works for non-double vectors
    std::vector<std::valarray<T>> B = A; // New vector to store resultant vector
    for(auto &b : B) { // For every row in vector
        b *= val; // Multiply row with scaler
    }
    return B; // Return new resultant 2D vector
 }
 /**
 * Overloaded operator "/" to divide given 2D vector with scaler 
 * @tparam T typename of the vector and the scaler
 * @param A 2D vector which will be divided by the scaler (left untouched)
 * @param val Scaler value to divide by
 * @return new resultant vector
 */
 template <typename T>
 std::vector <std::valarray <T> > operator / (const std::vector<std::valarray<T>> &A, const T& val) {
    // Element type follows T so that the template also works for non-double vectors
    std::vector<std::valarray<T>> B = A; // New vector to store resultant vector
    for(auto &b : B) { // For every row in vector
        b /= val; // Divide row with scaler
    }
    return B; // Return new resultant 2D vector
 }
 /**
 * Function to get transpose of 2D vector
 * @tparam T typename of the vector
 * @param A 2D vector which will be transposed (left untouched)
 * @return new vector in which element (j, i) equals element (i, j) of A
 */
 template <typename T>
 std::vector <std::valarray <T>> transpose(const std::vector<std::valarray<T>> &A) {
    const auto dims = get_shape(A); // (rows, columns) of the input
    std::vector <std::valarray <T> > flipped;
    // Every column of A becomes one row of the result
    for(size_t col = 0; col != dims.second; ++col) {
        std::valarray <T> as_row(dims.first);
        for(size_t r = 0; r != dims.first; ++r) {
            as_row[r] = A[r][col];
        }
        flipped.push_back(as_row);
    }
    return flipped;
 }
 /**
 * Overloaded operator "+" to add two 2D vectors.
 * The program is terminated with an error message when the shapes differ.
 * @tparam T typename of the vector
 * @param A First 2D vector 
 * @param B Second 2D vector
 * @return new vector holding the elementwise sum
 */
 template <typename T>
 std::vector <std::valarray <T> > operator + (const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
    const auto lhs_shape = get_shape(A);
    const auto rhs_shape = get_shape(B);
    const bool same_shape = lhs_shape.first == rhs_shape.first &&
                            lhs_shape.second == rhs_shape.second;
    // Elementwise addition is only defined for equal shapes
    if(!same_shape) {
        std::cerr << "ERROR: (vector addition) Supplied vectors have different shapes ";
        std::cerr << lhs_shape << " and " << rhs_shape << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<std::valarray <T>> total;
    for(size_t r = 0; r != A.size(); ++r) {
        total.push_back(A[r] + B[r]); // Row-wise valarray addition
    }
    return total;
 }
 /**
 * Overloaded operator "-" to subtract two 2D vectors.
 * The program is terminated with an error message when the shapes differ.
 * @tparam T typename of the vector
 * @param A First 2D vector 
 * @param B Second 2D vector (subtracted from A)
 * @return new vector holding the elementwise difference
 */
 template <typename T>
 std::vector <std::valarray <T>> operator - (const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
    const auto lhs_shape = get_shape(A);
    const auto rhs_shape = get_shape(B);
    const bool same_shape = lhs_shape.first == rhs_shape.first &&
                            lhs_shape.second == rhs_shape.second;
    // Elementwise subtraction is only defined for equal shapes
    if(!same_shape) {
        std::cerr << "ERROR: (vector subtraction) Supplied vectors have different shapes ";
        std::cerr << lhs_shape << " and " << rhs_shape << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<std::valarray<T>> difference;
    for(size_t r = 0; r != A.size(); ++r) {
        difference.push_back(A[r] - B[r]); // Row-wise valarray subtraction
    }
    return difference;
 }
 /**
 * Function to multiply two 2D vectors (matrix product).
 * The program is terminated with an error message when the number of columns
 * of A differs from the number of rows of B.
 * @tparam T typename of the vector
 * @param A First 2D vector 
 * @param B Second 2D vector
 * @return new vector with as many rows as A and as many columns as B
 */
 template <typename T>
 std::vector <std::valarray <T>> multiply(const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
    const auto lhs_shape = get_shape(A);
    const auto rhs_shape = get_shape(B);
    // Inner dimensions have to agree
    if(lhs_shape.second != rhs_shape.first ) {
        std::cerr << "ERROR: (multiply) Supplied vectors are not eligible for multiplication ";
        std::cerr << lhs_shape << " and " << rhs_shape << std::endl;
        std::exit(EXIT_FAILURE);
    }
    const size_t out_rows = lhs_shape.first;
    const size_t out_cols = rhs_shape.second;
    const size_t inner = lhs_shape.second;
    std::vector<std::valarray<T>> product;
    for(size_t r = 0; r < out_rows; ++r) {
        std::valarray<T> out_row(out_cols);
        for(size_t c = 0; c < out_cols; ++c) {
            // Dot product of row r of A with column c of B
            T dot = T();
            for(size_t k = 0; k < inner; ++k) {
                dot += A[r][k] * B[k][c];
            }
            out_row[c] = dot;
        }
        product.push_back(out_row);
    }
    return product;
 }
 /**
 * Function to get hadamard (elementwise) product of two 2D vectors.
 * The program is terminated with an error message when the shapes differ.
 * @tparam T typename of the vector
 * @param A First 2D vector 
 * @param B Second 2D vector
 * @return new vector holding the elementwise product
 */
 template <typename T>
 std::vector <std::valarray <T>> hadamard_product(const std::vector<std::valarray<T>> &A, const std::vector<std::valarray<T>> &B) {
    const auto lhs_shape = get_shape(A);
    const auto rhs_shape = get_shape(B);
    const bool same_shape = lhs_shape.first == rhs_shape.first &&
                            lhs_shape.second == rhs_shape.second;
    // Elementwise product is only defined for equal shapes
    if(!same_shape) {
        std::cerr << "ERROR: (hadamard_product) Supplied vectors have different shapes ";
        std::cerr << lhs_shape << " and " << rhs_shape << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::vector<std::valarray<T>> product;
    for(size_t r = 0; r != A.size(); ++r) {
        product.push_back(A[r] * B[r]); // Row-wise valarray multiplication
    }
    return product;
 }
 }  // namespace machine_learning
 #endif