diff --git a/machine_learning/neural_network.cpp b/machine_learning/neural_network.cpp index aad469b14..b477f5e2e 100644 --- a/machine_learning/neural_network.cpp +++ b/machine_learning/neural_network.cpp @@ -1,763 +1,802 @@ /** - * @file + * @file * @author [Deep Raval](https://github.com/imdeep2905) - * - * @brief Implementation of [Multilayer Perceptron] (https://en.wikipedia.org/wiki/Multilayer_perceptron). - * + * + * @brief Implementation of [Multilayer Perceptron] + * (https://en.wikipedia.org/wiki/Multilayer_perceptron). + * * @details - * A multilayer perceptron (MLP) is a class of feedforward artificial neural network (ANN). The term MLP is used ambiguously, - * sometimes loosely to any feedforward ANN, sometimes strictly to refer to networks composed of multiple layers of perceptrons - * (with threshold activation). Multilayer perceptrons are sometimes colloquially referred to as "vanilla" neural networks, - * especially when they have a single hidden layer. - * - * An MLP consists of at least three layers of nodes: an input layer, a hidden layer and an output layer. Except for the - * input nodes, each node is a neuron that uses a nonlinear activation function. MLP utilizes a supervised learning technique - * called backpropagation for training. Its multiple layers and non-linear activation distinguish MLP from a linear - * perceptron. It can distinguish data that is not linearly separable. - * - * See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training algorithm. - * - * \note This implementation uses mini-batch gradient descent as optimizer and MSE as loss function. Bias is also not included. + * A multilayer perceptron (MLP) is a class of feedforward artificial neural + * network (ANN). The term MLP is used ambiguously, sometimes loosely to any + * feedforward ANN, sometimes strictly to refer to networks composed of multiple + * layers of perceptrons (with threshold activation). Multilayer perceptrons are + * sometimes colloquially referred to as "vanilla" neural networks, especially + * when they have a single hidden layer. + * + * An MLP consists of at least three layers of nodes: an input layer, a hidden + * layer and an output layer. Except for the input nodes, each node is a neuron + * that uses a nonlinear activation function. MLP utilizes a supervised learning + * technique called backpropagation for training. Its multiple layers and + * non-linear activation distinguish MLP from a linear perceptron. It can + * distinguish data that is not linearly separable. + * + * See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for + * training algorithm. + * + * \note This implementation uses mini-batch gradient descent as optimizer and + * MSE as loss function. Bias is also not included. 
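 * A sketch of the loss convention noted above, as implemented in fit() and
 * evaluate() further down: for one sample with prediction y_hat and target y,
 * fit() accumulates
 *     loss += sum_k (y_hat_k - y_k)^2
 * and backpropagates the raw error (y_hat_k - y_k) without the factor of 2,
 * while evaluate() reports 0.5 * sum_k (y_k - y_hat_k)^2; both values are
 * averaged over the samples.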
*/ -#include "vector_ops.hpp" // Custom header file for vector operations - +#include +#include +#include +#include +#include #include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include + +#include "vector_ops.hpp" // Custom header file for vector operations /** \namespace machine_learning * \brief Machine learning algorithms */ namespace machine_learning { - /** \namespace neural_network - * \brief Neural Network or Multilayer Perceptron - */ - namespace neural_network { - /** \namespace activations - * \brief Various activation functions used in Neural network - */ - namespace activations { - /** - * Sigmoid function - * @param X Value - * @return Returns sigmoid(x) - */ - double sigmoid (const double &x) { - return 1.0 / (1.0 + std::exp(-x)); - } +/** \namespace neural_network + * \brief Neural Network or Multilayer Perceptron + */ +namespace neural_network { +/** \namespace activations + * \brief Various activation functions used in Neural network + */ +namespace activations { +/** + * Sigmoid function + * @param X Value + * @return Returns sigmoid(x) + */ +double sigmoid(const double &x) { return 1.0 / (1.0 + std::exp(-x)); } - /** - * Derivative of sigmoid function - * @param X Value - * @return Returns derivative of sigmoid(x) - */ - double dsigmoid (const double &x) { - return x * (1 - x); - } +/** + * Derivative of sigmoid function + * @param X Value + * @return Returns derivative of sigmoid(x) + */ +double dsigmoid(const double &x) { return x * (1 - x); } - /** - * Relu function - * @param X Value - * @returns relu(x) - */ - double relu (const double &x) { - return std::max(0.0, x); - } +/** + * Relu function + * @param X Value + * @returns relu(x) + */ +double relu(const double &x) { return std::max(0.0, x); } - /** - * Derivative of relu function - * @param X Value - * @returns derivative of relu(x) - */ - double drelu (const double &x) { - return x >= 0.0 ? 1.0 : 0.0; - } +/** + * Derivative of relu function + * @param X Value + * @returns derivative of relu(x) + */ +double drelu(const double &x) { return x >= 0.0 ? 1.0 : 0.0; } - /** - * Tanh function - * @param X Value - * @return Returns tanh(x) - */ - double tanh (const double &x) { - return 2 / (1 + std::exp(-2 * x)) - 1; - } +/** + * Tanh function + * @param X Value + * @return Returns tanh(x) + */ +double tanh(const double &x) { return 2 / (1 + std::exp(-2 * x)) - 1; } - /** - * Derivative of Sigmoid function - * @param X Value - * @return Returns derivative of tanh(x) - */ - double dtanh (const double &x) { - return 1 - x * x; +/** + * Derivative of Sigmoid function + * @param X Value + * @return Returns derivative of tanh(x) + */ +double dtanh(const double &x) { return 1 - x * x; } +} // namespace activations +/** \namespace util_functions + * \brief Various utility functions used in Neural network + */ +namespace util_functions { +/** + * Square function + * @param X Value + * @return Returns x * x + */ +double square(const double &x) { return x * x; } +/** + * Identity function + * @param X Value + * @return Returns x + */ +double identity_function(const double &x) { return x; } +} // namespace util_functions +/** \namespace layers + * \brief This namespace contains layers used + * in MLP. + */ +namespace layers { +/** + * neural_network::layers::DenseLayer class is used to store all necessary + * information about the layers (i.e. neurons, activation and kernal). This + * class is used by NeuralNetwork class to store layers. 
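// Note on the derivative helpers above: dsigmoid() and dtanh() expect the
// *activated output* a = f(z) rather than the pre-activation z, which is why
// backpropagation later applies them to the stored layer activations.
// A minimal self-check (illustrative only, assuming the namespaces above):
//
//     double z = 0.3;
//     double a = machine_learning::neural_network::activations::sigmoid(z);
//     // g equals sigmoid(z) * (1 - sigmoid(z)), i.e. d(sigmoid)/dz at z
//     double g = machine_learning::neural_network::activations::dsigmoid(a);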
+ * + */ +class DenseLayer { + public: + // To store activation function and it's derivative + double (*activation_function)(const double &); + double (*dactivation_function)(const double &); + int neurons; // To store number of neurons (used in summary) + std::string activation; // To store activation name (used in summary) + std::vector> kernal; // To store kernal (aka weights) + + /** + * Constructor for neural_network::layers::DenseLayer class + * @param neurons number of neurons + * @param activation activation function for layer + * @param kernal_shape shape of kernal + * @param random_kernal flag for whether to intialize kernal randomly + */ + DenseLayer(const int &neurons, const std::string &activation, + const std::pair &kernal_shape, + const bool &random_kernal) { + // Choosing activation (and it's derivative) + if (activation == "sigmoid") { + activation_function = neural_network::activations::sigmoid; + dactivation_function = neural_network::activations::sigmoid; + } else if (activation == "relu") { + activation_function = neural_network::activations::relu; + dactivation_function = neural_network::activations::drelu; + } else if (activation == "tanh") { + activation_function = neural_network::activations::tanh; + dactivation_function = neural_network::activations::dtanh; + } else if (activation == "none") { + // Set identity function in casse of none is supplied + activation_function = + neural_network::util_functions::identity_function; + dactivation_function = + neural_network::util_functions::identity_function; + } else { + // If supplied activation is invalid + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid argument. Expected {none, sigmoid, relu, " + "tanh} got "; + std::cerr << activation << std::endl; + std::exit(EXIT_FAILURE); + } + this->activation = activation; // Setting activation name + this->neurons = neurons; // Setting number of neurons + // Initialize kernal according to flag + if (random_kernal) { + uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0); + } else { + unit_matrix_initialization(kernal, kernal_shape); + } + } + /** + * Constructor for neural_network::layers::DenseLayer class + * @param neurons number of neurons + * @param activation activation function for layer + * @param kernal values of kernal (useful in loading model) + */ + DenseLayer(const int &neurons, const std::string &activation, + const std::vector> &kernal) { + // Choosing activation (and it's derivative) + if (activation == "sigmoid") { + activation_function = neural_network::activations::sigmoid; + dactivation_function = neural_network::activations::sigmoid; + } else if (activation == "relu") { + activation_function = neural_network::activations::relu; + dactivation_function = neural_network::activations::drelu; + } else if (activation == "tanh") { + activation_function = neural_network::activations::tanh; + dactivation_function = neural_network::activations::dtanh; + } else if (activation == "none") { + // Set identity function in casse of none is supplied + activation_function = + neural_network::util_functions::identity_function; + dactivation_function = + neural_network::util_functions::identity_function; + } else { + // If supplied activation is invalid + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid argument. 
Expected {none, sigmoid, relu, " + "tanh} got "; + std::cerr << activation << std::endl; + std::exit(EXIT_FAILURE); + } + this->activation = activation; // Setting activation name + this->neurons = neurons; // Setting number of neurons + this->kernal = kernal; // Setting supplied kernal values + } + + /** + * Copy Constructor for class DenseLayer. + * + * @param model instance of class to be copied. + */ + DenseLayer(const DenseLayer &layer) = default; + + /** + * Destructor for class DenseLayer. + */ + ~DenseLayer() = default; + + /** + * Copy assignment operator for class DenseLayer + */ + DenseLayer &operator=(const DenseLayer &layer) = default; + + /** + * Move constructor for class DenseLayer + */ + DenseLayer(DenseLayer &&) = default; + + /** + * Move assignment operator for class DenseLayer + */ + DenseLayer &operator=(DenseLayer &&) = default; +}; +} // namespace layers +/** + * NeuralNetwork class is implements MLP. This class is + * used by actual user to create and train networks. + * + */ +class NeuralNetwork { + private: + std::vector layers; // To store layers + /** + * Private Constructor for class NeuralNetwork. This constructor + * is used internally to load model. + * @param config vector containing pair (neurons, activation) + * @param kernals vector containing all pretrained kernals + */ + NeuralNetwork( + const std::vector> &config, + const std::vector>> &kernals) { + // First layer should not have activation + if (config.begin()->second != "none") { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "First layer can't have activation other than none got " + << config.begin()->second; + std::cerr << std::endl; + std::exit(EXIT_FAILURE); + } + // Network should have atleast two layers + if (config.size() <= 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid size of network, "; + std::cerr << "Atleast two layers are required"; + std::exit(EXIT_FAILURE); + } + // Reconstructing all pretrained layers + for (size_t i = 0; i < config.size(); i++) { + layers.emplace_back(neural_network::layers::DenseLayer( + config[i].first, config[i].second, kernals[i])); + } + std::cout << "INFO: Network constructed successfully" << std::endl; + } + /** + * Private function to get detailed predictions (i.e. + * activated neuron values). This function is used in + * backpropagation, single predict and batch predict. + * @param X input vector + */ + std::vector>> + __detailed_single_prediction(const std::vector> &X) { + std::vector>> details; + std::vector> current_pass = X; + details.emplace_back(X); + for (const auto &l : layers) { + current_pass = multiply(current_pass, l.kernal); + current_pass = apply_function(current_pass, l.activation_function); + details.emplace_back(current_pass); + } + return details; + } + + public: + /** + * Default Constructor for class NeuralNetwork. This constructor + * is used to create empty variable of type NeuralNetwork class. + */ + NeuralNetwork() = default; + + /** + * Constructor for class NeuralNetwork. This constructor + * is used by user. 
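// For example (a sketch mirroring the test() configuration at the end of
// this file), the call
//
//     machine_learning::neural_network::NeuralNetwork net(
//         {{4, "none"}, {6, "relu"}, {3, "sigmoid"}});
//
// builds a first layer holding a 4x4 unit (identity) kernal and two trainable
// layers with uniformly random kernals of shape (4, 6) and (6, 3); a forward
// pass on a 1x4 input then records activations of shape 1x4, 1x4, 1x6 and 1x3.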
+ * @param config vector containing pair (neurons, activation) + */ + explicit NeuralNetwork( + const std::vector> &config) { + // First layer should not have activation + if (config.begin()->second != "none") { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "First layer can't have activation other than none got " + << config.begin()->second; + std::cerr << std::endl; + std::exit(EXIT_FAILURE); + } + // Network should have atleast two layers + if (config.size() <= 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid size of network, "; + std::cerr << "Atleast two layers are required"; + std::exit(EXIT_FAILURE); + } + // Separately creating first layer so it can have unit matrix + // as kernal. + layers.push_back(neural_network::layers::DenseLayer( + config[0].first, config[0].second, + {config[0].first, config[0].first}, false)); + // Creating remaining layers + for (size_t i = 1; i < config.size(); i++) { + layers.push_back(neural_network::layers::DenseLayer( + config[i].first, config[i].second, + {config[i - 1].first, config[i].first}, true)); + } + std::cout << "INFO: Network constructed successfully" << std::endl; + } + + /** + * Copy Constructor for class NeuralNetwork. + * + * @param model instance of class to be copied. + */ + NeuralNetwork(const NeuralNetwork &model) = default; + + /** + * Destructor for class NeuralNetwork. + */ + ~NeuralNetwork() = default; + + /** + * Copy assignment operator for class NeuralNetwork + */ + NeuralNetwork &operator=(const NeuralNetwork &model) = default; + + /** + * Move constructor for class NeuralNetwork + */ + NeuralNetwork(NeuralNetwork &&) = default; + + /** + * Move assignment operator for class NeuralNetwork + */ + NeuralNetwork &operator=(NeuralNetwork &&) = default; + + /** + * Function to get X and Y from csv file (where X = data, Y = label) + * @param file_name csv file name + * @param last_label flag for whether label is in first or last column + * @param normalize flag for whether to normalize data + * @param slip_lines number of lines to skip + * @return returns pair of X and Y + */ + std::pair>>, + std::vector>>> + get_XY_from_csv(const std::string &file_name, const bool &last_label, + const bool &normalize, const int &slip_lines = 1) { + std::ifstream in_file; // Ifstream to read file + in_file.open(file_name.c_str(), std::ios::in); // Open file + // If there is any problem in opening file + if (!in_file.is_open()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Unable to open file: " << file_name << std::endl; + std::exit(EXIT_FAILURE); + } + std::vector>> X, + Y; // To store X and Y + std::string line; // To store each line + // Skip lines + for (int i = 0; i < slip_lines; i++) { + std::getline(in_file, line, '\n'); // Ignore line + } + // While file has information + while (!in_file.eof() && std::getline(in_file, line, '\n')) { + std::valarray x_data, + y_data; // To store single sample and label + std::stringstream ss(line); // Constructing stringstream from line + std::string token; // To store each token in line (seprated by ',') + while (std::getline(ss, token, ',')) { // For each token + // Insert numerical value of token in x_data + x_data = insert_element(x_data, std::stod(token)); } - } // namespace activations - /** \namespace util_functions - * \brief Various utility functions used in Neural network - */ - namespace util_functions { - /** - * Square function - * @param X Value - * @return Returns x * x - */ - double square(const double &x) { - return x * x; + 
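// (Illustrative note on the label handling below: with a 3-neuron output
//  layer, a label value of 2 in the label column becomes the one-hot target
//  {0, 0, 1}, while a single-neuron output layer keeps the raw column value
//  as a regression target.)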
// If label is in last column + if (last_label) { + y_data.resize(this->layers.back().neurons); + // If task is classification + if (y_data.size() > 1) { + y_data[x_data[x_data.size() - 1]] = 1; + } + // If task is regrssion (of single value) + else { + y_data[0] = x_data[x_data.size() - 1]; + } + x_data = pop_back(x_data); // Remove label from x_data + } else { + y_data.resize(this->layers.back().neurons); + // If task is classification + if (y_data.size() > 1) { + y_data[x_data[x_data.size() - 1]] = 1; + } + // If task is regrssion (of single value) + else { + y_data[0] = x_data[x_data.size() - 1]; + } + x_data = pop_front(x_data); // Remove label from x_data } - /** - * Identity function - * @param X Value - * @return Returns x - */ - double identity_function(const double &x) { - return x; + // Push collected X_data and y_data in X and Y + X.push_back({x_data}); + Y.push_back({y_data}); + } + // Normalize training data if flag is set + if (normalize) { + // Scale data between 0 and 1 using min-max scaler + X = minmax_scaler(X, 0.01, 1.0); + } + in_file.close(); // Closing file + return make_pair(X, Y); // Return pair of X and Y + } + + /** + * Function to get prediction of model on single sample. + * @param X array of feature vectors + * @return returns predictions as vector + */ + std::vector> single_predict( + const std::vector> &X) { + // Get activations of all layers + auto activations = this->__detailed_single_prediction(X); + // Return activations of last layer (actual predicted values) + return activations.back(); + } + + /** + * Function to get prediction of model on batch + * @param X array of feature vectors + * @return returns predicted values as vector + */ + std::vector>> batch_predict( + const std::vector>> &X) { + // Store predicted values + std::vector>> predicted_batch( + X.size()); + for (size_t i = 0; i < X.size(); i++) { // For every sample + // Push predicted values + predicted_batch[i] = this->single_predict(X[i]); + } + return predicted_batch; // Return predicted values + } + + /** + * Function to fit model on supplied data + * @param X array of feature vectors + * @param Y array of target values + * @param epochs number of epochs (default = 100) + * @param learning_rate learning rate (default = 0.01) + * @param batch_size batch size for gradient descent (default = 32) + * @param shuffle flag for whether to shuffle data (default = true) + */ + void fit(const std::vector>> &X_, + const std::vector>> &Y_, + const int &epochs = 100, const double &learning_rate = 0.01, + const size_t &batch_size = 32, const bool &shuffle = true) { + std::vector>> X = X_, Y = Y_; + // Both label and input data should have same size + if (X.size() != Y.size()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "X and Y in fit have different sizes" << std::endl; + std::exit(EXIT_FAILURE); + } + std::cout << "INFO: Training Started" << std::endl; + for (int epoch = 1; epoch <= epochs; epoch++) { // For every epoch + // Shuffle X and Y if flag is set + if (shuffle) { + equal_shuffle(X, Y); } - } // namespace util_functions - /** \namespace layers - * \brief This namespace contains layers used - * in MLP. - */ - namespace layers { - /** - * neural_network::layers::DenseLayer class is used to store all necessary information about - * the layers (i.e. neurons, activation and kernal). This class - * is used by NeuralNetwork class to store layers. 
- * - */ - class DenseLayer { - public: - // To store activation function and it's derivative - double (*activation_function)(const double &); - double (*dactivation_function)(const double &); - int neurons; // To store number of neurons (used in summary) - std::string activation; // To store activation name (used in summary) - std::vector > kernal; // To store kernal (aka weights) - - /** - * Constructor for neural_network::layers::DenseLayer class - * @param neurons number of neurons - * @param activation activation function for layer - * @param kernal_shape shape of kernal - * @param random_kernal flag for whether to intialize kernal randomly - */ - DenseLayer(const int &neurons, - const std::string &activation, - const std::pair &kernal_shape, - const bool &random_kernal) { - // Choosing activation (and it's derivative) - if (activation == "sigmoid") { - activation_function = neural_network::activations::sigmoid; - dactivation_function = neural_network::activations::sigmoid; - } - else if (activation == "relu") { - activation_function = neural_network::activations::relu; - dactivation_function = neural_network::activations::drelu; - } - else if (activation == "tanh") { - activation_function = neural_network::activations::tanh; - dactivation_function = neural_network::activations::dtanh; - } - else if (activation == "none") { - // Set identity function in casse of none is supplied - activation_function = neural_network::util_functions::identity_function; - dactivation_function = neural_network::util_functions::identity_function; - } - else { - // If supplied activation is invalid - std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, "; - std::cerr << "Expected from {none, sigmoid, relu, tanh} got "; - std::cerr << activation << std::endl; - std::exit(EXIT_FAILURE); - } - this -> activation = activation; // Setting activation name - this -> neurons = neurons; // Setting number of neurons - // Initialize kernal according to flag - if(random_kernal) { - uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0); - } - else { - unit_matrix_initialization(kernal, kernal_shape); - } + auto start = + std::chrono::high_resolution_clock::now(); // Start clock + double loss = 0, + acc = 0; // Intialize performance metrics with zero + // For each starting index of batch + for (size_t batch_start = 0; batch_start < X.size(); + batch_start += batch_size) { + for (size_t i = batch_start; + i < std::min(X.size(), batch_start + batch_size); i++) { + std::vector> grad, cur_error, + predicted; + auto activations = this->__detailed_single_prediction(X[i]); + // Gradients vector to store gradients for all layers + // They will be averaged and applied to kernal + std::vector>> gradients; + gradients.resize(this->layers.size()); + // First intialize gradients to zero + for (size_t i = 0; i < gradients.size(); i++) { + zeroes_initialization( + gradients[i], get_shape(this->layers[i].kernal)); } - /** - * Constructor for neural_network::layers::DenseLayer class - * @param neurons number of neurons - * @param activation activation function for layer - * @param kernal values of kernal (useful in loading model) - */ - DenseLayer (const int &neurons, - const std::string &activation, - const std::vector > &kernal) { - // Choosing activation (and it's derivative) - if (activation == "sigmoid") { - activation_function = neural_network::activations::sigmoid; - dactivation_function = neural_network::activations::sigmoid; - } - else if (activation == "relu") { - activation_function = 
neural_network::activations::relu; - dactivation_function = neural_network::activations::drelu; - } - else if (activation == "tanh") { - activation_function = neural_network::activations::tanh; - dactivation_function = neural_network::activations::dtanh; - } - else if (activation == "none") { - // Set identity function in casse of none is supplied - activation_function = neural_network::util_functions::identity_function; - dactivation_function = neural_network::util_functions::identity_function; - } - else { - // If supplied activation is invalid - std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, "; - std::cerr << "Expected from {none, sigmoid, relu, tanh} got "; - std::cerr << activation << std::endl; - std::exit(EXIT_FAILURE); - } - this -> activation = activation; // Setting activation name - this -> neurons = neurons; // Setting number of neurons - this -> kernal = kernal; // Setting supplied kernal values + predicted = activations.back(); // Predicted vector + cur_error = predicted - Y[i]; // Absoulute error + // Calculating loss with MSE + loss += sum(apply_function( + cur_error, neural_network::util_functions::square)); + // If prediction is correct + if (argmax(predicted) == argmax(Y[i])) { + acc += 1; } - - /** - * Copy Constructor for class DenseLayer. - * - * @param model instance of class to be copied. - */ - DenseLayer(const DenseLayer &layer) = default; + // For every layer (except first) starting from last one + for (size_t j = this->layers.size() - 1; j >= 1; j--) { + // Backpropogating errors + cur_error = hadamard_product( + cur_error, + apply_function( + activations[j + 1], + this->layers[j].dactivation_function)); + // Calculating gradient for current layer + grad = multiply(transpose(activations[j]), cur_error); + // Change error according to current kernal values + cur_error = multiply(cur_error, + transpose(this->layers[j].kernal)); + // Adding gradient values to collection of gradients + gradients[j] = gradients[j] + grad / double(batch_size); + } + // Applying gradients + for (size_t j = this->layers.size() - 1; j >= 1; j--) { + // Updating kernal (aka weights) + this->layers[j].kernal = this->layers[j].kernal - + gradients[j] * learning_rate; + } + } + } + auto stop = + std::chrono::high_resolution_clock::now(); // Stoping the clock + // Calculate time taken by epoch + auto duration = + std::chrono::duration_cast(stop - + start); + loss /= X.size(); // Averaging loss + acc /= X.size(); // Averaging accuracy + std::cout.precision(4); // set output precision to 4 + // Printing training stats + std::cout << "Training: Epoch " << epoch << '/' << epochs; + std::cout << ", Loss: " << loss; + std::cout << ", Accuracy: " << acc; + std::cout << ", Taken time: " << duration.count() / 1e6 + << " seconds"; + std::cout << std::endl; + } + return; + } - /** - * Destructor for class DenseLayer. 
- */ - ~DenseLayer() = default; + /** + * Function to fit model on data stored in csv file + * @param file_name csv file name + * @param last_label flag for whether label is in first or last column + * @param epochs number of epochs + * @param learning_rate learning rate + * @param normalize flag for whether to normalize data + * @param slip_lines number of lines to skip + * @param batch_size batch size for gradient descent (default = 32) + * @param shuffle flag for whether to shuffle data (default = true) + */ + void fit_from_csv(const std::string &file_name, const bool &last_label, + const int &epochs, const double &learning_rate, + const bool &normalize, const int &slip_lines = 1, + const size_t &batch_size = 32, + const bool &shuffle = true) { + // Getting training data from csv file + auto data = + this->get_XY_from_csv(file_name, last_label, normalize, slip_lines); + // Fit the model on training data + this->fit(data.first, data.second, epochs, learning_rate, batch_size, + shuffle); + return; + } - /** - * Copy assignment operator for class DenseLayer - */ - DenseLayer& operator = (const DenseLayer &layer) = default; + /** + * Function to evaluate model on supplied data + * @param X array of feature vectors (input data) + * @param Y array of target values (label) + */ + void evaluate(const std::vector>> &X, + const std::vector>> &Y) { + std::cout << "INFO: Evaluation Started" << std::endl; + double acc = 0, loss = 0; // intialize performance metrics with zero + for (size_t i = 0; i < X.size(); i++) { // For every sample in input + // Get predictions + std::vector> pred = + this->single_predict(X[i]); + // If predicted class is correct + if (argmax(pred) == argmax(Y[i])) { + acc += 1; // Increment accuracy + } + // Calculating loss - Mean Squared Error + loss += sum(apply_function((Y[i] - pred), + neural_network::util_functions::square) * + 0.5); + } + acc /= X.size(); // Averaging accuracy + loss /= X.size(); // Averaging loss + // Prinitng performance of the model + std::cout << "Evaluation: Loss: " << loss; + std::cout << ", Accuracy: " << acc << std::endl; + return; + } - /** - * Move constructor for class DenseLayer - */ - DenseLayer(DenseLayer &&) = default; + /** + * Function to evaluate model on data stored in csv file + * @param file_name csv file name + * @param last_label flag for whether label is in first or last column + * @param normalize flag for whether to normalize data + * @param slip_lines number of lines to skip + */ + void evaluate_from_csv(const std::string &file_name, const bool &last_label, + const bool &normalize, const int &slip_lines = 1) { + // Getting training data from csv file + auto data = + this->get_XY_from_csv(file_name, last_label, normalize, slip_lines); + // Evaluating model + this->evaluate(data.first, data.second); + return; + } - /** - * Move assignment operator for class DenseLayer - */ - DenseLayer& operator = (DenseLayer &&) = default; - }; - } // namespace layers + /** + * Function to save current model. 
+ * @param file_name file name to save model (*.model) + */ + void save_model(const std::string &_file_name) { + std::string file_name = _file_name; + // Adding ".model" extension if it is not already there in name + if (file_name.find(".model") == file_name.npos) { + file_name += ".model"; + } + std::ofstream out_file; // Ofstream to write in file + // Open file in out|trunc mode + out_file.open(file_name.c_str(), + std::ofstream::out | std::ofstream::trunc); + // If there is any problem in opening file + if (!out_file.is_open()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Unable to open file: " << file_name << std::endl; + std::exit(EXIT_FAILURE); + } /** - * NeuralNetwork class is implements MLP. This class is - * used by actual user to create and train networks. - * - */ - class NeuralNetwork { - private: - std::vector layers; // To store layers - /** - * Private Constructor for class NeuralNetwork. This constructor - * is used internally to load model. - * @param config vector containing pair (neurons, activation) - * @param kernals vector containing all pretrained kernals - */ - NeuralNetwork(const std::vector > &config, - const std::vector >> &kernals) { - // First layer should not have activation - if(config.begin() -> second != "none") { - std::cerr << "ERROR: First layer can't have activation other than none"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - // Network should have atleast two layers - if(config.size() <= 1) { - std::cerr << "ERROR: Invalid size of network, "; - std::cerr << "Atleast two layers are required"; - std::exit(EXIT_FAILURE); - } - // Reconstructing all pretrained layers - for(size_t i = 0; i < config.size(); i++) { - layers.emplace_back(neural_network::layers::DenseLayer(config[i].first, - config[i].second, - kernals[i])); - } - std::cout << "INFO: Network constructed successfully" << std::endl; + Format in which model is saved: + + total_layers + neurons(1st neural_network::layers::DenseLayer) activation_name(1st + neural_network::layers::DenseLayer) kernal_shape(1st + neural_network::layers::DenseLayer) kernal_values + . + . + . + neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth + neural_network::layers::DenseLayer) kernal_shape(Nth + neural_network::layers::DenseLayer) kernal_value + + For Example, pretrained model with 3 layers: +
+            3
+            4 none
+            4 4
+            1 0 0 0
+            0 1 0 0
+            0 0 1 0
+            0 0 0 1
+            6 relu
+            4 6
+            -1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653
+            -0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202
+            -2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049
+            -0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741
+            3 sigmoid
+            6 3
+            0.390267 -0.391703 -0.0989607
+            0.499234 -0.564539 -0.28097
+            0.553386 -0.153974 -1.92493
+            -2.01336 -0.0219682 1.44145
+            1.72853 -0.465264 -0.705373
+            -0.908409 -0.740547 0.376416
+            
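// A minimal usage sketch for this format, assuming a trained network named
// myNN (the name is illustrative):
//
//     myNN.save_model("iris_model");  // ".model" is appended automatically
//     auto restored = myNN.load_model("iris_model.model");
//     restored.summary();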
+ */ + // Saving model in the same format + out_file << layers.size(); + out_file << std::endl; + for (const auto &layer : this->layers) { + out_file << layer.neurons << ' ' << layer.activation << std::endl; + const auto shape = get_shape(layer.kernal); + out_file << shape.first << ' ' << shape.second << std::endl; + for (const auto &row : layer.kernal) { + for (const auto &val : row) { + out_file << val << ' '; } - /** - * Private function to get detailed predictions (i.e. - * activated neuron values). This function is used in - * backpropagation, single predict and batch predict. - * @param X input vector - */ - std::vector>> - __detailed_single_prediction (const std::vector> &X) { - std::vector >> details; - std::vector < std::valarray > current_pass = X; - details.emplace_back(X); - for(const auto &l : layers) { - current_pass = multiply(current_pass, l.kernal); - current_pass = apply_function(current_pass, l.activation_function); - details.emplace_back(current_pass); - } - return details; + out_file << std::endl; + } + } + std::cout << "INFO: Model saved successfully with name : "; + std::cout << file_name << std::endl; + out_file.close(); // Closing file + return; + } + + /** + * Function to load earlier saved model. + * @param file_name file from which model will be loaded (*.model) + * @return instance of NeuralNetwork class with pretrained weights + */ + NeuralNetwork load_model(const std::string &file_name) { + std::ifstream in_file; // Ifstream to read file + in_file.open(file_name.c_str()); // Openinig file + // If there is any problem in opening file + if (!in_file.is_open()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Unable to open file: " << file_name << std::endl; + std::exit(EXIT_FAILURE); + } + std::vector> config; // To store config + std::vector>> + kernals; // To store pretrained kernals + // Loading model from saved file format + size_t total_layers = 0; + in_file >> total_layers; + for (size_t i = 0; i < total_layers; i++) { + int neurons = 0; + std::string activation; + size_t shape_a = 0, shape_b = 0; + std::vector> kernal; + in_file >> neurons >> activation >> shape_a >> shape_b; + for (size_t r = 0; r < shape_a; r++) { + std::valarray row(shape_b); + for (size_t c = 0; c < shape_b; c++) { + in_file >> row[c]; } - public: - /** - * Default Constructor for class NeuralNetwork. This constructor - * is used to create empty variable of type NeuralNetwork class. - */ - NeuralNetwork() = default; + kernal.push_back(row); + } + config.emplace_back(make_pair(neurons, activation)); + ; + kernals.emplace_back(kernal); + } + std::cout << "INFO: Model loaded successfully" << std::endl; + in_file.close(); // Closing file + return NeuralNetwork( + config, kernals); // Return instance of NeuralNetwork class + } - /** - * Constructor for class NeuralNetwork. This constructor - * is used by user. - * @param config vector containing pair (neurons, activation) - */ - explicit NeuralNetwork(const std::vector > &config) { - // First layer should not have activation - if(config.begin() -> second != "none") { - std::cerr << "ERROR: First layer can't have activation other than none"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - // Network should have atleast two layers - if(config.size() <= 1) { - std::cerr << "ERROR: Invalid size of network, "; - std::cerr << "Atleast two layers are required"; - std::exit(EXIT_FAILURE); - } - // Separately creating first layer so it can have unit matrix - // as kernal. 
- layers.push_back(neural_network::layers::DenseLayer(config[0].first, - config[0].second, - {config[0].first, config[0].first}, - false)); - // Creating remaining layers - for(size_t i = 1; i < config.size(); i++) { - layers.push_back(neural_network::layers::DenseLayer(config[i].first, - config[i].second, - {config[i - 1].first, config[i].first}, - true)); - } - std::cout << "INFO: Network constructed successfully" << std::endl; - } - - /** - * Copy Constructor for class NeuralNetwork. - * - * @param model instance of class to be copied. - */ - NeuralNetwork(const NeuralNetwork &model) = default; - - /** - * Destructor for class NeuralNetwork. - */ - ~NeuralNetwork() = default; - - /** - * Copy assignment operator for class NeuralNetwork - */ - NeuralNetwork& operator = (const NeuralNetwork &model) = default; - - /** - * Move constructor for class NeuralNetwork - */ - NeuralNetwork(NeuralNetwork &&) = default; - - /** - * Move assignment operator for class NeuralNetwork - */ - NeuralNetwork& operator = (NeuralNetwork &&) = default; - - /** - * Function to get X and Y from csv file (where X = data, Y = label) - * @param file_name csv file name - * @param last_label flag for whether label is in first or last column - * @param normalize flag for whether to normalize data - * @param slip_lines number of lines to skip - * @return returns pair of X and Y - */ - std::pair>>, std::vector>>> - get_XY_from_csv(const std::string &file_name, - const bool &last_label, - const bool &normalize, - const int &slip_lines = 1) { - std::ifstream in_file; // Ifstream to read file - in_file.open(file_name.c_str(), std::ios::in); // Open file - std::vector >> X, Y; // To store X and Y - std::string line; // To store each line - // Skip lines - for(int i = 0; i < slip_lines; i ++) { - std::getline(in_file, line, '\n'); // Ignore line - } - // While file has information - while(!in_file.eof() && std::getline(in_file, line, '\n')) - { - std::valarray x_data, y_data; // To store single sample and label - std::stringstream ss(line); // Constructing stringstream from line - std::string token; // To store each token in line (seprated by ',') - while(std::getline(ss, token, ',')) { // For each token - // Insert numerical value of token in x_data - x_data = insert_element(x_data, std::stod(token)); - } - // If label is in last column - if(last_label) { - y_data.resize(this -> layers.back().neurons); - // If task is classification - if(y_data.size() > 1) { - y_data[x_data[x_data.size() - 1]] = 1; - } - // If task is regrssion (of single value) - else { - y_data[0] = x_data[x_data.size() - 1]; - } - x_data = pop_back(x_data); // Remove label from x_data - } - else { - y_data.resize(this -> layers.back().neurons); - // If task is classification - if(y_data.size() > 1) { - y_data[x_data[x_data.size() - 1]] = 1; - } - // If task is regrssion (of single value) - else { - y_data[0] = x_data[x_data.size() - 1]; - } - x_data = pop_front(x_data); // Remove label from x_data - } - // Push collected X_data and y_data in X and Y - X.push_back({x_data}); - Y.push_back({y_data}); - } - in_file.close(); - // Normalize training data if flag is set - if(normalize) { - // Scale data between 0 and 1 using min-max scaler - X = minmax_scaler(X, 0.01, 1.0); - } - return make_pair(X, Y); // Return pair of X and Y - } - - /** - * Function to get prediction of model on single sample. 
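// An illustrative call, matching the shape used by test() at the end of this
// file (one row of four features for the iris network named myNN):
//
//     std::vector<std::valarray<double>> pred =
//         myNN.single_predict({{5.0, 3.4, 1.6, 0.4}});
//     auto predicted_class = machine_learning::argmax(pred);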
- * @param X array of feature vectors - * @return returns predictions as vector - */ - std::vector> - single_predict (const std::vector> &X) { - // Get activations of all layers - auto activations = this -> __detailed_single_prediction(X); - // Return activations of last layer (actual predicted values) - return activations.back(); - } - - /** - * Function to get prediction of model on batch - * @param X array of feature vectors - * @return returns predicted values as vector - */ - std::vector < std::vector >> - batch_predict (const std::vector >> &X) { - // Store predicted values - std::vector < std::vector >> predicted_batch(X.size()); - for(size_t i = 0; i < X.size(); i++) { // For every sample - // Push predicted values - predicted_batch[i] = this -> single_predict(X[i]); - } - return predicted_batch; // Return predicted values - } - - /** - * Function to fit model on supplied data - * @param X array of feature vectors - * @param Y array of target values - * @param epochs number of epochs (default = 100) - * @param learning_rate learning rate (default = 0.01) - * @param batch_size batch size for gradient descent (default = 32) - * @param shuffle flag for whether to shuffle data (default = true) - */ - void fit(const std::vector < std::vector >> &X_, - const std::vector < std::vector >> &Y_, - const int &epochs = 100, - const double &learning_rate = 0.01, - const size_t &batch_size = 32, - const bool &shuffle = true) { - std::vector < std::vector >> X = X_, Y = Y_; - // Both label and input data should have same size - if (X.size() != Y.size()) { - std::cerr << "ERROR : X and Y in fit have different sizes" << std::endl; - std::exit(EXIT_FAILURE); - } - std::cout << "INFO: Training Started" << std::endl; - for (int epoch = 1; epoch <= epochs; epoch++) { // For every epoch - // Shuffle X and Y if flag is set - if(shuffle) { - equal_shuffle(X, Y); - } - auto start = std::chrono::high_resolution_clock::now(); // Start clock - double loss = 0, acc = 0; // Intialize performance metrics with zero - // For each starting index of batch - for(size_t batch_start = 0; batch_start < X.size(); batch_start += batch_size) { - for(size_t i = batch_start; i < std::min(X.size(), batch_start + batch_size); i++) { - std::vector > grad, cur_error, predicted; - auto activations = this -> __detailed_single_prediction(X[i]); - // Gradients vector to store gradients for all layers - // They will be averaged and applied to kernal - std::vector>> gradients; - gradients.resize(this -> layers.size()); - // First intialize gradients to zero - for(size_t i = 0; i < gradients.size(); i++) { - zeroes_initialization(gradients[i], get_shape(this -> layers[i].kernal)); - } - predicted = activations.back(); // Predicted vector - cur_error = predicted - Y[i]; // Absoulute error - // Calculating loss with MSE - loss += sum(apply_function(cur_error, neural_network::util_functions::square)); - // If prediction is correct - if(argmax(predicted) == argmax(Y[i])) { - acc += 1; - } - // For every layer (except first) starting from last one - for(size_t j = this -> layers.size() - 1; j >= 1; j--) { - // Backpropogating errors - cur_error = hadamard_product(cur_error, - apply_function(activations[j + 1], - this -> layers[j].dactivation_function)); - // Calculating gradient for current layer - grad = multiply(transpose(activations[j]), cur_error); - // Change error according to current kernal values - cur_error = multiply(cur_error, transpose(this -> layers[j].kernal)); - // Adding gradient values to collection of gradients - 
gradients[j] = gradients[j] + grad / double(batch_size); - } - // Applying gradients - for(size_t j = this -> layers.size() - 1; j >= 1; j--) { - // Updating kernal (aka weights) - this -> layers[j].kernal = this -> layers[j].kernal - - gradients[j] * learning_rate; - } - } - } - auto stop = std::chrono::high_resolution_clock::now(); // Stoping the clock - // Calculate time taken by epoch - auto duration = std::chrono::duration_cast(stop - start); - loss /= X.size(); // Averaging loss - acc /= X.size(); // Averaging accuracy - std::cout.precision(4); // set output precision to 4 - // Printing training stats - std::cout << "Training: Epoch " << epoch << '/' << epochs; - std::cout << ", Loss: " << loss; - std::cout << ", Accuracy: " << acc; - std::cout << ", Taken time: " << duration.count() / 1e6 << " seconds"; - std::cout << std::endl; - } - return; - } - - /** - * Function to fit model on data stored in csv file - * @param file_name csv file name - * @param last_label flag for whether label is in first or last column - * @param epochs number of epochs - * @param learning_rate learning rate - * @param normalize flag for whether to normalize data - * @param slip_lines number of lines to skip - * @param batch_size batch size for gradient descent (default = 32) - * @param shuffle flag for whether to shuffle data (default = true) - */ - void fit_from_csv (const std::string &file_name, - const bool &last_label, - const int &epochs, - const double &learning_rate, - const bool &normalize, - const int &slip_lines = 1, - const size_t &batch_size = 32, - const bool &shuffle = true) { - // Getting training data from csv file - auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines); - // Fit the model on training data - this -> fit(data.first, data.second, epochs, learning_rate, batch_size, shuffle); - return; - } - - /** - * Function to evaluate model on supplied data - * @param X array of feature vectors (input data) - * @param Y array of target values (label) - */ - void evaluate(const std::vector< std::vector >> &X, - const std::vector< std::vector >> &Y) { - std::cout << "INFO: Evaluation Started" << std::endl; - double acc = 0, loss = 0; // intialize performance metrics with zero - for(size_t i = 0; i < X.size(); i++) { // For every sample in input - // Get predictions - std::vector> pred = this -> single_predict(X[i]); - // If predicted class is correct - if(argmax(pred) == argmax(Y[i])) { - acc += 1; // Increment accuracy - } - // Calculating loss - Mean Squared Error - loss += sum(apply_function((Y[i] - pred), - neural_network::util_functions::square) * 0.5); - } - acc /= X.size(); // Averaging accuracy - loss /= X.size(); // Averaging loss - // Prinitng performance of the model - std::cout << "Evaluation: Loss: " << loss; - std::cout << ", Accuracy: " << acc << std::endl; - return; - } - - /** - * Function to evaluate model on data stored in csv file - * @param file_name csv file name - * @param last_label flag for whether label is in first or last column - * @param normalize flag for whether to normalize data - * @param slip_lines number of lines to skip - */ - void evaluate_from_csv (const std::string &file_name, - const bool &last_label, - const bool &normalize, - const int &slip_lines = 1) { - // Getting training data from csv file - auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines); - // Evaluating model - this -> evaluate(data.first, data.second); - return; - } - - /** - * Function to save current model. 
- * @param file_name file name to save model (*.model) - */ - void save_model (const std::string &_file_name) { - std::string file_name = _file_name; - // Adding ".model" extension if it is not already there in name - if(file_name.find(".model") == file_name.npos) { - file_name += ".model"; - } - std::ofstream out_file; // Ofstream to write in file - // Open file in out|trunc mode - out_file.open(file_name.c_str(), std::ofstream::out | std::ofstream::trunc); - /** - Format in which model is saved: - - total_layers - neurons(1st neural_network::layers::DenseLayer) activation_name(1st neural_network::layers::DenseLayer) - kernal_shape(1st neural_network::layers::DenseLayer) - kernal_values - . - . - . - neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth neural_network::layers::DenseLayer) - kernal_shape(Nth neural_network::layers::DenseLayer) - kernal_value - - For Example, pretrained model with 3 layers: -
-                        3
-                        4 none
-                        4 4
-                        1 0 0 0 
-                        0 1 0 0 
-                        0 0 1 0 
-                        0 0 0 1 
-                        6 relu
-                        4 6
-                        -1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653 
-                        -0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202 
-                        -2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049 
-                        -0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741 
-                        3 sigmoid
-                        6 3
-                        0.390267 -0.391703 -0.0989607 
-                        0.499234 -0.564539 -0.28097 
-                        0.553386 -0.153974 -1.92493 
-                        -2.01336 -0.0219682 1.44145 
-                        1.72853 -0.465264 -0.705373 
-                        -0.908409 -0.740547 0.376416 
-                        
- */ - // Saving model in the same format - out_file << layers.size(); - out_file << std::endl; - for(const auto &layer : this -> layers) { - out_file << layer.neurons << ' ' << layer.activation << std::endl; - const auto shape = get_shape(layer.kernal); - out_file << shape.first << ' ' << shape.second << std::endl; - for(const auto &row : layer.kernal) { - for(const auto &val : row) { - out_file << val << ' '; - } - out_file << std::endl; - } - } - std::cout << "INFO: Model saved successfully with name : "; - std::cout << file_name << std::endl; - return; - } - - /** - * Function to load earlier saved model. - * @param file_name file from which model will be loaded (*.model) - * @return instance of NeuralNetwork class with pretrained weights - */ - NeuralNetwork load_model (const std::string &file_name) { - std::ifstream in_file; // Ifstream to read file - in_file.open(file_name.c_str()); // Openinig file - std::vector > config; // To store config - std::vector >> kernals; // To store pretrained kernals - // Loading model from saved file format - size_t total_layers = 0; - in_file >> total_layers; - for(size_t i = 0; i < total_layers; i++) { - int neurons = 0; - std::string activation; - size_t shape_a = 0, shape_b = 0; - std::vector> kernal; - in_file >> neurons >> activation >> shape_a >> shape_b; - for(size_t r = 0; r < shape_a; r++) { - std::valarray row(shape_b); - for(size_t c = 0; c < shape_b; c++) { - in_file >> row[c]; - } - kernal.push_back(row); - } - config.emplace_back(make_pair(neurons, activation));; - kernals.emplace_back(kernal); - } - std::cout << "INFO: Model loaded successfully" << std::endl; - return NeuralNetwork(config, kernals); // Return instance of NeuralNetwork class - } - - /** - * Function to print summary of the network. - */ - void summary () { - // Printing Summary - std::cout << "===============================================================" << std::endl; - std::cout << "\t\t+ MODEL SUMMARY +\t\t\n"; - std::cout << "===============================================================" << std::endl; - for(size_t i = 1; i <= layers.size(); i++) { // For every layer - std::cout << i << ")"; - std::cout << " Neurons : " << layers[i - 1].neurons; // number of neurons - std::cout << ", Activation : " << layers[i - 1].activation; // activation - std::cout << ", Kernal Shape : " << get_shape(layers[i - 1].kernal); // kernal shape - std::cout << std::endl; - } - std::cout << "===============================================================" << std::endl; - return; - } - - }; - } // namespace neural_network -} // namespace machine_learning + /** + * Function to print summary of the network. 
+ */ + void summary() { + // Printing Summary + std::cout + << "===============================================================" + << std::endl; + std::cout << "\t\t+ MODEL SUMMARY +\t\t\n"; + std::cout + << "===============================================================" + << std::endl; + for (size_t i = 1; i <= layers.size(); i++) { // For every layer + std::cout << i << ")"; + std::cout << " Neurons : " + << layers[i - 1].neurons; // number of neurons + std::cout << ", Activation : " + << layers[i - 1].activation; // activation + std::cout << ", Kernal Shape : " + << get_shape(layers[i - 1].kernal); // kernal shape + std::cout << std::endl; + } + std::cout + << "===============================================================" + << std::endl; + return; + } +}; +} // namespace neural_network +} // namespace machine_learning /** * Function to test neural network @@ -766,19 +805,24 @@ namespace machine_learning { static void test() { // Creating network with 3 layers for "iris.csv" machine_learning::neural_network::NeuralNetwork myNN = - machine_learning::neural_network::NeuralNetwork({ - {4, "none"}, // First layer with 3 neurons and "none" as activation - {6, "relu"}, // Second layer with 6 neurons and "relu" as activation - {3, "sigmoid"} // Third layer with 3 neurons and "sigmoid" as activation - }); + machine_learning::neural_network::NeuralNetwork({ + {4, "none"}, // First layer with 3 neurons and "none" as activation + {6, + "relu"}, // Second layer with 6 neurons and "relu" as activation + {3, "sigmoid"} // Third layer with 3 neurons and "sigmoid" as + // activation + }); // Printing summary of model myNN.summary(); // Training Model myNN.fit_from_csv("iris.csv", true, 100, 0.3, false, 2, 32, true); // Testing predictions of model - assert(machine_learning::argmax(myNN.single_predict({{5,3.4,1.6,0.4}})) == 0); - assert(machine_learning::argmax(myNN.single_predict({{6.4,2.9,4.3,1.3}})) == 1); - assert(machine_learning::argmax(myNN.single_predict({{6.2,3.4,5.4,2.3}})) == 2); + assert(machine_learning::argmax( + myNN.single_predict({{5, 3.4, 1.6, 0.4}})) == 0); + assert(machine_learning::argmax( + myNN.single_predict({{6.4, 2.9, 4.3, 1.3}})) == 1); + assert(machine_learning::argmax( + myNN.single_predict({{6.2, 3.4, 5.4, 2.3}})) == 2); return; } diff --git a/machine_learning/vector_ops.hpp b/machine_learning/vector_ops.hpp index bb70b5c4f..2e519b4b6 100644 --- a/machine_learning/vector_ops.hpp +++ b/machine_learning/vector_ops.hpp @@ -1,20 +1,21 @@ /** * @file vector_ops.hpp * @author [Deep Raval](https://github.com/imdeep2905) - * - * @brief Various functions for vectors associated with [NeuralNetwork (aka Multilayer Perceptron)] + * + * @brief Various functions for vectors associated with [NeuralNetwork (aka + * Multilayer Perceptron)] * (https://en.wikipedia.org/wiki/Multilayer_perceptron). 
- * + * */ #ifndef VECTOR_OPS_FOR_NN #define VECTOR_OPS_FOR_NN -#include #include -#include -#include #include +#include #include +#include +#include /** * @namespace machine_learning @@ -31,18 +32,18 @@ template std::ostream &operator<<(std::ostream &out, std::vector> const &A) { // Setting output precision to 4 in case of floating point numbers - out.precision(4); - for(const auto &a : A) { // For each row in A - for(const auto &x : a) { // For each element in row - std::cerr << x << ' '; // print element + out.precision(4); + for (const auto &a : A) { // For each row in A + for (const auto &x : a) { // For each element in row + std::cout << x << ' '; // print element } - std::cerr << std::endl; + std::cout << std::endl; } return out; } /** - * Overloaded operator "<<" to print a pair + * Overloaded operator "<<" to print a pair * @tparam T typename of the pair * @param out std::ostream to output * @param A Pair to be printed @@ -52,7 +53,7 @@ std::ostream &operator<<(std::ostream &out, const std::pair &A) { // Setting output precision to 4 in case of floating point numbers out.precision(4); // printing pair in the form (p, q) - std::cerr << "(" << A.first << ", " << A.second << ")"; + std::cout << "(" << A.first << ", " << A.second << ")"; return out; } @@ -66,10 +67,10 @@ template std::ostream &operator<<(std::ostream &out, const std::valarray &A) { // Setting output precision to 4 in case of floating point numbers out.precision(4); - for(const auto &a : A) { // For every element in the vector. - std::cerr << a << ' '; // Print element + for (const auto &a : A) { // For every element in the vector. + std::cout << a << ' '; // Print element } - std::cerr << std::endl; + std::cout << std::endl; return out; } @@ -81,14 +82,14 @@ std::ostream &operator<<(std::ostream &out, const std::valarray &A) { * @return new resultant vector */ template -std::valarray insert_element(const std::valarray &A, const T &ele) { - std::valarray B; // New 1D vector to store resultant vector - B.resize(A.size() + 1); // Resizing it accordingly - for(size_t i = 0; i < A.size(); i++) { // For every element in A - B[i] = A[i]; // Copy element in B +std::valarray insert_element(const std::valarray &A, const T &ele) { + std::valarray B; // New 1D vector to store resultant vector + B.resize(A.size() + 1); // Resizing it accordingly + for (size_t i = 0; i < A.size(); i++) { // For every element in A + B[i] = A[i]; // Copy element in B } - B[B.size() - 1] = ele; // Inserting new element in last position - return B; // Return resultant vector + B[B.size() - 1] = ele; // Inserting new element in last position + return B; // Return resultant vector } /** @@ -98,13 +99,14 @@ std::valarray insert_element(const std::valarray &A, const T &ele) { * @return new resultant vector */ template -std::valarray pop_front(const std::valarray &A) { - std::valarray B; // New 1D vector to store resultant vector - B.resize(A.size() - 1); // Resizing it accordingly - for(size_t i = 1; i < A.size(); i ++) { // // For every (except first) element in A - B[i - 1] = A[i]; // Copy element in B with left shifted position +std::valarray pop_front(const std::valarray &A) { + std::valarray B; // New 1D vector to store resultant vector + B.resize(A.size() - 1); // Resizing it accordingly + for (size_t i = 1; i < A.size(); + i++) { // // For every (except first) element in A + B[i - 1] = A[i]; // Copy element in B with left shifted position } - return B; // Return resultant vector + return B; // Return resultant vector } /** @@ -114,13 +116,14 @@ 
std::valarray pop_front(const std::valarray &A) { * @return new resultant vector */ template -std::valarray pop_back(const std::valarray &A) { - std::valarray B; // New 1D vector to store resultant vector - B.resize(A.size() - 1); // Resizing it accordingly - for(size_t i = 0; i < A.size() - 1; i ++) { // For every (except last) element in A - B[i] = A[i]; // Copy element in B +std::valarray pop_back(const std::valarray &A) { + std::valarray B; // New 1D vector to store resultant vector + B.resize(A.size() - 1); // Resizing it accordingly + for (size_t i = 0; i < A.size() - 1; + i++) { // For every (except last) element in A + B[i] = A[i]; // Copy element in B } - return B; // Return resultant vector + return B; // Return resultant vector } /** @@ -130,16 +133,17 @@ std::valarray pop_back(const std::valarray &A) { * @param B Second 3D vector */ template -void equal_shuffle(std::vector < std::vector > > &A, - std::vector < std::vector > > &B) { +void equal_shuffle(std::vector>> &A, + std::vector>> &B) { // If two vectors have different sizes - if(A.size() != B.size()) - { - std::cerr << "ERROR : Can not equally shuffle two vectors with different sizes: "; + if (A.size() != B.size()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "Can not equally shuffle two vectors with different sizes: "; std::cerr << A.size() << " and " << B.size() << std::endl; std::exit(EXIT_FAILURE); } - for(size_t i = 0; i < A.size(); i++) { // For every element in A and B + for (size_t i = 0; i < A.size(); i++) { // For every element in A and B // Genrating random index < size of A and B std::srand(std::chrono::system_clock::now().time_since_epoch().count()); size_t random_index = std::rand() % A.size(); @@ -159,26 +163,26 @@ void equal_shuffle(std::vector < std::vector > > &A, * @param high upper limit on value */ template -void uniform_random_initialization(std::vector> &A, - const std::pair &shape, - const T &low, - const T &high) { - A.clear(); // Making A empty +void uniform_random_initialization(std::vector> &A, + const std::pair &shape, + const T &low, const T &high) { + A.clear(); // Making A empty // Uniform distribution in range [low, high] - std::default_random_engine generator(std::chrono::system_clock::now().time_since_epoch().count()); - std::uniform_real_distribution distribution(low, high); - for(size_t i = 0; i < shape.first; i++) { // For every row - std::valarray row; // Making empty row which will be inserted in vector + std::default_random_engine generator( + std::chrono::system_clock::now().time_since_epoch().count()); + std::uniform_real_distribution distribution(low, high); + for (size_t i = 0; i < shape.first; i++) { // For every row + std::valarray + row; // Making empty row which will be inserted in vector row.resize(shape.second); - for(auto &r : row) { // For every element in row - r = distribution(generator); // copy random number - } - A.push_back(row); // Insert new row in vector + for (auto &r : row) { // For every element in row + r = distribution(generator); // copy random number + } + A.push_back(row); // Insert new row in vector } return; } - /** * Function to Intialize 2D vector as unit matrix * @tparam T typename of the vector @@ -186,15 +190,15 @@ void uniform_random_initialization(std::vector> &A, * @param shape required shape */ template -void unit_matrix_initialization(std::vector> &A, - const std::pair &shape - ) { - A.clear(); // Making A empty - for(size_t i = 0; i < shape.first; i++) { - std::valarray row; // Making empty row which will be inserted in 
vector +void unit_matrix_initialization(std::vector> &A, + const std::pair &shape) { + A.clear(); // Making A empty + for (size_t i = 0; i < shape.first; i++) { + std::valarray + row; // Making empty row which will be inserted in vector row.resize(shape.second); - row[i] = T(1); // Insert 1 at ith position - A.push_back(row); // Insert new row in vector + row[i] = T(1); // Insert 1 at ith position + A.push_back(row); // Insert new row in vector } return; } @@ -206,14 +210,14 @@ void unit_matrix_initialization(std::vector> &A, * @param shape required shape */ template -void zeroes_initialization(std::vector> &A, - const std::pair &shape - ) { - A.clear(); // Making A empty - for(size_t i = 0; i < shape.first; i++) { - std::valarray row; // Making empty row which will be inserted in vector - row.resize(shape.second); // By default all elements are zero - A.push_back(row); // Insert new row in vector +void zeroes_initialization(std::vector> &A, + const std::pair &shape) { + A.clear(); // Making A empty + for (size_t i = 0; i < shape.first; i++) { + std::valarray + row; // Making empty row which will be inserted in vector + row.resize(shape.second); // By default all elements are zero + A.push_back(row); // Insert new row in vector } return; } @@ -226,11 +230,11 @@ void zeroes_initialization(std::vector> &A, */ template T sum(const std::vector> &A) { - T cur_sum = 0; // Initially sum is zero - for(const auto &a : A) { // For every row in A - cur_sum += a.sum(); // Add sum of that row to current sum + T cur_sum = 0; // Initially sum is zero + for (const auto &a : A) { // For every row in A + cur_sum += a.sum(); // Add sum of that row to current sum } - return cur_sum; // Return sum + return cur_sum; // Return sum } /** @@ -242,52 +246,59 @@ T sum(const std::vector> &A) { template std::pair get_shape(const std::vector> &A) { const size_t sub_size = (*A.begin()).size(); - for(const auto &a : A) { + for (const auto &a : A) { // If supplied vector don't have same shape in all rows - if(a.size() != sub_size) { - std::cerr << "ERROR: (get_shape) Supplied vector is not 2D Matrix" << std::endl; + if (a.size() != sub_size) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vector is not 2D Matrix" << std::endl; std::exit(EXIT_FAILURE); } } - return std::make_pair(A.size(), sub_size); // Return shape as pair + return std::make_pair(A.size(), sub_size); // Return shape as pair } /** * Function to scale given 3D vector using min-max scaler * @tparam T typename of the vector * @param A 3D vector which will be scaled - * @param low new minimum value + * @param low new minimum value * @param high new maximum value * @return new scaled 3D vector */ template -std::vector>> -minmax_scaler(const std::vector>> &A, const T &low, const T &high) { - std::vector>> B = A; // Copying into new vector B - const auto shape = get_shape(B[0]); // Storing shape of B's every element - // As this function is used for scaling training data vector should be of shape (1, X) - if(shape.first != 1) { - std::cerr << "ERROR: (MinMax Scaling) Supplied vector is not supported for minmax scaling, shape: "; +std::vector>> minmax_scaler( + const std::vector>> &A, const T &low, + const T &high) { + std::vector>> B = + A; // Copying into new vector B + const auto shape = get_shape(B[0]); // Storing shape of B's every element + // As this function is used for scaling training data vector should be of + // shape (1, X) + if (shape.first != 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "Supplied 
vector is not supported for minmax scaling, shape: "; std::cerr << shape << std::endl; std::exit(EXIT_FAILURE); } - for(size_t i = 0; i < shape.second; i++) { - T min = B[0][0][i], max = B[0][0][i]; - for(size_t j = 0; j < B.size(); j++) { + for (size_t i = 0; i < shape.second; i++) { + T min = B[0][0][i], max = B[0][0][i]; + for (size_t j = 0; j < B.size(); j++) { // Updating minimum and maximum values min = std::min(min, B[j][0][i]); max = std::max(max, B[j][0][i]); } - for(size_t j = 0; j < B.size(); j++) { + for (size_t j = 0; j < B.size(); j++) { // Applying min-max scaler formula - B[j][0][i] = ((B[j][0][i] - min) / (max - min)) * (high - low) + low; + B[j][0][i] = + ((B[j][0][i] - min) / (max - min)) * (high - low) + low; } } - return B; // Return new resultant 3D vector + return B; // Return new resultant 3D vector } /** - * Function to get index of maximum element in 2D vector + * Function to get index of maximum element in 2D vector * @tparam T typename of the vector * @param A 2D vector for which maximum index is required * @return index of maximum element @@ -295,13 +306,16 @@ minmax_scaler(const std::vector>> &A, const T &low, template size_t argmax(const std::vector> &A) { const auto shape = get_shape(A); - // As this function is used on predicted (or target) vector, shape should be (1, X) - if(shape.first != 1) { - std::cerr << "ERROR: (argmax) Supplied vector is ineligible for argmax" << std::endl; - std::exit(EXIT_FAILURE); + // As this function is used on predicted (or target) vector, shape should be + // (1, X) + if (shape.first != 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vector is ineligible for argmax" << std::endl; + std::exit(EXIT_FAILURE); } // Return distance of max element from first element (i.e. 
index) - return std::distance(std::begin(A[0]), std::max_element(std::begin(A[0]), std::end(A[0]))); + return std::distance(std::begin(A[0]), + std::max_element(std::begin(A[0]), std::end(A[0]))); } /** @@ -311,46 +325,51 @@ size_t argmax(const std::vector> &A) { * @param func Function to be applied * @return new resultant vector */ -template -std::vector > apply_function(const std::vector > &A, - T (*func) (const T &)) { - std::vector> B = A; // New vector to store resultant vector - for(auto &b : B) { // For every row in vector - b = b.apply(func); // Apply function to that row +template +std::vector> apply_function( + const std::vector> &A, T (*func)(const T &)) { + std::vector> B = + A; // New vector to store resultant vector + for (auto &b : B) { // For every row in vector + b = b.apply(func); // Apply function to that row } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** - * Overloaded operator "*" to multiply given 2D vector with scaler + * Overloaded operator "*" to multiply given 2D vector with scalar * @tparam T typename of both vector and the scalar * @param A 2D vector to which scalar will be multiplied * @param val Scalar value which will be multiplied * @return new resultant vector */ template -std::vector > operator * (const std::vector> &A, const T& val) { - std::vector> B = A; // New vector to store resultant vector - for(auto &b : B) { // For every row in vector - b = b * val; // Multiply row with scaler +std::vector> operator*(const std::vector> &A, + const T &val) { + std::vector> B = + A; // New vector to store resultant vector + for (auto &b : B) { // For every row in vector + b = b * val; // Multiply row with scalar } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** - * Overloaded operator "/" to divide given 2D vector with scaler + * Overloaded operator "/" to divide given 2D vector with scalar * @tparam T typename of the vector and the scalar * @param A 2D vector to which scalar will be divided * @param val Scalar value which will be divided * @return new resultant vector */ template -std::vector > operator / (const std::vector> &A, const T& val) { - std::vector> B = A; // New vector to store resultant vector - for(auto &b : B) { // For every row in vector - b = b / val; // Divide row with scaler +std::vector> operator/(const std::vector> &A, + const T &val) { + std::vector> B = + A; // New vector to store resultant vector + for (auto &b : B) { // For every row in vector + b = b / val; // Divide row with scalar } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** @@ -360,125 +379,136 @@ std::vector > operator / (const std::vector> * @return new resultant vector */ template -std::vector > transpose(const std::vector> &A) { - const auto shape = get_shape(A); // Current shape of vector - std::vector > B; // New vector to store result +std::vector> transpose( + const std::vector> &A) { + const auto shape = get_shape(A); // Current shape of vector + std::vector> B; // New vector to store result // Storing transpose values of A in B - for(size_t j = 0; j < shape.second; j++) { - std::valarray row; + for (size_t j = 0; j < shape.second; j++) { + std::valarray row; row.resize(shape.first); - for(size_t i = 0; i < shape.first; i++) { + for (size_t i = 0; i < shape.first; i++) { row[i] = A[i][j]; } B.push_back(row); } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** * Overloaded
operator "+" to add two 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > operator + (const std::vector> &A, const std::vector> &B) { +std::vector> operator+( + const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors don't have equal shape - if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) { - std::cerr << "ERROR: (vector addition) Supplied vectors have different shapes "; + if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vectors have different shapes "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; - for(size_t i = 0; i < A.size(); i++) { // For every row - C.push_back(A[i] + B[i]); // Elementwise addition + std::vector> C; + for (size_t i = 0; i < A.size(); i++) { // For every row + C.push_back(A[i] + B[i]); // Elementwise addition } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } /** * Overloaded operator "-" to add subtract 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > operator - (const std::vector> &A, const std::vector> &B) { +std::vector> operator-( + const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors don't have equal shape - if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) { - std::cerr << "ERROR: (vector subtraction) Supplied vectors have different shapes "; + if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vectors have different shapes "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; // Vector to store result - for(size_t i = 0; i < A.size(); i++) { // For every row - C.push_back(A[i] - B[i]); // Elementwise substraction + std::vector> C; // Vector to store result + for (size_t i = 0; i < A.size(); i++) { // For every row + C.push_back(A[i] - B[i]); // Elementwise substraction } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } /** * Function to multiply two 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > multiply(const std::vector> &A, const std::vector> &B) { +std::vector> multiply(const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors are not eligible for multiplication - if(shape_a.second != shape_b.first ) { - std::cerr << "ERROR: (multiply) Supplied vectors are not eligible for multiplication "; + if (shape_a.second != shape_b.first) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Vectors are not eligible for multiplication "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; // Vector to store result - // Normal matrix multiplication + std::vector> C; // Vector to store result + // Normal matrix multiplication for 
(size_t i = 0; i < shape_a.first; i++) { std::valarray row; row.resize(shape_b.second); - for(size_t j = 0; j < shape_b.second; j++) { - for(size_t k = 0; k < shape_a.second; k++) { + for (size_t j = 0; j < shape_b.second; j++) { + for (size_t k = 0; k < shape_a.second; k++) { row[j] += A[i][k] * B[k][j]; } } C.push_back(row); } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } /** * Function to get hadamard product of two 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > hadamard_product(const std::vector> &A, const std::vector> &B) { +std::vector> hadamard_product( + const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors are not eligible for hadamard product - if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) { - std::cerr << "ERROR: (hadamard_product) Supplied vectors have different shapes "; + if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Vectors have different shapes "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; // Vector to store result - for(size_t i = 0; i < A.size(); i++) { - C.push_back(A[i] * B[i]); // Elementwise multiplication + std::vector> C; // Vector to store result + for (size_t i = 0; i < A.size(); i++) { + C.push_back(A[i] * B[i]); // Elementwise multiplication } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } } // namespace machine_learning - #endif
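
For reviewers who want to exercise the reformatted helpers in isolation, the sketch below touches the main vector_ops.hpp entry points (initialization, shape queries, matrix and elementwise products, apply_function, argmax). It is illustrative only and not part of the diff above; the file name, the local square helper, and the chosen shapes are hypothetical.

// usage_sketch.cpp -- illustrative only, not part of this PR.
// Minimal sketch assuming vector_ops.hpp is on the include path.
#include <cassert>
#include <iostream>
#include <utility>
#include <valarray>
#include <vector>

#include "vector_ops.hpp"

// Local helper matching the T (*)(const T &) signature expected by apply_function.
static double square(const double &x) { return x * x; }

int main() {
    // The overloaded operators (<<, +, -, *, /) live in namespace
    // machine_learning but act on std:: types, so ADL alone will not find
    // them; a using-directive makes infix use work.
    using namespace machine_learning;

    // 2x3 matrix with uniform random values in [-1, 1]
    std::vector<std::valarray<double>> W;
    uniform_random_initialization(W, std::pair<size_t, size_t>(2, 3), -1.0, 1.0);
    std::cout << "W, shape " << get_shape(W) << ":\n" << W;

    // 3x3 identity, used to sanity-check multiply(): (2x3) * (3x3) -> 2x3
    std::vector<std::valarray<double>> eye;
    unit_matrix_initialization(eye, std::pair<size_t, size_t>(3, 3));
    assert(get_shape(multiply(W, eye)) == get_shape(W));

    // Elementwise helpers: addition, Hadamard product, scalar scaling,
    // and mapping a unary function over every entry
    std::cout << "sum(W + W)     : " << sum(W + W) << '\n';
    std::cout << "sum(W had. W)  : " << sum(hadamard_product(W, W)) << '\n';
    std::cout << "sum(W * 0.5)   : " << sum(W * 0.5) << '\n';
    std::cout << "sum of squares : " << sum(apply_function(W, square)) << '\n';

    // transpose flips the shape from (2, 3) to (3, 2)
    assert(get_shape(transpose(W)).first == 3);

    // argmax expects a (1, X) row vector, e.g. a single prediction
    std::vector<std::valarray<double>> scores = {{0.1, 0.7, 0.2}};
    std::cout << "argmax(scores) : " << argmax(scores) << '\n';  // prints 1
    return 0;
}

Note the using-directive: because the arithmetic and stream operators are declared inside namespace machine_learning yet take std::vector/std::valarray arguments, unqualified infix use only compiles once that namespace is made visible (or the code itself sits inside it, as neural_network.cpp does).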