diff --git a/machine_learning/neural_network.cpp b/machine_learning/neural_network.cpp index aad469b14..b477f5e2e 100644 --- a/machine_learning/neural_network.cpp +++ b/machine_learning/neural_network.cpp @@ -1,763 +1,802 @@ /** - * @file + * @file * @author [Deep Raval](https://github.com/imdeep2905) - * - * @brief Implementation of [Multilayer Perceptron] (https://en.wikipedia.org/wiki/Multilayer_perceptron). - * + * + * @brief Implementation of [Multilayer Perceptron] + * (https://en.wikipedia.org/wiki/Multilayer_perceptron). + * * @details - * A multilayer perceptron (MLP) is a class of feedforward artificial neural network (ANN). The term MLP is used ambiguously, - * sometimes loosely to any feedforward ANN, sometimes strictly to refer to networks composed of multiple layers of perceptrons - * (with threshold activation). Multilayer perceptrons are sometimes colloquially referred to as "vanilla" neural networks, - * especially when they have a single hidden layer. - * - * An MLP consists of at least three layers of nodes: an input layer, a hidden layer and an output layer. Except for the - * input nodes, each node is a neuron that uses a nonlinear activation function. MLP utilizes a supervised learning technique - * called backpropagation for training. Its multiple layers and non-linear activation distinguish MLP from a linear - * perceptron. It can distinguish data that is not linearly separable. - * - * See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for training algorithm. - * - * \note This implementation uses mini-batch gradient descent as optimizer and MSE as loss function. Bias is also not included. + * A multilayer perceptron (MLP) is a class of feedforward artificial neural + * network (ANN). The term MLP is used ambiguously, sometimes loosely to any + * feedforward ANN, sometimes strictly to refer to networks composed of multiple + * layers of perceptrons (with threshold activation). Multilayer perceptrons are + * sometimes colloquially referred to as "vanilla" neural networks, especially + * when they have a single hidden layer. + * + * An MLP consists of at least three layers of nodes: an input layer, a hidden + * layer and an output layer. Except for the input nodes, each node is a neuron + * that uses a nonlinear activation function. MLP utilizes a supervised learning + * technique called backpropagation for training. Its multiple layers and + * non-linear activation distinguish MLP from a linear perceptron. It can + * distinguish data that is not linearly separable. + * + * See [Backpropagation](https://en.wikipedia.org/wiki/Backpropagation) for + * training algorithm. + * + * \note This implementation uses mini-batch gradient descent as optimizer and + * MSE as loss function. Bias is also not included. 
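 * A sketch of the loss convention noted above, as implemented in fit() and
 * evaluate() further down: for one sample with prediction y_hat and target y,
 * fit() accumulates
 *     loss += sum_k (y_hat_k - y_k)^2
 * and backpropagates the raw error (y_hat_k - y_k) without the factor of 2,
 * while evaluate() reports 0.5 * sum_k (y_k - y_hat_k)^2; both values are
 * averaged over the samples.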
*/ -#include "vector_ops.hpp" // Custom header file for vector operations - +#include +#include +#include +#include +#include #include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include + +#include "vector_ops.hpp" // Custom header file for vector operations /** \namespace machine_learning * \brief Machine learning algorithms */ namespace machine_learning { - /** \namespace neural_network - * \brief Neural Network or Multilayer Perceptron - */ - namespace neural_network { - /** \namespace activations - * \brief Various activation functions used in Neural network - */ - namespace activations { - /** - * Sigmoid function - * @param X Value - * @return Returns sigmoid(x) - */ - double sigmoid (const double &x) { - return 1.0 / (1.0 + std::exp(-x)); - } +/** \namespace neural_network + * \brief Neural Network or Multilayer Perceptron + */ +namespace neural_network { +/** \namespace activations + * \brief Various activation functions used in Neural network + */ +namespace activations { +/** + * Sigmoid function + * @param X Value + * @return Returns sigmoid(x) + */ +double sigmoid(const double &x) { return 1.0 / (1.0 + std::exp(-x)); } - /** - * Derivative of sigmoid function - * @param X Value - * @return Returns derivative of sigmoid(x) - */ - double dsigmoid (const double &x) { - return x * (1 - x); - } +/** + * Derivative of sigmoid function + * @param X Value + * @return Returns derivative of sigmoid(x) + */ +double dsigmoid(const double &x) { return x * (1 - x); } - /** - * Relu function - * @param X Value - * @returns relu(x) - */ - double relu (const double &x) { - return std::max(0.0, x); - } +/** + * Relu function + * @param X Value + * @returns relu(x) + */ +double relu(const double &x) { return std::max(0.0, x); } - /** - * Derivative of relu function - * @param X Value - * @returns derivative of relu(x) - */ - double drelu (const double &x) { - return x >= 0.0 ? 1.0 : 0.0; - } +/** + * Derivative of relu function + * @param X Value + * @returns derivative of relu(x) + */ +double drelu(const double &x) { return x >= 0.0 ? 1.0 : 0.0; } - /** - * Tanh function - * @param X Value - * @return Returns tanh(x) - */ - double tanh (const double &x) { - return 2 / (1 + std::exp(-2 * x)) - 1; - } +/** + * Tanh function + * @param X Value + * @return Returns tanh(x) + */ +double tanh(const double &x) { return 2 / (1 + std::exp(-2 * x)) - 1; } - /** - * Derivative of Sigmoid function - * @param X Value - * @return Returns derivative of tanh(x) - */ - double dtanh (const double &x) { - return 1 - x * x; +/** + * Derivative of Sigmoid function + * @param X Value + * @return Returns derivative of tanh(x) + */ +double dtanh(const double &x) { return 1 - x * x; } +} // namespace activations +/** \namespace util_functions + * \brief Various utility functions used in Neural network + */ +namespace util_functions { +/** + * Square function + * @param X Value + * @return Returns x * x + */ +double square(const double &x) { return x * x; } +/** + * Identity function + * @param X Value + * @return Returns x + */ +double identity_function(const double &x) { return x; } +} // namespace util_functions +/** \namespace layers + * \brief This namespace contains layers used + * in MLP. + */ +namespace layers { +/** + * neural_network::layers::DenseLayer class is used to store all necessary + * information about the layers (i.e. neurons, activation and kernal). This + * class is used by NeuralNetwork class to store layers. 
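// Note on the derivative helpers above: dsigmoid() and dtanh() expect the
// *activated output* a = f(z) rather than the pre-activation z, which is why
// backpropagation later applies them to the stored layer activations.
// A minimal self-check (illustrative only, assuming the namespaces above):
//
//     double z = 0.3;
//     double a = machine_learning::neural_network::activations::sigmoid(z);
//     // g equals sigmoid(z) * (1 - sigmoid(z)), i.e. d(sigmoid)/dz at z
//     double g = machine_learning::neural_network::activations::dsigmoid(a);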
+ * + */ +class DenseLayer { + public: + // To store activation function and it's derivative + double (*activation_function)(const double &); + double (*dactivation_function)(const double &); + int neurons; // To store number of neurons (used in summary) + std::string activation; // To store activation name (used in summary) + std::vector> kernal; // To store kernal (aka weights) + + /** + * Constructor for neural_network::layers::DenseLayer class + * @param neurons number of neurons + * @param activation activation function for layer + * @param kernal_shape shape of kernal + * @param random_kernal flag for whether to intialize kernal randomly + */ + DenseLayer(const int &neurons, const std::string &activation, + const std::pair &kernal_shape, + const bool &random_kernal) { + // Choosing activation (and it's derivative) + if (activation == "sigmoid") { + activation_function = neural_network::activations::sigmoid; + dactivation_function = neural_network::activations::sigmoid; + } else if (activation == "relu") { + activation_function = neural_network::activations::relu; + dactivation_function = neural_network::activations::drelu; + } else if (activation == "tanh") { + activation_function = neural_network::activations::tanh; + dactivation_function = neural_network::activations::dtanh; + } else if (activation == "none") { + // Set identity function in casse of none is supplied + activation_function = + neural_network::util_functions::identity_function; + dactivation_function = + neural_network::util_functions::identity_function; + } else { + // If supplied activation is invalid + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid argument. Expected {none, sigmoid, relu, " + "tanh} got "; + std::cerr << activation << std::endl; + std::exit(EXIT_FAILURE); + } + this->activation = activation; // Setting activation name + this->neurons = neurons; // Setting number of neurons + // Initialize kernal according to flag + if (random_kernal) { + uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0); + } else { + unit_matrix_initialization(kernal, kernal_shape); + } + } + /** + * Constructor for neural_network::layers::DenseLayer class + * @param neurons number of neurons + * @param activation activation function for layer + * @param kernal values of kernal (useful in loading model) + */ + DenseLayer(const int &neurons, const std::string &activation, + const std::vector> &kernal) { + // Choosing activation (and it's derivative) + if (activation == "sigmoid") { + activation_function = neural_network::activations::sigmoid; + dactivation_function = neural_network::activations::sigmoid; + } else if (activation == "relu") { + activation_function = neural_network::activations::relu; + dactivation_function = neural_network::activations::drelu; + } else if (activation == "tanh") { + activation_function = neural_network::activations::tanh; + dactivation_function = neural_network::activations::dtanh; + } else if (activation == "none") { + // Set identity function in casse of none is supplied + activation_function = + neural_network::util_functions::identity_function; + dactivation_function = + neural_network::util_functions::identity_function; + } else { + // If supplied activation is invalid + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid argument. 
Expected {none, sigmoid, relu, " + "tanh} got "; + std::cerr << activation << std::endl; + std::exit(EXIT_FAILURE); + } + this->activation = activation; // Setting activation name + this->neurons = neurons; // Setting number of neurons + this->kernal = kernal; // Setting supplied kernal values + } + + /** + * Copy Constructor for class DenseLayer. + * + * @param model instance of class to be copied. + */ + DenseLayer(const DenseLayer &layer) = default; + + /** + * Destructor for class DenseLayer. + */ + ~DenseLayer() = default; + + /** + * Copy assignment operator for class DenseLayer + */ + DenseLayer &operator=(const DenseLayer &layer) = default; + + /** + * Move constructor for class DenseLayer + */ + DenseLayer(DenseLayer &&) = default; + + /** + * Move assignment operator for class DenseLayer + */ + DenseLayer &operator=(DenseLayer &&) = default; +}; +} // namespace layers +/** + * NeuralNetwork class is implements MLP. This class is + * used by actual user to create and train networks. + * + */ +class NeuralNetwork { + private: + std::vector layers; // To store layers + /** + * Private Constructor for class NeuralNetwork. This constructor + * is used internally to load model. + * @param config vector containing pair (neurons, activation) + * @param kernals vector containing all pretrained kernals + */ + NeuralNetwork( + const std::vector> &config, + const std::vector>> &kernals) { + // First layer should not have activation + if (config.begin()->second != "none") { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "First layer can't have activation other than none got " + << config.begin()->second; + std::cerr << std::endl; + std::exit(EXIT_FAILURE); + } + // Network should have atleast two layers + if (config.size() <= 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid size of network, "; + std::cerr << "Atleast two layers are required"; + std::exit(EXIT_FAILURE); + } + // Reconstructing all pretrained layers + for (size_t i = 0; i < config.size(); i++) { + layers.emplace_back(neural_network::layers::DenseLayer( + config[i].first, config[i].second, kernals[i])); + } + std::cout << "INFO: Network constructed successfully" << std::endl; + } + /** + * Private function to get detailed predictions (i.e. + * activated neuron values). This function is used in + * backpropagation, single predict and batch predict. + * @param X input vector + */ + std::vector>> + __detailed_single_prediction(const std::vector> &X) { + std::vector>> details; + std::vector> current_pass = X; + details.emplace_back(X); + for (const auto &l : layers) { + current_pass = multiply(current_pass, l.kernal); + current_pass = apply_function(current_pass, l.activation_function); + details.emplace_back(current_pass); + } + return details; + } + + public: + /** + * Default Constructor for class NeuralNetwork. This constructor + * is used to create empty variable of type NeuralNetwork class. + */ + NeuralNetwork() = default; + + /** + * Constructor for class NeuralNetwork. This constructor + * is used by user. 
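// For example (a sketch mirroring the test() configuration at the end of
// this file), the call
//
//     machine_learning::neural_network::NeuralNetwork net(
//         {{4, "none"}, {6, "relu"}, {3, "sigmoid"}});
//
// builds a first layer holding a 4x4 unit (identity) kernal and two trainable
// layers with uniformly random kernals of shape (4, 6) and (6, 3); a forward
// pass on a 1x4 input then records activations of shape 1x4, 1x4, 1x6 and 1x3.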
+ * @param config vector containing pair (neurons, activation) + */ + explicit NeuralNetwork( + const std::vector> &config) { + // First layer should not have activation + if (config.begin()->second != "none") { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "First layer can't have activation other than none got " + << config.begin()->second; + std::cerr << std::endl; + std::exit(EXIT_FAILURE); + } + // Network should have atleast two layers + if (config.size() <= 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Invalid size of network, "; + std::cerr << "Atleast two layers are required"; + std::exit(EXIT_FAILURE); + } + // Separately creating first layer so it can have unit matrix + // as kernal. + layers.push_back(neural_network::layers::DenseLayer( + config[0].first, config[0].second, + {config[0].first, config[0].first}, false)); + // Creating remaining layers + for (size_t i = 1; i < config.size(); i++) { + layers.push_back(neural_network::layers::DenseLayer( + config[i].first, config[i].second, + {config[i - 1].first, config[i].first}, true)); + } + std::cout << "INFO: Network constructed successfully" << std::endl; + } + + /** + * Copy Constructor for class NeuralNetwork. + * + * @param model instance of class to be copied. + */ + NeuralNetwork(const NeuralNetwork &model) = default; + + /** + * Destructor for class NeuralNetwork. + */ + ~NeuralNetwork() = default; + + /** + * Copy assignment operator for class NeuralNetwork + */ + NeuralNetwork &operator=(const NeuralNetwork &model) = default; + + /** + * Move constructor for class NeuralNetwork + */ + NeuralNetwork(NeuralNetwork &&) = default; + + /** + * Move assignment operator for class NeuralNetwork + */ + NeuralNetwork &operator=(NeuralNetwork &&) = default; + + /** + * Function to get X and Y from csv file (where X = data, Y = label) + * @param file_name csv file name + * @param last_label flag for whether label is in first or last column + * @param normalize flag for whether to normalize data + * @param slip_lines number of lines to skip + * @return returns pair of X and Y + */ + std::pair>>, + std::vector>>> + get_XY_from_csv(const std::string &file_name, const bool &last_label, + const bool &normalize, const int &slip_lines = 1) { + std::ifstream in_file; // Ifstream to read file + in_file.open(file_name.c_str(), std::ios::in); // Open file + // If there is any problem in opening file + if (!in_file.is_open()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Unable to open file: " << file_name << std::endl; + std::exit(EXIT_FAILURE); + } + std::vector>> X, + Y; // To store X and Y + std::string line; // To store each line + // Skip lines + for (int i = 0; i < slip_lines; i++) { + std::getline(in_file, line, '\n'); // Ignore line + } + // While file has information + while (!in_file.eof() && std::getline(in_file, line, '\n')) { + std::valarray x_data, + y_data; // To store single sample and label + std::stringstream ss(line); // Constructing stringstream from line + std::string token; // To store each token in line (seprated by ',') + while (std::getline(ss, token, ',')) { // For each token + // Insert numerical value of token in x_data + x_data = insert_element(x_data, std::stod(token)); } - } // namespace activations - /** \namespace util_functions - * \brief Various utility functions used in Neural network - */ - namespace util_functions { - /** - * Square function - * @param X Value - * @return Returns x * x - */ - double square(const double &x) { - return x * x; + 
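// (Illustrative note on the label handling below: with a 3-neuron output
//  layer, a label value of 2 in the label column becomes the one-hot target
//  {0, 0, 1}, while a single-neuron output layer keeps the raw column value
//  as a regression target.)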
// If label is in last column + if (last_label) { + y_data.resize(this->layers.back().neurons); + // If task is classification + if (y_data.size() > 1) { + y_data[x_data[x_data.size() - 1]] = 1; + } + // If task is regrssion (of single value) + else { + y_data[0] = x_data[x_data.size() - 1]; + } + x_data = pop_back(x_data); // Remove label from x_data + } else { + y_data.resize(this->layers.back().neurons); + // If task is classification + if (y_data.size() > 1) { + y_data[x_data[x_data.size() - 1]] = 1; + } + // If task is regrssion (of single value) + else { + y_data[0] = x_data[x_data.size() - 1]; + } + x_data = pop_front(x_data); // Remove label from x_data } - /** - * Identity function - * @param X Value - * @return Returns x - */ - double identity_function(const double &x) { - return x; + // Push collected X_data and y_data in X and Y + X.push_back({x_data}); + Y.push_back({y_data}); + } + // Normalize training data if flag is set + if (normalize) { + // Scale data between 0 and 1 using min-max scaler + X = minmax_scaler(X, 0.01, 1.0); + } + in_file.close(); // Closing file + return make_pair(X, Y); // Return pair of X and Y + } + + /** + * Function to get prediction of model on single sample. + * @param X array of feature vectors + * @return returns predictions as vector + */ + std::vector> single_predict( + const std::vector> &X) { + // Get activations of all layers + auto activations = this->__detailed_single_prediction(X); + // Return activations of last layer (actual predicted values) + return activations.back(); + } + + /** + * Function to get prediction of model on batch + * @param X array of feature vectors + * @return returns predicted values as vector + */ + std::vector>> batch_predict( + const std::vector>> &X) { + // Store predicted values + std::vector>> predicted_batch( + X.size()); + for (size_t i = 0; i < X.size(); i++) { // For every sample + // Push predicted values + predicted_batch[i] = this->single_predict(X[i]); + } + return predicted_batch; // Return predicted values + } + + /** + * Function to fit model on supplied data + * @param X array of feature vectors + * @param Y array of target values + * @param epochs number of epochs (default = 100) + * @param learning_rate learning rate (default = 0.01) + * @param batch_size batch size for gradient descent (default = 32) + * @param shuffle flag for whether to shuffle data (default = true) + */ + void fit(const std::vector>> &X_, + const std::vector>> &Y_, + const int &epochs = 100, const double &learning_rate = 0.01, + const size_t &batch_size = 32, const bool &shuffle = true) { + std::vector>> X = X_, Y = Y_; + // Both label and input data should have same size + if (X.size() != Y.size()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "X and Y in fit have different sizes" << std::endl; + std::exit(EXIT_FAILURE); + } + std::cout << "INFO: Training Started" << std::endl; + for (int epoch = 1; epoch <= epochs; epoch++) { // For every epoch + // Shuffle X and Y if flag is set + if (shuffle) { + equal_shuffle(X, Y); } - } // namespace util_functions - /** \namespace layers - * \brief This namespace contains layers used - * in MLP. - */ - namespace layers { - /** - * neural_network::layers::DenseLayer class is used to store all necessary information about - * the layers (i.e. neurons, activation and kernal). This class - * is used by NeuralNetwork class to store layers. 
- * - */ - class DenseLayer { - public: - // To store activation function and it's derivative - double (*activation_function)(const double &); - double (*dactivation_function)(const double &); - int neurons; // To store number of neurons (used in summary) - std::string activation; // To store activation name (used in summary) - std::vector > kernal; // To store kernal (aka weights) - - /** - * Constructor for neural_network::layers::DenseLayer class - * @param neurons number of neurons - * @param activation activation function for layer - * @param kernal_shape shape of kernal - * @param random_kernal flag for whether to intialize kernal randomly - */ - DenseLayer(const int &neurons, - const std::string &activation, - const std::pair &kernal_shape, - const bool &random_kernal) { - // Choosing activation (and it's derivative) - if (activation == "sigmoid") { - activation_function = neural_network::activations::sigmoid; - dactivation_function = neural_network::activations::sigmoid; - } - else if (activation == "relu") { - activation_function = neural_network::activations::relu; - dactivation_function = neural_network::activations::drelu; - } - else if (activation == "tanh") { - activation_function = neural_network::activations::tanh; - dactivation_function = neural_network::activations::dtanh; - } - else if (activation == "none") { - // Set identity function in casse of none is supplied - activation_function = neural_network::util_functions::identity_function; - dactivation_function = neural_network::util_functions::identity_function; - } - else { - // If supplied activation is invalid - std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, "; - std::cerr << "Expected from {none, sigmoid, relu, tanh} got "; - std::cerr << activation << std::endl; - std::exit(EXIT_FAILURE); - } - this -> activation = activation; // Setting activation name - this -> neurons = neurons; // Setting number of neurons - // Initialize kernal according to flag - if(random_kernal) { - uniform_random_initialization(kernal, kernal_shape, -1.0, 1.0); - } - else { - unit_matrix_initialization(kernal, kernal_shape); - } + auto start = + std::chrono::high_resolution_clock::now(); // Start clock + double loss = 0, + acc = 0; // Intialize performance metrics with zero + // For each starting index of batch + for (size_t batch_start = 0; batch_start < X.size(); + batch_start += batch_size) { + for (size_t i = batch_start; + i < std::min(X.size(), batch_start + batch_size); i++) { + std::vector> grad, cur_error, + predicted; + auto activations = this->__detailed_single_prediction(X[i]); + // Gradients vector to store gradients for all layers + // They will be averaged and applied to kernal + std::vector>> gradients; + gradients.resize(this->layers.size()); + // First intialize gradients to zero + for (size_t i = 0; i < gradients.size(); i++) { + zeroes_initialization( + gradients[i], get_shape(this->layers[i].kernal)); } - /** - * Constructor for neural_network::layers::DenseLayer class - * @param neurons number of neurons - * @param activation activation function for layer - * @param kernal values of kernal (useful in loading model) - */ - DenseLayer (const int &neurons, - const std::string &activation, - const std::vector > &kernal) { - // Choosing activation (and it's derivative) - if (activation == "sigmoid") { - activation_function = neural_network::activations::sigmoid; - dactivation_function = neural_network::activations::sigmoid; - } - else if (activation == "relu") { - activation_function = 
neural_network::activations::relu; - dactivation_function = neural_network::activations::drelu; - } - else if (activation == "tanh") { - activation_function = neural_network::activations::tanh; - dactivation_function = neural_network::activations::dtanh; - } - else if (activation == "none") { - // Set identity function in casse of none is supplied - activation_function = neural_network::util_functions::identity_function; - dactivation_function = neural_network::util_functions::identity_function; - } - else { - // If supplied activation is invalid - std::cerr << "ERROR: Invalid argument for layer -> constructor -> activation, "; - std::cerr << "Expected from {none, sigmoid, relu, tanh} got "; - std::cerr << activation << std::endl; - std::exit(EXIT_FAILURE); - } - this -> activation = activation; // Setting activation name - this -> neurons = neurons; // Setting number of neurons - this -> kernal = kernal; // Setting supplied kernal values + predicted = activations.back(); // Predicted vector + cur_error = predicted - Y[i]; // Absoulute error + // Calculating loss with MSE + loss += sum(apply_function( + cur_error, neural_network::util_functions::square)); + // If prediction is correct + if (argmax(predicted) == argmax(Y[i])) { + acc += 1; } - - /** - * Copy Constructor for class DenseLayer. - * - * @param model instance of class to be copied. - */ - DenseLayer(const DenseLayer &layer) = default; + // For every layer (except first) starting from last one + for (size_t j = this->layers.size() - 1; j >= 1; j--) { + // Backpropogating errors + cur_error = hadamard_product( + cur_error, + apply_function( + activations[j + 1], + this->layers[j].dactivation_function)); + // Calculating gradient for current layer + grad = multiply(transpose(activations[j]), cur_error); + // Change error according to current kernal values + cur_error = multiply(cur_error, + transpose(this->layers[j].kernal)); + // Adding gradient values to collection of gradients + gradients[j] = gradients[j] + grad / double(batch_size); + } + // Applying gradients + for (size_t j = this->layers.size() - 1; j >= 1; j--) { + // Updating kernal (aka weights) + this->layers[j].kernal = this->layers[j].kernal - + gradients[j] * learning_rate; + } + } + } + auto stop = + std::chrono::high_resolution_clock::now(); // Stoping the clock + // Calculate time taken by epoch + auto duration = + std::chrono::duration_cast(stop - + start); + loss /= X.size(); // Averaging loss + acc /= X.size(); // Averaging accuracy + std::cout.precision(4); // set output precision to 4 + // Printing training stats + std::cout << "Training: Epoch " << epoch << '/' << epochs; + std::cout << ", Loss: " << loss; + std::cout << ", Accuracy: " << acc; + std::cout << ", Taken time: " << duration.count() / 1e6 + << " seconds"; + std::cout << std::endl; + } + return; + } - /** - * Destructor for class DenseLayer. 
- */ - ~DenseLayer() = default; + /** + * Function to fit model on data stored in csv file + * @param file_name csv file name + * @param last_label flag for whether label is in first or last column + * @param epochs number of epochs + * @param learning_rate learning rate + * @param normalize flag for whether to normalize data + * @param slip_lines number of lines to skip + * @param batch_size batch size for gradient descent (default = 32) + * @param shuffle flag for whether to shuffle data (default = true) + */ + void fit_from_csv(const std::string &file_name, const bool &last_label, + const int &epochs, const double &learning_rate, + const bool &normalize, const int &slip_lines = 1, + const size_t &batch_size = 32, + const bool &shuffle = true) { + // Getting training data from csv file + auto data = + this->get_XY_from_csv(file_name, last_label, normalize, slip_lines); + // Fit the model on training data + this->fit(data.first, data.second, epochs, learning_rate, batch_size, + shuffle); + return; + } - /** - * Copy assignment operator for class DenseLayer - */ - DenseLayer& operator = (const DenseLayer &layer) = default; + /** + * Function to evaluate model on supplied data + * @param X array of feature vectors (input data) + * @param Y array of target values (label) + */ + void evaluate(const std::vector>> &X, + const std::vector>> &Y) { + std::cout << "INFO: Evaluation Started" << std::endl; + double acc = 0, loss = 0; // intialize performance metrics with zero + for (size_t i = 0; i < X.size(); i++) { // For every sample in input + // Get predictions + std::vector> pred = + this->single_predict(X[i]); + // If predicted class is correct + if (argmax(pred) == argmax(Y[i])) { + acc += 1; // Increment accuracy + } + // Calculating loss - Mean Squared Error + loss += sum(apply_function((Y[i] - pred), + neural_network::util_functions::square) * + 0.5); + } + acc /= X.size(); // Averaging accuracy + loss /= X.size(); // Averaging loss + // Prinitng performance of the model + std::cout << "Evaluation: Loss: " << loss; + std::cout << ", Accuracy: " << acc << std::endl; + return; + } - /** - * Move constructor for class DenseLayer - */ - DenseLayer(DenseLayer &&) = default; + /** + * Function to evaluate model on data stored in csv file + * @param file_name csv file name + * @param last_label flag for whether label is in first or last column + * @param normalize flag for whether to normalize data + * @param slip_lines number of lines to skip + */ + void evaluate_from_csv(const std::string &file_name, const bool &last_label, + const bool &normalize, const int &slip_lines = 1) { + // Getting training data from csv file + auto data = + this->get_XY_from_csv(file_name, last_label, normalize, slip_lines); + // Evaluating model + this->evaluate(data.first, data.second); + return; + } - /** - * Move assignment operator for class DenseLayer - */ - DenseLayer& operator = (DenseLayer &&) = default; - }; - } // namespace layers + /** + * Function to save current model. 
+ * @param file_name file name to save model (*.model) + */ + void save_model(const std::string &_file_name) { + std::string file_name = _file_name; + // Adding ".model" extension if it is not already there in name + if (file_name.find(".model") == file_name.npos) { + file_name += ".model"; + } + std::ofstream out_file; // Ofstream to write in file + // Open file in out|trunc mode + out_file.open(file_name.c_str(), + std::ofstream::out | std::ofstream::trunc); + // If there is any problem in opening file + if (!out_file.is_open()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Unable to open file: " << file_name << std::endl; + std::exit(EXIT_FAILURE); + } /** - * NeuralNetwork class is implements MLP. This class is - * used by actual user to create and train networks. - * - */ - class NeuralNetwork { - private: - std::vector layers; // To store layers - /** - * Private Constructor for class NeuralNetwork. This constructor - * is used internally to load model. - * @param config vector containing pair (neurons, activation) - * @param kernals vector containing all pretrained kernals - */ - NeuralNetwork(const std::vector > &config, - const std::vector >> &kernals) { - // First layer should not have activation - if(config.begin() -> second != "none") { - std::cerr << "ERROR: First layer can't have activation other than none"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - // Network should have atleast two layers - if(config.size() <= 1) { - std::cerr << "ERROR: Invalid size of network, "; - std::cerr << "Atleast two layers are required"; - std::exit(EXIT_FAILURE); - } - // Reconstructing all pretrained layers - for(size_t i = 0; i < config.size(); i++) { - layers.emplace_back(neural_network::layers::DenseLayer(config[i].first, - config[i].second, - kernals[i])); - } - std::cout << "INFO: Network constructed successfully" << std::endl; + Format in which model is saved: + + total_layers + neurons(1st neural_network::layers::DenseLayer) activation_name(1st + neural_network::layers::DenseLayer) kernal_shape(1st + neural_network::layers::DenseLayer) kernal_values + . + . + . + neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth + neural_network::layers::DenseLayer) kernal_shape(Nth + neural_network::layers::DenseLayer) kernal_value + + For Example, pretrained model with 3 layers: +
+            3
+            4 none
+            4 4
+            1 0 0 0
+            0 1 0 0
+            0 0 1 0
+            0 0 0 1
+            6 relu
+            4 6
+            -1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653
+            -0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202
+            -2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049
+            -0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741
+            3 sigmoid
+            6 3
+            0.390267 -0.391703 -0.0989607
+            0.499234 -0.564539 -0.28097
+            0.553386 -0.153974 -1.92493
+            -2.01336 -0.0219682 1.44145
+            1.72853 -0.465264 -0.705373
+            -0.908409 -0.740547 0.376416
+            
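// A minimal usage sketch for this format, assuming a trained network named
// myNN (the name is illustrative):
//
//     myNN.save_model("iris_model");  // ".model" is appended automatically
//     auto restored = myNN.load_model("iris_model.model");
//     restored.summary();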
+ */ + // Saving model in the same format + out_file << layers.size(); + out_file << std::endl; + for (const auto &layer : this->layers) { + out_file << layer.neurons << ' ' << layer.activation << std::endl; + const auto shape = get_shape(layer.kernal); + out_file << shape.first << ' ' << shape.second << std::endl; + for (const auto &row : layer.kernal) { + for (const auto &val : row) { + out_file << val << ' '; } - /** - * Private function to get detailed predictions (i.e. - * activated neuron values). This function is used in - * backpropagation, single predict and batch predict. - * @param X input vector - */ - std::vector>> - __detailed_single_prediction (const std::vector> &X) { - std::vector >> details; - std::vector < std::valarray > current_pass = X; - details.emplace_back(X); - for(const auto &l : layers) { - current_pass = multiply(current_pass, l.kernal); - current_pass = apply_function(current_pass, l.activation_function); - details.emplace_back(current_pass); - } - return details; + out_file << std::endl; + } + } + std::cout << "INFO: Model saved successfully with name : "; + std::cout << file_name << std::endl; + out_file.close(); // Closing file + return; + } + + /** + * Function to load earlier saved model. + * @param file_name file from which model will be loaded (*.model) + * @return instance of NeuralNetwork class with pretrained weights + */ + NeuralNetwork load_model(const std::string &file_name) { + std::ifstream in_file; // Ifstream to read file + in_file.open(file_name.c_str()); // Openinig file + // If there is any problem in opening file + if (!in_file.is_open()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Unable to open file: " << file_name << std::endl; + std::exit(EXIT_FAILURE); + } + std::vector> config; // To store config + std::vector>> + kernals; // To store pretrained kernals + // Loading model from saved file format + size_t total_layers = 0; + in_file >> total_layers; + for (size_t i = 0; i < total_layers; i++) { + int neurons = 0; + std::string activation; + size_t shape_a = 0, shape_b = 0; + std::vector> kernal; + in_file >> neurons >> activation >> shape_a >> shape_b; + for (size_t r = 0; r < shape_a; r++) { + std::valarray row(shape_b); + for (size_t c = 0; c < shape_b; c++) { + in_file >> row[c]; } - public: - /** - * Default Constructor for class NeuralNetwork. This constructor - * is used to create empty variable of type NeuralNetwork class. - */ - NeuralNetwork() = default; + kernal.push_back(row); + } + config.emplace_back(make_pair(neurons, activation)); + ; + kernals.emplace_back(kernal); + } + std::cout << "INFO: Model loaded successfully" << std::endl; + in_file.close(); // Closing file + return NeuralNetwork( + config, kernals); // Return instance of NeuralNetwork class + } - /** - * Constructor for class NeuralNetwork. This constructor - * is used by user. - * @param config vector containing pair (neurons, activation) - */ - explicit NeuralNetwork(const std::vector > &config) { - // First layer should not have activation - if(config.begin() -> second != "none") { - std::cerr << "ERROR: First layer can't have activation other than none"; - std::cerr << std::endl; - std::exit(EXIT_FAILURE); - } - // Network should have atleast two layers - if(config.size() <= 1) { - std::cerr << "ERROR: Invalid size of network, "; - std::cerr << "Atleast two layers are required"; - std::exit(EXIT_FAILURE); - } - // Separately creating first layer so it can have unit matrix - // as kernal. 
- layers.push_back(neural_network::layers::DenseLayer(config[0].first, - config[0].second, - {config[0].first, config[0].first}, - false)); - // Creating remaining layers - for(size_t i = 1; i < config.size(); i++) { - layers.push_back(neural_network::layers::DenseLayer(config[i].first, - config[i].second, - {config[i - 1].first, config[i].first}, - true)); - } - std::cout << "INFO: Network constructed successfully" << std::endl; - } - - /** - * Copy Constructor for class NeuralNetwork. - * - * @param model instance of class to be copied. - */ - NeuralNetwork(const NeuralNetwork &model) = default; - - /** - * Destructor for class NeuralNetwork. - */ - ~NeuralNetwork() = default; - - /** - * Copy assignment operator for class NeuralNetwork - */ - NeuralNetwork& operator = (const NeuralNetwork &model) = default; - - /** - * Move constructor for class NeuralNetwork - */ - NeuralNetwork(NeuralNetwork &&) = default; - - /** - * Move assignment operator for class NeuralNetwork - */ - NeuralNetwork& operator = (NeuralNetwork &&) = default; - - /** - * Function to get X and Y from csv file (where X = data, Y = label) - * @param file_name csv file name - * @param last_label flag for whether label is in first or last column - * @param normalize flag for whether to normalize data - * @param slip_lines number of lines to skip - * @return returns pair of X and Y - */ - std::pair>>, std::vector>>> - get_XY_from_csv(const std::string &file_name, - const bool &last_label, - const bool &normalize, - const int &slip_lines = 1) { - std::ifstream in_file; // Ifstream to read file - in_file.open(file_name.c_str(), std::ios::in); // Open file - std::vector >> X, Y; // To store X and Y - std::string line; // To store each line - // Skip lines - for(int i = 0; i < slip_lines; i ++) { - std::getline(in_file, line, '\n'); // Ignore line - } - // While file has information - while(!in_file.eof() && std::getline(in_file, line, '\n')) - { - std::valarray x_data, y_data; // To store single sample and label - std::stringstream ss(line); // Constructing stringstream from line - std::string token; // To store each token in line (seprated by ',') - while(std::getline(ss, token, ',')) { // For each token - // Insert numerical value of token in x_data - x_data = insert_element(x_data, std::stod(token)); - } - // If label is in last column - if(last_label) { - y_data.resize(this -> layers.back().neurons); - // If task is classification - if(y_data.size() > 1) { - y_data[x_data[x_data.size() - 1]] = 1; - } - // If task is regrssion (of single value) - else { - y_data[0] = x_data[x_data.size() - 1]; - } - x_data = pop_back(x_data); // Remove label from x_data - } - else { - y_data.resize(this -> layers.back().neurons); - // If task is classification - if(y_data.size() > 1) { - y_data[x_data[x_data.size() - 1]] = 1; - } - // If task is regrssion (of single value) - else { - y_data[0] = x_data[x_data.size() - 1]; - } - x_data = pop_front(x_data); // Remove label from x_data - } - // Push collected X_data and y_data in X and Y - X.push_back({x_data}); - Y.push_back({y_data}); - } - in_file.close(); - // Normalize training data if flag is set - if(normalize) { - // Scale data between 0 and 1 using min-max scaler - X = minmax_scaler(X, 0.01, 1.0); - } - return make_pair(X, Y); // Return pair of X and Y - } - - /** - * Function to get prediction of model on single sample. 
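// An illustrative call, matching the shape used by test() at the end of this
// file (one row of four features for the iris network named myNN):
//
//     std::vector<std::valarray<double>> pred =
//         myNN.single_predict({{5.0, 3.4, 1.6, 0.4}});
//     auto predicted_class = machine_learning::argmax(pred);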
- * @param X array of feature vectors - * @return returns predictions as vector - */ - std::vector> - single_predict (const std::vector> &X) { - // Get activations of all layers - auto activations = this -> __detailed_single_prediction(X); - // Return activations of last layer (actual predicted values) - return activations.back(); - } - - /** - * Function to get prediction of model on batch - * @param X array of feature vectors - * @return returns predicted values as vector - */ - std::vector < std::vector >> - batch_predict (const std::vector >> &X) { - // Store predicted values - std::vector < std::vector >> predicted_batch(X.size()); - for(size_t i = 0; i < X.size(); i++) { // For every sample - // Push predicted values - predicted_batch[i] = this -> single_predict(X[i]); - } - return predicted_batch; // Return predicted values - } - - /** - * Function to fit model on supplied data - * @param X array of feature vectors - * @param Y array of target values - * @param epochs number of epochs (default = 100) - * @param learning_rate learning rate (default = 0.01) - * @param batch_size batch size for gradient descent (default = 32) - * @param shuffle flag for whether to shuffle data (default = true) - */ - void fit(const std::vector < std::vector >> &X_, - const std::vector < std::vector >> &Y_, - const int &epochs = 100, - const double &learning_rate = 0.01, - const size_t &batch_size = 32, - const bool &shuffle = true) { - std::vector < std::vector >> X = X_, Y = Y_; - // Both label and input data should have same size - if (X.size() != Y.size()) { - std::cerr << "ERROR : X and Y in fit have different sizes" << std::endl; - std::exit(EXIT_FAILURE); - } - std::cout << "INFO: Training Started" << std::endl; - for (int epoch = 1; epoch <= epochs; epoch++) { // For every epoch - // Shuffle X and Y if flag is set - if(shuffle) { - equal_shuffle(X, Y); - } - auto start = std::chrono::high_resolution_clock::now(); // Start clock - double loss = 0, acc = 0; // Intialize performance metrics with zero - // For each starting index of batch - for(size_t batch_start = 0; batch_start < X.size(); batch_start += batch_size) { - for(size_t i = batch_start; i < std::min(X.size(), batch_start + batch_size); i++) { - std::vector > grad, cur_error, predicted; - auto activations = this -> __detailed_single_prediction(X[i]); - // Gradients vector to store gradients for all layers - // They will be averaged and applied to kernal - std::vector>> gradients; - gradients.resize(this -> layers.size()); - // First intialize gradients to zero - for(size_t i = 0; i < gradients.size(); i++) { - zeroes_initialization(gradients[i], get_shape(this -> layers[i].kernal)); - } - predicted = activations.back(); // Predicted vector - cur_error = predicted - Y[i]; // Absoulute error - // Calculating loss with MSE - loss += sum(apply_function(cur_error, neural_network::util_functions::square)); - // If prediction is correct - if(argmax(predicted) == argmax(Y[i])) { - acc += 1; - } - // For every layer (except first) starting from last one - for(size_t j = this -> layers.size() - 1; j >= 1; j--) { - // Backpropogating errors - cur_error = hadamard_product(cur_error, - apply_function(activations[j + 1], - this -> layers[j].dactivation_function)); - // Calculating gradient for current layer - grad = multiply(transpose(activations[j]), cur_error); - // Change error according to current kernal values - cur_error = multiply(cur_error, transpose(this -> layers[j].kernal)); - // Adding gradient values to collection of gradients - 
gradients[j] = gradients[j] + grad / double(batch_size); - } - // Applying gradients - for(size_t j = this -> layers.size() - 1; j >= 1; j--) { - // Updating kernal (aka weights) - this -> layers[j].kernal = this -> layers[j].kernal - - gradients[j] * learning_rate; - } - } - } - auto stop = std::chrono::high_resolution_clock::now(); // Stoping the clock - // Calculate time taken by epoch - auto duration = std::chrono::duration_cast(stop - start); - loss /= X.size(); // Averaging loss - acc /= X.size(); // Averaging accuracy - std::cout.precision(4); // set output precision to 4 - // Printing training stats - std::cout << "Training: Epoch " << epoch << '/' << epochs; - std::cout << ", Loss: " << loss; - std::cout << ", Accuracy: " << acc; - std::cout << ", Taken time: " << duration.count() / 1e6 << " seconds"; - std::cout << std::endl; - } - return; - } - - /** - * Function to fit model on data stored in csv file - * @param file_name csv file name - * @param last_label flag for whether label is in first or last column - * @param epochs number of epochs - * @param learning_rate learning rate - * @param normalize flag for whether to normalize data - * @param slip_lines number of lines to skip - * @param batch_size batch size for gradient descent (default = 32) - * @param shuffle flag for whether to shuffle data (default = true) - */ - void fit_from_csv (const std::string &file_name, - const bool &last_label, - const int &epochs, - const double &learning_rate, - const bool &normalize, - const int &slip_lines = 1, - const size_t &batch_size = 32, - const bool &shuffle = true) { - // Getting training data from csv file - auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines); - // Fit the model on training data - this -> fit(data.first, data.second, epochs, learning_rate, batch_size, shuffle); - return; - } - - /** - * Function to evaluate model on supplied data - * @param X array of feature vectors (input data) - * @param Y array of target values (label) - */ - void evaluate(const std::vector< std::vector >> &X, - const std::vector< std::vector >> &Y) { - std::cout << "INFO: Evaluation Started" << std::endl; - double acc = 0, loss = 0; // intialize performance metrics with zero - for(size_t i = 0; i < X.size(); i++) { // For every sample in input - // Get predictions - std::vector> pred = this -> single_predict(X[i]); - // If predicted class is correct - if(argmax(pred) == argmax(Y[i])) { - acc += 1; // Increment accuracy - } - // Calculating loss - Mean Squared Error - loss += sum(apply_function((Y[i] - pred), - neural_network::util_functions::square) * 0.5); - } - acc /= X.size(); // Averaging accuracy - loss /= X.size(); // Averaging loss - // Prinitng performance of the model - std::cout << "Evaluation: Loss: " << loss; - std::cout << ", Accuracy: " << acc << std::endl; - return; - } - - /** - * Function to evaluate model on data stored in csv file - * @param file_name csv file name - * @param last_label flag for whether label is in first or last column - * @param normalize flag for whether to normalize data - * @param slip_lines number of lines to skip - */ - void evaluate_from_csv (const std::string &file_name, - const bool &last_label, - const bool &normalize, - const int &slip_lines = 1) { - // Getting training data from csv file - auto data = this -> get_XY_from_csv(file_name, last_label, normalize, slip_lines); - // Evaluating model - this -> evaluate(data.first, data.second); - return; - } - - /** - * Function to save current model. 
- * @param file_name file name to save model (*.model) - */ - void save_model (const std::string &_file_name) { - std::string file_name = _file_name; - // Adding ".model" extension if it is not already there in name - if(file_name.find(".model") == file_name.npos) { - file_name += ".model"; - } - std::ofstream out_file; // Ofstream to write in file - // Open file in out|trunc mode - out_file.open(file_name.c_str(), std::ofstream::out | std::ofstream::trunc); - /** - Format in which model is saved: - - total_layers - neurons(1st neural_network::layers::DenseLayer) activation_name(1st neural_network::layers::DenseLayer) - kernal_shape(1st neural_network::layers::DenseLayer) - kernal_values - . - . - . - neurons(Nth neural_network::layers::DenseLayer) activation_name(Nth neural_network::layers::DenseLayer) - kernal_shape(Nth neural_network::layers::DenseLayer) - kernal_value - - For Example, pretrained model with 3 layers: -
-                        3
-                        4 none
-                        4 4
-                        1 0 0 0 
-                        0 1 0 0 
-                        0 0 1 0 
-                        0 0 0 1 
-                        6 relu
-                        4 6
-                        -1.88963 -3.61165 1.30757 -0.443906 -2.41039 -2.69653 
-                        -0.684753 0.0891452 0.795294 -2.39619 2.73377 0.318202 
-                        -2.91451 -4.43249 -0.804187 2.51995 -6.97524 -1.07049 
-                        -0.571531 -1.81689 -1.24485 1.92264 -2.81322 1.01741 
-                        3 sigmoid
-                        6 3
-                        0.390267 -0.391703 -0.0989607 
-                        0.499234 -0.564539 -0.28097 
-                        0.553386 -0.153974 -1.92493 
-                        -2.01336 -0.0219682 1.44145 
-                        1.72853 -0.465264 -0.705373 
-                        -0.908409 -0.740547 0.376416 
-                        
- */ - // Saving model in the same format - out_file << layers.size(); - out_file << std::endl; - for(const auto &layer : this -> layers) { - out_file << layer.neurons << ' ' << layer.activation << std::endl; - const auto shape = get_shape(layer.kernal); - out_file << shape.first << ' ' << shape.second << std::endl; - for(const auto &row : layer.kernal) { - for(const auto &val : row) { - out_file << val << ' '; - } - out_file << std::endl; - } - } - std::cout << "INFO: Model saved successfully with name : "; - std::cout << file_name << std::endl; - return; - } - - /** - * Function to load earlier saved model. - * @param file_name file from which model will be loaded (*.model) - * @return instance of NeuralNetwork class with pretrained weights - */ - NeuralNetwork load_model (const std::string &file_name) { - std::ifstream in_file; // Ifstream to read file - in_file.open(file_name.c_str()); // Openinig file - std::vector > config; // To store config - std::vector >> kernals; // To store pretrained kernals - // Loading model from saved file format - size_t total_layers = 0; - in_file >> total_layers; - for(size_t i = 0; i < total_layers; i++) { - int neurons = 0; - std::string activation; - size_t shape_a = 0, shape_b = 0; - std::vector> kernal; - in_file >> neurons >> activation >> shape_a >> shape_b; - for(size_t r = 0; r < shape_a; r++) { - std::valarray row(shape_b); - for(size_t c = 0; c < shape_b; c++) { - in_file >> row[c]; - } - kernal.push_back(row); - } - config.emplace_back(make_pair(neurons, activation));; - kernals.emplace_back(kernal); - } - std::cout << "INFO: Model loaded successfully" << std::endl; - return NeuralNetwork(config, kernals); // Return instance of NeuralNetwork class - } - - /** - * Function to print summary of the network. - */ - void summary () { - // Printing Summary - std::cout << "===============================================================" << std::endl; - std::cout << "\t\t+ MODEL SUMMARY +\t\t\n"; - std::cout << "===============================================================" << std::endl; - for(size_t i = 1; i <= layers.size(); i++) { // For every layer - std::cout << i << ")"; - std::cout << " Neurons : " << layers[i - 1].neurons; // number of neurons - std::cout << ", Activation : " << layers[i - 1].activation; // activation - std::cout << ", Kernal Shape : " << get_shape(layers[i - 1].kernal); // kernal shape - std::cout << std::endl; - } - std::cout << "===============================================================" << std::endl; - return; - } - - }; - } // namespace neural_network -} // namespace machine_learning + /** + * Function to print summary of the network. 
+ */ + void summary() { + // Printing Summary + std::cout + << "===============================================================" + << std::endl; + std::cout << "\t\t+ MODEL SUMMARY +\t\t\n"; + std::cout + << "===============================================================" + << std::endl; + for (size_t i = 1; i <= layers.size(); i++) { // For every layer + std::cout << i << ")"; + std::cout << " Neurons : " + << layers[i - 1].neurons; // number of neurons + std::cout << ", Activation : " + << layers[i - 1].activation; // activation + std::cout << ", Kernal Shape : " + << get_shape(layers[i - 1].kernal); // kernal shape + std::cout << std::endl; + } + std::cout + << "===============================================================" + << std::endl; + return; + } +}; +} // namespace neural_network +} // namespace machine_learning /** * Function to test neural network @@ -766,19 +805,24 @@ namespace machine_learning { static void test() { // Creating network with 3 layers for "iris.csv" machine_learning::neural_network::NeuralNetwork myNN = - machine_learning::neural_network::NeuralNetwork({ - {4, "none"}, // First layer with 3 neurons and "none" as activation - {6, "relu"}, // Second layer with 6 neurons and "relu" as activation - {3, "sigmoid"} // Third layer with 3 neurons and "sigmoid" as activation - }); + machine_learning::neural_network::NeuralNetwork({ + {4, "none"}, // First layer with 3 neurons and "none" as activation + {6, + "relu"}, // Second layer with 6 neurons and "relu" as activation + {3, "sigmoid"} // Third layer with 3 neurons and "sigmoid" as + // activation + }); // Printing summary of model myNN.summary(); // Training Model myNN.fit_from_csv("iris.csv", true, 100, 0.3, false, 2, 32, true); // Testing predictions of model - assert(machine_learning::argmax(myNN.single_predict({{5,3.4,1.6,0.4}})) == 0); - assert(machine_learning::argmax(myNN.single_predict({{6.4,2.9,4.3,1.3}})) == 1); - assert(machine_learning::argmax(myNN.single_predict({{6.2,3.4,5.4,2.3}})) == 2); + assert(machine_learning::argmax( + myNN.single_predict({{5, 3.4, 1.6, 0.4}})) == 0); + assert(machine_learning::argmax( + myNN.single_predict({{6.4, 2.9, 4.3, 1.3}})) == 1); + assert(machine_learning::argmax( + myNN.single_predict({{6.2, 3.4, 5.4, 2.3}})) == 2); return; } diff --git a/machine_learning/vector_ops.hpp b/machine_learning/vector_ops.hpp index bb70b5c4f..2e519b4b6 100644 --- a/machine_learning/vector_ops.hpp +++ b/machine_learning/vector_ops.hpp @@ -1,20 +1,21 @@ /** * @file vector_ops.hpp * @author [Deep Raval](https://github.com/imdeep2905) - * - * @brief Various functions for vectors associated with [NeuralNetwork (aka Multilayer Perceptron)] + * + * @brief Various functions for vectors associated with [NeuralNetwork (aka + * Multilayer Perceptron)] * (https://en.wikipedia.org/wiki/Multilayer_perceptron). 
- * + * */ #ifndef VECTOR_OPS_FOR_NN #define VECTOR_OPS_FOR_NN -#include #include -#include -#include #include +#include #include +#include +#include /** * @namespace machine_learning @@ -31,18 +32,18 @@ template std::ostream &operator<<(std::ostream &out, std::vector> const &A) { // Setting output precision to 4 in case of floating point numbers - out.precision(4); - for(const auto &a : A) { // For each row in A - for(const auto &x : a) { // For each element in row - std::cerr << x << ' '; // print element + out.precision(4); + for (const auto &a : A) { // For each row in A + for (const auto &x : a) { // For each element in row + std::cout << x << ' '; // print element } - std::cerr << std::endl; + std::cout << std::endl; } return out; } /** - * Overloaded operator "<<" to print a pair + * Overloaded operator "<<" to print a pair * @tparam T typename of the pair * @param out std::ostream to output * @param A Pair to be printed @@ -52,7 +53,7 @@ std::ostream &operator<<(std::ostream &out, const std::pair &A) { // Setting output precision to 4 in case of floating point numbers out.precision(4); // printing pair in the form (p, q) - std::cerr << "(" << A.first << ", " << A.second << ")"; + std::cout << "(" << A.first << ", " << A.second << ")"; return out; } @@ -66,10 +67,10 @@ template std::ostream &operator<<(std::ostream &out, const std::valarray &A) { // Setting output precision to 4 in case of floating point numbers out.precision(4); - for(const auto &a : A) { // For every element in the vector. - std::cerr << a << ' '; // Print element + for (const auto &a : A) { // For every element in the vector. + std::cout << a << ' '; // Print element } - std::cerr << std::endl; + std::cout << std::endl; return out; } @@ -81,14 +82,14 @@ std::ostream &operator<<(std::ostream &out, const std::valarray &A) { * @return new resultant vector */ template -std::valarray insert_element(const std::valarray &A, const T &ele) { - std::valarray B; // New 1D vector to store resultant vector - B.resize(A.size() + 1); // Resizing it accordingly - for(size_t i = 0; i < A.size(); i++) { // For every element in A - B[i] = A[i]; // Copy element in B +std::valarray insert_element(const std::valarray &A, const T &ele) { + std::valarray B; // New 1D vector to store resultant vector + B.resize(A.size() + 1); // Resizing it accordingly + for (size_t i = 0; i < A.size(); i++) { // For every element in A + B[i] = A[i]; // Copy element in B } - B[B.size() - 1] = ele; // Inserting new element in last position - return B; // Return resultant vector + B[B.size() - 1] = ele; // Inserting new element in last position + return B; // Return resultant vector } /** @@ -98,13 +99,14 @@ std::valarray insert_element(const std::valarray &A, const T &ele) { * @return new resultant vector */ template -std::valarray pop_front(const std::valarray &A) { - std::valarray B; // New 1D vector to store resultant vector - B.resize(A.size() - 1); // Resizing it accordingly - for(size_t i = 1; i < A.size(); i ++) { // // For every (except first) element in A - B[i - 1] = A[i]; // Copy element in B with left shifted position +std::valarray pop_front(const std::valarray &A) { + std::valarray B; // New 1D vector to store resultant vector + B.resize(A.size() - 1); // Resizing it accordingly + for (size_t i = 1; i < A.size(); + i++) { // // For every (except first) element in A + B[i - 1] = A[i]; // Copy element in B with left shifted position } - return B; // Return resultant vector + return B; // Return resultant vector } /** @@ -114,13 +116,14 @@ 
std::valarray pop_front(const std::valarray &A) { * @return new resultant vector */ template -std::valarray pop_back(const std::valarray &A) { - std::valarray B; // New 1D vector to store resultant vector - B.resize(A.size() - 1); // Resizing it accordingly - for(size_t i = 0; i < A.size() - 1; i ++) { // For every (except last) element in A - B[i] = A[i]; // Copy element in B +std::valarray pop_back(const std::valarray &A) { + std::valarray B; // New 1D vector to store resultant vector + B.resize(A.size() - 1); // Resizing it accordingly + for (size_t i = 0; i < A.size() - 1; + i++) { // For every (except last) element in A + B[i] = A[i]; // Copy element in B } - return B; // Return resultant vector + return B; // Return resultant vector } /** @@ -130,16 +133,17 @@ std::valarray pop_back(const std::valarray &A) { * @param B Second 3D vector */ template -void equal_shuffle(std::vector < std::vector > > &A, - std::vector < std::vector > > &B) { +void equal_shuffle(std::vector>> &A, + std::vector>> &B) { // If two vectors have different sizes - if(A.size() != B.size()) - { - std::cerr << "ERROR : Can not equally shuffle two vectors with different sizes: "; + if (A.size() != B.size()) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "Can not equally shuffle two vectors with different sizes: "; std::cerr << A.size() << " and " << B.size() << std::endl; std::exit(EXIT_FAILURE); } - for(size_t i = 0; i < A.size(); i++) { // For every element in A and B + for (size_t i = 0; i < A.size(); i++) { // For every element in A and B // Genrating random index < size of A and B std::srand(std::chrono::system_clock::now().time_since_epoch().count()); size_t random_index = std::rand() % A.size(); @@ -159,26 +163,26 @@ void equal_shuffle(std::vector < std::vector > > &A, * @param high upper limit on value */ template -void uniform_random_initialization(std::vector> &A, - const std::pair &shape, - const T &low, - const T &high) { - A.clear(); // Making A empty +void uniform_random_initialization(std::vector> &A, + const std::pair &shape, + const T &low, const T &high) { + A.clear(); // Making A empty // Uniform distribution in range [low, high] - std::default_random_engine generator(std::chrono::system_clock::now().time_since_epoch().count()); - std::uniform_real_distribution distribution(low, high); - for(size_t i = 0; i < shape.first; i++) { // For every row - std::valarray row; // Making empty row which will be inserted in vector + std::default_random_engine generator( + std::chrono::system_clock::now().time_since_epoch().count()); + std::uniform_real_distribution distribution(low, high); + for (size_t i = 0; i < shape.first; i++) { // For every row + std::valarray + row; // Making empty row which will be inserted in vector row.resize(shape.second); - for(auto &r : row) { // For every element in row - r = distribution(generator); // copy random number - } - A.push_back(row); // Insert new row in vector + for (auto &r : row) { // For every element in row + r = distribution(generator); // copy random number + } + A.push_back(row); // Insert new row in vector } return; } - /** * Function to Intialize 2D vector as unit matrix * @tparam T typename of the vector @@ -186,15 +190,15 @@ void uniform_random_initialization(std::vector> &A, * @param shape required shape */ template -void unit_matrix_initialization(std::vector> &A, - const std::pair &shape - ) { - A.clear(); // Making A empty - for(size_t i = 0; i < shape.first; i++) { - std::valarray row; // Making empty row which will be inserted in 
vector +void unit_matrix_initialization(std::vector> &A, + const std::pair &shape) { + A.clear(); // Making A empty + for (size_t i = 0; i < shape.first; i++) { + std::valarray + row; // Making empty row which will be inserted in vector row.resize(shape.second); - row[i] = T(1); // Insert 1 at ith position - A.push_back(row); // Insert new row in vector + row[i] = T(1); // Insert 1 at ith position + A.push_back(row); // Insert new row in vector } return; } @@ -206,14 +210,14 @@ void unit_matrix_initialization(std::vector> &A, * @param shape required shape */ template -void zeroes_initialization(std::vector> &A, - const std::pair &shape - ) { - A.clear(); // Making A empty - for(size_t i = 0; i < shape.first; i++) { - std::valarray row; // Making empty row which will be inserted in vector - row.resize(shape.second); // By default all elements are zero - A.push_back(row); // Insert new row in vector +void zeroes_initialization(std::vector> &A, + const std::pair &shape) { + A.clear(); // Making A empty + for (size_t i = 0; i < shape.first; i++) { + std::valarray + row; // Making empty row which will be inserted in vector + row.resize(shape.second); // By default all elements are zero + A.push_back(row); // Insert new row in vector } return; } @@ -226,11 +230,11 @@ void zeroes_initialization(std::vector> &A, */ template T sum(const std::vector> &A) { - T cur_sum = 0; // Initially sum is zero - for(const auto &a : A) { // For every row in A - cur_sum += a.sum(); // Add sum of that row to current sum + T cur_sum = 0; // Initially sum is zero + for (const auto &a : A) { // For every row in A + cur_sum += a.sum(); // Add sum of that row to current sum } - return cur_sum; // Return sum + return cur_sum; // Return sum } /** @@ -242,52 +246,59 @@ T sum(const std::vector> &A) { template std::pair get_shape(const std::vector> &A) { const size_t sub_size = (*A.begin()).size(); - for(const auto &a : A) { + for (const auto &a : A) { // If supplied vector don't have same shape in all rows - if(a.size() != sub_size) { - std::cerr << "ERROR: (get_shape) Supplied vector is not 2D Matrix" << std::endl; + if (a.size() != sub_size) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vector is not 2D Matrix" << std::endl; std::exit(EXIT_FAILURE); } } - return std::make_pair(A.size(), sub_size); // Return shape as pair + return std::make_pair(A.size(), sub_size); // Return shape as pair } /** * Function to scale given 3D vector using min-max scaler * @tparam T typename of the vector * @param A 3D vector which will be scaled - * @param low new minimum value + * @param low new minimum value * @param high new maximum value * @return new scaled 3D vector */ template -std::vector>> -minmax_scaler(const std::vector>> &A, const T &low, const T &high) { - std::vector>> B = A; // Copying into new vector B - const auto shape = get_shape(B[0]); // Storing shape of B's every element - // As this function is used for scaling training data vector should be of shape (1, X) - if(shape.first != 1) { - std::cerr << "ERROR: (MinMax Scaling) Supplied vector is not supported for minmax scaling, shape: "; +std::vector>> minmax_scaler( + const std::vector>> &A, const T &low, + const T &high) { + std::vector>> B = + A; // Copying into new vector B + const auto shape = get_shape(B[0]); // Storing shape of B's every element + // As this function is used for scaling training data vector should be of + // shape (1, X) + if (shape.first != 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr + << "Supplied 
vector is not supported for minmax scaling, shape: "; std::cerr << shape << std::endl; std::exit(EXIT_FAILURE); } - for(size_t i = 0; i < shape.second; i++) { - T min = B[0][0][i], max = B[0][0][i]; - for(size_t j = 0; j < B.size(); j++) { + for (size_t i = 0; i < shape.second; i++) { + T min = B[0][0][i], max = B[0][0][i]; + for (size_t j = 0; j < B.size(); j++) { // Updating minimum and maximum values min = std::min(min, B[j][0][i]); max = std::max(max, B[j][0][i]); } - for(size_t j = 0; j < B.size(); j++) { + for (size_t j = 0; j < B.size(); j++) { // Applying min-max scaler formula - B[j][0][i] = ((B[j][0][i] - min) / (max - min)) * (high - low) + low; + B[j][0][i] = + ((B[j][0][i] - min) / (max - min)) * (high - low) + low; } } - return B; // Return new resultant 3D vector + return B; // Return new resultant 3D vector } /** - * Function to get index of maximum element in 2D vector + * Function to get index of maximum element in 2D vector * @tparam T typename of the vector * @param A 2D vector for which maximum index is required * @return index of maximum element @@ -295,13 +306,16 @@ minmax_scaler(const std::vector>> &A, const T &low, template size_t argmax(const std::vector> &A) { const auto shape = get_shape(A); - // As this function is used on predicted (or target) vector, shape should be (1, X) - if(shape.first != 1) { - std::cerr << "ERROR: (argmax) Supplied vector is ineligible for argmax" << std::endl; - std::exit(EXIT_FAILURE); + // As this function is used on predicted (or target) vector, shape should be + // (1, X) + if (shape.first != 1) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vector is ineligible for argmax" << std::endl; + std::exit(EXIT_FAILURE); } // Return distance of max element from first element (i.e. 
index) - return std::distance(std::begin(A[0]), std::max_element(std::begin(A[0]), std::end(A[0]))); + return std::distance(std::begin(A[0]), + std::max_element(std::begin(A[0]), std::end(A[0]))); } /** @@ -311,46 +325,51 @@ size_t argmax(const std::vector> &A) { * @param func Function to be applied * @return new resultant vector */ -template -std::vector > apply_function(const std::vector > &A, - T (*func) (const T &)) { - std::vector> B = A; // New vector to store resultant vector - for(auto &b : B) { // For every row in vector - b = b.apply(func); // Apply function to that row +template +std::vector> apply_function( + const std::vector> &A, T (*func)(const T &)) { + std::vector> B = + A; // New vector to store resultant vector + for (auto &b : B) { // For every row in vector + b = b.apply(func); // Apply function to that row } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** - * Overloaded operator "*" to multiply given 2D vector with scaler + * Overloaded operator "*" to multiply given 2D vector with scalar * @tparam T typename of both vector and the scalar * @param A 2D vector to which scalar will be multiplied * @param val Scalar value which will be multiplied * @return new resultant vector */ template -std::vector > operator * (const std::vector> &A, const T& val) { - std::vector> B = A; // New vector to store resultant vector - for(auto &b : B) { // For every row in vector - b = b * val; // Multiply row with scaler +std::vector> operator*(const std::vector> &A, + const T &val) { + std::vector> B = + A; // New vector to store resultant vector + for (auto &b : B) { // For every row in vector + b = b * val; // Multiply row with scalar } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** - * Overloaded operator "/" to divide given 2D vector with scaler + * Overloaded operator "/" to divide given 2D vector with scalar * @tparam T typename of the vector and the scalar * @param A 2D vector to which scalar will be divided * @param val Scalar value which will be divided * @return new resultant vector */ template -std::vector > operator / (const std::vector> &A, const T& val) { - std::vector> B = A; // New vector to store resultant vector - for(auto &b : B) { // For every row in vector - b = b / val; // Divide row with scaler +std::vector> operator/(const std::vector> &A, + const T &val) { + std::vector> B = + A; // New vector to store resultant vector + for (auto &b : B) { // For every row in vector + b = b / val; // Divide row with scalar } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** @@ -360,125 +379,136 @@ std::vector > operator / (const std::vector> * @return new resultant vector */ template -std::vector > transpose(const std::vector> &A) { - const auto shape = get_shape(A); // Current shape of vector - std::vector > B; // New vector to store result +std::vector> transpose( + const std::vector> &A) { + const auto shape = get_shape(A); // Current shape of vector + std::vector> B; // New vector to store result // Storing transpose values of A in B - for(size_t j = 0; j < shape.second; j++) { - std::valarray row; + for (size_t j = 0; j < shape.second; j++) { + std::valarray row; row.resize(shape.first); - for(size_t i = 0; i < shape.first; i++) { + for (size_t i = 0; i < shape.first; i++) { row[i] = A[i][j]; } B.push_back(row); } - return B; // Return new resultant 2D vector + return B; // Return new resultant 2D vector } /** * Overloaded
operator "+" to add two 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > operator + (const std::vector> &A, const std::vector> &B) { +std::vector> operator+( + const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors don't have equal shape - if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) { - std::cerr << "ERROR: (vector addition) Supplied vectors have different shapes "; + if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vectors have different shapes "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; - for(size_t i = 0; i < A.size(); i++) { // For every row - C.push_back(A[i] + B[i]); // Elementwise addition + std::vector> C; + for (size_t i = 0; i < A.size(); i++) { // For every row + C.push_back(A[i] + B[i]); // Elementwise addition } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } /** * Overloaded operator "-" to add subtract 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > operator - (const std::vector> &A, const std::vector> &B) { +std::vector> operator-( + const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors don't have equal shape - if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) { - std::cerr << "ERROR: (vector subtraction) Supplied vectors have different shapes "; + if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Supplied vectors have different shapes "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; // Vector to store result - for(size_t i = 0; i < A.size(); i++) { // For every row - C.push_back(A[i] - B[i]); // Elementwise substraction + std::vector> C; // Vector to store result + for (size_t i = 0; i < A.size(); i++) { // For every row + C.push_back(A[i] - B[i]); // Elementwise substraction } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } /** * Function to multiply two 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > multiply(const std::vector> &A, const std::vector> &B) { +std::vector> multiply(const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors are not eligible for multiplication - if(shape_a.second != shape_b.first ) { - std::cerr << "ERROR: (multiply) Supplied vectors are not eligible for multiplication "; + if (shape_a.second != shape_b.first) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Vectors are not eligible for multiplication "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; // Vector to store result - // Normal matrix multiplication + std::vector> C; // Vector to store result + // Normal matrix multiplication for 
(size_t i = 0; i < shape_a.first; i++) { std::valarray row; row.resize(shape_b.second); - for(size_t j = 0; j < shape_b.second; j++) { - for(size_t k = 0; k < shape_a.second; k++) { + for (size_t j = 0; j < shape_b.second; j++) { + for (size_t k = 0; k < shape_a.second; k++) { row[j] += A[i][k] * B[k][j]; } } C.push_back(row); } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } /** * Function to get hadamard product of two 2D vectors * @tparam T typename of the vector - * @param A First 2D vector + * @param A First 2D vector * @param B Second 2D vector * @return new resultant vector */ template -std::vector > hadamard_product(const std::vector> &A, const std::vector> &B) { +std::vector> hadamard_product( + const std::vector> &A, + const std::vector> &B) { const auto shape_a = get_shape(A); const auto shape_b = get_shape(B); // If vectors are not eligible for hadamard product - if(shape_a.first != shape_b.first || shape_a.second != shape_b.second) { - std::cerr << "ERROR: (hadamard_product) Supplied vectors have different shapes "; + if (shape_a.first != shape_b.first || shape_a.second != shape_b.second) { + std::cerr << "ERROR (" << __func__ << ") : "; + std::cerr << "Vectors have different shapes "; std::cerr << shape_a << " and " << shape_b << std::endl; std::exit(EXIT_FAILURE); } - std::vector> C; // Vector to store result - for(size_t i = 0; i < A.size(); i++) { - C.push_back(A[i] * B[i]); // Elementwise multiplication + std::vector> C; // Vector to store result + for (size_t i = 0; i < A.size(); i++) { + C.push_back(A[i] * B[i]); // Elementwise multiplication } - return C; // Return new resultant 2D vector + return C; // Return new resultant 2D vector } } // namespace machine_learning - #endif
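
For reviewers who want to exercise the reformatted helpers in isolation, the sketch below touches the main vector_ops.hpp entry points (initialization, shape queries, matrix and elementwise products, apply_function, argmax). It is illustrative only and not part of the diff above; the file name, the local square helper, and the chosen shapes are hypothetical.

// usage_sketch.cpp -- illustrative only, not part of this PR.
// Minimal sketch assuming vector_ops.hpp is on the include path.
#include <cassert>
#include <iostream>
#include <utility>
#include <valarray>
#include <vector>

#include "vector_ops.hpp"

// Local helper matching the T (*)(const T &) signature expected by apply_function.
static double square(const double &x) { return x * x; }

int main() {
    // The overloaded operators (<<, +, -, *, /) live in namespace
    // machine_learning but act on std:: types, so ADL alone will not find
    // them; a using-directive makes infix use work.
    using namespace machine_learning;

    // 2x3 matrix with uniform random values in [-1, 1]
    std::vector<std::valarray<double>> W;
    uniform_random_initialization(W, std::pair<size_t, size_t>(2, 3), -1.0, 1.0);
    std::cout << "W, shape " << get_shape(W) << ":\n" << W;

    // 3x3 identity, used to sanity-check multiply(): (2x3) * (3x3) -> 2x3
    std::vector<std::valarray<double>> eye;
    unit_matrix_initialization(eye, std::pair<size_t, size_t>(3, 3));
    assert(get_shape(multiply(W, eye)) == get_shape(W));

    // Elementwise helpers: addition, Hadamard product, scalar scaling,
    // and mapping a unary function over every entry
    std::cout << "sum(W + W)     : " << sum(W + W) << '\n';
    std::cout << "sum(W had. W)  : " << sum(hadamard_product(W, W)) << '\n';
    std::cout << "sum(W * 0.5)   : " << sum(W * 0.5) << '\n';
    std::cout << "sum of squares : " << sum(apply_function(W, square)) << '\n';

    // transpose flips the shape from (2, 3) to (3, 2)
    assert(get_shape(transpose(W)).first == 3);

    // argmax expects a (1, X) row vector, e.g. a single prediction
    std::vector<std::valarray<double>> scores = {{0.1, 0.7, 0.2}};
    std::cout << "argmax(scores) : " << argmax(scores) << '\n';  // prints 1
    return 0;
}

Note the using-directive: because the arithmetic and stream operators are declared inside namespace machine_learning yet take std::vector/std::valarray arguments, unqualified infix use only compiles once that namespace is made visible (or the code itself sits inside it, as neural_network.cpp does).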