From c854b78bc9dfd111fdf989605c2737d7e1474f64 Mon Sep 17 00:00:00 2001 From: Nirjas Jakilim Date: Mon, 12 Apr 2021 16:34:13 +0600 Subject: [PATCH] feat: Added Minimum Edit Distance Algorithm (#1472) * Added Minimum Edit Distance Algorithm * Updated the code According to the guidelines * Updated with necessary changed and documentation * fix: Apply suggestions from code review * updating DIRECTORY.md * Some more chnages according to clang-tidy clang-tidy suggested moving all if-else statements under braces. So, I did it. * Array changed to vector as suggested Using of arrays changed to vectors as clang-tidy suggested to use vector instead of arrays. * Apply suggestions from code review Co-authored-by: David Leal * Updated with necessary changes * Apply suggestions from code review Co-authored-by: David Leal * Slight fixes on a comment * Updated Necessary Changes * Apply suggestions from code review Co-authored-by: David Leal * Updated with necessary changes * Updated with necessary changes * Updated with changes * Updated with changes and some tweaks * Updated with changes * Update dynamic_programming/minimum_edit_distance.cpp Co-authored-by: David Leal * Update dynamic_programming/minimum_edit_distance.cpp Co-authored-by: David Leal * Updated suggested changes Co-authored-by: David Leal Co-authored-by: David Leal Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: Anup Kumar Panwar <1anuppanwar@gmail.com> --- DIRECTORY.md | 1 + dynamic_programming/minimum_edit_distance.cpp | 163 ++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 dynamic_programming/minimum_edit_distance.cpp diff --git a/DIRECTORY.md b/DIRECTORY.md index 66683cfcd..d144131db 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -78,6 +78,7 @@ * [Longest Increasing Subsequence (Nlogn)](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/longest_increasing_subsequence_(nlogn).cpp) * [Longest Palindromic Subsequence](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/longest_palindromic_subsequence.cpp) * [Matrix Chain Multiplication](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/matrix_chain_multiplication.cpp) + * [Minimum Edit Distance](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/minimum_edit_distance.cpp) * [Palindrome Partitioning](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/palindrome_partitioning.cpp) * [Searching Of Element In Dynamic Array](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/searching_of_element_in_dynamic_array.cpp) * [Shortest Common Supersequence](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/shortest_common_supersequence.cpp) diff --git a/dynamic_programming/minimum_edit_distance.cpp b/dynamic_programming/minimum_edit_distance.cpp new file mode 100644 index 000000000..db9dd665d --- /dev/null +++ b/dynamic_programming/minimum_edit_distance.cpp @@ -0,0 +1,163 @@ +/** + * @file + * @brief Implementation of [Minimum Edit Distance](https://en.wikipedia.org/wiki/Edit_distance) using Dynamic Programing + * + * @details + * + * Given two strings str1 & str2 and we have to calculate the minimum + * number of operations (Insert, Remove, Replace) required to convert + * str1 to str2. + * + * ### Algorithm + * + * We will solve this problem using Naive recursion. But as we are + * approaching with a DP solution. So, we will take a DP array to + * store the solution of all sub-problems so that we don't have to + * perform recursion again and again. Now to solve the problem, We + * can traverse all characters from either right side of the strings + * or left side. Suppose we will do it from the right side. So, there + * are two possibilities for every pair of characters being traversed. + * 1. If the last characters of two strings are the same, Ignore + * the characters and get the count for the remaining string. + * So, we get the solution for lengths m-1 and n-1 in a DP array. + * + * 2. Else, (If last characters are not the same), we will consider all + * three operations (Insert, Remove, Replace) on the last character of + * the first string and compute the minimum cost for all three operations + * and take the minimum of three values in the DP array. + * For Insert: Recur for m and n-1 + * For Remove: Recur for for m-1 and n + * For Replace: Recur for for m-1 and n-1 + * + * @author [Nirjas Jakilim](github.com/nirzak) + */ + +#include /// for assert +#include /// for IO operations +#include /// for std::vector +/** + * @namespace dynamic_programming + * @brief Dynamic Programming algorithms + */ + +namespace dynamic_programming { + +/** + * @namespace Minimum Edit Distance + * @brief Implementation of [Minimum Edit Distance](https://en.wikipedia.org/wiki/Edit_distance) algorithm + */ + +namespace minimum_edit_distance { + +/** + * @brief Takes input of the cost of + * three operations: Insert, Replace and Delete + * and return the minimum cost among them. + * @param x used to pass minimum cost of Insert operations + * @param y used to pass minimum cost of Replace operations + * @param z used to pass minimum cost of Delete operations + * @returns x if `x` is the minimum value + * @returns y if `y` is the minimum value + * @returns z if `z` is the minimum value + */ +uint64_t min(uint64_t x, uint64_t y, uint64_t z) { + if (x <= y && x <= z) { + return x; /// returns x, if x is the minimum value + } + if (y <= x && y <= z) { + return y; /// returns y, if y is the minimum value + } + else { + return z; /// returns z if z is the minimum value + } +} + +/** + * @brief Calculates and stores the result + * of all the sub-problems, so that we don't have to recur to compute + * the minimum cost of a particular operation if it is already + * computed and stored in the `dp` vector. + * @param dp vector to store the computed minimum costs + * @param str1 to pass the 1st string + * @param str2 to pass the 2nd string + * @param m the length of str1 + * @param n the length of str2 + * @returns dp[m][n] the minimum cost of operations + * needed to convert str1 to str2 + */ +uint64_t editDistDP(std::string str1, std::string str2, uint64_t m, uint64_t n) { + /// Create a table to store results of subproblems + std::vector>dp(m+1, std::vector(n+1)); /// creasting 2D vector dp to store the results of subproblems + + /// Fill d[][] in bottom up manner + for (uint64_t i = 0; i <= m; i++) { + for (uint64_t j = 0; j <= n; j++) { + /// If first string is empty, only option is to + /// insert all characters of second string + if (i == 0) { + dp[i][j] = j; /// Minimum operations = j + } + + /// If second string is empty, only option is to + /// remove all characters of second string + else if (j == 0) { + dp[i][j] = i; /// Minimum operations = i + } + + /// If last characters are same, ignore last char + /// and recur for remaining string + else if (str1[i - 1] == str2[j - 1]) { + dp[i][j] = dp[i - 1][j - 1]; + } + + /// If the last character is different, consider all + /// possibilities and find the minimum + else { + dp[i][j] = 1 + min(dp[i][j - 1], // Insert + dp[i - 1][j], // Remove + dp[i - 1][j - 1]); // Replace + } + } + } + + return dp[m][n]; /// returning the minimum cost of operations needed to convert str1 to str2 +} +} // namespace minimum_edit_distance +} // namespace dynamic_programming + +/** + * @brief Self-test implementations + * @returns void + */ +static void test() { + // 1st test + std::string str1 = "INTENTION"; // Sample input of 1st string + std::string str2 = "EXECUTION"; // Sample input of 2nd string + uint64_t expected_output1 = 5; // Expected minimum cost + uint64_t output1 = dynamic_programming::minimum_edit_distance::editDistDP( + str1, str2, str1.length(), str2.length()); // calling the editDistDP function and storing the result on output1 + assert(output1 == expected_output1); // comparing the output with the expected output + std::cout << "Minimum Number of Operations Required: " << output1 + << std::endl; + + // 2nd test + std::string str3 = "SATURDAY"; + std::string str4 = "SUNDAY"; + uint64_t expected_output2 = 3; + uint64_t output2 = dynamic_programming::minimum_edit_distance::editDistDP( + str3, str4, str3.length(), str4.length()); + assert(output2 == expected_output2); + std::cout << "Minimum Number of Operations Required: " << output2 + << std::endl; +} + +/** + * @brief main function + * @param argc commandline argument count (ignored) + * @param argv commandline array of arguments (ignored) + * @returns 0 on exit + */ +int main(int argc, char *argv[]) { + test(); // run self-test implementations + return 0; +}