From fd695305150777981dc2a1f256aa2be444e4f108 Mon Sep 17 00:00:00 2001 From: Krishna Vedala <7001608+kvedala@users.noreply.github.com> Date: Sun, 31 May 2020 23:09:10 -0400 Subject: [PATCH] added namespace string_search --- strings/brute_force_string_searching.cpp | 42 ++++--- strings/knuth_morris_pratt.cpp | 78 ++++++------ strings/rabin_karp.cpp | 152 ++++++++++++----------- 3 files changed, 143 insertions(+), 129 deletions(-) diff --git a/strings/brute_force_string_searching.cpp b/strings/brute_force_string_searching.cpp index 9a2b5327a..fc2f543c0 100644 --- a/strings/brute_force_string_searching.cpp +++ b/strings/brute_force_string_searching.cpp @@ -10,28 +10,32 @@ #endif #include -/** - * Find a pattern in a string by comparing the pattern to every substring. - * @param text Any string that might contain the pattern. - * @param pattern String that we are searching for. - * @return Index where the pattern starts in the text - * @return -1 if the pattern was not found. - */ -int brute_force(const std::string &text, const std::string &pattern) { - size_t pat_l = pattern.length(); - size_t txt_l = text.length(); - int index = -1; - if (pat_l <= txt_l) { - for (size_t i = 0; i < txt_l - pat_l + 1; i++) { - std::string s = text.substr(i, pat_l); - if (s == pattern) { - index = i; - break; +namespace string_search { + /** + * Find a pattern in a string by comparing the pattern to every substring. + * @param text Any string that might contain the pattern. + * @param pattern String that we are searching for. + * @return Index where the pattern starts in the text + * @return -1 if the pattern was not found. + */ + int brute_force(const std::string &text, const std::string &pattern) { + size_t pat_l = pattern.length(); + size_t txt_l = text.length(); + int index = -1; + if (pat_l <= txt_l) { + for (size_t i = 0; i < txt_l - pat_l + 1; i++) { + std::string s = text.substr(i, pat_l); + if (s == pattern) { + index = i; + break; + } } } + return index; } - return index; -} +} // namespace string_search + +using string_search::brute_force; /** set of test cases */ const std::vector> test_set = { diff --git a/strings/knuth_morris_pratt.cpp b/strings/knuth_morris_pratt.cpp index b83cab966..ee569cccc 100644 --- a/strings/knuth_morris_pratt.cpp +++ b/strings/knuth_morris_pratt.cpp @@ -20,50 +20,54 @@ #endif #include -/** - * Generate the partial match table aka failure function for a pattern to - * search. - * \param[in] pattern text for which to create the partial match table - * \returns the partial match table as a vector array - */ -std::vector getFailureArray(const std::string &pattern) { - int pattern_length = pattern.size(); - std::vector failure(pattern_length + 1); - failure[0] = -1; - int j = -1; +namespace string_search { + /** + * Generate the partial match table aka failure function for a pattern to + * search. + * \param[in] pattern text for which to create the partial match table + * \returns the partial match table as a vector array + */ + std::vector getFailureArray(const std::string &pattern) { + int pattern_length = pattern.size(); + std::vector failure(pattern_length + 1); + failure[0] = -1; + int j = -1; - for (int i = 0; i < pattern_length; i++) { - while (j != -1 && pattern[j] != pattern[i]) { - j = failure[j]; + for (int i = 0; i < pattern_length; i++) { + while (j != -1 && pattern[j] != pattern[i]) { + j = failure[j]; + } + j++; + failure[i + 1] = j; } - j++; - failure[i + 1] = j; + return failure; } - return failure; -} -/** - * KMP algorithm to find a pattern in a text - * \param[in] pattern string pattern to search - * \param[in] text text in which to search - * \returns `true` if pattern was found - * \returns `false` if pattern was not found - */ -bool kmp(const std::string &pattern, const std::string &text) { - int text_length = text.size(), pattern_length = pattern.size(); - std::vector failure = getFailureArray(pattern); + /** + * KMP algorithm to find a pattern in a text + * \param[in] pattern string pattern to search + * \param[in] text text in which to search + * \returns `true` if pattern was found + * \returns `false` if pattern was not found + */ + bool kmp(const std::string &pattern, const std::string &text) { + int text_length = text.size(), pattern_length = pattern.size(); + std::vector failure = getFailureArray(pattern); - int k = 0; - for (int j = 0; j < text_length; j++) { - while (k != -1 && pattern[k] != text[j]) { - k = failure[k]; + int k = 0; + for (int j = 0; j < text_length; j++) { + while (k != -1 && pattern[k] != text[j]) { + k = failure[k]; + } + k++; + if (k == pattern_length) + return true; } - k++; - if (k == pattern_length) - return true; + return false; } - return false; -} +} // namespace string_search + +using string_search::kmp; /** Main function */ int main() { diff --git a/strings/rabin_karp.cpp b/strings/rabin_karp.cpp index 018ff5632..6d1f4e7de 100644 --- a/strings/rabin_karp.cpp +++ b/strings/rabin_karp.cpp @@ -15,86 +15,92 @@ #define PRIME 5 ///< Prime modulus for hash functions -/** - * convert a string to an intger - called as hashing function - * \param[in] s source of string to hash - * \param[in] n length of substring to hash - * \returns hash integer - */ -int64_t create_hash(const std::string& s, int n) { - int64_t result = 0; - for (int i = 0; i < n; ++i) { - result += (int64_t)(s[i] * (int64_t)pow(PRIME, i)); +namespace string_search { + /** + * convert a string to an intger - called as hashing function + * \param[in] s source of string to hash + * \param[in] n length of substring to hash + * \returns hash integer + */ + int64_t create_hash(const std::string& s, int n) { + int64_t result = 0; + for (int i = 0; i < n; ++i) { + result += (int64_t)(s[i] * (int64_t)pow(PRIME, i)); + } + return result; } - return result; -} -/** - * re-hash a string using known existing hash - * \param[in] s source of string to hash - * \param[in] old_index previous index of string - * \param[in] new_index new index of string - * \param[in] old_hash previous hash of substring - * \param[in] patLength length of substring to hash - * \returns new hash integer - */ -int64_t recalculate_hash(const std::string& s, int old_index, int new_index, - int64_t old_hash, int patLength) { - int64_t new_hash = old_hash - s[old_index]; - new_hash /= PRIME; - new_hash += (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1)); - return new_hash; -} - -/** - * compare if two sub-strings are equal - * \param[in] str1 string pattern to search - * \param[in] str2 text in which to search - * \param[in] start1,end1 start and end indices for substring in str1 - * \param[in] start2,end2 start and end indices for substring in str2 - * \returns `true` if pattern was found - * \returns `false` if pattern was not found - * @note can this be replaced by std::string::compare? - */ -bool check_if_equal(const std::string& str1, const std::string& str2, - int start1, int end1, int start2, int end2) { - if (end1 - start1 != end2 - start2) { - return false; + /** + * re-hash a string using known existing hash + * \param[in] s source of string to hash + * \param[in] old_index previous index of string + * \param[in] new_index new index of string + * \param[in] old_hash previous hash of substring + * \param[in] patLength length of substring to hash + * \returns new hash integer + */ + int64_t recalculate_hash(const std::string& s, int old_index, int new_index, + int64_t old_hash, int patLength) { + int64_t new_hash = old_hash - s[old_index]; + new_hash /= PRIME; + new_hash += + (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1)); + return new_hash; } - while (start1 <= end1 && start2 <= end2) { - if (str1[start1] != str2[start2]) { + + /** + * compare if two sub-strings are equal + * \param[in] str1 string pattern to search + * \param[in] str2 text in which to search + * \param[in] start1,end1 start and end indices for substring in str1 + * \param[in] start2,end2 start and end indices for substring in str2 + * \returns `true` if pattern was found + * \returns `false` if pattern was not found + * @note can this be replaced by std::string::compare? + */ + bool check_if_equal(const std::string& str1, const std::string& str2, + int start1, int end1, int start2, int end2) { + if (end1 - start1 != end2 - start2) { return false; } - start1++; - start2++; - } - return true; -} - -/** - * Perform string pattern search using Rabin-Karp algorithm - * @param[in] str string to search in - * @param[in] pat pattern to search for - * @return index of first occurrence of pattern - * @return -1 if pattern not found - */ - -int rabin_karp(const std::string& str, const std::string& pat) { - int64_t pat_hash = create_hash(pat, pat.size()); - int64_t str_hash = create_hash(str, pat.size()); - for (int i = 0; i <= str.size() - pat.size(); ++i) { - if (pat_hash == str_hash && - check_if_equal(str, pat, i, i + pat.size() - 1, 0, - pat.size() - 1)) { - return i; - } - if (i < str.size() - pat.size()) { - str_hash = - recalculate_hash(str, i, i + pat.size(), str_hash, pat.size()); + while (start1 <= end1 && start2 <= end2) { + if (str1[start1] != str2[start2]) { + return false; + } + start1++; + start2++; } + return true; } - return -1; // return -1 if given pattern not found -} + + /** + * Perform string pattern search using Rabin-Karp algorithm + * @param[in] str string to search in + * @param[in] pat pattern to search for + * @return index of first occurrence of pattern + * @return -1 if pattern not found + */ + + int rabin_karp(const std::string& str, const std::string& pat) { + int64_t pat_hash = create_hash(pat, pat.size()); + int64_t str_hash = create_hash(str, pat.size()); + for (int i = 0; i <= str.size() - pat.size(); ++i) { + if (pat_hash == str_hash && + check_if_equal(str, pat, i, i + pat.size() - 1, 0, + pat.size() - 1)) { + return i; + } + if (i < str.size() - pat.size()) { + str_hash = recalculate_hash(str, i, i + pat.size(), str_hash, + pat.size()); + } + } + return -1; // return -1 if given pattern not found + } + +} // namespace string_search + +using string_search::rabin_karp; /** Main function */ int main(void) {