added namespace string_search

This commit is contained in:
Krishna Vedala 2020-05-31 23:09:10 -04:00
parent 5a2615c54e
commit fd69530515
3 changed files with 143 additions and 129 deletions

View File

@ -10,14 +10,15 @@
#endif #endif
#include <vector> #include <vector>
/** namespace string_search {
/**
* Find a pattern in a string by comparing the pattern to every substring. * Find a pattern in a string by comparing the pattern to every substring.
* @param text Any string that might contain the pattern. * @param text Any string that might contain the pattern.
* @param pattern String that we are searching for. * @param pattern String that we are searching for.
* @return Index where the pattern starts in the text * @return Index where the pattern starts in the text
* @return -1 if the pattern was not found. * @return -1 if the pattern was not found.
*/ */
int brute_force(const std::string &text, const std::string &pattern) { int brute_force(const std::string &text, const std::string &pattern) {
size_t pat_l = pattern.length(); size_t pat_l = pattern.length();
size_t txt_l = text.length(); size_t txt_l = text.length();
int index = -1; int index = -1;
@ -31,7 +32,10 @@ int brute_force(const std::string &text, const std::string &pattern) {
} }
} }
return index; return index;
} }
} // namespace string_search
using string_search::brute_force;
/** set of test cases */ /** set of test cases */
const std::vector<std::vector<std::string>> test_set = { const std::vector<std::vector<std::string>> test_set = {

View File

@ -20,13 +20,14 @@
#endif #endif
#include <vector> #include <vector>
/** namespace string_search {
/**
* Generate the partial match table aka failure function for a pattern to * Generate the partial match table aka failure function for a pattern to
* search. * search.
* \param[in] pattern text for which to create the partial match table * \param[in] pattern text for which to create the partial match table
* \returns the partial match table as a vector array * \returns the partial match table as a vector array
*/ */
std::vector<int> getFailureArray(const std::string &pattern) { std::vector<int> getFailureArray(const std::string &pattern) {
int pattern_length = pattern.size(); int pattern_length = pattern.size();
std::vector<int> failure(pattern_length + 1); std::vector<int> failure(pattern_length + 1);
failure[0] = -1; failure[0] = -1;
@ -40,16 +41,16 @@ std::vector<int> getFailureArray(const std::string &pattern) {
failure[i + 1] = j; failure[i + 1] = j;
} }
return failure; return failure;
} }
/** /**
* KMP algorithm to find a pattern in a text * KMP algorithm to find a pattern in a text
* \param[in] pattern string pattern to search * \param[in] pattern string pattern to search
* \param[in] text text in which to search * \param[in] text text in which to search
* \returns `true` if pattern was found * \returns `true` if pattern was found
* \returns `false` if pattern was not found * \returns `false` if pattern was not found
*/ */
bool kmp(const std::string &pattern, const std::string &text) { bool kmp(const std::string &pattern, const std::string &text) {
int text_length = text.size(), pattern_length = pattern.size(); int text_length = text.size(), pattern_length = pattern.size();
std::vector<int> failure = getFailureArray(pattern); std::vector<int> failure = getFailureArray(pattern);
@ -63,7 +64,10 @@ bool kmp(const std::string &pattern, const std::string &text) {
return true; return true;
} }
return false; return false;
} }
} // namespace string_search
using string_search::kmp;
/** Main function */ /** Main function */
int main() { int main() {

View File

@ -15,21 +15,22 @@
#define PRIME 5 ///< Prime modulus for hash functions #define PRIME 5 ///< Prime modulus for hash functions
/** namespace string_search {
/**
* convert a string to an integer - known as a hashing function * convert a string to an integer - known as a hashing function
* \param[in] s source of string to hash * \param[in] s source of string to hash
* \param[in] n length of substring to hash * \param[in] n length of substring to hash
* \returns hash integer * \returns hash integer
*/ */
int64_t create_hash(const std::string& s, int n) { int64_t create_hash(const std::string& s, int n) {
int64_t result = 0; int64_t result = 0;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
result += (int64_t)(s[i] * (int64_t)pow(PRIME, i)); result += (int64_t)(s[i] * (int64_t)pow(PRIME, i));
} }
return result; return result;
} }
/** /**
* re-hash a string using known existing hash * re-hash a string using known existing hash
* \param[in] s source of string to hash * \param[in] s source of string to hash
* \param[in] old_index previous index of string * \param[in] old_index previous index of string
@ -38,15 +39,16 @@ int64_t create_hash(const std::string& s, int n) {
* \param[in] patLength length of substring to hash * \param[in] patLength length of substring to hash
* \returns new hash integer * \returns new hash integer
*/ */
int64_t recalculate_hash(const std::string& s, int old_index, int new_index, int64_t recalculate_hash(const std::string& s, int old_index, int new_index,
int64_t old_hash, int patLength) { int64_t old_hash, int patLength) {
int64_t new_hash = old_hash - s[old_index]; int64_t new_hash = old_hash - s[old_index];
new_hash /= PRIME; new_hash /= PRIME;
new_hash += (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1)); new_hash +=
(int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1));
return new_hash; return new_hash;
} }
/** /**
* compare if two sub-strings are equal * compare if two sub-strings are equal
* \param[in] str1 string pattern to search * \param[in] str1 string pattern to search
* \param[in] str2 text in which to search * \param[in] str2 text in which to search
@ -56,7 +58,7 @@ int64_t recalculate_hash(const std::string& s, int old_index, int new_index,
* \returns `false` if pattern was not found * \returns `false` if pattern was not found
* @note can this be replaced by std::string::compare? * @note can this be replaced by std::string::compare?
*/ */
bool check_if_equal(const std::string& str1, const std::string& str2, bool check_if_equal(const std::string& str1, const std::string& str2,
int start1, int end1, int start2, int end2) { int start1, int end1, int start2, int end2) {
if (end1 - start1 != end2 - start2) { if (end1 - start1 != end2 - start2) {
return false; return false;
@ -69,9 +71,9 @@ bool check_if_equal(const std::string& str1, const std::string& str2,
start2++; start2++;
} }
return true; return true;
} }
/** /**
* Perform string pattern search using Rabin-Karp algorithm * Perform string pattern search using Rabin-Karp algorithm
* @param[in] str string to search in * @param[in] str string to search in
* @param[in] pat pattern to search for * @param[in] pat pattern to search for
@ -79,7 +81,7 @@ bool check_if_equal(const std::string& str1, const std::string& str2,
* @return -1 if pattern not found * @return -1 if pattern not found
*/ */
int rabin_karp(const std::string& str, const std::string& pat) { int rabin_karp(const std::string& str, const std::string& pat) {
int64_t pat_hash = create_hash(pat, pat.size()); int64_t pat_hash = create_hash(pat, pat.size());
int64_t str_hash = create_hash(str, pat.size()); int64_t str_hash = create_hash(str, pat.size());
for (int i = 0; i <= str.size() - pat.size(); ++i) { for (int i = 0; i <= str.size() - pat.size(); ++i) {
@ -89,12 +91,16 @@ int rabin_karp(const std::string& str, const std::string& pat) {
return i; return i;
} }
if (i < str.size() - pat.size()) { if (i < str.size() - pat.size()) {
str_hash = str_hash = recalculate_hash(str, i, i + pat.size(), str_hash,
recalculate_hash(str, i, i + pat.size(), str_hash, pat.size()); pat.size());
} }
} }
return -1; // return -1 if given pattern not found return -1; // return -1 if given pattern not found
} }
} // namespace string_search
using string_search::rabin_karp;
/** Main function */ /** Main function */
int main(void) { int main(void) {