mirror of
https://hub.njuu.cf/TheAlgorithms/C-Plus-Plus.git
synced 2023-10-11 13:05:55 +08:00
added namespace string_search
This commit is contained in:
parent
5a2615c54e
commit
fd69530515
@ -10,28 +10,32 @@
|
||||
#endif
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
* Find a pattern in a string by comparing the pattern to every substring.
|
||||
* @param text Any string that might contain the pattern.
|
||||
* @param pattern String that we are searching for.
|
||||
* @return Index where the pattern starts in the text
|
||||
* @return -1 if the pattern was not found.
|
||||
*/
|
||||
int brute_force(const std::string &text, const std::string &pattern) {
|
||||
size_t pat_l = pattern.length();
|
||||
size_t txt_l = text.length();
|
||||
int index = -1;
|
||||
if (pat_l <= txt_l) {
|
||||
for (size_t i = 0; i < txt_l - pat_l + 1; i++) {
|
||||
std::string s = text.substr(i, pat_l);
|
||||
if (s == pattern) {
|
||||
index = i;
|
||||
break;
|
||||
namespace string_search {
|
||||
/**
 * Find a pattern in a string by comparing the pattern to every substring.
 *
 * Each candidate offset is compared in place with std::string::compare, so
 * no temporary substring is allocated while scanning.
 *
 * @param text Any string that might contain the pattern.
 * @param pattern String that we are searching for.
 * @return Index where the pattern first starts in the text (0 for an empty
 * pattern).
 * @return -1 if the pattern was not found, including when the pattern is
 * longer than the text.
 */
int brute_force(const std::string &text, const std::string &pattern) {
    const size_t pat_l = pattern.length();
    const size_t txt_l = text.length();

    // A pattern longer than the text can never match; bailing out here also
    // keeps the unsigned arithmetic below from wrapping around.
    if (pat_l > txt_l) {
        return -1;
    }

    for (size_t i = 0; i + pat_l <= txt_l; ++i) {
        if (text.compare(i, pat_l, pattern) == 0) {
            // the interface reports positions as int, so narrow explicitly
            return static_cast<int>(i);
        }
    }
    return -1;
}
|
||||
return index;
|
||||
}
|
||||
} // namespace string_search
|
||||
|
||||
using string_search::brute_force;
|
||||
|
||||
/** set of test cases */
|
||||
const std::vector<std::vector<std::string>> test_set = {
|
||||
|
@ -20,50 +20,54 @@
|
||||
#endif
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
* Generate the partial match table aka failure function for a pattern to
|
||||
* search.
|
||||
* \param[in] pattern text for which to create the partial match table
|
||||
* \returns the partial match table as a vector array
|
||||
*/
|
||||
std::vector<int> getFailureArray(const std::string &pattern) {
|
||||
int pattern_length = pattern.size();
|
||||
std::vector<int> failure(pattern_length + 1);
|
||||
failure[0] = -1;
|
||||
int j = -1;
|
||||
namespace string_search {
|
||||
/**
|
||||
* Generate the partial match table aka failure function for a pattern to
|
||||
* search.
|
||||
* \param[in] pattern text for which to create the partial match table
|
||||
* \returns the partial match table as a vector array
|
||||
*/
|
||||
std::vector<int> getFailureArray(const std::string &pattern) {
|
||||
int pattern_length = pattern.size();
|
||||
std::vector<int> failure(pattern_length + 1);
|
||||
failure[0] = -1;
|
||||
int j = -1;
|
||||
|
||||
for (int i = 0; i < pattern_length; i++) {
|
||||
while (j != -1 && pattern[j] != pattern[i]) {
|
||||
j = failure[j];
|
||||
for (int i = 0; i < pattern_length; i++) {
|
||||
while (j != -1 && pattern[j] != pattern[i]) {
|
||||
j = failure[j];
|
||||
}
|
||||
j++;
|
||||
failure[i + 1] = j;
|
||||
}
|
||||
j++;
|
||||
failure[i + 1] = j;
|
||||
return failure;
|
||||
}
|
||||
return failure;
|
||||
}
|
||||
|
||||
/**
|
||||
* KMP algorithm to find a pattern in a text
|
||||
* \param[in] pattern string pattern to search
|
||||
* \param[in] text text in which to search
|
||||
* \returns `true` if pattern was found
|
||||
* \returns `false` if pattern was not found
|
||||
*/
|
||||
bool kmp(const std::string &pattern, const std::string &text) {
|
||||
int text_length = text.size(), pattern_length = pattern.size();
|
||||
std::vector<int> failure = getFailureArray(pattern);
|
||||
/**
|
||||
* KMP algorithm to find a pattern in a text
|
||||
* \param[in] pattern string pattern to search
|
||||
* \param[in] text text in which to search
|
||||
* \returns `true` if pattern was found
|
||||
* \returns `false` if pattern was not found
|
||||
*/
|
||||
bool kmp(const std::string &pattern, const std::string &text) {
|
||||
int text_length = text.size(), pattern_length = pattern.size();
|
||||
std::vector<int> failure = getFailureArray(pattern);
|
||||
|
||||
int k = 0;
|
||||
for (int j = 0; j < text_length; j++) {
|
||||
while (k != -1 && pattern[k] != text[j]) {
|
||||
k = failure[k];
|
||||
int k = 0;
|
||||
for (int j = 0; j < text_length; j++) {
|
||||
while (k != -1 && pattern[k] != text[j]) {
|
||||
k = failure[k];
|
||||
}
|
||||
k++;
|
||||
if (k == pattern_length)
|
||||
return true;
|
||||
}
|
||||
k++;
|
||||
if (k == pattern_length)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace string_search
|
||||
|
||||
using string_search::kmp;
|
||||
|
||||
/** Main function */
|
||||
int main() {
|
||||
|
@ -15,86 +15,92 @@
|
||||
|
||||
#define PRIME 5 ///< Prime modulus for hash functions
|
||||
|
||||
/**
|
||||
* convert a string to an integer - called as hashing function
|
||||
* \param[in] s source of string to hash
|
||||
* \param[in] n length of substring to hash
|
||||
* \returns hash integer
|
||||
*/
|
||||
int64_t create_hash(const std::string& s, int n) {
|
||||
int64_t result = 0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
result += (int64_t)(s[i] * (int64_t)pow(PRIME, i));
|
||||
namespace string_search {
|
||||
/**
|
||||
* convert a string to an intger - called as hashing function
|
||||
* \param[in] s source of string to hash
|
||||
* \param[in] n length of substring to hash
|
||||
* \returns hash integer
|
||||
*/
|
||||
int64_t create_hash(const std::string& s, int n) {
|
||||
int64_t result = 0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
result += (int64_t)(s[i] * (int64_t)pow(PRIME, i));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* re-hash a string using known existing hash
|
||||
* \param[in] s source of string to hash
|
||||
* \param[in] old_index previous index of string
|
||||
* \param[in] new_index new index of string
|
||||
* \param[in] old_hash previous hash of substring
|
||||
* \param[in] patLength length of substring to hash
|
||||
* \returns new hash integer
|
||||
*/
|
||||
int64_t recalculate_hash(const std::string& s, int old_index, int new_index,
|
||||
int64_t old_hash, int patLength) {
|
||||
int64_t new_hash = old_hash - s[old_index];
|
||||
new_hash /= PRIME;
|
||||
new_hash += (int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1));
|
||||
return new_hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* compare if two sub-strings are equal
|
||||
* \param[in] str1 string pattern to search
|
||||
* \param[in] str2 text in which to search
|
||||
* \param[in] start1,end1 start and end indices for substring in str1
|
||||
* \param[in] start2,end2 start and end indices for substring in str2
|
||||
* \returns `true` if pattern was found
|
||||
* \returns `false` if pattern was not found
|
||||
* @note can this be replaced by std::string::compare?
|
||||
*/
|
||||
bool check_if_equal(const std::string& str1, const std::string& str2,
|
||||
int start1, int end1, int start2, int end2) {
|
||||
if (end1 - start1 != end2 - start2) {
|
||||
return false;
|
||||
/**
|
||||
* re-hash a string using known existing hash
|
||||
* \param[in] s source of string to hash
|
||||
* \param[in] old_index previous index of string
|
||||
* \param[in] new_index new index of string
|
||||
* \param[in] old_hash previous hash of substring
|
||||
* \param[in] patLength length of substring to hash
|
||||
* \returns new hash integer
|
||||
*/
|
||||
int64_t recalculate_hash(const std::string& s, int old_index, int new_index,
|
||||
int64_t old_hash, int patLength) {
|
||||
int64_t new_hash = old_hash - s[old_index];
|
||||
new_hash /= PRIME;
|
||||
new_hash +=
|
||||
(int64_t)(s[new_index] * (int64_t)pow(PRIME, patLength - 1));
|
||||
return new_hash;
|
||||
}
|
||||
while (start1 <= end1 && start2 <= end2) {
|
||||
if (str1[start1] != str2[start2]) {
|
||||
|
||||
/**
|
||||
* compare if two sub-strings are equal
|
||||
* \param[in] str1 string pattern to search
|
||||
* \param[in] str2 text in which to search
|
||||
* \param[in] start1,end1 start and end indices for substring in str1
|
||||
* \param[in] start2,end2 start and end indices for substring in str2
|
||||
* \returns `true` if pattern was found
|
||||
* \returns `false` if pattern was not found
|
||||
* @note can this be replaced by std::string::compare?
|
||||
*/
|
||||
bool check_if_equal(const std::string& str1, const std::string& str2,
|
||||
int start1, int end1, int start2, int end2) {
|
||||
if (end1 - start1 != end2 - start2) {
|
||||
return false;
|
||||
}
|
||||
start1++;
|
||||
start2++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform string pattern search using Rabin-Karp algorithm
|
||||
* @param[in] str string to search in
|
||||
* @param[in] pat pattern to search for
|
||||
* @return index of first occurrence of pattern
|
||||
* @return -1 if pattern not found
|
||||
*/
|
||||
|
||||
int rabin_karp(const std::string& str, const std::string& pat) {
|
||||
int64_t pat_hash = create_hash(pat, pat.size());
|
||||
int64_t str_hash = create_hash(str, pat.size());
|
||||
for (int i = 0; i <= str.size() - pat.size(); ++i) {
|
||||
if (pat_hash == str_hash &&
|
||||
check_if_equal(str, pat, i, i + pat.size() - 1, 0,
|
||||
pat.size() - 1)) {
|
||||
return i;
|
||||
}
|
||||
if (i < str.size() - pat.size()) {
|
||||
str_hash =
|
||||
recalculate_hash(str, i, i + pat.size(), str_hash, pat.size());
|
||||
while (start1 <= end1 && start2 <= end2) {
|
||||
if (str1[start1] != str2[start2]) {
|
||||
return false;
|
||||
}
|
||||
start1++;
|
||||
start2++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return -1; // return -1 if given pattern not found
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform string pattern search using Rabin-Karp algorithm
|
||||
* @param[in] str string to search in
|
||||
* @param[in] pat pattern to search for
|
||||
* @return index of first occurrence of pattern
|
||||
* @return -1 if pattern not found
|
||||
*/
|
||||
|
||||
int rabin_karp(const std::string& str, const std::string& pat) {
|
||||
int64_t pat_hash = create_hash(pat, pat.size());
|
||||
int64_t str_hash = create_hash(str, pat.size());
|
||||
for (int i = 0; i <= str.size() - pat.size(); ++i) {
|
||||
if (pat_hash == str_hash &&
|
||||
check_if_equal(str, pat, i, i + pat.size() - 1, 0,
|
||||
pat.size() - 1)) {
|
||||
return i;
|
||||
}
|
||||
if (i < str.size() - pat.size()) {
|
||||
str_hash = recalculate_hash(str, i, i + pat.size(), str_hash,
|
||||
pat.size());
|
||||
}
|
||||
}
|
||||
return -1; // return -1 if given pattern not found
|
||||
}
|
||||
|
||||
} // namespace string_search
|
||||
|
||||
using string_search::rabin_karp;
|
||||
|
||||
/** Main function */
|
||||
int main(void) {
|
||||
|
Loading…
Reference in New Issue
Block a user