Merge branch 'master' into cmake-version-update

This commit is contained in:
David Leal 2023-06-16 15:55:48 -06:00 committed by GitHub
commit 402c56271b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 791 additions and 91 deletions

View File

@ -27,11 +27,6 @@ jobs:
wget https://raw.githubusercontent.com/TheAlgorithms/scripts/main/filename_formatter.sh
chmod +x filename_formatter.sh
./filename_formatter.sh . .cpp,.hpp
- name: Update DIRECTORY.md
run: |
wget https://raw.githubusercontent.com/TheAlgorithms/scripts/main/build_directory_md.py
python3 build_directory_md.py C-Plus-Plus . .cpp,.hpp,.h > DIRECTORY.md
git commit -m "updating DIRECTORY.md" DIRECTORY.md || true
- name: Get file changes
run: |
git branch

31
.github/workflows/directory_writer.yml vendored Normal file
View File

@ -0,0 +1,31 @@
# Workflow that runs the `directory_md` action from TheAlgorithms/scripts.
# It is triggered by pushes to the branch listed below and by a weekly cron
# schedule, and only does work inside the TheAlgorithms/C-Plus-Plus repository.
name: Directory writer
on:
push:
branches:
# NOTE(review): the merge commit that introduced this file refers to a
# `master` branch - confirm that `main` is the branch pushes actually land on.
- main
schedule:
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
# │ │ │ │ │
# │ │ │ │ │
# │ │ │ │ │
# * * * * *
# minute 0, hour 0, any day of month, any month, weekday 1 (Monday)
- cron: '0 0 * * 1'
jobs:
build:
if: github.repository == 'TheAlgorithms/C-Plus-Plus' # We only need this to run in our repository.
runs-on: ubuntu-latest
steps:
# fetch-depth: 0 asks checkout for the full history instead of a shallow clone
- uses: actions/checkout@v3
with:
fetch-depth: 0
# Inputs below are passed straight to the shared action; presumably it
# rebuilds DIRECTORY.md from the listed file types - confirm in the action docs.
- name: Build directory
uses: TheAlgorithms/scripts/directory_md@main
with:
language: C-Plus-Plus
working-directory: .
filetypes: .cpp,.hpp,.h
ignored-directories: doc/

29
.vscode/settings.json vendored
View File

@ -59,6 +59,33 @@
"stdexcept": "cpp",
"streambuf": "cpp",
"typeinfo": "cpp",
"valarray": "cpp"
"valarray": "cpp",
"bit": "cpp",
"charconv": "cpp",
"compare": "cpp",
"concepts": "cpp",
"format": "cpp",
"forward_list": "cpp",
"ios": "cpp",
"locale": "cpp",
"queue": "cpp",
"stack": "cpp",
"xfacet": "cpp",
"xhash": "cpp",
"xiosbase": "cpp",
"xlocale": "cpp",
"xlocbuf": "cpp",
"xlocinfo": "cpp",
"xlocmes": "cpp",
"xlocmon": "cpp",
"xlocnum": "cpp",
"xloctime": "cpp",
"xmemory": "cpp",
"xstddef": "cpp",
"xstring": "cpp",
"xtr1common": "cpp",
"xtree": "cpp",
"xutility": "cpp",
"climits": "cpp"
}
}

View File

@ -45,6 +45,7 @@ add_subdirectory(graph)
add_subdirectory(divide_and_conquer)
add_subdirectory(games)
add_subdirectory(cpu_scheduling_algorithms)
add_subdirectory(physics)
cmake_policy(SET CMP0054 NEW)
cmake_policy(SET CMP0057 NEW)

View File

@ -171,6 +171,7 @@
* [Md5](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/md5.cpp)
* [Quadratic Probing Hash Table](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/quadratic_probing_hash_table.cpp)
* [Sha1](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/sha1.cpp)
* [Sha256](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/sha256.cpp)
## Machine Learning
* [A Star Search](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/machine_learning/a_star_search.cpp)

View File

@ -21,7 +21,7 @@ This repository is a collection of open-source implementation of a variety of al
* The repository provides implementations of various algorithms in one of the most fundamental general purpose languages - [C++](https://en.wikipedia.org/wiki/C%2B%2B).
* Well documented source code with detailed explanations provide a valuable resource for educators and students alike.
* Each source code is atomic using [STL classes](https://en.wikipedia.org/wiki/Standard_Template_Library) and _no external libraries_ are required for their compilation and execution. Thus, the fundamentals of the algorithms can be studied in much depth.
* Source codes are [compiled and tested](https://github.com/TheAlgorithms/C-Plus-Plus/actions?query=workflow%3A%22Awesome+CI+Workflow%22) for every commit on the latest versions of three major operating systems viz., Windows, MacOS and Ubuntu (Linux) using MSVC 16 2019, AppleClang 11.0 and GNU 7.5.0 respectively.
* Source codes are [compiled and tested](https://github.com/TheAlgorithms/C-Plus-Plus/actions?query=workflow%3A%22Awesome+CI+Workflow%22) for every commit on the latest versions of three major operating systems viz., Windows, MacOS, and Ubuntu (Linux) using MSVC 19 2022, AppleClang 14.0.0, and GNU 11.3.0 respectively.
* Strict adherence to the [C++11](https://en.wikipedia.org/wiki/C%2B%2B11) standard ensures that the code is also portable to embedded systems such as ESP32 and ARM Cortex with little to no changes.
* Self-checks within programs ensure correct implementations with confidence.
* Modular implementations and OpenSource licensing enable the functions to be utilized conveniently in other applications.

329
hashing/sha256.cpp Normal file
View File

@ -0,0 +1,329 @@
/**
* @file
* @author [Md. Anisul Haque](https://github.com/mdanisulh)
* @brief Simple C++ implementation of the [SHA-256 Hashing Algorithm]
* (https://en.wikipedia.org/wiki/SHA-2)
*
* @details
* [SHA-2](https://en.wikipedia.org/wiki/SHA-2) is a set of cryptographic hash
* functions that was designed by the
* [NSA](https://en.wikipedia.org/wiki/National_Security_Agency) and first
* published in 2001. SHA-256 is a part of the SHA-2 family. SHA-256 is widely
* used for authenticating software packages and secure password hashing.
*/
#include <array> /// For std::array
#include <cassert> /// For assert
#include <cstddef> /// For std::size_t
#include <cstdint> /// For uint8_t, uint32_t and uint64_t data types
#include <iomanip> /// For std::setfill and std::setw
#include <iostream> /// For IO operations
#include <sstream> /// For std::stringstream
#include <stdexcept> /// For std::out_of_range
#include <string> /// For std::string
#include <utility> /// For std::move
#include <vector> /// For std::vector
/**
* @namespace hashing
* @brief Hashing algorithms
*/
namespace hashing {
/**
* @namespace sha256
* @brief Functions for the [SHA-256](https://en.wikipedia.org/wiki/SHA-2)
* algorithm implementation
*/
namespace sha256 {
/**
 * @class Hash
 * @brief Holds the eight 32-bit words of the running hash and declares the
 * operations that fold a message schedule into it and render it as a
 * hexadecimal string
 */
class Hash {
 private:
    /// Running hash words, seeded with the first 32 bits of the fractional
    /// parts of the square roots of the first 8 primes 2..19
    std::array<uint32_t, 8> hash{{0x6A09E667, 0xBB67AE85, 0x3C6EF372,
                                  0xA54FF53A, 0x510E527F, 0x9B05688C,
                                  0x1F83D9AB, 0x5BE0CD19}};

 public:
    /// Updates the hash array from one 64-word message schedule
    void update(const std::array<uint32_t, 64> &blocks);
    /// Converts the hash array to a hexadecimal string
    std::string to_string() const;
};
/**
 * @brief Rotates the bits of a 32-bit unsigned integer to the right
 * @details The rotation count is reduced modulo 32 first. Without that, a
 * count of 0 (or any multiple of 32) would shift by the full width of the
 * type, which is undefined behaviour in C++.
 * @param n Integer to rotate
 * @param rotate Number of bits to rotate; any value is accepted
 * @return uint32_t The rotated integer
 */
uint32_t right_rotate(uint32_t n, size_t rotate) {
    rotate %= 32;
    if (rotate == 0) {
        // Rotating by a whole number of words leaves the value unchanged
        return n;
    }
    return (n >> rotate) | (n << (32 - rotate));
}
/**
 * @brief Updates the hash array
 * @details Runs the 64 rounds of the compression function over the given
 * message schedule and adds the resulting working values to the hash array.
 * @param blocks Message schedule array
 * @return void
 */
void Hash::update(const std::array<uint32_t, 64> &blocks) {
    // Round constants: first 32 bits of the fractional parts of the cube
    // roots of the first 64 primes 2..311
    const std::array<uint32_t, 64> round_constants = {
        0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1,
        0x923F82A4, 0xAB1C5ED5, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
        0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0xE49B69C1, 0xEFBE4786,
        0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
        0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147,
        0x06CA6351, 0x14292967, 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
        0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0xA2BFE8A1, 0xA81A664B,
        0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
        0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A,
        0x5B9CCA4F, 0x682E6FF3, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
        0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2};

    // Working variables: state[0..7] play the roles of a, b, c, d, e, f, g, h
    std::array<uint32_t, 8> state = hash;

    // Compression function main loop
    for (size_t round = 0; round < 64; ++round) {
        const uint32_t sigma1 = right_rotate(state[4], 6) ^
                                right_rotate(state[4], 11) ^
                                right_rotate(state[4], 25);
        const uint32_t choice = (state[4] & state[5]) ^ (~state[4] & state[6]);
        const uint32_t temp1 = state[7] + sigma1 + choice +
                               round_constants[round] + blocks[round];
        const uint32_t sigma0 = right_rotate(state[0], 2) ^
                                right_rotate(state[0], 13) ^
                                right_rotate(state[0], 22);
        const uint32_t majority = (state[0] & state[1]) ^
                                  (state[0] & state[2]) ^
                                  (state[1] & state[2]);
        const uint32_t temp2 = sigma0 + majority;

        // Shift every working variable one slot down (h = g, ..., b = a)
        for (size_t slot = 7; slot > 0; --slot) {
            state[slot] = state[slot - 1];
        }
        state[4] += temp1;         // e = d + temp1 (slot 4 now holds old d)
        state[0] = temp1 + temp2;  // a = temp1 + temp2
    }

    // Update hash values
    for (size_t slot = 0; slot < 8; ++slot) {
        hash[slot] += state[slot];
    }
}
/**
 * @brief Convert the hash to a hexadecimal string
 * @details Each of the eight words is written as exactly eight hexadecimal
 * digits, padded on the left with '0'.
 * @return std::string Final hash value
 */
std::string Hash::to_string() const {
    std::stringstream hex_stream;
    // Base and fill character stay in effect; the width is set per word
    hex_stream << std::hex << std::setfill('0');
    for (const auto word : hash) {
        hex_stream << std::setw(8) << word;
    }
    return hex_stream.str();
}
/**
 * @brief Computes size of the padded input
 * @details The result is the next multiple of 64 above input_size. When the
 * last partial chunk already holds 56 or more bytes, one further 64-byte
 * chunk is added.
 * @param input_size Size of the input in bytes
 * @return size_t Size of the padded input
 */
std::size_t compute_padded_size(const std::size_t input_size) {
    const std::size_t tail = input_size % 64;
    const std::size_t growth = (tail < 56) ? 64 : 128;
    return input_size + growth - tail;
}
/**
 * @brief Returns the byte at position byte_num in in_value
 * @details Byte 0 is the least significant byte of in_value.
 * @param in_value Input value
 * @param byte_num Position of byte to be returned
 * @return uint8_t Byte at position byte_num
 * @throws std::out_of_range if byte_num is not smaller than sizeof(in_value)
 */
template <typename T>
uint8_t extract_byte(const T in_value, const std::size_t byte_num) {
    const bool byte_exists = byte_num < sizeof(in_value);
    if (!byte_exists) {
        throw std::out_of_range("Byte at index byte_num does not exist");
    }
    const auto shifted = in_value >> (byte_num * 8);
    return static_cast<uint8_t>(shifted & 0xFF);
}
/**
 * @brief Returns the character at pos after the input is padded
 * @details The padded message is the input, one 0x80 byte, zero bytes, and
 * finally the input length in bits written as a 64-bit big-endian integer in
 * the last 8 bytes. The length is handled as uint64_t rather than size_t, so
 * that all 8 length bytes can be extracted even where size_t is narrower than
 * 64 bits.
 * @param input Input string
 * @param pos Position of character to be returned
 * @return char Character at the index pos in the padded string
 * @throws std::out_of_range if pos is not smaller than the padded size
 */
char get_char(const std::string &input, std::size_t pos) {
    const auto input_size = input.length();
    if (pos < input_size) {
        return input[pos];
    }
    if (pos == input_size) {
        return '\x80';
    }
    const auto padded_input_size = compute_padded_size(input_size);
    if (pos < padded_input_size - 8) {
        return '\x00';
    }
    if (padded_input_size <= pos) {
        throw std::out_of_range("pos is out of range");
    }
    // pos lies in the trailing 8-byte length field; the last byte of the
    // padded message is byte 0 (least significant) of the bit length
    const uint64_t bit_length = static_cast<uint64_t>(input_size) * 8;
    return static_cast<char>(
        extract_byte<uint64_t>(bit_length, padded_input_size - pos - 1));
}
/**
 * @brief Creates the message schedule array
 * @details The first 16 words are read from the padded input as big-endian
 * 32-bit values; the remaining 48 words are derived from them. Each word is
 * assembled in a uint32_t accumulator, so that no byte is shifted while
 * promoted to a signed int (a byte >= 0x80 shifted left by 24 does not fit
 * in a 32-bit int).
 * @param input Input string
 * @param byte_num Position of the first byte of the chunk
 * @return std::array<uint32_t, 64> Message schedule array
 */
std::array<uint32_t, 64> create_message_schedule_array(const std::string &input,
                                                       const size_t byte_num) {
    std::array<uint32_t, 64> blocks{};

    // Copy chunk into first 16 words of the message schedule array
    for (size_t block_num = 0; block_num < 16; ++block_num) {
        const size_t first_byte = byte_num + block_num * 4;
        uint32_t word = 0;
        for (size_t offset = 0; offset < 4; ++offset) {
            // Most significant byte comes first in the input
            word = (word << 8) |
                   static_cast<uint8_t>(get_char(input, first_byte + offset));
        }
        blocks[block_num] = word;
    }

    // Extend the first 16 words into remaining 48 words of the message schedule
    // array
    for (size_t block_num = 16; block_num < 64; ++block_num) {
        const auto s0 = right_rotate(blocks[block_num - 15], 7) ^
                        right_rotate(blocks[block_num - 15], 18) ^
                        (blocks[block_num - 15] >> 3);
        const auto s1 = right_rotate(blocks[block_num - 2], 17) ^
                        right_rotate(blocks[block_num - 2], 19) ^
                        (blocks[block_num - 2] >> 10);
        blocks[block_num] =
            blocks[block_num - 16] + s0 + blocks[block_num - 7] + s1;
    }

    return blocks;
}
/**
* @brief Computes the final hash value
* @param input Input string
* @return std::string The final hash value
*/
std::string sha256(const std::string &input) {
Hash h;
// Process message in successive 512-bit (64-byte) chunks
for (size_t byte_num = 0; byte_num < compute_padded_size(input.length());
byte_num += 64) {
h.update(create_message_schedule_array(input, byte_num));
}
return h.to_string();
}
} // namespace sha256
} // namespace hashing
/**
* @brief Self-test implementations
* @returns void
*/
static void test_compute_padded_size() {
assert(hashing::sha256::compute_padded_size(55) == 64);
assert(hashing::sha256::compute_padded_size(56) == 128);
assert(hashing::sha256::compute_padded_size(130) == 192);
}
static void test_extract_byte() {
assert(hashing::sha256::extract_byte<uint32_t>(512, 0) == 0);
assert(hashing::sha256::extract_byte<uint32_t>(512, 1) == 2);
bool exception = false;
try {
hashing::sha256::extract_byte<uint32_t>(512, 5);
} catch (const std::out_of_range &) {
exception = true;
}
assert(exception);
}
static void test_get_char() {
assert(hashing::sha256::get_char("test", 3) == 't');
assert(hashing::sha256::get_char("test", 4) == '\x80');
assert(hashing::sha256::get_char("test", 5) == '\x00');
assert(hashing::sha256::get_char("test", 63) == 32);
bool exception = false;
try {
hashing::sha256::get_char("test", 64);
} catch (const std::out_of_range &) {
exception = true;
}
assert(exception);
}
static void test_right_rotate() {
assert(hashing::sha256::right_rotate(128, 3) == 16);
assert(hashing::sha256::right_rotate(1, 30) == 4);
assert(hashing::sha256::right_rotate(6, 30) == 24);
}
/**
 * @brief Self-test for sha256: compares the digest of several inputs,
 * including the empty string, against their expected hexadecimal values
 * @returns void
 */
static void test_sha256() {
    // first = input message, second = expected digest
    using TestCase = std::pair<std::string, std::string>;

    const std::vector<TestCase> test_cases{
        {"",
         "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
        {"test",
         "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"},
        {"Hello World",
         "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e"},
        {"Hello World!",
         "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9"
         "069"}};

    for (const auto &tc : test_cases) {
        assert(hashing::sha256::sha256(tc.first) == tc.second);
    }
}
/**
 * @brief Runs every self-test in turn and reports success
 * @returns void
 */
static void test() {
    using SelfTest = void (*)();
    const SelfTest self_tests[] = {test_compute_padded_size, test_extract_byte,
                                   test_get_char, test_right_rotate,
                                   test_sha256};
    for (const SelfTest run : self_tests) {
        run();
    }

    std::cout << "All tests have successfully passed!\n";
}
/**
* @brief Main function: runs the self-tests and nothing else
* @returns 0 on exit
*/
int main() {
test(); // Run self-test implementations
return 0;
}

View File

@ -1,64 +1,73 @@
/**
* @file
* @brief A simple program to check if the given number is a factorial of some
* @brief A simple program to check if the given number is a [factorial](https://en.wikipedia.org/wiki/Factorial) of some
* number or not.
*
* @details A factorial number is a number of the form k!, i.e. the product of
* all positive integers up to k, for some positive integer k.
* https://www.mathsisfun.com/numbers/factorial.html
*
* @author [Divyajyoti Ukirde](https://github.com/divyajyotiuk)
* @author [ewd00010](https://github.com/ewd00010)
*/
#include <cassert>
#include <iostream>
#include <cassert> /// for assert
#include <iostream> /// for cout
/**
* Function to check if the given number is factorial of some number or not.
* @param n number to be checked.
* @return if number is a factorial, returns true, else false.
* @namespace
* @brief Mathematical algorithms
*/
namespace math {
/**
* @brief Function to check if the given number is factorial of some number or
* not.
* @param n number to be checked.
* @return true if number is a factorial
* @return false if number is not a factorial
*/
bool is_factorial(uint64_t n) {
if (n <= 0) {
if (n <= 0) { // factorial numbers are only ever positive Integers
return false;
}
for (uint32_t i = 1;; i++) {
if (n % i != 0) {
break;
}
n = n / i;
}
if (n == 1) {
return true;
} else {
return false;
}
}
/** Test function
/*!
* this loop is basically a reverse factorial calculation, where instead
* of multiplying we are dividing. We start at i = 2 since i = 1 has
* no impact division wise
*/
int i = 2;
while (n % i == 0) {
n = n / i;
i++;
}
/*!
* if n was a factorial then the repeated division reduces it until it
* becomes exactly 1
*/
return (n == 1);
}
} // namespace math
/**
* @brief Self-test implementations
* @returns void
*/
void tests() {
std::cout << "Test 1:\t n=50\n";
assert(is_factorial(50) == false);
std::cout << "passed\n";
static void tests() {
assert(math::is_factorial(50) == false);
assert(math::is_factorial(720) == true);
assert(math::is_factorial(0) == false);
assert(math::is_factorial(1) == true);
assert(math::is_factorial(479001600) == true);
assert(math::is_factorial(-24) == false);
std::cout << "Test 2:\t n=720\n";
assert(is_factorial(720) == true);
std::cout << "passed\n";
std::cout << "Test 3:\t n=0\n";
assert(is_factorial(0) == false);
std::cout << "passed\n";
std::cout << "Test 4:\t n=479001600\n";
assert(is_factorial(479001600) == true);
std::cout << "passed\n";
std::cout << "Test 5:\t n=-24\n";
assert(is_factorial(-24) == false);
std::cout << "passed\n";
std::cout << "All tests have successfully passed!" << std::endl;
}
/** Main function
/**
* @brief Main function
* @returns 0 on exit
*/
int main() {
tests();
tests(); // run self-test implementations
return 0;
}

View File

@ -1,62 +1,83 @@
/**
* Copyright 2020 @author omkarlanghe
*
* @file
* A simple program to check if the given number if prime or not.
*
* @brief
* Reduced all possibilities of a number which cannot be prime.
* Eg: No even number, except 2 can be a prime number, hence we will increment
* our loop with i+6 jumping and check for i or i+2 to be a factor of the
* number; if it's a factor then we will return false otherwise true after the
* loop terminates at the terminating condition which is (i*i<=num)
* A simple program to check if the given number is [Prime](https://en.wikipedia.org/wiki/Primality_test) or not.
* @details
* A prime number is any number greater than 1 that can be divided only by itself
* and 1. It must be positive and a whole number, therefore any prime number is
* part of the set of natural numbers. All prime numbers are odd numbers, with
* the exception of 2. This algorithm checks for prime numbers using this information.
* additional ways to solve the prime check problem:
* https://cp-algorithms.com/algebra/primality_tests.html#practice-problems
* @author [Omkar Langhe](https://github.com/omkarlanghe)
* @author [ewd00010](https://github.com/ewd00010)
*/
#include <cassert> /// for assert
#include <iostream> /// for IO operations
/**
* Function to check if the given number is prime or not.
* @param num number to be checked.
* @return if number is prime, it returns @ true, else it returns @ false.
* @brief Mathematical algorithms
* @namespace
*/
template <typename T>
bool is_prime(T num) {
bool result = true;
if (num <= 1) {
return false;
} else if (num == 2 || num == 3) {
return true;
} else if ((num % 2) == 0 || num % 3 == 0) {
return false;
} else {
for (T i = 5; (i * i) <= (num); i = (i + 6)) {
if ((num % i) == 0 || (num % (i + 2) == 0)) {
result = false;
break;
namespace math {
/**
* @brief Function to check if the given number is prime or not.
* @param num number to be checked.
* @return true if number is a prime
* @return false if number is not a prime.
*/
bool is_prime(int64_t num) {
/*!
* Reduce all possibilities of a number which cannot be prime with the first
* 3 if, else if conditionals. Example: Since no even number, except 2 can
* be a prime number and the next prime we find after our checks is 5,
* we will start the for loop with i = 5. then for each loop we increment
* i by +6 and check if i or i+2 is a factor of the number; if it's a factor
* then we will return false. otherwise, true will be returned after the
* loop terminates at the terminating condition which is i*i <= num
*/
if (num <= 1) {
return false;
} else if (num == 2 || num == 3) {
return true;
} else if (num % 2 == 0 || num % 3 == 0) {
return false;
} else {
for (int64_t i = 5; i * i <= num; i = i + 6) {
if (num % i == 0 || num % (i + 2) == 0) {
return false;
}
}
}
return true;
}
return (result);
} // namespace math
/**
* @brief Self-test implementations
* @returns void
*/
static void tests() {
assert(math::is_prime(1) == false);
assert(math::is_prime(2) == true);
assert(math::is_prime(3) == true);
assert(math::is_prime(4) == false);
assert(math::is_prime(-4) == false);
assert(math::is_prime(7) == true);
assert(math::is_prime(-7) == false);
assert(math::is_prime(19) == true);
assert(math::is_prime(50) == false);
assert(math::is_prime(115249) == true);
std::cout << "All tests have successfully passed!" << std::endl;
}
/**
* Main function
* @brief Main function
* @returns 0 on exit
*/
int main() {
// perform self-test
assert(is_prime(50) == false);
assert(is_prime(115249) == true);
int num = 0;
std::cout << "Enter the number to check if it is prime or not" << std::endl;
std::cin >> num;
bool result = is_prime(num);
if (result) {
std::cout << num << " is a prime number" << std::endl;
} else {
std::cout << num << " is not a prime number" << std::endl;
}
tests(); // perform self-tests implementations
return 0;
}

16
physics/CMakeLists.txt Normal file
View File

@ -0,0 +1,16 @@
# If necessary, use the RELATIVE flag, otherwise each source file may be listed
# with full pathname. The RELATIVE flag makes it easier to extract an executable's name
# automatically.
file( GLOB APP_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp )
# Every .cpp file in this directory becomes its own executable, named after the
# file with the ".cpp" suffix removed.
foreach( testsourcefile ${APP_SOURCES} )
string( REPLACE ".cpp" "" testname ${testsourcefile} ) # File type. Example: `.cpp`
add_executable( ${testname} ${testsourcefile} )
set_target_properties(${testname} PROPERTIES LINKER_LANGUAGE CXX)
# OpenMP is linked only when OpenMP_CXX_FOUND is set; presumably by a
# find_package(OpenMP) call in a parent CMakeLists.txt - confirm.
if(OpenMP_CXX_FOUND)
target_link_libraries(${testname} OpenMP::OpenMP_CXX)
endif()
install(TARGETS ${testname} DESTINATION "bin/physics") # Folder name. Do NOT include `<>`
endforeach( testsourcefile ${APP_SOURCES} )

270
strings/boyer_moore.cpp Normal file
View File

@ -0,0 +1,270 @@
/**
* @file
* @brief
* The [BoyerMoore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm) algorithm searches for occurrences of pattern P in text T by
* performing explicit character comparisons at different alignments. Instead of
* a brute-force search of all alignments (of which there are n - m + 1),
* BoyerMoore uses information gained by preprocessing P to skip as many
* alignments as possible.
*
* @details
* The key insight in this algorithm is that if the end of the pattern is
* compared to the text, then jumps along the text can be made rather than
* checking every character of the text. The reason that this works is that in
* lining up the pattern against the text, the last character of the pattern is
* compared to the character in the text.
*
* If the characters do not match, there is no need to continue searching
* backwards along the text. This leaves us with two cases.
*
* Case 1:
* If the character in the text does not match any of the characters in the
* pattern, then the next character in the text to check is located m characters
* farther along the text, where m is the length of the pattern.
*
* Case 2:
* If the character in the text is in the pattern, then a partial shift of the
* pattern along the text is done to line up along the matching character and
* the process is repeated.
*
* There are two shift rules:
*
* [The bad character rule]
* (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_bad_character_rule)
*
* [The good suffix rule]
* (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_good_suffix_rule)
*
* The shift rules are implemented as constant-time table lookups, using tables
* generated during the preprocessing of P.
* @author [Stoycho Kyosev](https://github.com/stoychoX)
*/
#include <cassert> /// for assert
#include <climits> /// for CHAR_MAX macro
#include <cstring> /// for strlen
#include <iostream> /// for IO operations
#include <string> /// for std::string
#include <vector> /// for std::vector
// NOTE(review): CHAR_MAX is the largest value of plain `char`, so a table with
// this many entries only has slots for the values 0..CHAR_MAX - 1.
#define APLHABET_SIZE CHAR_MAX ///< number of symbols in the alphabet we use
/**
* @namespace
* @brief String algorithms
*/
namespace strings {
/**
* @namespace
* @brief Functions for the [Boyer
* Moore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm)
* algorithm implementation
*/
namespace boyer_moore {
/**
 * @brief A structure representing all the data we need to search the
 * preprocessed pattern in text.
 */
struct pattern {
    std::string pat;  ///< the pattern being searched for

    std::vector<size_t>
        bad_char;  ///< bad char table used in [Bad Character
                   ///< Heuristic](https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/),
                   ///< indexed by the character converted to unsigned char

    std::vector<size_t>
        good_suffix;  ///< good suffix table used for [Good Suffix
                      ///< heuristic](https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp)
};

/**
 * @brief A function that preprocesses the good suffix table
 *
 * @details After the call, arg has str.size() + 1 entries. Entry i is the
 * shift to apply when the pattern characters from index i onwards matched the
 * text and the character at index i - 1 did not; entry 0 is the shift to apply
 * after a full match. Previous contents of arg are discarded.
 *
 * @param str The string being preprocessed
 * @param arg The good suffix table
 * @returns void
 */
void init_good_suffix(const std::string& str, std::vector<size_t>& arg) {
    const size_t length = str.length();

    // assign() rather than resize(): a table that is being initialised a
    // second time must not keep shifts computed for the previous pattern
    arg.assign(length + 1, 0);

    // border_pos[i] - the starting index of the widest border of the suffix
    // str[i..], i.e. of the longest proper suffix of str[i..] that is also a
    // prefix of str[i..].
    std::vector<size_t> border_pos(length + 1, 0);

    size_t current_char = length;
    size_t border_index = length + 1;

    border_pos[current_char] = border_index;

    while (current_char > 0) {
        while (border_index <= length &&
               str[current_char - 1] != str[border_index - 1]) {
            // The border cannot be extended to the left: record the shift
            // that moves this border under the matched suffix
            if (arg[border_index] == 0) {
                arg[border_index] = border_index - current_char;
            }

            border_index = border_pos[border_index];
        }

        current_char--;
        border_index--;
        border_pos[current_char] = border_index;
    }

    size_t largest_border_index = border_pos[0];

    // Fill every entry that is still empty, including the last one (index
    // `length`), which is the entry used when the very first comparison fails
    for (size_t i = 0; i <= length; i++) {
        if (arg[i] == 0) {
            arg[i] = largest_border_index;
        }

        // If we go past the largest border we find the next one as we iterate
        if (i == largest_border_index) {
            largest_border_index = border_pos[largest_border_index];
        }
    }
}

/**
 * @brief A function that preprocesses the bad char table
 *
 * @details After the call, arg has UCHAR_MAX + 1 entries, one per possible
 * value of unsigned char. The entry for a character is the distance from its
 * last occurrence in str to the end of str, or str.length() when the character
 * does not occur. Characters are converted to unsigned char before indexing so
 * that negative `char` values cannot index outside the table. Previous
 * contents of arg are discarded.
 *
 * @param str The string being preprocessed
 * @param arg The bad char table
 * @returns void
 */
void init_bad_char(const std::string& str, std::vector<size_t>& arg) {
    arg.assign(static_cast<size_t>(UCHAR_MAX) + 1, str.length());

    for (size_t i = 0; i < str.length(); i++) {
        arg[static_cast<unsigned char>(str[i])] = str.length() - i - 1;
    }
}

/**
 * @brief A function that initializes pattern
 *
 * @param str Text used for initialization
 * @param arg Initialized structure
 * @returns void
 */
void init_pattern(const std::string& str, pattern& arg) {
    arg.pat = str;
    init_bad_char(str, arg.bad_char);
    init_good_suffix(str, arg.good_suffix);
}

/**
 * @brief A function that implements Boyer-Moore's algorithm.
 *
 * @details Returns an empty vector when the pattern is empty or longer than
 * the text. Overlapping occurrences are reported.
 *
 * @param str Text we are searching in.
 * @param arg pattern structure containing the preprocessed pattern, as filled
 * in by init_pattern
 * @return Vector of indexes of the occurrences of pattern in text
 */
std::vector<size_t> search(const std::string& str, const pattern& arg) {
    std::vector<size_t> index_storage;

    const size_t pattern_length = arg.pat.size();
    if (pattern_length == 0 || pattern_length > str.length()) {
        return index_storage;
    }

    // Index in the text that is aligned with the last pattern character
    size_t index_position = pattern_length - 1;

    while (index_position < str.length()) {
        // Compare right to left and count the characters that agree
        size_t matched = 0;
        while (matched < pattern_length &&
               str[index_position - matched] ==
                   arg.pat[pattern_length - 1 - matched]) {
            ++matched;
        }

        if (matched == pattern_length) {
            index_storage.push_back(index_position - pattern_length + 1);
            index_position += arg.good_suffix[0];
            continue;
        }

        const auto bad =
            static_cast<unsigned char>(str[index_position - matched]);
        // A character without a table slot cannot occur in the pattern
        const size_t distance_to_end =
            bad < arg.bad_char.size() ? arg.bad_char[bad] : pattern_length;

        // The table stores distances measured from the end of the pattern,
        // while the mismatch is `matched` characters before the end. The
        // difference is the shift that lines the last occurrence of the bad
        // character up with the text. If that occurrence is not to the left
        // of the mismatch, fall back to a shift of one.
        const size_t bad_char_shift =
            distance_to_end > matched ? distance_to_end - matched : 1;
        const size_t good_suffix_shift =
            arg.good_suffix[pattern_length - matched];

        index_position += bad_char_shift > good_suffix_shift
                              ? bad_char_shift
                              : good_suffix_shift;
    }

    return index_storage;
}
/**
 * @brief Check if pat is prefix of str.
 *
 * @details str must hold at least len characters before its terminator;
 * otherwise the answer is `false` without comparing anything.
 *
 * @param str pointer to some part of the input text.
 * @param pat the searched pattern.
 * @param len length of the searched pattern
 * @returns `true` if pat IS prefix of str.
 * @returns `false` if pat is NOT a prefix of str.
 */
bool is_prefix(const char* str, const char* pat, size_t len) {
    const bool long_enough = std::strlen(str) >= len;
    // strncmp stops at the first difference, just like the character loop
    return long_enough && std::strncmp(str, pat, len) == 0;
}
} // namespace boyer_moore
} // namespace strings
/**
* @brief A test case in which we search for every appearance of the word 'and'
* @param text The text in which we search for appearance of the word 'and'
* @returns void
*/
void and_test(const char* text) {
strings::boyer_moore::pattern ands;
strings::boyer_moore::init_pattern("and", ands);
std::vector<size_t> indexes = strings::boyer_moore::search(text, ands);
assert(indexes.size() == 2);
assert(strings::boyer_moore::is_prefix(text + indexes[0], "and", 3));
assert(strings::boyer_moore::is_prefix(text + indexes[1], "and", 3));
}
/**
* @brief A test case in which we search for every appearance of the word 'pat'
* @param text The text in which we search for appearance of the word 'pat'
* @returns void
*/
void pat_test(const char* text) {
strings::boyer_moore::pattern pat;
strings::boyer_moore::init_pattern("pat", pat);
std::vector<size_t> indexes = strings::boyer_moore::search(text, pat);
assert(indexes.size() == 6);
for (const auto& currentIndex : indexes) {
assert(strings::boyer_moore::is_prefix(text + currentIndex, "pat", 3));
}
}
/**
* @brief Self-test implementations
* @details Builds one sample text and runs and_test and pat_test on it. The
* literal is continued across lines with backslashes, so any whitespace at the
* start of a continuation line is part of the text.
* @returns void
*/
static void tests() {
const char* text =
"When pat Mr. and Mrs. pat Dursley woke up on the dull, gray \
Tuesday our story starts, \
there was nothing about pat the cloudy sky outside to pat suggest that\
strange and \
mysterious things would pat soon be happening all pat over the \
country.";
and_test(text);
pat_test(text);
std::cout << "All tests have successfully passed!\n";
}
/**
* @brief Main function: runs the self-tests and nothing else
* @returns 0 on exit
*/
int main() {
tests(); // run self-test implementations
return 0;
}