diff --git a/.github/workflows/awesome_workflow.yml b/.github/workflows/awesome_workflow.yml index c4c398cd9..28ec557f0 100644 --- a/.github/workflows/awesome_workflow.yml +++ b/.github/workflows/awesome_workflow.yml @@ -27,11 +27,6 @@ jobs: wget https://raw.githubusercontent.com/TheAlgorithms/scripts/main/filename_formatter.sh chmod +x filename_formatter.sh ./filename_formatter.sh . .cpp,.hpp - - name: Update DIRECTORY.md - run: | - wget https://raw.githubusercontent.com/TheAlgorithms/scripts/main/build_directory_md.py - python3 build_directory_md.py C-Plus-Plus . .cpp,.hpp,.h > DIRECTORY.md - git commit -m "updating DIRECTORY.md" DIRECTORY.md || true - name: Get file changes run: | git branch diff --git a/.github/workflows/directory_writer.yml b/.github/workflows/directory_writer.yml new file mode 100644 index 000000000..06cf942e0 --- /dev/null +++ b/.github/workflows/directory_writer.yml @@ -0,0 +1,31 @@ +name: Directory writer +on: + push: + branches: + - main + schedule: + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + # │ │ │ │ │ + # │ │ │ │ │ + # │ │ │ │ │ + # * * * * * + - cron: '0 0 * * 1' +jobs: + build: + if: github.repository == 'TheAlgorithms/C-Plus-Plus' # We only need this to run in our repository. + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Build directory + uses: TheAlgorithms/scripts/directory_md@main + with: + language: C-Plus-Plus + working-directory: . 
+ filetypes: .cpp,.hpp,.h + ignored-directories: doc/ diff --git a/.vscode/settings.json b/.vscode/settings.json index 67fe06477..f6d76514f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -59,6 +59,33 @@ "stdexcept": "cpp", "streambuf": "cpp", "typeinfo": "cpp", - "valarray": "cpp" + "valarray": "cpp", + "bit": "cpp", + "charconv": "cpp", + "compare": "cpp", + "concepts": "cpp", + "format": "cpp", + "forward_list": "cpp", + "ios": "cpp", + "locale": "cpp", + "queue": "cpp", + "stack": "cpp", + "xfacet": "cpp", + "xhash": "cpp", + "xiosbase": "cpp", + "xlocale": "cpp", + "xlocbuf": "cpp", + "xlocinfo": "cpp", + "xlocmes": "cpp", + "xlocmon": "cpp", + "xlocnum": "cpp", + "xloctime": "cpp", + "xmemory": "cpp", + "xstddef": "cpp", + "xstring": "cpp", + "xtr1common": "cpp", + "xtree": "cpp", + "xutility": "cpp", + "climits": "cpp" } } diff --git a/CMakeLists.txt b/CMakeLists.txt index 38a3ecf70..245615de4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ add_subdirectory(graph) add_subdirectory(divide_and_conquer) add_subdirectory(games) add_subdirectory(cpu_scheduling_algorithms) +add_subdirectory(physics) cmake_policy(SET CMP0054 NEW) cmake_policy(SET CMP0057 NEW) diff --git a/DIRECTORY.md b/DIRECTORY.md index e15d1a07f..91fd3ef28 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -171,6 +171,7 @@ * [Md5](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/md5.cpp) * [Quadratic Probing Hash Table](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/quadratic_probing_hash_table.cpp) * [Sha1](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/sha1.cpp) + * [Sha256](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/hashing/sha256.cpp) ## Machine Learning * [A Star Search](https://github.com/TheAlgorithms/C-Plus-Plus/blob/HEAD/machine_learning/a_star_search.cpp) diff --git a/README.md b/README.md index 03093277b..5ad49b184 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This repository is a 
collection of open-source implementation of a variety of al * The repository provides implementations of various algorithms in one of the most fundamental general purpose languages - [C++](https://en.wikipedia.org/wiki/C%2B%2B). * Well documented source code with detailed explanations provide a valuable resource for educators and students alike. * Each source code is atomic using [STL classes](https://en.wikipedia.org/wiki/Standard_Template_Library) and _no external libraries_ are required for their compilation and execution. Thus, the fundamentals of the algorithms can be studied in much depth. -* Source codes are [compiled and tested](https://github.com/TheAlgorithms/C-Plus-Plus/actions?query=workflow%3A%22Awesome+CI+Workflow%22) for every commit on the latest versions of three major operating systems viz., Windows, MacOS and Ubuntu (Linux) using MSVC 16 2019, AppleClang 11.0 and GNU 7.5.0 respectively. +* Source codes are [compiled and tested](https://github.com/TheAlgorithms/C-Plus-Plus/actions?query=workflow%3A%22Awesome+CI+Workflow%22) for every commit on the latest versions of three major operating systems viz., Windows, MacOS, and Ubuntu (Linux) using MSVC 19 2022, AppleClang 14.0.0, and GNU 11.3.0 respectively. * Strict adherence to [C++11](https://en.wikipedia.org/wiki/C%2B%2B11) standard ensures portability of code to embedded systems as well like ESP32, ARM Cortex, etc. with little to no changes. * Self-checks within programs ensure correct implementations with confidence. * Modular implementations and OpenSource licensing enable the functions to be utilized conveniently in other applications. diff --git a/hashing/sha256.cpp b/hashing/sha256.cpp new file mode 100644 index 000000000..0eae0bd36 --- /dev/null +++ b/hashing/sha256.cpp @@ -0,0 +1,329 @@ +/** + * @file + * @author [Md. 
Anisul Haque](https://github.com/mdanisulh) + * @brief Simple C++ implementation of the [SHA-256 Hashing Algorithm] + * (https://en.wikipedia.org/wiki/SHA-2) + * + * @details + * [SHA-2](https://en.wikipedia.org/wiki/SHA-2) is a set of cryptographic hash + * functions that was designed by the + * [NSA](https://en.wikipedia.org/wiki/National_Security_Agency) and first + * published in 2001. SHA-256 is a part of the SHA-2 family. SHA-256 is widely + * used for authenticating software packages and secure password hashing. + */ + +#include /// For std::array +#include /// For assert +#include /// For uint8_t, uint32_t and uint64_t data types +#include /// For std::setfill and std::setw +#include /// For IO operations +#include /// For std::stringstream +#include /// For std::move +#include /// For std::vector + +/** + * @namespace hashing + * @brief Hashing algorithms + */ +namespace hashing { +/** + * @namespace sha256 + * @brief Functions for the [SHA-256](https://en.wikipedia.org/wiki/SHA-2) + * algorithm implementation + */ +namespace sha256 { +/** + * @class Hash + * @brief Contains hash array and functions to update it and convert it to a + * hexadecimal string + */ +class Hash { + // Initialize array of hash values with first 32 bits of the fractional + // parts of the square roots of the first 8 primes 2..19 + std::array hash = {0x6A09E667, 0xBB67AE85, 0x3C6EF372, + 0xA54FF53A, 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19}; + + public: + void update(const std::array &blocks); + std::string to_string() const; +}; + +/** + * @brief Rotates the bits of a 32-bit unsigned integer to the right + * @param n Integer to rotate + * @param rotate Number of bits to rotate by; expected to be in 1..31 + * @return uint32_t The rotated integer + */ +uint32_t right_rotate(uint32_t n, size_t rotate) { + return (n >> rotate) | (n << (32 - rotate)); +} + +/** + * @brief Updates the hash array + * @param blocks Message schedule array + * @return void + */ +void Hash::update(const std::array &blocks) { + // Initialize
array of round constants with first 32 bits of the fractional + // parts of the cube roots of the first 64 primes 2..311 + const std::array round_constants = { + 0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5, 0x3956C25B, 0x59F111F1, + 0x923F82A4, 0xAB1C5ED5, 0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174, 0xE49B69C1, 0xEFBE4786, + 0x0FC19DC6, 0x240CA1CC, 0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7, 0xC6E00BF3, 0xD5A79147, + 0x06CA6351, 0x14292967, 0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85, 0xA2BFE8A1, 0xA81A664B, + 0xC24B8B70, 0xC76C51A3, 0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5, 0x391C0CB3, 0x4ED8AA4A, + 0x5B9CCA4F, 0x682E6FF3, 0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2}; + + // Initialize working variables + auto a = hash[0]; + auto b = hash[1]; + auto c = hash[2]; + auto d = hash[3]; + auto e = hash[4]; + auto f = hash[5]; + auto g = hash[6]; + auto h = hash[7]; + + // Compression function main loop + for (size_t block_num = 0; block_num < 64; ++block_num) { + const auto s1 = + right_rotate(e, 6) ^ right_rotate(e, 11) ^ right_rotate(e, 25); + const auto ch = (e & f) ^ (~e & g); + const auto temp1 = + h + s1 + ch + round_constants[block_num] + blocks[block_num]; + const auto s0 = + right_rotate(a, 2) ^ right_rotate(a, 13) ^ right_rotate(a, 22); + const auto maj = (a & b) ^ (a & c) ^ (b & c); + const auto temp2 = s0 + maj; + + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + // Update hash values + hash[0] += a; + hash[1] += b; + hash[2] += c; + hash[3] += d; + hash[4] += e; + hash[5] += f; + hash[6] += g; + hash[7] += h; +} + +/** + * @brief Convert the hash to a hexadecimal string + * @return std::string Final hash value + */ +std::string 
Hash::to_string() const { + std::stringstream ss; + for (size_t i = 0; i < 8; ++i) { + ss << std::hex << std::setfill('0') << std::setw(8) << hash[i]; + } + return ss.str(); +} + +/** + * @brief Computes size of the padded input (always a multiple of 64 bytes) + * @param input_size Size of the unpadded input in bytes + * @return size_t Size of the padded input + */ +std::size_t compute_padded_size(const std::size_t input_size) { + if (input_size % 64 < 56) { + return input_size + 64 - (input_size % 64); + } + return input_size + 128 - (input_size % 64); +} + +/** + * @brief Returns the byte at position byte_num in in_value (0 = lowest byte) + * @param in_value Input value + * @param byte_num Position of byte to be returned + * @return uint8_t Byte at position byte_num; throws if it does not exist + */ +template +uint8_t extract_byte(const T in_value, const std::size_t byte_num) { + if (sizeof(in_value) <= byte_num) { + throw std::out_of_range("Byte at index byte_num does not exist"); + } + return (in_value >> (byte_num * 8)) & 0xFF; +} + +/** + * @brief Returns the character at pos after the input is padded + * @param input Input string + * @param pos Position of character to be returned + * @return char Character at the index pos in the padded string + */ +char get_char(const std::string &input, std::size_t pos) { + const auto input_size = input.length(); + if (pos < input_size) { + return input[pos]; + } + if (pos == input_size) { + return '\x80'; + } + const auto padded_input_size = compute_padded_size(input_size); + if (pos < padded_input_size - 8) { + return '\x00'; + } + if (padded_input_size <= pos) { + throw std::out_of_range("pos is out of range"); + } + return static_cast( + extract_byte(input_size * 8, padded_input_size - pos - 1)); +} + +/** + * @brief Creates the message schedule array + * @param input Input string + * @param byte_num Position of the first byte of the chunk + * @return std::array Message schedule array + */ +std::array create_message_schedule_array(const std::string &input, + const size_t byte_num) { + std::array blocks{}; + + // Copy chunk
into first 16 words of the message schedule array + for (size_t block_num = 0; block_num < 16; ++block_num) { + blocks[block_num] = + (static_cast(get_char(input, byte_num + block_num * 4)) + << 24) | + (static_cast(get_char(input, byte_num + block_num * 4 + 1)) + << 16) | + (static_cast(get_char(input, byte_num + block_num * 4 + 2)) + << 8) | + static_cast(get_char(input, byte_num + block_num * 4 + 3)); + } + + // Extend the first 16 words into remaining 48 words of the message schedule + // array + for (size_t block_num = 16; block_num < 64; ++block_num) { + const auto s0 = right_rotate(blocks[block_num - 15], 7) ^ + right_rotate(blocks[block_num - 15], 18) ^ + (blocks[block_num - 15] >> 3); + const auto s1 = right_rotate(blocks[block_num - 2], 17) ^ + right_rotate(blocks[block_num - 2], 19) ^ + (blocks[block_num - 2] >> 10); + blocks[block_num] = + blocks[block_num - 16] + s0 + blocks[block_num - 7] + s1; + } + + return blocks; +} + +/** + * @brief Computes the final hash value + * @param input Input string + * @return std::string The final hash value + */ +std::string sha256(const std::string &input) { + Hash h; + // Process message in successive 512-bit (64-byte) chunks + for (size_t byte_num = 0; byte_num < compute_padded_size(input.length()); + byte_num += 64) { + h.update(create_message_schedule_array(input, byte_num)); + } + return h.to_string(); +} +} // namespace sha256 +} // namespace hashing + +/** + * @brief Self-test implementations + * @returns void + */ +static void test_compute_padded_size() { + assert(hashing::sha256::compute_padded_size(55) == 64); + assert(hashing::sha256::compute_padded_size(56) == 128); + assert(hashing::sha256::compute_padded_size(130) == 192); +} + +static void test_extract_byte() { + assert(hashing::sha256::extract_byte(512, 0) == 0); + assert(hashing::sha256::extract_byte(512, 1) == 2); + bool exception = false; + try { + hashing::sha256::extract_byte(512, 5); + } catch (const std::out_of_range &) { + exception = true; + 
} + assert(exception); +} + +static void test_get_char() { + assert(hashing::sha256::get_char("test", 3) == 't'); + assert(hashing::sha256::get_char("test", 4) == '\x80'); + assert(hashing::sha256::get_char("test", 5) == '\x00'); + assert(hashing::sha256::get_char("test", 63) == 32); + bool exception = false; + try { + hashing::sha256::get_char("test", 64); + } catch (const std::out_of_range &) { + exception = true; + } + assert(exception); +} + +static void test_right_rotate() { + assert(hashing::sha256::right_rotate(128, 3) == 16); + assert(hashing::sha256::right_rotate(1, 30) == 4); + assert(hashing::sha256::right_rotate(6, 30) == 24); +} + +static void test_sha256() { + struct TestCase { + const std::string input; + const std::string expected_hash; + TestCase(std::string input, std::string expected_hash) + : input(std::move(input)), + expected_hash(std::move(expected_hash)) {} + }; + const std::vector test_cases{ + TestCase( + "", + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), + TestCase( + "test", + "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"), + TestCase( + "Hello World", + "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e"), + TestCase("Hello World!", + "7f83b1657ff1fc53b92dc18148a1d65dfc2d4b1fa3d677284addd200126d9" + "069")}; + for (const auto &tc : test_cases) { + assert(hashing::sha256::sha256(tc.input) == tc.expected_hash); + } +} + +static void test() { + test_compute_padded_size(); + test_extract_byte(); + test_get_char(); + test_right_rotate(); + test_sha256(); + + std::cout << "All tests have successfully passed!\n"; +} + +/** + * @brief Main function + * @returns 0 on exit + */ +int main() { + test(); // Run self-test implementations + return 0; +} diff --git a/math/check_factorial.cpp b/math/check_factorial.cpp index 2170b81a0..5573f4f83 100644 --- a/math/check_factorial.cpp +++ b/math/check_factorial.cpp @@ -1,64 +1,73 @@ /** * @file - * @brief A simple program to check if the given 
number is a factorial of some + * @brief A simple program to check if the given number is a [factorial](https://en.wikipedia.org/wiki/Factorial) of some * number or not. + * + * @details A factorial number is the sum of k! where any value of k is a + * positive integer. https://www.mathsisfun.com/numbers/factorial.html + * * @author [Divyajyoti Ukirde](https://github.com/divyajyotiuk) + * @author [ewd00010](https://github.com/ewd00010) */ -#include -#include +#include /// for assert +#include /// for cout /** - * Function to check if the given number is factorial of some number or not. - * @param n number to be checked. - * @return if number is a factorial, returns true, else false. + * @namespace + * @brief Mathematical algorithms + */ +namespace math { +/** + * @brief Function to check if the given number is factorial of some number or + * not. + * @param n number to be checked. + * @return true if number is a factorial returns true + * @return false if number is not a factorial */ - bool is_factorial(uint64_t n) { - if (n <= 0) { + if (n <= 0) { // factorial numbers are only ever positive Integers return false; } - for (uint32_t i = 1;; i++) { - if (n % i != 0) { - break; - } - n = n / i; - } - if (n == 1) { - return true; - } else { - return false; - } -} -/** Test function + /*! + * this loop is basically a reverse factorial calculation, where instead + * of multiplying we are dividing. We start at i = 2 since i = 1 has + * no impact division wise + */ + int i = 2; + while (n % i == 0) { + n = n / i; + i++; + } + + /*! 
+ * if n was the sum of a factorial then it should be divided until it + * becomes 1 + */ + return (n == 1); +} +} // namespace math + +/** + * @brief Self-test implementations * @returns void */ -void tests() { - std::cout << "Test 1:\t n=50\n"; - assert(is_factorial(50) == false); - std::cout << "passed\n"; +static void tests() { + assert(math::is_factorial(50) == false); + assert(math::is_factorial(720) == true); + assert(math::is_factorial(0) == false); + assert(math::is_factorial(1) == true); + assert(math::is_factorial(479001600) == true); + assert(math::is_factorial(-24) == false); - std::cout << "Test 2:\t n=720\n"; - assert(is_factorial(720) == true); - std::cout << "passed\n"; - - std::cout << "Test 3:\t n=0\n"; - assert(is_factorial(0) == false); - std::cout << "passed\n"; - - std::cout << "Test 4:\t n=479001600\n"; - assert(is_factorial(479001600) == true); - std::cout << "passed\n"; - - std::cout << "Test 5:\t n=-24\n"; - assert(is_factorial(-24) == false); - std::cout << "passed\n"; + std::cout << "All tests have successfully passed!" << std::endl; } -/** Main function +/** + * @brief Main function * @returns 0 on exit */ int main() { - tests(); + tests(); // run self-test implementations return 0; } diff --git a/math/check_prime.cpp b/math/check_prime.cpp index a7b313551..a05bd8517 100644 --- a/math/check_prime.cpp +++ b/math/check_prime.cpp @@ -1,62 +1,83 @@ /** - * Copyright 2020 @author omkarlanghe - * * @file - * A simple program to check if the given number if prime or not. - * * @brief - * Reduced all possibilities of a number which cannot be prime. 
- * Eg: No even number, except 2 can be a prime number, hence we will increment - * our loop with i+6 jumping and check for i or i+2 to be a factor of the - * number; if it's a factor then we will return false otherwise true after the - * loop terminates at the terminating condition which is (i*i<=num) + * A simple program to check if the given number is [Prime](https://en.wikipedia.org/wiki/Primality_test) or not. + * @details + * A prime number is any number that can be divided only by itself and 1. It must + * be positive and a whole number, therefore any prime number is part of the + * set of natural numbers. All prime numbers are odd numbers, with + * the exception of 2. This algorithm checks primality using this information. + * Additional ways to solve the prime check problem: + * https://cp-algorithms.com/algebra/primality_tests.html#practice-problems + * @author [Omkar Langhe](https://github.com/omkarlanghe) + * @author [ewd00010](https://github.com/ewd00010) */ #include /// for assert #include /// for IO operations /** - * Function to check if the given number is prime or not. - * @param num number to be checked. - * @return if number is prime, it returns @ true, else it returns @ false. + * @brief Mathematical algorithms + * @namespace math */ -template -bool is_prime(T num) { - bool result = true; - if (num <= 1) { - return false; - } else if (num == 2 || num == 3) { - return true; - } else if ((num % 2) == 0 || num % 3 == 0) { - return false; - } else { - for (T i = 5; (i * i) <= (num); i = (i + 6)) { - if ((num % i) == 0 || (num % (i + 2) == 0)) { - result = false; - break; +namespace math { + /** + * @brief Function to check if the given number is prime or not. + * @param num number to be checked. + * @return true if number is a prime + * @return false if number is not a prime. + */ + bool is_prime(int64_t num) { + /*! + * Reduce all possibilities of a number which cannot be prime with the first + * 3 if, else if conditionals.
Example: Since no even number, except 2 can + * be a prime number and the next prime we find after our checks is 5, + * we will start the for loop with i = 5. then for each loop we increment + * i by +6 and check if i or i+2 is a factor of the number; if it's a factor + * then we will return false. otherwise, true will be returned after the + * loop ends once i*i > num (tested as i <= num / i to avoid overflow) + */ + if (num <= 1) { + return false; + } else if (num == 2 || num == 3) { + return true; + } else if (num % 2 == 0 || num % 3 == 0) { + return false; + } else { + for (int64_t i = 5; i <= num / i; i = i + 6) { + if (num % i == 0 || num % (i + 2) == 0) { + return false; + } } } + return true; } - return (result); +} // namespace math + +/** + * @brief Self-test implementations + * @returns void + */ +static void tests() { + assert(math::is_prime(1) == false); + assert(math::is_prime(2) == true); + assert(math::is_prime(3) == true); + assert(math::is_prime(4) == false); + assert(math::is_prime(-4) == false); + assert(math::is_prime(7) == true); + assert(math::is_prime(-7) == false); + assert(math::is_prime(19) == true); + assert(math::is_prime(50) == false); + assert(math::is_prime(115249) == true); + + std::cout << "All tests have successfully passed!"
<< std::endl; } /** - * Main function + * @brief Main function + * @returns 0 on exit */ int main() { - // perform self-test - assert(is_prime(50) == false); - assert(is_prime(115249) == true); - - int num = 0; - std::cout << "Enter the number to check if it is prime or not" << std::endl; - std::cin >> num; - bool result = is_prime(num); - if (result) { - std::cout << num << " is a prime number" << std::endl; - } else { - std::cout << num << " is not a prime number" << std::endl; - } - + tests(); // perform self-tests implementations return 0; } diff --git a/physics/CMakeLists.txt b/physics/CMakeLists.txt new file mode 100644 index 000000000..4d33e8eec --- /dev/null +++ b/physics/CMakeLists.txt @@ -0,0 +1,16 @@ +# If necessary, use the RELATIVE flag, otherwise each source file may be listed +# with full pathname. The RELATIVE flag makes it easier to extract an executable's name +# automatically. + +file( GLOB APP_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp ) +foreach( testsourcefile ${APP_SOURCES} ) + string( REPLACE ".cpp" "" testname ${testsourcefile} ) # File type. Example: `.cpp` + add_executable( ${testname} ${testsourcefile} ) + + set_target_properties(${testname} PROPERTIES LINKER_LANGUAGE CXX) + if(OpenMP_CXX_FOUND) + target_link_libraries(${testname} OpenMP::OpenMP_CXX) + endif() + install(TARGETS ${testname} DESTINATION "bin/physics") # Folder name. Do NOT include `<>` + +endforeach( testsourcefile ${APP_SOURCES} ) diff --git a/strings/boyer_moore.cpp b/strings/boyer_moore.cpp new file mode 100644 index 000000000..a8c4cbf8d --- /dev/null +++ b/strings/boyer_moore.cpp @@ -0,0 +1,270 @@ +/** + * @file + * @brief + * The [Boyer–Moore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm) algorithm searches for occurrences of pattern P in text T by + * performing explicit character comparisons at different alignments. 
Instead of + * a brute-force search of all alignments (of which there are n - m + 1), + * Boyer–Moore uses information gained by preprocessing P to skip as many + * alignments as possible. + * + * @details + * The key insight in this algorithm is that if the end of the pattern is + * compared to the text, then jumps along the text can be made rather than + * checking every character of the text. The reason that this works is that in + * lining up the pattern against the text, the last character of the pattern is + * compared to the character in the text. + * + * If the characters do not match, there is no need to continue searching + * backwards along the text. This leaves us with two cases. + * + * Case 1: + * If the character in the text does not match any of the characters in the + * pattern, then the next character in the text to check is located m characters + * farther along the text, where m is the length of the pattern. + * + * Case 2: + * If the character in the text is in the pattern, then a partial shift of the + * pattern along the text is done to line up along the matching character and + * the process is repeated. + * + * There are two shift rules: + * + * [The bad character rule] + * (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_bad_character_rule) + * + * [The good suffix rule] + * (https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#The_good_suffix_rule) + * + * The shift rules are implemented as constant-time table lookups, using tables + * generated during the preprocessing of P. 
+ * @author [Stoycho Kyosev](https://github.com/stoychoX) + */ + +#include /// for assert +#include /// for UCHAR_MAX macro +#include /// for strlen +#include /// for IO operations +#include /// for std::string +#include /// for std::vector + +#define ALPHABET_SIZE (UCHAR_MAX + 1) ///< number of symbols in our alphabet + +/** + * @namespace strings + * @brief String algorithms + */ +namespace strings { +/** + * @namespace boyer_moore + * @brief Functions for the [Boyer + * Moore](https://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm) + * algorithm implementation + */ +namespace boyer_moore { +/** + * @brief A structure representing all the data we need to search the + * preprocessed pattern in text. + */ +struct pattern { + std::string pat; + + std::vector + bad_char; ///< bad char table used in [Bad Character + ///< Heuristic](https://www.geeksforgeeks.org/boyer-moore-algorithm-for-pattern-searching/) + + std::vector + good_suffix; ///< good suffix table used for [Good Suffix + ///< heuristic](https://www.geeksforgeeks.org/boyer-moore-algorithm-good-suffix-heuristic/?ref=rp) +}; + +/** + * @brief A function that preprocesses the good suffix table + * + * @param str The string being preprocessed + * @param arg The good suffix table + * @returns void + */ +void init_good_suffix(const std::string& str, std::vector& arg) { + arg.assign(str.size() + 1, 0); + + // border_pos[i] - the index of the longest proper suffix of str[i..] which + // is also a proper prefix.
+ std::vector border_pos(str.size() + 1, 0); + + size_t current_char = str.length(); + + size_t border_index = str.length() + 1; + + border_pos[current_char] = border_index; + + while (current_char > 0) { + while (border_index <= str.length() && + str[current_char - 1] != str[border_index - 1]) { + if (arg[border_index] == 0) { + arg[border_index] = border_index - current_char; + } + + border_index = border_pos[border_index]; + } + + current_char--; + border_index--; + border_pos[current_char] = border_index; + } + + size_t largest_border_index = border_pos[0]; + + for (size_t i = 0; i < str.size(); i++) { + if (arg[i] == 0) { + arg[i] = largest_border_index; + } + + // If we go past the largest border we find the next one as we iterate + if (i == largest_border_index) { + largest_border_index = border_pos[largest_border_index]; + } + } +} + +/** + * @brief A function that preprocesses the bad char table + * + * @param str The string being preprocessed + * @param arg The bad char table + * @returns void + */ +void init_bad_char(const std::string& str, std::vector& arg) { + arg.assign(ALPHABET_SIZE, str.length()); + + for (size_t i = 0; i < str.length(); i++) { + arg[static_cast<unsigned char>(str[i])] = str.length() - i - 1; + } +} + +/** + * @brief A function that initializes pattern + * + * @param str Text used for initialization + * @param arg Initialized structure + * @returns void + */ +void init_pattern(const std::string& str, pattern& arg) { + arg.pat = str; + init_bad_char(str, arg.bad_char); + init_good_suffix(str, arg.good_suffix); +} +/** + * @brief A function that implements Boyer-Moore's algorithm. + * @details Shifts by the larger of the bad-character and good-suffix rules. + * @param str Text we are searching in.
+ * @param arg pattern structure containing the preprocessed pattern + * @return Vector of indexes of the occurrences of pattern in text + */ +std::vector search(const std::string& str, const pattern& arg) { + size_t index_position = arg.pat.size() - 1; + std::vector index_storage; + + while (index_position < str.length()) { + size_t index_string = index_position; + int index_pattern = static_cast(arg.pat.size()) - 1; + + while (index_pattern >= 0 && + str[index_string] == arg.pat[index_pattern]) { + --index_pattern; + --index_string; + } + + if (index_pattern < 0) { + index_storage.push_back(index_position - arg.pat.length() + 1); + index_position += arg.good_suffix[0]; + } else { + const auto bad = static_cast<unsigned char>(str[index_string]); + index_position = std::max(index_string + arg.bad_char[bad], + index_position + arg.good_suffix[index_pattern + 1]); + } + } + return index_storage; +} + +/** + * @brief Check if pat is prefix of str. + * + * @param str pointer to some part of the input text. + * @param pat the searched pattern. + * @param len length of the searched pattern + * @returns `true` if pat IS prefix of str. + * @returns `false` if pat is NOT a prefix of str.
+ */ +bool is_prefix(const char* str, const char* pat, size_t len) { + if (strlen(str) < len) { + return false; + } + + for (size_t i = 0; i < len; i++) { + if (str[i] != pat[i]) { + return false; + } + } + + return true; +} +} // namespace boyer_moore +} // namespace strings +/** + * @brief A test case in which we search for every appearance of the word 'and' + * @param text The text in which we search for appearance of the word 'and' + * @returns void + */ +void and_test(const char* text) { + strings::boyer_moore::pattern ands; + strings::boyer_moore::init_pattern("and", ands); + std::vector indexes = strings::boyer_moore::search(text, ands); + + assert(indexes.size() == 2); + assert(strings::boyer_moore::is_prefix(text + indexes[0], "and", 3)); + assert(strings::boyer_moore::is_prefix(text + indexes[1], "and", 3)); +} + +/** + * @brief A test case in which we search for every appearance of the word 'pat' + * @param text The text in which we search for appearance of the word 'pat' + * @returns void + */ +void pat_test(const char* text) { + strings::boyer_moore::pattern pat; + strings::boyer_moore::init_pattern("pat", pat); + std::vector indexes = strings::boyer_moore::search(text, pat); + + assert(indexes.size() == 6); + + for (const auto& currentIndex : indexes) { + assert(strings::boyer_moore::is_prefix(text + currentIndex, "pat", 3)); + } +} +/** + * @brief Self-test implementations + * @returns void + */ +static void tests() { + const char* text = + "When pat Mr. and Mrs. pat Dursley woke up on the dull, gray \ + Tuesday our story starts, \ + there was nothing about pat the cloudy sky outside to pat suggest that\ + strange and \ + mysterious things would pat soon be happening all pat over the \ + country."; + + and_test(text); + pat_test(text); + + std::cout << "All tests have successfully passed!\n"; +} + +/** + * @brief Main function + * @returns 0 on exit + */ +int main() { + tests(); // run self-test implementations + return 0; +}