From 85721be69b4011d4feda665c03147525bab67b7a Mon Sep 17 00:00:00 2001 From: Anuran Roy <76481787+anuran-roy@users.noreply.github.com> Date: Fri, 15 Oct 2021 00:04:55 +0530 Subject: [PATCH] feat: Modify search/text_search.cpp (#1662) * Modified search/text_search.cpp * Added tests * Added a few test cases * Added a few more test cases and documentation * Minor fix * Minor fixes * Minor fixes * Minor output fixes * Minor output fixes * Minor readability fixes * clang-format and clang-tidy fixes for a01765a6 * Restored original settings * clang-format and clang-tidy fixes for 6a8f3a4e Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: David Leal --- data_structures/stack_using_linked_list.cpp | 24 ++-- dynamic_programming/armstrong_number.cpp | 26 ++-- probability/geometric_dist.cpp | 128 ++++++++++++-------- probability/windowed_median.cpp | 90 ++++++++------ search/text_search.cpp | 72 +++++++++-- 5 files changed, 217 insertions(+), 123 deletions(-) diff --git a/data_structures/stack_using_linked_list.cpp b/data_structures/stack_using_linked_list.cpp index 202c5c9b6..a87e6efb0 100644 --- a/data_structures/stack_using_linked_list.cpp +++ b/data_structures/stack_using_linked_list.cpp @@ -15,7 +15,7 @@ void push(int x) { } void pop() { - if (top_var == NULL) { + if (top_var == nullptr) { std::cout << "\nUnderflow"; } else { node *t = top_var; @@ -27,14 +27,14 @@ void pop() { void show() { node *t = top_var; - while (t != NULL) { + while (t != nullptr) { std::cout << t->val << "\n"; t = t->next; } } int main() { - int ch, x; + int ch = 0, x = 0; do { std::cout << "\n0. Exit or Ctrl+C"; std::cout << "\n1. Push"; @@ -42,17 +42,23 @@ int main() { std::cout << "\n3. Print"; std::cout << "\nEnter Your Choice: "; std::cin >> ch; - switch(ch){ - case 0: break; - case 1: std::cout << "\nInsert : "; + switch (ch) { + case 0: + break; + case 1: + std::cout << "\nInsert : "; std::cin >> x; push(x); break; - case 2: pop(); + case 2: + pop(); break; - case 3: show(); + case 3: + show(); + break; + default: + std::cout << "Invalid option!\n"; break; - default: std::cout << "Invalid option!\n"; break; } } while (ch != 0); diff --git a/dynamic_programming/armstrong_number.cpp b/dynamic_programming/armstrong_number.cpp index ba8f054dd..53f1be9fe 100644 --- a/dynamic_programming/armstrong_number.cpp +++ b/dynamic_programming/armstrong_number.cpp @@ -1,6 +1,6 @@ // Program to check whether a number is an armstrong number or not -#include #include +#include using std::cin; using std::cout; @@ -8,34 +8,32 @@ int main() { int n = 0, temp = 0, rem = 0, count = 0, sum = 0; cout << "Enter a number: "; cin >> n; - + temp = n; - + /* First Count the number of digits in the given number */ - while(temp != 0) { + while (temp != 0) { temp /= 10; count++; } - /* Calaculation for checking of armstrongs number i.e. + /* Calaculation for checking of armstrongs number i.e. in a n digit number sum of the digits raised to a power of n is equal to the original number */ - + temp = n; - while(temp!=0) { - rem = temp%10; - sum += (int) pow(rem,count); - temp/=10; + while (temp != 0) { + rem = temp % 10; + sum += static_cast(pow(rem, count)); + temp /= 10; } - if (sum == n) { cout << n << " is an armstrong number"; - } - else { + } else { cout << n << " is not an armstrong number"; } - + return 0; } diff --git a/probability/geometric_dist.cpp b/probability/geometric_dist.cpp index 9be1d0e77..a9bc7aeb6 100644 --- a/probability/geometric_dist.cpp +++ b/probability/geometric_dist.cpp @@ -1,13 +1,16 @@ /** * @file - * @brief [Geometric Distribution](https://en.wikipedia.org/wiki/Geometric_distribution) + * @brief [Geometric + * Distribution](https://en.wikipedia.org/wiki/Geometric_distribution) * * @details - * The geometric distribution models the experiment of doing Bernoulli trials until a - * sucess was observed. There are two formulations of the geometric distribution: - * 1) The probability distribution of the number X of Bernoulli trials needed to get one success, supported on the set { 1, 2, 3, ... } - * 2) The probability distribution of the number Y = X − 1 of failures before the first success, supported on the set { 0, 1, 2, 3, ... } - * Here, the first one is implemented. + * The geometric distribution models the experiment of doing Bernoulli trials + * until a sucess was observed. There are two formulations of the geometric + * distribution: 1) The probability distribution of the number X of Bernoulli + * trials needed to get one success, supported on the set { 1, 2, 3, ... } 2) + * The probability distribution of the number Y = X − 1 of failures before the + * first success, supported on the set { 0, 1, 2, 3, ... } Here, the first one + * is implemented. * * Common variables used: * p - The success probability @@ -16,14 +19,14 @@ * @author [Domenic Zingsheim](https://github.com/DerAndereDomenic) */ -#include /// for assert -#include /// for math functions -#include /// for fixed size data types -#include /// for time to initialize rng -#include /// for std::cout -#include /// for std::numeric_limits -#include /// for random numbers -#include /// for std::vector +#include /// for assert +#include /// for math functions +#include /// for fixed size data types +#include /// for time to initialize rng +#include /// for std::cout +#include /// for std::numeric_limits +#include /// for random numbers +#include /// for std::vector /** * @namespace probability @@ -32,12 +35,15 @@ namespace probability { /** * @namespace geometric_dist - * @brief Functions for the [Geometric Distribution](https://en.wikipedia.org/wiki/Geometric_distribution) algorithm implementation + * @brief Functions for the [Geometric + * Distribution](https://en.wikipedia.org/wiki/Geometric_distribution) algorithm + * implementation */ namespace geometric_dist { /** * @brief Returns a random number between [0,1] - * @returns A uniformly distributed random number between 0 (included) and 1 (included) + * @returns A uniformly distributed random number between 0 (included) and 1 + * (included) */ float generate_uniform() { return static_cast(rand()) / static_cast(RAND_MAX); @@ -46,12 +52,11 @@ float generate_uniform() { /** * @brief A class to model the geometric distribution */ -class geometric_distribution -{ -private: - float p; ///< The succes probability p +class geometric_distribution { + private: + float p; ///< The succes probability p -public: + public: /** * @brief Constructor for the geometric distribution * @param p The success probability @@ -59,28 +64,24 @@ public: explicit geometric_distribution(const float& p) : p(p) {} /** - * @brief The expected value of a geometrically distributed random variable X + * @brief The expected value of a geometrically distributed random variable + * X * @returns E[X] = 1/p */ - float expected_value() const { - return 1.0f/ p; - } + float expected_value() const { return 1.0f / p; } /** * @brief The variance of a geometrically distributed random variable X * @returns V[X] = (1 - p) / p^2 */ - float variance() const { - return (1.0f - p) / (p * p); - } + float variance() const { return (1.0f - p) / (p * p); } /** - * @brief The standard deviation of a geometrically distributed random variable X + * @brief The standard deviation of a geometrically distributed random + * variable X * @returns \sigma = \sqrt{V[X]} */ - float standard_deviation() const { - return std::sqrt(variance()); - } + float standard_deviation() const { return std::sqrt(variance()); } /** * @brief The probability density function @@ -95,7 +96,8 @@ public: /** * @brief The cumulative distribution function - * @details The sum of all probabilities up to (and including) k trials. Basically CDF(k) = P(x <= k) + * @details The sum of all probabilities up to (and including) k trials. + * Basically CDF(k) = P(x <= k) * @param k The number of trials in [1,\infty) * @returns The probability to have success within k trials */ @@ -105,8 +107,9 @@ public: /** * @brief The inverse cumulative distribution function - * @details This functions answers the question: Up to how many trials are needed to have success with a probability of cdf? - * The exact floating point value is reported. + * @details This functions answers the question: Up to how many trials are + * needed to have success with a probability of cdf? The exact floating + * point value is reported. * @param cdf The probability in [0,1] * @returns The number of (exact) trials. */ @@ -115,26 +118,37 @@ public: } /** - * @brief Generates a (discrete) sample according to the geometrical distribution + * @brief Generates a (discrete) sample according to the geometrical + * distribution * @returns A geometrically distributed number in [1,\infty) */ uint32_t draw_sample() const { float uniform_sample = generate_uniform(); - return static_cast(inverse_cumulative_distribution(uniform_sample)) + 1; + return static_cast( + inverse_cumulative_distribution(uniform_sample)) + + 1; } /** - * @brief This function computes the probability to have success in a given range of tries + * @brief This function computes the probability to have success in a given + * range of tries * @details Computes P(min_tries <= x <= max_tries). - * Can be used to calculate P(x >= min_tries) by not passing a second argument. - * Can be used to calculate P(x <= max_tries) by passing 1 as the first argument + * Can be used to calculate P(x >= min_tries) by not passing a second + * argument. Can be used to calculate P(x <= max_tries) by passing 1 as the + * first argument * @param min_tries The minimum number of tries in [1,\infty) (inclusive) - * @param max_tries The maximum number of tries in [min_tries, \infty) (inclusive) - * @returns The probability of having success within a range of tries [min_tries, max_tries] + * @param max_tries The maximum number of tries in [min_tries, \infty) + * (inclusive) + * @returns The probability of having success within a range of tries + * [min_tries, max_tries] */ - float range_tries(const uint32_t& min_tries = 1, const uint32_t& max_tries = std::numeric_limits::max()) const { + float range_tries(const uint32_t& min_tries = 1, + const uint32_t& max_tries = + std::numeric_limits::max()) const { float cdf_lower = cumulative_distribution(min_tries - 1); - float cdf_upper = max_tries == std::numeric_limits::max() ? 1.0f : cumulative_distribution(max_tries); + float cdf_upper = max_tries == std::numeric_limits::max() + ? 1.0f + : cumulative_distribution(max_tries); return cdf_upper - cdf_lower; } }; @@ -144,10 +158,12 @@ public: /** * @brief Tests the sampling method of the geometric distribution * @details Draws 1000000 random samples and estimates mean and variance - * These should be close to the expected value and variance of the given distribution to pass. + * These should be close to the expected value and variance of the given + * distribution to pass. * @param dist The distribution to test */ -void sample_test(const probability::geometric_dist::geometric_distribution& dist) { +void sample_test( + const probability::geometric_dist::geometric_distribution& dist) { uint32_t n_tries = 1000000; std::vector tries; tries.resize(n_tries); @@ -165,11 +181,13 @@ void sample_test(const probability::geometric_dist::geometric_distribution& dist var += (tries[i] - mean) * (tries[i] - mean); } - //Unbiased estimate of variance + // Unbiased estimate of variance var /= static_cast(n_tries - 1); - std::cout << "This value should be near " << dist.expected_value() << ": " << mean << std::endl; - std::cout << "This value should be near " << dist.variance() << ": " << var << std::endl; + std::cout << "This value should be near " << dist.expected_value() << ": " + << mean << std::endl; + std::cout << "This value should be near " << dist.variance() << ": " << var + << std::endl; } /** @@ -187,7 +205,9 @@ static void test() { assert(std::abs(dist.standard_deviation() - 2.788866755) < threshold); assert(std::abs(dist.probability_density(5) - 0.07203) < threshold); assert(std::abs(dist.cumulative_distribution(6) - 0.882351) < threshold); - assert(std::abs(dist.inverse_cumulative_distribution(dist.cumulative_distribution(8)) - 8) < threshold); + assert(std::abs(dist.inverse_cumulative_distribution( + dist.cumulative_distribution(8)) - + 8) < threshold); assert(std::abs(dist.range_tries() - 1.0f) < threshold); assert(std::abs(dist.range_tries(3) - 0.49f) < threshold); assert(std::abs(dist.range_tries(5, 11) - 0.2203267f) < threshold); @@ -202,7 +222,9 @@ static void test() { assert(std::abs(dist.standard_deviation() - 1.4142135f) < threshold); assert(std::abs(dist.probability_density(5) - 0.03125) < threshold); assert(std::abs(dist.cumulative_distribution(6) - 0.984375) < threshold); - assert(std::abs(dist.inverse_cumulative_distribution(dist.cumulative_distribution(8)) - 8) < threshold); + assert(std::abs(dist.inverse_cumulative_distribution( + dist.cumulative_distribution(8)) - + 8) < threshold); assert(std::abs(dist.range_tries() - 1.0f) < threshold); assert(std::abs(dist.range_tries(3) - 0.25f) < threshold); assert(std::abs(dist.range_tries(5, 11) - 0.062011f) < threshold); @@ -217,7 +239,9 @@ static void test() { assert(std::abs(dist.standard_deviation() - 0.559016f) < threshold); assert(std::abs(dist.probability_density(5) - 0.00128) < threshold); assert(std::abs(dist.cumulative_distribution(6) - 0.999936) < threshold); - assert(std::abs(dist.inverse_cumulative_distribution(dist.cumulative_distribution(8)) - 8) < threshold); + assert(std::abs(dist.inverse_cumulative_distribution( + dist.cumulative_distribution(8)) - + 8) < threshold); assert(std::abs(dist.range_tries() - 1.0f) < threshold); assert(std::abs(dist.range_tries(3) - 0.04f) < threshold); assert(std::abs(dist.range_tries(5, 11) - 0.00159997f) < threshold); diff --git a/probability/windowed_median.cpp b/probability/windowed_median.cpp index 8945b063e..52c70ae24 100644 --- a/probability/windowed_median.cpp +++ b/probability/windowed_median.cpp @@ -4,8 +4,8 @@ * data stream * * @details - * Given a stream of integers, the algorithm calculates the median of a fixed size - * window at the back of the stream. The leading time complexity of this + * Given a stream of integers, the algorithm calculates the median of a fixed + * size window at the back of the stream. The leading time complexity of this * algorithm is O(log(N), and it is inspired by the known algorithm to [find * median from (infinite) data * stream](https://www.tutorialcup.com/interview/algorithm/find-median-from-data-stream.htm), @@ -17,13 +17,13 @@ * pushing and popping. Each new value is pushed to the window back, while a * value from the front of the window is popped. In addition, the algorithm * manages a multi-value binary search tree (BST), implemented by std::multiset. - * For each new value that is inserted into the window, it is also inserted to the - * BST. When a value is popped from the window, it is also erased from the BST. - * Both insertion and erasion to/from the BST are O(logN) in time, with N the - * size of the window. Finally, the algorithm keeps a pointer to the root of the - * BST, and updates its position whenever values are inserted or erased to/from - * BST. The root of the tree is the median! Hence, median retrieval is always - * O(1) + * For each new value that is inserted into the window, it is also inserted to + * the BST. When a value is popped from the window, it is also erased from the + * BST. Both insertion and erasion to/from the BST are O(logN) in time, with N + * the size of the window. Finally, the algorithm keeps a pointer to the root of + * the BST, and updates its position whenever values are inserted or erased + * to/from BST. The root of the tree is the median! Hence, median retrieval is + * always O(1) * * Time complexity: O(logN). Space complexity: O(N). N - size of window * @author [Yaniv Hollander](https://github.com/YanivHollander) @@ -32,8 +32,8 @@ #include /// for std::rand - needed in testing #include /// for std::time - needed in testing #include /// for std::list - used to manage sliding window -#include /// for std::multiset - used to manage multi-value sorted sliding window values -#include /// for std::vector - needed in testing +#include /// for std::multiset - used to manage multi-value sorted sliding window values +#include /// for std::vector - needed in testing /** * @namespace probability @@ -55,7 +55,7 @@ using size_type = Window::size_type; */ class WindowedMedian { const size_type _windowSize; ///< sliding window size - Window _window; ///< a sliding window of values along the stream + Window _window; ///< a sliding window of values along the stream std::multiset _sortedValues; ///< a DS to represent a balanced /// multi-value binary search tree (BST) std::multiset::const_iterator @@ -103,13 +103,14 @@ class WindowedMedian { } /// However, if the erased value is on the right branch or the median - /// itself, and the number of elements is odd, the new median will be the - /// left child of the current one + /// itself, and the number of elements is odd, the new median will be + /// the left child of the current one else if (value >= *_itMedian && sz % 2 != 0) { --_itMedian; // O(1) - traversing one step to the left child } - /// Find the (first) position of the value we want to erase, and erase it + /// Find the (first) position of the value we want to erase, and erase + /// it const auto it = _sortedValues.find(value); // O(logN) _sortedValues.erase(it); // O(logN) } @@ -126,16 +127,16 @@ class WindowedMedian { * @param value New value to insert */ void insert(int value) { - /// Push new value to the back of the sliding window - O(1) _window.push_back(value); insertToSorted(value); // Insert value to the multi-value BST - O(logN) - if (_window.size() > _windowSize) { /// If exceeding size of window, pop - /// from its left side - eraseFromSorted(_window.front()); /// Erase from the multi-value BST - /// the window left side value - _window - .pop_front(); /// Pop the left side value from the window - O(1) + if (_window.size() > _windowSize) { /// If exceeding size of window, + /// pop from its left side + eraseFromSorted( + _window.front()); /// Erase from the multi-value BST + /// the window left side value + _window.pop_front(); /// Pop the left side value from the window - + /// O(1) } } @@ -170,8 +171,8 @@ class WindowedMedian { 0.5f * *next(window.begin(), window.size() / 2 - 1); /// O(N) } }; -} /// namespace windowed_median -} /// namespace probability +} // namespace windowed_median +} // namespace probability /** * @brief Self-test implementations @@ -195,32 +196,41 @@ static void test(const std::vector &vals, int windowSize) { * @returns 0 on exit */ int main(int argc, const char *argv[]) { - /// A few fixed test cases - test({1, 2, 3, 4, 5, 6, 7, 8, 9}, 3); /// Array of sorted values; odd window size - test({9, 8, 7, 6, 5, 4, 3, 2, 1}, 3); /// Array of sorted values - decreasing; odd window size - test({9, 8, 7, 6, 5, 4, 5, 6}, 4); /// Even window size - test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3); /// Array with repeating values - test({3, 3, 3, 3, 7, 3, 3, 3, 3}, 3); /// Array with same values except one - test({4, 3, 3, -5, -5, 1, 3, 4, 5}, 5); /// Array that includes repeating values including negatives - - /// Array with large values - sum of few pairs exceeds MAX_INT. Window size is even - testing calculation of - /// average median between two middle values + test({1, 2, 3, 4, 5, 6, 7, 8, 9}, + 3); /// Array of sorted values; odd window size + test({9, 8, 7, 6, 5, 4, 3, 2, 1}, + 3); /// Array of sorted values - decreasing; odd window size + test({9, 8, 7, 6, 5, 4, 5, 6}, 4); /// Even window size + test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3); /// Array with repeating values + test({3, 3, 3, 3, 7, 3, 3, 3, 3}, 3); /// Array with same values except one + test({4, 3, 3, -5, -5, 1, 3, 4, 5}, + 5); /// Array that includes repeating values including negatives + + /// Array with large values - sum of few pairs exceeds MAX_INT. Window size + /// is even - testing calculation of average median between two middle + /// values test({470211272, 101027544, 1457850878, 1458777923, 2007237709, 823564440, - 1115438165, 1784484492, 74243042, 114807987}, 6); - + 1115438165, 1784484492, 74243042, 114807987}, + 6); + /// Random test cases std::srand(static_cast(std::time(nullptr))); std::vector vals; for (int i = 8; i < 100; i++) { - const auto n = 1 + std::rand() / ((RAND_MAX + 5u) / 20); /// Array size in the range [5, 20] - auto windowSize = 1 + std::rand() / ((RAND_MAX + 3u) / 10); /// Window size in the range [3, 10] + const auto n = + 1 + std::rand() / + ((RAND_MAX + 5u) / 20); /// Array size in the range [5, 20] + auto windowSize = + 1 + std::rand() / ((RAND_MAX + 3u) / + 10); /// Window size in the range [3, 10] vals.clear(); vals.reserve(n); for (int i = 0; i < n; i++) { - vals.push_back(rand() - RAND_MAX); /// Random array values (positive/negative) + vals.push_back( + rand() - RAND_MAX); /// Random array values (positive/negative) } - test(vals, windowSize); /// Testing randomized test + test(vals, windowSize); /// Testing randomized test } return 0; } diff --git a/search/text_search.cpp b/search/text_search.cpp index ee66a506a..291b1df85 100644 --- a/search/text_search.cpp +++ b/search/text_search.cpp @@ -2,6 +2,7 @@ * \file * \brief Search for words in a long textual paragraph. */ +#include #include #include #ifdef _MSC_VER @@ -10,9 +11,38 @@ #include #endif -/** Main function +/** + * @brief function to convert a C++ string to lower case + * @param word takes an std::string as input + * @returns std::string + */ +std::string lower(std::string word) { + int length = word.length(); + std::string lc = ""; + + for (int i = 0; i < length; i++) { + lc += tolower(word[i]); + } + + return lc; +} + +/** + * @brief Self-test implementations + * @returns void + */ +static void test() { + assert(lower("abcd").compare("abcd") == 0); + assert(lower("abc").compare("abcd") == -1); + assert(lower("abcd").compare("abc") == 1); +} + +/** + * @brief Main function + * @returns 0 on exit */ int main() { + test(); // run self-test implementations std::string paragraph; std::cout << "Please enter your paragraph: \n"; std::getline(std::cin, paragraph); @@ -23,20 +53,46 @@ int main() { if (paragraph.empty()) { std::cout << "\nThe paragraph is empty" << std::endl; } else { + int ch = 0; while (true) { std::string word; std::cout << "Please enter the word you are searching for: "; std::getline(std::cin, word); - std::cout << "Hello, your word is " << word << "!\n"; - if (paragraph.find(word) == std::string::npos) { - std::cout << word << " does not exist in the sentence" - << std::endl; + std::cout << "Ignore case-sensitive? 1 = Yes, 0 = No" << std::endl; + std::cin >> ch; + if (ch == 1) { + std::string lowerCase = lower( + paragraph); // convert std::string paragraph to lowercase + // and store it in std::string lowerCase + std::string lowerCaseWord = + lower(word); // convert std::string paragraph to lowercase + // and store it in std::string lowerCase + + std::cout << "Hello, your word is " << word << "!\n"; + if (lowerCase.find(lowerCaseWord) == std::string::npos) { + std::cout << word << " does not exist in the sentence" + << std::endl; + } else { + std::cout << "The word " << word + << " is now found at location " + << lowerCase.find(lowerCaseWord) << std::endl + << std::endl; + } } else { - std::cout << "The word " << word << " is now found at location " - << paragraph.find(word) << std::endl - << std::endl; + std::cout << "Hello, your word is " << word << "!\n"; + if (paragraph.find(word) == std::string::npos) { + std::cout << word << " does not exist in the sentence" + << std::endl; + } else { + std::cout << "The word " << word + << " is now found at location " + << paragraph.find(word) << std::endl + << std::endl; + } } + std::cout << "\nPress Ctrl + C to exit the program.\n\n"; std::cin.get(); } } + return 0; }