feat: Modify search/text_search.cpp (#1662)

* Modified search/text_search.cpp

* Added tests

* Added a few test cases

* Added a few more test cases and documentation

* Minor fix

* Minor fixes

* Minor fixes

* Minor output fixes

* Minor output fixes

* Minor readability fixes

* clang-format and clang-tidy fixes for a01765a6

* Restored original settings

* clang-format and clang-tidy fixes for 6a8f3a4e

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
Co-authored-by: David Leal <halfpacho@gmail.com>
This commit is contained in:
Anuran Roy 2021-10-15 00:04:55 +05:30 committed by GitHub
parent 5a654fb85b
commit 85721be69b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 217 additions and 123 deletions

View File

@ -15,7 +15,7 @@ void push(int x) {
}
void pop() {
if (top_var == NULL) {
if (top_var == nullptr) {
std::cout << "\nUnderflow";
} else {
node *t = top_var;
@ -27,14 +27,14 @@ void pop() {
void show() {
node *t = top_var;
while (t != NULL) {
while (t != nullptr) {
std::cout << t->val << "\n";
t = t->next;
}
}
int main() {
int ch, x;
int ch = 0, x = 0;
do {
std::cout << "\n0. Exit or Ctrl+C";
std::cout << "\n1. Push";
@ -42,17 +42,23 @@ int main() {
std::cout << "\n3. Print";
std::cout << "\nEnter Your Choice: ";
std::cin >> ch;
switch(ch){
case 0: break;
case 1: std::cout << "\nInsert : ";
switch (ch) {
case 0:
break;
case 1:
std::cout << "\nInsert : ";
std::cin >> x;
push(x);
break;
case 2: pop();
case 2:
pop();
break;
case 3: show();
case 3:
show();
break;
default:
std::cout << "Invalid option!\n";
break;
default: std::cout << "Invalid option!\n"; break;
}
} while (ch != 0);

View File

@ -1,6 +1,6 @@
// Program to check whether a number is an armstrong number or not
#include <iostream>
#include <cmath>
#include <iostream>
using std::cin;
using std::cout;
@ -8,34 +8,32 @@ int main() {
int n = 0, temp = 0, rem = 0, count = 0, sum = 0;
cout << "Enter a number: ";
cin >> n;
temp = n;
/* First Count the number of digits
in the given number */
while(temp != 0) {
while (temp != 0) {
temp /= 10;
count++;
}
/* Calaculation for checking of armstrongs number i.e.
/* Calculation for checking an Armstrong number, i.e.
in a n digit number sum of the digits raised to a power of n
is equal to the original number */
temp = n;
while(temp!=0) {
rem = temp%10;
sum += (int) pow(rem,count);
temp/=10;
while (temp != 0) {
rem = temp % 10;
sum += static_cast<int>(pow(rem, count));
temp /= 10;
}
if (sum == n) {
cout << n << " is an armstrong number";
}
else {
} else {
cout << n << " is not an armstrong number";
}
return 0;
}

View File

@ -1,13 +1,16 @@
/**
* @file
* @brief [Geometric Distribution](https://en.wikipedia.org/wiki/Geometric_distribution)
* @brief [Geometric
* Distribution](https://en.wikipedia.org/wiki/Geometric_distribution)
*
* @details
* The geometric distribution models the experiment of doing Bernoulli trials until a
* sucess was observed. There are two formulations of the geometric distribution:
* 1) The probability distribution of the number X of Bernoulli trials needed to get one success, supported on the set { 1, 2, 3, ... }
* 2) The probability distribution of the number Y = X 1 of failures before the first success, supported on the set { 0, 1, 2, 3, ... }
* Here, the first one is implemented.
* The geometric distribution models the experiment of doing Bernoulli trials
* until a success is observed. There are two formulations of the geometric
* distribution:
* 1) The probability distribution of the number X of Bernoulli trials needed
* to get one success, supported on the set { 1, 2, 3, ... }
* 2) The probability distribution of the number Y = X - 1 of failures before
* the first success, supported on the set { 0, 1, 2, 3, ... }
*
* Here, the first one is implemented.
*
* Common variables used:
* p - The success probability
@ -16,14 +19,14 @@
* @author [Domenic Zingsheim](https://github.com/DerAndereDomenic)
*/
#include <cassert> /// for assert
#include <cmath> /// for math functions
#include <cstdint> /// for fixed size data types
#include <ctime> /// for time to initialize rng
#include <iostream> /// for std::cout
#include <limits> /// for std::numeric_limits
#include <random> /// for random numbers
#include <vector> /// for std::vector
#include <cassert> /// for assert
#include <cmath> /// for math functions
#include <cstdint> /// for fixed size data types
#include <ctime> /// for time to initialize rng
#include <iostream> /// for std::cout
#include <limits> /// for std::numeric_limits
#include <random> /// for random numbers
#include <vector> /// for std::vector
/**
* @namespace probability
@ -32,12 +35,15 @@
namespace probability {
/**
* @namespace geometric_dist
* @brief Functions for the [Geometric Distribution](https://en.wikipedia.org/wiki/Geometric_distribution) algorithm implementation
* @brief Functions for the [Geometric
* Distribution](https://en.wikipedia.org/wiki/Geometric_distribution) algorithm
* implementation
*/
namespace geometric_dist {
/**
* @brief Returns a random number between [0,1]
* @returns A uniformly distributed random number between 0 (included) and 1 (included)
* @returns A uniformly distributed random number between 0 (included) and 1
* (included)
*/
float generate_uniform() {
return static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
@ -46,12 +52,11 @@ float generate_uniform() {
/**
* @brief A class to model the geometric distribution
*/
class geometric_distribution
{
private:
float p; ///< The succes probability p
class geometric_distribution {
private:
float p; ///< The succes probability p
public:
public:
/**
* @brief Constructor for the geometric distribution
* @param p The success probability
@ -59,28 +64,24 @@ public:
explicit geometric_distribution(const float& p) : p(p) {}
/**
* @brief The expected value of a geometrically distributed random variable X
* @brief The expected value of a geometrically distributed random variable
* X
* @returns E[X] = 1/p
*/
float expected_value() const {
return 1.0f/ p;
}
float expected_value() const { return 1.0f / p; }
/**
* @brief The variance of a geometrically distributed random variable X
* @returns V[X] = (1 - p) / p^2
*/
float variance() const {
return (1.0f - p) / (p * p);
}
float variance() const { return (1.0f - p) / (p * p); }
/**
* @brief The standard deviation of a geometrically distributed random variable X
* @brief The standard deviation of a geometrically distributed random
* variable X
* @returns \sigma = \sqrt{V[X]}
*/
float standard_deviation() const {
return std::sqrt(variance());
}
float standard_deviation() const { return std::sqrt(variance()); }
/**
* @brief The probability density function
@ -95,7 +96,8 @@ public:
/**
* @brief The cumulative distribution function
* @details The sum of all probabilities up to (and including) k trials. Basically CDF(k) = P(x <= k)
* @details The sum of all probabilities up to (and including) k trials.
* Basically CDF(k) = P(x <= k)
* @param k The number of trials in [1,\infty)
* @returns The probability to have success within k trials
*/
@ -105,8 +107,9 @@ public:
/**
* @brief The inverse cumulative distribution function
* @details This functions answers the question: Up to how many trials are needed to have success with a probability of cdf?
* The exact floating point value is reported.
* @details This function answers the question: how many trials are needed
* to have success with a probability of cdf? The exact floating point
* value is reported.
* @param cdf The probability in [0,1]
* @returns The number of (exact) trials.
*/
@ -115,26 +118,37 @@ public:
}
/**
* @brief Generates a (discrete) sample according to the geometrical distribution
* @brief Generates a (discrete) sample according to the geometrical
* distribution
* @returns A geometrically distributed number in [1,\infty)
*/
uint32_t draw_sample() const {
float uniform_sample = generate_uniform();
return static_cast<uint32_t>(inverse_cumulative_distribution(uniform_sample)) + 1;
return static_cast<uint32_t>(
inverse_cumulative_distribution(uniform_sample)) +
1;
}
/**
* @brief This function computes the probability to have success in a given range of tries
* @brief This function computes the probability to have success in a given
* range of tries
* @details Computes P(min_tries <= x <= max_tries).
* Can be used to calculate P(x >= min_tries) by not passing a second argument.
* Can be used to calculate P(x <= max_tries) by passing 1 as the first argument
* Can be used to calculate P(x >= min_tries) by not passing a second
* argument. Can be used to calculate P(x <= max_tries) by passing 1 as the
* first argument
* @param min_tries The minimum number of tries in [1,\infty) (inclusive)
* @param max_tries The maximum number of tries in [min_tries, \infty) (inclusive)
* @returns The probability of having success within a range of tries [min_tries, max_tries]
* @param max_tries The maximum number of tries in [min_tries, \infty)
* (inclusive)
* @returns The probability of having success within a range of tries
* [min_tries, max_tries]
*/
float range_tries(const uint32_t& min_tries = 1, const uint32_t& max_tries = std::numeric_limits<uint32_t>::max()) const {
float range_tries(const uint32_t& min_tries = 1,
const uint32_t& max_tries =
std::numeric_limits<uint32_t>::max()) const {
float cdf_lower = cumulative_distribution(min_tries - 1);
float cdf_upper = max_tries == std::numeric_limits<uint32_t>::max() ? 1.0f : cumulative_distribution(max_tries);
float cdf_upper = max_tries == std::numeric_limits<uint32_t>::max()
? 1.0f
: cumulative_distribution(max_tries);
return cdf_upper - cdf_lower;
}
};
@ -144,10 +158,12 @@ public:
/**
* @brief Tests the sampling method of the geometric distribution
* @details Draws 1000000 random samples and estimates mean and variance
* These should be close to the expected value and variance of the given distribution to pass.
* These should be close to the expected value and variance of the given
* distribution to pass.
* @param dist The distribution to test
*/
void sample_test(const probability::geometric_dist::geometric_distribution& dist) {
void sample_test(
const probability::geometric_dist::geometric_distribution& dist) {
uint32_t n_tries = 1000000;
std::vector<float> tries;
tries.resize(n_tries);
@ -165,11 +181,13 @@ void sample_test(const probability::geometric_dist::geometric_distribution& dist
var += (tries[i] - mean) * (tries[i] - mean);
}
//Unbiased estimate of variance
// Unbiased estimate of variance
var /= static_cast<float>(n_tries - 1);
std::cout << "This value should be near " << dist.expected_value() << ": " << mean << std::endl;
std::cout << "This value should be near " << dist.variance() << ": " << var << std::endl;
std::cout << "This value should be near " << dist.expected_value() << ": "
<< mean << std::endl;
std::cout << "This value should be near " << dist.variance() << ": " << var
<< std::endl;
}
/**
@ -187,7 +205,9 @@ static void test() {
assert(std::abs(dist.standard_deviation() - 2.788866755) < threshold);
assert(std::abs(dist.probability_density(5) - 0.07203) < threshold);
assert(std::abs(dist.cumulative_distribution(6) - 0.882351) < threshold);
assert(std::abs(dist.inverse_cumulative_distribution(dist.cumulative_distribution(8)) - 8) < threshold);
assert(std::abs(dist.inverse_cumulative_distribution(
dist.cumulative_distribution(8)) -
8) < threshold);
assert(std::abs(dist.range_tries() - 1.0f) < threshold);
assert(std::abs(dist.range_tries(3) - 0.49f) < threshold);
assert(std::abs(dist.range_tries(5, 11) - 0.2203267f) < threshold);
@ -202,7 +222,9 @@ static void test() {
assert(std::abs(dist.standard_deviation() - 1.4142135f) < threshold);
assert(std::abs(dist.probability_density(5) - 0.03125) < threshold);
assert(std::abs(dist.cumulative_distribution(6) - 0.984375) < threshold);
assert(std::abs(dist.inverse_cumulative_distribution(dist.cumulative_distribution(8)) - 8) < threshold);
assert(std::abs(dist.inverse_cumulative_distribution(
dist.cumulative_distribution(8)) -
8) < threshold);
assert(std::abs(dist.range_tries() - 1.0f) < threshold);
assert(std::abs(dist.range_tries(3) - 0.25f) < threshold);
assert(std::abs(dist.range_tries(5, 11) - 0.062011f) < threshold);
@ -217,7 +239,9 @@ static void test() {
assert(std::abs(dist.standard_deviation() - 0.559016f) < threshold);
assert(std::abs(dist.probability_density(5) - 0.00128) < threshold);
assert(std::abs(dist.cumulative_distribution(6) - 0.999936) < threshold);
assert(std::abs(dist.inverse_cumulative_distribution(dist.cumulative_distribution(8)) - 8) < threshold);
assert(std::abs(dist.inverse_cumulative_distribution(
dist.cumulative_distribution(8)) -
8) < threshold);
assert(std::abs(dist.range_tries() - 1.0f) < threshold);
assert(std::abs(dist.range_tries(3) - 0.04f) < threshold);
assert(std::abs(dist.range_tries(5, 11) - 0.00159997f) < threshold);

View File

@ -4,8 +4,8 @@
* data stream
*
* @details
* Given a stream of integers, the algorithm calculates the median of a fixed size
* window at the back of the stream. The leading time complexity of this
* Given a stream of integers, the algorithm calculates the median of a fixed
* size window at the back of the stream. The leading time complexity of this
* algorithm is O(log(N)), and it is inspired by the known algorithm to [find
* median from (infinite) data
* stream](https://www.tutorialcup.com/interview/algorithm/find-median-from-data-stream.htm),
@ -17,13 +17,13 @@
* pushing and popping. Each new value is pushed to the window back, while a
* value from the front of the window is popped. In addition, the algorithm
* manages a multi-value binary search tree (BST), implemented by std::multiset.
* For each new value that is inserted into the window, it is also inserted to the
* BST. When a value is popped from the window, it is also erased from the BST.
* Both insertion and erasion to/from the BST are O(logN) in time, with N the
* size of the window. Finally, the algorithm keeps a pointer to the root of the
* BST, and updates its position whenever values are inserted or erased to/from
* BST. The root of the tree is the median! Hence, median retrieval is always
* O(1)
* For each new value that is inserted into the window, it is also inserted to
* the BST. When a value is popped from the window, it is also erased from the
* BST. Both insertion into and erasure from the BST are O(logN) in time, with N
* the size of the window. Finally, the algorithm keeps a pointer to the root of
* the BST, and updates its position whenever values are inserted or erased
* to/from BST. The root of the tree is the median! Hence, median retrieval is
* always O(1)
*
* Time complexity: O(logN). Space complexity: O(N). N - size of window
* @author [Yaniv Hollander](https://github.com/YanivHollander)
@ -32,8 +32,8 @@
#include <cstdlib> /// for std::rand - needed in testing
#include <ctime> /// for std::time - needed in testing
#include <list> /// for std::list - used to manage sliding window
#include <set> /// for std::multiset - used to manage multi-value sorted sliding window values
#include <vector> /// for std::vector - needed in testing
#include <set> /// for std::multiset - used to manage multi-value sorted sliding window values
#include <vector> /// for std::vector - needed in testing
/**
* @namespace probability
@ -55,7 +55,7 @@ using size_type = Window::size_type;
*/
class WindowedMedian {
const size_type _windowSize; ///< sliding window size
Window _window; ///< a sliding window of values along the stream
Window _window; ///< a sliding window of values along the stream
std::multiset<int> _sortedValues; ///< a DS to represent a balanced
/// multi-value binary search tree (BST)
std::multiset<int>::const_iterator
@ -103,13 +103,14 @@ class WindowedMedian {
}
/// However, if the erased value is on the right branch or the median
/// itself, and the number of elements is odd, the new median will be the
/// left child of the current one
/// itself, and the number of elements is odd, the new median will be
/// the left child of the current one
else if (value >= *_itMedian && sz % 2 != 0) {
--_itMedian; // O(1) - traversing one step to the left child
}
/// Find the (first) position of the value we want to erase, and erase it
/// Find the (first) position of the value we want to erase, and erase
/// it
const auto it = _sortedValues.find(value); // O(logN)
_sortedValues.erase(it); // O(logN)
}
@ -126,16 +127,16 @@ class WindowedMedian {
* @param value New value to insert
*/
void insert(int value) {
/// Push new value to the back of the sliding window - O(1)
_window.push_back(value);
insertToSorted(value); // Insert value to the multi-value BST - O(logN)
if (_window.size() > _windowSize) { /// If exceeding size of window, pop
/// from its left side
eraseFromSorted(_window.front()); /// Erase from the multi-value BST
/// the window left side value
_window
.pop_front(); /// Pop the left side value from the window - O(1)
if (_window.size() > _windowSize) { /// If exceeding size of window,
/// pop from its left side
eraseFromSorted(
_window.front()); /// Erase from the multi-value BST
/// the window left side value
_window.pop_front(); /// Pop the left side value from the window -
/// O(1)
}
}
@ -170,8 +171,8 @@ class WindowedMedian {
0.5f * *next(window.begin(), window.size() / 2 - 1); /// O(N)
}
};
} /// namespace windowed_median
} /// namespace probability
} // namespace windowed_median
} // namespace probability
/**
* @brief Self-test implementations
@ -195,32 +196,41 @@ static void test(const std::vector<int> &vals, int windowSize) {
* @returns 0 on exit
*/
int main(int argc, const char *argv[]) {
/// A few fixed test cases
test({1, 2, 3, 4, 5, 6, 7, 8, 9}, 3); /// Array of sorted values; odd window size
test({9, 8, 7, 6, 5, 4, 3, 2, 1}, 3); /// Array of sorted values - decreasing; odd window size
test({9, 8, 7, 6, 5, 4, 5, 6}, 4); /// Even window size
test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3); /// Array with repeating values
test({3, 3, 3, 3, 7, 3, 3, 3, 3}, 3); /// Array with same values except one
test({4, 3, 3, -5, -5, 1, 3, 4, 5}, 5); /// Array that includes repeating values including negatives
/// Array with large values - sum of few pairs exceeds MAX_INT. Window size is even - testing calculation of
/// average median between two middle values
test({1, 2, 3, 4, 5, 6, 7, 8, 9},
3); /// Array of sorted values; odd window size
test({9, 8, 7, 6, 5, 4, 3, 2, 1},
3); /// Array of sorted values - decreasing; odd window size
test({9, 8, 7, 6, 5, 4, 5, 6}, 4); /// Even window size
test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3); /// Array with repeating values
test({3, 3, 3, 3, 7, 3, 3, 3, 3}, 3); /// Array with same values except one
test({4, 3, 3, -5, -5, 1, 3, 4, 5},
5); /// Array that includes repeating values including negatives
/// Array with large values - sum of few pairs exceeds MAX_INT. Window size
/// is even - testing calculation of average median between two middle
/// values
test({470211272, 101027544, 1457850878, 1458777923, 2007237709, 823564440,
1115438165, 1784484492, 74243042, 114807987}, 6);
1115438165, 1784484492, 74243042, 114807987},
6);
/// Random test cases
std::srand(static_cast<unsigned int>(std::time(nullptr)));
std::vector<int> vals;
for (int i = 8; i < 100; i++) {
const auto n = 1 + std::rand() / ((RAND_MAX + 5u) / 20); /// Array size in the range [5, 20]
auto windowSize = 1 + std::rand() / ((RAND_MAX + 3u) / 10); /// Window size in the range [3, 10]
const auto n =
1 + std::rand() /
((RAND_MAX + 5u) / 20); /// Array size in the range [5, 20]
auto windowSize =
1 + std::rand() / ((RAND_MAX + 3u) /
10); /// Window size in the range [3, 10]
vals.clear();
vals.reserve(n);
for (int i = 0; i < n; i++) {
vals.push_back(rand() - RAND_MAX); /// Random array values (positive/negative)
vals.push_back(
rand() - RAND_MAX); /// Random array values (positive/negative)
}
test(vals, windowSize); /// Testing randomized test
test(vals, windowSize); /// Testing randomized test
}
return 0;
}

View File

@ -2,6 +2,7 @@
* \file
* \brief Search for words in a long textual paragraph.
*/
#include <cassert>
#include <cstdlib>
#include <iostream>
#ifdef _MSC_VER
@ -10,9 +11,38 @@
#include <cstring>
#endif
/** Main function
/**
 * @brief Converts a string to lower case
 * @details Every character is mapped through `tolower` (which follows the
 * current C locale). The character is converted to `unsigned char` first:
 * passing a negative `char` value to `tolower` is undefined behaviour, and
 * that is exactly what happens for non-ASCII bytes on platforms where `char`
 * is signed.
 * @param word the string to convert; taken by value, so the caller's string is
 * never modified
 * @returns a string of the same length as `word` in which every upper-case
 * character has been replaced by its lower-case equivalent
 */
std::string lower(std::string word) {
    // `word` is already a private copy, so it can be converted in place
    for (char &ch : word) {
        ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
    }
    return word;
}
/**
 * @brief Self-test implementations
 * @details Checks that `lower` converts upper- and mixed-case input, leaves
 * lower-case, non-alphabetic and empty input unchanged, and that the result
 * orders as expected against other strings.
 * @returns void
 */
static void test() {
    // upper- and mixed-case input must actually be converted
    assert(lower("ABCD") == "abcd");
    assert(lower("Hello World") == "hello world");

    // input without upper-case letters is returned unchanged
    assert(lower("abcd").compare("abcd") == 0);
    assert(lower("1234 !?") == "1234 !?");
    assert(lower("").empty());

    // std::string::compare only guarantees the sign of its result, so test
    // against zero rather than against exactly -1 / 1
    assert(lower("ABC").compare("abcd") < 0);
    assert(lower("ABCD").compare("abc") > 0);
}
/**
* @brief Main function
* @returns 0 on exit
*/
int main() {
test(); // run self-test implementations
std::string paragraph;
std::cout << "Please enter your paragraph: \n";
std::getline(std::cin, paragraph);
@ -23,20 +53,46 @@ int main() {
if (paragraph.empty()) {
std::cout << "\nThe paragraph is empty" << std::endl;
} else {
int ch = 0;
while (true) {
std::string word;
std::cout << "Please enter the word you are searching for: ";
std::getline(std::cin, word);
std::cout << "Hello, your word is " << word << "!\n";
if (paragraph.find(word) == std::string::npos) {
std::cout << word << " does not exist in the sentence"
<< std::endl;
std::cout << "Ignore case-sensitive? 1 = Yes, 0 = No" << std::endl;
std::cin >> ch;
if (ch == 1) {
std::string lowerCase = lower(
paragraph); // convert std::string paragraph to lowercase
// and store it in std::string lowerCase
std::string lowerCaseWord =
lower(word); // convert std::string paragraph to lowercase
// and store it in std::string lowerCase
std::cout << "Hello, your word is " << word << "!\n";
if (lowerCase.find(lowerCaseWord) == std::string::npos) {
std::cout << word << " does not exist in the sentence"
<< std::endl;
} else {
std::cout << "The word " << word
<< " is now found at location "
<< lowerCase.find(lowerCaseWord) << std::endl
<< std::endl;
}
} else {
std::cout << "The word " << word << " is now found at location "
<< paragraph.find(word) << std::endl
<< std::endl;
std::cout << "Hello, your word is " << word << "!\n";
if (paragraph.find(word) == std::string::npos) {
std::cout << word << " does not exist in the sentence"
<< std::endl;
} else {
std::cout << "The word " << word
<< " is now found at location "
<< paragraph.find(word) << std::endl
<< std::endl;
}
}
std::cout << "\nPress Ctrl + C to exit the program.\n\n";
std::cin.get();
}
}
return 0;
}