mirror of
https://hub.njuu.cf/TheAlgorithms/C-Plus-Plus.git
synced 2023-10-11 13:05:55 +08:00
feat: Add the Windowed Median algorithm (#1649)
* Create windowed_median.cpp
* Update windowed_median.cpp
* Update windowed_median.cpp
* updating DIRECTORY.md
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update windowed_median.cpp
* Update windowed_median.cpp
* Update windowed_median.cpp
* Update windowed_median.cpp
* Update windowed_median.cpp
* Update windowed_median.cpp
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* clang-format and clang-tidy fixes for 7634ece0
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update probability/windowed_median.cpp
Co-authored-by: David Leal <halfpacho@gmail.com>
* Update windowed_median.cpp
* Update windowed_median.cpp
Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
Co-authored-by: David Leal <halfpacho@gmail.com>
This commit is contained in:
parent
c05b05463d
commit
7309ef1370
@ -267,6 +267,7 @@
|
|||||||
* [Bayes Theorem](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/bayes_theorem.cpp)
|
* [Bayes Theorem](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/bayes_theorem.cpp)
|
||||||
* [Binomial Dist](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/binomial_dist.cpp)
|
* [Binomial Dist](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/binomial_dist.cpp)
|
||||||
* [Poisson Dist](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/poisson_dist.cpp)
|
* [Poisson Dist](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/poisson_dist.cpp)
|
||||||
|
* [Windowed Median](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/probability/windowed_median.cpp)
|
||||||
|
|
||||||
## Range Queries
|
## Range Queries
|
||||||
* [Fenwick Tree](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/range_queries/fenwick_tree.cpp)
|
* [Fenwick Tree](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/range_queries/fenwick_tree.cpp)
|
||||||
|
226
probability/windowed_median.cpp
Normal file
226
probability/windowed_median.cpp
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
/**
|
||||||
|
* @file
|
||||||
|
* @brief An implementation of a median calculation of a sliding window along a
|
||||||
|
* data stream
|
||||||
|
*
|
||||||
|
* @details
|
||||||
|
* Given a stream of integers, the algorithm calculates the median of a fixed size
|
||||||
|
* window at the back of the stream. The leading time complexity of this
|
||||||
|
* algorithm is O(log(N)), and it is inspired by the known algorithm to [find
|
||||||
|
* median from (infinite) data
|
||||||
|
* stream](https://www.tutorialcup.com/interview/algorithm/find-median-from-data-stream.htm),
|
||||||
|
* with the proper modifications to account for the finite window size for which
|
||||||
|
* the median is requested
|
||||||
|
*
|
||||||
|
* ### Algorithm
|
||||||
|
* The sliding window is managed by a list, which guarantees O(1) for both
|
||||||
|
* pushing and popping. Each new value is pushed to the window back, while a
|
||||||
|
* value from the front of the window is popped. In addition, the algorithm
|
||||||
|
* manages a multi-value binary search tree (BST), implemented by std::multiset.
|
||||||
|
* For each new value that is inserted into the window, it is also inserted to the
|
||||||
|
* BST. When a value is popped from the window, it is also erased from the BST.
|
||||||
|
* Both insertion into and erasure from the BST are O(logN) in time, with N the
|
||||||
|
* size of the window. Finally, the algorithm keeps a pointer to the root of the
|
||||||
|
* BST, and updates its position whenever values are inserted or erased to/from
|
||||||
|
* BST. The root of the tree is the median! Hence, median retrieval is always
|
||||||
|
* O(1)
|
||||||
|
*
|
||||||
|
* Time complexity: O(logN). Space complexity: O(N). N - size of window
|
||||||
|
* @author [Yaniv Hollander](https://github.com/YanivHollander)
|
||||||
|
*/
|
||||||
|
#include <cassert>   /// for assert
#include <cstdlib>   /// for std::rand - needed in testing
#include <ctime>     /// for std::time - needed in testing
#include <iterator>  /// for std::next, std::prev
#include <list>      /// for std::list - used to manage sliding window
#include <set>       /// for std::multiset - used to manage multi-value sorted sliding window values
#include <vector>    /// for std::vector - needed in testing
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @namespace probability
|
||||||
|
* @brief Probability algorithms
|
||||||
|
*/
|
||||||
|
namespace probability {
/**
 * @namespace windowed_median
 * @brief Functions for the Windowed Median algorithm implementation
 */
namespace windowed_median {
using Window = std::list<int>;
using size_type = Window::size_type;

/**
 * @class WindowedMedian
 * @brief A class to calculate the median of a leading sliding window at the
 * back of a stream of integer values.
 *
 * @details
 * The values currently inside the window are kept twice: in arrival order
 * (`_window`) and in sorted order (`_sortedValues`). `_itMedian` always points
 * at the median element of the sorted container - the lower of the two middle
 * elements when the number of stored values is even.
 */
class WindowedMedian {
    const size_type _windowSize;  ///< sliding window size
    Window _window;  ///< a sliding window of values along the stream
    std::multiset<int> _sortedValues;  ///< the window values in sorted order
                                       ///< (duplicates allowed)
    std::multiset<int>::const_iterator
        _itMedian;  ///< iterator to the (lower) median element of
                    ///< `_sortedValues`; meaningful only while the container
                    ///< is non-empty

    /**
     * @brief Averages the two middle values of an even-sized window
     * @details The sum is formed in `double`, where any sum of two `int`
     * values is exact, and rounded to `float` only once. Both median getters
     * use this helper so that they agree bit-for-bit.
     * @param lower The lower middle value
     * @param upper The upper middle value
     * @return The average of the two values
     */
    static float average(int lower, int upper) {
        return static_cast<float>(
            0.5 * (static_cast<double>(lower) + static_cast<double>(upper)));
    }

    /**
     * @brief Inserts a value to the sorted container and re-positions the
     * median iterator
     * @param value Value to insert
     */
    void insertToSorted(int value) {
        /// std::multiset places a new element after all elements equal to it,
        /// so a value equal to the median always lands to the median's right
        _sortedValues.insert(value);  /// O(logN)
        const auto sz = _sortedValues.size();
        if (sz == 1) {  /// First value - it is the median
            _itMedian = _sortedValues.begin();
            return;
        }

        /// New value landed left of the median and the count became even:
        /// the (lower) median is now the predecessor of the previous median
        if (value < *_itMedian && sz % 2 == 0) {
            --_itMedian;
        }

        /// New value landed right of the median and the count became odd:
        /// the median is now the successor of the previous median
        else if (value >= *_itMedian && sz % 2 != 0) {
            ++_itMedian;
        }
    }

    /**
     * @brief Erases one occurrence of a value from the sorted container and
     * re-positions the median iterator
     * @details The value must be present in the container (the caller always
     * passes the value at the front of the window).
     * @param value Value to erase
     */
    void eraseFromSorted(int value) {
        const auto sz = _sortedValues.size();

        /// Pick the element to remove. When the value equals the median value
        /// the median element itself is removed: this keeps the iterator
        /// update below exact in the presence of duplicates, and guarantees
        /// that the element being erased is never the one `_itMedian` ends up
        /// pointing at.
        std::multiset<int>::const_iterator victim = _itMedian;
        if (value != *_itMedian) {
            victim = _sortedValues.find(value);  /// O(logN)
        }

        /// Removing the only element leaves no median to point at
        if (sz == 1) {
            _sortedValues.erase(victim);
            _itMedian = _sortedValues.end();
            return;
        }

        if (sz % 2 == 0) {
            /// Even count, element removed at or left of the (lower) median:
            /// the new median is the successor of the current one
            if (value <= *_itMedian) {
                ++_itMedian;
            }
        } else if (value >= *_itMedian) {
            /// Odd count, element removed at or right of the median: the new
            /// (lower) median is the predecessor of the current one
            --_itMedian;
        }

        _sortedValues.erase(victim);
    }

 public:
    /**
     * @brief Constructs a WindowedMedian object
     * @param windowSize Sliding window size
     */
    explicit WindowedMedian(size_type windowSize) : _windowSize(windowSize) {}

    /**
     * @brief Insert a new value to the stream
     * @param value New value to insert
     */
    void insert(int value) {
        /// Push new value to the back of the sliding window - O(1)
        _window.push_back(value);
        insertToSorted(value);  /// Insert value to the sorted container
        if (_window.size() > _windowSize) {
            /// Window overflowed: drop its oldest (front) value from both
            /// containers
            eraseFromSorted(_window.front());
            _window.pop_front();  /// O(1)
        }
    }

    /**
     * @brief Gets the median of the values in the sliding window
     * @details At least one value must currently be stored in the window.
     * @return Median of sliding window. For even window size return the average
     * between the two values in the middle
     */
    float getMedian() const {
        assert(!_sortedValues.empty());
        if (_sortedValues.size() % 2 != 0) {
            return static_cast<float>(*_itMedian);
        }
        /// `_itMedian` is the lower middle element; its successor is the
        /// upper one
        return average(*_itMedian, *std::next(_itMedian));
    }

    /**
     * @brief A naive and inefficient method to obtain the median of the sliding
     * window. Used for testing!
     * @details At least one value must currently be stored in the window.
     * @return Median of sliding window. For even window size return the average
     * between the two values in the middle
     */
    float getMedianNaive() const {
        assert(!_window.empty());
        auto window = _window;
        window.sort();  /// Sort a copy of the window - O(NlogN)

        /// Walk to the middle element (the upper middle one for an even
        /// count) - O(N)
        const auto upper = std::next(
            window.begin(),
            static_cast<Window::difference_type>(window.size() / 2));
        if (window.size() % 2 != 0) {
            return static_cast<float>(*upper);
        }
        return average(*std::prev(upper), *upper);
    }
};
}  // namespace windowed_median
}  // namespace probability
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Self-test implementations
|
||||||
|
* @param vals Stream of values
|
||||||
|
* @param windowSize Size of sliding window
|
||||||
|
*/
|
||||||
|
static void test(const std::vector<int> &vals, int windowSize) {
|
||||||
|
probability::windowed_median::WindowedMedian windowedMedian(windowSize);
|
||||||
|
for (const auto val : vals) {
|
||||||
|
windowedMedian.insert(val);
|
||||||
|
|
||||||
|
/// Comparing medians: efficient function vs. Naive one
|
||||||
|
assert(windowedMedian.getMedian() == windowedMedian.getMedianNaive());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Main function
|
||||||
|
* @param argc command line argument count (ignored)
|
||||||
|
* @param argv command line array of arguments (ignored)
|
||||||
|
* @returns 0 on exit
|
||||||
|
*/
|
||||||
|
int main(int argc, const char *argv[]) {
    /// Command line arguments are not used
    (void)argc;
    (void)argv;

    /// A few fixed test cases
    test({1, 2, 3, 4, 5, 6, 7, 8, 9}, 3);  /// Increasing values; odd window size
    test({9, 8, 7, 6, 5, 4, 3, 2, 1}, 3);  /// Decreasing values; odd window size
    test({9, 8, 7, 6, 5, 4, 5, 6}, 4);     /// Even window size
    test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3);  /// Array with repeating values
    test({3, 3, 3, 3, 7, 3, 3, 3, 3}, 3);  /// Array with same values except one
    test({4, 3, 3, -5, -5, 1, 3, 4, 5}, 5);  /// Repeating values including negatives

    /// Array with large values - the sum of a few pairs exceeds INT_MAX.
    /// Window size is even - testing the calculation of the average between
    /// the two middle values
    test({470211272, 101027544, 1457850878, 1458777923, 2007237709, 823564440,
          1115438165, 1784484492, 74243042, 114807987},
         6);

    /// Random test cases
    std::srand(static_cast<unsigned int>(std::time(nullptr)));
    constexpr int kRandomRounds = 92;  /// number of randomized streams
    std::vector<int> vals;
    for (int round = 0; round < kRandomRounds; ++round) {
        const int n = 5 + std::rand() % 16;          /// Array size in the range [5, 20]
        const int windowSize = 3 + std::rand() % 8;  /// Window size in the range [3, 10]
        vals.clear();
        vals.reserve(static_cast<std::vector<int>::size_type>(n));
        for (int k = 0; k < n; ++k) {
            /// Shift by half the range so that both negative and positive
            /// values are produced
            vals.push_back(std::rand() - RAND_MAX / 2);
        }
        test(vals, windowSize);  /// Testing randomized test
    }
    return 0;
}
|
Loading…
Reference in New Issue
Block a user