From 7ea1ca6c9cb802db7b9b43e3aa443ba4755ba622 Mon Sep 17 00:00:00 2001 From: Venkata Bharath Malapati <9440110838bharath@gmail.com> Date: Thu, 18 Mar 2021 14:28:33 -0400 Subject: [PATCH] feat: Trie data structure using hashmap, predict_words method, class Trie (#1464) * feat: add Trie data structure using hashmap, predict_words method, class Trie * updating DIRECTORY.md * fix: fixed clang.tidy warnings * docs: added documentation toclass trie * docs: updated documentation * fix: added namespace trie_using_hashmap * docs: updated documentation with comments * docs: updated documentation with spelling corrections * docs: updated requested changes * docs : updated requested changes in documentation * fix: Apply suggestions from code review Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> Co-authored-by: David Leal --- DIRECTORY.md | 1 + data_structures/trie_using_hashmap.cpp | 345 +++++++++++++++++++++++++ 2 files changed, 346 insertions(+) create mode 100644 data_structures/trie_using_hashmap.cpp diff --git a/DIRECTORY.md b/DIRECTORY.md index 1878fcb41..c046be5c7 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -54,6 +54,7 @@ * [Tree 234](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/data_structures/tree_234.cpp) * [Trie Modern](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/data_structures/trie_modern.cpp) * [Trie Tree](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/data_structures/trie_tree.cpp) + * [Trie Using Hashmap](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/data_structures/trie_using_hashmap.cpp) ## Dynamic Programming * [0 1 Knapsack](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/dynamic_programming/0_1_knapsack.cpp) diff --git a/data_structures/trie_using_hashmap.cpp b/data_structures/trie_using_hashmap.cpp new file mode 100644 index 000000000..5fb9602a9 --- /dev/null +++ b/data_structures/trie_using_hashmap.cpp @@ -0,0 +1,345 @@ +/** + * @file + * @author [Venkata Bharath](https://github.com/bharath000) + * @brief Implementation of [Trie](https://en.wikipedia.org/wiki/Trie) data + * structure using HashMap for different characters and method for predicting + * words based on prefix. + * @details The Trie data structure is implemented using unordered map to use + * memory optimally, predict_words method is developed to recommend words based + * on a given prefix along with other methods insert, delete, search, startwith + * in trie. + * @see trie_modern.cpp for difference + */ +#include /// for assert +#include /// for IO operations +#include /// for std::shared_ptr +#include /// for std::stack +#include /// for std::unordered_map +#include /// for std::vector + +/** + * @namespace data_structures + * @brief Data structures algorithms + */ +namespace data_structures { + +/** + * @namespace trie_using_hashmap + * @brief Functions for [Trie](https://en.wikipedia.org/wiki/Trie) data + * structure using hashmap implementation + */ +namespace trie_using_hashmap { + +/** + * @brief Trie class, implementation of trie using hashmap in each trie node + * for all the characters of char16_t(UTF-16)type with methods to insert, + * delete, search, start with and to recommend words based on a given + * prefix. + */ +class Trie { + private: + /** + * @brief struct representing a trie node. + */ + struct Node { + std::unordered_map> + children; ///< unordered map with key type char16_t and value is a + ///< shared pointer type of Node + bool word_end = false; ///< boolean variable to represent the node end + }; + + std::shared_ptr root_node = + std::make_shared(); ///< declaring root node of trie + + public: + ///< Constructor + Trie() = default; + + /** + * @brief insert the string into the trie + * @param word string to insert in the trie + */ + void insert(const std::string& word) { + std::shared_ptr curr = root_node; + for (char ch : word) { + if (curr->children.find(ch) == curr->children.end()) { + curr->children[ch] = std::make_shared(); + } + curr = curr->children[ch]; + } + + if (!curr->word_end && curr != root_node) { + curr->word_end = true; + } + } + + /** + * @brief search a word/string inside the trie + * @param word string to search for + * @returns `true` if found + * @returns `false` if not found + */ + bool search(const std::string& word) { + std::shared_ptr curr = root_node; + for (char ch : word) { + if (curr->children.find(ch) == curr->children.end()) { + return false; + } + curr = curr->children[ch]; + if (!curr) { + return false; + } + } + + if (curr->word_end) { + return true; + } else { + return false; + } + } + + /** + * @brief search a word/string that starts with a given prefix + * @param prefix string to search for + * @returns `true` if found + * @returns `false` if not found + */ + bool startwith(const std::string& prefix) { + std::shared_ptr curr = root_node; + for (char ch : prefix) { + if (curr->children.find(ch) == curr->children.end()) { + return false; + } + curr = curr->children[ch]; + } + return true; + } + + /** + * @brief delete a word/string from a trie + * @param word string to delete from trie + */ + void delete_word(std::string word) { + std::shared_ptr curr = root_node; + std::stack> nodes; + int cnt = 0; + for (char ch : word) { + if (curr->children.find(ch) == curr->children.end()) { + return; + } + if (curr->word_end) { + cnt++; + } + + nodes.push(curr->children[ch]); + curr = curr->children[ch]; + } + // Delete only when it's a word, and it has children after + // or prefix in the line + if (nodes.top()->word_end) { + nodes.top()->word_end = false; + } + // Delete only when it has no children after + // and also no prefix in the line + while (!(nodes.top()->word_end) && nodes.top()->children.empty()) { + nodes.pop(); + nodes.top()->children.erase(word.back()); + word.pop_back(); + } + } + + /** + * @brief helper function to predict/recommend words that starts with a + * given prefix from the end of prefix's node iterate through all the child + * nodes by recursively appending all the possible words below the trie + * @param prefix string to recommend the words + * @param element node at the end of the given prefix + * @param results list to store the all possible words + * @returns list of recommended words + */ + std::vector get_all_words(std::vector results, + const std::shared_ptr& element, + std::string prefix) { + if (element->word_end) { + results.push_back(prefix); + } + if (element->children.empty()) { + return results; + } + for (auto const& x : element->children) { + std::string key = ""; + key = x.first; + prefix += key; + + results = + get_all_words(results, element->children[x.first], prefix); + + prefix.pop_back(); + } + + return results; + } + + /** + * @brief predict/recommend a word that starts with a given prefix + * @param prefix string to search for + * @returns list of recommended words + */ + std::vector predict_words(const std::string& prefix) { + std::vector result; + std::shared_ptr curr = root_node; + // traversing until the end of the given prefix in trie + + for (char ch : prefix) { + if (curr->children.find(ch) == curr->children.end()) { + return result; + } + + curr = curr->children[ch]; + } + + // if the given prefix is the only word without children + if (curr->word_end && curr->children.empty()) { + result.push_back(prefix); + return result; + } + + result = get_all_words( + result, curr, + prefix); ///< iteratively and recursively get the recommended words + + return result; + } +}; +} // namespace trie_using_hashmap +} // namespace data_structures + +/** + * @brief Self-test implementations + * @returns void + */ +static void test() { + data_structures::trie_using_hashmap::Trie obj; + // Inserting data into trie using the insert + // method and testing it with search method + obj.insert("app"); + obj.insert("abscond"); + obj.insert("about"); + obj.insert("apps"); + obj.insert("apen"); + obj.insert("apples"); + obj.insert("apple"); + obj.insert("approach"); + obj.insert("bus"); + obj.insert("buses"); + obj.insert("Apple"); + obj.insert("Bounce"); + + assert(!obj.search("appy")); + std::cout << "appy is not a word in trie" << std::endl; + + assert(!obj.search("car")); + std::cout << "car is not a word in trie" << std::endl; + assert(obj.search("app")); + assert(obj.search("apple")); + assert(obj.search("apples")); + assert(obj.search("apps")); + assert(obj.search("apen")); + assert(obj.search("approach")); + assert(obj.search("about")); + assert(obj.search("abscond")); + assert(obj.search("bus")); + assert(obj.search("buses")); + assert(obj.search("Bounce")); + assert(obj.search("Apple")); + + std::cout << "All the Inserted words are present in the trie" << std::endl; + + // test for startwith prefix method + assert(!obj.startwith("approachs")); + assert(obj.startwith("approach")); + assert(obj.startwith("about")); + assert(!obj.startwith("appy")); + assert(obj.startwith("abscond")); + assert(obj.startwith("bus")); + assert(obj.startwith("buses")); + assert(obj.startwith("Bounce")); + assert(obj.startwith("Apple")); + assert(obj.startwith("abs")); + assert(obj.startwith("b")); + assert(obj.startwith("bus")); + assert(obj.startwith("Bo")); + assert(obj.startwith("A")); + assert(!obj.startwith("Ca")); + + assert(!obj.startwith("C")); + + std::cout << "All the tests passed for startwith method" << std::endl; + + // test for predict_words/recommendation of words based on a given prefix + + std::vector pred_words = obj.predict_words("a"); + + for (const std::string& str : obj.predict_words("a")) { + std::cout << str << std::endl; + } + assert(pred_words.size() == 8); + std::cout << "Returned all words that start with prefix a " << std::endl; + pred_words = obj.predict_words("app"); + + for (const std::string& str : pred_words) { + std::cout << str << std::endl; + } + + assert(pred_words.size() == 5); + std::cout << "Returned all words that start with prefix app " << std::endl; + pred_words = obj.predict_words("A"); + + for (const std::string& str : pred_words) { + std::cout << str << std::endl; + } + + assert(pred_words.size() == 1); + std::cout << "Returned all words that start with prefix A " << std::endl; + pred_words = obj.predict_words("bu"); + + for (const std::string& str : pred_words) { + std::cout << str << std::endl; + } + + assert(pred_words.size() == 2); + std::cout << "Returned all words that start with prefix bu " << std::endl; + + // tests for delete method + + obj.delete_word("app"); + assert(!obj.search("app")); + std::cout << "word app is deleted sucessful" << std::endl; + + pred_words = obj.predict_words("app"); + for (const std::string& str : pred_words) { + std::cout << str << std::endl; + } + assert(pred_words.size() == 4); + std::cout << "app is deleted sucessful" << std::endl; + + // test case for Chinese language + + obj.insert("苹果"); + assert(obj.startwith("苹")); + pred_words = obj.predict_words("h"); + + assert(pred_words.size() == 0); + std::cout << "No word starts with prefix h in trie" << std::endl; + + std::cout << "All tests passed" << std::endl; +} + +/** + * @brief Main function + * @returns 0 on exit + */ +int main() { + test(); // run self-test implementaions + return 0; +}