/** * @file * @brief [Trie datastructure](https://iq.opengenus.org/autocomplete-using-trie-data-structure/) * with search variants * @details * This provides multiple variants of search functions * on a trie structure utilizing STL. The trie is valid * for only English alphabets. * @author [Ghanashyam](https://github.com/g-s-k-zoro) */ #include /// for std::count #include /// for assert #include /// for tolower #include /// for string operations #include /// for IO Operations #include /// for std::priority_queue /** * @namespace operations_on_datastructures * @brief Operations on data structures */ namespace operations_on_datastructures { /** * @namespace trie_operations * @brief Functions for [Trie datastructure](https://iq.opengenus.org/autocomplete-using-trie-data-structure/) * implementation */ namespace trie_operations { /** * @brief Class defining the structure of trie node and containing the methods * to perform operations on them. */ class Tnode { private: static constexpr uint8_t ENGLISH_ALPHABET_SIZE = 26; // pointers to alphabets std::vector english; // To mark the end of word bool endOfWord; // To store the frequency of searches for the word uint32_t frequency; public: Tnode() { english.resize(ENGLISH_ALPHABET_SIZE, nullptr); endOfWord = false; frequency = 0; } // Copy Constructor Tnode(const Tnode &node) { english = node.english; endOfWord = node.endOfWord; frequency = node.frequency; } Tnode &operator=(const Tnode &node) = default; Tnode(Tnode &&) = default; Tnode &operator=(Tnode &&) = default; /** * @brief Function to count the number of children a node in the trie has * @param node a trie node whose children need to be counted * @return count of the number of children of the given node (max 26) */ inline uint8_t numberOfChildren(Tnode *node) { return ENGLISH_ALPHABET_SIZE - std::count(node->english.begin(), node->english.end(), nullptr); } // Functions to perform operations on trie void Insert(const std::string &entry); void Delete(std::string entry); void DeleteFrom(Tnode *delete_from, std::string delete_string, int remove_index); bool SearchPresence(const std::string &key); void SuggestAutocomplete(Tnode *new_root, const std::string &prefix); void SearchSuggestions(const std::string &key); void SuggestFreqAutocomplete( Tnode *new_root, const std::string &prefix, std::priority_queue > *suggestions); void SearchFreqSuggestions(const std::string &key); void SelectionTop_3( std::priority_queue > *suggestions); // To free up the dynamically allocated objects ~Tnode() { int i = 0; for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) { if (english[i]) { delete english[i]; } } } }; /** * @brief Function to insert a word in the trie * @param entry string entry to be inserted in the trie */ void Tnode::Insert(const std::string &entry) { Tnode *cur_pos = this; int letter_index = 0; for (auto &i : entry) { // To ignore case letter_index = tolower(i) - 97; // Allocate a node for each character of entry if not present in the // trie if (cur_pos->english[letter_index] == nullptr) { cur_pos->english[letter_index] = new Tnode(); } cur_pos = cur_pos->english[letter_index]; } // cur_pos points to the last char, mark it as end of word cur_pos->endOfWord = true; } /** * @brief Function recursively deletes the substring character by * character iterating through the string to be deleted. It traverses till the * end of word in a recursive fashion, from there it deletes characters one by * one till it reaches back to the initial call. * @param delete_from the acting root to the required suffix to be deleted * @param delete_string the string to be deleted from the trie * @param remove_index index denoting the beginning of the substring to be * deleted */ void Tnode::DeleteFrom(Tnode *delete_from, std::string delete_string, int remove_index) { if (delete_string.size() == remove_index) { int letter_index = tolower(delete_string[remove_index]) - 97; DeleteFrom(delete_from->english[letter_index], delete_string, remove_index + 1); delete delete_from; } } /** * @brief Function to verify presence and hence delete an entry from the trie * @param entry string entry to be deleted from the trie */ void Tnode::Delete(std::string entry) { Tnode *cur_pos = this, *delete_from = this; // Current pointer pointing to root int letter_index = 0, delete_from_index = 0, i = 0, n = entry.size(); for (i = 0; i < n; i++) { // To ignore case letter_index = tolower(entry[i]) - 97; // Display error message when given entry is not present in the tree if (cur_pos->english[letter_index] == nullptr) { std::cout << "Entry not Found" << std::endl; return; } // If the current node is end of word for the current prefix or if it // has 2 or more branches It cannot be deleted while deleting the // required entry. if (numberOfChildren(cur_pos) > 1 || cur_pos->endOfWord) { delete_from = cur_pos; // denotes the beginning of the shortest // suffix that is allowed to be deleted delete_from_index = i - 1; // Beginning index of the suffix // corresponding to the 'entry' } // Traversing through the entry cur_pos = cur_pos->english[letter_index]; } // cur_pos now points to the last char of entry. Display message if that // entry does not exist if (!cur_pos->endOfWord) { std::cout << "Entry not Found" << std::endl; return; } // If cur_pos is not a leaf node, unmark end of word and assign 0 to it's // frequency for deletion if (numberOfChildren(cur_pos)) { cur_pos->endOfWord = false; cur_pos->frequency = 0; return; } // The first character of the suffix to be deleted letter_index = tolower(entry[delete_from_index + 1]) - 97; // Point cur_pos to the next node cur_pos = delete_from->english[letter_index]; // Sever the connection from the main trie delete_from->english[letter_index] = nullptr; // If number of characters in the suffix are more than 1, recursively delete // each character starting from cur_pos using the helper function if (n > delete_from_index + 2) { DeleteFrom(cur_pos, entry, delete_from_index + 2); } // If the suffix is only 1 char in length else { delete cur_pos; } } /** * @brief Function to check a word's presence in the trie (Basic) * @param key the string key to be searched in the trie * @return true if the key is found * @return false if the key is not found */ bool Tnode::SearchPresence(const std::string &key) { Tnode *cur_pos = this; int letter_index = 0; for (auto &i : key) { letter_index = tolower(i) - 97; // If any character in the order of the key is absent, word not found! if (cur_pos->english[letter_index] == nullptr) { return false; } cur_pos = cur_pos->english[letter_index]; } // Word is only present in the trie if the key is a valid complete entry and // not just a prefix. if (cur_pos->endOfWord) { (cur_pos->frequency)++; return true; } else { return false; } } /** * @brief Recursive function to suggest all the entries of trie * which have a given common prefix * @param new_root pointer pointing to the node corresponding to the last char * of prefix * @param prefix the common prefix that all the suggestions must have */ void Tnode::SuggestAutocomplete(Tnode *new_root, const std::string &prefix) { // Iterate through all 26 nodes as we have to print all strings with the // given prefix int i = 0; for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) { if (new_root->english[i] != nullptr) { // Print the sugestion only if it's a valid complete entry and not // just a prefix if (new_root->english[i]->endOfWord) { std::cout << prefix + char(i + 97) << std::endl; } SuggestAutocomplete(new_root->english[i], prefix + char(i + 97)); } } } /** * @brief Lists out all the words in trie with the longest prefix * of the search key that is present in the trie. For example - if trie contains * "abc", "abcde", "abcdefg", "abcddef" and if the search key is "abcdezz", then * the longest common prefix is "abcde" and hence search results will be * "abcde", "abcdefg". * @param key the string key to be searched for suggestions */ void Tnode::SearchSuggestions(const std::string &key) { Tnode *cur_pos = nullptr, *prev_pos = nullptr; cur_pos = prev_pos = this; // maintaining 2 pointers, initialized to root int letter_index = 0; std::string prefix = ""; // variable storing the updated value of longest common prefix for (auto &i : key) { letter_index = tolower(i) - 97; prev_pos = cur_pos; // Previous pointer updated to point to the last // char of the longest common prefix // When the node for the character does not exist, longest prefix has // been determined and SuggestAutocomplete is called if (cur_pos->english[letter_index] == nullptr) { SuggestAutocomplete(prev_pos, prefix); std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " << std::endl; return; } // Updating the longest common prefix prefix += char(tolower(i)); cur_pos = cur_pos->english[letter_index]; } // If the key is a valid entry of trie, display it @ top of the suggestions if (cur_pos->endOfWord) { std::cout << key << std::endl; (cur_pos->frequency)++; } (void)prev_pos; // Idiom to ignore previous pointer // Call for suggestions when the search key is present as an entry/a prefix // in the trie SuggestAutocomplete(cur_pos, prefix); std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " << std::endl; return; } /** * @brief Function to display the 3 suggestions with highest frequency * of search hits * @param suggestions a max heap that contains pairs of (frequency, word) * heapified based on frequency */ void Tnode::SelectionTop_3( std::priority_queue > *suggestions) { // Display Either top 3 or total number of suggestions, whichever is smaller int n = suggestions->size(), Top = 0; Top = n < 3 ? n : 3; while (Top--) { std::cout << suggestions->top().second << std::endl; suggestions->pop(); } } /** * @brief Recursive function to suggest most frequently * searched entries of trie which have a given common prefix * @param new_root pointer pointing to the node corresponding to the last char * of prefix * @param prefix the common prefix that all the suggestions must have * @param suggestions a max heap that contains pairs of (frequency, word) * heapified based on frequency */ void Tnode::SuggestFreqAutocomplete( Tnode *new_root, const std::string &prefix, std::priority_queue > *suggestions) { int i = 0; for (i = 0; i < ENGLISH_ALPHABET_SIZE; i++) { if (new_root->english[i] != nullptr) { // Add to sugestions only if it's a valid complete entry and not // just a prefix if (new_root->english[i]->endOfWord) { suggestions->push(std::make_pair( new_root->english[i]->frequency, prefix + char(i + 97))); } SuggestFreqAutocomplete(new_root->english[i], prefix + char(i + 97), suggestions); } } } /** * @brief Lists out the most frequent words in trie with the * longest prefix of the search key that is present in the trie. For example - * if trie contains "abc", "abcde", "abcdefg", "abcddef" and they have been * previously searched for 3, 1, 2, 4 times respectively, if the search key is * "ab", then the longest common prefix is "ab" and only the top 3 frequencies * among the matches would be displayed viz. "abcddef", "abc", "abcdefg". * @param key the string key to be searched for suggestions */ void Tnode::SearchFreqSuggestions(const std::string &key) { Tnode *cur_pos = nullptr, *prev_pos = nullptr; cur_pos = prev_pos = this; // maintaining 2 pointers, initialized to root int letter_index = 0; std::string prefix = ""; // variable storing the updated value of longest common prefix std::priority_queue > suggestions; // max heap to store (frequency, word) in descending order // of freq std::priority_queue > *Suggestions = &suggestions; for (auto &i : key) { letter_index = tolower(i) - 97; prev_pos = cur_pos; // Previous pointer updated to point to the last // char of the longest common prefix // When the node for the character does not exist, longest prefix has // been determined and SuggestFreqAutocomplete is called if (cur_pos->english[letter_index] == nullptr) { SuggestFreqAutocomplete(prev_pos, prefix, Suggestions); // To display the top 3 results SelectionTop_3(Suggestions); std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " << std::endl; return; } // Updating the longest common prefix prefix += char(tolower(i)); cur_pos = cur_pos->english[letter_index]; } // If the key is a valid entry of trie, display it @ top of the suggestions if (cur_pos->endOfWord) { (cur_pos->frequency)++; std::cout << key << std::endl; } (void)prev_pos; // Idiom to ignore previous pointer // Call for Suggestions when the search key is present as an entry/a prefix // in the trie SuggestFreqAutocomplete(cur_pos, prefix, Suggestions); // Display the top 3 results SelectionTop_3(Suggestions); std::cout << "- - - - - - - - - - - - - - - - - - - - - - - - - - " << std::endl; return; } } // namespace trie_operations } // namespace operations_on_datastructures /** * @brief Function to test a simple search before and after deleting * an entry. And to test out the multiple variants of search. */ static void test() { auto root = new operations_on_datastructures::trie_operations::Tnode(); std::vector inputs = { "abcde", "sss", "ssss", "ssst", "sssu", "sssv", "sst", "ssts", "sstt", "sstu", "tutu", "tutuv", "tutuu", "tutuvs", "tutus", "tvst", "tvsu", "vvvv"}; for (auto &i : inputs) { root->Insert(i); } // Search an existing entry assert(root->SearchPresence("vvvv")); std::cout << root->SearchPresence("vvvv") << std::endl; // Delete it root->Delete("vvvv"); // Search for the entry again assert(!root->SearchPresence("vvvv")); std::cout << root->SearchPresence("vvvv") << std::endl; std::cout << root->SearchPresence("tutu") << std::endl; root->SearchSuggestions("tutu"); std::cout << root->SearchPresence("tutu") << std::endl; root->SearchSuggestions("tutuv"); std::cout << root->SearchPresence("tutuv") << std::endl; root->SearchSuggestions("tutuvs"); root->SearchFreqSuggestions( "tu"); // The top 3 frequent entries with prefix tu are tutu, tutuv & // tutuvs respectively root->SearchSuggestions( ""); // Empty search to list all the entries in the trie } /** * @brief Main function * @param argc commandline argument count (ignored) * @param argv commandline array of arguments (ignored) * @returns 0 on exit */ int main(int argc, char const *argv[]) { test(); // run self-test implementations return 0; }