mirror of
https://github.moeyy.xyz/https://github.com/TheAlgorithms/C.git
synced 2023-10-11 15:56:24 +08:00
2314a19586
The trie example had some issues: * It did not follow the code convention in CONTRIBUTING.md * The createTrieNode used an inefficient zeroing method (looping over the entries) which also does not zero out holes in the structure (e.g. an alternative would be to use "*node = &(TrieNode){0}", but calloc does all that anyway) * It used an inefficient and clumsy printArray method * It used strlen inside the algorithm; this new method could get rid of any strlen/strnlen usage (inserts/searches could be sanitized by snprintf) * This version can allow for a custom mapping function, e.g. if NULL is a valid separator (say that you want a trie for certain binary packages) * The previous version actually contained out-of-bounds array indexing; there were no checks for out-of-bounds indexing and words in the word list did contain out-of-bounds characters. It's a surprise it was working so well. * This version just returns 'int' to allow for error checks (instead of a printf inside the algorithm), and uses double pointers for return values (good practice) * The usage example contained unnecessary mallocs, switched that out for scanf. The example is just an example after all; in real applications you'd have better input sanitization.
207 lines
5.1 KiB
C
207 lines
5.1 KiB
C
/*------------------Trie Data Structure----------------------------------*/
|
|
/*----------Implemented to search for a word in a dictionary-------------*/
|
|
|
|
/*--character - 97 ('a') maps a lowercase letter to its child-array index--*/
|
|
|
|
// needed for strnlen
|
|
#define _POSIX_C_SOURCE 200809L
|
|
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* Number of distinct letters a node can branch on ('a' .. 'z'). */
#define ALPHABET_SIZE 26

/**
 * A single node of the trie.
 *
 * children[i] points to the sub-trie reached by following the letter
 * 'a' + i from this node, or is NULL when no stored word continues with
 * that letter. end_of_word is true when the letters on the path leading
 * to this node spell a complete stored word.
 */
struct trie {
    struct trie *children[ALPHABET_SIZE];
    bool end_of_word;
};
|
|
|
|
|
|
/*--Create new trie node--*/
|
|
int trie_new (
|
|
struct trie ** trie
|
|
)
|
|
{
|
|
*trie = calloc(1, sizeof(struct trie));
|
|
if (NULL == *trie) {
|
|
// memory allocation failed
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*--Insert new word to Trie--*/
|
|
int trie_insert (
|
|
struct trie * trie,
|
|
char *word,
|
|
unsigned word_len
|
|
)
|
|
{
|
|
int ret = 0;
|
|
|
|
// this is the end of this word; add an end-of-word marker here and we're
|
|
// done.
|
|
if (0 == word_len) {
|
|
trie->end_of_word = true;
|
|
return 0;
|
|
}
|
|
|
|
// if you have some more complex mapping, you could introduce one here. In
|
|
// this easy example, we just subtract 'a' (97) from it, meaning that 'a' is 0,
|
|
// 'b' is 1, and so on.
|
|
const unsigned int index = word[0] - 'a';
|
|
|
|
// this index is outside the alphabet size; indexing this would mean an
|
|
// out-of-bound memory access (bad!). If you introduce a separate map
|
|
// function for indexing, then you could move the out-of-bounds index in
|
|
// there.
|
|
if (ALPHABET_SIZE <= index) {
|
|
return -1;
|
|
}
|
|
|
|
// The index does not exist yet, allocate it.
|
|
if (NULL == trie->children[index]) {
|
|
ret = trie_new(&trie->children[index]);
|
|
if (-1 == ret) {
|
|
// creating new trie node failed
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// recurse into the child node
|
|
return trie_insert(
|
|
/* trie = */ trie->children[index],
|
|
/* word = */ word + 1,
|
|
/* word_len = */ word_len - 1
|
|
);
|
|
}
|
|
|
|
|
|
/*--Search a word in the Trie--*/
|
|
int trie_search(
|
|
struct trie * trie,
|
|
char *word,
|
|
unsigned word_len,
|
|
struct trie ** result
|
|
)
|
|
{
|
|
// we found a match
|
|
if (0 == word_len) {
|
|
*result = trie;
|
|
return 0;
|
|
}
|
|
|
|
// same here as in trie_insert, if you have a separate index mapping, add
|
|
// it here. In this example, we just subtract 'a'.
|
|
const unsigned int index = word[0] - 'a';
|
|
|
|
// This word contains letters outside the alphabet length; it's invalid.
|
|
// Remember to do this to prevent buffer overflows.
|
|
if (ALPHABET_SIZE <= index) {
|
|
return -1;
|
|
}
|
|
|
|
// No match
|
|
if (NULL == trie->children[index]) {
|
|
return -1;
|
|
}
|
|
|
|
// traverse the trie
|
|
return trie_search(
|
|
/* trie = */ trie->children[index],
|
|
/* word = */ word + 1,
|
|
/* word_len = */ word_len - 1,
|
|
/* result = */ result
|
|
);
|
|
}
|
|
|
|
/*---Return all the related words------*/
|
|
void trie_print (
|
|
struct trie * trie,
|
|
char prefix[],
|
|
unsigned prefix_len
|
|
)
|
|
{
|
|
|
|
// An end-of-word marker means that this is a complete word, print it.
|
|
if (true == trie->end_of_word) {
|
|
printf("%.*s\n", prefix_len, prefix);
|
|
}
|
|
|
|
// However, there can be longer words with the same prefix; traverse into
|
|
// those as well.
|
|
for (int i = 0; i < ALPHABET_SIZE; i++) {
|
|
|
|
// No words on this character
|
|
if (NULL == trie->children[i]) {
|
|
continue;
|
|
}
|
|
|
|
// If you have a separate index mapping, then you'd need the inverse of
|
|
// the map here. Since we subtracted 'a' for the index, we can just add
|
|
// 'a' to get the inverse map function.
|
|
prefix[prefix_len] = i + 'a';
|
|
|
|
// traverse the print into the child
|
|
trie_print(trie->children[i], prefix, prefix_len + 1);
|
|
}
|
|
}
|
|
|
|
|
|
/*------Demonstrate purposes uses text file called dictionary -------*/
|
|
|
|
int main() {
|
|
int ret = 0;
|
|
struct trie * root = NULL;
|
|
struct trie * trie = NULL;
|
|
char word[100] = {0};
|
|
|
|
// Create a root trie
|
|
ret = trie_new(&root);
|
|
if (-1 == ret) {
|
|
fprintf(stderr, "Could not create trie\n");
|
|
exit(1);
|
|
}
|
|
|
|
// open the dictionary file
|
|
FILE *fp = fopen("dictionary.txt", "r");
|
|
if (NULL == fp) {
|
|
fprintf(stderr, "Error while opening dictionary file");
|
|
exit(1);
|
|
}
|
|
|
|
// insert all the words from the dictionary
|
|
while (1 == fscanf(fp, "%100s\n", word)) {
|
|
ret = trie_insert(root, word, strnlen(word, 100));
|
|
if (-1 == ret) {
|
|
fprintf(stderr, "Could not insert word into trie\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
while (1) {
|
|
printf("Enter keyword: ");
|
|
if (1 != scanf("%100s", word)) {
|
|
break;
|
|
}
|
|
|
|
printf(
|
|
"\n==========================================================\n");
|
|
printf("\n********************* Possible Words ********************\n");
|
|
|
|
ret = trie_search(root, word, strnlen(word, 100), &trie);
|
|
if (-1 == ret) {
|
|
printf("No results\n");
|
|
continue;
|
|
}
|
|
|
|
trie_print(trie, word, strnlen(word, 100));
|
|
|
|
printf("\n==========================================================\n");
|
|
}
|
|
}
|