diff --git a/data_structures/hash_set/Makefile b/data_structures/hash_set/Makefile
new file mode 100644
index 00000000..275be8bd
--- /dev/null
+++ b/data_structures/hash_set/Makefile
@@ -0,0 +1,20 @@
+# Builds the hash_set demo binary `main`.
+CC = gcc
+CFLAGS = -g -Wall
+
+.PHONY: all clean
+
+all: main
+
+main: main.o hash_set.o
+	$(CC) $(CFLAGS) $^ -o $@
+
+main.o: main.c hash_set.h
+	$(CC) $(CFLAGS) -c $<
+
+hash_set.o: hash_set.c hash_set.h
+	$(CC) $(CFLAGS) -c $<
+
+# -f keeps `clean` from failing when nothing has been built yet.
+clean:
+	rm -f *.o main
diff --git a/data_structures/hash_set/hash_set.c b/data_structures/hash_set/hash_set.c
new file mode 100644
index 00000000..1d340e68
--- /dev/null
+++ b/data_structures/hash_set/hash_set.c
@@ -0,0 +1,196 @@
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "hash_set.h"
+
+/*
+ * Pointer-identity set. An element is hashed by the NUL-terminated bytes it
+ * points to and owns exactly one slot of `keys`; `values` lists the live
+ * elements so the table can be rebuilt when it grows.
+ */
+
+/* Non-zero when `resident` hashes to exactly `h`. */
+static int hashes_to(void *resident, long long h)
+{
+    return hash(resident) == h;
+}
+
+/*
+ * Allocates an empty set with DEFAULT_HASH_SET_CAPACITY slots.
+ * Returns NULL when an allocation fails.
+ */
+extern hash_set_t *init_hash_set(void)
+{
+    hash_set_t *set = malloc(sizeof *set);
+
+    if (!set) {
+        return NULL;
+    }
+
+    set->keys = calloc(DEFAULT_HASH_SET_CAPACITY, sizeof *set->keys);
+    set->values = calloc(DEFAULT_HASH_SET_CAPACITY, sizeof *set->values);
+
+    if (!set->keys || !set->values) {
+        free(set->keys);
+        free(set->values);
+        free(set);
+        return NULL;
+    }
+
+    set->length = 0;
+    set->capacity = DEFAULT_HASH_SET_CAPACITY;
+
+    return set;
+}
+
+/* Adds `value`; returns 1 when it was inserted and 0 otherwise. */
+unsigned add(hash_set_t *set, void *value)
+{
+    return put(set, hash(value), value);
+}
+
+/*
+ * Stores `value` in the slot selected by `hash`, growing the table while the
+ * slot is taken by another element. Returns 1 on insertion. Returns 0 when
+ * `value` is already stored, when `set` or `value` is NULL, when the table
+ * cannot grow, or when the resident element has the same hash (growing can
+ * never separate the two).
+ */
+unsigned put(hash_set_t *set, long long hash, void *value)
+{
+    if (!set || !value) {
+        return 0;
+    }
+
+    for (;;) {
+        unsigned index = retrieve_index_from_hash(hash, set->capacity);
+        void *resident = set->keys[index];
+
+        if (!resident) {
+            set->keys[index] = value;
+            set->values[set->length++] = value;
+            return 1;
+        }
+
+        if (resident == value || hashes_to(resident, hash)) {
+            return 0;
+        }
+
+        // collision: retry in a larger table, unless it could not grow
+        unsigned previous_capacity = set->capacity;
+
+        resize(set);
+
+        if (set->capacity == previous_capacity) {
+            return 0;
+        }
+    }
+}
+
+/* Returns 1 when `value` (compared by address) is stored, else 0. */
+int contains(hash_set_t *set, void *value)
+{
+    if (!set || !value) {
+        return 0;
+    }
+
+    return set->keys[retrieve_index_from_hash(hash(value), set->capacity)] == value ? 1 : 0;
+}
+
+/* Returns 1 when the slot selected by `hash` is occupied, else 0. */
+int contains_hash(hash_set_t *set, long long hash)
+{
+    return set->keys[retrieve_index_from_hash(hash, set->capacity)] ? 1 : 0;
+}
+
+/* Removes `value` when it is stored; any other element is left untouched. */
+void delete(hash_set_t *set, void *value)
+{
+    if (!set || !value) {
+        return;
+    }
+
+    unsigned index = retrieve_index_from_hash(hash(value), set->capacity);
+
+    if (set->keys[index] != value) {
+        return;
+    }
+
+    set->keys[index] = NULL;
+
+    // drop it from `values` too, so a later resize cannot bring it back
+    for (unsigned i = 0; i < set->length; i++) {
+        if (set->values[i] == value) {
+            set->length--;
+            set->values[i] = set->values[set->length];
+            break;
+        }
+    }
+}
+
+/*
+ * Adler-32 checksum of the NUL-terminated bytes at `value`.
+ * A NULL `value` hashes like the empty string.
+ */
+long long hash(void *value)
+{
+    const unsigned char *str = value;
+    const uint32_t MODADLER = 65521;
+    uint32_t a = 1;
+    uint32_t b = 0;
+
+    if (!str) {
+        return a;
+    }
+
+    for (size_t i = 0; str[i] != '\0'; i++) {
+        a = (a + str[i]) % MODADLER;
+        b = (b + a) % MODADLER;
+    }
+
+    // unsigned arithmetic: b < 65521, so the shift cannot overflow
+    return (long long)((b << 16) | a);
+}
+
+/* Maps `hash` to a slot index; the mask assumes a power-of-two `capacity`. */
+unsigned retrieve_index_from_hash(const long long hash, const unsigned capacity)
+{
+    return (capacity - 1) & (hash ^ (hash >> 12));
+}
+
+/*
+ * Doubles the capacity and rehashes every live element. Capacity and stored
+ * elements are left unchanged when the table cannot grow.
+ */
+void resize(hash_set_t *set)
+{
+    unsigned new_capacity = set->capacity << 1;
+
+    // a zero or wrapped capacity, or one whose byte size overflows, cannot grow
+    if (new_capacity <= set->capacity || new_capacity > SIZE_MAX / sizeof *set->values) {
+        return;
+    }
+
+    void **new_values = realloc(set->values, new_capacity * sizeof *new_values);
+
+    if (!new_values) {
+        return;
+    }
+
+    set->values = new_values;
+
+    void **new_keys = calloc(new_capacity, sizeof *new_keys);
+
+    if (!new_keys) {
+        return;
+    }
+
+    for (unsigned i = 0; i < set->length; i++) {
+        new_keys[retrieve_index_from_hash(hash(set->values[i]), new_capacity)] = set->values[i];
+    }
+
+    free(set->keys);
+
+    set->keys = new_keys;
+    set->capacity = new_capacity;
+}
diff --git a/data_structures/hash_set/hash_set.h b/data_structures/hash_set/hash_set.h
new file mode 100644
index 00000000..cb79d24b
--- /dev/null
+++ b/data_structures/hash_set/hash_set.h
@@ -0,0 +1,41 @@
+#ifndef HASH_SET_H
+#define HASH_SET_H
+
+/* Initial slot count; parenthesized so it expands safely inside expressions. */
+#define DEFAULT_HASH_SET_CAPACITY (1 << 10)
+
+typedef struct {
+    unsigned capacity; /* number of slots in `keys` */
+    unsigned length;   /* number of live elements in `values` */
+    void **values;     /* live elements, used to rebuild `keys` on resize */
+    void **keys;       /* slot table indexed by retrieve_index_from_hash() */
+} hash_set_t;
+
+/* Allocates an empty set; returns NULL when an allocation fails. */
+extern hash_set_t *init_hash_set(void);
+
+/* Adds `value`; returns 1 when it was inserted and 0 otherwise. */
+extern unsigned add(hash_set_t *set, void *value);
+
+/* Inserts `value` under a precomputed `hash`; returns 1 when inserted, else 0. */
+unsigned put(hash_set_t *set, long long hash, void *value);
+
+/* Returns 1 when `value` (compared by address) is stored, else 0. */
+extern int contains(hash_set_t *set, void *value);
+
+/* Returns 1 when the slot selected by `hash` is occupied, else 0. */
+int contains_hash(hash_set_t *set, long long hash);
+
+/* Removes `value` when it is stored. */
+extern void delete(hash_set_t *set, void *value);
+
+/* Adler-32 checksum of the NUL-terminated bytes at `value`. */
+extern long long hash(void *value);
+
+/* Maps `hash` to a slot index for a table of `capacity` slots. */
+extern unsigned retrieve_index_from_hash(const long long hash, const unsigned capacity);
+
+/* Doubles the capacity and rehashes the live elements. */
+extern void resize(hash_set_t *set);
+
+#endif
diff --git a/data_structures/hash_set/main.c b/data_structures/hash_set/main.c
new file mode 100644
index 00000000..6517aea7
--- /dev/null
+++ b/data_structures/hash_set/main.c
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "hash_set.h"
+
+/* Demo: adds v1..v6 twice each, queries them and the never-added v7, then deletes v6. */
+int main(void)
+{
+    hash_set_t *set = init_hash_set();
+
+    if (!set) {
+        return 1;
+    }
+
+    int v1 = 10, v2 = 20, v3 = 30, v4 = 40, v5 = 50, v6 = 60, v7 = 70;
+
+    // add() returns unsigned, hence %u
+    printf("Value %d was add ? %u\n", v1, add(set, &v1));
+    printf("Value %d was add ? %u\n", v1, add(set, &v1));
+    printf("contains %d ? %d\n", v1, contains(set, &v1));
+
+    printf("Value %d was add ? %u\n", v2, add(set, &v2));
+    printf("Value %d was add ? %u\n", v2, add(set, &v2));
+    printf("contains %d ? %d\n", v2, contains(set, &v2));
+
+    printf("Value %d was add ? %u\n", v3, add(set, &v3));
+    printf("Value %d is add ? %u\n", v3, add(set, &v3));
+    printf("contains %d ? %d\n", v3, contains(set, &v3));
+
+    printf("Value %d was add ? %u\n", v4, add(set, &v4));
+    printf("Value %d was add ? %u\n", v4, add(set, &v4));
+    printf("contains %d ? %d\n", v4, contains(set, &v4));
+
+    printf("Value %d was add ? %u\n", v5, add(set, &v5));
+    printf("Value %d was add ? %u\n", v5, add(set, &v5));
+    printf("contains %d ? %d\n", v5, contains(set, &v5));
+
+    printf("Value %d is add ? %u\n", v6, add(set, &v6));
+    printf("Value %d is add ? %u\n", v6, add(set, &v6));
+    printf("contains %d ? %d\n", v6, contains(set, &v6));
+
+    printf("contains %d ? %d\n", v7, contains(set, &v7));
+
+    delete(set, &v6);
+
+    printf("contains %d ? %d\n", v6, contains(set, &v6));
+
+    free(set->keys);
+    free(set->values);
+    free(set);
+
+    return 0;
+}