diff --git a/CMakeLists.txt b/CMakeLists.txt
index c720fa1..d929f93 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,6 +7,8 @@ set(SimpleArchiver_SOURCES
     src/main.c
     src/parser.c
     src/data_structures/linked_list.c
+    src/data_structures/hash_map.c
+    src/algorithms/linear_congruential_gen.c
 )
 
 add_compile_options(
@@ -24,6 +26,8 @@ endif()
 add_executable(simplearchiver ${SimpleArchiver_SOURCES})
 
 add_executable(test_datastructures
-  src/data_structures/test.c
-  src/data_structures/linked_list.c
+    src/data_structures/test.c
+    src/data_structures/linked_list.c
+    src/data_structures/hash_map.c
+    src/algorithms/linear_congruential_gen.c
 )
diff --git a/cosmopolitan/Makefile b/cosmopolitan/Makefile
index d81601f..a9dfc20 100644
--- a/cosmopolitan/Makefile
+++ b/cosmopolitan/Makefile
@@ -6,11 +6,15 @@ OUTDIR = out
 SOURCES = \
 		../src/main.c \
 		../src/parser.c \
-		../src/data_structures/linked_list.c
+		../src/data_structures/linked_list.c \
+		../src/data_structures/hash_map.c \
+		../src/algorithms/linear_congruential_gen.c
 
 HEADERS = \
 		../src/parser.h \
-		../src/data_structures/linked_list.h
+		../src/data_structures/linked_list.h \
+		../src/data_structures/hash_map.h \
+		../src/algorithms/linear_congruential_gen.h
 
 OBJECTS = $(addprefix ${OBJDIR}/,$(subst ..,PREVDIR,$(patsubst %.c,%.c.o,${SOURCES})))
 
diff --git a/src/algorithms/linear_congruential_gen.c b/src/algorithms/linear_congruential_gen.c
new file mode 100644
index 0000000..da21c76
--- /dev/null
+++ b/src/algorithms/linear_congruential_gen.c
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2024 Stephen Seo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * `linear_congruential_gen.c` is the source for the linear congruential
+ * generator algorithm.
+ */
+
+#include "linear_congruential_gen.h"
+
+unsigned long long simple_archiver_algo_lcg(unsigned long long seed,
+                                            unsigned long long a,
+                                            unsigned long long c) {
+  // "m" is implicitly 2^64 (assuming a 64-bit unsigned long long): unsigned
+  // arithmetic wraps around on overflow, so no explicit modulo is needed.
+  return seed * a + c;
+}
diff --git a/src/algorithms/linear_congruential_gen.h b/src/algorithms/linear_congruential_gen.h
new file mode 100644
index 0000000..a30fe2a
--- /dev/null
+++ b/src/algorithms/linear_congruential_gen.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2024 Stephen Seo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * `linear_congruential_gen.h` is the header for the linear congruential
+ * generator algorithm.
+ */
+
+#ifndef SEODISPARATE_COM_ALGORITHMS_LINEAR_CONGRUENTIAL_GEN_H_
+#define SEODISPARATE_COM_ALGORITHMS_LINEAR_CONGRUENTIAL_GEN_H_
+
+#define SC_ALGO_LCG_DEFAULT_A 0x9ABD
+#define SC_ALGO_LCG_DEFAULT_C 0x2A9A9A9
+
+unsigned long long simple_archiver_algo_lcg(unsigned long long seed,
+                                            unsigned long long a,
+                                            unsigned long long c);
+
+#endif
diff --git a/src/data_structures/hash_map.c b/src/data_structures/hash_map.c
new file mode 100644
index 0000000..56df22a
--- /dev/null
+++ b/src/data_structures/hash_map.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2024 Stephen Seo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * `hash_map.c` is the source for a hash map implementation.
+ */
+
+#include "hash_map.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../algorithms/linear_congruential_gen.h"
+
+/// Entry stored as the data of a bucket's list node.
+typedef struct SDArchiverHashMapData {
+  void *value;
+  void *key;
+  unsigned int key_size;
+  void (*value_cleanup_fn)(void *);
+  void (*key_cleanup_fn)(void *);
+} SDArchiverHashMapData;
+
+/// Key description passed to list callbacks as their user data.
+typedef struct SDArchiverHashMapKeyData {
+  void *key;
+  unsigned int key_size;
+} SDArchiverHashMapKeyData;
+
+/// Releases ptr with cleanup_fn, or with "free" if cleanup_fn is NULL.
+static void simple_archiver_hash_map_internal_release(
+    void *ptr, void (*cleanup_fn)(void *)) {
+  if (!ptr) {
+    return;
+  }
+  if (cleanup_fn) {
+    cleanup_fn(ptr);
+  } else {
+    free(ptr);
+  }
+}
+
+/// List node cleanup callback: releases the value, the key and the entry.
+void simple_archiver_hash_map_internal_cleanup_data(void *data) {
+  SDArchiverHashMapData *hash_map_data = data;
+  if (!hash_map_data) {
+    return;
+  }
+
+  simple_archiver_hash_map_internal_release(hash_map_data->value,
+                                            hash_map_data->value_cleanup_fn);
+  simple_archiver_hash_map_internal_release(hash_map_data->key,
+                                            hash_map_data->key_cleanup_fn);
+  free(hash_map_data);
+}
+
+/// List predicate: returns 1 if the entry in data has the key given in ud.
+int simple_archiver_hash_map_internal_pick_in_list(void *data, void *ud) {
+  SDArchiverHashMapData *hash_map_data = data;
+  SDArchiverHashMapKeyData *key_data = ud;
+
+  if (!hash_map_data || !key_data ||
+      hash_map_data->key_size != key_data->key_size) {
+    return 0;
+  }
+  // Zero-length keys are trivially equal; never hand them to memcmp.
+  if (key_data->key_size == 0) {
+    return 1;
+  }
+  return memcmp(hash_map_data->key, key_data->key, key_data->key_size) == 0
+             ? 1
+             : 0;
+}
+
+/// Hashes the key_size bytes at key.
+/// Bytes are packed into 8-byte words, the words are summed, and the sum is
+/// passed through one step of the linear congruential generator.
+unsigned long long simple_archiver_hash_map_internal_key_to_hash(
+    void *key, unsigned int key_size) {
+  const unsigned char *bytes = key;
+  unsigned long long seed = 0;
+  unsigned long long temp = 0;
+  unsigned int count = 0;
+  if (bytes) {
+    for (unsigned int idx = 0; idx < key_size; ++idx) {
+      // Read with idx so that every byte of the key contributes, and shift
+      // each byte into its own position within the current word.
+      temp |= (unsigned long long)bytes[idx] << (8 * count);
+      if (++count >= 8) {
+        count = 0;
+        seed += temp;
+        temp = 0;
+      }
+    }
+  }
+  seed += temp;
+
+  return simple_archiver_algo_lcg(seed, SC_ALGO_LCG_DEFAULT_A,
+                                  SC_ALGO_LCG_DEFAULT_C);
+}
+
+/// Frees the first count bucket lists and then the bucket array itself.
+static void simple_archiver_hash_map_internal_free_buckets(
+    SDArchiverLinkedList **buckets, unsigned int count) {
+  if (!buckets) {
+    return;
+  }
+  for (unsigned int idx = 0; idx < count; ++idx) {
+    simple_archiver_list_free(buckets + idx);
+  }
+  free(buckets);
+}
+
+/// Allocates size empty bucket lists. Returns NULL on failure.
+static SDArchiverLinkedList **simple_archiver_hash_map_internal_new_buckets(
+    unsigned int size) {
+  SDArchiverLinkedList **buckets = malloc(sizeof(*buckets) * size);
+  if (!buckets) {
+    return NULL;
+  }
+  for (unsigned int idx = 0; idx < size; ++idx) {
+    buckets[idx] = simple_archiver_list_init();
+    if (!buckets[idx]) {
+      simple_archiver_hash_map_internal_free_buckets(buckets, idx);
+      return NULL;
+    }
+  }
+  return buckets;
+}
+
+/// Doubles the number of buckets and redistributes all entries.
+/// Returns 0 on success. On failure the hash map is left unchanged.
+int simple_archiver_hash_map_internal_rehash(SDArchiverHashMap **hash_map) {
+  if (!hash_map || !*hash_map) {
+    return 1;
+  }
+  SDArchiverHashMap *map = *hash_map;
+  if (map->buckets_size == 0 || map->buckets_size > UINT_MAX / 2) {
+    return 1;
+  }
+
+  const unsigned int new_size = map->buckets_size * 2;
+  SDArchiverLinkedList **new_buckets =
+      simple_archiver_hash_map_internal_new_buckets(new_size);
+  if (!new_buckets) {
+    return 1;
+  }
+
+  // Move the existing list nodes into the new buckets instead of inserting
+  // copies: nothing is allocated, so this cannot fail halfway, and no entry
+  // is leaked or owned by two lists at once.
+  for (unsigned int bucket_idx = 0; bucket_idx < map->buckets_size;
+       ++bucket_idx) {
+    SDArchiverLinkedList *old_list = map->buckets[bucket_idx];
+    SDArchiverLLNode *node = old_list->head->next;
+    while (node && node != old_list->tail) {
+      SDArchiverLLNode *next = node->next;
+      SDArchiverHashMapData *data = node->data;
+      if (data) {
+        unsigned long long hash =
+            simple_archiver_hash_map_internal_key_to_hash(data->key,
+                                                          data->key_size) %
+            new_size;
+        SDArchiverLinkedList *new_list = new_buckets[hash];
+
+        // Unlink from the old bucket.
+        node->prev->next = node->next;
+        node->next->prev = node->prev;
+        --old_list->count;
+
+        // Link at the front of the new bucket.
+        node->next = new_list->head->next;
+        node->prev = new_list->head;
+        new_list->head->next->prev = node;
+        new_list->head->next = node;
+        ++new_list->count;
+      }
+      node = next;
+    }
+  }
+
+  simple_archiver_hash_map_internal_free_buckets(map->buckets,
+                                                 map->buckets_size);
+  map->buckets = new_buckets;
+  map->buckets_size = new_size;
+
+  return 0;
+}
+
+/// Creates an empty hash map. Returns NULL on allocation failure.
+SDArchiverHashMap *simple_archiver_hash_map_init(void) {
+  SDArchiverHashMap *hash_map = malloc(sizeof(SDArchiverHashMap));
+  if (!hash_map) {
+    return NULL;
+  }
+
+  hash_map->buckets = simple_archiver_hash_map_internal_new_buckets(
+      SC_SA_DS_HASH_MAP_START_BUCKET_SIZE);
+  if (!hash_map->buckets) {
+    free(hash_map);
+    return NULL;
+  }
+  hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE;
+  hash_map->count = 0;
+
+  return hash_map;
+}
+
+/// Frees the bucket lists and the hash map, then sets *hash_map to NULL.
+void simple_archiver_hash_map_free(SDArchiverHashMap **hash_map) {
+  if (hash_map && *hash_map) {
+    simple_archiver_hash_map_internal_free_buckets((*hash_map)->buckets,
+                                                   (*hash_map)->buckets_size);
+    free(*hash_map);
+    *hash_map = NULL;
+  }
+}
+
+/// Adds an entry at the front of its bucket. Returns 0 on success.
+/// On failure value and key are released and 1 is returned.
+int simple_archiver_hash_map_insert(SDArchiverHashMap **hash_map, void *value,
+                                    void *key, unsigned int key_size,
+                                    void (*value_cleanup_fn)(void *),
+                                    void (*key_cleanup_fn)(void *)) {
+  SDArchiverHashMapData *data = NULL;
+  if (hash_map && *hash_map && (*hash_map)->buckets_size != 0 &&
+      (key || key_size == 0)) {
+    if ((*hash_map)->buckets_size <= (*hash_map)->count) {
+      // Growing is best effort: if it fails the entry still goes into the
+      // current buckets, which only makes lookups slower.
+      (void)simple_archiver_hash_map_internal_rehash(hash_map);
+    }
+    data = malloc(sizeof(SDArchiverHashMapData));
+  }
+
+  if (data) {
+    data->value = value;
+    data->key = key;
+    data->key_size = key_size;
+    data->value_cleanup_fn = value_cleanup_fn;
+    data->key_cleanup_fn = key_cleanup_fn;
+
+    unsigned long long hash =
+        simple_archiver_hash_map_internal_key_to_hash(key, key_size) %
+        (*hash_map)->buckets_size;
+    if (simple_archiver_list_add_front(
+            (*hash_map)->buckets[hash], data,
+            simple_archiver_hash_map_internal_cleanup_data) == 0) {
+      ++(*hash_map)->count;
+      return 0;
+    }
+    free(data);
+  }
+
+  // Every failure path ends here so that value and key are always released.
+  simple_archiver_hash_map_internal_release(value, value_cleanup_fn);
+  simple_archiver_hash_map_internal_release(key, key_cleanup_fn);
+  return 1;
+}
+
+/// Returns the value of the first entry found with the given key, or NULL if
+/// there is none.
+void *simple_archiver_hash_map_get(SDArchiverHashMap *hash_map, void *key,
+                                   unsigned int key_size) {
+  if (!hash_map || hash_map->buckets_size == 0 || (!key && key_size != 0)) {
+    return NULL;
+  }
+
+  SDArchiverHashMapKeyData key_data;
+  key_data.key = key;
+  key_data.key_size = key_size;
+
+  unsigned long long hash =
+      simple_archiver_hash_map_internal_key_to_hash(key, key_size) %
+      hash_map->buckets_size;
+  SDArchiverLinkedList *bucket = hash_map->buckets[hash];
+  for (SDArchiverLLNode *node = bucket->head->next;
+       node && node != bucket->tail; node = node->next) {
+    if (simple_archiver_hash_map_internal_pick_in_list(node->data,
+                                                       &key_data)) {
+      return ((SDArchiverHashMapData *)node->data)->value;
+    }
+  }
+
+  return NULL;
+}
+
+/// Removes every entry with the given key.
+/// Returns 0 if one entry was removed, 1 if several were, 2 if none was.
+int simple_archiver_hash_map_remove(SDArchiverHashMap *hash_map, void *key,
+                                    unsigned int key_size) {
+  if (!hash_map || hash_map->buckets_size == 0 || (!key && key_size != 0)) {
+    return 2;
+  }
+
+  SDArchiverHashMapKeyData key_data;
+  key_data.key = key;
+  key_data.key_size = key_size;
+
+  unsigned long long hash =
+      simple_archiver_hash_map_internal_key_to_hash(key, key_size) %
+      hash_map->buckets_size;
+  int result = simple_archiver_list_remove(
+      hash_map->buckets[hash], simple_archiver_hash_map_internal_pick_in_list,
+      &key_data);
+  if (result < 1) {
+    return 2;
+  }
+
+  // Keep the entry count in step with the buckets; otherwise removed entries
+  // would still count towards the next rehash.
+  const unsigned int removed = (unsigned int)result;
+  hash_map->count = hash_map->count > removed ? hash_map->count - removed : 0;
+
+  return result == 1 ? 0 : 1;
+}
diff --git a/src/data_structures/hash_map.h b/src/data_structures/hash_map.h
new file mode 100644
index 0000000..7a61794
--- /dev/null
+++ b/src/data_structures/hash_map.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2024 Stephen Seo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * `hash_map.h` is the header for a hash map implementation.
+ */
+
+#ifndef SEODISPARATE_COM_SIMPLE_ARCHIVER_DATA_STRUCTURE_HASH_MAP_H_
+#define SEODISPARATE_COM_SIMPLE_ARCHIVER_DATA_STRUCTURE_HASH_MAP_H_
+
+#define SC_SA_DS_HASH_MAP_START_BUCKET_SIZE 32
+
+#include "linked_list.h"
+
+typedef struct SDArchiverHashMap {
+  SDArchiverLinkedList **buckets;
+  unsigned int buckets_size;
+  unsigned int count;
+} SDArchiverHashMap;
+
+SDArchiverHashMap *simple_archiver_hash_map_init(void);
+void simple_archiver_hash_map_free(SDArchiverHashMap **hash_map);
+
+/// Returns zero on success.
+/// On failure, frees the value and key using the given functions.
+/// key must remain valid for the lifetime of its entry in the hash map.
+/// If value_cleanup_fn is NULL, then "free" is used instead.
+/// If key_cleanup_fn is NULL, then "free" is used instead.
+int simple_archiver_hash_map_insert(SDArchiverHashMap **hash_map, void *value,
+                                    void *key, unsigned int key_size,
+                                    void (*value_cleanup_fn)(void *),
+                                    void (*key_cleanup_fn)(void *));
+
+/// Returns NULL if not found.
+void *simple_archiver_hash_map_get(SDArchiverHashMap *hash_map, void *key,
+                                   unsigned int key_size);
+
+/// Returns zero on success. Returns one if more than one entry was removed.
+/// Otherwise returns non-zero and non-one value on error.
+int simple_archiver_hash_map_remove(SDArchiverHashMap *hash_map, void *key,
+                                    unsigned int key_size);
+
+#endif
diff --git a/src/data_structures/linked_list.c b/src/data_structures/linked_list.c
index e2af7b4..c4ad0dd 100644
--- a/src/data_structures/linked_list.c
+++ b/src/data_structures/linked_list.c
@@ -84,8 +84,35 @@ int simple_archiver_list_add(SDArchiverLinkedList *list, void *data,
   return 0;
 }
 
+int simple_archiver_list_add_front(SDArchiverLinkedList *list, void *data,
+                                   void (*data_free_fn)(void *)) {
+  if (!list) {
+    return 1;
+  }
+
+  SDArchiverLLNode *new_node = malloc(sizeof(SDArchiverLLNode));
+  if (!new_node) {
+    // Out of memory: report failure and leave the list untouched.
+    return 1;
+  }
+  new_node->data = data;
+  new_node->data_free_fn = data_free_fn;
+
+  // Splice the node in between the head node and the current first node.
+  new_node->next = list->head->next;
+  new_node->prev = list->head;
+
+  list->head->next->prev = new_node;
+  list->head->next = new_node;
+
+  ++list->count;
+
+  return 0;
+}
+
 int simple_archiver_list_remove(SDArchiverLinkedList *list,
-                                int (*data_check_fn)(void *)) {
+                                int (*data_check_fn)(void *, void *),
+                                void *user_data) {
   if (!list) {
     return 0;
   }
@@ -100,7 +127,7 @@ int simple_archiver_list_remove(SDArchiverLinkedList *list,
     }
     iter_removed = 0;
     if (node && node != list->tail) {
-      if (data_check_fn(node->data) != 0) {
+      if (data_check_fn(node->data, user_data) != 0) {
         SDArchiverLLNode *temp = node->next;
 
         if (node->data_free_fn) {
@@ -125,7 +152,8 @@ int simple_archiver_list_remove(SDArchiverLinkedList *list,
 }
 
 int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
-                                     int (*data_check_fn)(void *)) {
+                                     int (*data_check_fn)(void *, void *),
+                                     void *user_data) {
   if (!list) {
     return 0;
   }
@@ -134,7 +162,7 @@ int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
   while (node) {
     node = node->next;
     if (node && node != list->tail) {
-      if (data_check_fn(node->data) != 0) {
+      if (data_check_fn(node->data, user_data) != 0) {
         if (node->data_free_fn) {
           node->data_free_fn(node->data);
         } else {
@@ -156,7 +184,8 @@ int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
 }
 
 void *simple_archiver_list_get(SDArchiverLinkedList *list,
-                               int (*data_check_fn)(void *)) {
+                               int (*data_check_fn)(void *, void *),
+                               void *user_data) {
   if (!list) {
     return NULL;
   }
@@ -165,7 +194,7 @@ void *simple_archiver_list_get(SDArchiverLinkedList *list,
   while (node) {
     node = node->next;
     if (node && node != list->tail) {
-      if (data_check_fn(node->data) != 0) {
+      if (data_check_fn(node->data, user_data) != 0) {
         return node->data;
       }
     }
diff --git a/src/data_structures/linked_list.h b/src/data_structures/linked_list.h
index 93609d5..fbebf8e 100644
--- a/src/data_structures/linked_list.h
+++ b/src/data_structures/linked_list.h
@@ -35,26 +35,38 @@ typedef struct SDArchiverLinkedList {
 SDArchiverLinkedList *simple_archiver_list_init(void);
 void simple_archiver_list_free(SDArchiverLinkedList **list);
 
-/// Returns 0 on success.
+/// Returns 0 on success. Puts data at the end of the list.
+/// If data_free_fn is NULL, then "free" is used instead.
 int simple_archiver_list_add(SDArchiverLinkedList *list, void *data,
                              void (*data_free_fn)(void *));
 
+/// Returns 0 on success. Puts data at the front of the list.
+/// If data_free_fn is NULL, then "free" is used instead.
+int simple_archiver_list_add_front(SDArchiverLinkedList *list, void *data,
+                                   void (*data_free_fn)(void *));
+
 /// Returns number of removed items.
 /// data_check_fn must return non-zero if the data passed to it is to be
 /// removed.
+/// user_data is passed to data_check_fn as its second argument.
 int simple_archiver_list_remove(SDArchiverLinkedList *list,
-                                int (*data_check_fn)(void *));
+                                int (*data_check_fn)(void *, void *),
+                                void *user_data);
 
 /// Returns 1 on removed, 0 if not removed.
 /// data_check_fn must return non-zero if the data passed to it is to be
 /// removed.
+/// user_data is passed to data_check_fn as its second argument.
 int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
-                                     int (*data_check_fn)(void *));
+                                     int (*data_check_fn)(void *, void *),
+                                     void *user_data);
 
 /// Returns non-null on success.
 /// data_check_fn must return non-zero if the data passed to it is to be
 /// returned.
+/// user_data is passed to data_check_fn as its second argument.
 void *simple_archiver_list_get(SDArchiverLinkedList *list,
-                               int (*data_check_fn)(void *));
+                               int (*data_check_fn)(void *, void *),
+                               void *user_data);
 
 #endif
diff --git a/src/data_structures/test.c b/src/data_structures/test.c
index 4121fb7..721232e 100644
--- a/src/data_structures/test.c
+++ b/src/data_structures/test.c
@@ -17,8 +17,10 @@
  */
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 
+#include "hash_map.h"
 #include "linked_list.h"
 
 static int checks_checked = 0;
@@ -45,11 +47,17 @@ static int checks_passed = 0;
 
 void no_free_fn(__attribute__((unused)) void *unused) { return; }
 
-int get_one_fn(void *data) { return strcmp(data, "one") == 0 ? 1 : 0; }
+int get_one_fn(void *data, __attribute__((unused)) void *ud) {
+  return strcmp(data, "one") == 0 ? 1 : 0;
+}
 
-int get_two_fn(void *data) { return strcmp(data, "two") == 0 ? 1 : 0; }
+int get_two_fn(void *data, __attribute__((unused)) void *ud) {
+  return strcmp(data, "two") == 0 ? 1 : 0;
+}
 
-int get_three_fn(void *data) { return strcmp(data, "three") == 0 ? 1 : 0; }
+int get_three_fn(void *data, __attribute__((unused)) void *ud) {
+  return strcmp(data, "three") == 0 ? 1 : 0;
+}
 
 int main(void) {
   // Test LinkedList.
@@ -74,28 +82,86 @@ int main(void) {
 
     CHECK_TRUE(list->count == 3);
 
-    void *ptr = simple_archiver_list_get(list, get_one_fn);
+    void *ptr = simple_archiver_list_get(list, get_one_fn, NULL);
     CHECK_TRUE(ptr == one);
 
-    ptr = simple_archiver_list_get(list, get_two_fn);
+    ptr = simple_archiver_list_get(list, get_two_fn, NULL);
     CHECK_TRUE(ptr == two);
 
-    ptr = simple_archiver_list_get(list, get_three_fn);
+    ptr = simple_archiver_list_get(list, get_three_fn, NULL);
     CHECK_TRUE(ptr == three);
 
-    CHECK_TRUE(simple_archiver_list_remove(list, get_two_fn) == 1);
+    CHECK_TRUE(simple_archiver_list_remove(list, get_two_fn, NULL) == 1);
     CHECK_TRUE(list->count == 2);
-    CHECK_TRUE(simple_archiver_list_get(list, get_two_fn) == NULL);
+    CHECK_TRUE(simple_archiver_list_get(list, get_two_fn, NULL) == NULL);
 
-    CHECK_TRUE(simple_archiver_list_remove_once(list, get_one_fn) == 1);
+    CHECK_TRUE(simple_archiver_list_remove_once(list, get_one_fn, NULL) == 1);
     CHECK_TRUE(list->count == 1);
-    CHECK_TRUE(simple_archiver_list_get(list, get_one_fn) == NULL);
+    CHECK_TRUE(simple_archiver_list_get(list, get_one_fn, NULL) == NULL);
 
     simple_archiver_list_free(&list);
 
     CHECK_TRUE(list == NULL);
   }
 
+  // Test HashMap.
+  {
+    SDArchiverHashMap *hash_map = simple_archiver_hash_map_init();
+    simple_archiver_hash_map_free(&hash_map);
+    CHECK_TRUE(hash_map == NULL);
+
+    hash_map = simple_archiver_hash_map_init();
+    CHECK_TRUE(hash_map != NULL);
+
+    {
+      int *value, *key;
+
+      for (unsigned int idx = 0; idx < 20; ++idx) {
+        value = malloc(sizeof(int));
+        key = malloc(sizeof(int));
+        *value = idx;
+        *key = idx;
+        // Insertion reports failure through its return value; check it.
+        int insert_ret = simple_archiver_hash_map_insert(
+            &hash_map, value, key, sizeof(int), NULL, NULL);
+        CHECK_TRUE(insert_ret == 0);
+      }
+    }
+    CHECK_TRUE(hash_map->count == 20);
+
+    int value, key, ret;
+    void *value_ptr;
+
+    for (value = 0, key = 0; value < 20 && key < 20; ++value, ++key) {
+      value_ptr = simple_archiver_hash_map_get(hash_map, &key, sizeof(int));
+      CHECK_TRUE(value_ptr != NULL);
+      // Guard the comparison so a failed lookup never reaches memcmp.
+      CHECK_TRUE(value_ptr && memcmp(value_ptr, &value, sizeof(int)) == 0);
+    }
+
+    key = 5;
+    ret = simple_archiver_hash_map_remove(hash_map, &key, sizeof(int));
+    CHECK_TRUE(ret == 0);
+    key = 15;
+    ret = simple_archiver_hash_map_remove(hash_map, &key, sizeof(int));
+    CHECK_TRUE(ret == 0);
+
+    // Removing a key that is already gone must be reported as an error.
+    ret = simple_archiver_hash_map_remove(hash_map, &key, sizeof(int));
+    CHECK_TRUE(ret != 0 && ret != 1);
+
+    for (value = 0, key = 0; value < 20 && key < 20; ++value, ++key) {
+      value_ptr = simple_archiver_hash_map_get(hash_map, &key, sizeof(int));
+      if (key != 5 && key != 15) {
+        CHECK_TRUE(value_ptr != NULL);
+        CHECK_TRUE(value_ptr && memcmp(value_ptr, &value, sizeof(int)) == 0);
+      } else {
+        CHECK_TRUE(value_ptr == NULL);
+      }
+    }
+
+    simple_archiver_hash_map_free(&hash_map);
+  }
+
   printf("Checks checked: %u\n", checks_checked);
   printf("Checks passed: %u\n", checks_passed);
   return checks_passed == checks_checked ? 0 : 1;