Impl. hash_map

Also implemented the linear congruential generator algorithm for
generating hash values for the hash_map.

Tweaks to linked_list api to support "check functions" with supplied
user data.
This commit is contained in:
Stephen Seo 2024-06-28 17:31:34 +09:00
parent 6ac7edbb3b
commit 4d260e4a7b
9 changed files with 473 additions and 24 deletions

View file

@ -7,6 +7,8 @@ set(SimpleArchiver_SOURCES
src/main.c src/main.c
src/parser.c src/parser.c
src/data_structures/linked_list.c src/data_structures/linked_list.c
src/data_structures/hash_map.c
src/algorithms/linear_congruential_gen.c
) )
add_compile_options( add_compile_options(
@ -24,6 +26,8 @@ endif()
add_executable(simplearchiver ${SimpleArchiver_SOURCES}) add_executable(simplearchiver ${SimpleArchiver_SOURCES})
add_executable(test_datastructures add_executable(test_datastructures
src/data_structures/test.c src/data_structures/test.c
src/data_structures/linked_list.c src/data_structures/linked_list.c
src/data_structures/hash_map.c
src/algorithms/linear_congruential_gen.c
) )

View file

@ -6,11 +6,13 @@ OUTDIR = out
SOURCES = \ SOURCES = \
../src/main.c \ ../src/main.c \
../src/parser.c \ ../src/parser.c \
../src/data_structures/linked_list.c ../src/data_structures/linked_list.c \
../src/data_structures/hash_map.c
HEADERS = \ HEADERS = \
../src/parser.h \ ../src/parser.h \
../src/data_structures/linked_list.h ../src/data_structures/linked_list.h \
../src/data_structures/hash_map.h
OBJECTS = $(addprefix ${OBJDIR}/,$(subst ..,PREVDIR,$(patsubst %.c,%.c.o,${SOURCES}))) OBJECTS = $(addprefix ${OBJDIR}/,$(subst ..,PREVDIR,$(patsubst %.c,%.c.o,${SOURCES})))

View file

@ -0,0 +1,27 @@
/*
* Copyright 2024 Stephen Seo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* `linear_congruential_gen.c` is the source for the linear congruential
* generator algorithm.
*/
#include "linear_congruential_gen.h"
/// Advances a linear congruential generator by one step.
/// Computes "seed * a + c". No explicit modulus "m" is applied: unsigned
/// arithmetic wraps at the width of unsigned long long, so "m" is implicitly
/// 2^64 wherever that type is 64 bits wide.
unsigned long long simple_archiver_algo_lcg(unsigned long long seed,
                                            unsigned long long a,
                                            unsigned long long c) {
  unsigned long long next = seed;
  next *= a;
  next += c;
  return next;
}

View file

@ -0,0 +1,30 @@
/*
* Copyright 2024 Stephen Seo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* `linear_congruential_gen.h` is the header for the linear congruential
* generator algorithm.
*/
#ifndef SEODISPARATE_COM_ALGORITHMS_LINEAR_CONGRUENTIAL_GEN_H_
#define SEODISPARATE_COM_ALGORITHMS_LINEAR_CONGRUENTIAL_GEN_H_
/// Default multiplier ("a") to pass to simple_archiver_algo_lcg.
#define SC_ALGO_LCG_DEFAULT_A 0x9ABD
/// Default increment ("c") to pass to simple_archiver_algo_lcg.
#define SC_ALGO_LCG_DEFAULT_C 0x2A9A9A9
/// Performs one linear congruential generator step and returns the result,
/// i.e. "seed * a + c" in unsigned long long arithmetic (which wraps instead
/// of overflowing, so no separate modulus parameter is taken).
unsigned long long simple_archiver_algo_lcg(unsigned long long seed,
unsigned long long a,
unsigned long long c);
#endif

View file

@ -0,0 +1,244 @@
/*
* Copyright 2024 Stephen Seo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* `hash_map.c` is the source for a hash map implementation.
*/
#include "hash_map.h"
#include <stdlib.h>
#include <string.h>
#include "../algorithms/linear_congruential_gen.h"
/// One stored entry. An instance is heap-allocated per insert and attached as
/// the "data" of a node in the bucket's linked list.
typedef struct SDArchiverHashMapData {
// Stored value pointer; this is what simple_archiver_hash_map_get returns.
void *value;
// Key bytes. Compared with memcmp over key_size bytes during lookups.
void *key;
// Number of bytes at "key".
unsigned int key_size;
// Called on "value" when the entry is cleaned up. If NULL, "free" is used.
void (*value_cleanup_fn)(void *);
// Called on "key" when the entry is cleaned up. If NULL, "free" is used.
void (*key_cleanup_fn)(void *);
} SDArchiverHashMapData;
/// Key description passed as the "user data" argument of a linked-list check
/// function, so that the check function knows which key to look for.
typedef struct SDArchiverHashMapKeyData {
// Key bytes to search for. Only read by the check function, never freed.
void *key;
// Number of bytes at "key".
unsigned int key_size;
} SDArchiverHashMapKeyData;
/// Cleanup function registered with each bucket list for its entries.
/// "data" must be NULL or point to a heap-allocated SDArchiverHashMapData.
/// Releases the value and the key (each with its own cleanup function, or
/// with "free" when that function is NULL) and then the entry itself.
/// A NULL "data" is ignored.
void simple_archiver_hash_map_internal_cleanup_data(void *data) {
  SDArchiverHashMapData *hash_map_data = data;
  if (!hash_map_data) {
    // Rehashing leaves nodes whose data pointer was reset to NULL; tolerate
    // being invoked on such a node instead of dereferencing NULL.
    return;
  }

  if (hash_map_data->value) {
    if (hash_map_data->value_cleanup_fn) {
      hash_map_data->value_cleanup_fn(hash_map_data->value);
    } else {
      free(hash_map_data->value);
    }
  }

  if (hash_map_data->key) {
    if (hash_map_data->key_cleanup_fn) {
      hash_map_data->key_cleanup_fn(hash_map_data->key);
    } else {
      free(hash_map_data->key);
    }
  }

  free(hash_map_data);
}
/// Linked-list check function used to select entries by key.
/// "data" is a list entry (SDArchiverHashMapData) and "ud" is the key being
/// searched for (SDArchiverHashMapKeyData).
/// Returns 1 if both keys have the same size and identical bytes, 0 otherwise.
int simple_archiver_hash_map_internal_pick_in_list(void *data, void *ud) {
  SDArchiverHashMapData *entry = data;
  SDArchiverHashMapKeyData *wanted = ud;

  // Sizes are compared first so memcmp only runs on equally sized keys.
  if (entry->key_size != wanted->key_size) {
    return 0;
  }
  if (memcmp(entry->key, wanted->key, wanted->key_size) != 0) {
    return 0;
  }
  return 1;
}
/// Turns "key_size" bytes at "key" into a hash value.
/// The key is consumed in groups of up to eight bytes. Each group is packed
/// into one unsigned long long (first byte in the lowest 8 bits), the packed
/// groups are summed into a seed, and the seed is passed through one step of
/// the linear congruential generator with the default parameters.
/// The caller reduces the result modulo the bucket count.
unsigned long long simple_archiver_hash_map_internal_key_to_hash(
    void *key, unsigned int key_size) {
  const unsigned char *bytes = key;
  unsigned long long seed = 0;
  unsigned long long temp = 0;
  unsigned int count = 0;

  for (unsigned int idx = 0; idx < key_size; ++idx) {
    // Index with idx so every byte of the key is read, and shift each byte to
    // its own position so bytes in a group do not overwrite each other.
    temp |= (unsigned long long)bytes[idx] << (8 * count);
    ++count;
    if (count >= 8) {
      seed += temp;
      temp = 0;
      count = 0;
    }
  }

  // Fold in the trailing partial group; it is 0 when there is none.
  seed += temp;

  return simple_archiver_algo_lcg(seed, SC_ALGO_LCG_DEFAULT_A,
                                  SC_ALGO_LCG_DEFAULT_C);
}
/// Replaces *hash_map with a map that has twice as many buckets and moves
/// every entry over to it.
/// Returns 0 on success.
/// Returns non-zero and leaves *hash_map untouched if hash_map or *hash_map is
/// NULL, if doubling the bucket count would wrap, or if allocation fails.
/// Also returns non-zero if an entry could not be inserted into the new map;
/// in that case *hash_map has still been replaced and that entry is gone
/// (insert releases the key and value it fails to store).
int simple_archiver_hash_map_internal_rehash(SDArchiverHashMap **hash_map) {
  if (!hash_map || !*hash_map) {
    return 1;
  }

  const unsigned int old_size = (*hash_map)->buckets_size;
  const unsigned int new_size = old_size * 2;
  if (new_size <= old_size) {
    // The multiplication wrapped around (or old_size was 0); a map with no
    // buckets cannot hold anything.
    return 1;
  }

  SDArchiverHashMap *new_hash_map = malloc(sizeof *new_hash_map);
  if (!new_hash_map) {
    return 1;
  }
  // calloc checks the element-count multiplication for overflow.
  new_hash_map->buckets = calloc(new_size, sizeof *new_hash_map->buckets);
  if (!new_hash_map->buckets) {
    free(new_hash_map);
    return 1;
  }
  new_hash_map->buckets_size = new_size;
  new_hash_map->count = 0;

  for (unsigned int idx = 0; idx < new_size; ++idx) {
    new_hash_map->buckets[idx] = simple_archiver_list_init();
    if (!new_hash_map->buckets[idx]) {
      // Defensive: treat a NULL list as an allocation failure and undo the
      // buckets that were already created.
      while (idx > 0) {
        --idx;
        simple_archiver_list_free(new_hash_map->buckets + idx);
      }
      free(new_hash_map->buckets);
      free(new_hash_map);
      return 1;
    }
  }

  int status = 0;

  // Iterate through the old hash map to populate the new hash map.
  for (unsigned int bucket_idx = 0; bucket_idx < old_size; ++bucket_idx) {
    SDArchiverLinkedList *bucket = (*hash_map)->buckets[bucket_idx];
    SDArchiverLLNode *node = bucket->head;
    while (node) {
      node = node->next;
      if (node && node != bucket->tail && node->data) {
        SDArchiverHashMapData *data = node->data;
        if (simple_archiver_hash_map_insert(
                &new_hash_map, data->value, data->key, data->key_size,
                data->value_cleanup_fn, data->key_cleanup_fn) != 0) {
          status = 1;
        }
        // The key and value now belong to the new map (or were released by
        // the failed insert). Detach the old entry from its node so freeing
        // the old map does not touch them again, and free only the old entry
        // struct, which insert replaced with a newly allocated one.
        node->data = NULL;
        free(data);
      }
    }
  }

  simple_archiver_hash_map_free(hash_map);
  *hash_map = new_hash_map;
  return status;
}
/// Creates an empty hash map with SC_SA_DS_HASH_MAP_START_BUCKET_SIZE buckets,
/// each bucket being an empty linked list.
/// Returns the new map, or NULL if allocation fails.
/// Release the map with simple_archiver_hash_map_free.
SDArchiverHashMap *simple_archiver_hash_map_init(void) {
  SDArchiverHashMap *hash_map = malloc(sizeof *hash_map);
  if (!hash_map) {
    return NULL;
  }
  hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE;
  hash_map->count = 0;

  hash_map->buckets =
      calloc(hash_map->buckets_size, sizeof *hash_map->buckets);
  if (!hash_map->buckets) {
    free(hash_map);
    return NULL;
  }

  for (unsigned int idx = 0; idx < hash_map->buckets_size; ++idx) {
    hash_map->buckets[idx] = simple_archiver_list_init();
    if (!hash_map->buckets[idx]) {
      // Defensive: treat a NULL list as an allocation failure and undo the
      // buckets that were already created.
      while (idx > 0) {
        --idx;
        simple_archiver_list_free(hash_map->buckets + idx);
      }
      free(hash_map->buckets);
      free(hash_map);
      return NULL;
    }
  }

  return hash_map;
}
/// Frees every bucket list, the bucket array and the map itself, then sets
/// *hash_map to NULL. Does nothing if hash_map or *hash_map is NULL.
void simple_archiver_hash_map_free(SDArchiverHashMap **hash_map) {
  if (!hash_map || !*hash_map) {
    return;
  }

  SDArchiverHashMap *map = *hash_map;
  for (unsigned int idx = 0; idx < map->buckets_size; ++idx) {
    // The list API takes the address of the list pointer.
    simple_archiver_list_free(&map->buckets[idx]);
  }
  free(map->buckets);
  free(map);
  *hash_map = NULL;
}
/// Releases a key/value pair that could not be stored in the map.
/// Each non-NULL pointer is passed to its cleanup function, or to "free" when
/// that function is NULL.
static void simple_archiver_hash_map_internal_release_pair(
    void *value, void (*value_cleanup_fn)(void *), void *key,
    void (*key_cleanup_fn)(void *)) {
  if (value) {
    if (value_cleanup_fn) {
      value_cleanup_fn(value);
    } else {
      free(value);
    }
  }
  if (key) {
    if (key_cleanup_fn) {
      key_cleanup_fn(key);
    } else {
      free(key);
    }
  }
}

/// Stores "value" under the "key_size" bytes at "key".
/// When the entry count has reached the bucket count the map is grown first,
/// which may replace *hash_map; this is why the map is passed by address.
/// The new entry is put at the front of its bucket.
/// Returns 0 on success. Returns 1 on failure (hash_map or *hash_map is NULL,
/// allocation failed, or the bucket list rejected the entry); on failure the
/// value and key are released with the given cleanup functions, or with
/// "free" where a cleanup function is NULL.
int simple_archiver_hash_map_insert(SDArchiverHashMap **hash_map, void *value,
                                    void *key, unsigned int key_size,
                                    void (*value_cleanup_fn)(void *),
                                    void (*key_cleanup_fn)(void *)) {
  if (!hash_map || !*hash_map) {
    simple_archiver_hash_map_internal_release_pair(value, value_cleanup_fn,
                                                   key, key_cleanup_fn);
    return 1;
  }

  if ((*hash_map)->buckets_size <= (*hash_map)->count) {
    // Growing is best effort: its result is not treated as fatal, the entry
    // is inserted into whatever map *hash_map refers to afterwards.
    simple_archiver_hash_map_internal_rehash(hash_map);
  }

  SDArchiverHashMapData *data = malloc(sizeof *data);
  if (!data) {
    simple_archiver_hash_map_internal_release_pair(value, value_cleanup_fn,
                                                   key, key_cleanup_fn);
    return 1;
  }
  data->value = value;
  data->key = key;
  data->key_size = key_size;
  data->value_cleanup_fn = value_cleanup_fn;
  data->key_cleanup_fn = key_cleanup_fn;

  unsigned long long hash =
      simple_archiver_hash_map_internal_key_to_hash(key, key_size) %
      (*hash_map)->buckets_size;

  int result = simple_archiver_list_add_front(
      (*hash_map)->buckets[hash], data,
      simple_archiver_hash_map_internal_cleanup_data);
  if (result != 0) {
    // The list did not take ownership, so everything is released here.
    simple_archiver_hash_map_internal_release_pair(value, value_cleanup_fn,
                                                   key, key_cleanup_fn);
    free(data);
    return 1;
  }

  ++(*hash_map)->count;
  return 0;
}
/// Looks up the entry whose key has "key_size" bytes equal to those at "key".
/// Returns the stored value pointer of the first match in the key's bucket,
/// or NULL if there is none.
void *simple_archiver_hash_map_get(SDArchiverHashMap *hash_map, void *key,
                                   unsigned int key_size) {
  unsigned long long bucket_idx =
      simple_archiver_hash_map_internal_key_to_hash(key, key_size);
  bucket_idx %= hash_map->buckets_size;

  SDArchiverLLNode *cursor = hash_map->buckets[bucket_idx]->head;
  while (cursor) {
    // Advance before inspecting: the head node itself is never examined, and
    // the tail node and nodes without data are skipped.
    cursor = cursor->next;
    if (!cursor || cursor == hash_map->buckets[bucket_idx]->tail ||
        !cursor->data) {
      continue;
    }

    SDArchiverHashMapData *entry = cursor->data;
    if (entry->key_size != key_size) {
      continue;
    }
    if (memcmp(entry->key, key, key_size) == 0) {
      return entry->value;
    }
  }

  return NULL;
}
/// Removes every entry whose key has "key_size" bytes equal to those at
/// "key" and lowers the map's entry count by the number removed.
/// Returns 0 if exactly one entry was removed, 1 if more than one entry was
/// removed, and 2 if nothing was removed (including when hash_map is NULL).
int simple_archiver_hash_map_remove(SDArchiverHashMap *hash_map, void *key,
                                    unsigned int key_size) {
  if (!hash_map) {
    return 2;
  }

  unsigned long long hash =
      simple_archiver_hash_map_internal_key_to_hash(key, key_size) %
      hash_map->buckets_size;

  SDArchiverHashMapKeyData key_data;
  key_data.key = key;
  key_data.key_size = key_size;

  int result = simple_archiver_list_remove(
      hash_map->buckets[hash], simple_archiver_hash_map_internal_pick_in_list,
      &key_data);
  if (result <= 0) {
    return 2;
  }

  // Keep "count" in step with the stored entries; otherwise removed entries
  // would still count towards the threshold at which insert grows the map.
  if ((unsigned int)result >= hash_map->count) {
    hash_map->count = 0;
  } else {
    hash_map->count -= (unsigned int)result;
  }

  return result == 1 ? 0 : 1;
}

View file

@ -0,0 +1,54 @@
/*
* Copyright 2024 Stephen Seo
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* `hash_map.h` is the header for a hash map implementation.
*/
#ifndef SEODISPARATE_COM_SIMPLE_ARCHIVER_DATA_STRUCTURE_HASH_MAP_H_
#define SEODISPARATE_COM_SIMPLE_ARCHIVER_DATA_STRUCTURE_HASH_MAP_H_
/// Number of buckets a hash map has right after simple_archiver_hash_map_init.
#define SC_SA_DS_HASH_MAP_START_BUCKET_SIZE 32
#include "linked_list.h"
/// A hash map that keeps its entries in an array of linked lists ("buckets").
typedef struct SDArchiverHashMap {
// Array of "buckets_size" linked lists; an entry lives in the list selected
// by its key's hash modulo "buckets_size".
SDArchiverLinkedList **buckets;
// Number of elements in "buckets".
unsigned int buckets_size;
// Entry counter, incremented on every successful insert. Insert compares it
// against "buckets_size" to decide when the map has to grow.
unsigned int count;
} SDArchiverHashMap;
/// Creates an empty hash map with SC_SA_DS_HASH_MAP_START_BUCKET_SIZE buckets.
/// The result must be released with simple_archiver_hash_map_free.
SDArchiverHashMap *simple_archiver_hash_map_init(void);
/// Frees all buckets, the bucket array and the map itself, then sets
/// *hash_map to NULL. Does nothing if hash_map or *hash_map is NULL.
void simple_archiver_hash_map_free(SDArchiverHashMap **hash_map);
/// Returns zero on success.
/// On failure, frees the value and key using the given functions.
/// key must remain valid for the lifetime of its entry in the hash map.
/// If value_cleanup_fn is NULL, then "free" is used instead.
/// If key_cleanup_fn is NULL, then "free" is used instead.
/// The map may be grown before inserting, which can replace *hash_map; this
/// is why the map is passed by address.
int simple_archiver_hash_map_insert(SDArchiverHashMap **hash_map, void *value,
void *key, unsigned int key_size,
void (*value_cleanup_fn)(void *),
void (*key_cleanup_fn)(void *));
/// Returns NULL if not found.
/// Otherwise returns the value pointer that was given to insert for the entry
/// whose key has key_size bytes equal to those at key.
void *simple_archiver_hash_map_get(SDArchiverHashMap *hash_map, void *key,
unsigned int key_size);
/// Returns zero on success. Returns one if more than one entry was removed.
/// Otherwise returns non-zero and non-one value on error.
int simple_archiver_hash_map_remove(SDArchiverHashMap *hash_map, void *key,
unsigned int key_size);
#endif

View file

@ -84,8 +84,30 @@ int simple_archiver_list_add(SDArchiverLinkedList *list, void *data,
return 0; return 0;
} }
/// Puts "data" at the front of the list, i.e. directly after the head node.
/// "data_free_fn" is stored with the node for releasing "data" later.
/// Returns 0 on success. Returns 1 if "list" is NULL or the node cannot be
/// allocated; in both cases the list is unchanged and "data" is not freed.
int simple_archiver_list_add_front(SDArchiverLinkedList *list, void *data,
                                   void (*data_free_fn)(void *)) {
  if (!list) {
    return 1;
  }

  SDArchiverLLNode *new_node = malloc(sizeof *new_node);
  if (!new_node) {
    return 1;
  }
  new_node->data = data;
  new_node->data_free_fn = data_free_fn;

  // Splice the node in between the head node and its current successor.
  new_node->next = list->head->next;
  new_node->prev = list->head;
  list->head->next->prev = new_node;
  list->head->next = new_node;

  ++list->count;

  return 0;
}
int simple_archiver_list_remove(SDArchiverLinkedList *list, int simple_archiver_list_remove(SDArchiverLinkedList *list,
int (*data_check_fn)(void *)) { int (*data_check_fn)(void *, void *),
void *user_data) {
if (!list) { if (!list) {
return 0; return 0;
} }
@ -100,7 +122,7 @@ int simple_archiver_list_remove(SDArchiverLinkedList *list,
} }
iter_removed = 0; iter_removed = 0;
if (node && node != list->tail) { if (node && node != list->tail) {
if (data_check_fn(node->data) != 0) { if (data_check_fn(node->data, user_data) != 0) {
SDArchiverLLNode *temp = node->next; SDArchiverLLNode *temp = node->next;
if (node->data_free_fn) { if (node->data_free_fn) {
@ -125,7 +147,8 @@ int simple_archiver_list_remove(SDArchiverLinkedList *list,
} }
int simple_archiver_list_remove_once(SDArchiverLinkedList *list, int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
int (*data_check_fn)(void *)) { int (*data_check_fn)(void *, void *),
void *user_data) {
if (!list) { if (!list) {
return 0; return 0;
} }
@ -134,7 +157,7 @@ int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
while (node) { while (node) {
node = node->next; node = node->next;
if (node && node != list->tail) { if (node && node != list->tail) {
if (data_check_fn(node->data) != 0) { if (data_check_fn(node->data, user_data) != 0) {
if (node->data_free_fn) { if (node->data_free_fn) {
node->data_free_fn(node->data); node->data_free_fn(node->data);
} else { } else {
@ -156,7 +179,8 @@ int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
} }
void *simple_archiver_list_get(SDArchiverLinkedList *list, void *simple_archiver_list_get(SDArchiverLinkedList *list,
int (*data_check_fn)(void *)) { int (*data_check_fn)(void *, void *),
void *user_data) {
if (!list) { if (!list) {
return NULL; return NULL;
} }
@ -165,7 +189,7 @@ void *simple_archiver_list_get(SDArchiverLinkedList *list,
while (node) { while (node) {
node = node->next; node = node->next;
if (node && node != list->tail) { if (node && node != list->tail) {
if (data_check_fn(node->data) != 0) { if (data_check_fn(node->data, user_data) != 0) {
return node->data; return node->data;
} }
} }

View file

@ -35,26 +35,35 @@ typedef struct SDArchiverLinkedList {
SDArchiverLinkedList *simple_archiver_list_init(void); SDArchiverLinkedList *simple_archiver_list_init(void);
void simple_archiver_list_free(SDArchiverLinkedList **list); void simple_archiver_list_free(SDArchiverLinkedList **list);
/// Returns 0 on success. /// Returns 0 on success. Puts data at the end of the list
/// If data_free_fn is NULL, then "free" is used instead.
int simple_archiver_list_add(SDArchiverLinkedList *list, void *data, int simple_archiver_list_add(SDArchiverLinkedList *list, void *data,
void (*data_free_fn)(void *)); void (*data_free_fn)(void *));
/// Returns 0 on success. Puts data at the front of the list
/// If data_free_fn is NULL, then "free" is used instead.
int simple_archiver_list_add_front(SDArchiverLinkedList *list, void *data,
void (*data_free_fn)(void *));
/// Returns number of removed items. /// Returns number of removed items.
/// data_check_fn must return non-zero if the data passed to it is to be /// data_check_fn must return non-zero if the data passed to it is to be
/// removed. /// removed.
int simple_archiver_list_remove(SDArchiverLinkedList *list, int simple_archiver_list_remove(SDArchiverLinkedList *list,
int (*data_check_fn)(void *)); int (*data_check_fn)(void *, void *),
void *user_data);
/// Returns 1 on removed, 0 if not removed. /// Returns 1 on removed, 0 if not removed.
/// data_check_fn must return non-zero if the data passed to it is to be /// data_check_fn must return non-zero if the data passed to it is to be
/// removed. /// removed.
int simple_archiver_list_remove_once(SDArchiverLinkedList *list, int simple_archiver_list_remove_once(SDArchiverLinkedList *list,
int (*data_check_fn)(void *)); int (*data_check_fn)(void *, void *),
void *user_data);
/// Returns non-null on success. /// Returns non-null on success.
/// data_check_fn must return non-zero if the data passed to it is to be /// data_check_fn must return non-zero if the data passed to it is to be
/// returned. /// returned.
void *simple_archiver_list_get(SDArchiverLinkedList *list, void *simple_archiver_list_get(SDArchiverLinkedList *list,
int (*data_check_fn)(void *)); int (*data_check_fn)(void *, void *),
void *user_data);
#endif #endif

View file

@ -17,8 +17,10 @@
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h> #include <string.h>
#include "hash_map.h"
#include "linked_list.h" #include "linked_list.h"
static int checks_checked = 0; static int checks_checked = 0;
@ -45,11 +47,17 @@ static int checks_passed = 0;
void no_free_fn(__attribute__((unused)) void *unused) { return; } void no_free_fn(__attribute__((unused)) void *unused) { return; }
int get_one_fn(void *data) { return strcmp(data, "one") == 0 ? 1 : 0; } int get_one_fn(void *data, __attribute__((unused)) void *ud) {
return strcmp(data, "one") == 0 ? 1 : 0;
}
int get_two_fn(void *data) { return strcmp(data, "two") == 0 ? 1 : 0; } int get_two_fn(void *data, __attribute__((unused)) void *ud) {
return strcmp(data, "two") == 0 ? 1 : 0;
}
int get_three_fn(void *data) { return strcmp(data, "three") == 0 ? 1 : 0; } int get_three_fn(void *data, __attribute__((unused)) void *ud) {
return strcmp(data, "three") == 0 ? 1 : 0;
}
int main(void) { int main(void) {
// Test LinkedList. // Test LinkedList.
@ -74,28 +82,75 @@ int main(void) {
CHECK_TRUE(list->count == 3); CHECK_TRUE(list->count == 3);
void *ptr = simple_archiver_list_get(list, get_one_fn); void *ptr = simple_archiver_list_get(list, get_one_fn, NULL);
CHECK_TRUE(ptr == one); CHECK_TRUE(ptr == one);
ptr = simple_archiver_list_get(list, get_two_fn); ptr = simple_archiver_list_get(list, get_two_fn, NULL);
CHECK_TRUE(ptr == two); CHECK_TRUE(ptr == two);
ptr = simple_archiver_list_get(list, get_three_fn); ptr = simple_archiver_list_get(list, get_three_fn, NULL);
CHECK_TRUE(ptr == three); CHECK_TRUE(ptr == three);
CHECK_TRUE(simple_archiver_list_remove(list, get_two_fn) == 1); CHECK_TRUE(simple_archiver_list_remove(list, get_two_fn, NULL) == 1);
CHECK_TRUE(list->count == 2); CHECK_TRUE(list->count == 2);
CHECK_TRUE(simple_archiver_list_get(list, get_two_fn) == NULL); CHECK_TRUE(simple_archiver_list_get(list, get_two_fn, NULL) == NULL);
CHECK_TRUE(simple_archiver_list_remove_once(list, get_one_fn) == 1); CHECK_TRUE(simple_archiver_list_remove_once(list, get_one_fn, NULL) == 1);
CHECK_TRUE(list->count == 1); CHECK_TRUE(list->count == 1);
CHECK_TRUE(simple_archiver_list_get(list, get_one_fn) == NULL); CHECK_TRUE(simple_archiver_list_get(list, get_one_fn, NULL) == NULL);
simple_archiver_list_free(&list); simple_archiver_list_free(&list);
CHECK_TRUE(list == NULL); CHECK_TRUE(list == NULL);
} }
// Test HashMap.
{
SDArchiverHashMap *hash_map = simple_archiver_hash_map_init();
simple_archiver_hash_map_free(&hash_map);
hash_map = simple_archiver_hash_map_init();
{
int *value, *key;
for (unsigned int idx = 0; idx < 20; ++idx) {
value = malloc(sizeof(int));
key = malloc(sizeof(int));
*value = idx;
*key = idx;
simple_archiver_hash_map_insert(&hash_map, value, key, sizeof(int),
NULL, NULL);
}
}
int value, key;
void *value_ptr;
for (value = 0, key = 0; value < 20 && key < 20; ++value, ++key) {
value_ptr = simple_archiver_hash_map_get(hash_map, &key, sizeof(int));
CHECK_TRUE(value_ptr != NULL);
CHECK_TRUE(memcmp(value_ptr, &value, sizeof(int)) == 0);
}
key = 5;
simple_archiver_hash_map_remove(hash_map, &key, sizeof(int));
key = 15;
simple_archiver_hash_map_remove(hash_map, &key, sizeof(int));
for (value = 0, key = 0; value < 20 && key < 20; ++value, ++key) {
value_ptr = simple_archiver_hash_map_get(hash_map, &key, sizeof(int));
if (key != 5 && key != 15) {
CHECK_TRUE(value_ptr != NULL);
CHECK_TRUE(memcmp(value_ptr, &value, sizeof(int)) == 0);
} else {
CHECK_TRUE(value_ptr == NULL);
}
}
simple_archiver_hash_map_free(&hash_map);
}
printf("Checks checked: %u\n", checks_checked); printf("Checks checked: %u\n", checks_checked);
printf("Checks passed: %u\n", checks_passed); printf("Checks passed: %u\n", checks_passed);
return checks_passed == checks_checked ? 0 : 1; return checks_passed == checks_checked ? 0 : 1;