From: Stephen Seo Date: Fri, 15 Nov 2024 08:36:07 +0000 (+0900) Subject: Refactor hash-map X-Git-Tag: 1.7~4 X-Git-Url: https://git.seodisparate.com/stephenseo/css/v4-font-face.min.css?a=commitdiff_plain;h=7bdeb049d47027211c6ac0faa18adc2f7ed0412f;p=SimpleArchiver Refactor hash-map Tweaked the default hash function and hash-map-bucket-size. --- diff --git a/src/data_structures/hash_map.c b/src/data_structures/hash_map.c index c292e82..6150e55 100644 --- a/src/data_structures/hash_map.c +++ b/src/data_structures/hash_map.c @@ -76,19 +76,17 @@ int simple_archiver_hash_map_internal_pick_in_list(void *data, void *ud) { uint64_t simple_archiver_hash_default_fn(const void *key, size_t key_size) { uint64_t seed = 0; - uint64_t temp = 0; - size_t count = 0; + uint64_t temp; for (size_t idx = 0; idx < key_size; ++idx) { - temp |= ((uint64_t) * ((uint8_t *)key + idx)) << (8 * count); - ++count; - if (count >= 8) { - count = 0; - seed += temp; - temp = 0; + temp = (uint64_t)(((uint8_t*)key)[idx]) + seed; + if (idx % 3 == 0) { + temp ^= 0xA5A538A5A9B5A5A5; + } else if (idx % 3 == 1) { + temp ^= 0xD7A58BD7A58BD7AA; + } else { + temp ^= 0x8B7A8B8B87CB8B84; } - } - if (temp != 0) { - seed += temp; + seed += simple_archiver_algo_lcg_defaults(temp); } return simple_archiver_algo_lcg_defaults(seed); @@ -106,7 +104,7 @@ int simple_archiver_hash_map_internal_rehash(SDArchiverHashMap *hash_map) { } SDArchiverHashMap new_hash_map; new_hash_map.hash_fn = hash_map->hash_fn; - new_hash_map.buckets_size = hash_map->buckets_size * 2; + new_hash_map.buckets_size = (hash_map->buckets_size - 1) * 2 + 1; // Pointers have the same size (at least on the same machine), so // sizeof(void*) should be ok. new_hash_map.buckets = malloc(sizeof(void *) * new_hash_map.buckets_size); @@ -154,7 +152,7 @@ SDArchiverHashMap *simple_archiver_hash_map_init_custom_hasher( uint64_t (*hash_fn)(const void *, size_t)) { SDArchiverHashMap *hash_map = malloc(sizeof(SDArchiverHashMap)); hash_map->hash_fn = hash_fn; - hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE; + hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE + 1; // Pointers have the same size (at least on the same machine), so // sizeof(void*) should be ok. hash_map->buckets = malloc(sizeof(void *) * hash_map->buckets_size); diff --git a/src/data_structures/test.c b/src/data_structures/test.c index cbd0ae4..1992d1c 100644 --- a/src/data_structures/test.c +++ b/src/data_structures/test.c @@ -219,6 +219,59 @@ int main(void) { simple_archiver_hash_map_free(&hash_map); } + // Test hashing. + //{ + // printf("Distribution of 13 over 33...\n"); + // unsigned int counts[33]; + // memset(counts, 0, sizeof(unsigned int) * 33); + + // uint64_t hash; + + // hash = simple_archiver_hash_default_fn("/", 2); + // printf("%s in bucket %lu (%lu)\n", "/", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/faq", 5); + // printf("%s in bucket %lu (%lu)\n", "/faq", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/FAQ", 5); + // printf("%s in bucket %lu (%lu)\n", "/FAQ", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/url", 5); + // printf("%s in bucket %lu (%lu)\n", "/url", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/home", 6); + // printf("%s in bucket %lu (%lu)\n", "/home", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/blog", 6); + // printf("%s in bucket %lu (%lu)\n", "/blog", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/test", 6); + // printf("%s in bucket %lu (%lu)\n", "/test", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/menu", 6); + // printf("%s in bucket %lu (%lu)\n", "/menu", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/posts", 7); + // printf("%s in bucket %lu (%lu)\n", "/posts", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/about", 7); + // printf("%s in bucket %lu (%lu)\n", "/about", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/media", 7); + // printf("%s in bucket %lu (%lu)\n", "/media", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/social", 8); + // printf("%s in bucket %lu (%lu)\n", "/social", hash % 33, hash); + // counts[hash % 33] += 1; + // hash = simple_archiver_hash_default_fn("/projects", 10); + // printf("%s in bucket %lu (%lu)\n", "/projects", hash % 33, hash); + // counts[hash % 33] += 1; + + // for (unsigned int idx = 0; idx < 33; ++idx) { + // printf("Bucket %u: %u\n", idx, counts[idx]); + // } + //} + // Test PriorityHeap. { SDArchiverPHeap *priority_heap = simple_archiver_priority_heap_init();