Compare commits

..

5 commits

Author SHA1 Message Date
ce7400a298 Update Changelog.md
All checks were successful
Build for Releases / ensure-release-exists (push) Successful in 2s
Build for Releases / push-build-x86_64 (push) Successful in 6s
Run Unit Tests / build-and-run-unit-tests (push) Successful in 1m11s
Build for Releases / push-build-aarch64 (push) Successful in 59s
Build for Releases / push-build-x86_64_debian (push) Successful in 35s
Build for Releases / push-build-aarch64_debian (push) Successful in 5m13s
2024-11-15 17:39:10 +09:00
a58034aa0b Bump CMakeLists.txt cmake_minimum_required version 2024-11-15 17:38:38 +09:00
5484da169c Update CMakeLists.txt (version 1.7) 2024-11-15 17:37:23 +09:00
6f59393e0b Update Changelog.md, version 1.7 2024-11-15 17:37:06 +09:00
7bdeb049d4 Refactor hash-map
Tweaked the default hash function and the hash-map bucket size.
2024-11-15 17:36:07 +09:00
4 changed files with 72 additions and 15 deletions

View file

@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.7)
cmake_minimum_required(VERSION 3.10)
project(SimpleArchiver C)
set(SimpleArchiver_VERSION 1.0)
set(SimpleArchiver_VERSION 1.7)
set(SimpleArchiver_SOURCES
src/main.c

View file

@ -2,6 +2,12 @@
## Upcoming Changes
## Version 1.7
Refactor the internal hash-map data structure.
Minor update to CMakeLists.txt.
## Version 1.6
Enforce "safe-links" on extraction by ensuring every extracted symlink actually

View file

@ -76,19 +76,17 @@ int simple_archiver_hash_map_internal_pick_in_list(void *data, void *ud) {
uint64_t simple_archiver_hash_default_fn(const void *key, size_t key_size) {
uint64_t seed = 0;
uint64_t temp = 0;
size_t count = 0;
uint64_t temp;
for (size_t idx = 0; idx < key_size; ++idx) {
temp |= ((uint64_t) * ((uint8_t *)key + idx)) << (8 * count);
++count;
if (count >= 8) {
count = 0;
seed += temp;
temp = 0;
temp = (uint64_t)(((uint8_t*)key)[idx]) + seed;
if (idx % 3 == 0) {
temp ^= 0xA5A538A5A9B5A5A5;
} else if (idx % 3 == 1) {
temp ^= 0xD7A58BD7A58BD7AA;
} else {
temp ^= 0x8B7A8B8B87CB8B84;
}
}
if (temp != 0) {
seed += temp;
seed += simple_archiver_algo_lcg_defaults(temp);
}
return simple_archiver_algo_lcg_defaults(seed);
@ -106,7 +104,7 @@ int simple_archiver_hash_map_internal_rehash(SDArchiverHashMap *hash_map) {
}
SDArchiverHashMap new_hash_map;
new_hash_map.hash_fn = hash_map->hash_fn;
new_hash_map.buckets_size = hash_map->buckets_size * 2;
new_hash_map.buckets_size = (hash_map->buckets_size - 1) * 2 + 1;
// Pointers have the same size (at least on the same machine), so
// sizeof(void*) should be ok.
new_hash_map.buckets = malloc(sizeof(void *) * new_hash_map.buckets_size);
@ -154,7 +152,7 @@ SDArchiverHashMap *simple_archiver_hash_map_init_custom_hasher(
uint64_t (*hash_fn)(const void *, size_t)) {
SDArchiverHashMap *hash_map = malloc(sizeof(SDArchiverHashMap));
hash_map->hash_fn = hash_fn;
hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE;
hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE + 1;
// Pointers have the same size (at least on the same machine), so
// sizeof(void*) should be ok.
hash_map->buckets = malloc(sizeof(void *) * hash_map->buckets_size);

View file

@ -219,6 +219,59 @@ int main(void) {
simple_archiver_hash_map_free(&hash_map);
}
// Test hashing.
//{
// printf("Distribution of 13 over 33...\n");
// unsigned int counts[33];
// memset(counts, 0, sizeof(unsigned int) * 33);
// uint64_t hash;
// hash = simple_archiver_hash_default_fn("/", 2);
// printf("%s in bucket %lu (%lu)\n", "/", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/faq", 5);
// printf("%s in bucket %lu (%lu)\n", "/faq", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/FAQ", 5);
// printf("%s in bucket %lu (%lu)\n", "/FAQ", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/url", 5);
// printf("%s in bucket %lu (%lu)\n", "/url", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/home", 6);
// printf("%s in bucket %lu (%lu)\n", "/home", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/blog", 6);
// printf("%s in bucket %lu (%lu)\n", "/blog", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/test", 6);
// printf("%s in bucket %lu (%lu)\n", "/test", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/menu", 6);
// printf("%s in bucket %lu (%lu)\n", "/menu", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/posts", 7);
// printf("%s in bucket %lu (%lu)\n", "/posts", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/about", 7);
// printf("%s in bucket %lu (%lu)\n", "/about", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/media", 7);
// printf("%s in bucket %lu (%lu)\n", "/media", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/social", 8);
// printf("%s in bucket %lu (%lu)\n", "/social", hash % 33, hash);
// counts[hash % 33] += 1;
// hash = simple_archiver_hash_default_fn("/projects", 10);
// printf("%s in bucket %lu (%lu)\n", "/projects", hash % 33, hash);
// counts[hash % 33] += 1;
// for (unsigned int idx = 0; idx < 33; ++idx) {
// printf("Bucket %u: %u\n", idx, counts[idx]);
// }
//}
// Test PriorityHeap.
{
SDArchiverPHeap *priority_heap = simple_archiver_priority_heap_init();