Compare commits
24 commits
Author | SHA1 | Date | |
---|---|---|---|
ce7400a298 | |||
a58034aa0b | |||
5484da169c | |||
6f59393e0b | |||
7bdeb049d4 | |||
f6f9803439 | |||
b256350fbc | |||
5d67e0dc50 | |||
a415ab22ad | |||
f81d007e7c | |||
14986f5c4a | |||
fb1c24ba2c | |||
9d31798da6 | |||
1c06462ca7 | |||
50bc4a8a07 | |||
df37f68bcf | |||
edcaee803c | |||
50b1f4b274 | |||
cef3e4184a | |||
7ee54bddf2 | |||
9d84b28efe | |||
3dfc258fa4 | |||
1b7bfde458 | |||
04f4897dd3 |
11 changed files with 726 additions and 116 deletions
|
@ -1,7 +1,7 @@
|
||||||
cmake_minimum_required(VERSION 3.7)
|
cmake_minimum_required(VERSION 3.10)
|
||||||
project(SimpleArchiver C)
|
project(SimpleArchiver C)
|
||||||
|
|
||||||
set(SimpleArchiver_VERSION 1.0)
|
set(SimpleArchiver_VERSION 1.7)
|
||||||
|
|
||||||
set(SimpleArchiver_SOURCES
|
set(SimpleArchiver_SOURCES
|
||||||
src/main.c
|
src/main.c
|
||||||
|
|
43
Changelog.md
43
Changelog.md
|
@ -2,6 +2,49 @@
|
||||||
|
|
||||||
## Upcoming Changes
|
## Upcoming Changes
|
||||||
|
|
||||||
|
## Version 1.7
|
||||||
|
|
||||||
|
Refactor the internal hash-map data structure.
|
||||||
|
|
||||||
|
Minor update to CMakeLists.txt.
|
||||||
|
|
||||||
|
## Version 1.6
|
||||||
|
|
||||||
|
Enforce "safe-links" on extraction by ensuring every extracted symlink actually
|
||||||
|
points to a file in the archive. Additionally any extracted symlinks that don't
|
||||||
|
point to a valid destination are removed. This "enforce safe-links on extract"
|
||||||
|
can be disabled with the "--no-safe-links" option.
|
||||||
|
|
||||||
|
Add "--preserve-symlinks" option that will store each symlink's target verbatim.
|
||||||
|
Not recommended if symlinks are pointing to absolute paths, which will be
|
||||||
|
clobbered on extraction to a different directory unless "--no-safe-links" is
|
||||||
|
specified on extraction.
|
||||||
|
|
||||||
|
## Version 1.5
|
||||||
|
|
||||||
|
Previous file-format-v1 implementation of "safe links" still created a symlink
|
||||||
|
if a relative or absolute link existed in the file. This version fixes this, and
|
||||||
|
prevents invalid symlinks from being created. (This check is only done if the
|
||||||
|
bit-flag is set in the file as mentioned in the file-format spec for v1 files.)
|
||||||
|
|
||||||
|
## Version 1.4
|
||||||
|
|
||||||
|
Do "safe links" behavior by default: symlinks pointing to outside of archived
|
||||||
|
files (or invalid symlinks) should not be included in the archive, unless the
|
||||||
|
option "--no-safe-links" is specified. This is supported in both v0 and v1 file
|
||||||
|
formats.
|
||||||
|
|
||||||
|
## Version 1.3
|
||||||
|
|
||||||
|
Prevent `simplearchiver` from busy-waiting during non-blocking IO by sleeping
|
||||||
|
in "EWOULDBLOCK" conditions. This results in less CPU time consumed by the
|
||||||
|
process, especially during compression.
|
||||||
|
|
||||||
|
## Version 1.2
|
||||||
|
|
||||||
|
Proper handling of Ctrl+C (SIGINT). This prevents temporary files from
|
||||||
|
persisting by doing a proper cleanup before stopping the program.
|
||||||
|
|
||||||
## Version 1.1
|
## Version 1.1
|
||||||
|
|
||||||
More robust handling of de/compression process (handling SIGPIPE).
|
More robust handling of de/compression process (handling SIGPIPE).
|
||||||
|
|
|
@ -27,6 +27,8 @@ API calls.
|
||||||
--overwrite-create : allows overwriting an archive file
|
--overwrite-create : allows overwriting an archive file
|
||||||
--overwrite-extract : allows overwriting when extracting
|
--overwrite-extract : allows overwriting when extracting
|
||||||
--no-abs-symlink : do not store absolute paths for symlinks
|
--no-abs-symlink : do not store absolute paths for symlinks
|
||||||
|
--preserve-symlinks : preserve the symlink's path on archive creation instead of deriving abs/relative paths, ignores "--no-abs-symlink" (It is not recommended to use this option, as absolute-path-symlinks may be clobbered on extraction)
|
||||||
|
--no-safe-links : keep symlinks that link to outside archive contents
|
||||||
--temp-files-dir <dir> : where to store temporary files created when compressing (defaults to current working directory)
|
--temp-files-dir <dir> : where to store temporary files created when compressing (defaults to current working directory)
|
||||||
--write-version <version> : Force write version file format (default 1)
|
--write-version <version> : Force write version file format (default 1)
|
||||||
--chunk-min-size <bytes> : v1 file format minimum chunk size (default 4194304 or 4MiB)
|
--chunk-min-size <bytes> : v1 file format minimum chunk size (default 4194304 or 4MiB)
|
||||||
|
@ -38,6 +40,11 @@ API calls.
|
||||||
Note that `--compressor` and `--decompressor` cmds must accept data from stdin
|
Note that `--compressor` and `--decompressor` cmds must accept data from stdin
|
||||||
and return processed data to stdout.
|
and return processed data to stdout.
|
||||||
|
|
||||||
|
## Using the Cosmopolitan-Compiled Version
|
||||||
|
|
||||||
|
Note that on Linux, the `actually_portable_simplearchiver` binaries may attempt
|
||||||
|
to open via Wine (if Wine is installed). [A workaround is mentioned here.](https://github.com/jart/cosmopolitan/blob/master/README.md#linux)
|
||||||
|
|
||||||
## Changes
|
## Changes
|
||||||
|
|
||||||
See the [Changelog](https://git.seodisparate.com/stephenseo/SimpleArchiver/src/branch/main/Changelog.md).
|
See the [Changelog](https://git.seodisparate.com/stephenseo/SimpleArchiver/src/branch/main/Changelog.md).
|
||||||
|
|
|
@ -57,6 +57,11 @@ Following the file-count bytes, the following bytes are added for each file:
|
||||||
2. The second bit is "other execute permission".
|
2. The second bit is "other execute permission".
|
||||||
3. The third bit is UNSET if relative links are preferred, and is SET
|
3. The third bit is UNSET if relative links are preferred, and is SET
|
||||||
if absolute links are preferred.
|
if absolute links are preferred.
|
||||||
|
4. The fourth bit is set if this file/symlink-entry is invalid and must
|
||||||
|
be skipped. Ignore the bytes following these 4 bit-flag bytes in
|
||||||
|
this specification and skip to the next entry; if marked invalid,
|
||||||
|
the following specification bytes for this file/symlink entry must
|
||||||
|
not exist.
|
||||||
3. The third byte.
|
3. The third byte.
|
||||||
1. Currently unused.
|
1. Currently unused.
|
||||||
4. The fourth byte.
|
4. The fourth byte.
|
||||||
|
@ -128,6 +133,9 @@ Following the link-count bytes, the following bytes are added for each symlink:
|
||||||
2. The second byte.
|
2. The second byte.
|
||||||
1. The first bit is "other write permission".
|
1. The first bit is "other write permission".
|
||||||
2. The second bit is "other execute permission".
|
2. The second bit is "other execute permission".
|
||||||
|
3. If this bit is set, then this entry is marked invalid. The link name
|
||||||
|
will be preserved in this entry, but the following link target paths
|
||||||
|
will be set to zero-length and will not be stored.
|
||||||
2. 2 bytes 16-bit unsigned integer "link name" in big-endian. This does not
|
2. 2 bytes 16-bit unsigned integer "link name" in big-endian. This does not
|
||||||
include the NULL at the end of the string. Must not be zero.
|
include the NULL at the end of the string. Must not be zero.
|
||||||
3. X bytes of link-name (length defined by previous value). Is a NULL-terminated
|
3. X bytes of link-name (length defined by previous value). Is a NULL-terminated
|
||||||
|
|
554
src/archiver.c
554
src/archiver.c
File diff suppressed because it is too large
Load diff
|
@ -52,7 +52,8 @@ typedef enum SDArchiverStateReturns {
|
||||||
SDAS_FAILED_TO_CREATE_MAP,
|
SDAS_FAILED_TO_CREATE_MAP,
|
||||||
SDAS_FAILED_TO_EXTRACT_SYMLINK,
|
SDAS_FAILED_TO_EXTRACT_SYMLINK,
|
||||||
SDAS_FAILED_TO_CHANGE_CWD,
|
SDAS_FAILED_TO_CHANGE_CWD,
|
||||||
SDAS_INVALID_WRITE_VERSION
|
SDAS_INVALID_WRITE_VERSION,
|
||||||
|
SDAS_SIGINT
|
||||||
} SDArchiverStateReturns;
|
} SDArchiverStateReturns;
|
||||||
|
|
||||||
/// Returned pointer must not be freed.
|
/// Returned pointer must not be freed.
|
||||||
|
@ -108,4 +109,9 @@ char *simple_archiver_file_abs_path(const char *filename);
|
||||||
/// Returns 5 if "filepath" is NULL.
|
/// Returns 5 if "filepath" is NULL.
|
||||||
int simple_archiver_validate_file_path(const char *filepath);
|
int simple_archiver_validate_file_path(const char *filepath);
|
||||||
|
|
||||||
|
/// Removes links from "links_list" in cwd if it is not valid or does not point
|
||||||
|
/// to a file in "files_map".
|
||||||
|
void simple_archiver_safe_links_enforce(SDArchiverLinkedList *links_list,
|
||||||
|
SDArchiverHashMap *files_map);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -76,19 +76,17 @@ int simple_archiver_hash_map_internal_pick_in_list(void *data, void *ud) {
|
||||||
|
|
||||||
uint64_t simple_archiver_hash_default_fn(const void *key, size_t key_size) {
|
uint64_t simple_archiver_hash_default_fn(const void *key, size_t key_size) {
|
||||||
uint64_t seed = 0;
|
uint64_t seed = 0;
|
||||||
uint64_t temp = 0;
|
uint64_t temp;
|
||||||
size_t count = 0;
|
|
||||||
for (size_t idx = 0; idx < key_size; ++idx) {
|
for (size_t idx = 0; idx < key_size; ++idx) {
|
||||||
temp |= ((uint64_t) * ((uint8_t *)key + idx)) << (8 * count);
|
temp = (uint64_t)(((uint8_t*)key)[idx]) + seed;
|
||||||
++count;
|
if (idx % 3 == 0) {
|
||||||
if (count >= 8) {
|
temp ^= 0xA5A538A5A9B5A5A5;
|
||||||
count = 0;
|
} else if (idx % 3 == 1) {
|
||||||
seed += temp;
|
temp ^= 0xD7A58BD7A58BD7AA;
|
||||||
temp = 0;
|
} else {
|
||||||
|
temp ^= 0x8B7A8B8B87CB8B84;
|
||||||
}
|
}
|
||||||
}
|
seed += simple_archiver_algo_lcg_defaults(temp);
|
||||||
if (temp != 0) {
|
|
||||||
seed += temp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return simple_archiver_algo_lcg_defaults(seed);
|
return simple_archiver_algo_lcg_defaults(seed);
|
||||||
|
@ -106,7 +104,7 @@ int simple_archiver_hash_map_internal_rehash(SDArchiverHashMap *hash_map) {
|
||||||
}
|
}
|
||||||
SDArchiverHashMap new_hash_map;
|
SDArchiverHashMap new_hash_map;
|
||||||
new_hash_map.hash_fn = hash_map->hash_fn;
|
new_hash_map.hash_fn = hash_map->hash_fn;
|
||||||
new_hash_map.buckets_size = hash_map->buckets_size * 2;
|
new_hash_map.buckets_size = (hash_map->buckets_size - 1) * 2 + 1;
|
||||||
// Pointers have the same size (at least on the same machine), so
|
// Pointers have the same size (at least on the same machine), so
|
||||||
// sizeof(void*) should be ok.
|
// sizeof(void*) should be ok.
|
||||||
new_hash_map.buckets = malloc(sizeof(void *) * new_hash_map.buckets_size);
|
new_hash_map.buckets = malloc(sizeof(void *) * new_hash_map.buckets_size);
|
||||||
|
@ -154,7 +152,7 @@ SDArchiverHashMap *simple_archiver_hash_map_init_custom_hasher(
|
||||||
uint64_t (*hash_fn)(const void *, size_t)) {
|
uint64_t (*hash_fn)(const void *, size_t)) {
|
||||||
SDArchiverHashMap *hash_map = malloc(sizeof(SDArchiverHashMap));
|
SDArchiverHashMap *hash_map = malloc(sizeof(SDArchiverHashMap));
|
||||||
hash_map->hash_fn = hash_fn;
|
hash_map->hash_fn = hash_fn;
|
||||||
hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE;
|
hash_map->buckets_size = SC_SA_DS_HASH_MAP_START_BUCKET_SIZE + 1;
|
||||||
// Pointers have the same size (at least on the same machine), so
|
// Pointers have the same size (at least on the same machine), so
|
||||||
// sizeof(void*) should be ok.
|
// sizeof(void*) should be ok.
|
||||||
hash_map->buckets = malloc(sizeof(void *) * hash_map->buckets_size);
|
hash_map->buckets = malloc(sizeof(void *) * hash_map->buckets_size);
|
||||||
|
|
|
@ -219,6 +219,59 @@ int main(void) {
|
||||||
simple_archiver_hash_map_free(&hash_map);
|
simple_archiver_hash_map_free(&hash_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test hashing.
|
||||||
|
//{
|
||||||
|
// printf("Distribution of 13 over 33...\n");
|
||||||
|
// unsigned int counts[33];
|
||||||
|
// memset(counts, 0, sizeof(unsigned int) * 33);
|
||||||
|
|
||||||
|
// uint64_t hash;
|
||||||
|
|
||||||
|
// hash = simple_archiver_hash_default_fn("/", 2);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/faq", 5);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/faq", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/FAQ", 5);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/FAQ", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/url", 5);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/url", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/home", 6);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/home", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/blog", 6);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/blog", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/test", 6);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/test", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/menu", 6);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/menu", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/posts", 7);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/posts", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/about", 7);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/about", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/media", 7);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/media", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/social", 8);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/social", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
// hash = simple_archiver_hash_default_fn("/projects", 10);
|
||||||
|
// printf("%s in bucket %lu (%lu)\n", "/projects", hash % 33, hash);
|
||||||
|
// counts[hash % 33] += 1;
|
||||||
|
|
||||||
|
// for (unsigned int idx = 0; idx < 33; ++idx) {
|
||||||
|
// printf("Bucket %u: %u\n", idx, counts[idx]);
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
// Test PriorityHeap.
|
// Test PriorityHeap.
|
||||||
{
|
{
|
||||||
SDArchiverPHeap *priority_heap = simple_archiver_priority_heap_init();
|
SDArchiverPHeap *priority_heap = simple_archiver_priority_heap_init();
|
||||||
|
|
16
src/parser.c
16
src/parser.c
|
@ -168,6 +168,15 @@ void simple_archiver_print_usage(void) {
|
||||||
fprintf(stderr, "--overwrite-extract : allows overwriting when extracting\n");
|
fprintf(stderr, "--overwrite-extract : allows overwriting when extracting\n");
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"--no-abs-symlink : do not store absolute paths for symlinks\n");
|
"--no-abs-symlink : do not store absolute paths for symlinks\n");
|
||||||
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"--preserve-symlinks : preserve the symlink's path on archive creation "
|
||||||
|
"instead of deriving abs/relative paths, ignores \"--no-abs-symlink\" "
|
||||||
|
"(It is not recommended to use this option, as absolute-path-symlinks "
|
||||||
|
"may be clobbered on extraction)\n");
|
||||||
|
fprintf(stderr,
|
||||||
|
"--no-safe-links : keep symlinks that link to outside archive "
|
||||||
|
"contents\n");
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"--temp-files-dir <dir> : where to store temporary files created "
|
"--temp-files-dir <dir> : where to store temporary files created "
|
||||||
"when compressing (defaults to current working directory)\n");
|
"when compressing (defaults to current working directory)\n");
|
||||||
|
@ -303,6 +312,13 @@ int simple_archiver_parse_args(int argc, const char **argv,
|
||||||
out->flags |= 0x8;
|
out->flags |= 0x8;
|
||||||
} else if (strcmp(argv[0], "--no-abs-symlink") == 0) {
|
} else if (strcmp(argv[0], "--no-abs-symlink") == 0) {
|
||||||
out->flags |= 0x20;
|
out->flags |= 0x20;
|
||||||
|
} else if (strcmp(argv[0], "--preserve-symlinks") == 0) {
|
||||||
|
out->flags |= 0x100;
|
||||||
|
} else if (strcmp(argv[0], "--no-safe-links") == 0) {
|
||||||
|
out->flags |= 0x80;
|
||||||
|
fprintf(stderr,
|
||||||
|
"NOTICE: Disabling safe-links, symlinks that point to outside "
|
||||||
|
"archived files will be preserved!\n");
|
||||||
} else if (strcmp(argv[0], "--temp-files-dir") == 0) {
|
} else if (strcmp(argv[0], "--temp-files-dir") == 0) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
fprintf(stderr, "ERROR: --temp-files-dir is missing an argument!\n");
|
fprintf(stderr, "ERROR: --temp-files-dir is missing an argument!\n");
|
||||||
|
|
|
@ -36,6 +36,8 @@ typedef struct SDArchiverParsed {
|
||||||
/// 0b xxx1 xxxx - Create archive to stdout or read archive from stdin.
|
/// 0b xxx1 xxxx - Create archive to stdout or read archive from stdin.
|
||||||
/// 0b xx1x xxxx - Do not save absolute paths for symlinks.
|
/// 0b xx1x xxxx - Do not save absolute paths for symlinks.
|
||||||
/// 0b x1xx xxxx - Sort files by size before archiving.
|
/// 0b x1xx xxxx - Sort files by size before archiving.
|
||||||
|
/// 0b 1xxx xxxx - No safe links.
|
||||||
|
/// 0b xxxx xxx1 xxxx xxxx - Preserve symlink target.
|
||||||
uint32_t flags;
|
uint32_t flags;
|
||||||
/// Null-terminated string.
|
/// Null-terminated string.
|
||||||
char *filename;
|
char *filename;
|
||||||
|
|
|
@ -256,6 +256,11 @@ int main(void) {
|
||||||
CHECK_STREQ(rel_path, "../other/dir/");
|
CHECK_STREQ(rel_path, "../other/dir/");
|
||||||
simple_archiver_helper_cleanup_c_string(&rel_path);
|
simple_archiver_helper_cleanup_c_string(&rel_path);
|
||||||
|
|
||||||
|
rel_path = simple_archiver_filenames_to_relative_path(
|
||||||
|
"/one/two/three/", "/one/two/three/four");
|
||||||
|
CHECK_STREQ(rel_path, "four");
|
||||||
|
simple_archiver_helper_cleanup_c_string(&rel_path);
|
||||||
|
|
||||||
CHECK_FALSE(simple_archiver_validate_file_path("Local/Path"));
|
CHECK_FALSE(simple_archiver_validate_file_path("Local/Path"));
|
||||||
CHECK_TRUE(simple_archiver_validate_file_path("/Abs/Path"));
|
CHECK_TRUE(simple_archiver_validate_file_path("/Abs/Path"));
|
||||||
CHECK_TRUE(simple_archiver_validate_file_path("Local/../../not/really"));
|
CHECK_TRUE(simple_archiver_validate_file_path("Local/../../not/really"));
|
||||||
|
|
Loading…
Reference in a new issue