From 5afefb949611b0c194241650a4cd31428bbec4bb Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Thu, 26 Sep 2024 17:31:06 +0900 Subject: [PATCH 01/37] Add compiler hardening flags --- CMakeLists.txt | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ecbcee..6bed693 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,49 @@ endif() add_executable(simplearchiver ${SimpleArchiver_SOURCES}) +target_compile_options(simplearchiver PUBLIC + -Wall -Wformat -Wformat=2 -Wconversion -Wimplicit-fallthrough + -Werror=format-security + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 + -D_GLIBCXX_ASSERTIONS + -fstrict-flex-arrays=3 + -fstack-clash-protection -fstack-protector-strong + -Wl,-z,nodlopen -Wl,-z,noexecstack + -Wl,-z,relro -Wl,-z,now + -Wl,--as-needed -Wl,--no-copy-dt-needed-entries + -fPIE -pie +) + +target_link_options(simplearchiver PUBLIC + -Wall -Wformat -Wformat=2 -Wconversion -Wimplicit-fallthrough + -Werror=format-security + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 + -D_GLIBCXX_ASSERTIONS + -fstrict-flex-arrays=3 + -fstack-clash-protection -fstack-protector-strong + -Wl,-z,nodlopen -Wl,-z,noexecstack + -Wl,-z,relro -Wl,-z,now + -Wl,--as-needed -Wl,--no-copy-dt-needed-entries + -fPIE -pie +) + +# Inhibit format-string-related warning in src/archiver.c . 
+set_source_files_properties(src/archiver.c + PROPERTIES + COMPILE_FLAGS -Wno-format-nonliteral +) + +if(CMAKE_BUILD_TYPE STREQUAL "Release") + target_compile_options(simplearchiver PUBLIC + -fno-delete-null-pointer-checks -fno-strict-overflow + -fno-strict-aliasing -ftrivial-auto-var-init=zero + ) + target_link_options(simplearchiver PUBLIC + -fno-delete-null-pointer-checks -fno-strict-overflow + -fno-strict-aliasing -ftrivial-auto-var-init=zero + ) +endif() + add_executable(test_datastructures src/data_structures/test.c src/data_structures/linked_list.c From a81ec4434a94e16d8576a1f609a6d7856baec8d6 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Sat, 28 Sep 2024 19:07:12 +0900 Subject: [PATCH 02/37] Fix potential NULL-ptr dereference --- src/archiver.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/archiver.c b/src/archiver.c index a993b18..1cd5e29 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1259,6 +1259,8 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, } uc_heap_buf[u16 - 1] = 0; fprintf(stderr, "Decompressor cmd: %s\n", uc_heap_buf); + decompressor_cmd = heap_buf; + heap_buf = NULL; } } else { fprintf(stderr, "De/compressor flag is NOT set.\n"); From b098fd6d694906855dac35fae4aea122011d4877 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 30 Sep 2024 19:16:29 +0900 Subject: [PATCH 03/37] Error if "-C <dir>" where "dir" doesn't exist --- src/main.c | 9 +++++---- src/parser.c | 19 ++++++++++++++++++- src/parser.h | 10 +++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/main.c b/src/main.c index fef432f..71c59db 100644 --- a/src/main.c +++ b/src/main.c @@ -67,12 +67,13 @@ int main(int argc, const char **argv) { } } + SDArchiverParsedStatus parsed_status; __attribute__((cleanup(simple_archiver_list_free))) SDArchiverLinkedList *filenames = - simple_archiver_parsed_to_filenames(&parsed); - if (!filenames) { - fprintf(stderr, - "ERROR: Failed to resolve filenames from positional 
arguments!\n"); + simple_archiver_parsed_to_filenames(&parsed, &parsed_status); + if (!filenames || parsed_status != SDAPS_SUCCESS) { + fprintf(stderr, "ERROR: %s!\n", + simple_archiver_parsed_status_to_str(parsed_status)); return 8; } diff --git a/src/parser.c b/src/parser.c index a69dfbf..5da4653 100644 --- a/src/parser.c +++ b/src/parser.c @@ -131,6 +131,17 @@ int list_remove_same_str_fn(void *data, void *ud) { return 0; } +char *simple_archiver_parsed_status_to_str(SDArchiverParsedStatus status) { + switch (status) { + case SDAPS_SUCCESS: + return "Success"; + case SDAPS_NO_USER_CWD: + return "No user current working directory (-C )"; + default: + return "Unknown error"; + } +} + void simple_archiver_print_usage(void) { fprintf(stderr, "Usage flags:\n"); fprintf(stderr, "-c : create archive file\n"); @@ -367,7 +378,7 @@ void simple_archiver_free_parsed(SDArchiverParsed *parsed) { } SDArchiverLinkedList *simple_archiver_parsed_to_filenames( - const SDArchiverParsed *parsed) { + const SDArchiverParsed *parsed, SDArchiverParsedStatus *status_out) { SDArchiverLinkedList *files_list = simple_archiver_list_init(); __attribute__((cleanup(simple_archiver_hash_map_free))) SDArchiverHashMap *hash_map = simple_archiver_hash_map_init(); @@ -381,6 +392,9 @@ SDArchiverLinkedList *simple_archiver_parsed_to_filenames( original_cwd = realpath(".", NULL); if (chdir(parsed->user_cwd)) { simple_archiver_list_free(&files_list); + if (status_out) { + *status_out = SDAPS_NO_USER_CWD; + } return NULL; } } @@ -607,5 +621,8 @@ SDArchiverLinkedList *simple_archiver_parsed_to_filenames( } } + if (status_out) { + *status_out = SDAPS_SUCCESS; + } return files_list; } diff --git a/src/parser.h b/src/parser.h index 008e800..ad2e472 100644 --- a/src/parser.h +++ b/src/parser.h @@ -59,6 +59,14 @@ typedef struct SDArchiverFileInfo { char *link_dest; } SDArchiverFileInfo; +typedef enum SDArchiverParsedStatus { + SDAPS_SUCCESS, + SDAPS_NO_USER_CWD, +} SDArchiverParsedStatus; + +/// Returned 
c-string does not need to be free'd. +char *simple_archiver_parsed_status_to_str(SDArchiverParsedStatus status); + void simple_archiver_print_usage(void); SDArchiverParsed simple_archiver_create_parsed(void); @@ -74,6 +82,6 @@ void simple_archiver_free_parsed(SDArchiverParsed *parsed); /// Each entry in the linked list is an SDArchiverFileInfo object. SDArchiverLinkedList *simple_archiver_parsed_to_filenames( - const SDArchiverParsed *parsed); + const SDArchiverParsed *parsed, SDArchiverParsedStatus *status_out); #endif From efde02b4abab58ae9a7814d8db7673dc9f6b8058 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Wed, 2 Oct 2024 14:52:05 +0900 Subject: [PATCH 04/37] backport: Fixes for v0-file-format extract fn --- src/archiver.c | 185 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 164 insertions(+), 21 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 1cd5e29..67f38cf 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1325,6 +1325,17 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, } } else { skip = 0; + int fd = open((const char *)buf, O_RDONLY | O_NOFOLLOW); + if (fd == -1) { + if (errno == ELOOP) { + // Is an existing symbolic file. + unlink((const char *)buf); + } + } else { + close(fd); + // Is an existing file. 
+ unlink((const char *)buf); + } } if (!skip) { out_f_name = malloc(strlen((const char *)buf) + 1); @@ -1334,17 +1345,17 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, } else { __attribute__(( cleanup(simple_archiver_helper_cleanup_malloced))) void *heap_buf = - malloc(u16 + 1); + malloc((uint32_t)u16 + 1); uint8_t *uc_heap_buf = heap_buf; - if (fread(uc_heap_buf, 1, u16 + 1, in_f) != (size_t)u16 + 1) { + if (fread(uc_heap_buf, 1, (uint32_t)u16 + 1, in_f) != (size_t)u16 + 1) { return SDAS_INVALID_FILE; } - uc_heap_buf[u16 - 1] = 0; + uc_heap_buf[u16] = 0; fprintf(stderr, " Filename: %s\n", uc_heap_buf); if (do_extract) { if ((state->parsed->flags & 0x8) == 0) { __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) - FILE *test_fd = fopen((const char *)buf, "rb"); + FILE *test_fd = fopen((const char *)uc_heap_buf, "rb"); if (test_fd) { skip = 1; fprintf(stderr, @@ -1355,10 +1366,22 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, } } else { skip = 0; + int fd = open((const char *)uc_heap_buf, O_RDONLY | O_NOFOLLOW); + if (fd == -1) { + if (errno == ELOOP) { + // Is an existing symbolic file. + unlink((const char *)uc_heap_buf); + } + } else { + close(fd); + // Is an existing file. + unlink((const char *)uc_heap_buf); + } } if (!skip) { - out_f_name = malloc(strlen((const char *)buf) + 1); - memcpy(out_f_name, buf, strlen((const char *)buf) + 1); + out_f_name = malloc(strlen((const char *)uc_heap_buf) + 1); + memcpy(out_f_name, uc_heap_buf, + strlen((const char *)uc_heap_buf) + 1); } } } @@ -1481,6 +1504,12 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_make_dirs((const char *)out_f_name); out_f = fopen(out_f_name, "wb"); + if (!out_f) { + fprintf(stderr, + "WARNING: Failed to open \"%s\" for writing! 
(No write " + "permissions?)\n", + (char *)out_f_name); + } __attribute__(( cleanup(cleanup_temp_filename_delete))) void **ptrs_array = malloc(sizeof(void *) * 2); @@ -1489,7 +1518,7 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - if (is_compressed) { + if (is_compressed && out_f) { // Handle SIGPIPE. signal(SIGPIPE, handle_sig_pipe); @@ -1697,10 +1726,12 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, // Error. return SDAS_INTERNAL_ERROR; } - fwrite(buf, 1, fread_ret, out_f); - if (ferror(out_f)) { - // Error. - return SDAS_INTERNAL_ERROR; + if (out_f) { + fwrite(buf, 1, fread_ret, out_f); + if (ferror(out_f)) { + // Error. + return SDAS_INTERNAL_ERROR; + } } compressed_file_size -= fread_ret; } else { @@ -1709,10 +1740,12 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, // Error. return SDAS_INTERNAL_ERROR; } - fwrite(buf, 1, fread_ret, out_f); - if (ferror(out_f)) { - // Error. - return SDAS_INTERNAL_ERROR; + if (out_f) { + fwrite(buf, 1, fread_ret, out_f); + if (ferror(out_f)) { + // Error. 
+ return SDAS_INTERNAL_ERROR; + } } compressed_file_size -= fread_ret; } @@ -1725,7 +1758,9 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, } ptrs_array[0] = NULL; - fprintf(stderr, " Extracted.\n"); + if (out_f) { + fprintf(stderr, " Extracted.\n"); + } #endif } else { while (u64 != 0) { @@ -1804,26 +1839,108 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " Link relative path: %s\n", (char *)rel_path); } - if (do_extract) { + if (do_extract && !skip) { simple_archiver_helper_make_dirs((const char *)out_f_name); if (abs_path && rel_path) { if (abs_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - int ret = symlink(abs_path, out_f_name); + int_fast8_t retry_symlink = 0; + int ret; + V0_SYMLINK_CREATE_RETRY_0: + ret = symlink(abs_path, out_f_name); if (ret == -1) { - return SDAS_FAILED_TO_EXTRACT_SYMLINK; + if (retry_symlink) { + fprintf(stderr, + "WARNING: Failed to create symlink after removing " + "existing symlink!\n"); + goto V0_SYMLINK_CREATE_AFTER_0; + } else if (errno == EEXIST) { + if ((state->parsed->flags & 8) == 0) { + fprintf( + stderr, + "WARNING: Symlink already exists and " + "\"--overwrite-extract\" is not specified, skipping!\n"); + goto V0_SYMLINK_CREATE_AFTER_0; + } else { + fprintf(stderr, + "NOTICE: Symlink already exists and " + "\"--overwrite-extract\" specified, attempting to " + "overwrite...\n"); + unlink(out_f_name); + retry_symlink = 1; + goto V0_SYMLINK_CREATE_RETRY_0; + } + } else { + return SDAS_FAILED_TO_EXTRACT_SYMLINK; + } } + ret = fchmodat(AT_FDCWD, out_f_name, permissions, + AT_SYMLINK_NOFOLLOW); + if (ret == -1) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, + "NOTICE: Setting permissions of symlink is not " + "supported by FS/OS!\n"); + } else { + fprintf(stderr, + "WARNING: Failed to set 
permissions of symlink (%d)!\n", + errno); + } + } + V0_SYMLINK_CREATE_AFTER_0: + retry_symlink = 1; #endif } else { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - int ret = symlink(rel_path, out_f_name); + int_fast8_t retry_symlink = 0; + int ret; + V0_SYMLINK_CREATE_RETRY_1: + ret = symlink(rel_path, out_f_name); if (ret == -1) { - return SDAS_FAILED_TO_EXTRACT_SYMLINK; + if (retry_symlink) { + fprintf(stderr, + "WARNING: Failed to create symlink after removing " + "existing symlink!\n"); + goto V0_SYMLINK_CREATE_AFTER_1; + } else if (errno == EEXIST) { + if ((state->parsed->flags & 8) == 0) { + fprintf( + stderr, + "WARNING: Symlink already exists and " + "\"--overwrite-extract\" is not specified, skipping!\n"); + goto V0_SYMLINK_CREATE_AFTER_1; + } else { + fprintf(stderr, + "NOTICE: Symlink already exists and " + "\"--overwrite-extract\" specified, attempting to " + "overwrite...\n"); + unlink(out_f_name); + retry_symlink = 1; + goto V0_SYMLINK_CREATE_RETRY_1; + } + } else { + return SDAS_FAILED_TO_EXTRACT_SYMLINK; + } } + ret = fchmodat(AT_FDCWD, out_f_name, permissions, + AT_SYMLINK_NOFOLLOW); + if (ret == -1) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, + "NOTICE: Setting permissions of symlink is not " + "supported by FS/OS!\n"); + } else { + fprintf(stderr, + "WARNING: Failed to set permissions of symlink (%d)!\n", + errno); + } + } + V0_SYMLINK_CREATE_AFTER_1: + retry_symlink = 1; #endif } } else if (abs_path) { @@ -1834,6 +1951,19 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, if (ret == -1) { return SDAS_FAILED_TO_EXTRACT_SYMLINK; } + ret = + fchmodat(AT_FDCWD, out_f_name, permissions, AT_SYMLINK_NOFOLLOW); + if (ret == -1) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, + "NOTICE: Setting permissions of symlink is not supported " + "by FS/OS!\n"); + } else { + 
fprintf(stderr, + "WARNING: Failed to set permissions of symlink (%d)!\n", + errno); + } + } #endif } else if (rel_path) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ @@ -1843,6 +1973,19 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, if (ret == -1) { return SDAS_FAILED_TO_EXTRACT_SYMLINK; } + ret = + fchmodat(AT_FDCWD, out_f_name, permissions, AT_SYMLINK_NOFOLLOW); + if (ret == -1) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, + "NOTICE: Setting permissions of symlink is not supported " + "by FS/OS!\n"); + } else { + fprintf(stderr, + "WARNING: Failed to set permissions of symlink (%d)!\n", + errno); + } + } #endif } else { fprintf( From f76e383e785675a52a6e3c53fb2956b11babd9ac Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 24 Sep 2024 18:43:47 +0900 Subject: [PATCH 05/37] Create file format for format version 1 This is in preparation of improving compression by concatenating files together before compressing them to reduce the per-file overhead. --- file_format.md | 102 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/file_format.md b/file_format.md index fb9c210..2f4831e 100644 --- a/file_format.md +++ b/file_format.md @@ -76,3 +76,105 @@ Following the file-count bytes, the following bytes are added for each file: 1. 8 bytes 64-bit unsigned integer "size of filename in this archive file" in big-endian. 2. X bytes file data (length defined by previous value). + +## Format Version 1 + +File extension is "*.simplearchive" but this isn't really checked. + +First 18 bytes of file will be (in ascii): + + SIMPLE_ARCHIVE_VER + +Next 2 bites is a 16-bit unsigned integer "version" in big-endian. It will be: + + 0x00 0x01 + +Next 4 bytes are bit-flags. + +1. The first byte + 1. The first bit is set if de/compressor is set for this archive. 
+ +The remaining unused flags in the previous bit-flags bytes are reserved for +future revisions and are currently ignored. + +If the previous "de/compressor is set" flag is enabled, then the next section is +added: + +1. 2 bytes is 16-bit unsigned integer "compressor cmd+args" in big-endian. This + does not include the NULL at the end of the string. +2. X bytes of "compressor cmd+args" (length defined by previous value). Is a + NULL-terminated string. +3. 2 bytes is 16-bit unsigned integer "decompressor cmd+args" in big-endian. + This does not include the NULL at the end of the string. +4. X bytes of "decompressor cmd+args" (length defined by previous value). Is a + NULL-terminated string. + +The next 4 bytes is a 32-bit unsigned integer "link count" in big-endian which +will indicate the number of symbolic links in this archive. + +Following the link-count bytes, the following bytes are added for each symlink: + +1. 2 bytes bit-flags: + 1. The first byte. + 1. The first bit is UNSET if relative links are preferred, and is SET if + absolute links are preferred. + 2. The second byte. + 1. Currently unused. +2. 2 bytes is 16-bit unsigned integer "link target absolute path" in + big-endian. This does not include the NULL at the end of the string. +3. X bytes of link-target-absolute-path (length defined by previous value). + Is a NULL-terminated string. If the previous "size" value is 0, then + this entry does not exist and should be skipped. +4. 2 bytes is 16-bit unsigned integer "link target relative path" in + big-endian. This does not include the NULL at the end of the string. +5. X bytes of link-target-relative-path (length defined by previous value). + Is a NULL-terminated string. If the previous "size" value is 0, then + this entry does not exist and should be skipped. + +After the symlink related data, the next 4 bytes is a 32-bit unsigned integer +"chunk count" in big-endian which will indicate the number of chunks in this +archive. 
+ +Following the chunk-count bytes, the following bytes are added for each chunk: + +1. 2 bytes that are a 16-bit unsigned integer "file count" in big-endian. + +The following bytes are added for each file within each chunk: + +1. 2 bytes that are a 16-bit unsigned integer "filename length" in big-endian. + This does not include the NULL at the end of the string. +2. X bytes of filename (length defined by previous value). Is a NULL-terminated + string. +3. 4 bytes bit-flags. + 1. The first byte. + 1. The first bit is "user read permission". + 2. The second bit is "user write permission". + 3. The third bit is "user execute permission". + 4. The fourth bit is "group read permission". + 5. The fifth bit is "group write permission". + 6. The sixth bit is "group execute permission". + 7. The seventh bit is "other read permission". + 8. The eighth bit is "other write permission". + 2. The second byte. + 1. The first bit is "other execute permission". + 3. The third byte. + 1. Currently unused. + 4. The fourth byte. + 1. Currently unused. +4. Two 4-byte unsigned integers in big-endian for UID and GID. + 1. A 32-bit unsigned integer in big endian that specifies the UID of the + file. Note that during extraction, if the user is not root, then this + value will be ignored. + 2. A 32-bit unsigned integer in big endian that specifies the GID of the + file. Note that during extraction, if the user is not root, then this + value will be ignored. +5. A 64-bit unsigned integer in big endian for the "size of file". + +After the files' metadata are the current chunk's data: + +1. A 64-bit unsigned integer in big endian for the "size of chunk". +2. X bytes of data for the current chunk of the previously specified size. If + not using de/compressor, this section is the previously mentioned files + concatenated with each other. If using de/compressor, this section is the + previously mentioned files concatenated and compressed into a single blob of + data. 
From a7aa31fc89d458b97283270c07f32b55e118c759 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 24 Sep 2024 18:48:15 +0900 Subject: [PATCH 06/37] Fix typo in file format specification version 1 --- file_format.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/file_format.md b/file_format.md index 2f4831e..9597e1b 100644 --- a/file_format.md +++ b/file_format.md @@ -85,7 +85,7 @@ First 18 bytes of file will be (in ascii): SIMPLE_ARCHIVE_VER -Next 2 bites is a 16-bit unsigned integer "version" in big-endian. It will be: +Next 2 bytes is a 16-bit unsigned integer "version" in big-endian. It will be: 0x00 0x01 From d625c1b1cb17188202ac7819803dc246ce3f1976 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 24 Sep 2024 18:54:38 +0900 Subject: [PATCH 07/37] Fix typo --- file_format.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/file_format.md b/file_format.md index 9597e1b..6558f57 100644 --- a/file_format.md +++ b/file_format.md @@ -139,7 +139,7 @@ Following the chunk-count bytes, the following bytes are added for each chunk: 1. 2 bytes that are a 16-bit unsigned integer "file count" in big-endian. -The following bytes are added for each file within each chunk: +The following bytes are added for each file within the current chunk: 1. 2 bytes that are a 16-bit unsigned integer "filename length" in big-endian. This does not include the NULL at the end of the string. 
From c1faae90e9218822371825ee937479acb621e286 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 27 Sep 2024 12:58:53 +0900 Subject: [PATCH 08/37] Split up handling of archive file based on version --- src/archiver.c | 41 +++++++++++++++++++++++++++++++++-------- src/archiver.h | 8 ++++++++ 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 67f38cf..700ada8 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1173,12 +1173,9 @@ int simple_archiver_write_all(FILE *out_f, SDArchiverState *state, int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state) { - uint8_t buf[1024]; - memset(buf, 0, 1024); + uint8_t buf[32]; + memset(buf, 0, 32); uint16_t u16; - uint32_t u32; - uint64_t u64; - int_fast8_t is_compressed = 0; if (fread(buf, 1, 18, in_f) != 18) { return SDAS_INVALID_FILE; @@ -1186,10 +1183,31 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, return SDAS_INVALID_FILE; } else if (fread(buf, 1, 2, in_f) != 2) { return SDAS_INVALID_FILE; - } else if (buf[0] != 0 || buf[1] != 0) { - // Version is not zero. 
+ } + + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + + if (u16 == 0) { + return simple_archiver_parse_archive_version_0(in_f, do_extract, state); + } else if (u16 == 1) { + return simple_archiver_parse_archive_version_1(in_f, do_extract, state); + } else { + fprintf(stderr, "ERROR Unsupported archive version %u!\n", u16); return SDAS_INVALID_FILE; - } else if (fread(buf, 1, 4, in_f) != 4) { + } +} + +int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, + const SDArchiverState *state) { + uint8_t buf[1024]; + memset(buf, 0, 1024); + uint16_t u16; + uint32_t u32; + uint64_t u64; + int_fast8_t is_compressed = 0; + + if (fread(buf, 1, 4, in_f) != 4) { return SDAS_INVALID_FILE; } @@ -1999,6 +2017,13 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, return SDAS_SUCCESS; } +int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, + const SDArchiverState *state) { + // TODO Implement this. + fprintf(stderr, "ERROR Handling archive version 1 is unimplemented!\n"); + return SDAS_INTERNAL_ERROR; +} + int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2], const char *cmd, void *pid_out) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ diff --git a/src/archiver.h b/src/archiver.h index e651b87..51b6abc 100644 --- a/src/archiver.h +++ b/src/archiver.h @@ -69,6 +69,14 @@ int simple_archiver_write_all(FILE *out_f, SDArchiverState *state, int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state); +/// Returns zero on success. +int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, + const SDArchiverState *state); + +/// Returns zero on success. +int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, + const SDArchiverState *state); + /// Returns zero on success. 
int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2], const char *cmd, void *pid_out); From c71f4f45c7b49381d9641033a30a320e7be31c0c Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 27 Sep 2024 15:40:11 +0900 Subject: [PATCH 09/37] Impl. simple test/extract new file format (WIP) TODO: Extract symlinks in new format (implemented but untested). Extract compressed files in new format. --- file_format.md | 12 +- file_format_1_example_0 | Bin 0 -> 203 bytes src/archiver.c | 459 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 464 insertions(+), 7 deletions(-) create mode 100644 file_format_1_example_0 diff --git a/file_format.md b/file_format.md index 6558f57..0ee7d63 100644 --- a/file_format.md +++ b/file_format.md @@ -120,14 +120,18 @@ Following the link-count bytes, the following bytes are added for each symlink: absolute links are preferred. 2. The second byte. 1. Currently unused. -2. 2 bytes is 16-bit unsigned integer "link target absolute path" in +2. 2 bytes 16-bit unsigned integer "link name" in big-endian. This does not + include the NULL at the end of the string. Must not be zero. +3. X bytes of link-name (length defined by previous value). Is a NULL-terminated + string. +4. 2 bytes is 16-bit unsigned integer "link target absolute path" in big-endian. This does not include the NULL at the end of the string. -3. X bytes of link-target-absolute-path (length defined by previous value). +5. X bytes of link-target-absolute-path (length defined by previous value). Is a NULL-terminated string. If the previous "size" value is 0, then this entry does not exist and should be skipped. -4. 2 bytes is 16-bit unsigned integer "link target relative path" in +6. 2 bytes is 16-bit unsigned integer "link target relative path" in big-endian. This does not include the NULL at the end of the string. -5. X bytes of link-target-relative-path (length defined by previous value). +7. 
X bytes of link-target-relative-path (length defined by previous value). Is a NULL-terminated string. If the previous "size" value is 0, then this entry does not exist and should be skipped. diff --git a/file_format_1_example_0 b/file_format_1_example_0 new file mode 100644 index 0000000000000000000000000000000000000000..f28c89ed130d3445f8c84ba0f3f3521d17cc134e GIT binary patch literal 203 zcmWIc^bPQFjdu)k_V5gIjSq7TVqj!I044?|29}c4;t~dSupsjb1_lrY12zU$kdzU2 qIfxn_kP1UCusSZFVZ0!ge`$$6h&INq6Ge>)b~Q+*8-qbuf); @@ -977,6 +985,155 @@ int filenames_to_abs_map_fn(void *data, void *ud) { return 0; } +int read_buf_full_from_fd(FILE *fd, char *read_buf, const size_t read_buf_size, + const size_t amount_total, char *dst_buf) { + size_t amount = amount_total; + while (amount != 0) { + if (amount >= read_buf_size) { + if (fread(read_buf, 1, read_buf_size, fd) != read_buf_size) { + return SDAS_INVALID_FILE; + } + if (dst_buf) { + memcpy(dst_buf + (amount_total - amount), read_buf, read_buf_size); + } + amount -= read_buf_size; + } else { + if (fread(read_buf, 1, amount, fd) != amount) { + return SDAS_INVALID_FILE; + } + if (dst_buf) { + memcpy(dst_buf + (amount_total - amount), read_buf, amount); + } + amount = 0; + } + } + + return SDAS_SUCCESS; +} + +int read_fd_to_out_fd(FILE *in_fd, FILE *out_fd, char *read_buf, + const size_t read_buf_size, const size_t amount_total) { + size_t amount = amount_total; + while (amount != 0) { + if (amount >= read_buf_size) { + if (fread(read_buf, 1, read_buf_size, in_fd) != read_buf_size) { + return SDAS_INVALID_FILE; + } else if (fwrite(read_buf, 1, read_buf_size, out_fd) != read_buf_size) { + return SDAS_FAILED_TO_WRITE; + } + amount -= read_buf_size; + } else { + if (fread(read_buf, 1, amount, in_fd) != amount) { + return SDAS_INVALID_FILE; + } else if (fwrite(read_buf, 1, amount, out_fd) != amount) { + return SDAS_FAILED_TO_WRITE; + } + amount = 0; + } + } + return SDAS_SUCCESS; +} + +void free_internal_file_info(void *data) { + 
SDArchiverInternalFileInfo *file_info = data; + if (file_info) { + if (file_info->filename) { + free(file_info->filename); + } + free(file_info); + } +} + +void cleanup_internal_file_info(SDArchiverInternalFileInfo **file_info) { + if (file_info && *file_info) { + if ((*file_info)->filename) { + free((*file_info)->filename); + } + free(*file_info); + *file_info = NULL; + } +} + +mode_t permissions_from_bits_version_1(const uint8_t flags[4], + uint_fast8_t print) { + mode_t permissions = 0; + + if ((flags[0] & 1) != 0) { + permissions |= S_IRUSR; + if (print) { + fprintf(stderr, "r"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 2) != 0) { + permissions |= S_IWUSR; + if (print) { + fprintf(stderr, "w"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 4) != 0) { + permissions |= S_IXUSR; + if (print) { + fprintf(stderr, "x"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 8) != 0) { + permissions |= S_IRGRP; + if (print) { + fprintf(stderr, "r"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x10) != 0) { + permissions |= S_IWGRP; + if (print) { + fprintf(stderr, "w"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x20) != 0) { + permissions |= S_IXGRP; + if (print) { + fprintf(stderr, "x"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x40) != 0) { + permissions |= S_IROTH; + if (print) { + fprintf(stderr, "r"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x80) != 0) { + permissions |= S_IWOTH; + if (print) { + fprintf(stderr, "w"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[1] & 1) != 0) { + permissions |= S_IXOTH; + if (print) { + fprintf(stderr, "x"); + } + } else if (print) { + fprintf(stderr, "-"); + } + + return permissions; +} + char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { switch (error) { case SDAS_SUCCESS: @@ -2019,9 
+2176,305 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state) { - // TODO Implement this. - fprintf(stderr, "ERROR Handling archive version 1 is unimplemented!\n"); - return SDAS_INTERNAL_ERROR; + uint8_t buf[1024]; + memset(buf, 0, 1024); + uint16_t u16; + uint32_t u32; + uint64_t u64; + + if (fread(buf, 1, 4, in_f) != 4) { + return SDAS_INVALID_FILE; + } + + if (do_extract && state->parsed->user_cwd) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + if (chdir(state->parsed->user_cwd)) { + return SDAS_FAILED_TO_CHANGE_CWD; + } +#endif + } + + const int_fast8_t is_compressed = (buf[0] & 1) ? 1 : 0; + + __attribute__((cleanup( + simple_archiver_helper_cleanup_c_string))) char *compressor_cmd = NULL; + __attribute__((cleanup( + simple_archiver_helper_cleanup_c_string))) char *decompressor_cmd = NULL; + + if (is_compressed) { + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + compressor_cmd = malloc(u16 + 1); + int ret = + read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, compressor_cmd); + if (ret != SDAS_SUCCESS) { + return ret; + } + compressor_cmd[u16] = 0; + + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + decompressor_cmd = malloc(u16 + 1); + ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, + decompressor_cmd); + if (ret != SDAS_SUCCESS) { + return ret; + } + decompressor_cmd[u16] = 0; + } + + // Link count. 
+ if (fread(buf, 1, 4, in_f) != 4) { + return SDAS_INVALID_FILE; + } + memcpy(&u32, buf, 4); + simple_archiver_helper_32_bit_be(&u32); + + for (uint32_t idx = 0; idx < u32; ++idx) { + fprintf(stderr, "SYMLINK %3u of %3u\n", idx + 1, u32); + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + const uint_fast8_t absolute_preferred = (buf[0] & 1) ? 1 : 0; + uint_fast8_t link_extracted = 0; + + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + + __attribute__(( + cleanup(simple_archiver_helper_cleanup_c_string))) char *link_name = + malloc(u16 + 1); + + int ret = + read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, link_name); + if (ret != SDAS_SUCCESS) { + return ret; + } + + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + if (u16 != 0) { + __attribute__(( + cleanup(simple_archiver_helper_cleanup_c_string))) char *path = + malloc(u16 + 1); + ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, path); + if (ret != SDAS_SUCCESS) { + return ret; + } + path[u16] = 0; + if (do_extract && absolute_preferred) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + simple_archiver_helper_make_dirs(link_name); + ret = symlink(path, link_name); + if (ret == -1) { + return SDAS_FAILED_TO_EXTRACT_SYMLINK; + } + link_extracted = 1; + fprintf(stderr, " %s -> %s\n", link_name, path); +#endif + } + } + + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + if (u16 != 0) { + __attribute__(( + cleanup(simple_archiver_helper_cleanup_c_string))) char *path = + malloc(u16 + 1); + ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, path); + if (ret != 
SDAS_SUCCESS) { + return ret; + } + path[u16] = 0; + if (do_extract && !absolute_preferred) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + simple_archiver_helper_make_dirs(link_name); + ret = symlink(path, link_name); + if (ret == -1) { + return SDAS_FAILED_TO_EXTRACT_SYMLINK; + } + link_extracted = 1; + fprintf(stderr, " %s -> %s\n", link_name, path); +#endif + } + } + + if (!link_extracted) { + fprintf(stderr, "WARNING Symlink \"%s\" was not created!\n", link_name); + } + } + + if (fread(buf, 1, 4, in_f) != 4) { + return SDAS_INVALID_FILE; + } + memcpy(&u32, buf, 4); + simple_archiver_helper_32_bit_be(&u32); + + const uint32_t chunk_count = u32; + for (uint32_t chunk_idx = 0; chunk_idx < chunk_count; ++chunk_idx) { + fprintf(stderr, "CHUNK %3u of %3u\n", chunk_idx + 1, chunk_count); + + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + + const uint16_t file_count = u16; + + __attribute__((cleanup(simple_archiver_list_free))) + SDArchiverLinkedList *file_info_list = simple_archiver_list_init(); + + __attribute__((cleanup(cleanup_internal_file_info))) + SDArchiverInternalFileInfo *file_info = NULL; + + for (uint16_t file_idx = 0; file_idx < file_count; ++file_idx) { + file_info = malloc(sizeof(SDArchiverInternalFileInfo)); + memset(file_info, 0, sizeof(SDArchiverInternalFileInfo)); + + if (fread(buf, 1, 2, in_f) != 2) { + return SDAS_INVALID_FILE; + } + memcpy(&u16, buf, 2); + simple_archiver_helper_16_bit_be(&u16); + + file_info->filename = malloc(u16 + 1); + int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, + file_info->filename); + if (ret != SDAS_SUCCESS) { + return ret; + } + file_info->filename[u16] = 0; + + if (fread(file_info->bit_flags, 1, 4, in_f) != 4) { + return SDAS_INVALID_FILE; + } + + if 
(fread(buf, 1, 4, in_f) != 4) { + return SDAS_INVALID_FILE; + } + memcpy(&u32, buf, 4); + simple_archiver_helper_32_bit_be(&u32); + file_info->uid = u32; + + if (fread(buf, 1, 4, in_f) != 4) { + return SDAS_INVALID_FILE; + } + memcpy(&u32, buf, 4); + simple_archiver_helper_32_bit_be(&u32); + file_info->gid = u32; + + if (fread(buf, 1, 8, in_f) != 8) { + return SDAS_INVALID_FILE; + } + memcpy(&u64, buf, 8); + simple_archiver_helper_64_bit_be(&u64); + file_info->file_size = u64; + + simple_archiver_list_add(file_info_list, file_info, + free_internal_file_info); + file_info = NULL; + } + + if (fread(buf, 1, 8, in_f) != 8) { + return SDAS_INVALID_FILE; + } + memcpy(&u64, buf, 8); + simple_archiver_helper_64_bit_be(&u64); + + const uint64_t chunk_size = u64; + uint64_t chunk_idx = 0; + + SDArchiverLLNode *node = file_info_list->head; + uint16_t file_idx = 0; + + if (is_compressed) { + fprintf(stderr, "ERROR Extracting compressed chunks is unimplemented!\n"); + return SDAS_INTERNAL_ERROR; + } else { + while (node->next != file_info_list->tail) { + node = node->next; + const SDArchiverInternalFileInfo *file_info = node->data; + fprintf(stderr, " FILE %3u of %3u\n", ++file_idx, file_count); + fprintf(stderr, " Filename: %s\n", file_info->filename); + chunk_idx += file_info->file_size; + if (chunk_idx > chunk_size) { + fprintf(stderr, "ERROR Files in chunk is larger than chunk!\n"); + return SDAS_INTERNAL_ERROR; + } else if (do_extract) { + mode_t permissions = + permissions_from_bits_version_1(file_info->bit_flags, 0); + if ((state->parsed->flags & 8) == 0) { + // Check if file already exists. 
+ __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) + FILE *temp_fd = fopen(file_info->filename, "r"); + if (temp_fd) { + fprintf(stderr, + " WARNING: File already exists and " + "\"--overwrite-extract\" is not specified, skipping!\n"); + int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, + file_info->file_size, NULL); + if (ret != SDAS_SUCCESS) { + return ret; + } + continue; + } + } + simple_archiver_helper_make_dirs(file_info->filename); + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) + FILE *out_fd = fopen(file_info->filename, "wb"); + int ret = read_fd_to_out_fd(in_f, out_fd, (char *)buf, 1024, + file_info->file_size); + if (ret != SDAS_SUCCESS) { + return ret; + } + simple_archiver_helper_cleanup_FILE(&out_fd); + if (chmod(file_info->filename, permissions) == -1) { + return SDAS_INTERNAL_ERROR; + } + } else { + fprintf(stderr, " Permissions: "); + permissions_from_bits_version_1(file_info->bit_flags, 1); + fprintf(stderr, "\n UID: %u\n GID: %u\n", file_info->uid, + file_info->gid); + if (is_compressed) { + fprintf(stderr, " File size (compressed): %lu\n", + file_info->file_size); + } else { + fprintf(stderr, " File size: %lu\n", file_info->file_size); + } + int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, + file_info->file_size, NULL); + if (ret != SDAS_SUCCESS) { + return ret; + } + } + } + } + } + + return SDAS_SUCCESS; } int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2], From b273d91896c52988fb986ee75dc5840349313cda Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 27 Sep 2024 15:46:54 +0900 Subject: [PATCH 10/37] "Fix" Linux/Mac/Unix usage --- src/archiver.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/archiver.c b/src/archiver.c index fe662eb..1d6a4a5 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1054,6 +1054,9 @@ void cleanup_internal_file_info(SDArchiverInternalFileInfo **file_info) { } } +#if SIMPLE_ARCHIVER_PLATFORM == 
SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX mode_t permissions_from_bits_version_1(const uint8_t flags[4], uint_fast8_t print) { mode_t permissions = 0; @@ -1133,6 +1136,7 @@ mode_t permissions_from_bits_version_1(const uint8_t flags[4], return permissions; } +#endif char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { switch (error) { @@ -1565,10 +1569,10 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, return SDAS_INVALID_FILE; } - mode_t permissions = 0; #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + mode_t permissions = 0; if (do_extract == 0) { fprintf(stderr, " Permissions: "); @@ -2423,8 +2427,12 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, "ERROR Files in chunk is larger than chunk!\n"); return SDAS_INTERNAL_ERROR; } else if (do_extract) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX mode_t permissions = permissions_from_bits_version_1(file_info->bit_flags, 0); +#endif if ((state->parsed->flags & 8) == 0) { // Check if file already exists. 
__attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) @@ -2450,9 +2458,13 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } simple_archiver_helper_cleanup_FILE(&out_fd); +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX if (chmod(file_info->filename, permissions) == -1) { return SDAS_INTERNAL_ERROR; } +#endif } else { fprintf(stderr, " Permissions: "); permissions_from_bits_version_1(file_info->bit_flags, 1); From 3d58f466af2b4f81efcb716bb57686e5caf47c83 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 27 Sep 2024 16:09:23 +0900 Subject: [PATCH 11/37] Impl. setting stored UID/GID if EUID 0 --- src/archiver.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/archiver.c b/src/archiver.c index 1d6a4a5..b654cf5 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2463,6 +2463,13 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX if (chmod(file_info->filename, permissions) == -1) { return SDAS_INTERNAL_ERROR; + } else if (geteuid() == 0 && + chown(file_info->filename, file_info->uid, + file_info->gid) != 0) { + fprintf(stderr, + "ERROR Failed to set UID/GID as EUID 0 of file \"%s\"!\n", + file_info->filename); + return SDAS_INTERNAL_ERROR; } #endif } else { From 74079724501477fc145fc53726438f0de9e0ba7a Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 30 Sep 2024 13:12:39 +0900 Subject: [PATCH 12/37] v1 extract skip non-specified args if exists --- src/archiver.c | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index b654cf5..3d3b0f3 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2186,6 +2186,21 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, 
int_fast8_t do_extract, uint32_t u32; uint64_t u64; + __attribute__((cleanup(simple_archiver_hash_map_free))) + SDArchiverHashMap *working_files_map = NULL; + if (state && state->parsed->working_files[0] != NULL) { + working_files_map = simple_archiver_hash_map_init(); + for (char **iter = state->parsed->working_files; *iter != NULL; ++iter) { + size_t len = strlen(*iter) + 1; + char *key = malloc(len); + memcpy(key, *iter, len); + key[len - 1] = 0; + simple_archiver_hash_map_insert( + working_files_map, key, key, len, + simple_archiver_helper_datastructure_cleanup_nop, NULL); + } + } + if (fread(buf, 1, 4, in_f) != 4) { return SDAS_INVALID_FILE; } @@ -2249,6 +2264,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } const uint_fast8_t absolute_preferred = (buf[0] & 1) ? 1 : 0; uint_fast8_t link_extracted = 0; + uint_fast8_t skip_due_to_map = 0; if (fread(buf, 1, 2, in_f) != 2) { return SDAS_INVALID_FILE; @@ -2264,6 +2280,11 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, link_name); if (ret != SDAS_SUCCESS) { return ret; + } else if (working_files_map && + simple_archiver_hash_map_get(working_files_map, link_name, + u16 + 1) == NULL) { + skip_due_to_map = 1; + fprintf(stderr, "Skipping not specified in args...\n"); } if (fread(buf, 1, 2, in_f) != 2) { @@ -2280,7 +2301,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } path[u16] = 0; - if (do_extract && absolute_preferred) { + if (do_extract && !skip_due_to_map && absolute_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -2309,7 +2330,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } path[u16] = 0; - if (do_extract && 
!absolute_preferred) { + if (do_extract && !skip_due_to_map && !absolute_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -2324,7 +2345,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } } - if (!link_extracted) { + if (!link_extracted && !skip_due_to_map) { fprintf(stderr, "WARNING Symlink \"%s\" was not created!\n", link_name); } } @@ -2426,7 +2447,17 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, if (chunk_idx > chunk_size) { fprintf(stderr, "ERROR Files in chunk is larger than chunk!\n"); return SDAS_INTERNAL_ERROR; - } else if (do_extract) { + } + + uint_fast8_t skip_due_to_map = 0; + if (working_files_map && simple_archiver_hash_map_get( + working_files_map, file_info->filename, + strlen(file_info->filename) + 1) == NULL) { + skip_due_to_map = 1; + fprintf(stderr, " Skipping not specified in args...\n"); + } + + if (do_extract && !skip_due_to_map) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -2472,7 +2503,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INTERNAL_ERROR; } #endif - } else { + } else if (!skip_due_to_map) { fprintf(stderr, " Permissions: "); permissions_from_bits_version_1(file_info->bit_flags, 1); fprintf(stderr, "\n UID: %u\n GID: %u\n", file_info->uid, @@ -2488,6 +2519,12 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, if (ret != SDAS_SUCCESS) { return ret; } + } else { + int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, + file_info->file_size, NULL); + if (ret != SDAS_SUCCESS) { + return ret; + } } } } From 45fdffdc9c884b6ae76d6daf0679932d45361a3e Mon Sep 17 00:00:00 
2001 From: Stephen Seo Date: Mon, 30 Sep 2024 14:54:01 +0900 Subject: [PATCH 13/37] Impl. extract with decompressor file format v1 --- file_format_1_example_1 | Bin 0 -> 341 bytes src/archiver.c | 395 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 391 insertions(+), 4 deletions(-) create mode 100644 file_format_1_example_1 diff --git a/file_format_1_example_1 b/file_format_1_example_1 new file mode 100644 index 0000000000000000000000000000000000000000..044ca9465cbb0e4a714c868a06a6003f1c728ad7 GIT binary patch literal 341 zcmWIc^bPQFjdu)k_V5gIjSq7TVqj!s00Oq0s>}ifT?PhD2qOi=h5$weCI%0X=>kXd%I}!7B?(j zytwnfs`A}=e;wAW4LWd3!L4sMS3ifZe%UsE7Qc7Jx285tWAjy*;rdwbXj)X{;S}wh nx#?O0_Q4&gTsmU8&wk%Ob?U{n&YxTFIBbZn>v{z91e68<5Y>0w literal 0 HcmV?d00001 diff --git a/src/archiver.c b/src/archiver.c index 3d3b0f3..f30e36c 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -46,7 +46,7 @@ #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX -int is_sig_pipe_occurred = 0; +volatile int is_sig_pipe_occurred = 0; void handle_sig_pipe(int sig) { if (sig == SIGPIPE) { @@ -1034,6 +1034,100 @@ int read_fd_to_out_fd(FILE *in_fd, FILE *out_fd, char *read_buf, return SDAS_SUCCESS; } +/// Returns SDAS_SUCCESS on success. 
+int read_decomp_to_out_file(const char *out_filename, int in_pipe, + char *read_buf, const size_t read_buf_size, + const uint64_t file_size) { + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *out_fd = + NULL; + if (out_filename) { + out_fd = fopen(out_filename, "wb"); + if (!out_fd) { + fprintf(stderr, "ERROR Failed to open \"%s\" for writing!\n", + out_filename); + return SDAS_INTERNAL_ERROR; + } + } + + uint64_t written_amt = 0; + ssize_t read_ret; + size_t fwrite_ret; + while (written_amt < file_size) { + if (file_size - written_amt >= read_buf_size) { + read_ret = read(in_pipe, read_buf, read_buf_size); + if (read_ret > 0) { + if (out_fd) { + fwrite_ret = fwrite(read_buf, 1, (size_t)read_ret, out_fd); + if (fwrite_ret == (size_t)read_ret) { + written_amt += fwrite_ret; + } else if (ferror(out_fd)) { + fprintf(stderr, "ERROR Failed to write decompressed data!\n"); + return SDAS_INTERNAL_ERROR; + } else { + fprintf( + stderr, + "ERROR Failed to write decompressed data (invalid state)!\n"); + return SDAS_INTERNAL_ERROR; + } + } else { + written_amt += (size_t)read_ret; + } + } else if (read_ret == 0) { + // EOF. + if (written_amt < file_size) { + fprintf(stderr, + "ERROR Decompressed EOF while file needs more bytes!\n"); + return SDAS_INTERNAL_ERROR; + } else { + break; + } + } else { + // Error. + fprintf(stderr, "ERROR Failed to read from decompressor! 
(%lu)\n", + read_ret); + return SDAS_INTERNAL_ERROR; + } + } else { + read_ret = read(in_pipe, read_buf, file_size - written_amt); + if (read_ret > 0) { + if (out_fd) { + fwrite_ret = fwrite(read_buf, 1, (size_t)read_ret, out_fd); + if (fwrite_ret == (size_t)read_ret) { + written_amt += fwrite_ret; + } else if (ferror(out_fd)) { + fprintf(stderr, "ERROR Failed to write decompressed data!\n"); + return SDAS_INTERNAL_ERROR; + } else { + fprintf( + stderr, + "ERROR Failed to write decompressed data (invalid state)!\n"); + return SDAS_INTERNAL_ERROR; + } + } else { + written_amt += (size_t)read_ret; + } + } else if (read_ret == 0) { + // EOF. + if (written_amt < file_size) { + fprintf(stderr, + "ERROR Decompressed EOF while file needs more bytes!\n"); + return SDAS_INTERNAL_ERROR; + } else { + break; + } + } else { + // Error. + fprintf(stderr, "ERROR Failed to read from decompressor! (%d)\n", + errno); + fprintf(stderr, "EAGAIN %d, EWOULDBLOCK %d\n", EAGAIN, EWOULDBLOCK); + return SDAS_INTERNAL_ERROR; + } + } + } + + return written_amt == file_size ? SDAS_SUCCESS : SDAS_INTERNAL_ERROR; +} + void free_internal_file_info(void *data) { SDArchiverInternalFileInfo *file_info = data; if (file_info) { @@ -1138,6 +1232,52 @@ mode_t permissions_from_bits_version_1(const uint8_t flags[4], } #endif +void simple_archiver_internal_cleanup_int_fd(int *fd) { + if (fd && *fd >= 0) { + close(*fd); + *fd = -1; + } +} + +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX +void simple_archiver_internal_cleanup_decomp(pid_t *decomp_pid) { + if (decomp_pid && *decomp_pid >= 0) { + int decompressor_status; + int decompressor_return_val; + int retries = 0; + int decompressor_ret; + CHECK_DECOMPRESSER: + decompressor_ret = waitpid(*decomp_pid, &decompressor_status, 0); + if (decompressor_ret == *decomp_pid) { + // Status is available. 
+ decompressor_return_val = WIFEXITED(decompressor_status); + if (decompressor_return_val && WEXITSTATUS(decompressor_status)) { + fprintf(stderr, + "WARNING: Exec failed (exec exit code %d)! Invalid " + "decompressor cmd?\n", + decompressor_return_val); + } + } else if (decompressor_ret == 0) { + // Probably still running. + ++retries; + if (retries > 5) { + fprintf(stderr, "WARNING Decompressor process not stopped!\n"); + return; + } + sleep(5); + goto CHECK_DECOMPRESSER; + } else { + // Error. + fprintf(stderr, + "WARNING: Exec failed (exec exit code unknown)! Invalid " + "decompressor cmd?\n"); + } + } +} +#endif + char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { switch (error) { case SDAS_SUCCESS: @@ -2188,7 +2328,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, __attribute__((cleanup(simple_archiver_hash_map_free))) SDArchiverHashMap *working_files_map = NULL; - if (state && state->parsed->working_files[0] != NULL) { + if (state && state->parsed->working_files && + state->parsed->working_files[0] != NULL) { working_files_map = simple_archiver_hash_map_init(); for (char **iter = state->parsed->working_files; *iter != NULL; ++iter) { size_t len = strlen(*iter) + 1; @@ -2236,6 +2377,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } compressor_cmd[u16] = 0; + fprintf(stderr, "Compressor command: %s\n", compressor_cmd); + if (fread(buf, 1, 2, in_f) != 2) { return SDAS_INVALID_FILE; } @@ -2248,6 +2391,12 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } decompressor_cmd[u16] = 0; + + fprintf(stderr, "Decompressor command: %s\n", decompressor_cmd); + if (state && state->parsed && state->parsed->decompressor) { + fprintf(stderr, "Overriding decompressor with: %s\n", + state->parsed->decompressor); + } } // Link count. 
@@ -2434,10 +2583,248 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, SDArchiverLLNode *node = file_info_list->head; uint16_t file_idx = 0; +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX if (is_compressed) { - fprintf(stderr, "ERROR Extracting compressed chunks is unimplemented!\n"); - return SDAS_INTERNAL_ERROR; + // Start the decompressing process and read into files. + + // Handle SIGPIPE. + signal(SIGPIPE, handle_sig_pipe); + + int pipe_into_cmd[2]; + int pipe_outof_cmd[2]; + __attribute__((cleanup( + simple_archiver_internal_cleanup_decomp))) pid_t decompressor_pid; + if (pipe(pipe_into_cmd) != 0) { + // Unable to create pipes. + break; + } else if (pipe(pipe_outof_cmd) != 0) { + // Unable to create second set of pipes. + close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + return SDAS_INTERNAL_ERROR; + } else if (fcntl(pipe_into_cmd[1], F_SETFL, O_NONBLOCK) != 0) { + // Unable to set non-blocking on into-write-pipe. + close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + close(pipe_outof_cmd[1]); + return SDAS_INTERNAL_ERROR; + } + // else if (fcntl(pipe_outof_cmd[0], F_SETFL, O_NONBLOCK) != 0) { + // // Unable to set non-blocking on outof-read-pipe. + // close(pipe_into_cmd[0]); + // close(pipe_into_cmd[1]); + // close(pipe_outof_cmd[0]); + // close(pipe_outof_cmd[1]); + // return SDAS_INTERNAL_ERROR; + // } + + if (state && state->parsed && state->parsed->decompressor) { + if (simple_archiver_de_compress(pipe_into_cmd, pipe_outof_cmd, + state->parsed->decompressor, + &decompressor_pid) != 0) { + // Failed to spawn compressor. + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + fprintf(stderr, + "WARNING: Failed to start decompressor cmd! 
Invalid cmd?\n"); + return SDAS_INTERNAL_ERROR; + } + } else { + if (simple_archiver_de_compress(pipe_into_cmd, pipe_outof_cmd, + decompressor_cmd, + &decompressor_pid) != 0) { + // Failed to spawn compressor. + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + fprintf(stderr, + "WARNING: Failed to start decompressor cmd! Invalid cmd?\n"); + return SDAS_INTERNAL_ERROR; + } + } + + // Close unnecessary pipe fds on this end of the transfer. + close(pipe_into_cmd[0]); + close(pipe_outof_cmd[1]); + + __attribute__((cleanup( + simple_archiver_internal_cleanup_int_fd))) int pipe_into_write = + pipe_into_cmd[1]; + __attribute__((cleanup( + simple_archiver_internal_cleanup_int_fd))) int pipe_outof_read = + pipe_outof_cmd[0]; + + int decompressor_status; + int decompressor_return_val; + int decompressor_ret = + waitpid(decompressor_pid, &decompressor_status, WNOHANG); + if (decompressor_ret == decompressor_pid) { + // Status is available. + if (WIFEXITED(decompressor_status)) { + decompressor_return_val = WEXITSTATUS(decompressor_status); + fprintf(stderr, + "WARNING: Exec failed (exec exit code %d)! Invalid " + "decompressor cmd?\n", + decompressor_return_val); + return SDAS_INTERNAL_ERROR; + } + } else if (decompressor_ret == 0) { + // Probably still running, continue on. + } else { + // Error. + fprintf(stderr, + "WARNING: Exec failed (exec exit code unknown)! Invalid " + "decompressor cmd?\n"); + return SDAS_INTERNAL_ERROR; + } + + // Write all of chunk into decompressor. + uint64_t chunk_written = 0; + while (chunk_written < chunk_size) { + if (is_sig_pipe_occurred) { + fprintf(stderr, + "WARNING: Failed to write to decompressor (SIGPIPE)! 
Invalid " + "decompressor cmd?\n"); + return SDAS_INTERNAL_ERROR; + } else if (chunk_size - chunk_written >= 1024) { + if (fread(buf, 1, 1024, in_f) != 1024) { + fprintf(stderr, "ERROR Failed to read chunk for decompressing!\n"); + return SDAS_INTERNAL_ERROR; + } + ssize_t write_ret = write(pipe_into_cmd[1], buf, 1024); + if (write_ret > 0 && (size_t)write_ret == 1024) { + // Successful write. + } else if (write_ret == -1) { + fprintf(stderr, + "WARNING: Failed to write chunk data into decompressor! " + "Invalid decompressor cmd?\n"); + return SDAS_INTERNAL_ERROR; + } else { + fprintf(stderr, + "WARNING: Failed to write chunk data into decompressor! " + "Invalid decompressor cmd?\n"); + return SDAS_INTERNAL_ERROR; + } + chunk_written += 1024; + } else { + if (fread(buf, 1, chunk_size - chunk_written, in_f) != + chunk_size - chunk_written) { + fprintf(stderr, "ERROR Failed to read chunk for decompressing!\n"); + return SDAS_INTERNAL_ERROR; + } + ssize_t write_ret = write(pipe_into_cmd[1], buf, 1024); + if (write_ret > 0 && (size_t)write_ret == 1024) { + // Successful write. + } else if (write_ret == -1) { + fprintf(stderr, + "WARNING: Failed to write chunk data into decompressor! " + "Invalid decompressor cmd?\n"); + return SDAS_INTERNAL_ERROR; + } else { + fprintf(stderr, + "WARNING: Failed to write chunk data into decompressor! 
" + "Invalid decompressor cmd?\n"); + return SDAS_INTERNAL_ERROR; + } + chunk_written = chunk_size; + } + } + + simple_archiver_internal_cleanup_int_fd(&pipe_into_write); + + while (node->next != file_info_list->tail) { + node = node->next; + const SDArchiverInternalFileInfo *file_info = node->data; + fprintf(stderr, " FILE %3u of %3u\n", ++file_idx, file_count); + fprintf(stderr, " Filename: %s\n", file_info->filename); + + uint_fast8_t skip_due_to_map = 0; + if (working_files_map && simple_archiver_hash_map_get( + working_files_map, file_info->filename, + strlen(file_info->filename) + 1) == NULL) { + skip_due_to_map = 1; + fprintf(stderr, " Skipping not specified in args...\n"); + } + + if (do_extract && !skip_due_to_map) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + mode_t permissions = + permissions_from_bits_version_1(file_info->bit_flags, 0); +#endif + if ((state->parsed->flags & 8) == 0) { + // Check if file already exists. 
+ __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) + FILE *temp_fd = fopen(file_info->filename, "r"); + if (temp_fd) { + fprintf(stderr, + " WARNING: File already exists and " + "\"--overwrite-extract\" is not specified, skipping!\n"); + read_decomp_to_out_file(NULL, pipe_outof_cmd[0], (char *)buf, + 1024, file_info->file_size); + continue; + } + } + + simple_archiver_helper_make_dirs(file_info->filename); + int ret = + read_decomp_to_out_file(file_info->filename, pipe_outof_cmd[0], + (char *)buf, 1024, file_info->file_size); + if (ret != SDAS_SUCCESS) { + return ret; + } +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + if (chmod(file_info->filename, permissions) == -1) { + return SDAS_INTERNAL_ERROR; + } else if (geteuid() == 0 && + chown(file_info->filename, file_info->uid, + file_info->gid) != 0) { + fprintf(stderr, + "ERROR Failed to set UID/GID as EUID 0 of file \"%s\"!\n", + file_info->filename); + return SDAS_INTERNAL_ERROR; + } +#endif + } else if (!skip_due_to_map) { + fprintf(stderr, " Permissions: "); + permissions_from_bits_version_1(file_info->bit_flags, 1); + fprintf(stderr, "\n UID: %u\n GID: %u\n", file_info->uid, + file_info->gid); + if (is_compressed) { + fprintf(stderr, " File size (uncompressed): %lu\n", + file_info->file_size); + } else { + fprintf(stderr, " File size: %lu\n", file_info->file_size); + } + int ret = read_decomp_to_out_file( + NULL, pipe_outof_cmd[0], (char *)buf, 1024, file_info->file_size); + if (ret != SDAS_SUCCESS) { + return ret; + } + } else { + int ret = read_decomp_to_out_file( + NULL, pipe_outof_cmd[0], (char *)buf, 1024, file_info->file_size); + if (ret != SDAS_SUCCESS) { + return ret; + } + } + } + + // Ensure EOF is left from pipe. 
+ ssize_t read_ret = read(pipe_outof_cmd[0], buf, 1024); + if (read_ret != 0) { + fprintf(stderr, "WARNING decompressor didn't reach EOF!\n"); + } } else { +#else + // } (This comment exists so that vim can correctly match curly-braces. + if (!is_compressed) { +#endif while (node->next != file_info_list->tail) { node = node->next; const SDArchiverInternalFileInfo *file_info = node->data; From 53fefb7ae860409ecb19507eb16c6ce52e6c1a7d Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 30 Sep 2024 15:15:00 +0900 Subject: [PATCH 14/37] Remove unnecessary printf used for testing --- src/archiver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/archiver.c b/src/archiver.c index f30e36c..4ed4ed3 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1119,7 +1119,6 @@ int read_decomp_to_out_file(const char *out_filename, int in_pipe, // Error. fprintf(stderr, "ERROR Failed to read from decompressor! (%d)\n", errno); - fprintf(stderr, "EAGAIN %d, EWOULDBLOCK %d\n", EAGAIN, EWOULDBLOCK); return SDAS_INTERNAL_ERROR; } } From 8fa430f842ea1a9f43c7db9ca834bc7428d105ae Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 30 Sep 2024 15:45:48 +0900 Subject: [PATCH 15/37] Fix v1 archive decompression Previous implementation sent too many bytes to decompressor if size was less than 1024. --- src/archiver.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 4ed4ed3..e880489 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2713,8 +2713,10 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, "ERROR Failed to read chunk for decompressing!\n"); return SDAS_INTERNAL_ERROR; } - ssize_t write_ret = write(pipe_into_cmd[1], buf, 1024); - if (write_ret > 0 && (size_t)write_ret == 1024) { + ssize_t write_ret = + write(pipe_into_cmd[1], buf, chunk_size - chunk_written); + if (write_ret > 0 && + (size_t)write_ret == chunk_size - chunk_written) { // Successful write. 
} else if (write_ret == -1) { fprintf(stderr, From da18464d5d32287627dc91ad57964e8d3370478f Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 30 Sep 2024 17:53:39 +0900 Subject: [PATCH 16/37] Test/fix symlink test/extract in v1 file format --- file_format_1_example_1 | Bin 341 -> 405 bytes src/archiver.c | 24 +++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/file_format_1_example_1 b/file_format_1_example_1 index 044ca9465cbb0e4a714c868a06a6003f1c728ad7..3b384a86296f886667d14908e3a8a3f1cf54b0bd 100644 GIT binary patch delta 75 zcmcc0G?jURfddl*0|Q@5W^svrNosM4UPei74p4-FD>E-8wF1uN0jo$ZF4hBLkUR%i LHITE>K$Q^y8N(F- delta 10 RcmbQre3fZ}!NvkrMgSFD19t!b diff --git a/src/archiver.c b/src/archiver.c index e880489..18eefba 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2428,11 +2428,17 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, link_name); if (ret != SDAS_SUCCESS) { return ret; - } else if (working_files_map && - simple_archiver_hash_map_get(working_files_map, link_name, - u16 + 1) == NULL) { + } + + if (!do_extract) { + fprintf(stderr, " Link name: %s\n", link_name); + } + + if (working_files_map && + simple_archiver_hash_map_get(working_files_map, link_name, u16 + 1) == + NULL) { skip_due_to_map = 1; - fprintf(stderr, "Skipping not specified in args...\n"); + fprintf(stderr, " Skipping not specified in args...\n"); } if (fread(buf, 1, 2, in_f) != 2) { @@ -2461,7 +2467,11 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, link_extracted = 1; fprintf(stderr, " %s -> %s\n", link_name, path); #endif + } else { + fprintf(stderr, " Abs path: %s\n", path); } + } else if (!do_extract) { + fprintf(stderr, " No Absolute path.\n"); } if (fread(buf, 1, 2, in_f) != 2) { @@ -2490,10 +2500,14 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, link_extracted = 1; fprintf(stderr, " %s -> 
%s\n", link_name, path); #endif + } else { + fprintf(stderr, " Rel path: %s\n", path); } + } else if (!do_extract) { + fprintf(stderr, " No Relative path.\n"); } - if (!link_extracted && !skip_due_to_map) { + if (do_extract && !link_extracted && !skip_due_to_map) { fprintf(stderr, "WARNING Symlink \"%s\" was not created!\n", link_name); } } From 6376be28408e822972072c1d802ecf627e97b9e1 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Wed, 2 Oct 2024 11:38:39 +0900 Subject: [PATCH 17/37] Add support for writing multiple file formats --- src/archiver.c | 23 +++++++++++++++++++++++ src/archiver.h | 9 ++++++++- src/parser.c | 24 ++++++++++++++++++++++++ src/parser.h | 2 ++ 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/archiver.c b/src/archiver.c index 18eefba..b50a8b8 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1301,6 +1301,8 @@ char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { return "Failed to extract symlink (internal error)"; case SDAS_FAILED_TO_CHANGE_CWD: return "Failed to change current working directory"; + case SDAS_INVALID_WRITE_VERSION: + return "Unsupported write version file format"; default: return "Unknown error"; } @@ -1332,6 +1334,20 @@ void simple_archiver_free_state(SDArchiverState **state) { int simple_archiver_write_all(FILE *out_f, SDArchiverState *state, const SDArchiverLinkedList *filenames) { + switch (state->parsed->write_version) { + case 0: + return simple_archiver_write_v0(out_f, state, filenames); + case 1: + return simple_archiver_write_v1(out_f, state, filenames); + default: + fprintf(stderr, "ERROR: Unsupported write version %u!\n", + state->parsed->write_version); + return SDAS_INVALID_WRITE_VERSION; + } +} + +int simple_archiver_write_v0(FILE *out_f, SDArchiverState *state, + const SDArchiverLinkedList *filenames) { // First create a "set" of absolute paths to given filenames. 
__attribute__((cleanup(simple_archiver_hash_map_free))) SDArchiverHashMap *abs_filenames = simple_archiver_hash_map_init(); @@ -1471,6 +1487,13 @@ int simple_archiver_write_all(FILE *out_f, SDArchiverState *state, return SDAS_SUCCESS; } +int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, + const SDArchiverLinkedList *filenames) { + // TODO Impl. + fprintf(stderr, "Writing v1 unimplemented\n"); + return SDAS_INTERNAL_ERROR; +} + int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state) { uint8_t buf[32]; diff --git a/src/archiver.h b/src/archiver.h index 51b6abc..2b8fbf2 100644 --- a/src/archiver.h +++ b/src/archiver.h @@ -51,7 +51,8 @@ enum SDArchiverStateReturns { SDAS_INTERNAL_ERROR, SDAS_FAILED_TO_CREATE_MAP, SDAS_FAILED_TO_EXTRACT_SYMLINK, - SDAS_FAILED_TO_CHANGE_CWD + SDAS_FAILED_TO_CHANGE_CWD, + SDAS_INVALID_WRITE_VERSION }; /// Returned pointer must not be freed. @@ -65,6 +66,12 @@ void simple_archiver_free_state(SDArchiverState **state); int simple_archiver_write_all(FILE *out_f, SDArchiverState *state, const SDArchiverLinkedList *filenames); +int simple_archiver_write_v0(FILE *out_f, SDArchiverState *state, + const SDArchiverLinkedList *filenames); + +int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, + const SDArchiverLinkedList *filenames); + /// Returns zero on success. 
int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state); diff --git a/src/parser.c b/src/parser.c index 5da4653..2bc5b25 100644 --- a/src/parser.c +++ b/src/parser.c @@ -169,6 +169,9 @@ void simple_archiver_print_usage(void) { fprintf(stderr, "--temp-files-dir : where to store temporary files created " "when compressing (defaults to current working directory)\n"); + fprintf(stderr, + "--write-version : Force write version file format " + "(default 1)\n"); fprintf(stderr, "-- : specifies remaining arguments are files to archive/extract\n"); fprintf( @@ -189,6 +192,7 @@ SDArchiverParsed simple_archiver_create_parsed(void) { parsed.working_files = NULL; parsed.temp_dir = NULL; parsed.user_cwd = NULL; + parsed.write_version = 0; return parsed; } @@ -299,6 +303,26 @@ int simple_archiver_parse_args(int argc, const char **argv, out->temp_dir = argv[1]; --argc; ++argv; + } else if (strcmp(argv[0], "--write-version") == 0) { + if (argc < 2) { + fprintf(stderr, + "ERROR: --write-version expects an integer argument!\n"); + simple_archiver_print_usage(); + return 1; + } + int version = atoi(argv[1]); + if (version < 0) { + fprintf(stderr, "ERROR: --write-version cannot be negative!\n"); + simple_archiver_print_usage(); + return 1; + } else if (version > 1) { + fprintf(stderr, "ERROR: --write-version must be 0 or 1!\n"); + simple_archiver_print_usage(); + return 1; + } + out->write_version = (uint32_t)version; + --argc; + ++argv; } else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) { is_remaining_args = 1; } else if (argv[0][0] != '-') { diff --git a/src/parser.h b/src/parser.h index ad2e472..1c74427 100644 --- a/src/parser.h +++ b/src/parser.h @@ -51,6 +51,8 @@ typedef struct SDArchiverParsed { const char *temp_dir; /// Dir specified by "-C". const char *user_cwd; + /// Currently only 0 and 1 is supported. 
+ uint32_t write_version; } SDArchiverParsed; typedef struct SDArchiverFileInfo { From b09948d2452870aeb92ab5c90453d781940bb854 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Wed, 2 Oct 2024 13:15:30 +0900 Subject: [PATCH 18/37] Split "rel-path-from-abs-paths" into function --- CMakeLists.txt | 1 + src/archiver.c | 92 ++++++++++++++++++++++++++++---------------------- src/archiver.h | 4 +++ src/test.c | 16 +++++++++ 4 files changed, 73 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bed693..4b53330 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,6 +83,7 @@ add_executable(test_simplearchiver src/test.c src/parser.c src/helpers.c + src/archiver.c src/algorithms/linear_congruential_gen.c src/data_structures/linked_list.c src/data_structures/hash_map.c diff --git a/src/archiver.c b/src/archiver.c index b50a8b8..b151dca 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -767,46 +767,8 @@ int write_files_fn(void *data, void *ud) { // fprintf(stderr, "DEBUG: abs_path: %s\nDEBUG: link_abs_path: %s\n", // (char*)abs_path, (char*)link_abs_path); - // Compare paths to get relative path. - // Get first non-common char. - size_t idx; - size_t last_slash; - for (idx = 0, last_slash = 0; - idx < strlen(abs_path) && idx < strlen(link_abs_path); ++idx) { - if (((const char *)abs_path)[idx] != - ((const char *)link_abs_path)[idx]) { - break; - } else if (((const char *)abs_path)[idx] == '/') { - last_slash = idx + 1; - } - } - // Get substrings of both paths. - char *link_substr = (char *)link_abs_path + last_slash; - char *dest_substr = (char *)abs_path + last_slash; - rel_path = malloc(strlen(dest_substr) + 1); - strncpy(rel_path, dest_substr, strlen(dest_substr) + 1); - // fprintf(stderr, "DEBUG: link_substr: %s\nDEBUG: dest_substr: %s\n", - // link_substr, dest_substr); - - // Generate the relative path. 
- int_fast8_t has_slash = 0; - idx = 0; - do { - for (; link_substr[idx] != '/' && link_substr[idx] != 0; ++idx); - if (link_substr[idx] == 0) { - has_slash = 0; - } else { - has_slash = 1; - char *new_rel_path = malloc(strlen(rel_path) + 1 + 3); - new_rel_path[0] = '.'; - new_rel_path[1] = '.'; - new_rel_path[2] = '/'; - strncpy(new_rel_path + 3, rel_path, strlen(rel_path) + 1); - free(rel_path); - rel_path = new_rel_path; - ++idx; - } - } while (has_slash); + rel_path = + simple_archiver_filenames_to_relative_path(link_abs_path, abs_path); } } @@ -3015,3 +2977,53 @@ int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2], return 1; #endif } + +char *simple_archiver_filenames_to_relative_path(const char *from_abs, + const char *to_abs) { + if (!from_abs || !to_abs) { + return NULL; + } + + // Get first non-common char and last slash before it. + uint_fast32_t idx; + uint_fast32_t last_slash; + for (idx = 0, last_slash = 0; idx < strlen(from_abs) && idx < strlen(to_abs); + ++idx) { + if (((const char *)to_abs)[idx] != ((const char *)from_abs)[idx]) { + break; + } else if (((const char *)to_abs)[idx] == '/') { + last_slash = idx + 1; + } + } + + // Get substrings of both paths. + char *link_substr = (char *)from_abs + last_slash; + char *dest_substr = (char *)to_abs + last_slash; + char *rel_path = malloc(strlen(dest_substr) + 1); + strncpy(rel_path, dest_substr, strlen(dest_substr) + 1); + + // fprintf(stderr, "DEBUG: link_substr \"%s\", dest_substr \"%s\"\n", + // link_substr, dest_substr); + + // Get the relative path finally. 
+ int_fast8_t has_slash = 0; + idx = 0; + do { + for (; link_substr[idx] != '/' && link_substr[idx] != 0; ++idx); + if (link_substr[idx] == 0) { + has_slash = 0; + } else { + has_slash = 1; + char *new_rel_path = malloc(strlen(rel_path) + 1 + 3); + new_rel_path[0] = '.'; + new_rel_path[1] = '.'; + new_rel_path[2] = '/'; + strncpy(new_rel_path + 3, rel_path, strlen(rel_path) + 1); + free(rel_path); + rel_path = new_rel_path; + ++idx; + } + } while (has_slash); + + return rel_path; +} diff --git a/src/archiver.h b/src/archiver.h index 2b8fbf2..6039852 100644 --- a/src/archiver.h +++ b/src/archiver.h @@ -88,4 +88,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2], const char *cmd, void *pid_out); +/// If returns non-NULL, must be free'd. +char *simple_archiver_filenames_to_relative_path(const char *from_abs, + const char *to_abs); + #endif diff --git a/src/test.c b/src/test.c index 869816f..13caba9 100644 --- a/src/test.c +++ b/src/test.c @@ -23,6 +23,7 @@ #include // Local includes. +#include "archiver.h" #include "helpers.h" #include "parser_internal.h" @@ -241,6 +242,21 @@ int main(void) { free(out); } + // Test archiver. + { + __attribute__(( + cleanup(simple_archiver_helper_cleanup_c_string))) char *rel_path = + simple_archiver_filenames_to_relative_path( + "/one/two/three/four/five", "/one/two/branch/other/path"); + CHECK_STREQ(rel_path, "../../branch/other/path"); + simple_archiver_helper_cleanup_c_string(&rel_path); + + rel_path = simple_archiver_filenames_to_relative_path( + "/one/two/three/four/five", "/one/two/three/other/dir/"); + CHECK_STREQ(rel_path, "../other/dir/"); + simple_archiver_helper_cleanup_c_string(&rel_path); + } + printf("Checks checked: %u\n", checks_checked); printf("Checks passed: %u\n", checks_passed); return checks_passed == checks_checked ? 
0 : 1; From c7cd44513953a4cc9abdfd3c9844413bcec50540 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Wed, 2 Oct 2024 15:10:22 +0900 Subject: [PATCH 19/37] Refactorings for v1 extract, other refactorings --- file_format.md | 10 +- file_format_1_example_1 | Bin 405 -> 405 bytes src/archiver.c | 250 ++++++++++++++++++++++++++++++++++++++++ src/archiver.h | 4 +- src/main.c | 18 ++- 5 files changed, 273 insertions(+), 9 deletions(-) diff --git a/file_format.md b/file_format.md index 0ee7d63..67c5866 100644 --- a/file_format.md +++ b/file_format.md @@ -118,8 +118,16 @@ Following the link-count bytes, the following bytes are added for each symlink: 1. The first byte. 1. The first bit is UNSET if relative links are preferred, and is SET if absolute links are preferred. + 2. The second bit is "user read permission". + 3. The third bit is "user write permission". + 4. The fourth bit is "user execute permission". + 5. The fifth bit is "group read permission". + 6. The sixth bit is "group write permission". + 7. The seventh bit is "group execute permission". + 8. The eighth bit is "other read permission". 2. The second byte. - 1. Currently unused. + 1. The first bit is "other write permission". + 2. The second bit is "other execute permission". 2. 2 bytes 16-bit unsigned integer "link name" in big-endian. This does not include the NULL at the end of the string. Must not be zero. 3. X bytes of link-name (length defined by previous value). 
Is a NULL-terminated diff --git a/file_format_1_example_1 b/file_format_1_example_1 index 3b384a86296f886667d14908e3a8a3f1cf54b0bd..b56b56c6a7fcd8bee2c0dc7cee54354451196ddc 100644 GIT binary patch delta 47 xcmbQrJe7HZp~^pI2ELTc;u8Im)Z!AojFQ|O1|Z717AvJafyCOYH^8PMoDfCP=tXiGcP5z0><4K>c9v9I7|%o diff --git a/src/archiver.c b/src/archiver.c index b151dca..c14227a 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1112,6 +1112,86 @@ void cleanup_internal_file_info(SDArchiverInternalFileInfo **file_info) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX +mode_t permissions_from_bits_v1_symlink(const uint8_t flags[2], + uint_fast8_t print) { + mode_t permissions = 0; + + if ((flags[0] & 2) != 0) { + permissions |= S_IRUSR; + if (print) { + fprintf(stderr, "r"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 4) != 0) { + permissions |= S_IWUSR; + if (print) { + fprintf(stderr, "w"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 8) != 0) { + permissions |= S_IXUSR; + if (print) { + fprintf(stderr, "x"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x10) != 0) { + permissions |= S_IRGRP; + if (print) { + fprintf(stderr, "r"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x20) != 0) { + permissions |= S_IWGRP; + if (print) { + fprintf(stderr, "w"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x40) != 0) { + permissions |= S_IXGRP; + if (print) { + fprintf(stderr, "x"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[0] & 0x80) != 0) { + permissions |= S_IROTH; + if (print) { + fprintf(stderr, "r"); + } + } else if (print) { + fprintf(stderr, "-"); + } + if ((flags[1] & 1) != 0) { + permissions |= S_IWOTH; + if (print) { + fprintf(stderr, "w"); + } + } else if (print) { + 
fprintf(stderr, "-"); + } + if ((flags[1] & 2) != 0) { + permissions |= S_IXOTH; + if (print) { + fprintf(stderr, "x"); + } + } else if (print) { + fprintf(stderr, "-"); + } + + return permissions; +} + mode_t permissions_from_bits_version_1(const uint8_t flags[4], uint_fast8_t print) { mode_t permissions = 0; @@ -1191,6 +1271,55 @@ mode_t permissions_from_bits_version_1(const uint8_t flags[4], return permissions; } + +void print_permissions(mode_t permissions) { + if ((permissions & S_IRUSR)) { + fprintf(stderr, "r"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IWUSR)) { + fprintf(stderr, "w"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IXUSR)) { + fprintf(stderr, "x"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IRGRP)) { + fprintf(stderr, "r"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IWGRP)) { + fprintf(stderr, "w"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IXGRP)) { + fprintf(stderr, "x"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IROTH)) { + fprintf(stderr, "r"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IWOTH)) { + fprintf(stderr, "w"); + } else { + fprintf(stderr, "-"); + } + if ((permissions & S_IXOTH)) { + fprintf(stderr, "x"); + } else { + fprintf(stderr, "-"); + } +} + #endif void simple_archiver_internal_cleanup_int_fd(int *fd) { @@ -1451,6 +1580,18 @@ int simple_archiver_write_v0(FILE *out_f, SDArchiverState *state, int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, const SDArchiverLinkedList *filenames) { + // First create a "set" of absolute paths to given filenames. 
+ __attribute__((cleanup(simple_archiver_hash_map_free))) + SDArchiverHashMap *abs_filenames = simple_archiver_hash_map_init(); + void **ptr_array = malloc(sizeof(void *) * 2); + ptr_array[0] = abs_filenames; + ptr_array[1] = (void *)state->parsed->user_cwd; + if (simple_archiver_list_get(filenames, filenames_to_abs_map_fn, ptr_array)) { + free(ptr_array); + return SDAS_FAILED_TO_CREATE_MAP; + } + free(ptr_array); + // TODO Impl. fprintf(stderr, "Writing v1 unimplemented\n"); return SDAS_INTERNAL_ERROR; @@ -2396,6 +2537,13 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INVALID_FILE; } const uint_fast8_t absolute_preferred = (buf[0] & 1) ? 1 : 0; + +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + mode_t permissions = permissions_from_bits_v1_symlink(buf, 0); +#endif + uint_fast8_t link_extracted = 0; uint_fast8_t skip_due_to_map = 0; @@ -2417,6 +2565,13 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, if (!do_extract) { fprintf(stderr, " Link name: %s\n", link_name); +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + fprintf(stderr, " Link Permissions: "); + print_permissions(permissions); + fprintf(stderr, "\n"); +#endif } if (working_files_map && @@ -2445,12 +2600,49 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX simple_archiver_helper_make_dirs(link_name); + int_fast8_t link_create_retry = 0; + V1_SYMLINK_CREATE_RETRY_0: ret = symlink(path, link_name); if (ret == -1) { + if (link_create_retry) { + fprintf(stderr, + "WARNING: 
Failed to create symlink after removing existing " + "symlink!\n"); + goto V1_SYMLINK_CREATE_AFTER_0; + } else if (errno == EEXIST) { + if ((state->parsed->flags & 8) == 0) { + fprintf(stderr, + "WARNING: Symlink already exists and " + "\"--overwrite-extract\" is not specified, skipping!\n"); + goto V1_SYMLINK_CREATE_AFTER_0; + } else { + fprintf( + stderr, + "NOTICE: Symlink already exists and \"--overwrite-extract\" " + "specified, attempting to overwrite...\n"); + unlink(link_name); + link_create_retry = 1; + goto V1_SYMLINK_CREATE_RETRY_0; + } + } return SDAS_FAILED_TO_EXTRACT_SYMLINK; } + ret = fchmodat(AT_FDCWD, link_name, permissions, AT_SYMLINK_NOFOLLOW); + if (ret == -1) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, + "NOTICE: Setting permissions of symlink is not supported " + "by FS/OS!\n"); + } else { + fprintf(stderr, + "WARNING: Failed to set permissions of symlink (%d)!\n", + errno); + } + } link_extracted = 1; fprintf(stderr, " %s -> %s\n", link_name, path); + V1_SYMLINK_CREATE_AFTER_0: + link_create_retry = 1; #endif } else { fprintf(stderr, " Abs path: %s\n", path); @@ -2478,12 +2670,49 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX simple_archiver_helper_make_dirs(link_name); + int_fast8_t link_create_retry = 0; + V1_SYMLINK_CREATE_RETRY_1: ret = symlink(path, link_name); if (ret == -1) { + if (link_create_retry) { + fprintf(stderr, + "WARNING: Failed to create symlink after removing existing " + "symlink!\n"); + goto V1_SYMLINK_CREATE_AFTER_1; + } else if (errno == EEXIST) { + if ((state->parsed->flags & 8) == 0) { + fprintf(stderr, + "WARNING: Symlink already exists and " + "\"--overwrite-extract\" is not specified, skipping!\n"); + goto V1_SYMLINK_CREATE_AFTER_1; + } else { + fprintf( + stderr, + "NOTICE: Symlink already exists and \"--overwrite-extract\" " + "specified, attempting to 
overwrite...\n"); + unlink(link_name); + link_create_retry = 1; + goto V1_SYMLINK_CREATE_RETRY_1; + } + } return SDAS_FAILED_TO_EXTRACT_SYMLINK; } + ret = fchmodat(AT_FDCWD, link_name, permissions, AT_SYMLINK_NOFOLLOW); + if (ret == -1) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, + "NOTICE: Setting permissions of symlink is not supported " + "by FS/OS!\n"); + } else { + fprintf(stderr, + "WARNING: Failed to set permissions of symlink (%d)!\n", + errno); + } + } link_extracted = 1; fprintf(stderr, " %s -> %s\n", link_name, path); + V1_SYMLINK_CREATE_AFTER_1: + link_create_retry = 1; #endif } else { fprintf(stderr, " Rel path: %s\n", path); @@ -2539,6 +2768,27 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } file_info->filename[u16] = 0; + if (state && state->parsed && (state->parsed->flags & 8) != 0) { + int fd = open((const char *)buf, O_RDONLY | O_NOFOLLOW); + if (fd == -1) { + if (errno == ELOOP) { + // Exists as a symlink. + fprintf(stderr, + "WARNING: Filename \"%s\" already exists as symlink, " + "removing...\n", + (const char *)buf); + unlink((const char *)buf); + } else { + // File doesn't exist, do nothing. + } + } else { + close(fd); + fprintf(stderr, "WARNING: File \"%s\" already exists, removing...\n", + (const char *)buf); + unlink((const char *)buf); + } + } + if (fread(file_info->bit_flags, 1, 4, in_f) != 4) { return SDAS_INVALID_FILE; } diff --git a/src/archiver.h b/src/archiver.h index 6039852..48b932e 100644 --- a/src/archiver.h +++ b/src/archiver.h @@ -40,7 +40,7 @@ typedef struct SDArchiverState { size_t digits; } SDArchiverState; -enum SDArchiverStateReturns { +typedef enum SDArchiverStateReturns { SDAS_SUCCESS = 0, SDAS_HEADER_ALREADY_WRITTEN = 1, SDAS_FAILED_TO_WRITE, @@ -53,7 +53,7 @@ enum SDArchiverStateReturns { SDAS_FAILED_TO_EXTRACT_SYMLINK, SDAS_FAILED_TO_CHANGE_CWD, SDAS_INVALID_WRITE_VERSION -}; +} SDArchiverStateReturns; /// Returned pointer must not be freed. 
char *simple_archiver_error_to_string(enum SDArchiverStateReturns error); diff --git a/src/main.c b/src/main.c index 71c59db..937b5c2 100644 --- a/src/main.c +++ b/src/main.c @@ -97,7 +97,8 @@ int main(int argc, const char **argv) { int ret = simple_archiver_write_all(file, state, filenames); if (ret != SDAS_SUCCESS) { fprintf(stderr, "Error during writing.\n"); - char *error_str = simple_archiver_error_to_string(ret); + char *error_str = + simple_archiver_error_to_string((SDArchiverStateReturns)ret); fprintf(stderr, " %s\n", error_str); } fclose(file); @@ -113,7 +114,8 @@ int main(int argc, const char **argv) { int ret = simple_archiver_write_all(stdout, state, filenames); if (ret != SDAS_SUCCESS) { fprintf(stderr, "Error during writing.\n"); - char *error_str = simple_archiver_error_to_string(ret); + char *error_str = + simple_archiver_error_to_string((SDArchiverStateReturns)ret); fprintf(stderr, " %s\n", error_str); } } @@ -130,7 +132,8 @@ int main(int argc, const char **argv) { int ret = simple_archiver_parse_archive_info(file, 0, NULL); if (ret != 0) { fprintf(stderr, "Error during archive checking/examining.\n"); - char *error_str = simple_archiver_error_to_string(ret); + char *error_str = + simple_archiver_error_to_string((SDArchiverStateReturns)ret); fprintf(stderr, " %s\n", error_str); } fclose(file); @@ -138,7 +141,8 @@ int main(int argc, const char **argv) { int ret = simple_archiver_parse_archive_info(stdin, 0, NULL); if (ret != 0) { fprintf(stderr, "Error during archive checking/examining.\n"); - char *error_str = simple_archiver_error_to_string(ret); + char *error_str = + simple_archiver_error_to_string((SDArchiverStateReturns)ret); fprintf(stderr, " %s\n", error_str); } } @@ -157,7 +161,8 @@ int main(int argc, const char **argv) { int ret = simple_archiver_parse_archive_info(file, 1, state); if (ret != SDAS_SUCCESS) { fprintf(stderr, "Error during archive extracting.\n"); - char *error_str = simple_archiver_error_to_string(ret); + char *error_str = + 
simple_archiver_error_to_string((SDArchiverStateReturns)ret); fprintf(stderr, " %s\n", error_str); } fclose(file); @@ -165,7 +170,8 @@ int main(int argc, const char **argv) { int ret = simple_archiver_parse_archive_info(stdin, 1, state); if (ret != SDAS_SUCCESS) { fprintf(stderr, "Error during archive extracting.\n"); - char *error_str = simple_archiver_error_to_string(ret); + char *error_str = + simple_archiver_error_to_string((SDArchiverStateReturns)ret); fprintf(stderr, " %s\n", error_str); } } From 7b6929397e0cd87a16ad357d45f95424b02b7890 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 11:19:57 +0900 Subject: [PATCH 20/37] Refactor function, add its declaration to header --- src/archiver.c | 90 +++++++++++++++++++++++++------------------------- src/archiver.h | 6 ++++ 2 files changed, 51 insertions(+), 45 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index c14227a..0d255cf 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -99,49 +99,6 @@ void cleanup_temp_filename_delete(void ***ptrs_array) { #endif } -char *filename_to_absolute_path(const char *filename) { -#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ - SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ - SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - __attribute__((cleanup(simple_archiver_helper_cleanup_malloced))) void *path = - malloc(strlen(filename) + 1); - strncpy(path, filename, strlen(filename) + 1); - - char *path_dir = dirname(path); - if (!path_dir) { - return NULL; - } - - __attribute__(( - cleanup(simple_archiver_helper_cleanup_malloced))) void *dir_realpath = - realpath(path_dir, NULL); - if (!dir_realpath) { - return NULL; - } - - // Recreate "path" since it may have been modified by dirname(). 
- simple_archiver_helper_cleanup_malloced(&path); - path = malloc(strlen(filename) + 1); - strncpy(path, filename, strlen(filename) + 1); - - char *filename_basename = basename(path); - if (!filename_basename) { - return NULL; - } - - // Get combined full path to file. - char *fullpath = - malloc(strlen(dir_realpath) + 1 + strlen(filename_basename) + 1); - strncpy(fullpath, dir_realpath, strlen(dir_realpath) + 1); - fullpath[strlen(dir_realpath)] = '/'; - strncpy(fullpath + strlen(dir_realpath) + 1, filename_basename, - strlen(filename_basename) + 1); - - return fullpath; -#endif - return NULL; -} - int write_files_fn(void *data, void *ud) { const SDArchiverFileInfo *file_info = data; SDArchiverState *state = ud; @@ -760,7 +717,7 @@ int write_files_fn(void *data, void *ud) { // First get absolute path of link. __attribute__((cleanup( simple_archiver_helper_cleanup_malloced))) void *link_abs_path = - filename_to_absolute_path(file_info->filename); + simple_archiver_file_abs_path(file_info->filename); if (!link_abs_path) { fprintf(stderr, "WARNING: Failed to get absolute path of link!\n"); } else { @@ -895,7 +852,7 @@ int filenames_to_abs_map_fn(void *data, void *ud) { } // Get combined full path to file. 
- char *fullpath = filename_to_absolute_path(file_info->filename); + char *fullpath = simple_archiver_file_abs_path(file_info->filename); if (!fullpath) { return 1; } @@ -3277,3 +3234,46 @@ char *simple_archiver_filenames_to_relative_path(const char *from_abs, return rel_path; } + +char *simple_archiver_file_abs_path(const char *filename) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + __attribute__((cleanup(simple_archiver_helper_cleanup_malloced))) void *path = + malloc(strlen(filename) + 1); + strncpy(path, filename, strlen(filename) + 1); + + char *path_dir = dirname(path); + if (!path_dir) { + return NULL; + } + + __attribute__(( + cleanup(simple_archiver_helper_cleanup_malloced))) void *dir_realpath = + realpath(path_dir, NULL); + if (!dir_realpath) { + return NULL; + } + + // Recreate "path" since it may have been modified by dirname(). + simple_archiver_helper_cleanup_malloced(&path); + path = malloc(strlen(filename) + 1); + strncpy(path, filename, strlen(filename) + 1); + + char *filename_basename = basename(path); + if (!filename_basename) { + return NULL; + } + + // Get combined full path to file. + char *fullpath = + malloc(strlen(dir_realpath) + 1 + strlen(filename_basename) + 1); + strncpy(fullpath, dir_realpath, strlen(dir_realpath) + 1); + fullpath[strlen(dir_realpath)] = '/'; + strncpy(fullpath + strlen(dir_realpath) + 1, filename_basename, + strlen(filename_basename) + 1); + + return fullpath; +#endif + return NULL; +} diff --git a/src/archiver.h b/src/archiver.h index 48b932e..6b62ac3 100644 --- a/src/archiver.h +++ b/src/archiver.h @@ -92,4 +92,10 @@ int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2], char *simple_archiver_filenames_to_relative_path(const char *from_abs, const char *to_abs); +/// Gets the absolute path to a file given a path to a file. 
+/// Should also work on symlinks such that the returned string is the path to +/// the link itself, not what it points to. +/// Non-NULL on success, and must be free'd if non-NULL. +char *simple_archiver_file_abs_path(const char *filename); + #endif From 8040006afe3ab4141812eb752bddd564f52d08eb Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 12:27:46 +0900 Subject: [PATCH 21/37] WIP Work on v1 create archive Implemented file-format up to list of symlinks. --- src/archiver.c | 281 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) diff --git a/src/archiver.c b/src/archiver.c index 0d255cf..0644051 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1325,6 +1325,27 @@ void simple_archiver_internal_cleanup_decomp(pid_t *decomp_pid) { } #endif +int symlinks_and_files_from_files(void *data, void *ud) { + SDArchiverFileInfo *file_info = data; + void **ptr_array = ud; + SDArchiverLinkedList *symlinks_list = ptr_array[0]; + SDArchiverLinkedList *files_list = ptr_array[1]; + + if (file_info->filename) { + if (file_info->link_dest) { + simple_archiver_list_add( + symlinks_list, file_info->filename, + simple_archiver_helper_datastructure_cleanup_nop); + } else { + simple_archiver_list_add( + files_list, file_info->filename, + simple_archiver_helper_datastructure_cleanup_nop); + } + } + + return 0; +} + char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { switch (error) { case SDAS_SUCCESS: @@ -1549,6 +1570,266 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, } free(ptr_array); + // Get a list of symlinks and a list of files. 
+ __attribute__((cleanup(simple_archiver_list_free))) + SDArchiverLinkedList *symlinks_list = simple_archiver_list_init(); + __attribute__((cleanup(simple_archiver_list_free))) + SDArchiverLinkedList *files_list = simple_archiver_list_init(); + + ptr_array = malloc(sizeof(void *) * 2); + ptr_array[0] = symlinks_list; + ptr_array[1] = files_list; + + if (simple_archiver_list_get(filenames, symlinks_and_files_from_files, + ptr_array)) { + free(ptr_array); + return SDAS_INTERNAL_ERROR; + } + free(ptr_array); + + if (fwrite("SIMPLE_ARCHIVE_VER", 1, 18, out_f) != 18) { + return SDAS_FAILED_TO_WRITE; + } + + char buf[1024]; + uint16_t u16 = 1; + + simple_archiver_helper_16_bit_be(&u16); + + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + + if (state->parsed->compressor && !state->parsed->decompressor) { + return SDAS_NO_DECOMPRESSOR; + } else if (!state->parsed->compressor && state->parsed->decompressor) { + return SDAS_NO_COMPRESSOR; + } else if (state->parsed->compressor && state->parsed->decompressor) { + // 4 bytes flags, using de/compressor. 
+ memset(buf, 0, 4); + buf[0] |= 1; + if (fwrite(buf, 1, 4, out_f) != 4) { + return SDAS_FAILED_TO_WRITE; + } + + size_t len = strlen(state->parsed->compressor); + if (len >= 0xFFFF) { + fprintf(stderr, "ERROR: Compressor cmd is too long!\n"); + return SDAS_INVALID_PARSED_STATE; + } + + u16 = (uint16_t)len; + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 1, 2, out_f) != 2) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_16_bit_be(&u16); + + if (fwrite(state->parsed->compressor, 1, u16 + 1, out_f) != + (size_t)u16 + 1) { + return SDAS_FAILED_TO_WRITE; + } + + len = strlen(state->parsed->decompressor); + if (len >= 0xFFFF) { + fprintf(stderr, "ERROR: Decompressor cmd is too long!\n"); + return SDAS_INVALID_PARSED_STATE; + } + + u16 = (uint16_t)len; + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 1, 2, out_f) != 2) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_16_bit_be(&u16); + + if (fwrite(state->parsed->decompressor, 1, u16 + 1, out_f) != + (size_t)u16 + 1) { + return SDAS_FAILED_TO_WRITE; + } + } else { + // 4 bytes flags, not using de/compressor. 
+ memset(buf, 0, 4); + if (fwrite(buf, 1, 4, out_f) != 4) { + return SDAS_FAILED_TO_WRITE; + } + } + + if (symlinks_list->count > 0xFFFFFFFF) { + fprintf(stderr, "ERROR: Too many symlinks!\n"); + return SDAS_INVALID_PARSED_STATE; + } + + uint32_t u32 = (uint32_t)symlinks_list->count; + simple_archiver_helper_32_bit_be(&u32); + if (fwrite(&u32, 4, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_32_bit_be(&u32); + + { + __attribute__((cleanup( + simple_archiver_helper_cleanup_chdir_back))) char *original_cwd = NULL; + if (state->parsed->user_cwd) { +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + original_cwd = realpath(".", NULL); + if (chdir(state->parsed->user_cwd)) { + return SDAS_INTERNAL_ERROR; + } +#endif + } + const SDArchiverLLNode *node = symlinks_list->head; + for (u32 = 0; + u32 < (uint32_t)symlinks_list->count && node != symlinks_list->tail;) { + node = node->next; + ++u32; + u16 = 0; +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + // Check if symlink points to thing to be stored into archive. 
+ __attribute__(( + cleanup(simple_archiver_helper_cleanup_malloced))) void *abs_path = + realpath(node->data, NULL); + __attribute__((cleanup( + simple_archiver_helper_cleanup_malloced))) void *rel_path = NULL; + if (abs_path) { + __attribute__((cleanup( + simple_archiver_helper_cleanup_malloced))) void *link_abs_path = + simple_archiver_file_abs_path(node->data); + if (!link_abs_path) { + fprintf(stderr, "WARNING: Failed to get absolute path to link!\n"); + } else { + rel_path = simple_archiver_filenames_to_relative_path(link_abs_path, + abs_path); + } + } + if (abs_path && (state->parsed->flags & 0x20) == 0 && + !simple_archiver_hash_map_get(abs_filenames, abs_path, + strlen(abs_path) + 1)) { + // Is not a filename being archived, set preference to absolute path. + u16 |= 1; + } + + // Get symlink stats for permissions. + struct stat stat_buf; + memset(&stat_buf, 0, sizeof(struct stat)); + int stat_status = + fstatat(AT_FDCWD, node->data, &stat_buf, AT_SYMLINK_NOFOLLOW); + if (stat_status != 0) { + return SDAS_INTERNAL_ERROR; + } + + if ((stat_buf.st_mode & S_IRUSR) != 0) { + u16 |= 2; + } + if ((stat_buf.st_mode & S_IWUSR) != 0) { + u16 |= 4; + } + if ((stat_buf.st_mode & S_IXUSR) != 0) { + u16 |= 8; + } + if ((stat_buf.st_mode & S_IRGRP) != 0) { + u16 |= 0x10; + } + if ((stat_buf.st_mode & S_IWGRP) != 0) { + u16 |= 0x20; + } + if ((stat_buf.st_mode & S_IXGRP) != 0) { + u16 |= 0x40; + } + if ((stat_buf.st_mode & S_IROTH) != 0) { + u16 |= 0x80; + } + if ((stat_buf.st_mode & S_IWOTH) != 0) { + u16 |= 0x100; + } + if ((stat_buf.st_mode & S_IXOTH) != 0) { + u16 |= 0x200; + } +#else + u16 |= 0x3FE; +#endif + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + + size_t len = strlen(node->data); + if (len >= 0xFFFF) { + fprintf(stderr, "ERROR: Link name is too long!\n"); + return SDAS_INVALID_PARSED_STATE; + } + + u16 = (uint16_t)len; + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 2, 1, 
out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(node->data, 1, u16 + 1, out_f) != (size_t)u16 + 1) { + return SDAS_FAILED_TO_WRITE; + } + + if (abs_path) { + len = strlen(abs_path); + if (len >= 0xFFFF) { + fprintf(stderr, + "ERROR: Symlink destination absolute path is too long!\n"); + return SDAS_INVALID_PARSED_STATE; + } + + u16 = (uint16_t)len; + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(abs_path, 1, u16 + 1, out_f) != (size_t)u16 + 1) { + return SDAS_FAILED_TO_WRITE; + } + } else { + u16 = 0; + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + } + + if (rel_path) { + len = strlen(rel_path); + if (len >= 0xFFFF) { + fprintf(stderr, + "ERROR: Symlink destination relative path is too long!\n"); + return SDAS_INVALID_PARSED_STATE; + } + + u16 = (uint16_t)len; + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(rel_path, 1, u16 + 1, out_f) != (size_t)u16 + 1) { + return SDAS_FAILED_TO_WRITE; + } + } else { + u16 = 0; + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + } + } + if (u32 != (uint32_t)symlinks_list->count) { + fprintf(stderr, "ERROR: Iterated through %u symlinks out of %u total!\n", + u32, (uint32_t)symlinks_list->count); + return SDAS_INTERNAL_ERROR; + } + } + + // TODO Chunk count. + // TODO Impl. fprintf(stderr, "Writing v1 unimplemented\n"); return SDAS_INTERNAL_ERROR; From 166632fc15759d5390bfcafb4b97db5f4c835013 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 14:02:54 +0900 Subject: [PATCH 22/37] Update file_format for v1 Size of bytes for files per chunk was changed from 2 bytes to 4 bytes. 
--- file_format.md | 2 +- file_format_1_example_0 | Bin 203 -> 207 bytes file_format_1_example_1 | Bin 405 -> 407 bytes src/archiver.c | 10 +++++----- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/file_format.md b/file_format.md index 67c5866..4e39544 100644 --- a/file_format.md +++ b/file_format.md @@ -149,7 +149,7 @@ archive. Following the chunk-count bytes, the following bytes are added for each chunk: -1. 2 bytes that are a 16-bit unsigned integer "file count" in big-endian. +1. 4 bytes that are a 32-bit unsigned integer "file count" in big-endian. The following bytes are added for each file within the current chunk: diff --git a/file_format_1_example_0 b/file_format_1_example_0 index f28c89ed130d3445f8c84ba0f3f3521d17cc134e..a39ecf63812a798e5e1ca4829331e322f43c68ec 100644 GIT binary patch delta 16 WcmX@jc%E^BA|t~@1wSB>8wUU?j|8g# delta 12 TcmX@lc$#s7;zVWNiAAvhA7%uv diff --git a/file_format_1_example_1 b/file_format_1_example_1 index b56b56c6a7fcd8bee2c0dc7cee54354451196ddc..696a49f01f3583aeee48051f72487a10d1e15a7b 100644 GIT binary patch delta 12 TcmbQrJe_$$86(5SQUyi;8BPO% delta 10 RcmbQvJe7Gu*~SWaMgSDy1A+hm diff --git a/src/archiver.c b/src/archiver.c index 0644051..a0c37fb 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2974,13 +2974,13 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, for (uint32_t chunk_idx = 0; chunk_idx < chunk_count; ++chunk_idx) { fprintf(stderr, "CHUNK %3u of %3u\n", chunk_idx + 1, chunk_count); - if (fread(buf, 1, 2, in_f) != 2) { + if (fread(buf, 1, 4, in_f) != 4) { return SDAS_INVALID_FILE; } - memcpy(&u16, buf, 2); - simple_archiver_helper_16_bit_be(&u16); + memcpy(&u32, buf, 4); + simple_archiver_helper_32_bit_be(&u32); - const uint16_t file_count = u16; + const uint32_t file_count = u32; __attribute__((cleanup(simple_archiver_list_free))) SDArchiverLinkedList *file_info_list = simple_archiver_list_init(); @@ -2988,7 +2988,7 @@ int simple_archiver_parse_archive_version_1(FILE 
*in_f, int_fast8_t do_extract, __attribute__((cleanup(cleanup_internal_file_info))) SDArchiverInternalFileInfo *file_info = NULL; - for (uint16_t file_idx = 0; file_idx < file_count; ++file_idx) { + for (uint32_t file_idx = 0; file_idx < file_count; ++file_idx) { file_info = malloc(sizeof(SDArchiverInternalFileInfo)); memset(file_info, 0, sizeof(SDArchiverInternalFileInfo)); From 1b7fcb2bfcd1892fee2bf712e4cdead7e2ebe699 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 15:36:53 +0900 Subject: [PATCH 23/37] WIP Impl. archiving without compression TODO: archiving with compression --- src/archiver.c | 327 ++++++++++++++++++++++++++++++++++++++++++++----- src/parser.c | 19 +++ src/parser.h | 2 + 3 files changed, 317 insertions(+), 31 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index a0c37fb..9c786a0 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1330,6 +1330,7 @@ int symlinks_and_files_from_files(void *data, void *ud) { void **ptr_array = ud; SDArchiverLinkedList *symlinks_list = ptr_array[0]; SDArchiverLinkedList *files_list = ptr_array[1]; + const char *user_cwd = ptr_array[2]; if (file_info->filename) { if (file_info->link_dest) { @@ -1337,15 +1338,118 @@ int symlinks_and_files_from_files(void *data, void *ud) { symlinks_list, file_info->filename, simple_archiver_helper_datastructure_cleanup_nop); } else { - simple_archiver_list_add( - files_list, file_info->filename, - simple_archiver_helper_datastructure_cleanup_nop); + SDArchiverInternalFileInfo *file_info_struct = + malloc(sizeof(SDArchiverInternalFileInfo)); + file_info_struct->filename = strdup(file_info->filename); + file_info_struct->bit_flags[0] = 0xFF; + file_info_struct->bit_flags[1] = 1; + file_info_struct->bit_flags[2] = 0; + file_info_struct->bit_flags[3] = 0; + file_info_struct->uid = 0; + file_info_struct->gid = 0; + file_info_struct->file_size = 0; +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == 
SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + __attribute__((cleanup( + simple_archiver_helper_cleanup_chdir_back))) char *original_cwd = + NULL; + if (user_cwd) { + original_cwd = realpath(".", NULL); + if (chdir(user_cwd)) { + free(file_info_struct); + return 1; + } + } + struct stat stat_buf; + memset(&stat_buf, 0, sizeof(struct stat)); + int stat_status = fstatat(AT_FDCWD, file_info_struct->filename, &stat_buf, + AT_SYMLINK_NOFOLLOW); + if (stat_status != 0) { + free(file_info_struct); + return 1; + } + file_info_struct->bit_flags[0] = 0; + file_info_struct->bit_flags[1] &= 0xFE; + if ((stat_buf.st_mode & S_IRUSR) != 0) { + file_info_struct->bit_flags[0] |= 1; + } + if ((stat_buf.st_mode & S_IWUSR) != 0) { + file_info_struct->bit_flags[0] |= 2; + } + if ((stat_buf.st_mode & S_IXUSR) != 0) { + file_info_struct->bit_flags[0] |= 4; + } + if ((stat_buf.st_mode & S_IRGRP) != 0) { + file_info_struct->bit_flags[0] |= 8; + } + if ((stat_buf.st_mode & S_IWGRP) != 0) { + file_info_struct->bit_flags[0] |= 0x10; + } + if ((stat_buf.st_mode & S_IXGRP) != 0) { + file_info_struct->bit_flags[0] |= 0x20; + } + if ((stat_buf.st_mode & S_IROTH) != 0) { + file_info_struct->bit_flags[0] |= 0x40; + } + if ((stat_buf.st_mode & S_IWOTH) != 0) { + file_info_struct->bit_flags[0] |= 0x80; + } + if ((stat_buf.st_mode & S_IXOTH) != 0) { + file_info_struct->bit_flags[1] |= 1; + } + file_info_struct->uid = stat_buf.st_uid; + file_info_struct->gid = stat_buf.st_gid; + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = + fopen(file_info_struct->filename, "rb"); + if (!fd) { + free(file_info_struct); + return 1; + } + if (fseek(fd, 0, SEEK_END) < 0) { + free(file_info_struct); + return 1; + } + long ftell_ret = ftell(fd); + if (ftell_ret < 0) { + free(file_info_struct); + return 1; + } + file_info_struct->file_size = (uint64_t)ftell_ret; + simple_archiver_list_add(files_list, file_info_struct, + free_internal_file_info); 
+#endif } } return 0; } +int files_to_chunk_count(void *data, void *ud) { + SDArchiverInternalFileInfo *file_info_struct = data; + void **ptrs = ud; + const uint64_t *chunk_size = ptrs[0]; + uint64_t *current_size = ptrs[1]; + uint64_t *current_count = ptrs[2]; + SDArchiverLinkedList *chunk_counts = ptrs[3]; + + ++(*current_count); + + // Get file size. + *current_size += file_info_struct->file_size; + + // Check size. + if (*current_size >= *chunk_size) { + uint64_t *count = malloc(sizeof(uint64_t)); + *count = *current_count; + simple_archiver_list_add(chunk_counts, count, NULL); + *current_count = 0; + *current_size = 0; + } + + return 0; +} + char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { switch (error) { case SDAS_SUCCESS: @@ -1576,9 +1680,10 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, __attribute__((cleanup(simple_archiver_list_free))) SDArchiverLinkedList *files_list = simple_archiver_list_init(); - ptr_array = malloc(sizeof(void *) * 2); + ptr_array = malloc(sizeof(void *) * 3); ptr_array[0] = symlinks_list; ptr_array[1] = files_list; + ptr_array[2] = (void *)state->parsed->user_cwd; if (simple_archiver_list_get(filenames, symlinks_and_files_from_files, ptr_array)) { @@ -1667,25 +1772,27 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, } simple_archiver_helper_32_bit_be(&u32); - { - __attribute__((cleanup( - simple_archiver_helper_cleanup_chdir_back))) char *original_cwd = NULL; - if (state->parsed->user_cwd) { + // Change cwd if user specified. 
+ __attribute__((cleanup( + simple_archiver_helper_cleanup_chdir_back))) char *original_cwd = NULL; + if (state->parsed->user_cwd) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - original_cwd = realpath(".", NULL); - if (chdir(state->parsed->user_cwd)) { - return SDAS_INTERNAL_ERROR; - } -#endif + original_cwd = realpath(".", NULL); + if (chdir(state->parsed->user_cwd)) { + return SDAS_INTERNAL_ERROR; } +#endif + } + + { const SDArchiverLLNode *node = symlinks_list->head; for (u32 = 0; u32 < (uint32_t)symlinks_list->count && node != symlinks_list->tail;) { node = node->next; ++u32; - u16 = 0; + memset(buf, 0, 2); #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -1710,7 +1817,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, !simple_archiver_hash_map_get(abs_filenames, abs_path, strlen(abs_path) + 1)) { // Is not a filename being archived, set preference to absolute path. - u16 |= 1; + buf[0] |= 1; } // Get symlink stats for permissions. 
@@ -1723,37 +1830,37 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, } if ((stat_buf.st_mode & S_IRUSR) != 0) { - u16 |= 2; + buf[0] |= 2; } if ((stat_buf.st_mode & S_IWUSR) != 0) { - u16 |= 4; + buf[0] |= 4; } if ((stat_buf.st_mode & S_IXUSR) != 0) { - u16 |= 8; + buf[0] |= 8; } if ((stat_buf.st_mode & S_IRGRP) != 0) { - u16 |= 0x10; + buf[0] |= 0x10; } if ((stat_buf.st_mode & S_IWGRP) != 0) { - u16 |= 0x20; + buf[0] |= 0x20; } if ((stat_buf.st_mode & S_IXGRP) != 0) { - u16 |= 0x40; + buf[0] |= 0x40; } if ((stat_buf.st_mode & S_IROTH) != 0) { - u16 |= 0x80; + buf[0] |= (char)0x80; } if ((stat_buf.st_mode & S_IWOTH) != 0) { - u16 |= 0x100; + buf[1] |= 1; } if ((stat_buf.st_mode & S_IXOTH) != 0) { - u16 |= 0x200; + buf[1] |= 2; } #else - u16 |= 0x3FE; + buf[0] = 0xFE; + buf[1] = 3; #endif - simple_archiver_helper_16_bit_be(&u16); - if (fwrite(&u16, 2, 1, out_f) != 1) { + if (fwrite(buf, 1, 2, out_f) != 2) { return SDAS_FAILED_TO_WRITE; } @@ -1828,11 +1935,169 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, } } - // TODO Chunk count. + __attribute__((cleanup(simple_archiver_list_free))) + SDArchiverLinkedList *chunk_counts = simple_archiver_list_init(); - // TODO Impl. - fprintf(stderr, "Writing v1 unimplemented\n"); - return SDAS_INTERNAL_ERROR; + { + uint64_t current_size = 0; + uint64_t current_count = 0; + void **ptrs = malloc(sizeof(void *) * 4); + ptrs[0] = (void *)&state->parsed->minimum_chunk_size; + ptrs[1] = ¤t_size; + ptrs[2] = ¤t_count; + ptrs[3] = chunk_counts; + if (simple_archiver_list_get(files_list, files_to_chunk_count, ptrs)) { + free(ptrs); + fprintf(stderr, "ERROR: Internal error calculating chunk counts!\n"); + return SDAS_INTERNAL_ERROR; + } + free(ptrs); + if (current_size > 0 && current_count > 0) { + uint64_t *count = malloc(sizeof(uint64_t)); + *count = current_count; + simple_archiver_list_add(chunk_counts, count, NULL); + } + } + + // Verify chunk counts. 
+ { + uint64_t count = 0; + for (SDArchiverLLNode *node = chunk_counts->head->next; + node != chunk_counts->tail; node = node->next) { + if (*((uint64_t *)node->data) > 0xFFFFFFFF) { + fprintf(stderr, "ERROR: file count in chunk is too large!\n"); + return SDAS_INTERNAL_ERROR; + } + count += *((uint64_t *)node->data); + // fprintf(stderr, "DEBUG: chunk count %4llu\n", + // *((uint64_t*)node->data)); + } + if (count != files_list->count) { + fprintf(stderr, + "ERROR: Internal error calculating chunk counts (invalid number " + "of files)!\n"); + return SDAS_INTERNAL_ERROR; + } + } + + // Write number of chunks. + if (chunk_counts->count > 0xFFFFFFFF) { + fprintf(stderr, "ERROR: Too many chunks!\n"); + return SDAS_INTERNAL_ERROR; + } + u32 = (uint32_t)chunk_counts->count; + simple_archiver_helper_32_bit_be(&u32); + if (fwrite(&u32, 4, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + + __attribute__((cleanup(simple_archiver_helper_cleanup_malloced))) void + *non_compressing_chunk_size = NULL; + if (!state->parsed->compressor || !state->parsed->decompressor) { + non_compressing_chunk_size = malloc(sizeof(uint64_t)); + } + uint64_t *non_c_chunk_size = non_compressing_chunk_size; + + SDArchiverLLNode *file_node = files_list->head; + for (SDArchiverLLNode *chunk_c_node = chunk_counts->head->next; + chunk_c_node != chunk_counts->tail; chunk_c_node = chunk_c_node->next) { + // Write file count before iterating through files. 
+ if (non_c_chunk_size) { + *non_c_chunk_size = 0; + } + u32 = (uint32_t)(*((uint64_t *)chunk_c_node->data)); + simple_archiver_helper_32_bit_be(&u32); + if (fwrite(&u32, 4, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + SDArchiverLLNode *saved_node = file_node; + for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); + ++file_idx) { + file_node = file_node->next; + if (file_node == files_list->tail) { + return SDAS_INTERNAL_ERROR; + } + const SDArchiverInternalFileInfo *file_info_struct = file_node->data; + if (non_c_chunk_size) { + *non_c_chunk_size += file_info_struct->file_size; + } + size_t len = strlen(file_info_struct->filename); + if (len >= 0xFFFF) { + fprintf(stderr, "ERROR: Filename is too large!\n"); + return SDAS_INVALID_FILE; + } + u16 = (uint16_t)len; + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(&u16, 2, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + simple_archiver_helper_16_bit_be(&u16); + if (fwrite(file_info_struct->filename, 1, u16 + 1, out_f) != + (size_t)u16 + 1) { + return SDAS_FAILED_TO_WRITE; + } else if (fwrite(file_info_struct->bit_flags, 1, 4, out_f) != 4) { + return SDAS_FAILED_TO_WRITE; + } + // UID and GID. + u32 = file_info_struct->uid; + simple_archiver_helper_32_bit_be(&u32); + if (fwrite(&u32, 4, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + u32 = file_info_struct->gid; + simple_archiver_helper_32_bit_be(&u32); + if (fwrite(&u32, 4, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + + uint64_t u64 = file_info_struct->file_size; + simple_archiver_helper_64_bit_be(&u64); + if (fwrite(&u64, 8, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + } + + file_node = saved_node; + + if (state->parsed->compressor && state->parsed->decompressor) { + // Is compressing. + fprintf(stderr, "Writing compressed v1 unimplemented\n"); + return SDAS_INTERNAL_ERROR; + } else { + // Is NOT compressing. 
+ if (!non_c_chunk_size) { + return SDAS_INTERNAL_ERROR; + } + simple_archiver_helper_64_bit_be(non_c_chunk_size); + fwrite(non_c_chunk_size, 8, 1, out_f); + for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); + ++file_idx) { + file_node = file_node->next; + if (file_node == files_list->tail) { + return SDAS_INTERNAL_ERROR; + } + const SDArchiverInternalFileInfo *file_info_struct = file_node->data; + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = + fopen(file_info_struct->filename, "rb"); + while (!feof(fd)) { + if (ferror(fd)) { + fprintf(stderr, "ERROR: Writing to chunk, file read error!\n"); + return SDAS_INTERNAL_ERROR; + } + size_t fread_ret = fread(buf, 1, 1024, fd); + if (fread_ret > 0) { + size_t fwrite_ret = fwrite(buf, 1, fread_ret, out_f); + if (fwrite_ret != fread_ret) { + fprintf(stderr, "ERROR: Writing to chunk, file write error!\n"); + return SDAS_FAILED_TO_WRITE; + } + } + } + } + } + } + + return SDAS_SUCCESS; } int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, diff --git a/src/parser.c b/src/parser.c index 2bc5b25..612b49b 100644 --- a/src/parser.c +++ b/src/parser.c @@ -172,6 +172,9 @@ void simple_archiver_print_usage(void) { fprintf(stderr, "--write-version : Force write version file format " "(default 1)\n"); + fprintf(stderr, + "--chunk-min-size : v1 file format minimum chunk size " + "(default 4194304 or 4MiB)\n"); fprintf(stderr, "-- : specifies remaining arguments are files to archive/extract\n"); fprintf( @@ -193,6 +196,7 @@ SDArchiverParsed simple_archiver_create_parsed(void) { parsed.temp_dir = NULL; parsed.user_cwd = NULL; parsed.write_version = 0; + parsed.minimum_chunk_size = 4194304; return parsed; } @@ -323,6 +327,21 @@ int simple_archiver_parse_args(int argc, const char **argv, out->write_version = (uint32_t)version; --argc; ++argv; + } else if (strcmp(argv[0], "--chunk-min-size") == 0) { + if (argc < 2) { + fprintf(stderr, + "ERROR: --chunk-min-size expects 
an integer argument!\n"); + simple_archiver_print_usage(); + return 1; + } + out->minimum_chunk_size = strtoull(argv[1], NULL, 10); + if (out->minimum_chunk_size == 0) { + fprintf(stderr, "ERROR: --chunk-min-size cannot be zero!\n"); + simple_archiver_print_usage(); + return 1; + } + --argc; + ++argv; } else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) { is_remaining_args = 1; } else if (argv[0][0] != '-') { diff --git a/src/parser.h b/src/parser.h index 1c74427..c3734d0 100644 --- a/src/parser.h +++ b/src/parser.h @@ -53,6 +53,8 @@ typedef struct SDArchiverParsed { const char *user_cwd; /// Currently only 0 and 1 is supported. uint32_t write_version; + /// The minimum size of a chunk in bytes (the last chunk may be less). + uint64_t minimum_chunk_size; } SDArchiverParsed; typedef struct SDArchiverFileInfo { From 8982b15cc5b3d6e2b58849d1f6f477469b1a4ff7 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 17:20:01 +0900 Subject: [PATCH 24/37] Impl. create archive with compression, fixes --- src/archiver.c | 201 ++++++++++++++++++++++++++++++++++++++++++++----- src/parser.c | 2 +- 2 files changed, 184 insertions(+), 19 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 9c786a0..17c251a 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2061,8 +2061,187 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, if (state->parsed->compressor && state->parsed->decompressor) { // Is compressing. 
- fprintf(stderr, "Writing compressed v1 unimplemented\n"); - return SDAS_INTERNAL_ERROR; + + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) + FILE *temp_fd = NULL; + + size_t temp_filename_size = strlen(state->parsed->temp_dir) + 1 + 64; + __attribute__((cleanup( + simple_archiver_helper_cleanup_c_string))) char *temp_filename = + malloc(temp_filename_size); + + __attribute__((cleanup(cleanup_temp_filename_delete))) void **ptrs_array = + malloc(sizeof(void *) * 2); + ptrs_array[0] = NULL; + ptrs_array[1] = NULL; + if (state->parsed->temp_dir) { + size_t idx = 0; + size_t temp_dir_len = strlen(state->parsed->temp_dir); + snprintf(temp_filename, temp_filename_size, TEMP_FILENAME_CMP, + state->parsed->temp_dir, + state->parsed->temp_dir[temp_dir_len - 1] == '/' ? "" : "/", + idx); + do { + FILE *test_fd = fopen(temp_filename, "rb"); + if (test_fd) { + // File exists. + fclose(test_fd); + snprintf( + temp_filename, temp_filename_size, TEMP_FILENAME_CMP, + state->parsed->temp_dir, + state->parsed->temp_dir[temp_dir_len - 1] == '/' ? "" : "/", + ++idx); + } else if (idx > 0xFFFF) { + return SDAS_INTERNAL_ERROR; + } else { + break; + } + } while (1); + temp_fd = fopen(temp_filename, "w+b"); + ptrs_array[0] = temp_filename; + } else { + temp_fd = tmpfile(); + } + + if (!temp_fd) { + return SDAS_INTERNAL_ERROR; + } + + // Handle SIGPIPE. + is_sig_pipe_occurred = 0; + signal(SIGPIPE, handle_sig_pipe); + + int pipe_into_cmd[2]; + int pipe_outof_cmd[2]; + pid_t compressor_pid; + + if (pipe(pipe_into_cmd) != 0) { + // Unable to create pipes. + return SDAS_INTERNAL_ERROR; + } else if (pipe(pipe_outof_cmd) != 0) { + // Unable to create second set of pipes. + close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + return SDAS_INTERNAL_ERROR; + } else if (simple_archiver_de_compress(pipe_into_cmd, pipe_outof_cmd, + state->parsed->compressor, + &compressor_pid) != 0) { + // Failed to spawn compressor. 
+ close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + fprintf(stderr, + "WARNING: Failed to start compressor cmd! Invalid cmd?\n"); + return SDAS_INTERNAL_ERROR; + } + + // Close unnecessary pipe fds on this end of the transfer. + close(pipe_into_cmd[0]); + close(pipe_outof_cmd[1]); + + // Set up cleanup so that remaining open pipes in this side is cleaned up. + __attribute__((cleanup( + simple_archiver_internal_cleanup_int_fd))) int pipe_into_write = + pipe_into_cmd[1]; + __attribute__((cleanup( + simple_archiver_internal_cleanup_int_fd))) int pipe_outof_read = + pipe_outof_cmd[0]; + + for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); + ++file_idx) { + file_node = file_node->next; + if (file_node == files_list->tail) { + return SDAS_INTERNAL_ERROR; + } + const SDArchiverInternalFileInfo *file_info_struct = file_node->data; + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = + fopen(file_info_struct->filename, "rb"); + while (!feof(fd)) { + if (ferror(fd)) { + fprintf(stderr, "ERROR: Writing to chunk, file read error!\n"); + return SDAS_INTERNAL_ERROR; + } + size_t fread_ret = fread(buf, 1, 1024, fd); + if (fread_ret > 0) { + ssize_t write_ret = write(pipe_into_write, buf, fread_ret); + if (write_ret < 0) { + fprintf(stderr, + "ERROR: Writing to compressor, pipe write error!\n"); + return SDAS_FAILED_TO_WRITE; + } else if ((size_t)write_ret != fread_ret) { + fprintf(stderr, + "ERROR: Writing to compressor, unable to write bytes!\n"); + return SDAS_FAILED_TO_WRITE; + } + } + } + } + + // Close write to pipe to compressor as the chunk is written. + simple_archiver_internal_cleanup_int_fd(&pipe_into_write); + + // Read compressed data into temporary file. + do { + ssize_t read_ret = read(pipe_outof_read, buf, 1024); + if (read_ret < 0) { + fprintf(stderr, "ERROR: Reading from compressor, pipe read error!\n"); + return SDAS_INTERNAL_ERROR; + } else if (read_ret == 0) { + // EOF. 
+ break; + } else { + size_t fwrite_ret = fwrite(buf, 1, (size_t)read_ret, temp_fd); + if (fwrite_ret != (size_t)read_ret) { + fprintf(stderr, + "ERROR: Reading from compressor, failed to write to " + "temporary file!\n"); + return SDAS_INTERNAL_ERROR; + } + } + } while (1); + + // Close read from pipe from compressor as chunk is fully compressed. + simple_archiver_internal_cleanup_int_fd(&pipe_outof_read); + + // Wait on compressor to stop. + waitpid(compressor_pid, NULL, 0); + + long comp_chunk_size = ftell(temp_fd); + if (comp_chunk_size < 0) { + fprintf(stderr, + "ERROR: Temp file reported negative size after compression!\n"); + return SDAS_INTERNAL_ERROR; + } + + // Write compressed chunk size. + uint64_t u64 = (uint64_t)comp_chunk_size; + simple_archiver_helper_64_bit_be(&u64); + if (fwrite(&u64, 8, 1, out_f) != 1) { + return SDAS_FAILED_TO_WRITE; + } + + if (fseek(temp_fd, 0, SEEK_SET) != 0) { + return SDAS_INTERNAL_ERROR; + } + + // Write compressed chunk. + while (!feof(temp_fd)) { + if (ferror(temp_fd)) { + return SDAS_INTERNAL_ERROR; + } + size_t fread_ret = fread(buf, 1, 1024, temp_fd); + if (fread_ret > 0) { + size_t fwrite_ret = fwrite(buf, 1, fread_ret, out_f); + if (fwrite_ret != fread_ret) { + fprintf(stderr, + "ERROR: Partial write of read bytes from temp file to " + "output file!\n"); + return SDAS_FAILED_TO_WRITE; + } + } + } + + // Cleanup and remove temp_fd. + simple_archiver_helper_cleanup_FILE(&temp_fd); } else { // Is NOT compressing. if (!non_c_chunk_size) { @@ -3355,22 +3534,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, close(pipe_into_cmd[0]); close(pipe_into_cmd[1]); return SDAS_INTERNAL_ERROR; - } else if (fcntl(pipe_into_cmd[1], F_SETFL, O_NONBLOCK) != 0) { - // Unable to set non-blocking on into-write-pipe. 
- close(pipe_into_cmd[0]); - close(pipe_into_cmd[1]); - close(pipe_outof_cmd[0]); - close(pipe_outof_cmd[1]); - return SDAS_INTERNAL_ERROR; } - // else if (fcntl(pipe_outof_cmd[0], F_SETFL, O_NONBLOCK) != 0) { - // // Unable to set non-blocking on outof-read-pipe. - // close(pipe_into_cmd[0]); - // close(pipe_into_cmd[1]); - // close(pipe_outof_cmd[0]); - // close(pipe_outof_cmd[1]); - // return SDAS_INTERNAL_ERROR; - // } if (state && state->parsed && state->parsed->decompressor) { if (simple_archiver_de_compress(pipe_into_cmd, pipe_outof_cmd, @@ -3450,7 +3614,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } else if (write_ret == -1) { fprintf(stderr, "WARNING: Failed to write chunk data into decompressor! " - "Invalid decompressor cmd?\n"); + "Invalid decompressor cmd? (errno %d)\n", + errno); return SDAS_INTERNAL_ERROR; } else { fprintf(stderr, diff --git a/src/parser.c b/src/parser.c index 612b49b..f9b8ae6 100644 --- a/src/parser.c +++ b/src/parser.c @@ -195,7 +195,7 @@ SDArchiverParsed simple_archiver_create_parsed(void) { parsed.working_files = NULL; parsed.temp_dir = NULL; parsed.user_cwd = NULL; - parsed.write_version = 0; + parsed.write_version = 1; parsed.minimum_chunk_size = 4194304; return parsed; From 8e620fb0e9e1f3ac1d09c1d133268d9fde886abe Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 17:32:14 +0900 Subject: [PATCH 25/37] Reorder cleanup to proper location --- src/archiver.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 17c251a..646ad15 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2062,6 +2062,11 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, if (state->parsed->compressor && state->parsed->decompressor) { // Is compressing. 
+ __attribute__((cleanup(cleanup_temp_filename_delete))) void **ptrs_array = + malloc(sizeof(void *) * 2); + ptrs_array[0] = NULL; + ptrs_array[1] = NULL; + __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *temp_fd = NULL; @@ -2070,10 +2075,6 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, simple_archiver_helper_cleanup_c_string))) char *temp_filename = malloc(temp_filename_size); - __attribute__((cleanup(cleanup_temp_filename_delete))) void **ptrs_array = - malloc(sizeof(void *) * 2); - ptrs_array[0] = NULL; - ptrs_array[1] = NULL; if (state->parsed->temp_dir) { size_t idx = 0; size_t temp_dir_len = strlen(state->parsed->temp_dir); From 302f7f804d7683cbfff9a87aa7943654fabc2659 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 17:35:12 +0900 Subject: [PATCH 26/37] Fix reordering of cleanup fns in archiver.c --- src/archiver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 646ad15..21976d9 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2062,6 +2062,11 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, if (state->parsed->compressor && state->parsed->decompressor) { // Is compressing. 
+ size_t temp_filename_size = strlen(state->parsed->temp_dir) + 1 + 64; + __attribute__((cleanup( + simple_archiver_helper_cleanup_c_string))) char *temp_filename = + malloc(temp_filename_size); + __attribute__((cleanup(cleanup_temp_filename_delete))) void **ptrs_array = malloc(sizeof(void *) * 2); ptrs_array[0] = NULL; @@ -2070,11 +2075,6 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *temp_fd = NULL; - size_t temp_filename_size = strlen(state->parsed->temp_dir) + 1 + 64; - __attribute__((cleanup( - simple_archiver_helper_cleanup_c_string))) char *temp_filename = - malloc(temp_filename_size); - if (state->parsed->temp_dir) { size_t idx = 0; size_t temp_dir_len = strlen(state->parsed->temp_dir); From b1745172f73a2e10707e09aee17f9c94469effb3 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 17:46:30 +0900 Subject: [PATCH 27/37] Fix release build compiler warnings --- src/archiver.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 21976d9..407764c 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2734,7 +2734,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, int_fast8_t write_again = 0; int_fast8_t write_pipe_done = 0; int_fast8_t read_pipe_done = 0; - size_t fread_ret; + size_t fread_ret = 0; char recv_buf[1024]; size_t amount_to_read; while (!write_pipe_done || !read_pipe_done) { @@ -3933,11 +3933,12 @@ char *simple_archiver_filenames_to_relative_path(const char *from_abs, has_slash = 0; } else { has_slash = 1; - char *new_rel_path = malloc(strlen(rel_path) + 1 + 3); + size_t new_rel_path_size = strlen(rel_path) + 1 + 3; + char *new_rel_path = malloc(new_rel_path_size); new_rel_path[0] = '.'; new_rel_path[1] = '.'; new_rel_path[2] = '/'; - strncpy(new_rel_path + 3, rel_path, strlen(rel_path) + 1); + strncpy(new_rel_path + 3, rel_path, 
new_rel_path_size - 3); free(rel_path); rel_path = new_rel_path; ++idx; @@ -3978,12 +3979,13 @@ char *simple_archiver_file_abs_path(const char *filename) { } // Get combined full path to file. - char *fullpath = - malloc(strlen(dir_realpath) + 1 + strlen(filename_basename) + 1); - strncpy(fullpath, dir_realpath, strlen(dir_realpath) + 1); - fullpath[strlen(dir_realpath)] = '/'; - strncpy(fullpath + strlen(dir_realpath) + 1, filename_basename, - strlen(filename_basename) + 1); + const size_t realpath_size = strlen(dir_realpath) + 1; + const size_t basename_size = strlen(filename_basename) + 1; + const size_t fullpath_size = realpath_size + basename_size; + char *fullpath = malloc(fullpath_size); + strncpy(fullpath, dir_realpath, realpath_size); + fullpath[realpath_size - 1] = '/'; + strcpy(fullpath + realpath_size, filename_basename); return fullpath; #endif From b8c56026d1a878732ff1a6416e359054c008a0b2 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 21:24:10 +0900 Subject: [PATCH 28/37] Add filename validation for test/extracting This should prevent creation of files/symlinks outside of current-working-directory or user-set-cwd. 
--- invalid_file_format_0_example_0 | Bin 0 -> 127 bytes invalid_file_format_0_example_1 | Bin 0 -> 220 bytes invalid_file_format_1_example_0 | Bin 0 -> 78 bytes invalid_file_format_1_example_1 | Bin 0 -> 206 bytes invalid_file_format_1_example_2 | Bin 0 -> 308 bytes src/archiver.c | 77 +++++++++++++++++++++++++++----- src/archiver.h | 10 +++++ src/test.c | 6 +++ 8 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 invalid_file_format_0_example_0 create mode 100644 invalid_file_format_0_example_1 create mode 100644 invalid_file_format_1_example_0 create mode 100644 invalid_file_format_1_example_1 create mode 100644 invalid_file_format_1_example_2 diff --git a/invalid_file_format_0_example_0 b/invalid_file_format_0_example_0 new file mode 100644 index 0000000000000000000000000000000000000000..97559990aacd2d4b2f0722cd50a0c1bc8c2f87fb GIT binary patch literal 127 zcmWIc^bPQFjdu)k_V5gIjSq7TVt@fA1_eDm{mi_w#GK3&{j|)S)V##pRQ;U%ymXi- b!!(#;1_q`?E(S$CeVkgrniElUByj-%0{$Nt literal 0 HcmV?d00001 diff --git a/invalid_file_format_0_example_1 b/invalid_file_format_0_example_1 new file mode 100644 index 0000000000000000000000000000000000000000..77f44242c0ac2756bde4714cbeb9f738bed2786b GIT binary patch literal 220 zcmWIc^bPQFjdu)k_V5gIjSq7TVqjoo00Oq0s>}ifT?PhD2qOi=W?*1qP|(xQ%qvUG z$xP8t%gjm5OUzBxPt8k#3hL|WF-!v~ga9?4D9->!9tMM^Isbv+15lKG;j6n$Fli{w iponQ^d1gt5eolT~x<1GXBW$J`GQmwf^6B_DbW;I5ia2xt literal 0 HcmV?d00001 diff --git a/invalid_file_format_1_example_0 b/invalid_file_format_1_example_0 new file mode 100644 index 0000000000000000000000000000000000000000..29d91591deee570ff95de25f0fc1b9eba4eeb30d GIT binary patch literal 78 zcmWIc^bPQFjdu)k_V5gIjSq7TVqj!|0LDoS3~YLO`l-bwAOX4J%-n*U)cC}rQt++{~1eoK)=Uc%eq6=B4QC>0ws`F_YQQ$Qa0CWZ;0=k54sYA^<$b9vA=s literal 0 HcmV?d00001 diff --git a/invalid_file_format_1_example_2 b/invalid_file_format_1_example_2 new file mode 100644 index 0000000000000000000000000000000000000000..9b003b0d5863b855d042ee29328081e0ac5a3740 GIT binary 
patch literal 308 zcmWIc^bPQFjdu)k_V5gIjSq7TVqj!s00Oq0s>}ifT?PhD2qOi=W?*3a$IPIpr>9?% zT3n)!i%w)<@Y2u7&rQ`&Ni8caPA$?;&n(d|&de>yNsUh|O3uhE1B!u+@GmXV&&khA zk59|YNzF^lO^r{>FN#mjNX$#m1d8S)mZTOjFengK0(2S3bwnuj26=;l`3n$#12Y*I gn4q+pPn2f>BM*Z?)13c6@Bt{yzVOvuxHME50KSY`B>(^b literal 0 HcmV?d00001 diff --git a/src/archiver.c b/src/archiver.c index 407764c..087bf56 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -66,6 +66,8 @@ typedef struct SDArchiverInternalFileInfo { uint32_t uid; uint32_t gid; uint64_t file_size; + /// xxxx xxx1 - is invalid. + int_fast8_t other_flags; } SDArchiverInternalFileInfo; void free_internal_to_write(void *data) { @@ -2403,7 +2405,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, const size_t digits = simple_archiver_helper_num_digits(size); char format_str[128]; snprintf(format_str, 128, FILE_COUNTS_OUTPUT_FORMAT_STR_0, digits, digits); - int_fast8_t skip = 0; + int_fast8_t skip; __attribute__((cleanup(simple_archiver_hash_map_free))) SDArchiverHashMap *hash_map = NULL; if (state && state->parsed->working_files && @@ -2421,6 +2423,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, } } for (uint32_t idx = 0; idx < size; ++idx) { + skip = 0; fprintf(stderr, format_str, idx + 1, size); if (feof(in_f) || ferror(in_f)) { return SDAS_INVALID_FILE; @@ -2438,7 +2441,12 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, } buf[1023] = 0; fprintf(stderr, " Filename: %s\n", buf); - if (do_extract) { + if (simple_archiver_validate_file_path((char*)buf)) { + fprintf(stderr, " ERROR: Invalid filename!\n"); + skip = 1; + } + + if (do_extract && !skip) { if ((state->parsed->flags & 0x8) == 0) { __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *test_fd = fopen((const char *)buf, "rb"); @@ -2479,7 +2487,13 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, } uc_heap_buf[u16] = 0; fprintf(stderr, " Filename: 
%s\n", uc_heap_buf); - if (do_extract) { + + if (simple_archiver_validate_file_path((char*)uc_heap_buf)) { + fprintf(stderr, " ERROR: Invalid filename!\n"); + skip = 1; + } + + if (do_extract && !skip) { if ((state->parsed->flags & 0x8) == 0) { __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *test_fd = fopen((const char *)uc_heap_buf, "rb"); @@ -3229,6 +3243,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, uint_fast8_t link_extracted = 0; uint_fast8_t skip_due_to_map = 0; + uint_fast8_t skip_due_to_invalid = 0; if (fread(buf, 1, 2, in_f) != 2) { return SDAS_INVALID_FILE; @@ -3257,6 +3272,11 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, #endif } + if (simple_archiver_validate_file_path(link_name)) { + fprintf(stderr, " WARNING: Invalid link name \"%s\"!\n", link_name); + skip_due_to_invalid = 1; + } + if (working_files_map && simple_archiver_hash_map_get(working_files_map, link_name, u16 + 1) == NULL) { @@ -3278,7 +3298,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } path[u16] = 0; - if (do_extract && !skip_due_to_map && absolute_preferred) { + if (do_extract && !skip_due_to_map && !skip_due_to_invalid && absolute_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3348,7 +3368,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } path[u16] = 0; - if (do_extract && !skip_due_to_map && !absolute_preferred) { + if (do_extract && !skip_due_to_map && !skip_due_to_invalid && !absolute_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3404,8 +3424,8 @@ int 
simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " No Relative path.\n"); } - if (do_extract && !link_extracted && !skip_due_to_map) { - fprintf(stderr, "WARNING Symlink \"%s\" was not created!\n", link_name); + if (do_extract && !link_extracted && !skip_due_to_map && !skip_due_to_invalid) { + fprintf(stderr, " WARNING: Symlink \"%s\" was not created!\n", link_name); } } @@ -3451,6 +3471,11 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } file_info->filename[u16] = 0; + if (simple_archiver_validate_file_path(file_info->filename)) { + fprintf(stderr, "ERROR: File idx %u: Invalid filename!\n", file_idx); + file_info->other_flags |= 1; + } + if (state && state->parsed && (state->parsed->flags & 8) != 0) { int fd = open((const char *)buf, O_RDONLY | O_NOFOLLOW); if (fd == -1) { @@ -3665,9 +3690,11 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, strlen(file_info->filename) + 1) == NULL) { skip_due_to_map = 1; fprintf(stderr, " Skipping not specified in args...\n"); + } else if ((file_info->other_flags & 1) != 0) { + fprintf(stderr, " Skipping invalid filename...\n"); } - if (do_extract && !skip_due_to_map) { + if (do_extract && !skip_due_to_map && (file_info->other_flags & 1) == 0) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3709,7 +3736,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INTERNAL_ERROR; } #endif - } else if (!skip_due_to_map) { + } else if (!skip_due_to_map && (file_info->other_flags & 1) == 0) { fprintf(stderr, " Permissions: "); permissions_from_bits_version_1(file_info->bit_flags, 1); fprintf(stderr, "\n UID: %u\n GID: %u\n", file_info->uid, @@ -3761,9 +3788,11 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t 
do_extract, strlen(file_info->filename) + 1) == NULL) { skip_due_to_map = 1; fprintf(stderr, " Skipping not specified in args...\n"); + } else if (file_info->other_flags & 1) { + fprintf(stderr, " Skipping invalid filename...\n"); } - if (do_extract && !skip_due_to_map) { + if (do_extract && !skip_due_to_map && (file_info->other_flags & 1) == 0) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3809,7 +3838,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INTERNAL_ERROR; } #endif - } else if (!skip_due_to_map) { + } else if (!skip_due_to_map && (file_info->other_flags & 1) == 0) { fprintf(stderr, " Permissions: "); permissions_from_bits_version_1(file_info->bit_flags, 1); fprintf(stderr, "\n UID: %u\n GID: %u\n", file_info->uid, @@ -3991,3 +4020,29 @@ char *simple_archiver_file_abs_path(const char *filename) { #endif return NULL; } + +int simple_archiver_validate_file_path(const char *filepath) { + if (!filepath) { + return 5; + } + + const size_t len = strlen(filepath); + + if (len >= 1 && filepath[0] == '/') { + return 1; + } else if (len >= 3 && filepath[0] == '.' && filepath[1] == '.' && filepath[2] == '/') { + return 2; + } else if (len >= 3 && filepath[len - 1] == '.' && filepath[len - 2] == '.' && filepath[len - 3] == '/') { + return 4; + } + + for (size_t idx = 0; idx < len; ++idx) { + if (len - idx < 4) { + break; + } else if (strncmp(filepath + idx, "/../", 4) == 0) { + return 3; + } + } + + return 0; +} diff --git a/src/archiver.h b/src/archiver.h index 6b62ac3..012fd39 100644 --- a/src/archiver.h +++ b/src/archiver.h @@ -98,4 +98,14 @@ char *simple_archiver_filenames_to_relative_path(const char *from_abs, /// Non-NULL on success, and must be free'd if non-NULL. 
char *simple_archiver_file_abs_path(const char *filename); +/// Used to validate a file in a ".simplearchive" file to avoid writing outside +/// of current working directory. +/// Returns zero if file is OK. +/// Returns 1 if file starts with '/'. +/// Returns 2 if file contains '../' at the start. +/// Returns 3 if file contains '/../' in the middle. +/// Returns 4 if file contains '/..' at the end. +/// Returns 5 if "filepath" is NULL. +int simple_archiver_validate_file_path(const char *filepath); + #endif diff --git a/src/test.c b/src/test.c index 13caba9..420fc30 100644 --- a/src/test.c +++ b/src/test.c @@ -255,6 +255,12 @@ int main(void) { "/one/two/three/four/five", "/one/two/three/other/dir/"); CHECK_STREQ(rel_path, "../other/dir/"); simple_archiver_helper_cleanup_c_string(&rel_path); + + CHECK_FALSE(simple_archiver_validate_file_path("Local/Path")); + CHECK_TRUE(simple_archiver_validate_file_path("/Abs/Path")); + CHECK_TRUE(simple_archiver_validate_file_path("Local/../../not/really")); + CHECK_TRUE(simple_archiver_validate_file_path("./../almost")); + CHECK_TRUE(simple_archiver_validate_file_path("strange/..")); } printf("Checks checked: %u\n", checks_checked); From cf032cd9c149e48d2176a4d8a7c0b40e4f16da50 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 21:28:29 +0900 Subject: [PATCH 29/37] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index d1aaaba..7d4a7af 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ API calls. 
--overwrite-extract : allows overwriting when extracting --no-abs-symlink : do not store absolute paths for symlinks --temp-files-dir : where to store temporary files created when compressing (defaults to current working directory) + --write-version : Force write version file format (default 1) + --chunk-min-size : v1 file format minimum chunk size (default 4194304 or 4MiB) -- : specifies remaining arguments are files to archive/extract If creating archive file, remaining args specify files to archive. If extracting archive file, remaining args specify files to extract. From 36fb7bf04258da45d59ba4b99d6df6730a7af378 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 4 Oct 2024 21:39:13 +0900 Subject: [PATCH 30/37] clang-format, ensure --no-abs-symlink works in v1 --- src/archiver.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 087bf56..929b7d2 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1882,7 +1882,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, return SDAS_FAILED_TO_WRITE; } - if (abs_path) { + if (abs_path && (state->parsed->flags & 0x20) == 0) { len = strlen(abs_path); if (len >= 0xFFFF) { fprintf(stderr, @@ -2441,7 +2441,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, } buf[1023] = 0; fprintf(stderr, " Filename: %s\n", buf); - if (simple_archiver_validate_file_path((char*)buf)) { + if (simple_archiver_validate_file_path((char *)buf)) { fprintf(stderr, " ERROR: Invalid filename!\n"); skip = 1; } @@ -2488,7 +2488,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, uc_heap_buf[u16] = 0; fprintf(stderr, " Filename: %s\n", uc_heap_buf); - if (simple_archiver_validate_file_path((char*)uc_heap_buf)) { + if (simple_archiver_validate_file_path((char *)uc_heap_buf)) { fprintf(stderr, " ERROR: Invalid filename!\n"); skip = 1; } @@ -3263,6 +3263,11 @@ int 
simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, if (!do_extract) { fprintf(stderr, " Link name: %s\n", link_name); + if (absolute_preferred) { + fprintf(stderr, " Absolute path preferred.\n"); + } else { + fprintf(stderr, " Relative path preferred.\n"); + } #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3298,7 +3303,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } path[u16] = 0; - if (do_extract && !skip_due_to_map && !skip_due_to_invalid && absolute_preferred) { + if (do_extract && !skip_due_to_map && !skip_due_to_invalid && + absolute_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3368,7 +3374,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return ret; } path[u16] = 0; - if (do_extract && !skip_due_to_map && !skip_due_to_invalid && !absolute_preferred) { + if (do_extract && !skip_due_to_map && !skip_due_to_invalid && + !absolute_preferred) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3424,8 +3431,10 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " No Relative path.\n"); } - if (do_extract && !link_extracted && !skip_due_to_map && !skip_due_to_invalid) { - fprintf(stderr, " WARNING: Symlink \"%s\" was not created!\n", link_name); + if (do_extract && !link_extracted && !skip_due_to_map && + !skip_due_to_invalid) { + fprintf(stderr, " WARNING: Symlink \"%s\" was not created!\n", + link_name); } } @@ -3694,7 +3703,8 @@ int 
simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " Skipping invalid filename...\n"); } - if (do_extract && !skip_due_to_map && (file_info->other_flags & 1) == 0) { + if (do_extract && !skip_due_to_map && + (file_info->other_flags & 1) == 0) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -3792,7 +3802,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " Skipping invalid filename...\n"); } - if (do_extract && !skip_due_to_map && (file_info->other_flags & 1) == 0) { + if (do_extract && !skip_due_to_map && + (file_info->other_flags & 1) == 0) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -4030,9 +4041,11 @@ int simple_archiver_validate_file_path(const char *filepath) { if (len >= 1 && filepath[0] == '/') { return 1; - } else if (len >= 3 && filepath[0] == '.' && filepath[1] == '.' && filepath[2] == '/') { + } else if (len >= 3 && filepath[0] == '.' && filepath[1] == '.' && + filepath[2] == '/') { return 2; - } else if (len >= 3 && filepath[len - 1] == '.' && filepath[len - 2] == '.' && filepath[len - 3] == '/') { + } else if (len >= 3 && filepath[len - 1] == '.' && filepath[len - 2] == '.' && + filepath[len - 3] == '/') { return 4; } From 1a16c2c3bbd5c0e029aa0fe589b417420653b22f Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 7 Oct 2024 11:23:10 +0900 Subject: [PATCH 31/37] Impl. 
more robust decompression --- src/archiver.c | 213 +++++++++++++++++++++++++++++++------------------ 1 file changed, 137 insertions(+), 76 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 929b7d2..f920ebf 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #endif @@ -955,10 +956,95 @@ int read_fd_to_out_fd(FILE *in_fd, FILE *out_fd, char *read_buf, return SDAS_SUCCESS; } +int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, + char *buf, const size_t buf_size) { + if (*to_dec_pipe >= 0) { + uint_fast32_t loop_count = 0; + if (*chunk_remaining > 0) { + if (*chunk_remaining > buf_size) { + size_t fread_ret = fread(buf, 1, 1024, in_f); + if (fread_ret == 0) { + goto TRY_WRITE_TO_DECOMP_END; + } else { + ssize_t write_ret; + TRY_WRITE_TO_DECOMP_AGAIN_0: + write_ret = write(*to_dec_pipe, buf, fread_ret); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking write. +#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + struct timespec sleep_time; + sleep_time.tv_sec = 0; + sleep_time.tv_nsec = 100000000; + nanosleep(&sleep_time, NULL); +#endif + if (++loop_count > 10) { + return SDAS_INTERNAL_ERROR; + } + goto TRY_WRITE_TO_DECOMP_AGAIN_0; + } else { + return SDAS_INTERNAL_ERROR; + } + } else if (write_ret == 0) { + return SDAS_INTERNAL_ERROR; + } else { + *chunk_remaining -= (size_t)write_ret; + } + } + } else { + size_t fread_ret = fread(buf, 1, *chunk_remaining, in_f); + if (fread_ret == 0) { + goto TRY_WRITE_TO_DECOMP_END; + } else { + ssize_t write_ret; + TRY_WRITE_TO_DECOMP_AGAIN_1: + write_ret = write(*to_dec_pipe, buf, fread_ret); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking write. 
+#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ + SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX + struct timespec sleep_time; + sleep_time.tv_sec = 0; + sleep_time.tv_nsec = 100000000; + nanosleep(&sleep_time, NULL); +#endif + if (++loop_count > 10) { + return SDAS_INTERNAL_ERROR; + } + goto TRY_WRITE_TO_DECOMP_AGAIN_1; + } else { + return SDAS_INTERNAL_ERROR; + } + } else if (write_ret == 0) { + return SDAS_INTERNAL_ERROR; + } else if ((size_t)write_ret <= *chunk_remaining) { + *chunk_remaining -= (size_t)write_ret; + } else { + return SDAS_INTERNAL_ERROR; + } + } + } + } + } + +TRY_WRITE_TO_DECOMP_END: + if (*to_dec_pipe >= 0 && *chunk_remaining == 0) { + close(*to_dec_pipe); + *to_dec_pipe = -1; + } + + return SDAS_SUCCESS; +} + /// Returns SDAS_SUCCESS on success. int read_decomp_to_out_file(const char *out_filename, int in_pipe, char *read_buf, const size_t read_buf_size, - const uint64_t file_size) { + const uint64_t file_size, int *to_dec_pipe, + uint64_t *chunk_remaining, FILE *in_f) { __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *out_fd = NULL; if (out_filename) { @@ -974,6 +1060,8 @@ int read_decomp_to_out_file(const char *out_filename, int in_pipe, ssize_t read_ret; size_t fwrite_ret; while (written_amt < file_size) { + try_write_to_decomp(to_dec_pipe, chunk_remaining, in_f, read_buf, + read_buf_size); if (file_size - written_amt >= read_buf_size) { read_ret = read(in_pipe, read_buf, read_buf_size); if (read_ret > 0) { @@ -1003,10 +1091,15 @@ int read_decomp_to_out_file(const char *out_filename, int in_pipe, break; } } else { - // Error. - fprintf(stderr, "ERROR Failed to read from decompressor! (%lu)\n", - read_ret); - return SDAS_INTERNAL_ERROR; + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking read from pipe. + continue; + } else { + // Error. + fprintf(stderr, "ERROR Failed to read from decompressor! 
(%lu)\n", + read_ret); + return SDAS_INTERNAL_ERROR; + } } } else { read_ret = read(in_pipe, read_buf, file_size - written_amt); @@ -1037,10 +1130,15 @@ int read_decomp_to_out_file(const char *out_filename, int in_pipe, break; } } else { - // Error. - fprintf(stderr, "ERROR Failed to read from decompressor! (%d)\n", - errno); - return SDAS_INTERNAL_ERROR; + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking read from pipe. + continue; + } else { + // Error. + fprintf(stderr, "ERROR Failed to read from decompressor! (%d)\n", + errno); + return SDAS_INTERNAL_ERROR; + } } } } @@ -1323,6 +1421,7 @@ void simple_archiver_internal_cleanup_decomp(pid_t *decomp_pid) { "WARNING: Exec failed (exec exit code unknown)! Invalid " "decompressor cmd?\n"); } + *decomp_pid = -1; } } #endif @@ -3543,6 +3642,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_64_bit_be(&u64); const uint64_t chunk_size = u64; + uint64_t chunk_remaining = chunk_size; uint64_t chunk_idx = 0; SDArchiverLLNode *node = file_info_list->head; @@ -3555,6 +3655,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, // Start the decompressing process and read into files. // Handle SIGPIPE. + is_sig_pipe_occurred = 0; signal(SIGPIPE, handle_sig_pipe); int pipe_into_cmd[2]; @@ -3569,6 +3670,20 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, close(pipe_into_cmd[0]); close(pipe_into_cmd[1]); return SDAS_INTERNAL_ERROR; + } else if (fcntl(pipe_into_cmd[1], F_SETFL, O_NONBLOCK) != 0) { + // Unable to set non-blocking on into-write-pipe. + close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + close(pipe_outof_cmd[1]); + return SDAS_INTERNAL_ERROR; + } else if (fcntl(pipe_outof_cmd[0], F_SETFL, O_NONBLOCK) != 0) { + // Unable to set non-blocking on outof-read-pipe. 
+ close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + close(pipe_outof_cmd[1]); + return SDAS_INTERNAL_ERROR; } if (state && state->parsed && state->parsed->decompressor) { @@ -3599,12 +3714,12 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, close(pipe_into_cmd[0]); close(pipe_outof_cmd[1]); - __attribute__((cleanup( - simple_archiver_internal_cleanup_int_fd))) int pipe_into_write = - pipe_into_cmd[1]; __attribute__((cleanup( simple_archiver_internal_cleanup_int_fd))) int pipe_outof_read = pipe_outof_cmd[0]; + __attribute__((cleanup( + simple_archiver_internal_cleanup_int_fd))) int pipe_into_write = + pipe_into_cmd[1]; int decompressor_status; int decompressor_return_val; @@ -3630,63 +3745,6 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INTERNAL_ERROR; } - // Write all of chunk into decompressor. - uint64_t chunk_written = 0; - while (chunk_written < chunk_size) { - if (is_sig_pipe_occurred) { - fprintf(stderr, - "WARNING: Failed to write to decompressor (SIGPIPE)! Invalid " - "decompressor cmd?\n"); - return SDAS_INTERNAL_ERROR; - } else if (chunk_size - chunk_written >= 1024) { - if (fread(buf, 1, 1024, in_f) != 1024) { - fprintf(stderr, "ERROR Failed to read chunk for decompressing!\n"); - return SDAS_INTERNAL_ERROR; - } - ssize_t write_ret = write(pipe_into_cmd[1], buf, 1024); - if (write_ret > 0 && (size_t)write_ret == 1024) { - // Successful write. - } else if (write_ret == -1) { - fprintf(stderr, - "WARNING: Failed to write chunk data into decompressor! " - "Invalid decompressor cmd? (errno %d)\n", - errno); - return SDAS_INTERNAL_ERROR; - } else { - fprintf(stderr, - "WARNING: Failed to write chunk data into decompressor! 
" - "Invalid decompressor cmd?\n"); - return SDAS_INTERNAL_ERROR; - } - chunk_written += 1024; - } else { - if (fread(buf, 1, chunk_size - chunk_written, in_f) != - chunk_size - chunk_written) { - fprintf(stderr, "ERROR Failed to read chunk for decompressing!\n"); - return SDAS_INTERNAL_ERROR; - } - ssize_t write_ret = - write(pipe_into_cmd[1], buf, chunk_size - chunk_written); - if (write_ret > 0 && - (size_t)write_ret == chunk_size - chunk_written) { - // Successful write. - } else if (write_ret == -1) { - fprintf(stderr, - "WARNING: Failed to write chunk data into decompressor! " - "Invalid decompressor cmd?\n"); - return SDAS_INTERNAL_ERROR; - } else { - fprintf(stderr, - "WARNING: Failed to write chunk data into decompressor! " - "Invalid decompressor cmd?\n"); - return SDAS_INTERNAL_ERROR; - } - chunk_written = chunk_size; - } - } - - simple_archiver_internal_cleanup_int_fd(&pipe_into_write); - while (node->next != file_info_list->tail) { node = node->next; const SDArchiverInternalFileInfo *file_info = node->data; @@ -3719,16 +3777,17 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " WARNING: File already exists and " "\"--overwrite-extract\" is not specified, skipping!\n"); - read_decomp_to_out_file(NULL, pipe_outof_cmd[0], (char *)buf, - 1024, file_info->file_size); + read_decomp_to_out_file(NULL, pipe_outof_read, (char *)buf, 1024, + file_info->file_size, &pipe_into_write, + &chunk_remaining, in_f); continue; } } simple_archiver_helper_make_dirs(file_info->filename); - int ret = - read_decomp_to_out_file(file_info->filename, pipe_outof_cmd[0], - (char *)buf, 1024, file_info->file_size); + int ret = read_decomp_to_out_file( + file_info->filename, pipe_outof_read, (char *)buf, 1024, + file_info->file_size, &pipe_into_write, &chunk_remaining, in_f); if (ret != SDAS_SUCCESS) { return ret; } @@ -3758,13 +3817,15 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " 
File size: %lu\n", file_info->file_size); } int ret = read_decomp_to_out_file( - NULL, pipe_outof_cmd[0], (char *)buf, 1024, file_info->file_size); + NULL, pipe_outof_read, (char *)buf, 1024, file_info->file_size, + &pipe_into_write, &chunk_remaining, in_f); if (ret != SDAS_SUCCESS) { return ret; } } else { int ret = read_decomp_to_out_file( - NULL, pipe_outof_cmd[0], (char *)buf, 1024, file_info->file_size); + NULL, pipe_outof_cmd[0], (char *)buf, 1024, file_info->file_size, + &pipe_into_write, &chunk_remaining, in_f); if (ret != SDAS_SUCCESS) { return ret; } From f26509f2272f63f56717a2140a7c77315b648abd Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 7 Oct 2024 12:08:38 +0900 Subject: [PATCH 32/37] Impl. more robust compression --- src/archiver.c | 140 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 43 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index f920ebf..4d58b95 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1389,7 +1389,7 @@ void simple_archiver_internal_cleanup_int_fd(int *fd) { #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX -void simple_archiver_internal_cleanup_decomp(pid_t *decomp_pid) { +void simple_archiver_internal_cleanup_decomp_pid(pid_t *decomp_pid) { if (decomp_pid && *decomp_pid >= 0) { int decompressor_status; int decompressor_return_val; @@ -2215,7 +2215,9 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, int pipe_into_cmd[2]; int pipe_outof_cmd[2]; - pid_t compressor_pid; + __attribute__((cleanup( + simple_archiver_internal_cleanup_decomp_pid))) pid_t compressor_pid = + -1; if (pipe(pipe_into_cmd) != 0) { // Unable to create pipes. 
@@ -2225,6 +2227,20 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, close(pipe_into_cmd[0]); close(pipe_into_cmd[1]); return SDAS_INTERNAL_ERROR; + } else if (fcntl(pipe_into_cmd[1], F_SETFL, O_NONBLOCK) != 0) { + // Unable to set non-blocking on into-write-pipe. + close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + close(pipe_outof_cmd[1]); + return SDAS_INTERNAL_ERROR; + } else if (fcntl(pipe_outof_cmd[0], F_SETFL, O_NONBLOCK) != 0) { + // Unable to set non-blocking on outof-read-pipe. + close(pipe_into_cmd[0]); + close(pipe_into_cmd[1]); + close(pipe_outof_cmd[0]); + close(pipe_outof_cmd[1]); + return SDAS_INTERNAL_ERROR; } else if (simple_archiver_de_compress(pipe_into_cmd, pipe_outof_cmd, state->parsed->compressor, &compressor_pid) != 0) { @@ -2241,13 +2257,14 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, close(pipe_outof_cmd[1]); // Set up cleanup so that remaining open pipes in this side is cleaned up. - __attribute__((cleanup( - simple_archiver_internal_cleanup_int_fd))) int pipe_into_write = - pipe_into_cmd[1]; __attribute__((cleanup( simple_archiver_internal_cleanup_int_fd))) int pipe_outof_read = pipe_outof_cmd[0]; + __attribute__((cleanup( + simple_archiver_internal_cleanup_int_fd))) int pipe_into_write = + pipe_into_cmd[1]; + int_fast8_t to_temp_finished = 0; for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); ++file_idx) { file_node = file_node->next; @@ -2257,55 +2274,92 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, const SDArchiverInternalFileInfo *file_info_struct = file_node->data; __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = fopen(file_info_struct->filename, "rb"); - while (!feof(fd)) { - if (ferror(fd)) { - fprintf(stderr, "ERROR: Writing to chunk, file read error!\n"); - return SDAS_INTERNAL_ERROR; + + int_fast8_t to_comp_finished = 0; + while (!to_comp_finished) { + if (!to_comp_finished) { + // 
Write to compressor. + if (ferror(fd)) { + fprintf(stderr, "ERROR: Writing to chunk, file read error!\n"); + return SDAS_INTERNAL_ERROR; + } + size_t fread_ret = fread(buf, 1, 1024, fd); + if (fread_ret > 0) { + ssize_t write_ret = write(pipe_into_write, buf, fread_ret); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking write. + } else { + fprintf(stderr, + "ERROR: Writing to compressor, pipe write error!\n"); + return SDAS_FAILED_TO_WRITE; + } + } else if ((size_t)write_ret != fread_ret) { + fprintf( + stderr, + "ERROR: Writing to compressor, unable to write bytes!\n"); + return SDAS_FAILED_TO_WRITE; + } + } + + if (feof(fd)) { + to_comp_finished = 1; + } } - size_t fread_ret = fread(buf, 1, 1024, fd); - if (fread_ret > 0) { - ssize_t write_ret = write(pipe_into_write, buf, fread_ret); - if (write_ret < 0) { + + // Write compressed data to temp file. + ssize_t read_ret = read(pipe_outof_read, buf, 1024); + if (read_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking read. + } else { fprintf(stderr, - "ERROR: Writing to compressor, pipe write error!\n"); - return SDAS_FAILED_TO_WRITE; - } else if ((size_t)write_ret != fread_ret) { + "ERROR: Reading from compressor, pipe read error!\n"); + return SDAS_INTERNAL_ERROR; + } + } else if (read_ret == 0) { + // EOF. + to_temp_finished = 1; + } else { + size_t fwrite_ret = fwrite(buf, 1, (size_t)read_ret, temp_fd); + if (fwrite_ret != (size_t)read_ret) { fprintf(stderr, - "ERROR: Writing to compressor, unable to write bytes!\n"); - return SDAS_FAILED_TO_WRITE; + "ERROR: Reading from compressor, failed to write to " + "temporary file!\n"); + return SDAS_INTERNAL_ERROR; } } } } - // Close write to pipe to compressor as the chunk is written. simple_archiver_internal_cleanup_int_fd(&pipe_into_write); - // Read compressed data into temporary file. 
- do { - ssize_t read_ret = read(pipe_outof_read, buf, 1024); - if (read_ret < 0) { - fprintf(stderr, "ERROR: Reading from compressor, pipe read error!\n"); - return SDAS_INTERNAL_ERROR; - } else if (read_ret == 0) { - // EOF. - break; - } else { - size_t fwrite_ret = fwrite(buf, 1, (size_t)read_ret, temp_fd); - if (fwrite_ret != (size_t)read_ret) { - fprintf(stderr, - "ERROR: Reading from compressor, failed to write to " - "temporary file!\n"); - return SDAS_INTERNAL_ERROR; + // Finish writing. + if (!to_temp_finished) { + while (1) { + ssize_t read_ret = read(pipe_outof_read, buf, 1024); + if (read_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking read. + } else { + fprintf(stderr, + "ERROR: Reading from compressor, pipe read error!\n"); + return SDAS_INTERNAL_ERROR; + } + } else if (read_ret == 0) { + // EOF. + break; + } else { + size_t fwrite_ret = fwrite(buf, 1, (size_t)read_ret, temp_fd); + if (fwrite_ret != (size_t)read_ret) { + fprintf(stderr, + "ERROR: Reading from compressor, failed to write to " + "temporary file!\n"); + return SDAS_INTERNAL_ERROR; + } } } - } while (1); - - // Close read from pipe from compressor as chunk is fully compressed. - simple_archiver_internal_cleanup_int_fd(&pipe_outof_read); - - // Wait on compressor to stop. - waitpid(compressor_pid, NULL, 0); + } long comp_chunk_size = ftell(temp_fd); if (comp_chunk_size < 0) { @@ -3661,7 +3715,7 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, int pipe_into_cmd[2]; int pipe_outof_cmd[2]; __attribute__((cleanup( - simple_archiver_internal_cleanup_decomp))) pid_t decompressor_pid; + simple_archiver_internal_cleanup_decomp_pid))) pid_t decompressor_pid; if (pipe(pipe_into_cmd) != 0) { // Unable to create pipes. 
break; From 3c739f92b8f1cf676f3373b5906cf171c553363d Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 7 Oct 2024 13:23:44 +0900 Subject: [PATCH 33/37] Fix errors de/compressing chunks --- src/archiver.c | 196 +++++++++++++++++++++++++++++++------------------ 1 file changed, 125 insertions(+), 71 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 4d58b95..1090777 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -957,74 +957,82 @@ int read_fd_to_out_fd(FILE *in_fd, FILE *out_fd, char *read_buf, } int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, - char *buf, const size_t buf_size) { + char *buf, const size_t buf_size, char *hold_buf, + int *has_hold) { if (*to_dec_pipe >= 0) { - uint_fast32_t loop_count = 0; if (*chunk_remaining > 0) { if (*chunk_remaining > buf_size) { - size_t fread_ret = fread(buf, 1, 1024, in_f); - if (fread_ret == 0) { - goto TRY_WRITE_TO_DECOMP_END; - } else { - ssize_t write_ret; - TRY_WRITE_TO_DECOMP_AGAIN_0: - write_ret = write(*to_dec_pipe, buf, fread_ret); - if (write_ret < 0) { - if (errno == EAGAIN || errno == EWOULDBLOCK) { - // Non-blocking write. 
-#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ - SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ - SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - struct timespec sleep_time; - sleep_time.tv_sec = 0; - sleep_time.tv_nsec = 100000000; - nanosleep(&sleep_time, NULL); -#endif - if (++loop_count > 10) { + if (*has_hold < 0) { + size_t fread_ret = fread(buf, 1, 1024, in_f); + if (fread_ret == 0) { + goto TRY_WRITE_TO_DECOMP_END; + } else { + ssize_t write_ret = write(*to_dec_pipe, buf, fread_ret); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + *has_hold = (int)fread_ret; + memcpy(hold_buf, buf, fread_ret); + return SDAS_SUCCESS; + } else { return SDAS_INTERNAL_ERROR; } - goto TRY_WRITE_TO_DECOMP_AGAIN_0; + } else if (write_ret == 0) { + return SDAS_INTERNAL_ERROR; + } else { + *chunk_remaining -= (size_t)write_ret; + } + } + } else { + ssize_t write_ret = write(*to_dec_pipe, hold_buf, (size_t)*has_hold); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return SDAS_SUCCESS; } else { return SDAS_INTERNAL_ERROR; } } else if (write_ret == 0) { return SDAS_INTERNAL_ERROR; } else { - *chunk_remaining -= (size_t)write_ret; + *chunk_remaining -= (size_t)*has_hold; + *has_hold = -1; } } } else { - size_t fread_ret = fread(buf, 1, *chunk_remaining, in_f); - if (fread_ret == 0) { - goto TRY_WRITE_TO_DECOMP_END; - } else { - ssize_t write_ret; - TRY_WRITE_TO_DECOMP_AGAIN_1: - write_ret = write(*to_dec_pipe, buf, fread_ret); - if (write_ret < 0) { - if (errno == EAGAIN || errno == EWOULDBLOCK) { - // Non-blocking write. 
-#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ - SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ - SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX - struct timespec sleep_time; - sleep_time.tv_sec = 0; - sleep_time.tv_nsec = 100000000; - nanosleep(&sleep_time, NULL); -#endif - if (++loop_count > 10) { + if (*has_hold < 0) { + size_t fread_ret = fread(buf, 1, *chunk_remaining, in_f); + if (fread_ret == 0) { + goto TRY_WRITE_TO_DECOMP_END; + } else { + ssize_t write_ret = write(*to_dec_pipe, buf, fread_ret); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + *has_hold = (int)fread_ret; + memcpy(hold_buf, buf, fread_ret); + return SDAS_SUCCESS; + } else { return SDAS_INTERNAL_ERROR; } - goto TRY_WRITE_TO_DECOMP_AGAIN_1; + } else if (write_ret == 0) { + return SDAS_INTERNAL_ERROR; + } else if ((size_t)write_ret <= *chunk_remaining) { + *chunk_remaining -= (size_t)write_ret; + } else { + return SDAS_INTERNAL_ERROR; + } + } + } else { + ssize_t write_ret = write(*to_dec_pipe, hold_buf, (size_t)*has_hold); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return SDAS_SUCCESS; } else { return SDAS_INTERNAL_ERROR; } } else if (write_ret == 0) { return SDAS_INTERNAL_ERROR; - } else if ((size_t)write_ret <= *chunk_remaining) { - *chunk_remaining -= (size_t)write_ret; } else { - return SDAS_INTERNAL_ERROR; + *chunk_remaining -= (size_t)*has_hold; + *has_hold = -1; } } } @@ -1044,7 +1052,8 @@ TRY_WRITE_TO_DECOMP_END: int read_decomp_to_out_file(const char *out_filename, int in_pipe, char *read_buf, const size_t read_buf_size, const uint64_t file_size, int *to_dec_pipe, - uint64_t *chunk_remaining, FILE *in_f) { + uint64_t *chunk_remaining, FILE *in_f, + char *hold_buf, int *has_hold) { __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *out_fd = NULL; if (out_filename) { @@ -1060,8 +1069,11 @@ int read_decomp_to_out_file(const char *out_filename, int in_pipe, 
ssize_t read_ret; size_t fwrite_ret; while (written_amt < file_size) { - try_write_to_decomp(to_dec_pipe, chunk_remaining, in_f, read_buf, - read_buf_size); + int ret = try_write_to_decomp(to_dec_pipe, chunk_remaining, in_f, read_buf, + read_buf_size, hold_buf, has_hold); + if (ret != SDAS_SUCCESS) { + return ret; + } if (file_size - written_amt >= read_buf_size) { read_ret = read(in_pipe, read_buf, read_buf_size); if (read_ret > 0) { @@ -2100,12 +2112,15 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, uint64_t *non_c_chunk_size = non_compressing_chunk_size; SDArchiverLLNode *file_node = files_list->head; + uint64_t chunk_count = 0; for (SDArchiverLLNode *chunk_c_node = chunk_counts->head->next; chunk_c_node != chunk_counts->tail; chunk_c_node = chunk_c_node->next) { + fprintf(stderr, "CHUNK %3lu of %3lu\n", ++chunk_count, chunk_counts->count); // Write file count before iterating through files. if (non_c_chunk_size) { *non_c_chunk_size = 0; } + u32 = (uint32_t)(*((uint64_t *)chunk_c_node->data)); simple_archiver_helper_32_bit_be(&u32); if (fwrite(&u32, 4, 1, out_f) != 1) { @@ -2267,6 +2282,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, int_fast8_t to_temp_finished = 0; for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); ++file_idx) { + fprintf(stderr, " FILE %3lu of %3lu\n", file_idx + 1, + *(uint64_t *)chunk_c_node->data); file_node = file_node->next; if (file_node == files_list->tail) { return SDAS_INTERNAL_ERROR; @@ -2276,6 +2293,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, fopen(file_info_struct->filename, "rb"); int_fast8_t to_comp_finished = 0; + char hold_buf[1024]; + int has_hold = -1; while (!to_comp_finished) { if (!to_comp_finished) { // Write to compressor. 
@@ -2283,28 +2302,47 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, fprintf(stderr, "ERROR: Writing to chunk, file read error!\n"); return SDAS_INTERNAL_ERROR; } - size_t fread_ret = fread(buf, 1, 1024, fd); - if (fread_ret > 0) { - ssize_t write_ret = write(pipe_into_write, buf, fread_ret); + if (has_hold < 0) { + size_t fread_ret = fread(buf, 1, 1024, fd); + if (fread_ret > 0) { + ssize_t write_ret = write(pipe_into_write, buf, fread_ret); + if (write_ret < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + // Non-blocking write. + has_hold = (int)fread_ret; + memcpy(hold_buf, buf, fread_ret); + } else { + fprintf( + stderr, + "ERROR: Writing to compressor, pipe write error!\n"); + return SDAS_FAILED_TO_WRITE; + } + } else if ((size_t)write_ret != fread_ret) { + fprintf( + stderr, + "ERROR: Writing to compressor, unable to write bytes!\n"); + return SDAS_FAILED_TO_WRITE; + } + } + + if (feof(fd)) { + to_comp_finished = 1; + } + } else { + ssize_t write_ret = + write(pipe_into_write, hold_buf, (size_t)has_hold); if (write_ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) { // Non-blocking write. } else { - fprintf(stderr, - "ERROR: Writing to compressor, pipe write error!\n"); - return SDAS_FAILED_TO_WRITE; + return SDAS_INTERNAL_ERROR; } - } else if ((size_t)write_ret != fread_ret) { - fprintf( - stderr, - "ERROR: Writing to compressor, unable to write bytes!\n"); - return SDAS_FAILED_TO_WRITE; + } else if (write_ret != has_hold) { + return SDAS_INTERNAL_ERROR; + } else { + has_hold = -1; } } - - if (feof(fd)) { - to_comp_finished = 1; - } } // Write compressed data to temp file. @@ -2379,6 +2417,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, return SDAS_INTERNAL_ERROR; } + size_t written_size = 0; + // Write compressed chunk. 
while (!feof(temp_fd)) { if (ferror(temp_fd)) { @@ -2387,6 +2427,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, size_t fread_ret = fread(buf, 1, 1024, temp_fd); if (fread_ret > 0) { size_t fwrite_ret = fwrite(buf, 1, fread_ret, out_f); + written_size += fread_ret; if (fwrite_ret != fread_ret) { fprintf(stderr, "ERROR: Partial write of read bytes from temp file to " @@ -2396,6 +2437,12 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, } } + if (written_size != (size_t)comp_chunk_size) { + fprintf(stderr, + "ERROR: Written chunk size is not actual chunk size!\n"); + return SDAS_FAILED_TO_WRITE; + } + // Cleanup and remove temp_fd. simple_archiver_helper_cleanup_FILE(&temp_fd); } else { @@ -2407,6 +2454,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, fwrite(non_c_chunk_size, 8, 1, out_f); for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); ++file_idx) { + fprintf(stderr, " FILE %3lu of %3lu\n", file_idx + 1, + *(uint64_t *)chunk_c_node->data); file_node = file_node->next; if (file_node == files_list->tail) { return SDAS_INTERNAL_ERROR; @@ -3799,6 +3848,9 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INTERNAL_ERROR; } + char hold_buf[1024]; + int has_hold = -1; + while (node->next != file_info_list->tail) { node = node->next; const SDArchiverInternalFileInfo *file_info = node->data; @@ -3833,7 +3885,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, "\"--overwrite-extract\" is not specified, skipping!\n"); read_decomp_to_out_file(NULL, pipe_outof_read, (char *)buf, 1024, file_info->file_size, &pipe_into_write, - &chunk_remaining, in_f); + &chunk_remaining, in_f, hold_buf, + &has_hold); continue; } } @@ -3841,7 +3894,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_make_dirs(file_info->filename); int ret = read_decomp_to_out_file( 
file_info->filename, pipe_outof_read, (char *)buf, 1024, - file_info->file_size, &pipe_into_write, &chunk_remaining, in_f); + file_info->file_size, &pipe_into_write, &chunk_remaining, in_f, + hold_buf, &has_hold); if (ret != SDAS_SUCCESS) { return ret; } @@ -3872,14 +3926,14 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } int ret = read_decomp_to_out_file( NULL, pipe_outof_read, (char *)buf, 1024, file_info->file_size, - &pipe_into_write, &chunk_remaining, in_f); + &pipe_into_write, &chunk_remaining, in_f, hold_buf, &has_hold); if (ret != SDAS_SUCCESS) { return ret; } } else { int ret = read_decomp_to_out_file( - NULL, pipe_outof_cmd[0], (char *)buf, 1024, file_info->file_size, - &pipe_into_write, &chunk_remaining, in_f); + NULL, pipe_outof_read, (char *)buf, 1024, file_info->file_size, + &pipe_into_write, &chunk_remaining, in_f, hold_buf, &has_hold); if (ret != SDAS_SUCCESS) { return ret; } @@ -3887,8 +3941,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } // Ensure EOF is left from pipe. 
- ssize_t read_ret = read(pipe_outof_cmd[0], buf, 1024); - if (read_ret != 0) { + ssize_t read_ret = read(pipe_outof_read, buf, 1024); + if (read_ret > 0) { fprintf(stderr, "WARNING decompressor didn't reach EOF!\n"); } } else { From 8bae61d36d1a997909ca7cedce452adc6a022c5a Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 7 Oct 2024 14:03:12 +0900 Subject: [PATCH 34/37] Minor quality-of-life fixes --- src/archiver.c | 16 ++++++++-------- src/parser.c | 4 ++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/archiver.c b/src/archiver.c index 1090777..878937b 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -2282,13 +2282,13 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, int_fast8_t to_temp_finished = 0; for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); ++file_idx) { - fprintf(stderr, " FILE %3lu of %3lu\n", file_idx + 1, - *(uint64_t *)chunk_c_node->data); file_node = file_node->next; if (file_node == files_list->tail) { return SDAS_INTERNAL_ERROR; } const SDArchiverInternalFileInfo *file_info_struct = file_node->data; + fprintf(stderr, " FILE %3lu of %3lu: %s\n", file_idx + 1, + *(uint64_t *)chunk_c_node->data, file_info_struct->filename); __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = fopen(file_info_struct->filename, "rb"); @@ -2454,13 +2454,13 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, fwrite(non_c_chunk_size, 8, 1, out_f); for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data); ++file_idx) { - fprintf(stderr, " FILE %3lu of %3lu\n", file_idx + 1, - *(uint64_t *)chunk_c_node->data); file_node = file_node->next; if (file_node == files_list->tail) { return SDAS_INTERNAL_ERROR; } const SDArchiverInternalFileInfo *file_info_struct = file_node->data; + fprintf(stderr, " FILE %3lu of %3lu: %s\n", file_idx + 1, + *(uint64_t *)chunk_c_node->data, file_info_struct->filename); 
__attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = fopen(file_info_struct->filename, "rb"); while (!feof(fd)) { @@ -3854,8 +3854,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, while (node->next != file_info_list->tail) { node = node->next; const SDArchiverInternalFileInfo *file_info = node->data; - fprintf(stderr, " FILE %3u of %3u\n", ++file_idx, file_count); - fprintf(stderr, " Filename: %s\n", file_info->filename); + fprintf(stderr, " FILE %3u of %3u: %s\n", ++file_idx, file_count, + file_info->filename); uint_fast8_t skip_due_to_map = 0; if (working_files_map && simple_archiver_hash_map_get( @@ -3953,8 +3953,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, while (node->next != file_info_list->tail) { node = node->next; const SDArchiverInternalFileInfo *file_info = node->data; - fprintf(stderr, " FILE %3u of %3u\n", ++file_idx, file_count); - fprintf(stderr, " Filename: %s\n", file_info->filename); + fprintf(stderr, " FILE %3u of %3u: %s\n", ++file_idx, file_count, + file_info->filename); chunk_idx += file_info->file_size; if (chunk_idx > chunk_size) { fprintf(stderr, "ERROR Files in chunk is larger than chunk!\n"); diff --git a/src/parser.c b/src/parser.c index f9b8ae6..dd6383a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -347,6 +347,10 @@ int simple_archiver_parse_args(int argc, const char **argv, } else if (argv[0][0] != '-') { is_remaining_args = 1; continue; + } else { + fprintf(stderr, "ERROR: Got invalid arg \"%s\"!\n", argv[0]); + simple_archiver_print_usage(); + return 1; } } else { if (out->working_files == NULL) { From aa46172aa7573c963a6f1dbc5693a24a045c3273 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Mon, 7 Oct 2024 15:16:05 +0900 Subject: [PATCH 35/37] Buffer size to 32KiB, fixes --- src/archiver.c | 190 ++++++++++++++++++++++++++++++------------------- 1 file changed, 118 insertions(+), 72 deletions(-) diff --git a/src/archiver.c 
b/src/archiver.c index 878937b..8e57c5a 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -44,6 +44,8 @@ #define FILE_COUNTS_OUTPUT_FORMAT_STR_0 "\nFile %%%lulu of %%%lulu.\n" #define FILE_COUNTS_OUTPUT_FORMAT_STR_1 "[%%%lulu/%%%lulu]\n" +#define SIMPLE_ARCHIVER_BUFFER_SIZE (1024 * 32) + #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX @@ -246,8 +248,8 @@ int write_files_fn(void *data, void *ud) { } // Write file to pipe, and read from other pipe. - char write_buf[1024]; - char read_buf[1024]; + char write_buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; + char read_buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; int_fast8_t write_again = 0; int_fast8_t write_done = 0; int_fast8_t read_done = 0; @@ -265,7 +267,8 @@ int write_files_fn(void *data, void *ud) { // Read from file. if (!write_done) { if (!write_again) { - write_count = fread(write_buf, 1, 1024, file_fd); + write_count = + fread(write_buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, file_fd); } if (write_count > 0) { ret = write(pipe_into_cmd[1], write_buf, write_count); @@ -307,7 +310,7 @@ int write_files_fn(void *data, void *ud) { // Read from compressor. if (!read_done) { - ret = read(pipe_outof_cmd[0], read_buf, 1024); + ret = read(pipe_outof_cmd[0], read_buf, SIMPLE_ARCHIVER_BUFFER_SIZE); if (ret > 0) { read_count = fwrite(read_buf, 1, (size_t)ret, tmp_fd); if (read_count != (size_t)ret) { @@ -463,9 +466,9 @@ int write_files_fn(void *data, void *ud) { // Write file. 
fprintf(stderr, "Writing compressed file: %s\n", file_info->filename); do { - write_count = fread(write_buf, 1, 1024, tmp_fd); - if (write_count == 1024) { - fwrite(write_buf, 1, 1024, state->out_f); + write_count = fread(write_buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, tmp_fd); + if (write_count == SIMPLE_ARCHIVER_BUFFER_SIZE) { + fwrite(write_buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, state->out_f); } else if (write_count > 0) { fwrite(write_buf, 1, write_count, state->out_f); } @@ -604,12 +607,12 @@ int write_files_fn(void *data, void *ud) { // Write file. fprintf(stderr, "Writing file: %s\n", file_info->filename); - char buf[1024]; + char buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; size_t ret; do { - ret = fread(buf, 1, 1024, fd); - if (ret == 1024) { - fwrite(buf, 1, 1024, state->out_f); + ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, fd); + if (ret == SIMPLE_ARCHIVER_BUFFER_SIZE) { + fwrite(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, state->out_f); } else if (ret > 0) { fwrite(buf, 1, ret, state->out_f); } @@ -958,19 +961,19 @@ int read_fd_to_out_fd(FILE *in_fd, FILE *out_fd, char *read_buf, int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, char *buf, const size_t buf_size, char *hold_buf, - int *has_hold) { + ssize_t *has_hold) { if (*to_dec_pipe >= 0) { if (*chunk_remaining > 0) { if (*chunk_remaining > buf_size) { if (*has_hold < 0) { - size_t fread_ret = fread(buf, 1, 1024, in_f); + size_t fread_ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, in_f); if (fread_ret == 0) { goto TRY_WRITE_TO_DECOMP_END; } else { ssize_t write_ret = write(*to_dec_pipe, buf, fread_ret); if (write_ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) { - *has_hold = (int)fread_ret; + *has_hold = (ssize_t)fread_ret; memcpy(hold_buf, buf, fread_ret); return SDAS_SUCCESS; } else { @@ -978,6 +981,11 @@ int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, } } else if (write_ret == 0) { return SDAS_INTERNAL_ERROR; + } else if ((size_t)write_ret < 
fread_ret) { + *chunk_remaining -= (size_t)write_ret; + *has_hold = (ssize_t)fread_ret - write_ret; + memcpy(hold_buf, buf + write_ret, (size_t)*has_hold); + return SDAS_SUCCESS; } else { *chunk_remaining -= (size_t)write_ret; } @@ -992,6 +1000,12 @@ int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, } } else if (write_ret == 0) { return SDAS_INTERNAL_ERROR; + } else if (write_ret < *has_hold) { + *chunk_remaining -= (size_t)write_ret; + memcpy(buf, hold_buf + write_ret, (size_t)(*has_hold - write_ret)); + memcpy(hold_buf, buf, (size_t)(*has_hold - write_ret)); + *has_hold = *has_hold - write_ret; + return SDAS_SUCCESS; } else { *chunk_remaining -= (size_t)*has_hold; *has_hold = -1; @@ -1014,6 +1028,11 @@ int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, } } else if (write_ret == 0) { return SDAS_INTERNAL_ERROR; + } else if ((size_t)write_ret < fread_ret) { + *chunk_remaining -= (size_t)write_ret; + *has_hold = (ssize_t)fread_ret - write_ret; + memcpy(hold_buf, buf + write_ret, (size_t)*has_hold); + return SDAS_SUCCESS; } else if ((size_t)write_ret <= *chunk_remaining) { *chunk_remaining -= (size_t)write_ret; } else { @@ -1030,6 +1049,12 @@ int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, } } else if (write_ret == 0) { return SDAS_INTERNAL_ERROR; + } else if (write_ret < *has_hold) { + *chunk_remaining -= (size_t)write_ret; + memcpy(buf, hold_buf + write_ret, (size_t)(*has_hold - write_ret)); + memcpy(hold_buf, buf, (size_t)(*has_hold - write_ret)); + *has_hold = *has_hold - write_ret; + return SDAS_SUCCESS; } else { *chunk_remaining -= (size_t)*has_hold; *has_hold = -1; @@ -1040,7 +1065,7 @@ int try_write_to_decomp(int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, } TRY_WRITE_TO_DECOMP_END: - if (*to_dec_pipe >= 0 && *chunk_remaining == 0) { + if (*to_dec_pipe >= 0 && *chunk_remaining == 0 && *has_hold < 0) { close(*to_dec_pipe); *to_dec_pipe = -1; } @@ -1053,7 
+1078,7 @@ int read_decomp_to_out_file(const char *out_filename, int in_pipe, char *read_buf, const size_t read_buf_size, const uint64_t file_size, int *to_dec_pipe, uint64_t *chunk_remaining, FILE *in_f, - char *hold_buf, int *has_hold) { + char *hold_buf, ssize_t *has_hold) { __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *out_fd = NULL; if (out_filename) { @@ -1809,7 +1834,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, return SDAS_FAILED_TO_WRITE; } - char buf[1024]; + char buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; uint16_t u16 = 1; simple_archiver_helper_16_bit_be(&u16); @@ -2293,8 +2318,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, fopen(file_info_struct->filename, "rb"); int_fast8_t to_comp_finished = 0; - char hold_buf[1024]; - int has_hold = -1; + char hold_buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; + ssize_t has_hold = -1; while (!to_comp_finished) { if (!to_comp_finished) { // Write to compressor. @@ -2303,7 +2328,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, return SDAS_INTERNAL_ERROR; } if (has_hold < 0) { - size_t fread_ret = fread(buf, 1, 1024, fd); + size_t fread_ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, fd); if (fread_ret > 0) { ssize_t write_ret = write(pipe_into_write, buf, fread_ret); if (write_ret < 0) { @@ -2317,15 +2342,18 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, "ERROR: Writing to compressor, pipe write error!\n"); return SDAS_FAILED_TO_WRITE; } - } else if ((size_t)write_ret != fread_ret) { + } else if (write_ret == 0) { fprintf( stderr, "ERROR: Writing to compressor, unable to write bytes!\n"); return SDAS_FAILED_TO_WRITE; + } else if ((size_t)write_ret < fread_ret) { + has_hold = (ssize_t)fread_ret - write_ret; + memcpy(hold_buf, buf + write_ret, (size_t)has_hold); } } - if (feof(fd)) { + if (feof(fd) && has_hold < 0) { to_comp_finished = 1; } } else { @@ -2337,6 +2365,11 @@ int simple_archiver_write_v1(FILE *out_f, 
SDArchiverState *state, } else { return SDAS_INTERNAL_ERROR; } + } else if (write_ret < has_hold) { + memcpy(buf, hold_buf + write_ret, + (size_t)(has_hold - write_ret)); + memcpy(hold_buf, buf, (size_t)(has_hold - write_ret)); + has_hold = has_hold - write_ret; } else if (write_ret != has_hold) { return SDAS_INTERNAL_ERROR; } else { @@ -2346,7 +2379,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, } // Write compressed data to temp file. - ssize_t read_ret = read(pipe_outof_read, buf, 1024); + ssize_t read_ret = + read(pipe_outof_read, buf, SIMPLE_ARCHIVER_BUFFER_SIZE); if (read_ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) { // Non-blocking read. @@ -2375,7 +2409,8 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, // Finish writing. if (!to_temp_finished) { while (1) { - ssize_t read_ret = read(pipe_outof_read, buf, 1024); + ssize_t read_ret = + read(pipe_outof_read, buf, SIMPLE_ARCHIVER_BUFFER_SIZE); if (read_ret < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK) { // Non-blocking read. 
@@ -2424,7 +2459,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, if (ferror(temp_fd)) { return SDAS_INTERNAL_ERROR; } - size_t fread_ret = fread(buf, 1, 1024, temp_fd); + size_t fread_ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, temp_fd); if (fread_ret > 0) { size_t fwrite_ret = fwrite(buf, 1, fread_ret, out_f); written_size += fread_ret; @@ -2468,7 +2503,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state, fprintf(stderr, "ERROR: Writing to chunk, file read error!\n"); return SDAS_INTERNAL_ERROR; } - size_t fread_ret = fread(buf, 1, 1024, fd); + size_t fread_ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, fd); if (fread_ret > 0) { size_t fwrite_ret = fwrite(buf, 1, fread_ret, out_f); if (fwrite_ret != fread_ret) { @@ -2513,8 +2548,7 @@ int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state) { - uint8_t buf[1024]; - memset(buf, 0, 1024); + uint8_t buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; uint16_t u16; uint32_t u32; uint64_t u64; @@ -2547,11 +2581,11 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, } simple_archiver_helper_16_bit_be(&u16); fprintf(stderr, "Compressor size is %u\n", u16); - if (u16 < 1024) { + if (u16 < SIMPLE_ARCHIVER_BUFFER_SIZE) { if (fread(buf, 1, u16 + 1, in_f) != (size_t)u16 + 1) { return SDAS_INVALID_FILE; } - buf[1023] = 0; + buf[SIMPLE_ARCHIVER_BUFFER_SIZE - 1] = 0; fprintf(stderr, "Compressor cmd: %s\n", buf); } else { __attribute__(( @@ -2571,11 +2605,11 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, } simple_archiver_helper_16_bit_be(&u16); fprintf(stderr, "Decompressor size is %u\n", u16); - if (u16 < 1024) { + if (u16 < SIMPLE_ARCHIVER_BUFFER_SIZE) { if (fread(buf, 1, u16 + 1, in_f) != (size_t)u16 + 1) { return SDAS_INVALID_FILE; } - buf[1023] = 0; + buf[SIMPLE_ARCHIVER_BUFFER_SIZE - 1] = 0; 
fprintf(stderr, "Decompressor cmd: %s\n", buf); decompressor_cmd = malloc(u16 + 1); memcpy((char *)decompressor_cmd, buf, u16 + 1); @@ -2637,11 +2671,11 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_cleanup_malloced))) void *out_f_name = NULL; __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *out_f = NULL; - if (u16 < 1024) { + if (u16 < SIMPLE_ARCHIVER_BUFFER_SIZE) { if (fread(buf, 1, u16 + 1, in_f) != (size_t)u16 + 1) { return SDAS_INVALID_FILE; } - buf[1023] = 0; + buf[SIMPLE_ARCHIVER_BUFFER_SIZE - 1] = 0; fprintf(stderr, " Filename: %s\n", buf); if (simple_archiver_validate_file_path((char *)buf)) { fprintf(stderr, " ERROR: Invalid filename!\n"); @@ -2951,7 +2985,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, int_fast8_t write_pipe_done = 0; int_fast8_t read_pipe_done = 0; size_t fread_ret = 0; - char recv_buf[1024]; + char recv_buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; size_t amount_to_read; while (!write_pipe_done || !read_pipe_done) { if (is_sig_pipe_occurred) { @@ -2964,8 +2998,8 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, // Read from file. if (!write_pipe_done) { if (!write_again && compressed_file_size != 0) { - if (compressed_file_size > 1024) { - amount_to_read = 1024; + if (compressed_file_size > SIMPLE_ARCHIVER_BUFFER_SIZE) { + amount_to_read = SIMPLE_ARCHIVER_BUFFER_SIZE; } else { amount_to_read = compressed_file_size; } @@ -3007,7 +3041,8 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, // Read output from decompressor and write to file. 
if (!read_pipe_done) { - ssize_t read_ret = read(pipe_outof_cmd[0], recv_buf, 1024); + ssize_t read_ret = read(pipe_outof_cmd[0], recv_buf, + SIMPLE_ARCHIVER_BUFFER_SIZE); if (read_ret > 0) { size_t fwrite_ret = fwrite(recv_buf, 1, (size_t)read_ret, out_f); @@ -3063,8 +3098,8 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, uint64_t compressed_file_size = u64; size_t fread_ret; while (compressed_file_size != 0) { - if (compressed_file_size > 1024) { - fread_ret = fread(buf, 1, 1024, in_f); + if (compressed_file_size > SIMPLE_ARCHIVER_BUFFER_SIZE) { + fread_ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, in_f); if (ferror(in_f)) { // Error. return SDAS_INTERNAL_ERROR; @@ -3107,8 +3142,8 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, #endif } else { while (u64 != 0) { - if (u64 > 1024) { - size_t read_ret = fread(buf, 1, 1024, in_f); + if (u64 > SIMPLE_ARCHIVER_BUFFER_SIZE) { + size_t read_ret = fread(buf, 1, SIMPLE_ARCHIVER_BUFFER_SIZE, in_f); if (read_ret > 0) { u64 -= read_ret; } else if (ferror(in_f)) { @@ -3142,11 +3177,11 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_16_bit_be(&u16); if (u16 == 0) { fprintf(stderr, " Link does not have absolute path.\n"); - } else if (u16 < 1024) { + } else if (u16 < SIMPLE_ARCHIVER_BUFFER_SIZE) { if (fread(buf, 1, u16 + 1, in_f) != (size_t)u16 + 1) { return SDAS_INVALID_FILE; } - buf[1023] = 0; + buf[SIMPLE_ARCHIVER_BUFFER_SIZE - 1] = 0; fprintf(stderr, " Link absolute path: %s\n", buf); abs_path = malloc((size_t)u16 + 1); strncpy(abs_path, (char *)buf, (size_t)u16 + 1); @@ -3165,11 +3200,11 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_16_bit_be(&u16); if (u16 == 0) { fprintf(stderr, " Link does not have relative path.\n"); - } else if (u16 < 1024) { + } else if (u16 < SIMPLE_ARCHIVER_BUFFER_SIZE) { if (fread(buf, 1, u16 + 1, in_f) != 
(size_t)u16 + 1) { return SDAS_INVALID_FILE; } - buf[1023] = 0; + buf[SIMPLE_ARCHIVER_BUFFER_SIZE - 1] = 0; fprintf(stderr, " Link relative path: %s\n", buf); rel_path = malloc((size_t)u16 + 1); strncpy(rel_path, (char *)buf, (size_t)u16 + 1); @@ -3344,8 +3379,7 @@ int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract, int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, const SDArchiverState *state) { - uint8_t buf[1024]; - memset(buf, 0, 1024); + uint8_t buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; uint16_t u16; uint32_t u32; uint64_t u64; @@ -3395,7 +3429,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_16_bit_be(&u16); compressor_cmd = malloc(u16 + 1); int ret = - read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, compressor_cmd); + read_buf_full_from_fd(in_f, (char *)buf, SIMPLE_ARCHIVER_BUFFER_SIZE, + u16 + 1, compressor_cmd); if (ret != SDAS_SUCCESS) { return ret; } @@ -3409,8 +3444,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, memcpy(&u16, buf, 2); simple_archiver_helper_16_bit_be(&u16); decompressor_cmd = malloc(u16 + 1); - ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, - decompressor_cmd); + ret = read_buf_full_from_fd(in_f, (char *)buf, SIMPLE_ARCHIVER_BUFFER_SIZE, + u16 + 1, decompressor_cmd); if (ret != SDAS_SUCCESS) { return ret; } @@ -3457,8 +3492,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, cleanup(simple_archiver_helper_cleanup_c_string))) char *link_name = malloc(u16 + 1); - int ret = - read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, link_name); + int ret = read_buf_full_from_fd( + in_f, (char *)buf, SIMPLE_ARCHIVER_BUFFER_SIZE, u16 + 1, link_name); if (ret != SDAS_SUCCESS) { return ret; } @@ -3500,7 +3535,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, __attribute__(( 
cleanup(simple_archiver_helper_cleanup_c_string))) char *path = malloc(u16 + 1); - ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, path); + ret = read_buf_full_from_fd(in_f, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, u16 + 1, path); if (ret != SDAS_SUCCESS) { return ret; } @@ -3571,7 +3607,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, __attribute__(( cleanup(simple_archiver_helper_cleanup_c_string))) char *path = malloc(u16 + 1); - ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, path); + ret = read_buf_full_from_fd(in_f, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, u16 + 1, path); if (ret != SDAS_SUCCESS) { return ret; } @@ -3675,8 +3712,9 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_16_bit_be(&u16); file_info->filename = malloc(u16 + 1); - int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, u16 + 1, - file_info->filename); + int ret = + read_buf_full_from_fd(in_f, (char *)buf, SIMPLE_ARCHIVER_BUFFER_SIZE, + u16 + 1, file_info->filename); if (ret != SDAS_SUCCESS) { return ret; } @@ -3848,8 +3886,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, return SDAS_INTERNAL_ERROR; } - char hold_buf[1024]; - int has_hold = -1; + char hold_buf[SIMPLE_ARCHIVER_BUFFER_SIZE]; + ssize_t has_hold = -1; while (node->next != file_info_list->tail) { node = node->next; @@ -3883,7 +3921,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " WARNING: File already exists and " "\"--overwrite-extract\" is not specified, skipping!\n"); - read_decomp_to_out_file(NULL, pipe_outof_read, (char *)buf, 1024, + read_decomp_to_out_file(NULL, pipe_outof_read, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, file_info->file_size, &pipe_into_write, &chunk_remaining, in_f, hold_buf, &has_hold); @@ -3893,9 +3932,9 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, 
simple_archiver_helper_make_dirs(file_info->filename); int ret = read_decomp_to_out_file( - file_info->filename, pipe_outof_read, (char *)buf, 1024, - file_info->file_size, &pipe_into_write, &chunk_remaining, in_f, - hold_buf, &has_hold); + file_info->filename, pipe_outof_read, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, file_info->file_size, + &pipe_into_write, &chunk_remaining, in_f, hold_buf, &has_hold); if (ret != SDAS_SUCCESS) { return ret; } @@ -3925,15 +3964,17 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " File size: %lu\n", file_info->file_size); } int ret = read_decomp_to_out_file( - NULL, pipe_outof_read, (char *)buf, 1024, file_info->file_size, - &pipe_into_write, &chunk_remaining, in_f, hold_buf, &has_hold); + NULL, pipe_outof_read, (char *)buf, SIMPLE_ARCHIVER_BUFFER_SIZE, + file_info->file_size, &pipe_into_write, &chunk_remaining, in_f, + hold_buf, &has_hold); if (ret != SDAS_SUCCESS) { return ret; } } else { int ret = read_decomp_to_out_file( - NULL, pipe_outof_read, (char *)buf, 1024, file_info->file_size, - &pipe_into_write, &chunk_remaining, in_f, hold_buf, &has_hold); + NULL, pipe_outof_read, (char *)buf, SIMPLE_ARCHIVER_BUFFER_SIZE, + file_info->file_size, &pipe_into_write, &chunk_remaining, in_f, + hold_buf, &has_hold); if (ret != SDAS_SUCCESS) { return ret; } @@ -3941,7 +3982,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } // Ensure EOF is left from pipe. 
- ssize_t read_ret = read(pipe_outof_read, buf, 1024); + ssize_t read_ret = + read(pipe_outof_read, buf, SIMPLE_ARCHIVER_BUFFER_SIZE); if (read_ret > 0) { fprintf(stderr, "WARNING decompressor didn't reach EOF!\n"); } @@ -3987,7 +4029,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, fprintf(stderr, " WARNING: File already exists and " "\"--overwrite-extract\" is not specified, skipping!\n"); - int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, + int ret = read_buf_full_from_fd(in_f, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, file_info->file_size, NULL); if (ret != SDAS_SUCCESS) { return ret; @@ -3998,7 +4041,8 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, simple_archiver_helper_make_dirs(file_info->filename); __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *out_fd = fopen(file_info->filename, "wb"); - int ret = read_fd_to_out_fd(in_f, out_fd, (char *)buf, 1024, + int ret = read_fd_to_out_fd(in_f, out_fd, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, file_info->file_size); if (ret != SDAS_SUCCESS) { return ret; @@ -4029,13 +4073,15 @@ int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract, } else { fprintf(stderr, " File size: %lu\n", file_info->file_size); } - int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, + int ret = read_buf_full_from_fd(in_f, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, file_info->file_size, NULL); if (ret != SDAS_SUCCESS) { return ret; } } else { - int ret = read_buf_full_from_fd(in_f, (char *)buf, 1024, + int ret = read_buf_full_from_fd(in_f, (char *)buf, + SIMPLE_ARCHIVER_BUFFER_SIZE, file_info->file_size, NULL); if (ret != SDAS_SUCCESS) { return ret; From bef9c37d724d07401ba9fecb0a8816ff06d373bf Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 8 Oct 2024 10:39:21 +0900 Subject: [PATCH 36/37] Minor fix --- src/archiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/archiver.c 
b/src/archiver.c index 8e57c5a..e1ca153 100644 --- a/src/archiver.c +++ b/src/archiver.c @@ -1538,6 +1538,7 @@ int symlinks_and_files_from_files(void *data, void *ud) { } file_info_struct->uid = stat_buf.st_uid; file_info_struct->gid = stat_buf.st_gid; +#endif __attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd = fopen(file_info_struct->filename, "rb"); if (!fd) { @@ -1556,7 +1557,6 @@ int symlinks_and_files_from_files(void *data, void *ud) { file_info_struct->file_size = (uint64_t)ftell_ret; simple_archiver_list_add(files_list, file_info_struct, free_internal_file_info); -#endif } } From 62195c4d7d7c987fe45bf55b931025b2a06776c1 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 8 Oct 2024 10:55:24 +0900 Subject: [PATCH 37/37] Update README.md, usage text --- README.md | 4 ++-- src/parser.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7d4a7af..ec456c3 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,8 @@ API calls. Use "-f -" to work on stdout when creating archive or stdin when reading archive NOTICE: "-f" is not affected by "-C"! 
-C : Change current working directory before archiving/extracting - --compressor : requires --decompressor - --decompressor : requires --compressor + --compressor : requires --decompressor and cmd must use stdin/stdout + --decompressor : requires --compressor and cmd must use stdin/stdout Specifying "--decompressor" when extracting overrides archive file's stored decompressor cmd --overwrite-create : allows overwriting an archive file --overwrite-extract : allows overwriting when extracting diff --git a/src/parser.c b/src/parser.c index dd6383a..31a9ea8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -156,9 +156,11 @@ void simple_archiver_print_usage(void) { "-C : Change current working directory before " "archiving/extracting\n"); fprintf(stderr, - "--compressor : requires --decompressor\n"); + "--compressor : requires --decompressor and cmd " + "must use stdin/stdout\n"); fprintf(stderr, - "--decompressor : requires --compressor\n"); + "--decompressor : requires --compressor and " + "cmd must use stdin/stdout\n"); fprintf(stderr, " Specifying \"--decompressor\" when extracting overrides archive " "file's stored decompressor cmd\n");