Compare commits

...

34 commits

Author SHA1 Message Date
70b7050836 Merge branch 'issue_18_improve_compression'
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 1m7s
Resolves #18
2024-10-08 12:42:02 +09:00
62195c4d7d Update README.md, usage text
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
2024-10-08 10:55:24 +09:00
bef9c37d72 Minor fix
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 5s
2024-10-08 10:39:21 +09:00
aa46172aa7 Buffer size to 32KiB, fixes
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
2024-10-07 15:16:05 +09:00
8bae61d36d Minor quality-of-life fixes
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
2024-10-07 14:03:12 +09:00
3c739f92b8 Fix errors de/compressing chunks
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
2024-10-07 13:23:44 +09:00
f26509f227 Impl. more robust compression 2024-10-07 12:11:03 +09:00
1a16c2c3bb Impl. more robust decompression 2024-10-07 11:37:36 +09:00
36fb7bf042 clang-format, ensure --no-abs-symlink works in v1
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 9s
2024-10-04 21:39:13 +09:00
cf032cd9c1 Update README.md
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 7s
2024-10-04 21:28:29 +09:00
b8c56026d1 Add filename validation for test/extracting
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 8s
This should prevent creation of files/symlinks outside of
current-working-directory or user-set-cwd.
2024-10-04 21:24:10 +09:00
b1745172f7 Fix release build compiler warnings
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
2024-10-04 17:46:30 +09:00
302f7f804d Fix reordering of cleanup fns in archiver.c
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 15s
2024-10-04 17:35:12 +09:00
8e620fb0e9 Reorder cleanup to proper location
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
2024-10-04 17:32:14 +09:00
8982b15cc5 Impl. create archive with compression, fixes
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 7s
2024-10-04 17:20:01 +09:00
1b7fcb2bfc WIP Impl. archiving without compression
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s
TODO:
    archiving with compression
2024-10-04 15:36:53 +09:00
166632fc15 Update file_format for v1
Size of bytes for files per chunk was changed from 2 bytes to 4 bytes.
2024-10-04 14:02:54 +09:00
8040006afe WIP Work on v1 create archive
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 5s
Implemented file-format up to list of symlinks.
2024-10-04 12:27:46 +09:00
7b6929397e Refactor function, add its declaration to header 2024-10-04 11:19:57 +09:00
c7cd445139 Refactorings for v1 extract, other refactorings
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 5s
2024-10-02 15:11:23 +09:00
b09948d245 Split "rel-path-from-abs-paths" into function 2024-10-02 15:11:23 +09:00
6376be2840 Add support for writing multiple file formats 2024-10-02 15:11:23 +09:00
da18464d5d Test/fix symlink test/extract in v1 file format 2024-10-02 15:11:23 +09:00
8fa430f842 Fix v1 archive decompression
Previous implementation sent too many bytes to decompressor if size was
less than 1024.
2024-10-02 15:11:23 +09:00
53fefb7ae8 Remove unnecessary printf used for testing 2024-10-02 15:11:23 +09:00
45fdffdc9c Impl. extract with decompressor file format v1 2024-10-02 15:11:23 +09:00
7407972450 v1 extract skip non-specified args if exists 2024-10-02 15:11:23 +09:00
3d58f466af Impl. setting stored UID/GID if EUID 0 2024-10-02 15:11:23 +09:00
b273d91896 "Fix" Linux/Mac/Unix usage 2024-10-02 15:11:23 +09:00
c71f4f45c7 Impl. simple test/extract new file format (WIP)
TODO:
    Extract symlinks in new format (implemented but untested).
    Extract compressed files in new format.
2024-10-02 15:11:23 +09:00
c1faae90e9 Split up handling of archive file based on version 2024-10-02 15:11:23 +09:00
d625c1b1cb Fix typo 2024-10-02 15:11:23 +09:00
a7aa31fc89 Fix typo in file format specification version 1 2024-10-02 15:11:23 +09:00
f76e383e78 Create file format for format version 1
This is in preparation of improving compression by concatenating files
together before compressing them to reduce the per-file overhead.
2024-10-02 15:11:23 +09:00
16 changed files with 2592 additions and 141 deletions

View file

@ -83,6 +83,7 @@ add_executable(test_simplearchiver
src/test.c
src/parser.c
src/helpers.c
src/archiver.c
src/algorithms/linear_congruential_gen.c
src/data_structures/linked_list.c
src/data_structures/hash_map.c

View file

@ -20,13 +20,15 @@ API calls.
Use "-f -" to work on stdout when creating archive or stdin when reading archive
NOTICE: "-f" is not affected by "-C"!
-C <dir> : Change current working directory before archiving/extracting
--compressor <full_compress_cmd> : requires --decompressor
--decompressor <full_decompress_cmd> : requires --compressor
--compressor <full_compress_cmd> : requires --decompressor and cmd must use stdin/stdout
--decompressor <full_decompress_cmd> : requires --compressor and cmd must use stdin/stdout
Specifying "--decompressor" when extracting overrides archive file's stored decompressor cmd
--overwrite-create : allows overwriting an archive file
--overwrite-extract : allows overwriting when extracting
--no-abs-symlink : do not store absolute paths for symlinks
--temp-files-dir <dir> : where to store temporary files created when compressing (defaults to current working directory)
--write-version <version> : Force write version file format (default 1)
--chunk-min-size <bytes> : v1 file format minimum chunk size (default 4194304 or 4MiB)
-- : specifies remaining arguments are files to archive/extract
If creating archive file, remaining args specify files to archive.
If extracting archive file, remaining args specify files to extract.

View file

@ -76,3 +76,117 @@ Following the file-count bytes, the following bytes are added for each file:
1. 8 bytes 64-bit unsigned integer "size of this file's data in this archive
   file" in big-endian.
2. X bytes file data (length defined by previous value).
## Format Version 1
File extension is "*.simplearchive" but this isn't really checked.
First 18 bytes of file will be (in ascii):
SIMPLE_ARCHIVE_VER
Next 2 bytes is a 16-bit unsigned integer "version" in big-endian. It will be:
0x00 0x01
Next 4 bytes are bit-flags.
1. The first byte
    1. The first bit is set if de/compressor is set for this archive.
The remaining unused flags in the previous bit-flags bytes are reserved for
future revisions and are currently ignored.
If the previous "de/compressor is set" flag is enabled, then the next section is
added:
1. 2 bytes is a 16-bit unsigned integer "size of compressor cmd+args" in
   big-endian. This size does not include the NULL at the end of the string.
2. X bytes of "compressor cmd+args" (length defined by previous value). Is a
   NULL-terminated string.
3. 2 bytes is a 16-bit unsigned integer "size of decompressor cmd+args" in
   big-endian. This size does not include the NULL at the end of the string.
4. X bytes of "decompressor cmd+args" (length defined by previous value). Is a
   NULL-terminated string.
The next 4 bytes is a 32-bit unsigned integer "link count" in big-endian which
will indicate the number of symbolic links in this archive.
Following the link-count bytes, the following bytes are added for each symlink:
1. 2 bytes bit-flags:
    1. The first byte.
        1. The first bit is UNSET if relative links are preferred, and is SET if
           absolute links are preferred.
        2. The second bit is "user read permission".
        3. The third bit is "user write permission".
        4. The fourth bit is "user execute permission".
        5. The fifth bit is "group read permission".
        6. The sixth bit is "group write permission".
        7. The seventh bit is "group execute permission".
        8. The eighth bit is "other read permission".
    2. The second byte.
        1. The first bit is "other write permission".
        2. The second bit is "other execute permission".
2. 2 bytes is a 16-bit unsigned integer "size of link name" in big-endian. This
   size does not include the NULL at the end of the string. Must not be zero.
3. X bytes of link-name (length defined by previous value). Is a NULL-terminated
   string.
4. 2 bytes is a 16-bit unsigned integer "size of link target absolute path" in
   big-endian. This size does not include the NULL at the end of the string.
5. X bytes of link-target-absolute-path (length defined by previous value).
   Is a NULL-terminated string. If the previous "size" value is 0, then
   this entry does not exist and should be skipped.
6. 2 bytes is a 16-bit unsigned integer "size of link target relative path" in
   big-endian. This size does not include the NULL at the end of the string.
7. X bytes of link-target-relative-path (length defined by previous value).
   Is a NULL-terminated string. If the previous "size" value is 0, then
   this entry does not exist and should be skipped.
After the symlink related data, the next 4 bytes is a 32-bit unsigned integer
"chunk count" in big-endian which will indicate the number of chunks in this
archive.
Following the chunk-count bytes, the following bytes are added for each chunk:
1. 4 bytes that are a 32-bit unsigned integer "file count" in big-endian.
The following bytes are added for each file within the current chunk:
1. 2 bytes that are a 16-bit unsigned integer "filename length" in big-endian.
This does not include the NULL at the end of the string.
2. X bytes of filename (length defined by previous value). Is a NULL-terminated
string.
3. 4 bytes bit-flags.
    1. The first byte.
        1. The first bit is "user read permission".
        2. The second bit is "user write permission".
        3. The third bit is "user execute permission".
        4. The fourth bit is "group read permission".
        5. The fifth bit is "group write permission".
        6. The sixth bit is "group execute permission".
        7. The seventh bit is "other read permission".
        8. The eighth bit is "other write permission".
    2. The second byte.
        1. The first bit is "other execute permission".
    3. The third byte.
        1. Currently unused.
    4. The fourth byte.
        1. Currently unused.
4. Two 4-byte unsigned integers in big-endian for UID and GID.
    1. A 32-bit unsigned integer in big-endian that specifies the UID of the
       file. Note that during extraction, if the user is not root, then this
       value will be ignored.
    2. A 32-bit unsigned integer in big-endian that specifies the GID of the
       file. Note that during extraction, if the user is not root, then this
       value will be ignored.
5. A 64-bit unsigned integer in big-endian for the "size of file".
After the files' metadata are the current chunk's data:
1. A 64-bit unsigned integer in big-endian for the "size of chunk".
2. X bytes of data for the current chunk of the previously specified size. If
   not using de/compressor, this section is the previously mentioned files
   concatenated with each other. If using de/compressor, this section is the
   previously mentioned files concatenated and compressed into a single blob of
   data.

BIN
file_format_1_example_0 Normal file

Binary file not shown.

BIN
file_format_1_example_1 Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -40,7 +40,7 @@ typedef struct SDArchiverState {
size_t digits;
} SDArchiverState;
enum SDArchiverStateReturns {
typedef enum SDArchiverStateReturns {
SDAS_SUCCESS = 0,
SDAS_HEADER_ALREADY_WRITTEN = 1,
SDAS_FAILED_TO_WRITE,
@ -51,8 +51,9 @@ enum SDArchiverStateReturns {
SDAS_INTERNAL_ERROR,
SDAS_FAILED_TO_CREATE_MAP,
SDAS_FAILED_TO_EXTRACT_SYMLINK,
SDAS_FAILED_TO_CHANGE_CWD
};
SDAS_FAILED_TO_CHANGE_CWD,
SDAS_INVALID_WRITE_VERSION
} SDArchiverStateReturns;
/// Returned pointer must not be freed.
char *simple_archiver_error_to_string(enum SDArchiverStateReturns error);
@ -65,12 +66,46 @@ void simple_archiver_free_state(SDArchiverState **state);
int simple_archiver_write_all(FILE *out_f, SDArchiverState *state,
const SDArchiverLinkedList *filenames);
int simple_archiver_write_v0(FILE *out_f, SDArchiverState *state,
const SDArchiverLinkedList *filenames);
int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
const SDArchiverLinkedList *filenames);
/// Returns zero on success.
int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract,
const SDArchiverState *state);
/// Returns zero on success.
int simple_archiver_parse_archive_version_0(FILE *in_f, int_fast8_t do_extract,
const SDArchiverState *state);
/// Returns zero on success.
int simple_archiver_parse_archive_version_1(FILE *in_f, int_fast8_t do_extract,
const SDArchiverState *state);
/// Returns zero on success.
int simple_archiver_de_compress(int pipe_fd_in[2], int pipe_fd_out[2],
const char *cmd, void *pid_out);
/// If returns non-NULL, must be free'd.
char *simple_archiver_filenames_to_relative_path(const char *from_abs,
const char *to_abs);
/// Gets the absolute path to a file given a path to a file.
/// Should also work on symlinks such that the returned string is the path to
/// the link itself, not what it points to.
/// Non-NULL on success, and must be free'd if non-NULL.
char *simple_archiver_file_abs_path(const char *filename);
/// Used to validate a file path stored in a ".simplearchive" file to avoid
/// writing outside of the current working directory.
/// Returns zero if file is OK.
/// Returns 1 if file starts with '/'.
/// Returns 2 if file contains '../' at the start.
/// Returns 3 if file contains '/../' in the middle.
/// Returns 4 if file contains '/..' at the end.
/// Returns 5 if "filepath" is NULL.
int simple_archiver_validate_file_path(const char *filepath);
#endif

View file

@ -97,7 +97,8 @@ int main(int argc, const char **argv) {
int ret = simple_archiver_write_all(file, state, filenames);
if (ret != SDAS_SUCCESS) {
fprintf(stderr, "Error during writing.\n");
char *error_str = simple_archiver_error_to_string(ret);
char *error_str =
simple_archiver_error_to_string((SDArchiverStateReturns)ret);
fprintf(stderr, " %s\n", error_str);
}
fclose(file);
@ -113,7 +114,8 @@ int main(int argc, const char **argv) {
int ret = simple_archiver_write_all(stdout, state, filenames);
if (ret != SDAS_SUCCESS) {
fprintf(stderr, "Error during writing.\n");
char *error_str = simple_archiver_error_to_string(ret);
char *error_str =
simple_archiver_error_to_string((SDArchiverStateReturns)ret);
fprintf(stderr, " %s\n", error_str);
}
}
@ -130,7 +132,8 @@ int main(int argc, const char **argv) {
int ret = simple_archiver_parse_archive_info(file, 0, NULL);
if (ret != 0) {
fprintf(stderr, "Error during archive checking/examining.\n");
char *error_str = simple_archiver_error_to_string(ret);
char *error_str =
simple_archiver_error_to_string((SDArchiverStateReturns)ret);
fprintf(stderr, " %s\n", error_str);
}
fclose(file);
@ -138,7 +141,8 @@ int main(int argc, const char **argv) {
int ret = simple_archiver_parse_archive_info(stdin, 0, NULL);
if (ret != 0) {
fprintf(stderr, "Error during archive checking/examining.\n");
char *error_str = simple_archiver_error_to_string(ret);
char *error_str =
simple_archiver_error_to_string((SDArchiverStateReturns)ret);
fprintf(stderr, " %s\n", error_str);
}
}
@ -157,7 +161,8 @@ int main(int argc, const char **argv) {
int ret = simple_archiver_parse_archive_info(file, 1, state);
if (ret != SDAS_SUCCESS) {
fprintf(stderr, "Error during archive extracting.\n");
char *error_str = simple_archiver_error_to_string(ret);
char *error_str =
simple_archiver_error_to_string((SDArchiverStateReturns)ret);
fprintf(stderr, " %s\n", error_str);
}
fclose(file);
@ -165,7 +170,8 @@ int main(int argc, const char **argv) {
int ret = simple_archiver_parse_archive_info(stdin, 1, state);
if (ret != SDAS_SUCCESS) {
fprintf(stderr, "Error during archive extracting.\n");
char *error_str = simple_archiver_error_to_string(ret);
char *error_str =
simple_archiver_error_to_string((SDArchiverStateReturns)ret);
fprintf(stderr, " %s\n", error_str);
}
}

View file

@ -156,9 +156,11 @@ void simple_archiver_print_usage(void) {
"-C <dir> : Change current working directory before "
"archiving/extracting\n");
fprintf(stderr,
"--compressor <full_compress_cmd> : requires --decompressor\n");
"--compressor <full_compress_cmd> : requires --decompressor and cmd "
"must use stdin/stdout\n");
fprintf(stderr,
"--decompressor <full_decompress_cmd> : requires --compressor\n");
"--decompressor <full_decompress_cmd> : requires --compressor and "
"cmd must use stdin/stdout\n");
fprintf(stderr,
" Specifying \"--decompressor\" when extracting overrides archive "
"file's stored decompressor cmd\n");
@ -169,6 +171,12 @@ void simple_archiver_print_usage(void) {
fprintf(stderr,
"--temp-files-dir <dir> : where to store temporary files created "
"when compressing (defaults to current working directory)\n");
fprintf(stderr,
"--write-version <version> : Force write version file format "
"(default 1)\n");
fprintf(stderr,
"--chunk-min-size <bytes> : v1 file format minimum chunk size "
"(default 4194304 or 4MiB)\n");
fprintf(stderr,
"-- : specifies remaining arguments are files to archive/extract\n");
fprintf(
@ -189,6 +197,8 @@ SDArchiverParsed simple_archiver_create_parsed(void) {
parsed.working_files = NULL;
parsed.temp_dir = NULL;
parsed.user_cwd = NULL;
parsed.write_version = 1;
parsed.minimum_chunk_size = 4194304;
return parsed;
}
@ -299,11 +309,50 @@ int simple_archiver_parse_args(int argc, const char **argv,
out->temp_dir = argv[1];
--argc;
++argv;
} else if (strcmp(argv[0], "--write-version") == 0) {
if (argc < 2) {
fprintf(stderr,
"ERROR: --write-version expects an integer argument!\n");
simple_archiver_print_usage();
return 1;
}
int version = atoi(argv[1]);
if (version < 0) {
fprintf(stderr, "ERROR: --write-version cannot be negative!\n");
simple_archiver_print_usage();
return 1;
} else if (version > 1) {
fprintf(stderr, "ERROR: --write-version must be 0 or 1!\n");
simple_archiver_print_usage();
return 1;
}
out->write_version = (uint32_t)version;
--argc;
++argv;
} else if (strcmp(argv[0], "--chunk-min-size") == 0) {
if (argc < 2) {
fprintf(stderr,
"ERROR: --chunk-min-size expects an integer argument!\n");
simple_archiver_print_usage();
return 1;
}
out->minimum_chunk_size = strtoull(argv[1], NULL, 10);
if (out->minimum_chunk_size == 0) {
fprintf(stderr, "ERROR: --chunk-min-size cannot be zero!\n");
simple_archiver_print_usage();
return 1;
}
--argc;
++argv;
} else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) {
is_remaining_args = 1;
} else if (argv[0][0] != '-') {
is_remaining_args = 1;
continue;
} else {
fprintf(stderr, "ERROR: Got invalid arg \"%s\"!\n", argv[0]);
simple_archiver_print_usage();
return 1;
}
} else {
if (out->working_files == NULL) {

View file

@ -51,6 +51,10 @@ typedef struct SDArchiverParsed {
const char *temp_dir;
/// Dir specified by "-C".
const char *user_cwd;
/// Currently only versions 0 and 1 are supported.
uint32_t write_version;
/// The minimum size of a chunk in bytes (the last chunk may be less).
uint64_t minimum_chunk_size;
} SDArchiverParsed;
typedef struct SDArchiverFileInfo {

View file

@ -23,6 +23,7 @@
#include <string.h>
// Local includes.
#include "archiver.h"
#include "helpers.h"
#include "parser_internal.h"
@ -241,6 +242,27 @@ int main(void) {
free(out);
}
// Test archiver.
{
__attribute__((
cleanup(simple_archiver_helper_cleanup_c_string))) char *rel_path =
simple_archiver_filenames_to_relative_path(
"/one/two/three/four/five", "/one/two/branch/other/path");
CHECK_STREQ(rel_path, "../../branch/other/path");
simple_archiver_helper_cleanup_c_string(&rel_path);
rel_path = simple_archiver_filenames_to_relative_path(
"/one/two/three/four/five", "/one/two/three/other/dir/");
CHECK_STREQ(rel_path, "../other/dir/");
simple_archiver_helper_cleanup_c_string(&rel_path);
CHECK_FALSE(simple_archiver_validate_file_path("Local/Path"));
CHECK_TRUE(simple_archiver_validate_file_path("/Abs/Path"));
CHECK_TRUE(simple_archiver_validate_file_path("Local/../../not/really"));
CHECK_TRUE(simple_archiver_validate_file_path("./../almost"));
CHECK_TRUE(simple_archiver_validate_file_path("strange/.."));
}
printf("Checks checked: %u\n", checks_checked);
printf("Checks passed: %u\n", checks_passed);
return checks_passed == checks_checked ? 0 : 1;