WIP Impl. archiving without compression
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s

TODO:
    archiving with compression
This commit is contained in:
Stephen Seo 2024-10-04 15:36:53 +09:00
parent 166632fc15
commit 1b7fcb2bfc
3 changed files with 317 additions and 31 deletions

View file

@ -1330,6 +1330,7 @@ int symlinks_and_files_from_files(void *data, void *ud) {
void **ptr_array = ud; void **ptr_array = ud;
SDArchiverLinkedList *symlinks_list = ptr_array[0]; SDArchiverLinkedList *symlinks_list = ptr_array[0];
SDArchiverLinkedList *files_list = ptr_array[1]; SDArchiverLinkedList *files_list = ptr_array[1];
const char *user_cwd = ptr_array[2];
if (file_info->filename) { if (file_info->filename) {
if (file_info->link_dest) { if (file_info->link_dest) {
@ -1337,11 +1338,114 @@ int symlinks_and_files_from_files(void *data, void *ud) {
symlinks_list, file_info->filename, symlinks_list, file_info->filename,
simple_archiver_helper_datastructure_cleanup_nop); simple_archiver_helper_datastructure_cleanup_nop);
} else { } else {
simple_archiver_list_add( SDArchiverInternalFileInfo *file_info_struct =
files_list, file_info->filename, malloc(sizeof(SDArchiverInternalFileInfo));
simple_archiver_helper_datastructure_cleanup_nop); file_info_struct->filename = strdup(file_info->filename);
file_info_struct->bit_flags[0] = 0xFF;
file_info_struct->bit_flags[1] = 1;
file_info_struct->bit_flags[2] = 0;
file_info_struct->bit_flags[3] = 0;
file_info_struct->uid = 0;
file_info_struct->gid = 0;
file_info_struct->file_size = 0;
#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \
SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \
SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX
__attribute__((cleanup(
simple_archiver_helper_cleanup_chdir_back))) char *original_cwd =
NULL;
if (user_cwd) {
original_cwd = realpath(".", NULL);
if (chdir(user_cwd)) {
free(file_info_struct);
return 1;
} }
} }
struct stat stat_buf;
memset(&stat_buf, 0, sizeof(struct stat));
int stat_status = fstatat(AT_FDCWD, file_info_struct->filename, &stat_buf,
AT_SYMLINK_NOFOLLOW);
if (stat_status != 0) {
free(file_info_struct);
return 1;
}
file_info_struct->bit_flags[0] = 0;
file_info_struct->bit_flags[1] &= 0xFE;
if ((stat_buf.st_mode & S_IRUSR) != 0) {
file_info_struct->bit_flags[0] |= 1;
}
if ((stat_buf.st_mode & S_IWUSR) != 0) {
file_info_struct->bit_flags[0] |= 2;
}
if ((stat_buf.st_mode & S_IXUSR) != 0) {
file_info_struct->bit_flags[0] |= 4;
}
if ((stat_buf.st_mode & S_IRGRP) != 0) {
file_info_struct->bit_flags[0] |= 8;
}
if ((stat_buf.st_mode & S_IWGRP) != 0) {
file_info_struct->bit_flags[0] |= 0x10;
}
if ((stat_buf.st_mode & S_IXGRP) != 0) {
file_info_struct->bit_flags[0] |= 0x20;
}
if ((stat_buf.st_mode & S_IROTH) != 0) {
file_info_struct->bit_flags[0] |= 0x40;
}
if ((stat_buf.st_mode & S_IWOTH) != 0) {
file_info_struct->bit_flags[0] |= 0x80;
}
if ((stat_buf.st_mode & S_IXOTH) != 0) {
file_info_struct->bit_flags[1] |= 1;
}
file_info_struct->uid = stat_buf.st_uid;
file_info_struct->gid = stat_buf.st_gid;
__attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd =
fopen(file_info_struct->filename, "rb");
if (!fd) {
free(file_info_struct);
return 1;
}
if (fseek(fd, 0, SEEK_END) < 0) {
free(file_info_struct);
return 1;
}
long ftell_ret = ftell(fd);
if (ftell_ret < 0) {
free(file_info_struct);
return 1;
}
file_info_struct->file_size = (uint64_t)ftell_ret;
simple_archiver_list_add(files_list, file_info_struct,
free_internal_file_info);
#endif
}
}
return 0;
}
/// List-iteration callback that groups files into chunks by cumulative size.
///
/// `data` is the SDArchiverInternalFileInfo of the file being visited.
/// `ud` is an array of four pointers:
///   [0] const uint64_t* : size threshold at which a chunk is closed
///   [1] uint64_t*       : running byte total for the chunk being built
///   [2] uint64_t*       : running file count for the chunk being built
///   [3] SDArchiverLinkedList* : receives one heap-allocated uint64_t file
///                               count per completed chunk
///
/// Each call adds the file to the running totals. Once the running size
/// reaches the threshold, the file count is appended to the list and both
/// running totals are reset to zero. Any remainder left in the running totals
/// after the final call is not flushed here.
///
/// Returns 0 on success, or 1 if the count could not be allocated. The caller
/// that passes this callback to simple_archiver_list_get treats a non-zero
/// result from that call as an error.
int files_to_chunk_count(void *data, void *ud) {
  const SDArchiverInternalFileInfo *file_info_struct = data;
  void **ptrs = ud;
  const uint64_t *chunk_size = ptrs[0];
  uint64_t *current_size = ptrs[1];
  uint64_t *current_count = ptrs[2];
  SDArchiverLinkedList *chunk_counts = ptrs[3];

  // Account for this file in the chunk being built.
  ++(*current_count);
  *current_size += file_info_struct->file_size;

  // Close the chunk once it has reached the threshold.
  if (*current_size >= *chunk_size) {
    uint64_t *count = malloc(sizeof *count);
    if (!count) {
      // Report allocation failure instead of dereferencing NULL.
      return 1;
    }
    *count = *current_count;
    // NOTE(review): NULL is passed as the cleanup callback; confirm that the
    // list releases `count` in that case, otherwise this allocation leaks.
    simple_archiver_list_add(chunk_counts, count, NULL);
    *current_count = 0;
    *current_size = 0;
  }

  return 0;
}
@ -1576,9 +1680,10 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
__attribute__((cleanup(simple_archiver_list_free))) __attribute__((cleanup(simple_archiver_list_free)))
SDArchiverLinkedList *files_list = simple_archiver_list_init(); SDArchiverLinkedList *files_list = simple_archiver_list_init();
ptr_array = malloc(sizeof(void *) * 2); ptr_array = malloc(sizeof(void *) * 3);
ptr_array[0] = symlinks_list; ptr_array[0] = symlinks_list;
ptr_array[1] = files_list; ptr_array[1] = files_list;
ptr_array[2] = (void *)state->parsed->user_cwd;
if (simple_archiver_list_get(filenames, symlinks_and_files_from_files, if (simple_archiver_list_get(filenames, symlinks_and_files_from_files,
ptr_array)) { ptr_array)) {
@ -1667,7 +1772,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
} }
simple_archiver_helper_32_bit_be(&u32); simple_archiver_helper_32_bit_be(&u32);
{ // Change cwd if user specified.
__attribute__((cleanup( __attribute__((cleanup(
simple_archiver_helper_cleanup_chdir_back))) char *original_cwd = NULL; simple_archiver_helper_cleanup_chdir_back))) char *original_cwd = NULL;
if (state->parsed->user_cwd) { if (state->parsed->user_cwd) {
@ -1680,12 +1785,14 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
} }
#endif #endif
} }
{
const SDArchiverLLNode *node = symlinks_list->head; const SDArchiverLLNode *node = symlinks_list->head;
for (u32 = 0; for (u32 = 0;
u32 < (uint32_t)symlinks_list->count && node != symlinks_list->tail;) { u32 < (uint32_t)symlinks_list->count && node != symlinks_list->tail;) {
node = node->next; node = node->next;
++u32; ++u32;
u16 = 0; memset(buf, 0, 2);
#if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \ #if SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_COSMOPOLITAN || \
SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \ SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_MAC || \
SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX SIMPLE_ARCHIVER_PLATFORM == SIMPLE_ARCHIVER_PLATFORM_LINUX
@ -1710,7 +1817,7 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
!simple_archiver_hash_map_get(abs_filenames, abs_path, !simple_archiver_hash_map_get(abs_filenames, abs_path,
strlen(abs_path) + 1)) { strlen(abs_path) + 1)) {
// Is not a filename being archived, set preference to absolute path. // Is not a filename being archived, set preference to absolute path.
u16 |= 1; buf[0] |= 1;
} }
// Get symlink stats for permissions. // Get symlink stats for permissions.
@ -1723,37 +1830,37 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
} }
if ((stat_buf.st_mode & S_IRUSR) != 0) { if ((stat_buf.st_mode & S_IRUSR) != 0) {
u16 |= 2; buf[0] |= 2;
} }
if ((stat_buf.st_mode & S_IWUSR) != 0) { if ((stat_buf.st_mode & S_IWUSR) != 0) {
u16 |= 4; buf[0] |= 4;
} }
if ((stat_buf.st_mode & S_IXUSR) != 0) { if ((stat_buf.st_mode & S_IXUSR) != 0) {
u16 |= 8; buf[0] |= 8;
} }
if ((stat_buf.st_mode & S_IRGRP) != 0) { if ((stat_buf.st_mode & S_IRGRP) != 0) {
u16 |= 0x10; buf[0] |= 0x10;
} }
if ((stat_buf.st_mode & S_IWGRP) != 0) { if ((stat_buf.st_mode & S_IWGRP) != 0) {
u16 |= 0x20; buf[0] |= 0x20;
} }
if ((stat_buf.st_mode & S_IXGRP) != 0) { if ((stat_buf.st_mode & S_IXGRP) != 0) {
u16 |= 0x40; buf[0] |= 0x40;
} }
if ((stat_buf.st_mode & S_IROTH) != 0) { if ((stat_buf.st_mode & S_IROTH) != 0) {
u16 |= 0x80; buf[0] |= (char)0x80;
} }
if ((stat_buf.st_mode & S_IWOTH) != 0) { if ((stat_buf.st_mode & S_IWOTH) != 0) {
u16 |= 0x100; buf[1] |= 1;
} }
if ((stat_buf.st_mode & S_IXOTH) != 0) { if ((stat_buf.st_mode & S_IXOTH) != 0) {
u16 |= 0x200; buf[1] |= 2;
} }
#else #else
u16 |= 0x3FE; buf[0] = 0xFE;
buf[1] = 3;
#endif #endif
simple_archiver_helper_16_bit_be(&u16); if (fwrite(buf, 1, 2, out_f) != 2) {
if (fwrite(&u16, 2, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE; return SDAS_FAILED_TO_WRITE;
} }
@ -1828,12 +1935,170 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
} }
} }
// TODO Chunk count. __attribute__((cleanup(simple_archiver_list_free)))
SDArchiverLinkedList *chunk_counts = simple_archiver_list_init();
// TODO Impl. {
fprintf(stderr, "Writing v1 unimplemented\n"); uint64_t current_size = 0;
uint64_t current_count = 0;
void **ptrs = malloc(sizeof(void *) * 4);
ptrs[0] = (void *)&state->parsed->minimum_chunk_size;
ptrs[1] = &current_size;
ptrs[2] = &current_count;
ptrs[3] = chunk_counts;
if (simple_archiver_list_get(files_list, files_to_chunk_count, ptrs)) {
free(ptrs);
fprintf(stderr, "ERROR: Internal error calculating chunk counts!\n");
return SDAS_INTERNAL_ERROR; return SDAS_INTERNAL_ERROR;
} }
free(ptrs);
if (current_size > 0 && current_count > 0) {
uint64_t *count = malloc(sizeof(uint64_t));
*count = current_count;
simple_archiver_list_add(chunk_counts, count, NULL);
}
}
// Verify chunk counts.
{
uint64_t count = 0;
for (SDArchiverLLNode *node = chunk_counts->head->next;
node != chunk_counts->tail; node = node->next) {
if (*((uint64_t *)node->data) > 0xFFFFFFFF) {
fprintf(stderr, "ERROR: file count in chunk is too large!\n");
return SDAS_INTERNAL_ERROR;
}
count += *((uint64_t *)node->data);
// fprintf(stderr, "DEBUG: chunk count %4llu\n",
// *((uint64_t*)node->data));
}
if (count != files_list->count) {
fprintf(stderr,
"ERROR: Internal error calculating chunk counts (invalid number "
"of files)!\n");
return SDAS_INTERNAL_ERROR;
}
}
// Write number of chunks.
if (chunk_counts->count > 0xFFFFFFFF) {
fprintf(stderr, "ERROR: Too many chunks!\n");
return SDAS_INTERNAL_ERROR;
}
u32 = (uint32_t)chunk_counts->count;
simple_archiver_helper_32_bit_be(&u32);
if (fwrite(&u32, 4, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE;
}
__attribute__((cleanup(simple_archiver_helper_cleanup_malloced))) void
*non_compressing_chunk_size = NULL;
if (!state->parsed->compressor || !state->parsed->decompressor) {
non_compressing_chunk_size = malloc(sizeof(uint64_t));
}
uint64_t *non_c_chunk_size = non_compressing_chunk_size;
SDArchiverLLNode *file_node = files_list->head;
for (SDArchiverLLNode *chunk_c_node = chunk_counts->head->next;
chunk_c_node != chunk_counts->tail; chunk_c_node = chunk_c_node->next) {
// Write file count before iterating through files.
if (non_c_chunk_size) {
*non_c_chunk_size = 0;
}
u32 = (uint32_t)(*((uint64_t *)chunk_c_node->data));
simple_archiver_helper_32_bit_be(&u32);
if (fwrite(&u32, 4, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE;
}
SDArchiverLLNode *saved_node = file_node;
for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data);
++file_idx) {
file_node = file_node->next;
if (file_node == files_list->tail) {
return SDAS_INTERNAL_ERROR;
}
const SDArchiverInternalFileInfo *file_info_struct = file_node->data;
if (non_c_chunk_size) {
*non_c_chunk_size += file_info_struct->file_size;
}
size_t len = strlen(file_info_struct->filename);
if (len >= 0xFFFF) {
fprintf(stderr, "ERROR: Filename is too large!\n");
return SDAS_INVALID_FILE;
}
u16 = (uint16_t)len;
simple_archiver_helper_16_bit_be(&u16);
if (fwrite(&u16, 2, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE;
}
simple_archiver_helper_16_bit_be(&u16);
if (fwrite(file_info_struct->filename, 1, u16 + 1, out_f) !=
(size_t)u16 + 1) {
return SDAS_FAILED_TO_WRITE;
} else if (fwrite(file_info_struct->bit_flags, 1, 4, out_f) != 4) {
return SDAS_FAILED_TO_WRITE;
}
// UID and GID.
u32 = file_info_struct->uid;
simple_archiver_helper_32_bit_be(&u32);
if (fwrite(&u32, 4, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE;
}
u32 = file_info_struct->gid;
simple_archiver_helper_32_bit_be(&u32);
if (fwrite(&u32, 4, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE;
}
uint64_t u64 = file_info_struct->file_size;
simple_archiver_helper_64_bit_be(&u64);
if (fwrite(&u64, 8, 1, out_f) != 1) {
return SDAS_FAILED_TO_WRITE;
}
}
file_node = saved_node;
if (state->parsed->compressor && state->parsed->decompressor) {
// Is compressing.
fprintf(stderr, "Writing compressed v1 unimplemented\n");
return SDAS_INTERNAL_ERROR;
} else {
// Is NOT compressing.
if (!non_c_chunk_size) {
return SDAS_INTERNAL_ERROR;
}
simple_archiver_helper_64_bit_be(non_c_chunk_size);
fwrite(non_c_chunk_size, 8, 1, out_f);
for (uint64_t file_idx = 0; file_idx < *((uint64_t *)chunk_c_node->data);
++file_idx) {
file_node = file_node->next;
if (file_node == files_list->tail) {
return SDAS_INTERNAL_ERROR;
}
const SDArchiverInternalFileInfo *file_info_struct = file_node->data;
__attribute__((cleanup(simple_archiver_helper_cleanup_FILE))) FILE *fd =
fopen(file_info_struct->filename, "rb");
while (!feof(fd)) {
if (ferror(fd)) {
fprintf(stderr, "ERROR: Writing to chunk, file read error!\n");
return SDAS_INTERNAL_ERROR;
}
size_t fread_ret = fread(buf, 1, 1024, fd);
if (fread_ret > 0) {
size_t fwrite_ret = fwrite(buf, 1, fread_ret, out_f);
if (fwrite_ret != fread_ret) {
fprintf(stderr, "ERROR: Writing to chunk, file write error!\n");
return SDAS_FAILED_TO_WRITE;
}
}
}
}
}
}
return SDAS_SUCCESS;
}
int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract, int simple_archiver_parse_archive_info(FILE *in_f, int_fast8_t do_extract,
const SDArchiverState *state) { const SDArchiverState *state) {

View file

@ -172,6 +172,9 @@ void simple_archiver_print_usage(void) {
fprintf(stderr, fprintf(stderr,
"--write-version <version> : Force write version file format " "--write-version <version> : Force write version file format "
"(default 1)\n"); "(default 1)\n");
fprintf(stderr,
"--chunk-min-size <bytes> : v1 file format minimum chunk size "
"(default 4194304 or 4MiB)\n");
fprintf(stderr, fprintf(stderr,
"-- : specifies remaining arguments are files to archive/extract\n"); "-- : specifies remaining arguments are files to archive/extract\n");
fprintf( fprintf(
@ -193,6 +196,7 @@ SDArchiverParsed simple_archiver_create_parsed(void) {
parsed.temp_dir = NULL; parsed.temp_dir = NULL;
parsed.user_cwd = NULL; parsed.user_cwd = NULL;
parsed.write_version = 0; parsed.write_version = 0;
parsed.minimum_chunk_size = 4194304;
return parsed; return parsed;
} }
@ -323,6 +327,21 @@ int simple_archiver_parse_args(int argc, const char **argv,
out->write_version = (uint32_t)version; out->write_version = (uint32_t)version;
--argc; --argc;
++argv; ++argv;
} else if (strcmp(argv[0], "--chunk-min-size") == 0) {
if (argc < 2) {
fprintf(stderr,
"ERROR: --chunk-min-size expects an integer argument!\n");
simple_archiver_print_usage();
return 1;
}
out->minimum_chunk_size = strtoull(argv[1], NULL, 10);
if (out->minimum_chunk_size == 0) {
fprintf(stderr, "ERROR: --chunk-min-size cannot be zero!\n");
simple_archiver_print_usage();
return 1;
}
--argc;
++argv;
} else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) { } else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) {
is_remaining_args = 1; is_remaining_args = 1;
} else if (argv[0][0] != '-') { } else if (argv[0][0] != '-') {

View file

@ -53,6 +53,8 @@ typedef struct SDArchiverParsed {
const char *user_cwd; const char *user_cwd;
/// Currently only 0 and 1 is supported. /// Currently only 0 and 1 is supported.
uint32_t write_version; uint32_t write_version;
/// The minimum size of a chunk in bytes (the last chunk may be less).
uint64_t minimum_chunk_size;
} SDArchiverParsed; } SDArchiverParsed;
typedef struct SDArchiverFileInfo { typedef struct SDArchiverFileInfo {