Implement pre-sorting of files by size before putting them in chunks
All checks were successful
Run Unit Tests / build-and-run-unit-tests (push) Successful in 6s

This commit is contained in:
Stephen Seo 2024-10-08 15:12:44 +09:00
parent d2d202235c
commit d54fc441a3
5 changed files with 38 additions and 3 deletions

View file

@ -87,4 +87,5 @@ add_executable(test_simplearchiver
src/algorithms/linear_congruential_gen.c src/algorithms/linear_congruential_gen.c
src/data_structures/linked_list.c src/data_structures/linked_list.c
src/data_structures/hash_map.c src/data_structures/hash_map.c
src/data_structures/priority_heap.c
) )

View file

@ -4,6 +4,9 @@
More robust handling of de/compression process (handling SIGPIPE). More robust handling of de/compression process (handling SIGPIPE).
Add option to pre-sort files by size so that the first files put in chunks are
the largest.
## Version 1.0 ## Version 1.0
First release. First release.

View file

@ -38,6 +38,7 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "data_structures/priority_heap.h"
#include "helpers.h" #include "helpers.h"
#define TEMP_FILENAME_CMP "%s%ssimple_archiver_compressed_%lu.tmp" #define TEMP_FILENAME_CMP "%s%ssimple_archiver_compressed_%lu.tmp"
@ -1475,6 +1476,7 @@ int symlinks_and_files_from_files(void *data, void *ud) {
SDArchiverLinkedList *symlinks_list = ptr_array[0]; SDArchiverLinkedList *symlinks_list = ptr_array[0];
SDArchiverLinkedList *files_list = ptr_array[1]; SDArchiverLinkedList *files_list = ptr_array[1];
const char *user_cwd = ptr_array[2]; const char *user_cwd = ptr_array[2];
SDArchiverPHeap *pheap = ptr_array[3];
if (file_info->filename) { if (file_info->filename) {
if (file_info->link_dest) { if (file_info->link_dest) {
@ -1561,10 +1563,16 @@ int symlinks_and_files_from_files(void *data, void *ud) {
return 1; return 1;
} }
file_info_struct->file_size = (uint64_t)ftell_ret; file_info_struct->file_size = (uint64_t)ftell_ret;
if (pheap) {
simple_archiver_priority_heap_insert(
pheap, (int64_t)file_info_struct->file_size, file_info_struct,
free_internal_file_info);
} else {
simple_archiver_list_add(files_list, file_info_struct, simple_archiver_list_add(files_list, file_info_struct,
free_internal_file_info); free_internal_file_info);
} }
} }
}
return 0; return 0;
} }
@ -1594,6 +1602,8 @@ int files_to_chunk_count(void *data, void *ud) {
return 0; return 0;
} }
/// Comparison callback for 64-bit signed keys.
/// Returns 1 when "a" is strictly greater than "b", and 0 otherwise (equal
/// keys yield 0).
/// NOTE(review): this is handed to the priority heap as its "less" function,
/// presumably so that the largest key is popped first; confirm against the
/// priority heap implementation.
int greater_fn(int64_t a, int64_t b) {
  if (a > b) {
    return 1;
  }
  return 0;
}
char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) { char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) {
switch (error) { switch (error) {
case SDAS_SUCCESS: case SDAS_SUCCESS:
@ -1823,11 +1833,17 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
SDArchiverLinkedList *symlinks_list = simple_archiver_list_init(); SDArchiverLinkedList *symlinks_list = simple_archiver_list_init();
__attribute__((cleanup(simple_archiver_list_free))) __attribute__((cleanup(simple_archiver_list_free)))
SDArchiverLinkedList *files_list = simple_archiver_list_init(); SDArchiverLinkedList *files_list = simple_archiver_list_init();
__attribute__((cleanup(simple_archiver_priority_heap_free)))
SDArchiverPHeap *files_pheap =
(state->parsed->flags & 0x40)
? simple_archiver_priority_heap_init_less_fn(greater_fn)
: NULL;
ptr_array = malloc(sizeof(void *) * 3); ptr_array = malloc(sizeof(void *) * 4);
ptr_array[0] = symlinks_list; ptr_array[0] = symlinks_list;
ptr_array[1] = files_list; ptr_array[1] = files_list;
ptr_array[2] = (void *)state->parsed->user_cwd; ptr_array[2] = (void *)state->parsed->user_cwd;
ptr_array[3] = files_pheap;
if (simple_archiver_list_get(filenames, symlinks_and_files_from_files, if (simple_archiver_list_get(filenames, symlinks_and_files_from_files,
ptr_array)) { ptr_array)) {
@ -1836,6 +1852,15 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
} }
free(ptr_array); free(ptr_array);
if (files_pheap) {
while (files_pheap->size > 0) {
simple_archiver_list_add(files_list,
simple_archiver_priority_heap_pop(files_pheap),
free_internal_file_info);
}
simple_archiver_priority_heap_free(&files_pheap);
}
if (fwrite("SIMPLE_ARCHIVE_VER", 1, 18, out_f) != 18) { if (fwrite("SIMPLE_ARCHIVE_VER", 1, 18, out_f) != 18) {
return SDAS_FAILED_TO_WRITE; return SDAS_FAILED_TO_WRITE;
} }

View file

@ -177,6 +177,9 @@ void simple_archiver_print_usage(void) {
fprintf(stderr, fprintf(stderr,
"--chunk-min-size <bytes> : v1 file format minimum chunk size " "--chunk-min-size <bytes> : v1 file format minimum chunk size "
"(default 4194304 or 4MiB)\n"); "(default 4194304 or 4MiB)\n");
fprintf(stderr,
"--pre-sort-files : pre-sorts files by size so that the first file "
"is the largest\n");
fprintf(stderr, fprintf(stderr,
"-- : specifies remaining arguments are files to archive/extract\n"); "-- : specifies remaining arguments are files to archive/extract\n");
fprintf( fprintf(
@ -344,6 +347,8 @@ int simple_archiver_parse_args(int argc, const char **argv,
} }
--argc; --argc;
++argv; ++argv;
} else if (strcmp(argv[0], "--pre-sort-files") == 0) {
out->flags |= 0x40;
} else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) { } else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) {
is_remaining_args = 1; is_remaining_args = 1;
} else if (argv[0][0] != '-') { } else if (argv[0][0] != '-') {

View file

@ -35,6 +35,7 @@ typedef struct SDArchiverParsed {
/// 0b xxxx 1xxx - Allow extract overwrite. /// 0b xxxx 1xxx - Allow extract overwrite.
/// 0b xxx1 xxxx - Create archive to stdout or read archive from stdin. /// 0b xxx1 xxxx - Create archive to stdout or read archive from stdin.
/// 0b xx1x xxxx - Do not save absolute paths for symlinks. /// 0b xx1x xxxx - Do not save absolute paths for symlinks.
/// 0b x1xx xxxx - Sort files by size before archiving.
uint32_t flags; uint32_t flags;
/// Null-terminated string. /// Null-terminated string.
char *filename; char *filename;