git.seodisparate.com - SimpleArchiver/commitdiff
Impl. pre-sort files by size before put in chunks
author Stephen Seo <seo.disparate@gmail.com>
Tue, 8 Oct 2024 06:12:44 +0000 (15:12 +0900)
committer Stephen Seo <seo.disparate@gmail.com>
Tue, 8 Oct 2024 06:12:44 +0000 (15:12 +0900)
CMakeLists.txt
Changelog.md
src/archiver.c
src/parser.c
src/parser.h

index 4b53330feb855738ee19aabd5c6ed1c65c68af83..31da10a54dc78e1d3b71694275e4244959a855dd 100644 (file)
@@ -87,4 +87,5 @@ add_executable(test_simplearchiver
     src/algorithms/linear_congruential_gen.c
     src/data_structures/linked_list.c
     src/data_structures/hash_map.c
+    src/data_structures/priority_heap.c
 )
index abd603a9b7b9ee0a021f36a3800baa9d1aec4823..0fe79abfe37f7d68890448f7df5195586b2b7505 100644 (file)
@@ -4,6 +4,9 @@
 
 More robust handling of de/compression process (handling SIGPIPE).
 
+Add option to pre-sort files by size so that the first files put in chunks are
+the largest.
+
 ## Version 1.0
 
 First release.
index ad2cbc5a73f07341850bd96ff5cf5f3276f1de77..b3634ab0a90ebc1903338861283c6930d4f5ab75 100644 (file)
@@ -38,6 +38,7 @@
 #include <unistd.h>
 #endif
 
+#include "data_structures/priority_heap.h"
 #include "helpers.h"
 
 #define TEMP_FILENAME_CMP "%s%ssimple_archiver_compressed_%lu.tmp"
@@ -1475,6 +1476,7 @@ int symlinks_and_files_from_files(void *data, void *ud) {
   SDArchiverLinkedList *symlinks_list = ptr_array[0];
   SDArchiverLinkedList *files_list = ptr_array[1];
   const char *user_cwd = ptr_array[2];
+  SDArchiverPHeap *pheap = ptr_array[3];
 
   if (file_info->filename) {
     if (file_info->link_dest) {
@@ -1561,8 +1563,14 @@ int symlinks_and_files_from_files(void *data, void *ud) {
         return 1;
       }
       file_info_struct->file_size = (uint64_t)ftell_ret;
-      simple_archiver_list_add(files_list, file_info_struct,
-                               free_internal_file_info);
+      if (pheap) {
+        simple_archiver_priority_heap_insert(
+            pheap, (int64_t)file_info_struct->file_size, file_info_struct,
+            free_internal_file_info);
+      } else {
+        simple_archiver_list_add(files_list, file_info_struct,
+                                 free_internal_file_info);
+      }
     }
   }
 
@@ -1594,6 +1602,8 @@ int files_to_chunk_count(void *data, void *ud) {
   return 0;
 }
 
+int greater_fn(int64_t a, int64_t b) { return a > b; }  // Returns 1 when a > b, else 0. Handed to simple_archiver_priority_heap_init_less_fn() in place of a "less" comparator; presumably this makes the heap yield the largest file size first -- confirm against priority_heap.c.
+
 char *simple_archiver_error_to_string(enum SDArchiverStateReturns error) {
   switch (error) {
     case SDAS_SUCCESS:
@@ -1823,11 +1833,17 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
   SDArchiverLinkedList *symlinks_list = simple_archiver_list_init();
   __attribute__((cleanup(simple_archiver_list_free)))
   SDArchiverLinkedList *files_list = simple_archiver_list_init();
+  __attribute__((cleanup(simple_archiver_priority_heap_free)))
+  SDArchiverPHeap *files_pheap =
+      (state->parsed->flags & 0x40)
+          ? simple_archiver_priority_heap_init_less_fn(greater_fn)
+          : NULL;
 
-  ptr_array = malloc(sizeof(void *) * 3);
+  ptr_array = malloc(sizeof(void *) * 4);
   ptr_array[0] = symlinks_list;
   ptr_array[1] = files_list;
   ptr_array[2] = (void *)state->parsed->user_cwd;
+  ptr_array[3] = files_pheap;
 
   if (simple_archiver_list_get(filenames, symlinks_and_files_from_files,
                                ptr_array)) {
@@ -1836,6 +1852,15 @@ int simple_archiver_write_v1(FILE *out_f, SDArchiverState *state,
   }
   free(ptr_array);
 
+  if (files_pheap) {
+    while (files_pheap->size > 0) {
+      simple_archiver_list_add(files_list,
+                               simple_archiver_priority_heap_pop(files_pheap),
+                               free_internal_file_info);
+    }
+    simple_archiver_priority_heap_free(&files_pheap);
+  }
+
   if (fwrite("SIMPLE_ARCHIVE_VER", 1, 18, out_f) != 18) {
     return SDAS_FAILED_TO_WRITE;
   }
index 31a9ea841d3fb51a9579af8ca8600a048da98bb8..d1c11b4b3a648145806087d7654185cee127a3c7 100644 (file)
@@ -177,6 +177,9 @@ void simple_archiver_print_usage(void) {
   fprintf(stderr,
           "--chunk-min-size <bytes> : v1 file format minimum chunk size "
           "(default 4194304 or 4MiB)\n");
+  fprintf(stderr,
+          "--pre-sort-files : pre-sorts files by size so that the first file "
+          "is the largest\n");
   fprintf(stderr,
           "-- : specifies remaining arguments are files to archive/extract\n");
   fprintf(
@@ -344,6 +347,8 @@ int simple_archiver_parse_args(int argc, const char **argv,
         }
         --argc;
         ++argv;
+      } else if (strcmp(argv[0], "--pre-sort-files") == 0) {
+        out->flags |= 0x40;
       } else if (argv[0][0] == '-' && argv[0][1] == '-' && argv[0][2] == 0) {
         is_remaining_args = 1;
       } else if (argv[0][0] != '-') {
index c3734d0c6ad91af8e3c0f78dbbb5990727d3dba8..43d58f810bef02c5e6aecf21969347fa24fbe8a6 100644 (file)
@@ -35,6 +35,7 @@ typedef struct SDArchiverParsed {
   /// 0b xxxx 1xxx - Allow extract overwrite.
   /// 0b xxx1 xxxx - Create archive to stdout or read archive from stdin.
   /// 0b xx1x xxxx - Do not save absolute paths for symlinks.
+  /// 0b x1xx xxxx - Sort files by size before archiving.
   uint32_t flags;
   /// Null-terminated string.
   char *filename;