git.seodisparate.com - SimpleArchiver/commitdiff
Impl chunked_array for priority-heap refactoring
author Stephen Seo <seo.disparate@gmail.com>
Tue, 22 Apr 2025 03:13:31 +0000 (12:13 +0900)
committer Stephen Seo <seo.disparate@gmail.com>
Tue, 22 Apr 2025 03:13:31 +0000 (12:13 +0900)
CMakeLists.txt
cosmopolitan/Makefile
src/data_structures/chunked_array.c [new file with mode: 0644]
src/data_structures/chunked_array.h [new file with mode: 0644]
src/data_structures/test.c

index c2afe3c202d7c261d15fac0e249eff3aa7ab0bec..dcd7d44388bc7ef2d6d3f49f2f9850a89bd8e7b9 100644 (file)
@@ -10,6 +10,7 @@ set(SimpleArchiver_SOURCES
     src/archiver.c
     src/data_structures/linked_list.c
     src/data_structures/hash_map.c
+    src/data_structures/chunked_array.c
     src/data_structures/priority_heap.c
     src/algorithms/linear_congruential_gen.c
     src/users.c
@@ -108,6 +109,7 @@ add_executable(test_datastructures
     src/data_structures/test.c
     src/data_structures/linked_list.c
     src/data_structures/hash_map.c
+    src/data_structures/chunked_array.c
     src/data_structures/priority_heap.c
     src/algorithms/linear_congruential_gen.c
 )
index 0ad84cad9db10a59388ab3c7a303955f379fc988..fba8b00de5b780eccb4db70dfa586af289e361a9 100644 (file)
@@ -15,6 +15,7 @@ SOURCES = \
                ../src/algorithms/linear_congruential_gen.c \
                ../src/data_structures/linked_list.c \
                ../src/data_structures/hash_map.c \
+               ../src/data_structures/chunked_array.c \
                ../src/data_structures/priority_heap.c \
                ../src/users.c
 
@@ -26,6 +27,7 @@ HEADERS = \
                ../src/algorithms/linear_congruential_gen.h \
                ../src/data_structures/linked_list.h \
                ../src/data_structures/hash_map.h \
+               ../src/data_structures/chunked_array.h \
                ../src/data_structures/priority_heap.h \
                ../src/platforms.h \
                ../src/users.h \
diff --git a/src/data_structures/chunked_array.c b/src/data_structures/chunked_array.c
new file mode 100644 (file)
index 0000000..7415291
--- /dev/null
@@ -0,0 +1,190 @@
+// ISC License
+//
+// Copyright (c) 2024-2025 Stephen Seo
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+//
+// `chunked_array.c` is the source for a chunked-array implementation.
+
+#include "chunked_array.h"
+
+// Standard library includes.
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef NDEBUG
+# include <stdio.h>
+# include <inttypes.h>
+#endif
+
+// Creates a chunked array holding elements of `elem_size` bytes each.
+//
+// One chunk with room for SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE elements is
+// allocated up front. `elem_cleanup_fn` is stored so that
+// simple_archiver_chunked_array_cleanup() can invoke it on every element
+// still held by the array.
+//
+// If either allocation fails, the returned struct has `chunk_count == 0` and
+// `array == NULL`. The other chunked-array functions check for exactly that
+// state and return early, so a failed init is safe to pass to them.
+SDArchiverChunkedArr simple_archiver_chunked_array_init(
+    void (*elem_cleanup_fn)(void*), uint32_t elem_size) {
+
+  // Start in the "uninitialized" state; it is only upgraded once both
+  // allocations have succeeded.
+  SDArchiverChunkedArr chunked_array =
+    (SDArchiverChunkedArr)
+      {.chunk_count=0,
+       .last_size=0,
+       .elem_size=elem_size,
+       .elem_cleanup_fn=elem_cleanup_fn,
+       .array=NULL
+      };
+
+  void **array = malloc(sizeof(void*));
+  if (!array) {
+    return chunked_array;
+  }
+
+  // Widen before multiplying so a large `elem_size` cannot wrap in 32 bits.
+  array[0] =
+    malloc((size_t)elem_size * SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE);
+  if (!array[0]) {
+    free(array);
+    return chunked_array;
+  }
+
+  chunked_array.array = array;
+  chunked_array.chunk_count = 1;
+
+  return chunked_array;
+}
+
+// Releases everything owned by `chunked_array`.
+//
+// Calls `elem_cleanup_fn` (when one was supplied) on every stored element,
+// frees each chunk and the chunk-pointer table, then sets `array` to NULL and
+// `chunk_count` to 0 so that later calls on the same struct return early.
+// Passing NULL, or an array that is already cleaned up, does nothing.
+void simple_archiver_chunked_array_cleanup(
+    SDArchiverChunkedArr *chunked_array) {
+
+  if (!chunked_array
+      || chunked_array->chunk_count == 0
+      || !chunked_array->array) {
+    return;
+  }
+
+  for (uint64_t idx = 0; idx < chunked_array->chunk_count; ++idx) {
+    // Every chunk except the last is full; the last holds `last_size`
+    // elements.
+    const size_t used =
+      (idx + 1 != chunked_array->chunk_count)
+        ? SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE
+        : chunked_array->last_size;
+
+    // Skip per-element cleanup when no callback was supplied or the chunk is
+    // missing, rather than calling through / offsetting from a NULL pointer.
+    if (chunked_array->elem_cleanup_fn && chunked_array->array[idx]) {
+      for (size_t inner_idx = 0; inner_idx < used; ++inner_idx) {
+        chunked_array->elem_cleanup_fn(
+          (char*)chunked_array->array[idx]
+          + inner_idx * chunked_array->elem_size);
+      }
+    }
+    free(chunked_array->array[idx]);
+  }
+  free(chunked_array->array);
+  chunked_array->array = NULL;
+  chunked_array->chunk_count = 0;
+}
+
+// Looks up the element stored at position `idx`.
+//
+// Returns a pointer into the array's own storage (not a copy), or 0 when the
+// array is uninitialized/cleaned up or `idx` is past the last stored element.
+void *simple_archiver_chunked_array_at(SDArchiverChunkedArr *chunked_array,
+                                       uint64_t idx) {
+  if (!chunked_array->array || chunked_array->chunk_count == 0) {
+    return 0;
+  }
+
+  const uint64_t which_chunk = idx / SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE;
+  const uint64_t offset = idx % SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE;
+  const uint64_t last_chunk = chunked_array->chunk_count - 1;
+
+  // Beyond the allocated chunks entirely.
+  if (which_chunk > last_chunk) {
+    return 0;
+  }
+
+  // Only the last chunk can be partially filled; it is bounded by
+  // `last_size`.
+  if (which_chunk == last_chunk && offset >= chunked_array->last_size) {
+    return 0;
+  }
+
+  char *chunk_base = chunked_array->array[which_chunk];
+  return chunk_base + offset * chunked_array->elem_size;
+}
+
+// Appends a copy of one element to the end of the array.
+//
+// `to_copy` must point to at least `elem_size` readable bytes; they are
+// copied into the array's storage. When the copy fills the current chunk, a
+// fresh empty chunk is appended so the next push always has room.
+//
+// Returns 0 on success. Returns 1, leaving the array unchanged, when the
+// array is NULL or uninitialized/cleaned up, `to_copy` is NULL, or memory for
+// the next chunk cannot be allocated.
+int simple_archiver_chunked_array_push(SDArchiverChunkedArr *chunked_array,
+                                       void *to_copy) {
+  if (!chunked_array
+      || !to_copy
+      || chunked_array->chunk_count == 0
+      || !chunked_array->array) {
+    return 1;
+  }
+
+  const uint64_t chunk_idx = chunked_array->chunk_count - 1;
+  const uint64_t inner_idx = chunked_array->last_size;
+
+  // If this push fills the last chunk, allocate the replacement pointer table
+  // and the next chunk BEFORE storing anything, so an allocation failure can
+  // be reported without leaving the array half-updated.
+  void **new_array = NULL;
+  if (inner_idx + 1 >= SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE) {
+    new_array = malloc(sizeof(void*) * (chunked_array->chunk_count + 1));
+    if (!new_array) {
+      return 1;
+    }
+
+    // Widen before multiplying so a large `elem_size` cannot wrap in 32 bits.
+    void *new_chunk = malloc((size_t)chunked_array->elem_size
+                             * SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE);
+    if (!new_chunk) {
+      free(new_array);
+      return 1;
+    }
+
+    memcpy(new_array,
+           chunked_array->array,
+           chunked_array->chunk_count * sizeof(void*));
+    new_array[chunked_array->chunk_count] = new_chunk;
+  }
+
+  void *elem_ptr = (char*)chunked_array->array[chunk_idx]
+                     + inner_idx * chunked_array->elem_size;
+  memcpy(elem_ptr, to_copy, chunked_array->elem_size);
+
+  if (new_array) {
+    // The chunk just became full: switch to the grown table whose new last
+    // chunk is empty.
+    free(chunked_array->array);
+    chunked_array->array = new_array;
+    ++chunked_array->chunk_count;
+    chunked_array->last_size = 0;
+  } else {
+    ++chunked_array->last_size;
+  }
+
+  return 0;
+}
+
+// Removes the last element and returns a heap-allocated copy of it.
+//
+// The returned buffer is `elem_size` bytes, is owned by the caller and must
+// be released with free(). `elem_cleanup_fn` is NOT invoked on the popped
+// element, so anything the element itself owns passes to the caller too.
+//
+// Returns 0 when the array is NULL, uninitialized/cleaned up or empty, or
+// when the result buffer cannot be allocated (the array is left unchanged in
+// all of those cases).
+void *simple_archiver_chunked_array_pop(SDArchiverChunkedArr *chunked_array) {
+  if (!chunked_array
+      || chunked_array->chunk_count == 0
+      || !chunked_array->array) {
+    return 0;
+  }
+
+  // An empty last chunk with no chunk before it means there is nothing to
+  // pop. Check this before allocating the result buffer.
+  if (chunked_array->last_size == 0 && chunked_array->chunk_count <= 1) {
+    return 0;
+  }
+
+  void *ret = malloc(chunked_array->elem_size);
+  if (!ret) {
+    return 0;
+  }
+
+  uint64_t chunk_idx;
+  uint64_t inner_idx;
+
+  if (chunked_array->last_size == 0) {
+    // The last chunk is an empty spare; the element to pop is the final slot
+    // of the chunk before it. Drop the spare chunk.
+    const uint64_t new_count = chunked_array->chunk_count - 1;
+
+    chunk_idx = new_count - 1;
+    inner_idx = SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE - 1;
+
+    free(chunked_array->array[new_count]);
+    chunked_array->array[new_count] = NULL;
+
+    // Shrink the pointer table to `new_count` entries. The parentheses
+    // matter: `sizeof(void*) * count - 1` would subtract one BYTE, not one
+    // entry. If the smaller table cannot be allocated, keep the old one: it
+    // is still valid and merely has one unused trailing slot.
+    void **new_array = malloc(sizeof(void*) * new_count);
+    if (new_array) {
+      memcpy(new_array, chunked_array->array, sizeof(void*) * new_count);
+      free(chunked_array->array);
+      chunked_array->array = new_array;
+    }
+
+    chunked_array->chunk_count = new_count;
+    chunked_array->last_size = SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE - 1;
+  } else {
+    chunk_idx = chunked_array->chunk_count - 1;
+    inner_idx = chunked_array->last_size - 1;
+
+    --chunked_array->last_size;
+  }
+
+  memcpy(ret,
+         (char*)chunked_array->array[chunk_idx]
+           + inner_idx * chunked_array->elem_size,
+         chunked_array->elem_size);
+
+  return ret;
+}
+
+// Resets `chunked_array` to the state of a freshly initialized array.
+//
+// Existing contents are released via simple_archiver_chunked_array_cleanup(),
+// then the struct is overwritten with the result of
+// simple_archiver_chunked_array_init() using the same cleanup callback and
+// element size. Does nothing when the array is uninitialized or already
+// cleaned up.
+void simple_archiver_chunked_array_clear(SDArchiverChunkedArr *chunked_array) {
+  if (!chunked_array->array || chunked_array->chunk_count == 0) {
+    return;
+  }
+
+  // Remember the configuration first, then tear down and rebuild from it.
+  const uint32_t saved_elem_size = chunked_array->elem_size;
+  void (*saved_cleanup_fn)(void *) = chunked_array->elem_cleanup_fn;
+
+  simple_archiver_chunked_array_cleanup(chunked_array);
+  *chunked_array =
+    simple_archiver_chunked_array_init(saved_cleanup_fn, saved_elem_size);
+}
diff --git a/src/data_structures/chunked_array.h b/src/data_structures/chunked_array.h
new file mode 100644 (file)
index 0000000..164dc78
--- /dev/null
@@ -0,0 +1,55 @@
+// ISC License
+//
+// Copyright (c) 2024-2025 Stephen Seo
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+// PERFORMANCE OF THIS SOFTWARE.
+//
+// `chunked_array.h` is the header for a chunked-array implementation.
+
+#ifndef SEODISPARATE_COM_SIMPLE_ARCHIVER_DATA_STRUCTURE_CHUNKED_ARRAY_H_
+#define SEODISPARATE_COM_SIMPLE_ARCHIVER_DATA_STRUCTURE_CHUNKED_ARRAY_H_
+
+// Standard library includes
+#include <stdint.h>
+#include <stddef.h>
+
+/// Number of elements each chunk has room for.
+#define SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE 32
+/// NOTE(review): not referenced by the chunked-array implementation added in
+/// this commit; confirm the intended use.
+#define SD_SA_DS_CHUNKED_ARR_DEFAULT_CAPACITY 4
+
+/// Growable array that stores fixed-size elements by value in separately
+/// allocated chunks of SD_SA_DS_CHUNKED_ARR_DEFAULT_CHUNK_SIZE elements each.
+typedef struct SDArchiverChunkedArr {
+    /// Number of chunks pointed to by `array`. 0 marks an array that has been
+    /// cleaned up; the chunked-array functions return early on it.
+    uint64_t chunk_count;
+    /// Number of elements stored in the last chunk. All earlier chunks are
+    /// full.
+    uint32_t last_size;
+    /// Size in bytes of one element.
+    uint32_t elem_size;
+    /// Called with a pointer to each stored element during cleanup.
+    void (*elem_cleanup_fn)(void*);
+    /// Table of `chunk_count` pointers, one per chunk.
+    void **array;
+} SDArchiverChunkedArr;
+
+/// Creates a chunked array for elements of `elem_size` bytes with one empty
+/// chunk allocated.
+/// `elem_cleanup_fn` is called on each stored element by
+/// simple_archiver_chunked_array_cleanup().
+SDArchiverChunkedArr simple_archiver_chunked_array_init(
+  void (*elem_cleanup_fn)(void*), uint32_t elem_size);
+
+/// Calls the cleanup function on every stored element and frees all chunks.
+/// Afterwards the array is marked as cleaned up (chunk_count == 0) and the
+/// other functions return early on it.
+void simple_archiver_chunked_array_cleanup(SDArchiverChunkedArr *);
+
+/// Returns non-null ptr to element on success.
+/// The ptr refers to the array's internal storage; it is not a copy.
+/// Returns null if `idx` is out of range or the array was cleaned up.
+void *simple_archiver_chunked_array_at(SDArchiverChunkedArr *, uint64_t idx);
+
+/// Copies `elem_size` bytes from `to_copy` to the end of the array.
+/// Returns 0 on success, 1 if the array was cleaned up.
+int simple_archiver_chunked_array_push(SDArchiverChunkedArr *, void *to_copy);
+
+/// Removes the last element and returns a copy of it.
+/// Returns non-null on success, null if the array is empty or cleaned up.
+/// Returned ptr is newly allocated and must be free'd.
+/// The element cleanup function is not called on the popped element.
+void *simple_archiver_chunked_array_pop(SDArchiverChunkedArr *);
+
+/// Clears the chunked array so that it is as if it was newly initialized.
+/// Does nothing if the array was already cleaned up.
+void simple_archiver_chunked_array_clear(SDArchiverChunkedArr *);
+
+#endif
index 86be0e9733f0f9182488c67b12d2d4784b7969ff..fc0d9715dbc145e52afff84a5d05c2ffb2b16a1d 100644 (file)
@@ -27,6 +27,7 @@
 #include "../algorithms/linear_congruential_gen.h"
 #include "hash_map.h"
 #include "linked_list.h"
+#include "chunked_array.h"
 #include "priority_heap.h"
 
 #define SDARCHIVER_DS_TEST_HASH_MAP_ITER_SIZE 100
@@ -109,6 +110,21 @@ void test_iter_fn_priority_heap(void *data, void *user_data) {
   }
 }
 
+// Element type for the "arbitrary data" chunked-array test: each member is
+// released with free() by cleanup_test_struct_fn().
+typedef struct TestStruct {
+  int *first;
+  int *second;
+} TestStruct;
+
+// Element-cleanup callback for chunked arrays of TestStruct: releases both
+// int allocations owned by the element at `ptr`.
+void cleanup_test_struct_fn(void *ptr) {
+  TestStruct *elem = ptr;
+
+  // free() accepts a null pointer, so unset members need no guard.
+  free(elem->first);
+  free(elem->second);
+}
+
 int main(void) {
   puts("Begin data-structures unit test.");
   fflush(stdout);
@@ -296,6 +312,144 @@ int main(void) {
   //  }
   //}
 
+  // Test ChunkedArray.
+  {
+    // Test cleanup immediately after init.
+    SDArchiverChunkedArr chunked_array =
+      simple_archiver_chunked_array_init(no_free_fn,
+                                         sizeof(int));
+
+    simple_archiver_chunked_array_cleanup(&chunked_array);
+
+    // Test cleanup after pushing some values.
+    chunked_array =
+      simple_archiver_chunked_array_init(no_free_fn,
+                                         sizeof(int));
+
+    int value = 1;
+    CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &value) == 0);
+
+    value = 20;
+    CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &value) == 0);
+
+    value = 300;
+    CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &value) == 0);
+
+    int *int_ptr = simple_archiver_chunked_array_at(&chunked_array, 0);
+    CHECK_TRUE(int_ptr);
+    CHECK_TRUE(*int_ptr == 1);
+
+    int_ptr = simple_archiver_chunked_array_at(&chunked_array, 1);
+    CHECK_TRUE(int_ptr);
+    CHECK_TRUE(*int_ptr == 20);
+
+    int_ptr = simple_archiver_chunked_array_at(&chunked_array, 2);
+    CHECK_TRUE(int_ptr);
+    CHECK_TRUE(*int_ptr == 300);
+
+    int_ptr = simple_archiver_chunked_array_at(&chunked_array, 3);
+    CHECK_FALSE(int_ptr);
+
+    int_ptr = simple_archiver_chunked_array_at(&chunked_array, 4);
+    CHECK_FALSE(int_ptr);
+
+    simple_archiver_chunked_array_cleanup(&chunked_array);
+
+    // Test arbitrary data.
+    chunked_array =
+      simple_archiver_chunked_array_init(cleanup_test_struct_fn,
+                                         sizeof(TestStruct));
+    TestStruct t = (TestStruct){.first=malloc(sizeof(int)),
+                                .second=malloc(sizeof(int))};
+    *t.first = 100;
+    *t.second = 200;
+
+    CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &t) == 0);
+
+    t.first = malloc(sizeof(int));
+    *t.first = 3000;
+
+    t.second = malloc(sizeof(int));
+    *t.second = 4444;
+
+    CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &t) == 0);
+
+    TestStruct *t_ptr = simple_archiver_chunked_array_at(&chunked_array, 0);
+
+    CHECK_TRUE(t_ptr);
+    CHECK_TRUE(*t_ptr->first == 100);
+    CHECK_TRUE(*t_ptr->second == 200);
+
+    t_ptr = simple_archiver_chunked_array_at(&chunked_array, 1);
+
+    CHECK_TRUE(t_ptr);
+    CHECK_TRUE(*t_ptr->first == 3000);
+    CHECK_TRUE(*t_ptr->second == 4444);
+
+    CHECK_FALSE(simple_archiver_chunked_array_at(&chunked_array, 2));
+    CHECK_FALSE(simple_archiver_chunked_array_at(&chunked_array, 3));
+    CHECK_FALSE(simple_archiver_chunked_array_at(&chunked_array, 3090));
+
+    simple_archiver_chunked_array_cleanup(&chunked_array);
+
+    // Test push more than 32 elements.
+    chunked_array =
+      simple_archiver_chunked_array_init(no_free_fn,
+                                         sizeof(int));
+
+    for (int idx = 0; idx < 100; ++idx) {
+      value = idx;
+      CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &value) == 0);
+    }
+
+    for (int idx = 0; idx < 110; ++idx) {
+      int_ptr = simple_archiver_chunked_array_at(&chunked_array, idx);
+      if (idx < 100) {
+        CHECK_TRUE(int_ptr);
+        CHECK_TRUE(*int_ptr == idx);
+      } else {
+        CHECK_FALSE(int_ptr);
+      }
+    }
+
+    for (int idx = 100; idx-- > 0;) {
+      int_ptr = simple_archiver_chunked_array_pop(&chunked_array);
+      CHECK_TRUE(int_ptr);
+      CHECK_TRUE(*int_ptr == idx);
+      free(int_ptr);
+    }
+
+    for (int idx = 0; idx < 10; ++idx) {
+      int_ptr = simple_archiver_chunked_array_pop(&chunked_array);
+      CHECK_FALSE(int_ptr);
+    }
+
+    simple_archiver_chunked_array_cleanup(&chunked_array);
+
+    // Repeat test but use "clear" at end.
+    chunked_array =
+      simple_archiver_chunked_array_init(no_free_fn,
+                                         sizeof(int));
+
+    for (int idx = 0; idx < 100; ++idx) {
+      value = idx;
+      CHECK_TRUE(simple_archiver_chunked_array_push(&chunked_array, &value) == 0);
+    }
+
+    for (int idx = 0; idx < 110; ++idx) {
+      int_ptr = simple_archiver_chunked_array_at(&chunked_array, idx);
+      if (idx < 100) {
+        CHECK_TRUE(int_ptr);
+        CHECK_TRUE(*int_ptr == idx);
+      } else {
+        CHECK_FALSE(int_ptr);
+      }
+    }
+
+    simple_archiver_chunked_array_clear(&chunked_array);
+    simple_archiver_chunked_array_cleanup(&chunked_array);
+  }
+
   // Test PriorityHeap.
   {
     SDArchiverPHeap *priority_heap = simple_archiver_priority_heap_init();