git.seodisparate.com - blue_noise_generation/commitdiff
Attempt to optimize vulkan compute
author Stephen Seo <seo.disparate@gmail.com>
Mon, 25 Mar 2024 07:53:42 +0000 (16:53 +0900)
committer Stephen Seo <seo.disparate@gmail.com>
Mon, 25 Mar 2024 07:53:42 +0000 (16:53 +0900)
src/blue_noise.cpp
src/blue_noise.hpp

index 4f9ee9026b9dc4744595b54466707319aecc55e1..ca4c7148744fa256c0820c62c4b5a17159fce971 100644 (file)
@@ -170,58 +170,84 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
       (std::size_t)std::ceil((float)size / (float)local_size);
 
   std::vector<bool> pbp = random_noise(size, pixel_count);
-  std::vector<int> pbp_i(pbp.size());
-  std::vector<float> filter(size);
   bool reversed_pbp = false;
 
-  const auto get_filter = [device, phys_device, command_buffer, command_pool,
-                           queue, pbp_buf, pipeline, pipeline_layout,
-                           descriptor_set, filter_out_buf, size, &pbp, &pbp_i,
-                           &reversed_pbp, global_size, &filter]() -> bool {
+  VkBuffer staging_pbp_buffer;
+  VkDeviceMemory staging_pbp_buffer_mem;
+  void *pbp_mapped;
+  if (!internal::vulkan_create_buffer(device, phys_device, size * sizeof(int),
+                                      VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                      staging_pbp_buffer,
+                                      staging_pbp_buffer_mem)) {
+    std::clog << "get_filter ERROR: Failed to create staging pbp buffer!\n";
+    return {};
+  }
+  utility::Cleanup cleanup_staging_pbp_buf(
+      [device](void *ptr) {
+        vkDestroyBuffer(device, *((VkBuffer *)ptr), nullptr);
+      },
+      &staging_pbp_buffer);
+  utility::Cleanup cleanup_staging_pbp_buf_mem(
+      [device](void *ptr) {
+        vkFreeMemory(device, *((VkDeviceMemory *)ptr), nullptr);
+      },
+      &staging_pbp_buffer_mem);
+  vkMapMemory(device, staging_pbp_buffer_mem, 0, size * sizeof(int), 0,
+              &pbp_mapped);
+  utility::Cleanup cleanup_pbp_mapped(
+      [device](void *ptr) { vkUnmapMemory(device, *((VkDeviceMemory  *)ptr)); },
+      &staging_pbp_buffer_mem);
+  int *pbp_mapped_int = (int *)pbp_mapped;
+
+  VkBuffer staging_filter_buffer;
+  VkDeviceMemory staging_filter_buffer_mem;
+  void *filter_mapped;
+  if (!internal::vulkan_create_buffer(device, phys_device, size * sizeof(int),
+                                      VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                      staging_filter_buffer,
+                                      staging_filter_buffer_mem)) {
+    std::clog << "get_filter ERROR: Failed to create staging pbp buffer!\n";
+    return {};
+  }
+  utility::Cleanup cleanup_staging_filter_buf(
+      [device](void *ptr) {
+        vkDestroyBuffer(device, *((VkBuffer *)ptr), nullptr);
+      },
+      &staging_filter_buffer);
+  utility::Cleanup cleanup_staging_filter_buf_mem(
+      [device](void *ptr) {
+        vkFreeMemory(device, *((VkDeviceMemory *)ptr), nullptr);
+      },
+      &staging_filter_buffer_mem);
+  vkMapMemory(device, staging_filter_buffer_mem, 0, size * sizeof(float), 0,
+              &filter_mapped);
+  utility::Cleanup cleanup_filter_mapped(
+      [device](void *ptr) { vkUnmapMemory(device, *((VkDeviceMemory  *)ptr)); },
+      &staging_filter_buffer_mem);
+  float *filter_mapped_float = (float *)filter_mapped;
+
+  const auto get_filter = [device, command_buffer, command_pool, queue, pbp_buf,
+                           pipeline, pipeline_layout, descriptor_set,
+                           filter_out_buf, size, &pbp, &reversed_pbp,
+                           global_size, pbp_mapped_int, staging_pbp_buffer,
+                           staging_filter_buffer]() -> bool {
+    vkResetCommandBuffer(command_buffer, 0);
+
     for (unsigned int i = 0; i < pbp.size(); ++i) {
       if (reversed_pbp) {
-        pbp_i[i] = pbp[i] ? 0 : 1;
+        pbp_mapped_int[i] = pbp[i] ? 0 : 1;
       } else {
-        pbp_i[i] = pbp[i] ? 1 : 0;
+        pbp_mapped_int[i] = pbp[i] ? 1 : 0;
       }
     }
 
-    vkResetCommandBuffer(command_buffer, 0);
-
     // Copy pbp buffer.
-    {
-      VkBuffer staging_buffer;
-      VkDeviceMemory staging_buffer_mem;
-
-      if (!internal::vulkan_create_buffer(
-              device, phys_device, size * sizeof(int),
-              VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
-              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-              staging_buffer, staging_buffer_mem)) {
-        std::clog << "get_filter ERROR: Failed to create staging buffer!\n";
-        return false;
-      }
-      utility::Cleanup cleanup_staging_buf(
-          [device](void *ptr) {
-            vkDestroyBuffer(device, *((VkBuffer *)ptr), nullptr);
-          },
-          &staging_buffer);
-      utility::Cleanup cleanup_staging_buf_mem(
-          [device](void *ptr) {
-            vkFreeMemory(device, *((VkDeviceMemory *)ptr), nullptr);
-          },
-          &staging_buffer_mem);
-
-      void *data_ptr;
-      vkMapMemory(device, staging_buffer_mem, 0, size * sizeof(int), 0,
-                  &data_ptr);
-      std::memcpy(data_ptr, pbp_i.data(), size * sizeof(int));
-      vkUnmapMemory(device, staging_buffer_mem);
-
-      vulkan_copy_buffer(device, command_pool, queue, staging_buffer, pbp_buf,
-                         size * sizeof(int));
-    }
+    vulkan_copy_buffer(device, command_pool, queue, staging_pbp_buffer, pbp_buf,
+                       size * sizeof(int));
 
     VkCommandBufferBeginInfo begin_info{};
     begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
@@ -263,39 +289,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     }
 
     // Copy back filter_out buffer.
-    {
-      VkBuffer staging_buffer;
-      VkDeviceMemory staging_buffer_mem;
-
-      if (!internal::vulkan_create_buffer(
-              device, phys_device, size * sizeof(float),
-              VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-              staging_buffer, staging_buffer_mem)) {
-        std::clog << "get_filter ERROR: Failed to create staging buffer!\n";
-        return false;
-      }
-      utility::Cleanup cleanup_staging_buf(
-          [device](void *ptr) {
-            vkDestroyBuffer(device, *((VkBuffer *)ptr), nullptr);
-          },
-          &staging_buffer);
-      utility::Cleanup cleanup_staging_buf_mem(
-          [device](void *ptr) {
-            vkFreeMemory(device, *((VkDeviceMemory *)ptr), nullptr);
-          },
-          &staging_buffer_mem);
-
-      vulkan_copy_buffer(device, command_pool, queue, filter_out_buf,
-                         staging_buffer, size * sizeof(float));
-
-      void *data_ptr;
-      vkMapMemory(device, staging_buffer_mem, 0, size * sizeof(float), 0,
-                  &data_ptr);
-      std::memcpy(filter.data(), data_ptr, size * sizeof(float));
-      vkUnmapMemory(device, staging_buffer_mem);
-    }
+    vulkan_copy_buffer(device, command_pool, queue, filter_out_buf,
+                       staging_filter_buffer, size * sizeof(float));
 
     return true;
   };
@@ -322,7 +317,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     std::cerr << "Vulkan: Failed to execute get_filter at start!\n";
   } else {
 #ifndef NDEBUG
-    internal::write_filter(filter, width, "filter_out_start.pgm");
+    internal::write_filter(vulkan_buf_to_vec(filter_mapped_float, size), width,
+                           "filter_out_start.pgm");
 #endif
   }
 
@@ -340,7 +336,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     }
 
     int min, max;
-    std::tie(min, max) = internal::filter_minmax(filter, pbp);
+    std::tie(min, max) =
+        internal::filter_minmax_raw_array(filter_mapped_float, size, pbp);
 
     pbp[max] = false;
 
@@ -351,7 +348,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
 
     // get second buffer's min
     int second_min;
-    std::tie(second_min, std::ignore) = internal::filter_minmax(filter, pbp);
+    std::tie(second_min, std::ignore) =
+        internal::filter_minmax_raw_array(filter_mapped_float, size, pbp);
 
     if (second_min == max) {
       pbp[max] = true;
@@ -383,7 +381,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     std::cerr << "Vulkan: Failed to execute do_filter (at end)\n";
   } else {
 #ifndef NDEBUG
-    internal::write_filter(filter, width, "filter_out_final.pgm");
+    internal::write_filter(vulkan_buf_to_vec(filter_mapped_float, size), width,
+                           "filter_out_final.pgm");
     FILE *blue_noise_image = fopen("blue_noise.pbm", "w");
     fprintf(blue_noise_image, "P1\n%d %d\n", width, height);
     for (int y = 0; y < height; ++y) {
@@ -418,7 +417,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
       std::cout << i << ' ';
 #endif
       get_filter();
-      std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp);
+      std::tie(std::ignore, max) =
+          internal::filter_minmax_raw_array(filter_mapped_float, size, pbp);
       pbp.at(max) = false;
       dither_array.at(max) = i;
 #ifndef NDEBUG
@@ -441,7 +441,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     std::cout << i << ' ';
 #endif
     get_filter();
-    std::tie(min, std::ignore) = internal::filter_minmax(filter, pbp);
+    std::tie(min, std::ignore) =
+        internal::filter_minmax_raw_array(filter_mapped_float, size, pbp);
     pbp.at(min) = true;
     dither_array.at(min) = i;
 #ifndef NDEBUG
@@ -457,7 +458,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     image::Bl min_pixels = internal::rangeToBl(dither_array, width);
     min_pixels.writeToFile(image::file_type::PNG, true, "da_mid_pixels.png");
     get_filter();
-    internal::write_filter(filter, width, "filter_mid.pgm");
+    internal::write_filter(vulkan_buf_to_vec(filter_mapped_float, size), width,
+                           "filter_mid.pgm");
     image::Bl pbp_image = toBl(pbp, width);
     pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_mid.png");
   }
@@ -469,7 +471,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
     std::cout << i << ' ';
 #endif
     get_filter();
-    std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp);
+    std::tie(std::ignore, max) =
+        internal::filter_minmax_raw_array(filter_mapped_float, size, pbp);
     pbp.at(max) = true;
     dither_array.at(max) = i;
 #ifndef NDEBUG
@@ -485,7 +488,8 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
 #ifndef NDEBUG
   {
     get_filter();
-    internal::write_filter(filter, width, "filter_after.pgm");
+    internal::write_filter(vulkan_buf_to_vec(filter_mapped_float, size), width,
+                           "filter_after.pgm");
     image::Bl pbp_image = toBl(pbp, width);
     pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_after.png");
   }
@@ -493,6 +497,14 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
 
   return dither_array;
 }
+/// Copies `size` floats starting at `mapped` into a newly created vector.
+/// `mapped` must point to at least `size` readable floats.
+std::vector<float> dither::internal::vulkan_buf_to_vec(float *mapped,
+                                                       unsigned int size) {
+  const float *const first = mapped;
+  const float *const last = first + size;
+  return std::vector<float>(first, last);
+}
 
 #endif  // DITHERING_VULKAN_ENABLED == 1
 
index d64b0e5cd4635ad972209c96f3676f20d88ce0ce..882e6f1cca880ae845a74f55166207abe9456526 100644 (file)
@@ -66,6 +66,8 @@ std::vector<unsigned int> blue_noise_vulkan_impl(
     VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
     VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int width,
     const int height);
+
+std::vector<float> vulkan_buf_to_vec(float *mapped, unsigned int size);
 #endif
 
 #if DITHERING_OPENCL_ENABLED == 1
@@ -300,6 +302,42 @@ inline std::pair<int, int> filter_minmax(const std::vector<float> &filter,
   return {min_index, max_index};
 }
 
+/// Returns {min_index, max_index}: the index of the smallest filter value
+/// among majority pixels and of the largest among minority pixels, or -1 for
+/// a side with no candidate. Reads at most min(size, pbp.size()) entries; a
+/// null filter yields {-1, -1}. pbp is never modified.
+inline std::pair<int, int> filter_minmax_raw_array(const float *const filter,
+                                                   unsigned int size,
+                                                   std::vector<bool> pbp) {
+  // "true" must mark the minority pixel: when at least half of pbp is set,
+  // every entry is read inverted (equivalent to flipping the whole copy).
+  unsigned int count = 0;
+  for (bool value : pbp) {
+    count += value ? 1u : 0u;
+  }
+  const bool inverted = count * 2 >= pbp.size();
+  // Never index past pbp, even if the caller passes a larger size.
+  const unsigned int limit =
+      pbp.size() < size ? static_cast<unsigned int>(pbp.size()) : size;
+
+  float min = std::numeric_limits<float>::infinity();
+  float max = -std::numeric_limits<float>::infinity();
+  int min_index = -1;
+  int max_index = -1;
+  for (unsigned int i = 0; filter != nullptr && i < limit; ++i) {
+    const bool minority = static_cast<bool>(pbp[i]) != inverted;
+    if (!minority && filter[i] < min) {
+      min_index = static_cast<int>(i);
+      min = filter[i];
+    }
+    if (minority && filter[i] > max) {
+      max_index = static_cast<int>(i);
+      max = filter[i];
+    }
+  }
+  return {min_index, max_index};
+}
+
 inline std::pair<int, int> filter_abs_minmax(const std::vector<float> &filter) {
   float min = std::numeric_limits<float>::infinity();
   float max = -std::numeric_limits<float>::infinity();