]> git.seodisparate.com - blue_noise_generation/commitdiff
Impl. Vulkan compute
author: Stephen Seo <seo.disparate@gmail.com>
Mon, 25 Mar 2024 04:27:06 +0000 (13:27 +0900)
committer: Stephen Seo <seo.disparate@gmail.com>
Mon, 25 Mar 2024 04:27:06 +0000 (13:27 +0900)
Tests indicate that Vulkan compute runs about 2x slower than OpenCL, so
there is probably room for optimization.

src/blue_noise.cpp
src/blue_noise.glsl
src/blue_noise.hpp

index f2aeb7a06b4b6a42275cc947e91a85ced75410c1..4f9ee9026b9dc4744595b54466707319aecc55e1 100644 (file)
@@ -157,6 +157,343 @@ void dither::internal::vulkan_copy_buffer(VkDevice device,
   vkFreeCommandBuffers(device, command_pool, 1, &command_buf);
 }
 
+// Builds a dither array for a width x height image: every pixel index is
+// assigned a rank, with the Vulkan compute pipeline used to evaluate the
+// filter over the current binary pattern ("pbp") before each decision.
+//
+// Parameters:
+//   device, phys_device - used to create/map the host-visible staging buffers.
+//   command_buffer      - reset and re-recorded for every compute dispatch.
+//   command_pool, queue - forwarded to vulkan_copy_buffer() for the transfers;
+//                         queue also receives the compute submission.
+//   pbp_buf             - device buffer that receives the pattern as one int
+//                         per pixel before each dispatch.
+//   pipeline, pipeline_layout, descriptor_set
+//                       - compute state bound for each dispatch.
+//   filter_out_buf      - device buffer from which one float per pixel is read
+//                         back after each dispatch.
+//   width, height       - image dimensions in pixels.
+//
+// Returns width * height ranks indexed by pixel, or an empty vector when the
+// dimensions are not positive or any Vulkan step fails, so callers can detect
+// failure with result.empty().
+std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
+    VkDevice device, VkPhysicalDevice phys_device,
+    VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue,
+    VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
+    VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int width,
+    const int height) {
+  if (width <= 0 || height <= 0) {
+    std::clog << "Vulkan: ERROR: Invalid dimensions " << width << 'x' << height
+              << "!\n";
+    return {};
+  }
+
+  const int size = width * height;
+  const int pixel_count = size * 4 / 10;
+  // NOTE(review): presumably has to match the compute shader's local size;
+  // confirm against blue_noise.glsl.
+  const int local_size = 256;
+  // Integer ceil-division; a float-based ceil loses precision for large sizes.
+  const std::size_t global_size =
+      (static_cast<std::size_t>(size) + local_size - 1) / local_size;
+  const std::size_t pbp_bytes = static_cast<std::size_t>(size) * sizeof(int);
+  const std::size_t filter_bytes =
+      static_cast<std::size_t>(size) * sizeof(float);
+
+  std::vector<bool> pbp = random_noise(size, pixel_count);
+  std::vector<int> pbp_i(pbp.size());
+  std::vector<float> filter(size);
+  bool reversed_pbp = false;
+
+  // The staging buffers never change size, so they are created once here and
+  // reused by every get_filter() call instead of being rebuilt per call.
+  VkBuffer upload_buf = VK_NULL_HANDLE;
+  VkDeviceMemory upload_mem = VK_NULL_HANDLE;
+  if (!internal::vulkan_create_buffer(
+          device, phys_device, pbp_bytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+          upload_buf, upload_mem)) {
+    std::clog << "Vulkan: ERROR: Failed to create pbp staging buffer!\n";
+    return {};
+  }
+  utility::Cleanup cleanup_upload_buf(
+      [device](void *ptr) {
+        vkDestroyBuffer(device, *static_cast<VkBuffer *>(ptr), nullptr);
+      },
+      &upload_buf);
+  utility::Cleanup cleanup_upload_mem(
+      [device](void *ptr) {
+        vkFreeMemory(device, *static_cast<VkDeviceMemory *>(ptr), nullptr);
+      },
+      &upload_mem);
+
+  VkBuffer download_buf = VK_NULL_HANDLE;
+  VkDeviceMemory download_mem = VK_NULL_HANDLE;
+  if (!internal::vulkan_create_buffer(
+          device, phys_device, filter_bytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+              VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+          download_buf, download_mem)) {
+    std::clog << "Vulkan: ERROR: Failed to create filter staging buffer!\n";
+    return {};
+  }
+  utility::Cleanup cleanup_download_buf(
+      [device](void *ptr) {
+        vkDestroyBuffer(device, *static_cast<VkBuffer *>(ptr), nullptr);
+      },
+      &download_buf);
+  utility::Cleanup cleanup_download_mem(
+      [device](void *ptr) {
+        vkFreeMemory(device, *static_cast<VkDeviceMemory *>(ptr), nullptr);
+      },
+      &download_mem);
+
+  // Uploads the current pattern, runs the compute shader once and reads the
+  // result back into `filter`. Returns false if any Vulkan step fails.
+  const auto get_filter = [device, command_buffer, command_pool, queue, pbp_buf,
+                           pipeline, pipeline_layout, descriptor_set,
+                           filter_out_buf, upload_buf, upload_mem, download_buf,
+                           download_mem, pbp_bytes, filter_bytes, global_size,
+                           &pbp, &pbp_i, &reversed_pbp, &filter]() -> bool {
+    // Expand the bit pattern to one int per pixel, inverted when reversed_pbp
+    // is set.
+    for (std::size_t i = 0; i < pbp.size(); ++i) {
+      const bool bit = pbp[i];
+      pbp_i[i] = (bit != reversed_pbp) ? 1 : 0;
+    }
+
+    vkResetCommandBuffer(command_buffer, 0);
+
+    // Copy pbp buffer.
+    {
+      void *data_ptr = nullptr;
+      if (vkMapMemory(device, upload_mem, 0, pbp_bytes, 0, &data_ptr) !=
+          VK_SUCCESS) {
+        std::clog << "get_filter ERROR: Failed to map pbp staging buffer!\n";
+        return false;
+      }
+      std::memcpy(data_ptr, pbp_i.data(), pbp_bytes);
+      vkUnmapMemory(device, upload_mem);
+
+      vulkan_copy_buffer(device, command_pool, queue, upload_buf, pbp_buf,
+                         pbp_bytes);
+    }
+
+    VkCommandBufferBeginInfo begin_info{};
+    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+
+    if (vkBeginCommandBuffer(command_buffer, &begin_info) != VK_SUCCESS) {
+      std::clog << "get_filter ERROR: Failed to begin recording compute "
+                   "command buffer!\n";
+      return false;
+    }
+
+    vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+    vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+                            pipeline_layout, 0, 1, &descriptor_set, 0, nullptr);
+    vkCmdDispatch(command_buffer, global_size, 1, 1);
+    if (vkEndCommandBuffer(command_buffer) != VK_SUCCESS) {
+      std::clog
+          << "get_filter ERROR: Failed to record compute command buffer!\n";
+      return false;
+    }
+
+    {
+      VkSubmitInfo submit_info{};
+      submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+      submit_info.commandBufferCount = 1;
+      submit_info.pCommandBuffers = &command_buffer;
+      submit_info.signalSemaphoreCount = 0;
+      submit_info.pSignalSemaphores = nullptr;
+
+      // No fence: completion is awaited with vkDeviceWaitIdle() below.
+      if (vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE) !=
+          VK_SUCCESS) {
+        std::clog
+            << "get_filter ERROR: Failed to submit compute command buffer!\n";
+        return false;
+      }
+    }
+
+    if (vkDeviceWaitIdle(device) != VK_SUCCESS) {
+      std::clog << "get_filter ERROR: Failed to vkDeviceWaitIdle!\n";
+      return false;
+    }
+
+    // Copy back filter_out buffer.
+    {
+      vulkan_copy_buffer(device, command_pool, queue, filter_out_buf,
+                         download_buf, filter_bytes);
+
+      void *data_ptr = nullptr;
+      if (vkMapMemory(device, download_mem, 0, filter_bytes, 0, &data_ptr) !=
+          VK_SUCCESS) {
+        std::clog << "get_filter ERROR: Failed to map filter staging buffer!\n";
+        return false;
+      }
+      std::memcpy(filter.data(), data_ptr, filter_bytes);
+      vkUnmapMemory(device, download_mem);
+    }
+
+    return true;
+  };
+
+#ifndef NDEBUG
+  // Debug helper: dumps the current pattern as a plain PBM ("P1") image.
+  const auto write_pbp_pbm = [&pbp, width, height](const char *filename) {
+    FILE *image_file = fopen(filename, "w");
+    if (image_file == nullptr) {
+      std::clog << "WARNING: Failed to open \"" << filename
+                << "\" for writing!\n";
+      return;
+    }
+    fprintf(image_file, "P1\n%d %d\n", width, height);
+    for (int y = 0; y < height; ++y) {
+      for (int x = 0; x < width; ++x) {
+        fprintf(image_file, "%d ",
+                pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0);
+      }
+      fputc('\n', image_file);
+    }
+    fclose(image_file);
+  };
+
+  printf("Inserting %d pixels into image of max count %d\n", pixel_count,
+         size);
+  // generate image from randomized pbp
+  write_pbp_pbm("random_noise.pbm");
+#endif
+
+  if (!get_filter()) {
+    std::cerr << "Vulkan: Failed to execute get_filter at start!\n";
+    return {};
+  }
+#ifndef NDEBUG
+  internal::write_filter(filter, width, "filter_out_start.pgm");
+#endif
+
+  int iterations = 0;
+
+  // Repeatedly clear the pixel with the largest filter value and set the one
+  // with the smallest; stop once the cleared pixel is itself the new minimum.
+  std::cout << "Begin BinaryArray generation loop\n";
+  while (true) {
+#ifndef NDEBUG
+    printf("Iteration %d\n", ++iterations);
+#endif
+
+    if (!get_filter()) {
+      std::cerr << "Vulkan: Failed to execute do_filter\n";
+      return {};
+    }
+
+    int max;
+    std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp);
+
+    pbp[max] = false;
+
+    if (!get_filter()) {
+      std::cerr << "Vulkan: Failed to execute do_filter\n";
+      return {};
+    }
+
+    // get second buffer's min
+    int second_min;
+    std::tie(second_min, std::ignore) = internal::filter_minmax(filter, pbp);
+
+    if (second_min == max) {
+      pbp[max] = true;
+      break;
+    } else {
+      pbp[second_min] = true;
+    }
+
+    if (iterations % 100 == 0) {
+#ifndef NDEBUG
+      std::cout << "max was " << max << ", second_min is " << second_min
+                << std::endl;
+      // generate blue_noise image from pbp
+      write_pbp_pbm("blue_noise.pbm");
+#endif
+    }
+  }
+
+  if (!get_filter()) {
+    std::cerr << "Vulkan: Failed to execute do_filter (at end)\n";
+    return {};
+  }
+#ifndef NDEBUG
+  internal::write_filter(filter, width, "filter_out_final.pgm");
+  write_pbp_pbm("blue_noise.pbm");
+  {
+    image::Bl pbp_image = toBl(pbp, width);
+    pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_before.png");
+  }
+#endif
+
+  std::cout << "Generating dither_array...\n";
+#ifndef NDEBUG
+  std::unordered_set<unsigned int> set;
+#endif
+  std::vector<unsigned int> dither_array(size, 0);
+  {
+    // Ranking the minority pixels clears them one by one, so work on pbp and
+    // restore it from this copy afterwards.
+    const std::vector<bool> pbp_copy(pbp);
+    std::cout << "Ranking minority pixels...\n";
+    for (unsigned int i = pixel_count; i-- > 0;) {
+#ifndef NDEBUG
+      std::cout << i << ' ';
+#endif
+      if (!get_filter()) {
+        std::cerr << "Vulkan: Failed to execute get_filter (ranking minority "
+                     "pixels)\n";
+        return {};
+      }
+      int max;
+      std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp);
+      pbp.at(max) = false;
+      dither_array.at(max) = i;
+#ifndef NDEBUG
+      if (set.find(max) != set.end()) {
+        std::cout << "\nWARNING: Reusing index " << max << '\n';
+      } else {
+        set.insert(max);
+      }
+#endif
+    }
+    pbp = pbp_copy;
+#ifndef NDEBUG
+    image::Bl min_pixels = internal::rangeToBl(dither_array, width);
+    min_pixels.writeToFile(image::file_type::PNG, true, "da_min_pixels.png");
+#endif
+  }
+  std::cout << "\nRanking remainder of first half of pixels...\n";
+  for (unsigned int i = pixel_count;
+       i < static_cast<unsigned int>((size + 1) / 2); ++i) {
+#ifndef NDEBUG
+    std::cout << i << ' ';
+#endif
+    if (!get_filter()) {
+      std::cerr << "Vulkan: Failed to execute get_filter (ranking first "
+                   "half)\n";
+      return {};
+    }
+    int min;
+    std::tie(min, std::ignore) = internal::filter_minmax(filter, pbp);
+    pbp.at(min) = true;
+    dither_array.at(min) = i;
+#ifndef NDEBUG
+    if (set.find(min) != set.end()) {
+      std::cout << "\nWARNING: Reusing index " << min << '\n';
+    } else {
+      set.insert(min);
+    }
+#endif
+  }
+#ifndef NDEBUG
+  {
+    image::Bl min_pixels = internal::rangeToBl(dither_array, width);
+    min_pixels.writeToFile(image::file_type::PNG, true, "da_mid_pixels.png");
+    if (get_filter()) {
+      internal::write_filter(filter, width, "filter_mid.pgm");
+    }
+    image::Bl pbp_image = toBl(pbp, width);
+    pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_mid.png");
+  }
+#endif
+  std::cout << "\nRanking last half of pixels...\n";
+  // From here on get_filter() uploads the inverted pattern.
+  reversed_pbp = true;
+  for (unsigned int i = (size + 1) / 2; i < static_cast<unsigned int>(size);
+       ++i) {
+#ifndef NDEBUG
+    std::cout << i << ' ';
+#endif
+    if (!get_filter()) {
+      std::cerr << "Vulkan: Failed to execute get_filter (ranking last "
+                   "half)\n";
+      return {};
+    }
+    int max;
+    std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp);
+    pbp.at(max) = true;
+    dither_array.at(max) = i;
+#ifndef NDEBUG
+    if (set.find(max) != set.end()) {
+      std::cout << "\nWARNING: Reusing index " << max << '\n';
+    } else {
+      set.insert(max);
+    }
+#endif
+  }
+  std::cout << std::endl;
+
+#ifndef NDEBUG
+  {
+    if (get_filter()) {
+      internal::write_filter(filter, width, "filter_after.pgm");
+    }
+    image::Bl pbp_image = toBl(pbp, width);
+    pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_after.png");
+  }
+#endif
+
+  return dither_array;
+}
+
 #endif  // DITHERING_VULKAN_ENABLED == 1
 
 #include "image.hpp"
@@ -840,10 +1177,18 @@ image::Bl dither::blue_noise(int width, int height, int threads,
         goto ENDOF_VULKAN;
       }
     }
+
+    auto result = dither::internal::blue_noise_vulkan_impl(
+        device, phys_device, command_buffer, command_pool, compute_queue,
+        pbp_buf, compute_pipeline, compute_pipeline_layout,
+        compute_descriptor_set, filter_out_buf, width, height);
+    if (!result.empty()) {
+      return internal::rangeToBl(result, width);
+    }
+    std::cout << "ERROR: Empty result\n";
+    return {};
   }
 ENDOF_VULKAN:
-  std::clog << "TODO: Remove this once Vulkan support is implemented.\n";
-  return {};
 #else
   std::clog << "WARNING: Not compiled with Vulkan support!\n";
 #endif  // DITHERING_VULKAN_ENABLED == 1
index e060014450a07dcef271b0992ac1ca72d76a1f99..c2cd5584a9b14941be196e1e1f7a5f085671575e 100644 (file)
@@ -12,15 +12,13 @@ int twoToOne(int x, int y, int width, int height) {
   return x + y * width;
 }
 
-layout(std140, binding = 0) readonly buffer PreComputed {
-  float precomputed[];
-};
+layout(binding = 0) readonly buffer PreComputed { float precomputed[]; };
 
-layout(std140, binding = 1) writeonly buffer FilterOut { float filter_out[]; };
+layout(binding = 1) writeonly buffer FilterOut { float filter_out[]; };
 
-layout(std140, binding = 2) readonly buffer PBP { int pbp[]; };
+layout(binding = 2) readonly buffer PBP { int pbp[]; };
 
-layout(std140, binding = 3) readonly buffer Other {
+layout(binding = 3) readonly buffer Other {
   int width;
   int height;
   int filter_size;
index 3be540bb9ffdb87c259e8c885bf57c91ad2a1ee4..d64b0e5cd4635ad972209c96f3676f20d88ce0ce 100644 (file)
@@ -7,7 +7,6 @@
 #if DITHERING_VULKAN_ENABLED == 1
 #include <vulkan/vulkan.h>
 #endif
-#include <sys/sysinfo.h>
 
 #include <cassert>
 #include <chrono>
@@ -60,6 +59,13 @@ bool vulkan_create_buffer(VkDevice device, VkPhysicalDevice phys_dev,
 void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool,
                         VkQueue queue, VkBuffer src_buf, VkBuffer dst_buf,
                         VkDeviceSize size);
+
+// Vulkan-backed dither-array generation, called by dither::blue_noise().
+// Takes already-created Vulkan objects: the compute pipeline with its layout
+// and descriptor set, a command buffer/pool/queue to record and submit work,
+// and the device buffers for the pattern input (pbp_buf) and the filter
+// output (filter_out_buf). Produces a vector of width * height entries, one
+// rank per pixel index.
+std::vector<unsigned int> blue_noise_vulkan_impl(
+    VkDevice device, VkPhysicalDevice phys_device,
+    VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue,
+    VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
+    VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int width,
+    const int height);
 #endif
 
 #if DITHERING_OPENCL_ENABLED == 1