From dfc78540db9b5d158f29291ffd543a39d3791e04 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Wed, 10 Apr 2024 17:43:24 +0900 Subject: [PATCH] WIP combine Vulkan filter and min_max to cmd buf This commit does some preparation for a new Vulkan compute shader "blue_noise_filter.glsl". Note that every call of "filter" is followed by a call to "min_max". The goal is to combine a single invocation of Vulkan "filter" and log(n) invocations of Vulkan "min_max" in the same command buffer, which may help with performance. This will be achieved by passing "max_in_buf" to the new "filter" compute shader; that buffer will hold the results of applying the precomputed Gaussian. This buffer will then be copied to "min_in_buf", and then all is set to call the Vulkan "min_max" compute shader log(n) times. Note that log(n) comes from the fact that the Vulkan "min_max" compute shader does a "reduce" on the input buffers where each SIMD invocation compares two values and reduces them to one. Doing this approximately log(n) (log base 2) times will gradually reduce the input to a single minimum and a single maximum. This works because there are two separate "layouts" for the same "min_max" shader in which the "in" and "out" buffers are swapped, so switching to the other layout on each call ensures that the proper buffers are reduced. (This work has already been done. What's left is to combine the "filter" and "min_max" Vulkan compute shaders into the same Vulkan command buffer. But first, the setup for the new Vulkan "filter" compute shader still needs some work.) 
--- src/blue_noise.cpp | 241 +++++++++++++++++++++++++++++++++++++ src/blue_noise.hpp | 6 +- src/blue_noise_filter.glsl | 57 +++++++++ 3 files changed, 301 insertions(+), 3 deletions(-) create mode 100644 src/blue_noise_filter.glsl diff --git a/src/blue_noise.cpp b/src/blue_noise.cpp index cf55962..d71496a 100644 --- a/src/blue_noise.cpp +++ b/src/blue_noise.cpp @@ -1251,6 +1251,50 @@ image::Bl dither::blue_noise(int width, int height, int threads, &compute_desc_set_layout); } + VkDescriptorSetLayout filter_in_out_layout; + utility::Cleanup filter_in_out_layout_cleanup{}; + { + std::array compute_layout_bindings{}; + compute_layout_bindings[0].binding = 0; + compute_layout_bindings[0].descriptorCount = 1; + compute_layout_bindings[0].descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + compute_layout_bindings[0].pImmutableSamplers = nullptr; + compute_layout_bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + compute_layout_bindings[1].binding = 1; + compute_layout_bindings[1].descriptorCount = 1; + compute_layout_bindings[1].descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + compute_layout_bindings[1].pImmutableSamplers = nullptr; + compute_layout_bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + compute_layout_bindings[2].binding = 2; + compute_layout_bindings[2].descriptorCount = 1; + compute_layout_bindings[2].descriptorType = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + compute_layout_bindings[2].pImmutableSamplers = nullptr; + compute_layout_bindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + VkDescriptorSetLayoutCreateInfo layout_info{}; + layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layout_info.bindingCount = compute_layout_bindings.size(); + layout_info.pBindings = compute_layout_bindings.data(); + + if (vkCreateDescriptorSetLayout(device, &layout_info, nullptr, + &filter_in_out_layout) != VK_SUCCESS) { + std::clog << "WARNING: Failed to create compute descriptor set layout " + "(filter_in_out)!\n"; + 
goto ENDOF_VULKAN; + } + filter_in_out_layout_cleanup = utility::Cleanup( + [device](void *ptr) { + vkDestroyDescriptorSetLayout( + device, *((VkDescriptorSetLayout *)ptr), nullptr); + }, + &filter_in_out_layout); + } + std::array minmax_desc_set_layouts{}; utility::Cleanup cleanup_minmax_compute_desc_set_layout{}; { @@ -1344,6 +1388,31 @@ image::Bl dither::blue_noise(int width, int height, int threads, goto ENDOF_VULKAN; } + std::array filter_in_out_filenames{ + "blue_noise_filter.glsl", "src/blue_noise_filter.glsl", + "../src/blue_noise_filter.glsl"}; + success = false; + for (const auto filename : filter_in_out_filenames) { + std::ifstream ifs(filename); + if (ifs.good()) { + ifs.close(); + std::string command( + "glslc -fshader-stage=compute -o compute_filter.spv "); + command.append(filename); + if (std::system(command.c_str()) != 0) { + std::clog << "WARNING: Failed to compile " << filename << "!\n"; + goto ENDOF_VULKAN; + } else { + success = true; + break; + } + } + } + if (!success) { + std::clog << "WARNING: Could not find blue_noise_filter.glsl!\n"; + goto ENDOF_VULKAN; + } + std::array minmax_filenames{ "blue_noise_minmax.glsl", "src/blue_noise_minmax.glsl", "../src/blue_noise_minmax.glsl"}; @@ -1455,6 +1524,94 @@ image::Bl dither::blue_noise(int width, int height, int threads, &compute_pipeline); } + VkPipelineLayout filter_in_out_pipeline_layout; + VkPipeline filter_in_out_pipeline; + utility::Cleanup cleanup_filter_in_out_pipeline_layout{}; + utility::Cleanup cleanup_filter_in_out_pipeline{}; + { + // Load shader. 
+ std::vector<char> shader; // Raw bytes of the compiled SPIR-V module. + { + std::ifstream ifs("compute_filter.spv", std::ios_base::binary); // Binary mode: SPIR-V must be read without newline translation. + if (!ifs.good()) { + std::clog << "WARNING: Failed to find compute_filter.spv!\n"; + goto ENDOF_VULKAN; + } + ifs.seekg(0, std::ios_base::end); // Seek to the end so tellg() reports the file size. + auto size = ifs.tellg(); + shader.resize(size); + + ifs.seekg(0); + ifs.read(shader.data(), size); + ifs.close(); + } + + VkShaderModuleCreateInfo shader_module_create_info{}; + shader_module_create_info.sType = + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + shader_module_create_info.codeSize = shader.size(); // Byte count, since shader holds chars. + shader_module_create_info.pCode = + reinterpret_cast<const uint32_t *>(shader.data()); + + VkShaderModule compute_shader_module; + if (vkCreateShaderModule(device, &shader_module_create_info, nullptr, + &compute_shader_module) != VK_SUCCESS) { + std::clog + << "WARNING: Failed to create shader module (filter_in_out)!\n"; + goto ENDOF_VULKAN; + } + + utility::Cleanup cleanup_shader_module( // Scoped to this block: the module is destroyed when the block exits. + [device](void *ptr) { + vkDestroyShaderModule(device, *((VkShaderModule *)ptr), nullptr); + }, + &compute_shader_module); + + VkPipelineShaderStageCreateInfo shader_stage_info{}; + shader_stage_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + shader_stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT; + shader_stage_info.module = compute_shader_module; + shader_stage_info.pName = "main"; + + VkPipelineLayoutCreateInfo pipeline_layout_info{}; + pipeline_layout_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_layout_info.setLayoutCount = 1; + pipeline_layout_info.pSetLayouts = &filter_in_out_layout; + + if (vkCreatePipelineLayout(device, &pipeline_layout_info, nullptr, + &filter_in_out_pipeline_layout) != + VK_SUCCESS) { + std::clog + << "WARNING: Failed to create pipeline layout (filter_in_out)!\n"; + goto ENDOF_VULKAN; + } + cleanup_filter_in_out_pipeline_layout = utility::Cleanup( + [device](void *ptr) { + vkDestroyPipelineLayout(device, *((VkPipelineLayout *)ptr), + nullptr); + }, + &filter_in_out_pipeline_layout); + + 
VkComputePipelineCreateInfo pipeline_info{}; + pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_info.layout = filter_in_out_pipeline_layout; + pipeline_info.stage = shader_stage_info; + + if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, + nullptr, + &filter_in_out_pipeline) != VK_SUCCESS) { + std::clog << "WARNING: Failed to create pipeline (filter_in_out)!\n"; + goto ENDOF_VULKAN; + } + cleanup_filter_in_out_pipeline = utility::Cleanup( + [device](void *ptr) { + vkDestroyPipeline(device, *((VkPipeline *)ptr), nullptr); + }, + &filter_in_out_pipeline); + } + VkPipelineLayout minmax_compute_pipeline_layout; VkPipeline minmax_compute_pipeline; utility::Cleanup cleanup_minmax_pipeline_layout{}; @@ -1874,6 +2031,34 @@ image::Bl dither::blue_noise(int width, int height, int threads, &descriptor_pool); } + VkDescriptorPool filter_in_out_desc_pool; + utility::Cleanup cleanup_filter_in_out_desc_pool{}; + { + VkDescriptorPoolSize pool_size{}; + pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + pool_size.descriptorCount = 3; + + VkDescriptorPoolCreateInfo pool_info{}; + pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_info.poolSizeCount = 1; + pool_info.pPoolSizes = &pool_size; + pool_info.maxSets = 1; + + if (vkCreateDescriptorPool(device, &pool_info, nullptr, + &filter_in_out_desc_pool) != VK_SUCCESS) { + std::clog + << "WARNING: Failed to create descriptor pool (filter_in_out)!\n"; + goto ENDOF_VULKAN; + } + + cleanup_filter_in_out_desc_pool = utility::Cleanup( + [device](void *ptr) { + vkDestroyDescriptorPool(device, *((VkDescriptorPool *)ptr), + nullptr); + }, + &filter_in_out_desc_pool); + } + VkDescriptorPool minmax_descriptor_pool; utility::Cleanup cleanup_minmax_descriptor_pool{}; { @@ -1971,6 +2156,62 @@ image::Bl dither::blue_noise(int width, int height, int threads, descriptor_writes.data(), 0, nullptr); } + VkDescriptorSet filter_in_out_desc_set; + { + VkDescriptorSetAllocateInfo 
alloc_info{}; + alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + alloc_info.descriptorPool = filter_in_out_desc_pool; // Dedicated pool sized for this set (3 storage buffers, 1 set). + alloc_info.descriptorSetCount = 1; + alloc_info.pSetLayouts = &filter_in_out_layout; + + if (vkAllocateDescriptorSets(device, &alloc_info, + &filter_in_out_desc_set) != VK_SUCCESS) { + std::clog << "WARNING: Failed to allocate descriptor set!\n"; + goto ENDOF_VULKAN; + } + + std::array<VkWriteDescriptorSet, 3> descriptor_writes{}; // One write per binding of filter_in_out_layout. + + descriptor_writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_writes[0].dstSet = filter_in_out_desc_set; + descriptor_writes[0].dstBinding = 0; + descriptor_writes[0].dstArrayElement = 0; + descriptor_writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_writes[0].descriptorCount = 1; + VkDescriptorBufferInfo precomputed_info{}; // Binding 0: precomputed_buf. + precomputed_info.buffer = precomputed_buf; + precomputed_info.offset = 0; + precomputed_info.range = VK_WHOLE_SIZE; + descriptor_writes[0].pBufferInfo = &precomputed_info; + + descriptor_writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_writes[1].dstSet = filter_in_out_desc_set; + descriptor_writes[1].dstBinding = 1; + descriptor_writes[1].dstArrayElement = 0; + descriptor_writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_writes[1].descriptorCount = 1; + VkDescriptorBufferInfo max_in_buf_info{}; // Binding 1: max_in_buf; fill this struct, not precomputed_info. + max_in_buf_info.buffer = max_in_buf; + max_in_buf_info.offset = 0; + max_in_buf_info.range = VK_WHOLE_SIZE; + descriptor_writes[1].pBufferInfo = &max_in_buf_info; + + descriptor_writes[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_writes[2].dstSet = filter_in_out_desc_set; + descriptor_writes[2].dstBinding = 2; + descriptor_writes[2].dstArrayElement = 0; + descriptor_writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptor_writes[2].descriptorCount = 1; + VkDescriptorBufferInfo other_info{}; // Binding 2: other_buf; fill this struct, not precomputed_info. + other_info.buffer = other_buf; + other_info.offset = 0; + other_info.range = 
VK_WHOLE_SIZE; + descriptor_writes[2].pBufferInfo = &other_info; + + vkUpdateDescriptorSets(device, descriptor_writes.size(), + descriptor_writes.data(), 0, nullptr); + } + std::array minmax_compute_desc_sets; { VkDescriptorSetAllocateInfo alloc_info{}; diff --git a/src/blue_noise.hpp b/src/blue_noise.hpp index 531fe57..c428e6f 100644 --- a/src/blue_noise.hpp +++ b/src/blue_noise.hpp @@ -70,12 +70,12 @@ void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool, void vulkan_copy_buffer_pieces( VkDevice device, VkCommandPool command_pool, VkQueue queue, VkBuffer src_buf, VkBuffer dst_buf, - const std::vector > &pieces); + const std::vector> &pieces); void vulkan_flush_buffer(VkDevice device, VkDeviceMemory memory); void vulkan_flush_buffer_pieces( VkDevice device, const VkDeviceSize phys_atom_size, VkDeviceMemory memory, - const std::vector > &pieces); + const std::vector> &pieces); void vulkan_invalidate_buffer(VkDevice device, VkDeviceMemory memory); std::vector blue_noise_vulkan_impl( @@ -128,7 +128,7 @@ inline bool vulkan_get_filter( // Copy pbp buffer. 
if (changed != nullptr && changed->size() > 0) { - std::vector > pieces; + std::vector> pieces; for (auto idx : *changed) { pieces.emplace_back(std::make_tuple(sizeof(int), idx * sizeof(int))); } diff --git a/src/blue_noise_filter.glsl b/src/blue_noise_filter.glsl new file mode 100644 index 0000000..ce13e9e --- /dev/null +++ b/src/blue_noise_filter.glsl @@ -0,0 +1,57 @@ +#version 450 + +struct FloatAndIndex { // Element type of the FilterInOut buffer. + float value; // Written by main(): sum of precomputed weights. + int pbp; // Read by main(): a neighbour contributes when this is non-zero. + int idx; // Written by main(): the element's own flat index. +}; + +int twoToOne(int x, int y, int width, int height) { // Wraps (x, y) into the width x height grid and returns the flat index. + while (x < 0) { + x += width; + } + while (y < 0) { + y += height; + } + x = x % width; + y = y % height; + return x + y * width; +} + +layout(binding = 0) readonly buffer PreComputed { float precomputed[]; }; + +layout(binding = 1) buffer FilterInOut { // Not writeonly: main() reads .pbp and accumulates into .value. + FloatAndIndex filter_in_out[]; +}; + +layout(binding = 2) readonly buffer Other { + int width; + int height; + int filter_size; +}; + +layout(local_size_x = 256) in; + +void main() { // Each invocation handles one element index and sums the weights of wrapped neighbours whose pbp is non-zero. + uint index = gl_GlobalInvocationID.x; + if (index >= width * height) { // Skip invocations past the last element. + return; + } + + filter_in_out[index].idx = int(index); // Explicit cast: GLSL has no implicit uint-to-int conversion. + + int x = int(index % width); + int y = int(index / width); + + filter_in_out[index].value = 0.0F; + for (int q = 0; q < filter_size; ++q) { + int q_prime = height - filter_size / 2 + y + q; // Offset by height so the wrapped row stays non-negative. + for (int p = 0; p < filter_size; ++p) { + int p_prime = width - filter_size / 2 + x + p; // Offset by width so the wrapped column stays non-negative. + if (filter_in_out[twoToOne(p_prime, q_prime, width, height)].pbp != 0) { + filter_in_out[index].value += + precomputed[twoToOne(p, q, filter_size, filter_size)]; + } + } + } +}