WIP combine Vulkan filter and min_max to cmd buf

This commit does some preparation for a new Vulkan compute shader
"blue_noise_filter.glsl". Note that every call of "filter" is followed
by a call to "min_max". The goal is to combine a single invocation of
Vulkan "filter" and log(n) invocations of Vulkan "min_max" in the same
command buffer, which may help with performance. This will be achieved
by passing the "max_in_buf" to the new "filter" compute shader, which
will hold the results of applying the precomputed-gaussian. This buffer
will then be copied to "min_in_buf", and then all is set to call the
Vulkan "min_max" compute shader log(n) times.

Note that log(n) comes from the fact that the Vulkan "min_max" compute
shader does a "reduce" on the input buffers, where each SIMD invocation
compares two values and reduces them to one. Doing this approximately
log(n) (log base 2) times will gradually reduce the input to a single
minimum and a single maximum. This works because there are two separate
"layouts" for the same "min_max" shader in which the "in" and "out"
buffers are swapped, so alternating between the two layouts on each call
ensures that the proper buffers are reduced. (This work has already been
done. What's left is to combine the "filter" and "min_max" Vulkan compute
shaders into the same Vulkan command buffer. But first, the actual setup
for the new Vulkan "filter" compute shader still needs some work.)
This commit is contained in:
Stephen Seo 2024-04-10 17:43:24 +09:00
parent 728d872af4
commit dfc78540db
3 changed files with 301 additions and 3 deletions

View file

@ -1251,6 +1251,50 @@ image::Bl dither::blue_noise(int width, int height, int threads,
&compute_desc_set_layout);
}
// Descriptor set layout for the "filter_in_out" compute shader: three
// storage-buffer bindings (0, 1, 2), all visible to the compute stage only.
VkDescriptorSetLayout filter_in_out_layout;
utility::Cleanup filter_in_out_layout_cleanup{};
{
  // The three bindings differ only in their binding number, so fill them in
  // a loop.
  std::array<VkDescriptorSetLayoutBinding, 3> storage_bindings{};
  uint32_t next_binding = 0;
  for (VkDescriptorSetLayoutBinding &entry : storage_bindings) {
    entry.binding = next_binding++;
    entry.descriptorCount = 1;
    entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    entry.pImmutableSamplers = nullptr;
    entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
  }

  VkDescriptorSetLayoutCreateInfo create_info{};
  create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  create_info.pBindings = storage_bindings.data();
  create_info.bindingCount = storage_bindings.size();

  const VkResult create_result = vkCreateDescriptorSetLayout(
      device, &create_info, nullptr, &filter_in_out_layout);
  if (create_result != VK_SUCCESS) {
    std::clog << "WARNING: Failed to create compute descriptor set layout "
                 "(filter_in_out)!\n";
    goto ENDOF_VULKAN;
  }

  // Register the destroy callback only after creation succeeded.
  // NOTE(review): presumably utility::Cleanup runs the callback when it is
  // destroyed -- confirm against utility::Cleanup.
  filter_in_out_layout_cleanup = utility::Cleanup(
      [device](void *ptr) {
        vkDestroyDescriptorSetLayout(
            device, *static_cast<VkDescriptorSetLayout *>(ptr), nullptr);
      },
      &filter_in_out_layout);
}
std::array<VkDescriptorSetLayout, 2> minmax_desc_set_layouts{};
utility::Cleanup cleanup_minmax_compute_desc_set_layout{};
{
@ -1344,6 +1388,31 @@ image::Bl dither::blue_noise(int width, int height, int threads,
goto ENDOF_VULKAN;
}
// Locate blue_noise_filter.glsl in one of the known relative locations and
// compile it to "compute_filter.spv" with the external "glslc" tool.
std::array<const char *, 3> filter_in_out_filenames{
    "blue_noise_filter.glsl", "src/blue_noise_filter.glsl",
    "../src/blue_noise_filter.glsl"};
success = false;
for (const char *candidate_path : filter_in_out_filenames) {
  // Probe for the file; move on to the next candidate if it cannot be opened.
  std::ifstream probe(candidate_path);
  if (!probe.good()) {
    continue;
  }
  probe.close();

  std::string glslc_cmd(
      "glslc -fshader-stage=compute -o compute_filter.spv ");
  glslc_cmd += candidate_path;

  // The first candidate that exists decides the outcome: a failed compile
  // aborts the Vulkan path instead of trying the remaining candidates.
  if (std::system(glslc_cmd.c_str()) != 0) {
    std::clog << "WARNING: Failed to compile " << candidate_path << "!\n";
    goto ENDOF_VULKAN;
  }
  success = true;
  break;
}
if (!success) {
  std::clog << "WARNING: Could not find blue_noise_filter.glsl!\n";
  goto ENDOF_VULKAN;
}
std::array<const char *, 3> minmax_filenames{
"blue_noise_minmax.glsl", "src/blue_noise_minmax.glsl",
"../src/blue_noise_minmax.glsl"};
@ -1455,6 +1524,94 @@ image::Bl dither::blue_noise(int width, int height, int threads,
&compute_pipeline);
}
// Pipeline layout and compute pipeline for the "filter_in_out" shader. The
// shader is read from "compute_filter.spv" (written by the glslc step) and
// uses the single descriptor set layout "filter_in_out_layout".
VkPipelineLayout filter_in_out_pipeline_layout;
VkPipeline filter_in_out_pipeline;
utility::Cleanup cleanup_filter_in_out_pipeline_layout{};
utility::Cleanup cleanup_filter_in_out_pipeline{};
{
  // Load shader.
  std::vector<char> shader;
  {
    // SPIR-V is binary data: open in binary mode so that no newline
    // translation can alter the bytes or the reported size.
    std::ifstream ifs("compute_filter.spv", std::ios_base::binary);
    if (!ifs.good()) {
      std::clog << "WARNING: Failed to find compute_filter.spv!\n";
      goto ENDOF_VULKAN;
    }
    ifs.seekg(0, std::ios_base::end);
    const std::streamoff size = ifs.tellg();
    // tellg() yields -1 on failure, and Vulkan requires codeSize to be a
    // non-zero multiple of 4, so reject anything else up front.
    if (size <= 0 || size % 4 != 0) {
      std::clog << "WARNING: compute_filter.spv has an invalid size!\n";
      goto ENDOF_VULKAN;
    }
    shader.resize(static_cast<std::size_t>(size));
    ifs.seekg(0);
    ifs.read(shader.data(), size);
    if (!ifs) {
      std::clog << "WARNING: Failed to read compute_filter.spv!\n";
      goto ENDOF_VULKAN;
    }
    ifs.close();
  }

  VkShaderModuleCreateInfo shader_module_create_info{};
  shader_module_create_info.sType =
      VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  shader_module_create_info.codeSize = shader.size();
  shader_module_create_info.pCode =
      reinterpret_cast<const uint32_t *>(shader.data());

  VkShaderModule compute_shader_module;
  if (vkCreateShaderModule(device, &shader_module_create_info, nullptr,
                           &compute_shader_module) != VK_SUCCESS) {
    std::clog
        << "WARNING: Failed to create shader module (filter_in_out)!\n";
    goto ENDOF_VULKAN;
  }
  // The shader module is only needed while the pipeline is being created, so
  // its cleanup object is local to this block.
  utility::Cleanup cleanup_shader_module(
      [device](void *ptr) {
        vkDestroyShaderModule(device, *static_cast<VkShaderModule *>(ptr),
                              nullptr);
      },
      &compute_shader_module);

  VkPipelineShaderStageCreateInfo shader_stage_info{};
  shader_stage_info.sType =
      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  shader_stage_info.stage = VK_SHADER_STAGE_COMPUTE_BIT;
  shader_stage_info.module = compute_shader_module;
  shader_stage_info.pName = "main";

  VkPipelineLayoutCreateInfo pipeline_layout_info{};
  pipeline_layout_info.sType =
      VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
  pipeline_layout_info.setLayoutCount = 1;
  pipeline_layout_info.pSetLayouts = &filter_in_out_layout;

  if (vkCreatePipelineLayout(device, &pipeline_layout_info, nullptr,
                             &filter_in_out_pipeline_layout) !=
      VK_SUCCESS) {
    std::clog
        << "WARNING: Failed to create pipeline layout (filter_in_out)!\n";
    goto ENDOF_VULKAN;
  }
  cleanup_filter_in_out_pipeline_layout = utility::Cleanup(
      [device](void *ptr) {
        vkDestroyPipelineLayout(device, *static_cast<VkPipelineLayout *>(ptr),
                                nullptr);
      },
      &filter_in_out_pipeline_layout);

  VkComputePipelineCreateInfo pipeline_info{};
  pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
  pipeline_info.layout = filter_in_out_pipeline_layout;
  pipeline_info.stage = shader_stage_info;

  if (vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipeline_info,
                               nullptr,
                               &filter_in_out_pipeline) != VK_SUCCESS) {
    std::clog << "WARNING: Failed to create pipeline (filter_in_out)!\n";
    goto ENDOF_VULKAN;
  }
  cleanup_filter_in_out_pipeline = utility::Cleanup(
      [device](void *ptr) {
        vkDestroyPipeline(device, *static_cast<VkPipeline *>(ptr), nullptr);
      },
      &filter_in_out_pipeline);
}
VkPipelineLayout minmax_compute_pipeline_layout;
VkPipeline minmax_compute_pipeline;
utility::Cleanup cleanup_minmax_pipeline_layout{};
@ -1874,6 +2031,34 @@ image::Bl dither::blue_noise(int width, int height, int threads,
&descriptor_pool);
}
// Descriptor pool for the "filter_in_out" shader: room for exactly one
// descriptor set holding three storage-buffer descriptors.
VkDescriptorPool filter_in_out_desc_pool;
utility::Cleanup cleanup_filter_in_out_desc_pool{};
{
  VkDescriptorPoolSize storage_buffer_budget{};
  storage_buffer_budget.descriptorCount = 3;
  storage_buffer_budget.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

  VkDescriptorPoolCreateInfo create_info{};
  create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  create_info.maxSets = 1;
  create_info.pPoolSizes = &storage_buffer_budget;
  create_info.poolSizeCount = 1;

  const VkResult create_result = vkCreateDescriptorPool(
      device, &create_info, nullptr, &filter_in_out_desc_pool);
  if (create_result != VK_SUCCESS) {
    std::clog
        << "WARNING: Failed to create descriptor pool (filter_in_out)!\n";
    goto ENDOF_VULKAN;
  }

  // Register the destroy callback only once the pool actually exists.
  cleanup_filter_in_out_desc_pool = utility::Cleanup(
      [device](void *ptr) {
        vkDestroyDescriptorPool(device, *static_cast<VkDescriptorPool *>(ptr),
                                nullptr);
      },
      &filter_in_out_desc_pool);
}
VkDescriptorPool minmax_descriptor_pool;
utility::Cleanup cleanup_minmax_descriptor_pool{};
{
@ -1971,6 +2156,62 @@ image::Bl dither::blue_noise(int width, int height, int threads,
descriptor_writes.data(), 0, nullptr);
}
// Descriptor set for the "filter_in_out" shader. It is allocated from the
// dedicated "filter_in_out_desc_pool" and binds:
//   binding 0 -> precomputed_buf
//   binding 1 -> max_in_buf
//   binding 2 -> other_buf
VkDescriptorSet filter_in_out_desc_set;
{
  VkDescriptorSetAllocateInfo alloc_info{};
  alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  // Use the pool created for this set (3 storage buffers, 1 set) rather than
  // the pool belonging to the original compute shader.
  alloc_info.descriptorPool = filter_in_out_desc_pool;
  alloc_info.descriptorSetCount = 1;
  alloc_info.pSetLayouts = &filter_in_out_layout;

  if (vkAllocateDescriptorSets(device, &alloc_info,
                               &filter_in_out_desc_set) != VK_SUCCESS) {
    std::clog << "WARNING: Failed to allocate descriptor set!\n";
    goto ENDOF_VULKAN;
  }

  // Each binding needs its own VkDescriptorBufferInfo; all of them must stay
  // alive until vkUpdateDescriptorSets() has been called below.
  VkDescriptorBufferInfo precomputed_info{};
  precomputed_info.buffer = precomputed_buf;
  precomputed_info.offset = 0;
  precomputed_info.range = VK_WHOLE_SIZE;

  VkDescriptorBufferInfo max_in_buf_info{};
  max_in_buf_info.buffer = max_in_buf;
  max_in_buf_info.offset = 0;
  max_in_buf_info.range = VK_WHOLE_SIZE;

  VkDescriptorBufferInfo other_info{};
  other_info.buffer = other_buf;
  other_info.offset = 0;
  other_info.range = VK_WHOLE_SIZE;

  // Index in this array == shader binding number.
  const std::array<const VkDescriptorBufferInfo *, 3> buffer_infos{
      &precomputed_info, &max_in_buf_info, &other_info};

  std::array<VkWriteDescriptorSet, 3> descriptor_writes{};
  for (uint32_t binding = 0; binding < descriptor_writes.size(); ++binding) {
    VkWriteDescriptorSet &write = descriptor_writes[binding];
    write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    write.dstSet = filter_in_out_desc_set;
    write.dstBinding = binding;
    write.dstArrayElement = 0;
    write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    write.descriptorCount = 1;
    write.pBufferInfo = buffer_infos[binding];
  }

  vkUpdateDescriptorSets(device, descriptor_writes.size(),
                         descriptor_writes.data(), 0, nullptr);
}
std::array<VkDescriptorSet, 2> minmax_compute_desc_sets;
{
VkDescriptorSetAllocateInfo alloc_info{};

View file

@ -70,12 +70,12 @@ void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool,
void vulkan_copy_buffer_pieces(
VkDevice device, VkCommandPool command_pool, VkQueue queue,
VkBuffer src_buf, VkBuffer dst_buf,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > &pieces);
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize>> &pieces);
void vulkan_flush_buffer(VkDevice device, VkDeviceMemory memory);
void vulkan_flush_buffer_pieces(
VkDevice device, const VkDeviceSize phys_atom_size, VkDeviceMemory memory,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > &pieces);
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize>> &pieces);
void vulkan_invalidate_buffer(VkDevice device, VkDeviceMemory memory);
std::vector<unsigned int> blue_noise_vulkan_impl(
@ -128,7 +128,7 @@ inline bool vulkan_get_filter(
// Copy pbp buffer.
if (changed != nullptr && changed->size() > 0) {
std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > pieces;
std::vector<std::tuple<VkDeviceSize, VkDeviceSize>> pieces;
for (auto idx : *changed) {
pieces.emplace_back(std::make_tuple(sizeof(int), idx * sizeof(int)));
}

View file

@ -0,0 +1,57 @@
#version 450
// Element type of the FilterInOut buffer (binding 1).
struct FloatAndIndex {
// Filter result for this element; main() accumulates precomputed[] weights
// into it.
float value;
// Read by main() as a flag: only elements whose pbp is non-zero contribute
// to a neighbour's filter value.
int pbp;
// Set by main() to the element's own linear index.
int idx;
};
// Maps a 2D coordinate onto a linear, row-major index into a width x height
// grid, wrapping both axes so that out-of-range coordinates (including
// negative ones) land back inside the grid.
int twoToOne(int x, int y, int width, int height) {
  // Lift negative coordinates into the non-negative range first, so the
  // modulo below never sees a negative operand.
  for (; x < 0; x += width) {
  }
  for (; y < 0; y += height) {
  }
  int col = x % width;
  int row = y % height;
  return col + row * width;
}
// Binding 0: filter weights, indexed by main() as a filter_size x filter_size
// row-major grid.
layout(binding = 0) readonly buffer PreComputed { float precomputed[]; };

// Binding 1: per-element input/output. main() reads the "pbp" field of
// neighbouring elements and writes "value" and "idx" of its own element, so
// this block must NOT be declared writeonly (reading a writeonly buffer
// variable is a compile-time error).
layout(binding = 1) buffer FilterInOut {
  FloatAndIndex filter_in_out[];
};

// Binding 2: grid dimensions and filter size.
layout(binding = 2) readonly buffer Other {
  int width;
  int height;
  int filter_size;
};

// One invocation per element; 256 invocations per workgroup along x.
layout(local_size_x = 256) in;
// Computes the filter value for one element of the width x height grid.
// For every tap (p, q) of the filter_size x filter_size window centred on the
// element (with wrap-around at the grid edges), precomputed[p, q] is added to
// the sum when the element under that tap has a non-zero "pbp". The result is
// stored in filter_in_out[index].value, and filter_in_out[index].idx is set
// to the element's own linear index.
void main() {
  uint index = gl_GlobalInvocationID.x;
  // The dispatch may be rounded up to a multiple of the workgroup size;
  // surplus invocations have nothing to do.
  if (index >= width * height) {
    return;
  }

  // GLSL has no implicit uint -> int conversion, so convert explicitly once.
  // This is lossless because index < width * height, which is an int.
  int i = int(index);
  filter_in_out[index].idx = i;

  int x = i % width;
  int y = i / width;

  // Accumulate locally and store once instead of doing a read-modify-write
  // on buffer memory for every tap.
  float sum = 0.0F;
  for (int q = 0; q < filter_size; ++q) {
    // Adding "height" keeps the coordinate non-negative in the common case;
    // twoToOne() wraps it back into range.
    int q_prime = height - filter_size / 2 + y + q;
    for (int p = 0; p < filter_size; ++p) {
      int p_prime = width - filter_size / 2 + x + p;
      if (filter_in_out[twoToOne(p_prime, q_prime, width, height)].pbp != 0) {
        sum += precomputed[twoToOne(p, q, filter_size, filter_size)];
      }
    }
  }
  filter_in_out[index].value = sum;
}