Compare commits


6 commits

Author SHA1 Message Date
Stephen Seo 728d872af4 Vulkan compute: move buffer init to before fn call 2024-04-03 18:23:46 +09:00
Stephen Seo 2e6f414baf Vulkan compute: resize max/min out buffers 2024-04-03 18:22:14 +09:00
    Change the max/min out buffers to have the same size as the max/min in
    buffers.
Stephen Seo 97cfcddfb3 Vulkan compute: minor refactoring 2024-04-03 18:22:14 +09:00
Stephen Seo 06115a7a2d Vulkan compute: combine all minmax calls 2024-04-03 18:22:13 +09:00
    This commit combines the minmax executions done via Vulkan compute. The
    previous implementation executed compute in vulkan_minmax with a new
    command buffer each time. This implementation records all required
    compute executions for vulkan_minmax in a single command buffer and uses
    pipeline barriers to ensure the enqueued compute calls stay in order. (A
    sketch of this single-command-buffer pattern follows the commit list.)
Stephen Seo 52e6a09abd Do "minmax" on Vulkan compute 2024-04-03 18:20:22 +09:00
    This was an attempt to speed up blue-noise generation with Vulkan
    compute, but this implementation seems to slow it down instead.
Stephen Seo ef7f623fb3 Optimize buffer writes in vulkan_get_filter calls 2024-04-03 17:55:37 +09:00
    When only a single item in the pbp buffer has changed, only update that
    single item in the staging buffer and the device buffer prior to Vulkan
    compute execution on the buffers. (A sketch of this piecewise update
    follows the commit list.)
2 changed files with 204 additions and 65 deletions
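
Commit 06115a7a2d describes recording all of the minmax compute passes into
one command buffer so that a single submit replaces one submit per pass. The
following is a minimal sketch of that recording pattern, not the repository's
vulkan_minmax code: the function name record_reduction_passes and its
parameters are hypothetical, and a real reduction would also shrink the
dispatch size and swap the in/out descriptor sets between passes.

#include <vulkan/vulkan.h>

#include <cstddef>
#include <cstdint>

// Record every reduction pass into one command buffer, with a
// compute-to-compute memory barrier between passes so each dispatch sees the
// previous pass's writes, then submit once and wait for completion.
void record_reduction_passes(VkCommandBuffer cmd, VkQueue queue,
                             VkPipeline pipeline,
                             VkPipelineLayout pipeline_layout,
                             VkDescriptorSet desc_set, std::size_t pass_count,
                             std::uint32_t group_count) {
  vkResetCommandBuffer(cmd, 0);

  VkCommandBufferBeginInfo begin_info{};
  begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
  vkBeginCommandBuffer(cmd, &begin_info);

  vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
  vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
                          0, 1, &desc_set, 0, nullptr);

  VkMemoryBarrier barrier{};
  barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
  barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
  barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

  for (std::size_t pass = 0; pass < pass_count; ++pass) {
    vkCmdDispatch(cmd, group_count, 1, 1);
    if (pass + 1 < pass_count) {
      // Make this pass's shader writes visible to the next pass's reads.
      vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &barrier,
                           0, nullptr, 0, nullptr);
    }
  }
  vkEndCommandBuffer(cmd);

  VkSubmitInfo submit_info{};
  submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit_info.commandBufferCount = 1;
  submit_info.pCommandBuffers = &cmd;
  vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
  vkQueueWaitIdle(queue);
}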
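
Commit ef7f623fb3, together with the new vulkan_flush_buffer_pieces and
vulkan_copy_buffer_pieces helpers in this diff, uploads only the bytes that
actually changed instead of the whole pbp buffer. Below is a minimal sketch of
how those helpers might be driven for one changed pbp index; the wrapper name
update_single_pbp_entry, its parameter list, and the include path are
assumptions for illustration and not code from the repository.

#include <vulkan/vulkan.h>

#include <cstddef>
#include <tuple>
#include <vector>

#include "blue_noise.hpp"  // assumed header declaring the *_pieces helpers

// Write one changed element through the persistent staging-buffer mapping,
// flush only the atom-aligned range covering it, and copy only that small
// region into the device-local buffer.
void update_single_pbp_entry(VkDevice device, VkCommandPool command_pool,
                             VkQueue queue, VkDeviceSize phys_atom_size,
                             int *pbp_mapped_int, VkDeviceMemory staging_mem,
                             VkBuffer staging_buf, VkBuffer device_buf,
                             std::size_t idx, bool value) {
  pbp_mapped_int[idx] = value ? 1 : 0;

  // One (size, offset) piece describing the single changed int.
  std::vector<std::tuple<VkDeviceSize, VkDeviceSize>> pieces;
  pieces.emplace_back(std::make_tuple(sizeof(int), idx * sizeof(int)));

  dither::internal::vulkan_flush_buffer_pieces(device, phys_atom_size,
                                               staging_mem, pieces);
  dither::internal::vulkan_copy_buffer_pieces(device, command_pool, queue,
                                              staging_buf, device_buf, pieces);
}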


@@ -161,6 +161,49 @@ void dither::internal::vulkan_copy_buffer(VkDevice device,
vkFreeCommandBuffers(device, command_pool, 1, &command_buf);
}
void dither::internal::vulkan_copy_buffer_pieces(
VkDevice device, VkCommandPool command_pool, VkQueue queue,
VkBuffer src_buf, VkBuffer dst_buf,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize>> &pieces) {
VkCommandBufferAllocateInfo alloc_info{};
alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
alloc_info.commandPool = command_pool;
alloc_info.commandBufferCount = 1;
VkCommandBuffer command_buf;
vkAllocateCommandBuffers(device, &alloc_info, &command_buf);
VkCommandBufferBeginInfo begin_info{};
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vkBeginCommandBuffer(command_buf, &begin_info);
std::vector<VkBufferCopy> regions;
for (auto tuple : pieces) {
VkBufferCopy copy_region{};
copy_region.size = std::get<0>(tuple);
copy_region.srcOffset = std::get<1>(tuple);
copy_region.dstOffset = std::get<1>(tuple);
regions.push_back(copy_region);
}
vkCmdCopyBuffer(command_buf, src_buf, dst_buf, regions.size(),
regions.data());
vkEndCommandBuffer(command_buf);
VkSubmitInfo submit_info{};
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &command_buf;
vkQueueSubmit(queue, 1, &submit_info, VK_NULL_HANDLE);
vkQueueWaitIdle(queue);
vkFreeCommandBuffers(device, command_pool, 1, &command_buf);
}
void dither::internal::vulkan_flush_buffer(VkDevice device,
VkDeviceMemory memory) {
VkMappedMemoryRange range{};
@@ -175,6 +218,40 @@ void dither::internal::vulkan_flush_buffer(VkDevice device,
}
}
void dither::internal::vulkan_flush_buffer_pieces(
VkDevice device, const VkDeviceSize phys_atom_size, VkDeviceMemory memory,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize>> &pieces) {
std::vector<VkMappedMemoryRange> ranges;
for (auto tuple : pieces) {
VkMappedMemoryRange range{};
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.memory = memory;
range.offset = std::get<1>(tuple);
range.size = std::get<0>(tuple);
// TODO dynamically handle multiple pieces for more efficient flushes.
// This may not be necessary if pieces is always size 1.
// Align the offset down and round the size up to the non-coherent atom size
// so the flushed range still fully covers the piece.
range.size += range.offset % phys_atom_size;
range.offset = (range.offset / phys_atom_size) * phys_atom_size;
if (range.size % phys_atom_size != 0) {
range.size = ((range.size / phys_atom_size) + 1) * phys_atom_size;
}
ranges.push_back(range);
}
if (vkFlushMappedMemoryRanges(device, ranges.size(), ranges.data()) !=
VK_SUCCESS) {
std::clog << "WARNING: vulkan_flush_buffer failed!\n";
}
}
void dither::internal::vulkan_invalidate_buffer(VkDevice device,
VkDeviceMemory memory) {
VkMappedMemoryRange range{};
@@ -267,6 +344,15 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
&staging_filter_buffer_mem);
float *filter_mapped_float = (float *)filter_mapped;
std::vector<std::size_t> changed_indices;
VkDeviceSize phys_atom_size;
{
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(phys_device, &props);
phys_atom_size = props.limits.nonCoherentAtomSize;
}
{
#ifndef NDEBUG
printf("Inserting %d pixels into image of max count %d\n", pixel_count,
@@ -285,12 +371,12 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
#endif
}
if (!vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer)) {
if (!vulkan_get_filter(
device, phys_atom_size, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set, filter_out_buf, size, pbp,
reversed_pbp, global_size, pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer, nullptr)) {
std::cerr << "Vulkan: Failed to execute get_filter at start!\n";
} else {
#ifndef NDEBUG
@@ -309,12 +395,13 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
printf("Iteration %d\n", ++iterations);
#endif
if (!vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer)) {
if (!vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout,
descriptor_set, filter_out_buf, size, pbp,
reversed_pbp, global_size, pbp_mapped_int,
staging_pbp_buffer, staging_pbp_buffer_mem,
staging_filter_buffer_mem, staging_filter_buffer,
&changed_indices)) {
std::cerr << "Vulkan: Failed to execute do_filter\n";
break;
}
@@ -343,12 +430,15 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
pbp[max] = false;
if (!vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer)) {
changed_indices.push_back(max);
if (!vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout,
descriptor_set, filter_out_buf, size, pbp,
reversed_pbp, global_size, pbp_mapped_int,
staging_pbp_buffer, staging_pbp_buffer_mem,
staging_filter_buffer_mem, staging_filter_buffer,
&changed_indices)) {
std::cerr << "Vulkan: Failed to execute do_filter\n";
break;
}
@@ -369,9 +459,11 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
if (second_min == max) {
pbp[max] = true;
changed_indices.push_back(max);
break;
} else {
pbp[second_min] = true;
changed_indices.push_back(second_min);
}
#ifndef NDEBUG
@@ -393,12 +485,12 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
#endif
}
if (!vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer)) {
if (!vulkan_get_filter(
device, phys_atom_size, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set, filter_out_buf, size, pbp,
reversed_pbp, global_size, pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer, &changed_indices)) {
std::cerr << "Vulkan: Failed to execute do_filter (at end)\n";
} else {
#ifndef NDEBUG
@@ -437,12 +529,12 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
#ifndef NDEBUG
std::cout << i << ' ';
#endif
vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout,
descriptor_set, filter_out_buf, size, pbp, reversed_pbp,
global_size, pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer);
staging_filter_buffer, &changed_indices);
auto vulkan_minmax_opt = vulkan_minmax(
device, phys_device, command_buffer, command_pool, queue,
minmax_pipeline, minmax_pipeline_layout, minmax_desc_sets, max_in_buf,
@@ -456,6 +548,7 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
std::tie(std::ignore, max) = vulkan_minmax_opt.value();
pbp.at(max) = false;
dither_array.at(max) = i;
changed_indices.push_back(max);
#ifndef NDEBUG
if (set.find(max) != set.end()) {
std::cout << "\nWARNING: Reusing index " << max << '\n';
@@ -475,11 +568,12 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
#ifndef NDEBUG
std::cout << i << ' ';
#endif
vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set, filter_out_buf,
size, pbp, reversed_pbp, global_size, pbp_mapped_int,
staging_pbp_buffer, staging_pbp_buffer_mem,
staging_filter_buffer_mem, staging_filter_buffer);
vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer, &changed_indices);
auto vulkan_minmax_opt = vulkan_minmax(
device, phys_device, command_buffer, command_pool, queue,
minmax_pipeline, minmax_pipeline_layout, minmax_desc_sets, max_in_buf,
@@ -493,6 +587,7 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
std::tie(min, std::ignore) = vulkan_minmax_opt.value();
pbp.at(min) = true;
dither_array.at(min) = i;
changed_indices.push_back(min);
#ifndef NDEBUG
if (set.find(min) != set.end()) {
std::cout << "\nWARNING: Reusing index " << min << '\n';
@@ -505,11 +600,12 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
{
image::Bl min_pixels = internal::rangeToBl(dither_array, width);
min_pixels.writeToFile(image::file_type::PNG, true, "da_mid_pixels.png");
vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set, filter_out_buf,
size, pbp, reversed_pbp, global_size, pbp_mapped_int,
staging_pbp_buffer, staging_pbp_buffer_mem,
staging_filter_buffer_mem, staging_filter_buffer);
vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer, &changed_indices);
internal::write_filter(vulkan_buf_to_vec(filter_mapped_float, size), width,
"filter_mid.pgm");
image::Bl pbp_image = toBl(pbp, width);
@@ -518,15 +614,21 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
#endif
std::cout << "\nRanking last half of pixels...\n";
reversed_pbp = true;
bool first_reversed_run = true;
for (unsigned int i = (size + 1) / 2; i < (unsigned int)size; ++i) {
#ifndef NDEBUG
std::cout << i << ' ';
#endif
vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set, filter_out_buf,
size, pbp, reversed_pbp, global_size, pbp_mapped_int,
staging_pbp_buffer, staging_pbp_buffer_mem,
staging_filter_buffer_mem, staging_filter_buffer);
if (first_reversed_run) {
changed_indices.clear();
first_reversed_run = false;
}
vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer, &changed_indices);
auto vulkan_minmax_opt = vulkan_minmax(
device, phys_device, command_buffer, command_pool, queue,
minmax_pipeline, minmax_pipeline_layout, minmax_desc_sets, max_in_buf,
@@ -540,6 +642,7 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
std::tie(std::ignore, max) = vulkan_minmax_opt.value();
pbp.at(max) = true;
dither_array.at(max) = i;
changed_indices.push_back(max);
#ifndef NDEBUG
if (set.find(max) != set.end()) {
std::cout << "\nWARNING: Reusing index " << max << '\n';
@@ -552,11 +655,12 @@ std::vector<unsigned int> dither::internal::blue_noise_vulkan_impl(
#ifndef NDEBUG
{
vulkan_get_filter(device, command_buffer, command_pool, queue, pbp_buf,
pipeline, pipeline_layout, descriptor_set, filter_out_buf,
size, pbp, reversed_pbp, global_size, pbp_mapped_int,
staging_pbp_buffer, staging_pbp_buffer_mem,
staging_filter_buffer_mem, staging_filter_buffer);
vulkan_get_filter(device, phys_atom_size, command_buffer, command_pool,
queue, pbp_buf, pipeline, pipeline_layout, descriptor_set,
filter_out_buf, size, pbp, reversed_pbp, global_size,
pbp_mapped_int, staging_pbp_buffer,
staging_pbp_buffer_mem, staging_filter_buffer_mem,
staging_filter_buffer, nullptr);
internal::write_filter(vulkan_buf_to_vec(filter_mapped_float, size), width,
"filter_after.pgm");
image::Bl pbp_image = toBl(pbp, width);


@@ -21,6 +21,7 @@
#include <random>
#include <stdexcept>
#include <thread>
#include <tuple>
#include <unordered_set>
#include <vector>
@@ -66,8 +67,15 @@ void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool,
VkQueue queue, VkBuffer src_buf, VkBuffer dst_buf,
VkDeviceSize size, VkDeviceSize src_offset = 0,
VkDeviceSize dst_offset = 0);
void vulkan_copy_buffer_pieces(
VkDevice device, VkCommandPool command_pool, VkQueue queue,
VkBuffer src_buf, VkBuffer dst_buf,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > &pieces);
void vulkan_flush_buffer(VkDevice device, VkDeviceMemory memory);
void vulkan_flush_buffer_pieces(
VkDevice device, const VkDeviceSize phys_atom_size, VkDeviceMemory memory,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > &pieces);
void vulkan_invalidate_buffer(VkDevice device, VkDeviceMemory memory);
std::vector<unsigned int> blue_noise_vulkan_impl(
@@ -85,30 +93,57 @@ std::vector<unsigned int> blue_noise_vulkan_impl(
std::vector<float> vulkan_buf_to_vec(float *mapped, unsigned int size);
inline bool vulkan_get_filter(
VkDevice device, VkCommandBuffer command_buffer, VkCommandPool command_pool,
VkQueue queue, VkBuffer pbp_buf, VkPipeline pipeline,
VkPipelineLayout pipeline_layout, VkDescriptorSet descriptor_set,
VkBuffer filter_out_buf, const int size, std::vector<bool> &pbp,
bool reversed_pbp, const std::size_t global_size, int *pbp_mapped_int,
VkBuffer staging_pbp_buffer, VkDeviceMemory staging_pbp_buffer_mem,
VkDeviceMemory staging_filter_buffer_mem, VkBuffer staging_filter_buffer) {
VkDevice device, const VkDeviceSize phys_atom_size,
VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue,
VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int size,
std::vector<bool> &pbp, bool reversed_pbp, const std::size_t global_size,
int *pbp_mapped_int, VkBuffer staging_pbp_buffer,
VkDeviceMemory staging_pbp_buffer_mem,
VkDeviceMemory staging_filter_buffer_mem, VkBuffer staging_filter_buffer,
std::vector<std::size_t> *changed) {
vkResetCommandBuffer(command_buffer, 0);
if (reversed_pbp) {
for (unsigned int i = 0; i < pbp.size(); ++i) {
pbp_mapped_int[i] = pbp[i] ? 0 : 1;
if (changed != nullptr && changed->size() > 0) {
if (reversed_pbp) {
for (auto idx : *changed) {
pbp_mapped_int[idx] = pbp[idx] ? 0 : 1;
}
} else {
for (auto idx : *changed) {
pbp_mapped_int[idx] = pbp[idx] ? 1 : 0;
}
}
} else {
for (unsigned int i = 0; i < pbp.size(); ++i) {
pbp_mapped_int[i] = pbp[i] ? 1 : 0;
if (reversed_pbp) {
for (unsigned int i = 0; i < pbp.size(); ++i) {
pbp_mapped_int[i] = pbp[i] ? 0 : 1;
}
} else {
for (unsigned int i = 0; i < pbp.size(); ++i) {
pbp_mapped_int[i] = pbp[i] ? 1 : 0;
}
}
}
vulkan_flush_buffer(device, staging_pbp_buffer_mem);
// Copy pbp buffer.
vulkan_copy_buffer(device, command_pool, queue, staging_pbp_buffer, pbp_buf,
size * sizeof(int));
if (changed != nullptr && changed->size() > 0) {
std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > pieces;
for (auto idx : *changed) {
pieces.emplace_back(std::make_tuple(sizeof(int), idx * sizeof(int)));
}
vulkan_flush_buffer_pieces(device, phys_atom_size, staging_pbp_buffer_mem,
pieces);
vulkan_copy_buffer_pieces(device, command_pool, queue, staging_pbp_buffer,
pbp_buf, pieces);
changed->clear();
} else {
vulkan_flush_buffer(device, staging_pbp_buffer_mem);
vulkan_copy_buffer(device, command_pool, queue, staging_pbp_buffer, pbp_buf,
size * sizeof(int));
}
VkCommandBufferBeginInfo begin_info{};
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;