Compare commits

...

6 commits

Author SHA1 Message Date
728d872af4 Vulkan compute: move buffer init to before fn call 2024-04-03 18:23:46 +09:00
2e6f414baf Vulkan compute: resize max/min out buffers
Change max/min out buffers to have same size as max/min in buffers.
2024-04-03 18:22:14 +09:00
97cfcddfb3 Vulkan compute: minor refactoring 2024-04-03 18:22:14 +09:00
06115a7a2d Vulkan compute: combine all minmax calls
This commit combines the minmax execution via Vulkan compute. The
previous implementation executed compute in vulkan_minmax with a new
command buffer each time. This implementation combines all required
executions of compute in vulkan_minmax in a single command buffer and
uses a pipeline to ensure the enqueued compute calls stay in order.
2024-04-03 18:22:13 +09:00
52e6a09abd Do "minmax" on Vulkan compute
This was an attempt to speed up blue-noise generation with Vulkan compute,
but this implementation seems to slow it down instead.
2024-04-03 18:20:22 +09:00
ef7f623fb3 Optimize buffer writes in vulkan_get_filter calls
When only a single item in pbp buffer is changed, only update the single
item in the staging buffer and the device buffer prior to Vulkan compute
execution on buffers.
2024-04-03 17:55:37 +09:00
3 changed files with 1071 additions and 82 deletions

File diff suppressed because it is too large Load diff

View file

@ -21,6 +21,7 @@
#include <random> #include <random>
#include <stdexcept> #include <stdexcept>
#include <thread> #include <thread>
#include <tuple>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
@ -47,6 +48,12 @@ struct QueueFamilyIndices {
QueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice device); QueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice device);
// A float value bundled with two integer tags.
// NOTE(review): the minmax compute shader added alongside this header
// declares a GLSL struct with the same name and the same three members in the
// same order; presumably this is the host-side mirror of that buffer element,
// so the member order/types should not be changed independently -- confirm.
struct FloatAndIndex {
// The value being compared.
float value;
// Integer flag; presumably the 0/1 form of the matching pbp entry -- confirm.
int pbp;
// Presumably the index of the element this value originated from -- confirm.
int idx;
};
std::optional<uint32_t> vulkan_find_memory_type(VkPhysicalDevice phys_dev, std::optional<uint32_t> vulkan_find_memory_type(VkPhysicalDevice phys_dev,
uint32_t t_filter, uint32_t t_filter,
VkMemoryPropertyFlags props); VkMemoryPropertyFlags props);
@ -58,45 +65,85 @@ bool vulkan_create_buffer(VkDevice device, VkPhysicalDevice phys_dev,
void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool, void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool,
VkQueue queue, VkBuffer src_buf, VkBuffer dst_buf, VkQueue queue, VkBuffer src_buf, VkBuffer dst_buf,
VkDeviceSize size); VkDeviceSize size, VkDeviceSize src_offset = 0,
VkDeviceSize dst_offset = 0);
// Copies a list of sub-ranges ("pieces") from src_buf to dst_buf.
//
// Each entry of `pieces` is a (size, offset) tuple in bytes; this is the order
// in which vulkan_get_filter builds them (sizeof(int), idx * sizeof(int)).
// NOTE(review): presumably the same offset is applied to both the source and
// the destination buffer -- confirm against the implementation.
void vulkan_copy_buffer_pieces(
VkDevice device, VkCommandPool command_pool, VkQueue queue,
VkBuffer src_buf, VkBuffer dst_buf,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > &pieces);
void vulkan_flush_buffer(VkDevice device, VkDeviceMemory memory); void vulkan_flush_buffer(VkDevice device, VkDeviceMemory memory);
// Flushes only the listed sub-ranges ("pieces") of `memory` instead of the
// whole allocation.
//
// Each entry of `pieces` is a (size, offset) tuple in bytes, matching what
// vulkan_get_filter passes in.
// NOTE(review): phys_atom_size is presumably the physical device's
// nonCoherentAtomSize limit, used to align the flushed ranges -- confirm
// against the implementation and the caller.
void vulkan_flush_buffer_pieces(
VkDevice device, const VkDeviceSize phys_atom_size, VkDeviceMemory memory,
const std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > &pieces);
void vulkan_invalidate_buffer(VkDevice device, VkDeviceMemory memory); void vulkan_invalidate_buffer(VkDevice device, VkDeviceMemory memory);
std::vector<unsigned int> blue_noise_vulkan_impl( std::vector<unsigned int> blue_noise_vulkan_impl(
VkDevice device, VkPhysicalDevice phys_device, VkDevice device, VkPhysicalDevice phys_device,
VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue, VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue,
VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout, VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int width, VkDescriptorSet descriptor_set, VkBuffer filter_out_buf,
const int height); VkPipeline minmax_pipeline, VkPipelineLayout minmax_pipeline_layout,
std::array<VkDescriptorSet, 2> minmax_descriptor_sets, VkBuffer max_in_buf,
VkBuffer min_in_buf, VkBuffer max_out_buf, VkBuffer min_out_buf,
VkBuffer state_buf, const int width, const int height,
VkBuffer minmax_staging_buf, VkDeviceMemory minmax_staging_buf_mem,
void *minmax_mapped);
std::vector<float> vulkan_buf_to_vec(float *mapped, unsigned int size); std::vector<float> vulkan_buf_to_vec(float *mapped, unsigned int size);
inline bool vulkan_get_filter( inline bool vulkan_get_filter(
VkDevice device, VkCommandBuffer command_buffer, VkCommandPool command_pool, VkDevice device, const VkDeviceSize phys_atom_size,
VkQueue queue, VkBuffer pbp_buf, VkPipeline pipeline, VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue,
VkPipelineLayout pipeline_layout, VkDescriptorSet descriptor_set, VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
VkBuffer filter_out_buf, const int size, std::vector<bool> &pbp, VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int size,
bool reversed_pbp, const std::size_t global_size, int *pbp_mapped_int, std::vector<bool> &pbp, bool reversed_pbp, const std::size_t global_size,
VkBuffer staging_pbp_buffer, VkDeviceMemory staging_pbp_buffer_mem, int *pbp_mapped_int, VkBuffer staging_pbp_buffer,
VkDeviceMemory staging_filter_buffer_mem, VkBuffer staging_filter_buffer) { VkDeviceMemory staging_pbp_buffer_mem,
VkDeviceMemory staging_filter_buffer_mem, VkBuffer staging_filter_buffer,
std::vector<std::size_t> *changed) {
vkResetCommandBuffer(command_buffer, 0); vkResetCommandBuffer(command_buffer, 0);
if (reversed_pbp) { if (changed != nullptr && changed->size() > 0) {
for (unsigned int i = 0; i < pbp.size(); ++i) { if (reversed_pbp) {
pbp_mapped_int[i] = pbp[i] ? 0 : 1; for (auto idx : *changed) {
pbp_mapped_int[idx] = pbp[idx] ? 0 : 1;
}
} else {
for (auto idx : *changed) {
pbp_mapped_int[idx] = pbp[idx] ? 1 : 0;
}
} }
} else { } else {
for (unsigned int i = 0; i < pbp.size(); ++i) { if (reversed_pbp) {
pbp_mapped_int[i] = pbp[i] ? 1 : 0; for (unsigned int i = 0; i < pbp.size(); ++i) {
pbp_mapped_int[i] = pbp[i] ? 0 : 1;
}
} else {
for (unsigned int i = 0; i < pbp.size(); ++i) {
pbp_mapped_int[i] = pbp[i] ? 1 : 0;
}
} }
} }
vulkan_flush_buffer(device, staging_pbp_buffer_mem);
// Copy pbp buffer. // Copy pbp buffer.
vulkan_copy_buffer(device, command_pool, queue, staging_pbp_buffer, pbp_buf, if (changed != nullptr && changed->size() > 0) {
size * sizeof(int)); std::vector<std::tuple<VkDeviceSize, VkDeviceSize> > pieces;
for (auto idx : *changed) {
pieces.emplace_back(std::make_tuple(sizeof(int), idx * sizeof(int)));
}
vulkan_flush_buffer_pieces(device, phys_atom_size, staging_pbp_buffer_mem,
pieces);
vulkan_copy_buffer_pieces(device, command_pool, queue, staging_pbp_buffer,
pbp_buf, pieces);
changed->clear();
} else {
vulkan_flush_buffer(device, staging_pbp_buffer_mem);
vulkan_copy_buffer(device, command_pool, queue, staging_pbp_buffer, pbp_buf,
size * sizeof(int));
}
VkCommandBufferBeginInfo begin_info{}; VkCommandBufferBeginInfo begin_info{};
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
@ -145,6 +192,16 @@ inline bool vulkan_get_filter(
return true; return true;
} }
// Runs the "minmax" search with Vulkan compute using the minmax pipeline, its
// two descriptor sets, and the max/min in/out, state and staging buffers.
//
// Inputs are `size` filter values read through `filter_mapped` together with
// the matching `pbp` flags.
// Returns an optional pair of ints.
// NOTE(review): presumably std::nullopt signals failure and the pair holds
// the indices of the selected min and max elements; the order of the pair is
// not visible from this declaration -- confirm against the implementation.
std::optional<std::pair<int, int>> vulkan_minmax(
VkDevice device, VkPhysicalDevice phys_dev, VkCommandBuffer command_buffer,
VkCommandPool command_pool, VkQueue queue, VkPipeline minmax_pipeline,
VkPipelineLayout minmax_pipeline_layout,
std::array<VkDescriptorSet, 2> minmax_desc_sets, VkBuffer max_in_buf,
VkBuffer min_in_buf, VkBuffer max_out_buf, VkBuffer min_out_buf,
VkBuffer state_buf, const int size, const float *const filter_mapped,
const std::vector<bool> &pbp, VkBuffer staging_buf,
VkDeviceMemory staging_buf_mem, void *staging_mapped);
#endif #endif
#if DITHERING_OPENCL_ENABLED == 1 #if DITHERING_OPENCL_ENABLED == 1

View file

@ -0,0 +1,54 @@
#version 450
// Element type of all four min/max storage buffers used by this shader.
struct FloatAndIndex {
// Value compared by the reduction.
float value;
// Eligibility flag: an element takes part in the max reduction only when
// pbp != 0 and in the min reduction only when pbp == 0.
int pbp;
// Carried along unchanged with the winning element.
// Presumably the element's index in the original input -- confirm on the host.
int idx;
};
// Inputs of one reduction pass (read-only).
layout(binding = 0) readonly buffer MaxIn { FloatAndIndex max_in[]; };
layout(binding = 1) readonly buffer MinIn { FloatAndIndex min_in[]; };
// Outputs of one reduction pass (write-only); invocation i writes element i,
// and the last invocation of an odd-sized pass also touches element i + 1.
layout(binding = 2) writeonly buffer MaxOut { FloatAndIndex max_out[]; };
layout(binding = 3) writeonly buffer MinOut { FloatAndIndex min_out[]; };
// Number of valid elements in max_in / min_in for this pass.
layout(binding = 4) readonly buffer State { int size; };
// 256 invocations per workgroup along x.
layout(local_size_x = 256) in;
// One pairwise reduction pass over max_in / min_in.
//
// Invocation i looks at input elements 2*i and 2*i+1 and forwards one of them
// to max_out[i] and one of them to min_out[i]:
//   - max: only elements with pbp != 0 compete; the strictly larger value wins.
//   - min: only elements with pbp == 0 compete; the strictly smaller value wins.
// If only one element of a pair is eligible it is forwarded; in every other
// case (tie, or first element ineligible) the second element is forwarded.
void main() {
  uint out_idx = gl_GlobalInvocationID.x;

  // Only ceil(size / 2) outputs exist; surplus invocations have nothing to do.
  if (out_idx >= (size + 1) / 2) {
    return;
  }

  uint left = out_idx * 2;
  uint right = left + 1;

  if (right >= size) {
    // Odd element count: the last element has no partner, so it is forwarded
    // unchanged. The slot after it is tagged with the pbp value that makes it
    // ineligible for the respective reduction (0 for max, 1 for min).
    max_out[out_idx] = max_in[left];
    max_out[out_idx + 1].pbp = 0;
    min_out[out_idx] = min_in[left];
    min_out[out_idx + 1].pbp = 1;
    return;
  }

  // Max reduction for this pair.
  FloatAndIndex max_a = max_in[left];
  FloatAndIndex max_b = max_in[right];
  bool take_left_max =
      max_a.pbp != 0 && (max_b.pbp == 0 || max_a.value > max_b.value);
  if (take_left_max) {
    max_out[out_idx] = max_a;
  } else {
    max_out[out_idx] = max_b;
  }

  // Min reduction for this pair.
  FloatAndIndex min_a = min_in[left];
  FloatAndIndex min_b = min_in[right];
  bool take_left_min =
      min_a.pbp == 0 && (min_b.pbp != 0 || min_a.value < min_b.value);
  if (take_left_min) {
    min_out[out_idx] = min_a;
  } else {
    min_out[out_idx] = min_b;
  }
}