Compare commits

...

7 commits

Author SHA1 Message Date
bd7afeb8bd Vulkan compute: move buffer init to before fn call 2024-04-01 12:50:54 +09:00
320a33842b Vulkan compute: resize max/min out buffers
Change max/min out buffers to have same size as max/min in buffers.
2024-04-01 12:50:54 +09:00
2abf3de665 Vulkan compute: minor refactoring 2024-04-01 12:50:54 +09:00
11de490e94 Vulkan compute: combine all minmax calls
This commit combines the minmax execution via Vulkan compute. The
previous implementation executed compute in vulkan_minmax with a new
command buffer each time. This implementation combines all required
executions of compute in vulkan_minmax in a single command buffer and
uses a pipeline to ensure the enqueued compute calls stay in order.
2024-04-01 12:50:54 +09:00
7bcb385625 Do "minmax" on Vulkan compute
Was an attempt to speed up blue-noise-generation with Vulkan compute,
but this implementation seems to slow it down instead.
2024-04-01 12:50:54 +09:00
ff36314092 Update README.md 2024-04-01 11:30:42 +09:00
2854aa5104 Make OpenCL default backend
Order of backends to use:
OpenCL -> Vulkan -> CPU threads

Unless I figure out a way to make Vulkan faster, OpenCL will be the
default backend used, or at least it will have higher priority than
Vulkan if both OpenCL and Vulkan are available.
2024-04-01 11:19:55 +09:00
4 changed files with 958 additions and 107 deletions

View file

@ -1,4 +1,5 @@
# Blue_Noise_Generation
Currently, this project only generates blue-noise, which can be used for dithering.
The blue-noise can be generated via OpenCL or with threads on the CPU.
The blue-noise can be generated via OpenCL, Vulkan compute, or with threads on
the CPU.

File diff suppressed because it is too large Load diff

View file

@ -47,6 +47,12 @@ struct QueueFamilyIndices {
QueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice device);
// Element type carrying a float value together with two integer fields.
// NOTE(review): the minmax compute shader declares a struct with the same
// name, field order and field types; presumably the two must stay in sync so
// that host and device agree on the buffer layout -- confirm before
// reordering or resizing any field.
struct FloatAndIndex {
// The value that is compared.
float value;
// Integer flag; presumably this element's entry in the binary pattern
// ("pbp") -- TODO confirm against the definition of vulkan_minmax.
int pbp;
// presumably the element's index in the original array -- TODO confirm
int idx;
};
std::optional<uint32_t> vulkan_find_memory_type(VkPhysicalDevice phys_dev,
uint32_t t_filter,
VkMemoryPropertyFlags props);
@ -58,7 +64,8 @@ bool vulkan_create_buffer(VkDevice device, VkPhysicalDevice phys_dev,
void vulkan_copy_buffer(VkDevice device, VkCommandPool command_pool,
VkQueue queue, VkBuffer src_buf, VkBuffer dst_buf,
VkDeviceSize size);
VkDeviceSize size, VkDeviceSize src_offset = 0,
VkDeviceSize dst_offset = 0);
void vulkan_flush_buffer(VkDevice device, VkDeviceMemory memory);
void vulkan_invalidate_buffer(VkDevice device, VkDeviceMemory memory);
@ -67,8 +74,13 @@ std::vector<unsigned int> blue_noise_vulkan_impl(
VkDevice device, VkPhysicalDevice phys_device,
VkCommandBuffer command_buffer, VkCommandPool command_pool, VkQueue queue,
VkBuffer pbp_buf, VkPipeline pipeline, VkPipelineLayout pipeline_layout,
VkDescriptorSet descriptor_set, VkBuffer filter_out_buf, const int width,
const int height);
VkDescriptorSet descriptor_set, VkBuffer filter_out_buf,
VkPipeline minmax_pipeline, VkPipelineLayout minmax_pipeline_layout,
std::array<VkDescriptorSet, 2> minmax_descriptor_sets, VkBuffer max_in_buf,
VkBuffer min_in_buf, VkBuffer max_out_buf, VkBuffer min_out_buf,
VkBuffer state_buf, const int width, const int height,
VkBuffer minmax_staging_buf, VkDeviceMemory minmax_staging_buf_mem,
void *minmax_mapped);
std::vector<float> vulkan_buf_to_vec(float *mapped, unsigned int size);
@ -145,6 +157,16 @@ inline bool vulkan_get_filter(
return true;
}
// Declaration only; the definition is not part of this view.
//
// Returns an optional pair of ints. Presumably these are the indices picked
// by the min/max search, with an empty optional signalling failure -- TODO
// confirm against the definition, including which element is the minimum
// and which is the maximum.
//
// NOTE(review): max_in_buf / min_in_buf / max_out_buf / min_out_buf /
// state_buf look like the host-side handles for the five storage buffers of
// the minmax compute shader, and staging_buf / staging_buf_mem /
// staging_mapped like a host-visible staging buffer with its mapping --
// verify against the caller.
// `size` is presumably the number of elements, and `filter_mapped` / `pbp`
// the inputs used to fill the in-buffers -- TODO confirm.
std::optional<std::pair<int, int>> vulkan_minmax(
VkDevice device, VkPhysicalDevice phys_dev, VkCommandBuffer command_buffer,
VkCommandPool command_pool, VkQueue queue, VkPipeline minmax_pipeline,
VkPipelineLayout minmax_pipeline_layout,
std::array<VkDescriptorSet, 2> minmax_desc_sets, VkBuffer max_in_buf,
VkBuffer min_in_buf, VkBuffer max_out_buf, VkBuffer min_out_buf,
VkBuffer state_buf, const int size, const float *const filter_mapped,
const std::vector<bool> &pbp, VkBuffer staging_buf,
VkDeviceMemory staging_buf_mem, void *staging_mapped);
#endif
#if DITHERING_OPENCL_ENABLED == 1

View file

@ -0,0 +1,54 @@
#version 450
// Element type of the four min/max storage buffers declared in this shader.
// NOTE(review): the host code presumably fills these buffers using a struct
// with the same field order and types -- keep the two in sync.
struct FloatAndIndex {
// Value compared when choosing between two elements.
float value;
// Flag tested against zero in main(): the max selection prefers elements
// with pbp != 0, the min selection prefers elements with pbp == 0.
int pbp;
// Copied along unchanged with the element; presumably its original index --
// TODO confirm against the host code.
int idx;
};
// Inputs of the pass: candidates for the max search (binding 0) and for the
// min search (binding 1). Both are declared read-only.
layout(binding = 0) readonly buffer MaxIn { FloatAndIndex max_in[]; };
layout(binding = 1) readonly buffer MinIn { FloatAndIndex min_in[]; };
// Outputs of the pass (bindings 2 and 3). Both are declared write-only.
layout(binding = 2) writeonly buffer MaxOut { FloatAndIndex max_out[]; };
layout(binding = 3) writeonly buffer MinOut { FloatAndIndex min_out[]; };
// Single int used by main() as the number of input elements to process.
layout(binding = 4) readonly buffer State { int size; };
// Work-group size: 256 invocations along x.
layout(local_size_x = 256) in;
// One pass of a pairwise reduction: invocation i looks at input elements
// 2*i and 2*i + 1 of max_in / min_in and writes one chosen element of each
// to max_out[i] / min_out[i].
//
// Max rule: an element with pbp != 0 beats one with pbp == 0; when both have
// pbp != 0 the strictly greater value wins, otherwise the second element.
// Min rule: an element with pbp == 0 beats one with pbp != 0; when both have
// pbp == 0 the strictly smaller value wins, otherwise the second element.
void main() {
  uint gid = gl_GlobalInvocationID.x;

  // Only ceil(size / 2) invocations have work to do.
  if (gid >= (size + 1) / 2) {
    return;
  }

  uint first = gid * 2;
  uint second = first + 1;

  // Odd element count: the last element has no partner, so it is forwarded
  // unchanged. The slot after it only gets its pbp flag set, to a value that
  // the corresponding rule above never prefers over an eligible element.
  if (second >= size) {
    max_out[gid] = max_in[first];
    max_out[gid + 1].pbp = 0;
    min_out[gid] = min_in[first];
    min_out[gid + 1].pbp = 1;
    return;
  }

  // Max selection.
  FloatAndIndex max_a = max_in[first];
  FloatAndIndex max_b = max_in[second];
  bool keep_first_max =
      max_a.pbp != 0 && (max_b.pbp == 0 || max_a.value > max_b.value);
  if (keep_first_max) {
    max_out[gid] = max_a;
  } else {
    max_out[gid] = max_b;
  }

  // Min selection.
  FloatAndIndex min_a = min_in[first];
  FloatAndIndex min_b = min_in[second];
  bool keep_first_min =
      min_a.pbp == 0 && (min_b.pbp != 0 || min_a.value < min_b.value);
  if (keep_first_min) {
    min_out[gid] = min_a;
  } else {
    min_out[gid] = min_b;
  }
}