Impl use of OpenCL for generating blue noise
This commit is contained in:
parent
26df83affc
commit
e24606ea2c
5 changed files with 486 additions and 26 deletions
5
Makefile
5
Makefile
|
@ -1,5 +1,6 @@
|
||||||
|
|
||||||
COMMON_FLAGS=-Wall -Wextra -Wpedantic -std=c++17 -lpthread
|
COMMON_FLAGS=-Wall -Wextra -Wpedantic -std=c++17
|
||||||
|
LINKER_FLAGS=-lpthread -lOpenCL
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
CPPFLAGS=${COMMON_FLAGS} -g -O0
|
CPPFLAGS=${COMMON_FLAGS} -g -O0
|
||||||
else
|
else
|
||||||
|
@ -14,7 +15,7 @@ OBJECTS=${subst .cpp,.o,${SOURCES}}
|
||||||
all: Dithering
|
all: Dithering
|
||||||
|
|
||||||
Dithering: ${OBJECTS}
|
Dithering: ${OBJECTS}
|
||||||
${CXX} ${CPPFLAGS} -o Dithering $^
|
${CXX} ${CPPFLAGS} ${LINKER_FLAGS} -o Dithering $^
|
||||||
|
|
||||||
.PHONY:
|
.PHONY:
|
||||||
|
|
||||||
|
|
25
src/blue_noise.cl
Normal file
25
src/blue_noise.cl
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
__kernel void do_filter(
|
||||||
|
__global float *filter_out, __global float *precomputed,
|
||||||
|
__global int *pbp, int width, int height, int filter_size) {
|
||||||
|
int i = get_global_id(0);
|
||||||
|
if(i < 0 || i >= width * height) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
int x = i % width;
|
||||||
|
int y = i / width;
|
||||||
|
|
||||||
|
float sum = 0.0f;
|
||||||
|
for(int q = 0; q < filter_size; ++q) {
|
||||||
|
int q_prime = (height + filter_size / 2 + y - q) % height;
|
||||||
|
for(int p = 0; p < filter_size; ++p) {
|
||||||
|
int p_prime = (width + filter_size / 2 + x - p) % width;
|
||||||
|
if(pbp[p_prime + q_prime * width] != 0) {
|
||||||
|
sum += precomputed[p + q * filter_size];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
filter_out[i] = sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
// vim: syntax=c
|
|
@ -3,42 +3,110 @@
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
#include <CL/opencl.h>
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
# include <cstdio>
|
# include <cstdio>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
std::vector<bool> dither::blue_noise(int width, int height, int threads) {
|
std::vector<bool> dither::blue_noise(int width, int height, int threads) {
|
||||||
|
|
||||||
|
bool use_opencl = false;
|
||||||
|
|
||||||
|
// try to use OpenCL
|
||||||
|
do {
|
||||||
|
cl_device_id device;
|
||||||
|
cl_context context;
|
||||||
|
cl_program program;
|
||||||
|
cl_int err;
|
||||||
|
|
||||||
|
cl_platform_id platform;
|
||||||
|
|
||||||
|
int filter_size = (width + height) / 2;
|
||||||
|
|
||||||
|
err = clGetPlatformIDs(1, &platform, nullptr);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to identify a platform\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to get a device\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
|
||||||
|
|
||||||
|
{
|
||||||
|
char buf[1024];
|
||||||
|
std::ifstream program_file("src/blue_noise.cl");
|
||||||
|
std::string program_string;
|
||||||
|
while(program_file.good()) {
|
||||||
|
program_file.read(buf, 1024);
|
||||||
|
if(int read_count = program_file.gcount(); read_count > 0) {
|
||||||
|
program_string.append(buf, read_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *string_ptr = program_string.c_str();
|
||||||
|
std::size_t program_size = program_string.size();
|
||||||
|
program = clCreateProgramWithSource(context, 1, (const char**)&string_ptr, &program_size, &err);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to create the program\n";
|
||||||
|
clReleaseContext(context);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to build the program\n";
|
||||||
|
|
||||||
|
std::size_t log_size;
|
||||||
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
|
||||||
|
std::unique_ptr<char[]> log = std::make_unique<char[]>(log_size + 1);
|
||||||
|
log[log_size] = 0;
|
||||||
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, log.get(), nullptr);
|
||||||
|
std::cerr << log.get() << std::endl;
|
||||||
|
|
||||||
|
clReleaseProgram(program);
|
||||||
|
clReleaseContext(context);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "OpenCL: Initialized, trying cl_impl..." << std::endl;
|
||||||
|
std::vector<bool> result = internal::blue_noise_cl_impl(
|
||||||
|
width, height, filter_size, context, device, program);
|
||||||
|
|
||||||
|
clReleaseProgram(program);
|
||||||
|
clReleaseContext(context);
|
||||||
|
|
||||||
|
if(!result.empty()) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
} while (false);
|
||||||
|
|
||||||
|
if(!use_opencl) {
|
||||||
|
std::cout << "OpenCL: Failed to setup/use, using regular impl..." << std::endl;
|
||||||
|
return internal::blue_noise_impl(width, height, threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<bool> dither::internal::blue_noise_impl(int width, int height, int threads) {
|
||||||
int count = width * height;
|
int count = width * height;
|
||||||
std::vector<float> filter_out;
|
std::vector<float> filter_out;
|
||||||
filter_out.resize(count);
|
filter_out.resize(count);
|
||||||
|
|
||||||
std::vector<bool> pbp; // Prototype Binary Pattern
|
int pixel_count = count * 4 / 10;
|
||||||
|
std::vector<bool> pbp = random_noise(count, count * 4 / 10);
|
||||||
pbp.resize(count);
|
pbp.resize(count);
|
||||||
|
|
||||||
std::default_random_engine re(std::random_device{}());
|
|
||||||
std::uniform_int_distribution<int> dist(0, count - 1);
|
|
||||||
|
|
||||||
const int pixel_count = count * 4 / 10;
|
|
||||||
|
|
||||||
// initialize pbp
|
|
||||||
for(int i = 0; i < count; ++i) {
|
|
||||||
if(i < pixel_count) {
|
|
||||||
pbp[i] = true;
|
|
||||||
} else {
|
|
||||||
pbp[i] = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// randomize pbp
|
|
||||||
for(int i = 0; i < count-1; ++i) {
|
|
||||||
decltype(dist)::param_type range{i+1, count-1};
|
|
||||||
int ridx = dist(re, range);
|
|
||||||
// probably can't use std::swap since using std::vector<bool>
|
|
||||||
bool temp = pbp[i];
|
|
||||||
pbp[i] = pbp[ridx];
|
|
||||||
pbp[ridx] = temp;
|
|
||||||
}
|
|
||||||
//#ifndef NDEBUG
|
//#ifndef NDEBUG
|
||||||
printf("Inserting %d pixels into image of max count %d\n", pixel_count, count);
|
printf("Inserting %d pixels into image of max count %d\n", pixel_count, count);
|
||||||
// generate image from randomized pbp
|
// generate image from randomized pbp
|
||||||
|
@ -167,3 +235,335 @@ std::vector<bool> dither::blue_noise(int width, int height, int threads) {
|
||||||
|
|
||||||
return pbp;
|
return pbp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<bool> dither::internal::blue_noise_cl_impl(
|
||||||
|
int width, int height, int filter_size, cl_context context, cl_device_id device, cl_program program) {
|
||||||
|
cl_int err;
|
||||||
|
cl_kernel kernel;
|
||||||
|
cl_command_queue queue;
|
||||||
|
cl_mem d_filter_out, d_precomputed, d_pbp;
|
||||||
|
std::size_t global_size, local_size;
|
||||||
|
|
||||||
|
std::vector<float> precomputed = precompute_gaussian(filter_size);
|
||||||
|
|
||||||
|
int count = width * height;
|
||||||
|
int pixel_count = count * 4 / 10;
|
||||||
|
std::vector<bool> pbp = random_noise(count, pixel_count);
|
||||||
|
std::vector<int> pbp_i(pbp.size());
|
||||||
|
|
||||||
|
queue = clCreateCommandQueueWithProperties(context, device, nullptr, &err);
|
||||||
|
|
||||||
|
d_filter_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, count * sizeof(float), nullptr, nullptr);
|
||||||
|
d_precomputed = clCreateBuffer(context, CL_MEM_READ_ONLY, filter_size * filter_size * sizeof(float), nullptr, nullptr);
|
||||||
|
d_pbp = clCreateBuffer(context, CL_MEM_READ_ONLY, count * sizeof(int), nullptr, nullptr);
|
||||||
|
|
||||||
|
err = clEnqueueWriteBuffer(queue, d_precomputed, CL_TRUE, 0, filter_size * filter_size * sizeof(float), &precomputed[0], 0, nullptr, nullptr);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to write to d_precomputed buffer\n";
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
err = clEnqueueWriteBuffer(queue, d_pbp, CL_TRUE, 0, count * sizeof(int), &pbp_i[0], 0, nullptr, nullptr);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to write to d_pbp buffer\n";
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
kernel = clCreateKernel(program, "do_filter", &err);
|
||||||
|
if(err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to create kernel: ";
|
||||||
|
switch(err) {
|
||||||
|
case CL_INVALID_PROGRAM:
|
||||||
|
std::cerr << "invalid program\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||||
|
std::cerr << "invalid program executable\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_KERNEL_NAME:
|
||||||
|
std::cerr << "invalid kernel name\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_KERNEL_DEFINITION:
|
||||||
|
std::cerr << "invalid kernel definition\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_VALUE:
|
||||||
|
std::cerr << "invalid value\n";
|
||||||
|
break;
|
||||||
|
case CL_OUT_OF_RESOURCES:
|
||||||
|
std::cerr << "out of resources\n";
|
||||||
|
break;
|
||||||
|
case CL_OUT_OF_HOST_MEMORY:
|
||||||
|
std::cerr << "out of host memory\n";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
std::cerr << "unknown error\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if(clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_filter_out) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to set kernel arg 0\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
if(clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_precomputed) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to set kernel arg 1\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
if(clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_pbp) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to set kernel arg 2\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
if(clSetKernelArg(kernel, 3, sizeof(int), &width) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to set kernel arg 3\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
if(clSetKernelArg(kernel, 4, sizeof(int), &height) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to set kernel arg 4\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
if(clSetKernelArg(kernel, 5, sizeof(int), &filter_size) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to set kernel arg 4\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
if(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(std::size_t), &local_size, nullptr) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to get work group size\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
global_size = (std::size_t)std::ceil(count / (float)local_size) * local_size;
|
||||||
|
|
||||||
|
std::cout << "OpenCL: global = " << global_size << ", local = " << local_size
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
std::vector<float> filter(count);
|
||||||
|
|
||||||
|
const auto get_filter = [&queue, &kernel, &global_size, &local_size,
|
||||||
|
&d_filter_out, &d_pbp, &pbp, &pbp_i, &count, &filter, &err] () -> bool {
|
||||||
|
for(unsigned int i = 0; i < pbp.size(); ++i) {
|
||||||
|
pbp_i[i] = pbp[i] ? 1 : 0;
|
||||||
|
}
|
||||||
|
if(clEnqueueWriteBuffer(queue, d_pbp, CL_TRUE, 0, count * sizeof(int), &pbp_i[0], 0, nullptr, nullptr) != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to write to d_pbp buffer\n";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(err = clEnqueueNDRangeKernel(
|
||||||
|
queue, kernel, 1, nullptr, &global_size, &local_size,
|
||||||
|
0, nullptr, nullptr); err != CL_SUCCESS) {
|
||||||
|
std::cerr << "OpenCL: Failed to enqueue task: ";
|
||||||
|
switch(err) {
|
||||||
|
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||||
|
std::cerr << "invalid program executable\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_COMMAND_QUEUE:
|
||||||
|
std::cerr << "invalid command queue\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_KERNEL:
|
||||||
|
std::cerr << "invalid kernel\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_CONTEXT:
|
||||||
|
std::cerr << "invalid context\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_KERNEL_ARGS:
|
||||||
|
std::cerr << "invalid kernel args\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_WORK_DIMENSION:
|
||||||
|
std::cerr << "invalid work dimension\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_GLOBAL_WORK_SIZE:
|
||||||
|
std::cerr << "invalid global work size\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_GLOBAL_OFFSET:
|
||||||
|
std::cerr << "invalid global offset\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_WORK_GROUP_SIZE:
|
||||||
|
std::cerr << "invalid work group size\n";
|
||||||
|
break;
|
||||||
|
case CL_INVALID_WORK_ITEM_SIZE:
|
||||||
|
std::cerr << "invalid work item size\n";
|
||||||
|
break;
|
||||||
|
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
|
||||||
|
std::cerr << "misaligned sub buffer offset\n";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
std::cerr << "Unknown\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
clFinish(queue);
|
||||||
|
|
||||||
|
clEnqueueReadBuffer(queue, d_filter_out, CL_TRUE, 0, count * sizeof(float), &filter[0], 0, nullptr, nullptr);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
{
|
||||||
|
printf("Inserting %d pixels into image of max count %d\n", pixel_count, count);
|
||||||
|
// generate image from randomized pbp
|
||||||
|
FILE *random_noise_image = fopen("random_noise.pbm", "w");
|
||||||
|
fprintf(random_noise_image, "P1\n%d %d\n", width, height);
|
||||||
|
for(int y = 0; y < height; ++y) {
|
||||||
|
for(int x = 0; x < width; ++x) {
|
||||||
|
fprintf(random_noise_image, "%d ", pbp[internal::twoToOne(x, y, width)] ? 1 : 0);
|
||||||
|
}
|
||||||
|
fputc('\n', random_noise_image);
|
||||||
|
}
|
||||||
|
fclose(random_noise_image);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!get_filter()) {
|
||||||
|
std::cerr << "OpenCL: Failed to execute do_filter (at start)\n";
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return {};
|
||||||
|
} else {
|
||||||
|
internal::write_filter(filter, width, "filter_out_start.pgm");
|
||||||
|
}
|
||||||
|
|
||||||
|
int iterations = 0;
|
||||||
|
|
||||||
|
while(true) {
|
||||||
|
printf("Iteration %d\n", ++iterations);
|
||||||
|
|
||||||
|
if(!get_filter()) {
|
||||||
|
std::cerr << "OpenCL: Failed to execute do_filter\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
int min, max, min_zero, max_one;
|
||||||
|
std::tie(min, max) = internal::filter_minmax(filter);
|
||||||
|
if(!pbp[max]) {
|
||||||
|
max_one = internal::get_one_or_zero(pbp, true, max, width, height);
|
||||||
|
} else {
|
||||||
|
max_one = max;
|
||||||
|
}
|
||||||
|
if(!pbp[max_one]) {
|
||||||
|
std::cerr << "ERROR: Failed to find pbp[max] one" << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(pbp[min]) {
|
||||||
|
min_zero = internal::get_one_or_zero(pbp, false, min, width, height);
|
||||||
|
} else {
|
||||||
|
min_zero = min;
|
||||||
|
}
|
||||||
|
if(pbp[min_zero]) {
|
||||||
|
std::cerr << "ERROR: Failed to find pbp[min] zero" << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pbp[max_one] = false;
|
||||||
|
|
||||||
|
if(!get_filter()) {
|
||||||
|
std::cerr << "OpenCL: Failed to execute do_filter\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// get second buffer's min
|
||||||
|
int second_min;
|
||||||
|
std::tie(second_min, std::ignore) = internal::filter_minmax(filter);
|
||||||
|
if(pbp[second_min]) {
|
||||||
|
second_min = internal::get_one_or_zero(pbp, false, second_min, width, height);
|
||||||
|
if(pbp[second_min]) {
|
||||||
|
std::cerr << "ERROR: Failed to find pbp[second_min] zero" << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(internal::dist(max_one, second_min, width) < 1.5f) {
|
||||||
|
pbp[max_one] = true;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
pbp[min_zero] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(iterations % 100 == 0) {
|
||||||
|
// generate blue_noise image from pbp
|
||||||
|
FILE *blue_noise_image = fopen("blue_noise.pbm", "w");
|
||||||
|
fprintf(blue_noise_image, "P1\n%d %d\n", width, height);
|
||||||
|
for(int y = 0; y < height; ++y) {
|
||||||
|
for(int x = 0; x < width; ++x) {
|
||||||
|
fprintf(blue_noise_image, "%d ", pbp[internal::twoToOne(x, y, width)] ? 1 : 0);
|
||||||
|
}
|
||||||
|
fputc('\n', blue_noise_image);
|
||||||
|
}
|
||||||
|
fclose(blue_noise_image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!get_filter()) {
|
||||||
|
std::cerr << "OpenCL: Failed to execute do_filter (at end)\n";
|
||||||
|
} else {
|
||||||
|
internal::write_filter(filter, width, "filter_out_final.pgm");
|
||||||
|
FILE *blue_noise_image = fopen("blue_noise.pbm", "w");
|
||||||
|
fprintf(blue_noise_image, "P1\n%d %d\n", width, height);
|
||||||
|
for(int y = 0; y < height; ++y) {
|
||||||
|
for(int x = 0; x < width; ++x) {
|
||||||
|
fprintf(blue_noise_image, "%d ", pbp[internal::twoToOne(x, y, width)] ? 1 : 0);
|
||||||
|
}
|
||||||
|
fputc('\n', blue_noise_image);
|
||||||
|
}
|
||||||
|
fclose(blue_noise_image);
|
||||||
|
}
|
||||||
|
|
||||||
|
clReleaseKernel(kernel);
|
||||||
|
clReleaseMemObject(d_pbp);
|
||||||
|
clReleaseMemObject(d_precomputed);
|
||||||
|
clReleaseMemObject(d_filter_out);
|
||||||
|
clReleaseCommandQueue(queue);
|
||||||
|
return pbp;
|
||||||
|
}
|
||||||
|
|
|
@ -12,12 +12,46 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
#include <CL/opencl.h>
|
||||||
|
|
||||||
namespace dither {
|
namespace dither {
|
||||||
|
|
||||||
std::vector<bool> blue_noise(int width, int height, int threads = 1);
|
std::vector<bool> blue_noise(int width, int height, int threads = 1);
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
std::vector<bool> blue_noise_impl(int width, int height, int threads = 1);
|
||||||
|
std::vector<bool> blue_noise_cl_impl(
|
||||||
|
int width, int height, int filter_size,
|
||||||
|
cl_context context, cl_device_id device, cl_program program);
|
||||||
|
|
||||||
|
inline std::vector<bool> random_noise(int size, int subsize) {
|
||||||
|
std::vector<bool> pbp(size);
|
||||||
|
std::default_random_engine re(std::random_device{}());
|
||||||
|
std::uniform_int_distribution<int> dist(0, size - 1);
|
||||||
|
|
||||||
|
// initialize pbp
|
||||||
|
for(int i = 0; i < size; ++i) {
|
||||||
|
if(i < subsize) {
|
||||||
|
pbp[i] = true;
|
||||||
|
} else {
|
||||||
|
pbp[i] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// randomize pbp
|
||||||
|
for(int i = 0; i < size-1; ++i) {
|
||||||
|
decltype(dist)::param_type range{i+1, size-1};
|
||||||
|
int ridx = dist(re, range);
|
||||||
|
// probably can't use std::swap since using std::vector<bool>
|
||||||
|
bool temp = pbp[i];
|
||||||
|
pbp[i] = pbp[ridx];
|
||||||
|
pbp[ridx] = temp;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pbp;
|
||||||
|
}
|
||||||
|
|
||||||
inline int twoToOne(int x, int y, int width) {
|
inline int twoToOne(int x, int y, int width) {
|
||||||
return x + y * width;
|
return x + y * width;
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
//#ifndef NDEBUG
|
//#ifndef NDEBUG
|
||||||
std::cout << "Trying blue_noise..." << std::endl;
|
std::cout << "Trying blue_noise..." << std::endl;
|
||||||
dither::blue_noise(70, 70, 8);
|
dither::blue_noise(100, 100, 8);
|
||||||
//#endif
|
//#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in a new issue