From ecd65fc42b3f7e8eec055c45dfc2dd5aab14382c Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Fri, 31 Mar 2023 18:47:33 +0900 Subject: [PATCH] clang-format --style=google --- src/arg_parse.cpp | 29 +- src/arg_parse.hpp | 20 +- src/blue_noise.cl | 71 +-- src/blue_noise.cpp | 1352 ++++++++++++++++++++++---------------------- src/blue_noise.hpp | 833 ++++++++++++++------------- src/image.cpp | 348 ++++++------ src/image.hpp | 137 ++--- src/main.cpp | 88 +-- src/utility.hpp | 48 +- 9 files changed, 1446 insertions(+), 1480 deletions(-) diff --git a/src/arg_parse.cpp b/src/arg_parse.cpp index 1ed1746..654fca0 100644 --- a/src/arg_parse.cpp +++ b/src/arg_parse.cpp @@ -12,17 +12,21 @@ Args::Args() output_filename_("output.png") {} void Args::DisplayHelp() { - std::cout - << "[-h | --help] [-b | --blue-noise ] [--usecl | " - "--nousecl]\n" - " -h | --help\t\t\t\tDisplay this help text\n" - " -b | --blue-noise \tGenerate blue noise square with " - "size\n" - " --usecl | --nousecl\t\t\tUse/Disable OpenCL (enabled by default)\n" - " -t | --threads \t\tUse CPU thread count when not using " - "OpenCL\n" - " -o | --output \tOutput filename to use\n" - " --overwrite\t\t\t\tEnable overwriting of file (default disabled)\n"; + std::cout << "[-h | --help] [-b | --blue-noise ] [--usecl | " + "--nousecl]\n" + " -h | --help\t\t\t\tDisplay this help text\n" + " -b | --blue-noise \tGenerate blue noise " + "square with " + "size\n" + " --usecl | --nousecl\t\t\tUse/Disable OpenCL (enabled by " + "default)\n" + " -t | --threads \t\tUse CPU thread count when " + "not using " + "OpenCL\n" + " -o | --output \tOutput filename to " + "use\n" + " --overwrite\t\t\t\tEnable overwriting of file (default " + "disabled)\n"; } bool Args::ParseArgs(int argc, char **argv) { @@ -53,7 +57,8 @@ bool Args::ParseArgs(int argc, char **argv) { std::strcmp(argv[0], "--threads") == 0)) { threads_ = std::strtoul(argv[1], nullptr, 10); if (threads_ == 0) { - std::cout << "ERROR: Failed to parse thread count, using 4 by default" + std::cout << "ERROR: Failed to parse thread count, using 4 by " + "default" << std::endl; threads_ = 4; } diff --git a/src/arg_parse.hpp b/src/arg_parse.hpp index ea6c63b..d6af176 100644 --- a/src/arg_parse.hpp +++ b/src/arg_parse.hpp @@ -4,19 +4,19 @@ #include struct Args { - Args(); + Args(); - static void DisplayHelp(); + static void DisplayHelp(); - /// Returns true if help was printed - bool ParseArgs(int argc, char **argv); + /// Returns true if help was printed + bool ParseArgs(int argc, char **argv); - bool generate_blue_noise_; - bool use_opencl_; - bool overwrite_file_; - unsigned int blue_noise_size_; - unsigned int threads_; - std::string output_filename_; + bool generate_blue_noise_; + bool use_opencl_; + bool overwrite_file_; + unsigned int blue_noise_size_; + unsigned int threads_; + std::string output_filename_; }; #endif diff --git a/src/blue_noise.cl b/src/blue_noise.cl index e2c8440..1a07bed 100644 --- a/src/blue_noise.cl +++ b/src/blue_noise.cl @@ -1,44 +1,45 @@ int twoToOne(int x, int y, int width, int height) { - while(x < 0) { - x += width; - } - while(y < 0) { - y += height; - } - x = x % width; - y = y % height; - return x + y * width; + while (x < 0) { + x += width; + } + while (y < 0) { + y += height; + } + x = x % width; + y = y % height; + return x + y * width; } -//float gaussian(float x, float y) { -// return exp(-(x*x + y*y) / (1.5F * 1.5F * 2.0F)); -//} +// float gaussian(float x, float y) { +// return exp(-(x*x + y*y) / (1.5F * 1.5F * 2.0F)); +// } -__kernel void do_filter( - __global float *filter_out, __global const float *precomputed, - __global const int *pbp, const int width, const int height, - const int filter_size) { - int i = get_global_id(0); - if(i < 0 || i >= width * height) { - return; +__kernel void do_filter(__global float *filter_out, + __global const float *precomputed, + __global const int *pbp, const int width, + const int height, const int filter_size) { + int i = get_global_id(0); + if (i < 0 || i >= width * height) { + return; + } + + int x = i % width; + int y = i / width; + + float sum = 0.0F; + for (int q = 0; q < filter_size; ++q) { + int q_prime = height - filter_size / 2 + y + q; + for (int p = 0; p < filter_size; ++p) { + int p_prime = width - filter_size / 2 + x + p; + if (pbp[twoToOne(p_prime, q_prime, width, height)] != 0) { + sum += precomputed[twoToOne(p, q, filter_size, filter_size)]; + // sum += gaussian(p - filter_size / 2.0F + 0.5F, q - + // filter_size / 2.0F + 0.5F); + } } + } - int x = i % width; - int y = i / width; - - float sum = 0.0F; - for(int q = 0; q < filter_size; ++q) { - int q_prime = height - filter_size / 2 + y + q; - for(int p = 0; p < filter_size; ++p) { - int p_prime = width - filter_size / 2 + x + p; - if(pbp[twoToOne(p_prime, q_prime, width, height)] != 0) { - sum += precomputed[twoToOne(p, q, filter_size, filter_size)]; - //sum += gaussian(p - filter_size / 2.0F + 0.5F, q - filter_size / 2.0F + 0.5F); - } - } - } - - filter_out[i] = sum; + filter_out[i] = sum; } // vim: syntax=c diff --git a/src/blue_noise.cpp b/src/blue_noise.cpp index f372fd5..c0b62da 100644 --- a/src/blue_noise.cpp +++ b/src/blue_noise.cpp @@ -17,126 +17,510 @@ image::Bl dither::blue_noise(int width, int height, int threads, bool use_opencl) { - bool using_opencl = false; + bool using_opencl = false; #if DITHERING_OPENCL_ENABLED == 1 - if (use_opencl) { - // try to use OpenCL - do { - cl_device_id device; - cl_context context; - cl_program program; - cl_int err; + if (use_opencl) { + // try to use OpenCL + do { + cl_device_id device; + cl_context context; + cl_program program; + cl_int err; - cl_platform_id platform; + cl_platform_id platform; - int filter_size = (width + height) / 2; + int filter_size = (width + height) / 2; - err = clGetPlatformIDs(1, &platform, nullptr); - if (err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to identify a platform\n"; - break; - } + err = clGetPlatformIDs(1, &platform, nullptr); + if (err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to identify a platform\n"; + break; + } - err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, - nullptr); - if (err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to get a device\n"; - break; - } + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr); + if (err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to get a device\n"; + break; + } - context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, - &err); + context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err); - { - char buf[1024]; - std::ifstream program_file("src/blue_noise.cl"); - if (!program_file.good()) { - std::cerr << "ERROR: Failed to read \"src/blue_noise.cl\" " - "(not found?)\n"; - break; - } - std::string program_string; - while (program_file.good()) { - program_file.read(buf, 1024); - if (int read_count = program_file.gcount(); - read_count > 0) { - program_string.append(buf, read_count); - } - } + { + char buf[1024]; + std::ifstream program_file("src/blue_noise.cl"); + if (!program_file.good()) { + std::cerr << "ERROR: Failed to read \"src/blue_noise.cl\" " + "(not found?)\n"; + break; + } + std::string program_string; + while (program_file.good()) { + program_file.read(buf, 1024); + if (int read_count = program_file.gcount(); read_count > 0) { + program_string.append(buf, read_count); + } + } - const char *string_ptr = program_string.c_str(); - std::size_t program_size = program_string.size(); - program = clCreateProgramWithSource(context, 1, - (const char **)&string_ptr, - &program_size, &err); - if (err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to create the program\n"; - clReleaseContext(context); - break; - } + const char *string_ptr = program_string.c_str(); + std::size_t program_size = program_string.size(); + program = clCreateProgramWithSource( + context, 1, (const char **)&string_ptr, &program_size, &err); + if (err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to create the program\n"; + clReleaseContext(context); + break; + } - err = clBuildProgram(program, 1, &device, nullptr, nullptr, - nullptr); - if (err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to build the program\n"; + err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); + if (err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to build the program\n"; - std::size_t log_size; - clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, - 0, nullptr, &log_size); - std::unique_ptr log = - std::make_unique(log_size + 1); - log[log_size] = 0; - clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, - log_size, log.get(), nullptr); - std::cerr << log.get() << std::endl; + std::size_t log_size; + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, + nullptr, &log_size); + std::unique_ptr log = std::make_unique(log_size + 1); + log[log_size] = 0; + clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, + log.get(), nullptr); + std::cerr << log.get() << std::endl; - clReleaseProgram(program); - clReleaseContext(context); - break; - } - } + clReleaseProgram(program); + clReleaseContext(context); + break; + } + } - std::cout << "OpenCL: Initialized, trying cl_impl..." << std::endl; - std::vector result = internal::blue_noise_cl_impl( - width, height, filter_size, context, device, program); + std::cout << "OpenCL: Initialized, trying cl_impl..." << std::endl; + std::vector result = internal::blue_noise_cl_impl( + width, height, filter_size, context, device, program); - clReleaseProgram(program); - clReleaseContext(context); + clReleaseProgram(program); + clReleaseContext(context); - if (!result.empty()) { - return internal::rangeToBl(result, width); - } - std::cout << "ERROR: Empty result\n"; - } while (false); - } + if (!result.empty()) { + return internal::rangeToBl(result, width); + } + std::cout << "ERROR: Empty result\n"; + } while (false); + } #else - std::clog << "WARNING: Not compiled with OpenCL support!\n"; + std::clog << "WARNING: Not compiled with OpenCL support!\n"; #endif - if (!using_opencl) { - std::cout << "OpenCL: Failed to setup/use or is not enabled, using " - "regular impl..." - << std::endl; - return internal::rangeToBl( - internal::blue_noise_impl(width, height, threads), width); - } + if (!using_opencl) { + std::cout << "OpenCL: Failed to setup/use or is not enabled, using " + "regular impl..." + << std::endl; + return internal::rangeToBl( + internal::blue_noise_impl(width, height, threads), width); + } - std::cout << "ERROR: Invalid state (end of blue_noise fn)\n"; - return {}; + std::cout << "ERROR: Invalid state (end of blue_noise fn)\n"; + return {}; } std::vector dither::internal::blue_noise_impl(int width, int height, int threads) { - int count = width * height; - std::vector filter_out; - filter_out.resize(count); + int count = width * height; + std::vector filter_out; + filter_out.resize(count); - int pixel_count = count * 4 / 10; - std::vector pbp = random_noise(count, count * 4 / 10); - pbp.resize(count); + int pixel_count = count * 4 / 10; + std::vector pbp = random_noise(count, count * 4 / 10); + pbp.resize(count); +#ifndef NDEBUG + printf("Inserting %d pixels into image of max count %d\n", pixel_count, + count); + // generate image from randomized pbp + FILE *random_noise_image = fopen("random_noise.pbm", "w"); + fprintf(random_noise_image, "P1\n%d %d\n", width, height); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + fprintf(random_noise_image, "%d ", + pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); + } + fputc('\n', random_noise_image); + } + fclose(random_noise_image); +#endif + + // #ifndef NDEBUG + int iterations = 0; + // #endif + + int filter_size = (width + height) / 2; + + std::unique_ptr> precomputed = + std::make_unique>( + internal::precompute_gaussian(filter_size)); + + internal::compute_filter(pbp, width, height, count, filter_size, filter_out, + precomputed.get(), threads); +#ifndef NDEBUG + internal::write_filter(filter_out, width, "filter_out_start.pgm"); +#endif + std::cout << "Begin BinaryArray generation loop\n"; + while (true) { +#ifndef NDEBUG + // if(++iterations % 10 == 0) { + printf("Iteration %d\n", ++iterations); +// } +#endif + // get filter values + internal::compute_filter(pbp, width, height, count, filter_size, filter_out, + precomputed.get(), threads); + + // #ifndef NDEBUG + // for(int i = 0; i < count; ++i) { + // int x, y; + // std::tie(x, y) = internal::oneToTwo(i, width); + // printf("%d (%d, %d): %f\n", i, x, y, filter_out[i]); + // } + // #endif + + int min, max; + std::tie(min, max) = internal::filter_minmax(filter_out, pbp); + + // remove 1 + pbp[max] = false; + + // get filter values again + internal::compute_filter(pbp, width, height, count, filter_size, filter_out, + precomputed.get(), threads); + + // get second buffer's min + int second_min; + std::tie(second_min, std::ignore) = + internal::filter_minmax(filter_out, pbp); + + if (second_min == max) { + pbp[max] = true; + break; + } else { + pbp[second_min] = true; + } + + if (iterations % 100 == 0) { + // generate blue_noise image from pbp +#ifndef NDEBUG + FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); + fprintf(blue_noise_image, "P1\n%d %d\n", width, height); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + fprintf(blue_noise_image, "%d ", + pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); + } + fputc('\n', blue_noise_image); + } + fclose(blue_noise_image); +#endif + } + } + internal::compute_filter(pbp, width, height, count, filter_size, filter_out, + precomputed.get(), threads); +#ifndef NDEBUG + internal::write_filter(filter_out, width, "filter_out_final.pgm"); +#endif + +#ifndef NDEBUG + // generate blue_noise image from pbp + FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); + fprintf(blue_noise_image, "P1\n%d %d\n", width, height); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + fprintf(blue_noise_image, "%d ", + pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); + } + fputc('\n', blue_noise_image); + } + fclose(blue_noise_image); +#endif + + std::cout << "Generating dither_array...\n"; + std::vector dither_array(count); + int min, max; + { + std::vector pbp_copy(pbp); + std::cout << "Ranking minority pixels...\n"; + for (unsigned int i = pixel_count; i-- > 0;) { +#ifndef NDEBUG + std::cout << i << ' '; +#endif + internal::compute_filter(pbp, width, height, count, filter_size, + filter_out, precomputed.get(), threads); + std::tie(std::ignore, max) = internal::filter_minmax(filter_out, pbp); + pbp[max] = false; + dither_array[max] = i; + } + pbp = pbp_copy; + } + std::cout << "\nRanking remainder of first half of pixels...\n"; + for (unsigned int i = pixel_count; i < (unsigned int)((count + 1) / 2); ++i) { +#ifndef NDEBUG + std::cout << i << ' '; +#endif + internal::compute_filter(pbp, width, height, count, filter_size, filter_out, + precomputed.get(), threads); + std::tie(min, std::ignore) = internal::filter_minmax(filter_out, pbp); + pbp[min] = true; + dither_array[min] = i; + } + std::cout << "\nRanking last half of pixels...\n"; + std::vector reversed_pbp(pbp); + for (unsigned int i = (count + 1) / 2; i < (unsigned int)count; ++i) { +#ifndef NDEBUG + std::cout << i << ' '; +#endif + for (unsigned int i = 0; i < pbp.size(); ++i) { + reversed_pbp[i] = !pbp[i]; + } + internal::compute_filter(reversed_pbp, width, height, count, filter_size, + filter_out, precomputed.get(), threads); + std::tie(std::ignore, max) = internal::filter_minmax(filter_out, pbp); + pbp[max] = true; + dither_array[max] = i; + } + + return dither_array; +} + +#if DITHERING_OPENCL_ENABLED == 1 +std::vector dither::internal::blue_noise_cl_impl( + const int width, const int height, const int filter_size, + cl_context context, cl_device_id device, cl_program program) { + cl_int err; + cl_kernel kernel; + cl_command_queue queue; + cl_mem d_filter_out, d_precomputed, d_pbp; + std::size_t global_size, local_size; + + std::vector precomputed = precompute_gaussian(filter_size); + + int count = width * height; + int pixel_count = count * 4 / 10; + std::vector pbp = random_noise(count, pixel_count); + std::vector pbp_i(pbp.size()); + + queue = clCreateCommandQueueWithProperties(context, device, nullptr, &err); + + d_filter_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + count * sizeof(float), nullptr, nullptr); + d_precomputed = + clCreateBuffer(context, CL_MEM_READ_ONLY, + precomputed.size() * sizeof(float), nullptr, nullptr); + d_pbp = clCreateBuffer(context, CL_MEM_READ_ONLY, count * sizeof(int), + nullptr, nullptr); + + err = clEnqueueWriteBuffer(queue, d_precomputed, CL_TRUE, 0, + precomputed.size() * sizeof(float), + &precomputed[0], 0, nullptr, nullptr); + if (err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to write to d_precomputed buffer\n"; + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + + kernel = clCreateKernel(program, "do_filter", &err); + if (err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to create kernel: "; + switch (err) { + case CL_INVALID_PROGRAM: + std::cerr << "invalid program\n"; + break; + case CL_INVALID_PROGRAM_EXECUTABLE: + std::cerr << "invalid program executable\n"; + break; + case CL_INVALID_KERNEL_NAME: + std::cerr << "invalid kernel name\n"; + break; + case CL_INVALID_KERNEL_DEFINITION: + std::cerr << "invalid kernel definition\n"; + break; + case CL_INVALID_VALUE: + std::cerr << "invalid value\n"; + break; + case CL_OUT_OF_RESOURCES: + std::cerr << "out of resources\n"; + break; + case CL_OUT_OF_HOST_MEMORY: + std::cerr << "out of host memory\n"; + break; + default: + std::cerr << "unknown error\n"; + break; + } + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + + if (clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_filter_out) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 0\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + if (clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_precomputed) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 1\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + if (clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_pbp) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 2\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + if (clSetKernelArg(kernel, 3, sizeof(int), &width) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 3\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + if (clSetKernelArg(kernel, 4, sizeof(int), &height) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 4\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + if (filter_size % 2 == 0) { + int filter_size_odd = filter_size + 1; + if (clSetKernelArg(kernel, 5, sizeof(int), &filter_size_odd) != + CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 4\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + } else { + if (clSetKernelArg(kernel, 5, sizeof(int), &filter_size) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to set kernel arg 4\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + } + + if (clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(std::size_t), &local_size, + nullptr) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to get work group size\n"; + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return {}; + } + global_size = (std::size_t)std::ceil(count / (float)local_size) * local_size; + + std::cout << "OpenCL: global = " << global_size << ", local = " << local_size + << std::endl; + + std::vector filter(count); + + bool reversed_pbp = false; + + const auto get_filter = [&queue, &kernel, &global_size, &local_size, + &d_filter_out, &d_pbp, &pbp, &pbp_i, &count, &filter, + &err, &reversed_pbp]() -> bool { + for (unsigned int i = 0; i < pbp.size(); ++i) { + if (reversed_pbp) { + pbp_i[i] = pbp[i] ? 0 : 1; + } else { + pbp_i[i] = pbp[i] ? 1 : 0; + } + } + if (clEnqueueWriteBuffer(queue, d_pbp, CL_TRUE, 0, count * sizeof(int), + &pbp_i[0], 0, nullptr, nullptr) != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to write to d_pbp buffer\n"; + return false; + } + + if (err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &global_size, + &local_size, 0, nullptr, nullptr); + err != CL_SUCCESS) { + std::cerr << "OpenCL: Failed to enqueue task: "; + switch (err) { + case CL_INVALID_PROGRAM_EXECUTABLE: + std::cerr << "invalid program executable\n"; + break; + case CL_INVALID_COMMAND_QUEUE: + std::cerr << "invalid command queue\n"; + break; + case CL_INVALID_KERNEL: + std::cerr << "invalid kernel\n"; + break; + case CL_INVALID_CONTEXT: + std::cerr << "invalid context\n"; + break; + case CL_INVALID_KERNEL_ARGS: + std::cerr << "invalid kernel args\n"; + break; + case CL_INVALID_WORK_DIMENSION: + std::cerr << "invalid work dimension\n"; + break; + case CL_INVALID_GLOBAL_WORK_SIZE: + std::cerr << "invalid global work size\n"; + break; + case CL_INVALID_GLOBAL_OFFSET: + std::cerr << "invalid global offset\n"; + break; + case CL_INVALID_WORK_GROUP_SIZE: + std::cerr << "invalid work group size\n"; + break; + case CL_INVALID_WORK_ITEM_SIZE: + std::cerr << "invalid work item size\n"; + break; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: + std::cerr << "misaligned sub buffer offset\n"; + break; + default: + std::cerr << "Unknown\n"; + break; + } + return false; + } + + clFinish(queue); + + clEnqueueReadBuffer(queue, d_filter_out, CL_TRUE, 0, count * sizeof(float), + &filter[0], 0, nullptr, nullptr); + + return true; + }; + + { #ifndef NDEBUG printf("Inserting %d pixels into image of max count %d\n", pixel_count, count); @@ -144,608 +528,200 @@ std::vector dither::internal::blue_noise_impl(int width, FILE *random_noise_image = fopen("random_noise.pbm", "w"); fprintf(random_noise_image, "P1\n%d %d\n", width, height); for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - fprintf(random_noise_image, "%d ", - pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); - } - fputc('\n', random_noise_image); + for (int x = 0; x < width; ++x) { + fprintf(random_noise_image, "%d ", + pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); + } + fputc('\n', random_noise_image); } fclose(random_noise_image); #endif + } - // #ifndef NDEBUG - int iterations = 0; - // #endif - - int filter_size = (width + height) / 2; - - std::unique_ptr> precomputed = - std::make_unique>( - internal::precompute_gaussian(filter_size)); - - internal::compute_filter(pbp, width, height, count, filter_size, filter_out, - precomputed.get(), threads); -#ifndef NDEBUG - internal::write_filter(filter_out, width, "filter_out_start.pgm"); -#endif - std::cout << "Begin BinaryArray generation loop\n"; - while (true) { -#ifndef NDEBUG - // if(++iterations % 10 == 0) { - printf("Iteration %d\n", ++iterations); -// } -#endif - // get filter values - internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, precomputed.get(), threads); - - // #ifndef NDEBUG - // for(int i = 0; i < count; ++i) { - // int x, y; - // std::tie(x, y) = internal::oneToTwo(i, width); - // printf("%d (%d, %d): %f\n", i, x, y, filter_out[i]); - // } - // #endif - - int min, max; - std::tie(min, max) = internal::filter_minmax(filter_out, pbp); - - // remove 1 - pbp[max] = false; - - // get filter values again - internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, precomputed.get(), threads); - - // get second buffer's min - int second_min; - std::tie(second_min, std::ignore) = - internal::filter_minmax(filter_out, pbp); - - if (second_min == max) { - pbp[max] = true; - break; - } else { - pbp[second_min] = true; - } - - if (iterations % 100 == 0) { - // generate blue_noise image from pbp -#ifndef NDEBUG - FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); - fprintf(blue_noise_image, "P1\n%d %d\n", width, height); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - fprintf(blue_noise_image, "%d ", - pbp[utility::twoToOne(x, y, width, height)] ? 1 - : 0); - } - fputc('\n', blue_noise_image); - } - fclose(blue_noise_image); -#endif - } - } - internal::compute_filter(pbp, width, height, count, filter_size, filter_out, - precomputed.get(), threads); -#ifndef NDEBUG - internal::write_filter(filter_out, width, "filter_out_final.pgm"); -#endif - -#ifndef NDEBUG - // generate blue_noise image from pbp - FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); - fprintf(blue_noise_image, "P1\n%d %d\n", width, height); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - fprintf(blue_noise_image, "%d ", - pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); - } - fputc('\n', blue_noise_image); - } - fclose(blue_noise_image); -#endif - - std::cout << "Generating dither_array...\n"; - std::vector dither_array(count); - int min, max; - { - std::vector pbp_copy(pbp); - std::cout << "Ranking minority pixels...\n"; - for (unsigned int i = pixel_count; i-- > 0;) { -#ifndef NDEBUG - std::cout << i << ' '; -#endif - internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, precomputed.get(), threads); - std::tie(std::ignore, max) = - internal::filter_minmax(filter_out, pbp); - pbp[max] = false; - dither_array[max] = i; - } - pbp = pbp_copy; - } - std::cout << "\nRanking remainder of first half of pixels...\n"; - for (unsigned int i = pixel_count; i < (unsigned int)((count + 1) / 2); - ++i) { -#ifndef NDEBUG - std::cout << i << ' '; -#endif - internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, precomputed.get(), threads); - std::tie(min, std::ignore) = internal::filter_minmax(filter_out, pbp); - pbp[min] = true; - dither_array[min] = i; - } - std::cout << "\nRanking last half of pixels...\n"; - std::vector reversed_pbp(pbp); - for (unsigned int i = (count + 1) / 2; i < (unsigned int)count; ++i) { -#ifndef NDEBUG - std::cout << i << ' '; -#endif - for (unsigned int i = 0; i < pbp.size(); ++i) { - reversed_pbp[i] = !pbp[i]; - } - internal::compute_filter(reversed_pbp, width, height, count, - filter_size, filter_out, precomputed.get(), - threads); - std::tie(std::ignore, max) = internal::filter_minmax(filter_out, pbp); - pbp[max] = true; - dither_array[max] = i; - } - - return dither_array; -} - -#if DITHERING_OPENCL_ENABLED == 1 -std::vector dither::internal::blue_noise_cl_impl( - const int width, const int height, const int filter_size, - cl_context context, cl_device_id device, cl_program program) { - cl_int err; - cl_kernel kernel; - cl_command_queue queue; - cl_mem d_filter_out, d_precomputed, d_pbp; - std::size_t global_size, local_size; - - std::vector precomputed = precompute_gaussian(filter_size); - - int count = width * height; - int pixel_count = count * 4 / 10; - std::vector pbp = random_noise(count, pixel_count); - std::vector pbp_i(pbp.size()); - - queue = clCreateCommandQueueWithProperties(context, device, nullptr, &err); - - d_filter_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - count * sizeof(float), nullptr, nullptr); - d_precomputed = clCreateBuffer(context, CL_MEM_READ_ONLY, - precomputed.size() * sizeof(float), nullptr, - nullptr); - d_pbp = clCreateBuffer(context, CL_MEM_READ_ONLY, count * sizeof(int), - nullptr, nullptr); - - err = clEnqueueWriteBuffer(queue, d_precomputed, CL_TRUE, 0, - precomputed.size() * sizeof(float), - &precomputed[0], 0, nullptr, nullptr); - if (err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to write to d_precomputed buffer\n"; - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - - kernel = clCreateKernel(program, "do_filter", &err); - if (err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to create kernel: "; - switch (err) { - case CL_INVALID_PROGRAM: - std::cerr << "invalid program\n"; - break; - case CL_INVALID_PROGRAM_EXECUTABLE: - std::cerr << "invalid program executable\n"; - break; - case CL_INVALID_KERNEL_NAME: - std::cerr << "invalid kernel name\n"; - break; - case CL_INVALID_KERNEL_DEFINITION: - std::cerr << "invalid kernel definition\n"; - break; - case CL_INVALID_VALUE: - std::cerr << "invalid value\n"; - break; - case CL_OUT_OF_RESOURCES: - std::cerr << "out of resources\n"; - break; - case CL_OUT_OF_HOST_MEMORY: - std::cerr << "out of host memory\n"; - break; - default: - std::cerr << "unknown error\n"; - break; - } - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - - if (clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_filter_out) != - CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 0\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - if (clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_precomputed) != - CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 1\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - if (clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_pbp) != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 2\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - if (clSetKernelArg(kernel, 3, sizeof(int), &width) != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 3\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - if (clSetKernelArg(kernel, 4, sizeof(int), &height) != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 4\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - if (filter_size % 2 == 0) { - int filter_size_odd = filter_size + 1; - if (clSetKernelArg(kernel, 5, sizeof(int), &filter_size_odd) != - CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 4\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - } else { - if (clSetKernelArg(kernel, 5, sizeof(int), &filter_size) != - CL_SUCCESS) { - std::cerr << "OpenCL: Failed to set kernel arg 4\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - } - - if (clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(std::size_t), &local_size, - nullptr) != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to get work group size\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } - global_size = - (std::size_t)std::ceil(count / (float)local_size) * local_size; - - std::cout << "OpenCL: global = " << global_size - << ", local = " << local_size << std::endl; - - std::vector filter(count); - - bool reversed_pbp = false; - - const auto get_filter = [&queue, &kernel, &global_size, &local_size, - &d_filter_out, &d_pbp, &pbp, &pbp_i, &count, - &filter, &err, &reversed_pbp]() -> bool { - for (unsigned int i = 0; i < pbp.size(); ++i) { - if (reversed_pbp) { - pbp_i[i] = pbp[i] ? 0 : 1; - } else { - pbp_i[i] = pbp[i] ? 1 : 0; - } - } - if (clEnqueueWriteBuffer(queue, d_pbp, CL_TRUE, 0, count * sizeof(int), - &pbp_i[0], 0, nullptr, - nullptr) != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to write to d_pbp buffer\n"; - return false; - } - - if (err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, - &global_size, &local_size, 0, nullptr, - nullptr); - err != CL_SUCCESS) { - std::cerr << "OpenCL: Failed to enqueue task: "; - switch (err) { - case CL_INVALID_PROGRAM_EXECUTABLE: - std::cerr << "invalid program executable\n"; - break; - case CL_INVALID_COMMAND_QUEUE: - std::cerr << "invalid command queue\n"; - break; - case CL_INVALID_KERNEL: - std::cerr << "invalid kernel\n"; - break; - case CL_INVALID_CONTEXT: - std::cerr << "invalid context\n"; - break; - case CL_INVALID_KERNEL_ARGS: - std::cerr << "invalid kernel args\n"; - break; - case CL_INVALID_WORK_DIMENSION: - std::cerr << "invalid work dimension\n"; - break; - case CL_INVALID_GLOBAL_WORK_SIZE: - std::cerr << "invalid global work size\n"; - break; - case CL_INVALID_GLOBAL_OFFSET: - std::cerr << "invalid global offset\n"; - break; - case CL_INVALID_WORK_GROUP_SIZE: - std::cerr << "invalid work group size\n"; - break; - case CL_INVALID_WORK_ITEM_SIZE: - std::cerr << "invalid work item size\n"; - break; - case CL_MISALIGNED_SUB_BUFFER_OFFSET: - std::cerr << "misaligned sub buffer offset\n"; - break; - default: - std::cerr << "Unknown\n"; - break; - } - return false; - } - - clFinish(queue); - - clEnqueueReadBuffer(queue, d_filter_out, CL_TRUE, 0, - count * sizeof(float), &filter[0], 0, nullptr, - nullptr); - - return true; - }; - - { -#ifndef NDEBUG - printf("Inserting %d pixels into image of max count %d\n", pixel_count, - count); - // generate image from randomized pbp - FILE *random_noise_image = fopen("random_noise.pbm", "w"); - fprintf(random_noise_image, "P1\n%d %d\n", width, height); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - fprintf(random_noise_image, "%d ", - pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); - } - fputc('\n', random_noise_image); - } - fclose(random_noise_image); -#endif - } - - if (!get_filter()) { - std::cerr << "OpenCL: Failed to execute do_filter (at start)\n"; - clReleaseKernel(kernel); - clReleaseMemObject(d_pbp); - clReleaseMemObject(d_precomputed); - clReleaseMemObject(d_filter_out); - clReleaseCommandQueue(queue); - return {}; - } else { -#ifndef NDEBUG - internal::write_filter(filter, width, "filter_out_start.pgm"); -#endif - } - - int iterations = 0; - - std::cout << "Begin BinaryArray generation loop\n"; - while (true) { -#ifndef NDEBUG - printf("Iteration %d\n", ++iterations); -#endif - - if (!get_filter()) { - std::cerr << "OpenCL: Failed to execute do_filter\n"; - break; - } - - int min, max; - std::tie(min, max) = internal::filter_minmax(filter, pbp); - - pbp[max] = false; - - if (!get_filter()) { - std::cerr << "OpenCL: Failed to execute do_filter\n"; - break; - } - - // get second buffer's min - int second_min; - std::tie(second_min, std::ignore) = - internal::filter_minmax(filter, pbp); - - if (second_min == max) { - pbp[max] = true; - break; - } else { - pbp[second_min] = true; - } - - if (iterations % 100 == 0) { -#ifndef NDEBUG - std::cout << "max was " << max << ", second_min is " << second_min - << std::endl; - // generate blue_noise image from pbp - FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); - fprintf(blue_noise_image, "P1\n%d %d\n", width, height); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - fprintf(blue_noise_image, "%d ", - pbp[utility::twoToOne(x, y, width, height)] ? 1 - : 0); - } - fputc('\n', blue_noise_image); - } - fclose(blue_noise_image); -#endif - } - } - - if (!get_filter()) { - std::cerr << "OpenCL: Failed to execute do_filter (at end)\n"; - } else { -#ifndef NDEBUG - internal::write_filter(filter, width, "filter_out_final.pgm"); - FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); - fprintf(blue_noise_image, "P1\n%d %d\n", width, height); - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - fprintf(blue_noise_image, "%d ", - pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); - } - fputc('\n', blue_noise_image); - } - fclose(blue_noise_image); -#endif - } - -#ifndef NDEBUG - { - image::Bl pbp_image = toBl(pbp, width); - pbp_image.writeToFile(image::file_type::PNG, true, - "debug_pbp_before.png"); - } -#endif - - std::cout << "Generating dither_array...\n"; -#ifndef NDEBUG - std::unordered_set set; -#endif - std::vector dither_array(count, 0); - int min, max; - { - std::vector pbp_copy(pbp); - std::cout << "Ranking minority pixels...\n"; - for (unsigned int i = pixel_count; i-- > 0;) { -#ifndef NDEBUG - std::cout << i << ' '; -#endif - get_filter(); - std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp); - pbp.at(max) = false; - dither_array.at(max) = i; -#ifndef NDEBUG - if (set.find(max) != set.end()) { - std::cout << "\nWARNING: Reusing index " << max << '\n'; - } else { - set.insert(max); - } -#endif - } - pbp = pbp_copy; -#ifndef NDEBUG - image::Bl min_pixels = internal::rangeToBl(dither_array, width); - min_pixels.writeToFile(image::file_type::PNG, true, - "da_min_pixels.png"); -#endif - } - std::cout << "\nRanking remainder of first half of pixels...\n"; - for (unsigned int i = pixel_count; i < (unsigned int)((count + 1) / 2); - ++i) { -#ifndef NDEBUG - std::cout << i << ' '; -#endif - get_filter(); - std::tie(min, std::ignore) = internal::filter_minmax(filter, pbp); - pbp.at(min) = true; - dither_array.at(min) = i; -#ifndef NDEBUG - if (set.find(min) != set.end()) { - std::cout << "\nWARNING: Reusing index " << min << '\n'; - } else { - set.insert(min); - } -#endif - } -#ifndef NDEBUG - { - image::Bl min_pixels = internal::rangeToBl(dither_array, width); - min_pixels.writeToFile(image::file_type::PNG, true, - "da_mid_pixels.png"); - get_filter(); - internal::write_filter(filter, width, "filter_mid.pgm"); - image::Bl pbp_image = toBl(pbp, width); - pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_mid.png"); - } -#endif - std::cout << "\nRanking last half of pixels...\n"; - reversed_pbp = true; - for (unsigned int i = (count + 1) / 2; i < (unsigned int)count; ++i) { -#ifndef NDEBUG - std::cout << i << ' '; -#endif - get_filter(); - std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp); - pbp.at(max) = true; - dither_array.at(max) = i; -#ifndef NDEBUG - if (set.find(max) != set.end()) { - std::cout << "\nWARNING: Reusing index " << max << '\n'; - } else { - set.insert(max); - } -#endif - } - std::cout << std::endl; - -#ifndef NDEBUG - { - get_filter(); - internal::write_filter(filter, width, "filter_after.pgm"); - image::Bl pbp_image = toBl(pbp, width); - pbp_image.writeToFile(image::file_type::PNG, true, - "debug_pbp_after.png"); - } -#endif - + if (!get_filter()) { + std::cerr << "OpenCL: Failed to execute do_filter (at start)\n"; clReleaseKernel(kernel); clReleaseMemObject(d_pbp); clReleaseMemObject(d_precomputed); clReleaseMemObject(d_filter_out); clReleaseCommandQueue(queue); - return dither_array; + return {}; + } else { +#ifndef NDEBUG + internal::write_filter(filter, width, "filter_out_start.pgm"); +#endif + } + + int iterations = 0; + + std::cout << "Begin BinaryArray generation loop\n"; + while (true) { +#ifndef NDEBUG + printf("Iteration %d\n", ++iterations); +#endif + + if (!get_filter()) { + std::cerr << "OpenCL: Failed to execute do_filter\n"; + break; + } + + int min, max; + std::tie(min, max) = internal::filter_minmax(filter, pbp); + + pbp[max] = false; + + if (!get_filter()) { + std::cerr << "OpenCL: Failed to execute do_filter\n"; + break; + } + + // get second buffer's min + int second_min; + std::tie(second_min, std::ignore) = internal::filter_minmax(filter, pbp); + + if (second_min == max) { + pbp[max] = true; + break; + } else { + pbp[second_min] = true; + } + + if (iterations % 100 == 0) { +#ifndef NDEBUG + std::cout << "max was " << max << ", second_min is " << second_min + << std::endl; + // generate blue_noise image from pbp + FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); + fprintf(blue_noise_image, "P1\n%d %d\n", width, height); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + fprintf(blue_noise_image, "%d ", + pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); + } + fputc('\n', blue_noise_image); + } + fclose(blue_noise_image); +#endif + } + } + + if (!get_filter()) { + std::cerr << "OpenCL: Failed to execute do_filter (at end)\n"; + } else { +#ifndef NDEBUG + internal::write_filter(filter, width, "filter_out_final.pgm"); + FILE *blue_noise_image = fopen("blue_noise.pbm", "w"); + fprintf(blue_noise_image, "P1\n%d %d\n", width, height); + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + fprintf(blue_noise_image, "%d ", + pbp[utility::twoToOne(x, y, width, height)] ? 1 : 0); + } + fputc('\n', blue_noise_image); + } + fclose(blue_noise_image); +#endif + } + +#ifndef NDEBUG + { + image::Bl pbp_image = toBl(pbp, width); + pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_before.png"); + } +#endif + + std::cout << "Generating dither_array...\n"; +#ifndef NDEBUG + std::unordered_set set; +#endif + std::vector dither_array(count, 0); + int min, max; + { + std::vector pbp_copy(pbp); + std::cout << "Ranking minority pixels...\n"; + for (unsigned int i = pixel_count; i-- > 0;) { +#ifndef NDEBUG + std::cout << i << ' '; +#endif + get_filter(); + std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp); + pbp.at(max) = false; + dither_array.at(max) = i; +#ifndef NDEBUG + if (set.find(max) != set.end()) { + std::cout << "\nWARNING: Reusing index " << max << '\n'; + } else { + set.insert(max); + } +#endif + } + pbp = pbp_copy; +#ifndef NDEBUG + image::Bl min_pixels = internal::rangeToBl(dither_array, width); + min_pixels.writeToFile(image::file_type::PNG, true, "da_min_pixels.png"); +#endif + } + std::cout << "\nRanking remainder of first half of pixels...\n"; + for (unsigned int i = pixel_count; i < (unsigned int)((count + 1) / 2); ++i) { +#ifndef NDEBUG + std::cout << i << ' '; +#endif + get_filter(); + std::tie(min, std::ignore) = internal::filter_minmax(filter, pbp); + pbp.at(min) = true; + dither_array.at(min) = i; +#ifndef NDEBUG + if (set.find(min) != set.end()) { + std::cout << "\nWARNING: Reusing index " << min << '\n'; + } else { + set.insert(min); + } +#endif + } +#ifndef NDEBUG + { + image::Bl min_pixels = internal::rangeToBl(dither_array, width); + min_pixels.writeToFile(image::file_type::PNG, true, "da_mid_pixels.png"); + get_filter(); + internal::write_filter(filter, width, "filter_mid.pgm"); + image::Bl pbp_image = toBl(pbp, width); + pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_mid.png"); + } +#endif + std::cout << "\nRanking last half of pixels...\n"; + reversed_pbp = true; + for (unsigned int i = (count + 1) / 2; i < (unsigned int)count; ++i) { +#ifndef NDEBUG + std::cout << i << ' '; +#endif + get_filter(); + std::tie(std::ignore, max) = internal::filter_minmax(filter, pbp); + pbp.at(max) = true; + dither_array.at(max) = i; +#ifndef NDEBUG + if (set.find(max) != set.end()) { + std::cout << "\nWARNING: Reusing index " << max << '\n'; + } else { + set.insert(max); + } +#endif + } + std::cout << std::endl; + +#ifndef NDEBUG + { + get_filter(); + internal::write_filter(filter, width, "filter_after.pgm"); + image::Bl pbp_image = toBl(pbp, width); + pbp_image.writeToFile(image::file_type::PNG, true, "debug_pbp_after.png"); + } +#endif + + clReleaseKernel(kernel); + clReleaseMemObject(d_pbp); + clReleaseMemObject(d_precomputed); + clReleaseMemObject(d_filter_out); + clReleaseCommandQueue(queue); + return dither_array; } #endif diff --git a/src/blue_noise.hpp b/src/blue_noise.hpp index 16527b9..29050fc 100644 --- a/src/blue_noise.hpp +++ b/src/blue_noise.hpp @@ -1,470 +1,469 @@ #ifndef BLUE_NOISE_HPP #define BLUE_NOISE_HPP -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include "utility.hpp" #include "image.hpp" +#include "utility.hpp" namespace dither { -image::Bl blue_noise(int width, int height, int threads = 1, bool use_opencl = true); +image::Bl blue_noise(int width, int height, int threads = 1, + bool use_opencl = true); namespace internal { - std::vector blue_noise_impl(int width, int height, int threads = 1); - std::vector blue_noise_cl_impl( - const int width, const int height, const int filter_size, - cl_context context, cl_device_id device, cl_program program); +std::vector blue_noise_impl(int width, int height, + int threads = 1); +std::vector blue_noise_cl_impl(const int width, const int height, + const int filter_size, + cl_context context, + cl_device_id device, + cl_program program); - inline std::vector random_noise(int size, int subsize) { - std::vector pbp(size); - std::default_random_engine re(std::random_device{}()); - std::uniform_int_distribution dist(0, size - 1); +inline std::vector random_noise(int size, int subsize) { + std::vector pbp(size); + std::default_random_engine re(std::random_device{}()); + std::uniform_int_distribution dist(0, size - 1); - // initialize pbp - for(int i = 0; i < size; ++i) { - if(i < subsize) { - pbp[i] = true; - } else { - pbp[i] = false; - } - } - // randomize pbp - for(int i = 0; i < size-1; ++i) { - decltype(dist)::param_type range{i+1, size-1}; - int ridx = dist(re, range); - // probably can't use std::swap since using std::vector - bool temp = pbp[i]; - pbp[i] = pbp[ridx]; - pbp[ridx] = temp; - } - - return pbp; + // initialize pbp + for (int i = 0; i < size; ++i) { + if (i < subsize) { + pbp[i] = true; + } else { + pbp[i] = false; } + } + // randomize pbp + for (int i = 0; i < size - 1; ++i) { + decltype(dist)::param_type range{i + 1, size - 1}; + int ridx = dist(re, range); + // probably can't use std::swap since using std::vector + bool temp = pbp[i]; + pbp[i] = pbp[ridx]; + pbp[ridx] = temp; + } - constexpr float mu = 1.5F; - constexpr float mu_squared = mu * mu; - constexpr float double_mu_squared = 2.0F * mu * mu; + return pbp; +} - inline float gaussian(float x, float y) { - return std::exp(-(x*x + y*y)/(double_mu_squared)); +constexpr float mu = 1.5F; +constexpr float mu_squared = mu * mu; +constexpr float double_mu_squared = 2.0F * mu * mu; + +inline float gaussian(float x, float y) { + return std::exp(-(x * x + y * y) / (double_mu_squared)); +} + +inline std::vector precompute_gaussian(int size) { + std::vector precomputed; + if (size % 2 == 0) { + ++size; + } + precomputed.reserve(size * size); + + for (int i = 0; i < size * size; ++i) { + auto xy = utility::oneToTwo(i, size); + precomputed.push_back( + gaussian(xy.first - (size / 2), xy.second - (size / 2))); + } + + return precomputed; +} + +inline float filter(const std::vector &pbp, int x, int y, int width, + int height, int filter_size) { + float sum = 0.0f; + + if (filter_size % 2 == 0) { + ++filter_size; + } + + // Should be range -M/2 to M/2, but size_t cannot be negative, so range + // is 0 to M. + // p' = (M + x - (p - M/2)) % M = (3M/2 + x - p) % M + // q' = (N + y - (q - M/2)) % N = (N + M/2 + y - q) % N + for (int q = 0; q < filter_size; ++q) { + int q_prime = (height - filter_size / 2 + y + q) % height; + for (int p = 0; p < filter_size; ++p) { + int p_prime = (width - filter_size / 2 + x + p) % width; + if (pbp[utility::twoToOne(p_prime, q_prime, width, height)]) { + sum += gaussian(p - filter_size / 2, q - filter_size / 2); + } } + } - inline std::vector precompute_gaussian(int size) { - std::vector precomputed; - if (size % 2 == 0) { - ++size; - } - precomputed.reserve(size * size); + return sum; +} - for(int i = 0; i < size * size; ++i) { - auto xy = utility::oneToTwo(i, size); - precomputed.push_back(gaussian( - xy.first - (size / 2), - xy.second - (size / 2))); - } +inline float filter_with_precomputed(const std::vector &pbp, int x, int y, + int width, int height, int filter_size, + const std::vector &precomputed) { + float sum = 0.0f; - return precomputed; + if (filter_size % 2 == 0) { + ++filter_size; + } + + for (int q = 0; q < filter_size; ++q) { + int q_prime = (height - filter_size / 2 + y + q) % height; + for (int p = 0; p < filter_size; ++p) { + int p_prime = (width - filter_size / 2 + x + p) % width; + if (pbp[utility::twoToOne(p_prime, q_prime, width, height)]) { + sum += precomputed[utility::twoToOne(p, q, filter_size, filter_size)]; + } } + } - inline float filter( - const std::vector& pbp, - int x, int y, - int width, int height, int filter_size) { - float sum = 0.0f; + return sum; +} - if (filter_size % 2 == 0) { - ++filter_size; +inline void compute_filter(const std::vector &pbp, int width, int height, + int count, int filter_size, + std::vector &filter_out, + const std::vector *precomputed = nullptr, + int threads = 1) { + if (threads == 1) { + if (precomputed) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + filter_out[utility::twoToOne(x, y, width, height)] = + internal::filter_with_precomputed(pbp, x, y, width, height, + filter_size, *precomputed); } - - // Should be range -M/2 to M/2, but size_t cannot be negative, so range - // is 0 to M. - // p' = (M + x - (p - M/2)) % M = (3M/2 + x - p) % M - // q' = (N + y - (q - M/2)) % N = (N + M/2 + y - q) % N - for(int q = 0; q < filter_size; ++q) { - int q_prime = (height - filter_size / 2 + y + q) % height; - for(int p = 0; p < filter_size; ++p) { - int p_prime = (width - filter_size / 2 + x + p) % width; - if(pbp[utility::twoToOne(p_prime, q_prime, width, height)]) { - sum += gaussian(p - filter_size/2, - q - filter_size/2); - } - } + } + } else { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + filter_out[utility::twoToOne(x, y, width, height)] = + internal::filter(pbp, x, y, width, height, filter_size); } - - return sum; + } } - - inline float filter_with_precomputed( - const std::vector& pbp, - int x, int y, - int width, int height, int filter_size, - const std::vector &precomputed) { - float sum = 0.0f; - - if (filter_size % 2 == 0) { - ++filter_size; - } - - for(int q = 0; q < filter_size; ++q) { - int q_prime = (height - filter_size / 2 + y + q) % height; - for(int p = 0; p < filter_size; ++p) { - int p_prime = (width - filter_size / 2 + x + p) % width; - if(pbp[utility::twoToOne(p_prime, q_prime, width, height)]) { - sum += precomputed[utility::twoToOne(p, q, filter_size, filter_size)]; - } - } - } - - return sum; + } else { + if (threads == 0) { + threads = 10; } - - inline void compute_filter( - const std::vector &pbp, int width, int height, - int count, int filter_size, std::vector &filter_out, - const std::vector *precomputed = nullptr, - int threads = 1) { - if(threads == 1) { - if(precomputed) { - for(int y = 0; y < height; ++y) { - for(int x = 0; x < width; ++x) { - filter_out[utility::twoToOne(x, y, width, height)] = - internal::filter_with_precomputed( - pbp, x, y, width, height, filter_size, *precomputed); - } - } - } else { - for(int y = 0; y < height; ++y) { - for(int x = 0; x < width; ++x) { - filter_out[utility::twoToOne(x, y, width, height)] = - internal::filter(pbp, x, y, width, height, filter_size); - } - } - } - } else { - if(threads == 0) { - threads = 10; - } - int active_count = 0; - std::mutex cv_mutex; - std::condition_variable cv; - if(precomputed) { - for(int i = 0; i < count; ++i) { - { - std::unique_lock lock(cv_mutex); - active_count += 1; - } - std::thread t([] (int *ac, std::mutex *cvm, - std::condition_variable *cv, int i, - const std::vector *pbp, int width, - int height, int filter_size, - std::vector *fout, - const std::vector *precomputed) { - int x, y; - std::tie(x, y) = utility::oneToTwo(i, width); - (*fout)[i] = internal::filter_with_precomputed( - *pbp, x, y, width, height, filter_size, *precomputed); - std::unique_lock lock(*cvm); - *ac -= 1; - cv->notify_all(); - }, - &active_count, &cv_mutex, &cv, i, &pbp, width, height, - filter_size, &filter_out, precomputed); - t.detach(); - - std::unique_lock lock(cv_mutex); - while(active_count >= threads) { - cv.wait_for(lock, std::chrono::seconds(1)); - } - } - } else { - for(int i = 0; i < count; ++i) { - { - std::unique_lock lock(cv_mutex); - active_count += 1; - } - std::thread t([] (int *ac, std::mutex *cvm, - std::condition_variable *cv, int i, - const std::vector *pbp, int width, - int height, int filter_size, - std::vector *fout) { - int x, y; - std::tie(x, y) = utility::oneToTwo(i, width); - (*fout)[i] = internal::filter( - *pbp, x, y, width, height, filter_size); - std::unique_lock lock(*cvm); - *ac -= 1; - cv->notify_all(); - }, - &active_count, &cv_mutex, &cv, i, &pbp, width, height, - filter_size, &filter_out); - t.detach(); - - std::unique_lock lock(cv_mutex); - while(active_count >= threads) { - cv.wait_for(lock, std::chrono::seconds(1)); - } - } - } - std::unique_lock lock(cv_mutex); - while(active_count > 0) { - cv.wait_for(lock, std::chrono::seconds(1)); - } + int active_count = 0; + std::mutex cv_mutex; + std::condition_variable cv; + if (precomputed) { + for (int i = 0; i < count; ++i) { + { + std::unique_lock lock(cv_mutex); + active_count += 1; } + std::thread t( + [](int *ac, std::mutex *cvm, std::condition_variable *cv, int i, + const std::vector *pbp, int width, int height, + int filter_size, std::vector *fout, + const std::vector *precomputed) { + int x, y; + std::tie(x, y) = utility::oneToTwo(i, width); + (*fout)[i] = internal::filter_with_precomputed( + *pbp, x, y, width, height, filter_size, *precomputed); + std::unique_lock lock(*cvm); + *ac -= 1; + cv->notify_all(); + }, + &active_count, &cv_mutex, &cv, i, &pbp, width, height, filter_size, + &filter_out, precomputed); + t.detach(); + std::unique_lock lock(cv_mutex); + while (active_count >= threads) { + cv.wait_for(lock, std::chrono::seconds(1)); + } + } + } else { + for (int i = 0; i < count; ++i) { + { + std::unique_lock lock(cv_mutex); + active_count += 1; + } + std::thread t( + [](int *ac, std::mutex *cvm, std::condition_variable *cv, int i, + const std::vector *pbp, int width, int height, + int filter_size, std::vector *fout) { + int x, y; + std::tie(x, y) = utility::oneToTwo(i, width); + (*fout)[i] = + internal::filter(*pbp, x, y, width, height, filter_size); + std::unique_lock lock(*cvm); + *ac -= 1; + cv->notify_all(); + }, + &active_count, &cv_mutex, &cv, i, &pbp, width, height, filter_size, + &filter_out); + t.detach(); + + std::unique_lock lock(cv_mutex); + while (active_count >= threads) { + cv.wait_for(lock, std::chrono::seconds(1)); + } + } } - - inline std::pair filter_minmax(const std::vector &filter, - std::vector pbp) { - // ensure minority pixel is "true" - unsigned int count = 0; - for (bool value : pbp) { - if(value) { - ++count; - } - } - if (count * 2 >= pbp.size()) { - //std::cout << "MINMAX flip\n"; // DEBUG - for (unsigned int i = 0; i < pbp.size(); ++i) { - pbp[i] = !pbp[i]; - } - } - - float min = std::numeric_limits::infinity(); - float max = -std::numeric_limits::infinity(); - int min_index = -1; - int max_index = -1; - - for(std::vector::size_type i = 0; i < filter.size(); ++i) { - if(!pbp[i] && filter[i] < min) { - min_index = i; - min = filter[i]; - } - if(pbp[i] && filter[i] > max) { - max_index = i; - max = filter[i]; - } - } - - return {min_index, max_index}; + std::unique_lock lock(cv_mutex); + while (active_count > 0) { + cv.wait_for(lock, std::chrono::seconds(1)); } + } +} - inline std::pair filter_abs_minmax( - const std::vector &filter) { - float min = std::numeric_limits::infinity(); - float max = -std::numeric_limits::infinity(); - int min_index = -1; - int max_index = -1; - - std::default_random_engine re(std::random_device{}()); - std::size_t startIdx = std::uniform_int_distribution(0, filter.size() - 1)(re); - - for(std::vector::size_type i = startIdx; i < filter.size(); ++i) { - if(filter[i] < min) { - min_index = i; - min = filter[i]; - } - if(filter[i] > max) { - max_index = i; - max = filter[i]; - } - } - for(std::vector::size_type i = 0; i < startIdx; ++i) { - if(filter[i] < min) { - min_index = i; - min = filter[i]; - } - if(filter[i] > max) { - max_index = i; - max = filter[i]; - } - } - - return {min_index, max_index}; +inline std::pair filter_minmax(const std::vector &filter, + std::vector pbp) { + // ensure minority pixel is "true" + unsigned int count = 0; + for (bool value : pbp) { + if (value) { + ++count; } - - inline int get_one_or_zero( - const std::vector& pbp, bool get_one, - int idx, int width, int height) { - std::queue checking_indices; - - auto xy = utility::oneToTwo(idx, width); - int count = 0; - int loops = 0; - enum { D_DOWN = 0, D_LEFT = 1, D_UP = 2, D_RIGHT = 3 } dir = D_RIGHT; - int next; - - while(true) { - if(count == 0) { - switch(dir) { - case D_RIGHT: - xy.first = (xy.first + 1) % width; - ++loops; - count = loops * 2 - 1; - dir = D_DOWN; - break; - case D_DOWN: - xy.first = (xy.first + width - 1) % width; - count = loops * 2 - 1; - dir = D_LEFT; - break; - case D_LEFT: - xy.second = (xy.second + height - 1) % height; - count = loops * 2 - 1; - dir = D_UP; - break; - case D_UP: - xy.first = (xy.first + 1) % width; - count = loops * 2 - 1; - dir = D_RIGHT; - break; - } - } else { - switch(dir) { - case D_DOWN: - xy.second = (xy.second + 1) % height; - --count; - break; - case D_LEFT: - xy.first = (xy.first + width - 1) % width; - --count; - break; - case D_UP: - xy.second = (xy.second + height - 1) % height; - --count; - break; - case D_RIGHT: - xy.first = (xy.first + 1) % width; - --count; - break; - } - } - next = utility::twoToOne(xy.first, xy.second, width, height); - if((get_one && pbp[next]) || (!get_one && !pbp[next])) { - return next; - } - } - return idx; + } + if (count * 2 >= pbp.size()) { + // std::cout << "MINMAX flip\n"; // DEBUG + for (unsigned int i = 0; i < pbp.size(); ++i) { + pbp[i] = !pbp[i]; } + } - inline void write_filter(const std::vector &filter, int width, const char *filename) { - int min, max; - std::tie(min, max) = filter_abs_minmax(filter); + float min = std::numeric_limits::infinity(); + float max = -std::numeric_limits::infinity(); + int min_index = -1; + int max_index = -1; - printf("Writing to %s, min is %.3f, max is %.3f\n", filename, filter[min], filter[max]); - FILE *filter_image = fopen(filename, "w"); - fprintf(filter_image, "P2\n%d %d\n255\n", width, (int)filter.size() / width); - for(std::vector::size_type i = 0; i < filter.size(); ++i) { - fprintf(filter_image, "%d ", - (int)(((filter[i] - filter[min]) - / (filter[max] - filter[min])) - * 255.0f)); - if((i + 1) % width == 0) { - fputc('\n', filter_image); - } - } - fclose(filter_image); + for (std::vector::size_type i = 0; i < filter.size(); ++i) { + if (!pbp[i] && filter[i] < min) { + min_index = i; + min = filter[i]; } - - inline image::Bl toBl(const std::vector& pbp, int width) { - image::Bl bwImage(width, pbp.size() / width); - assert((unsigned long)bwImage.getSize() >= pbp.size() - && "New image::Bl size too small (pbp's size is not a multiple of width)"); - - for(unsigned int i = 0; i < pbp.size(); ++i) { - bwImage.getData()[i] = pbp[i] ? 255 : 0; - } - - return bwImage; + if (pbp[i] && filter[i] > max) { + max_index = i; + max = filter[i]; } + } - inline image::Bl rangeToBl(const std::vector &values, int width) { - int min = std::numeric_limits::max(); - int max = std::numeric_limits::min(); + return {min_index, max_index}; +} - for (int value : values) { - if (value < min) { - min = value; - } - if (value > max) { - max = value; - } - } +inline std::pair filter_abs_minmax(const std::vector &filter) { + float min = std::numeric_limits::infinity(); + float max = -std::numeric_limits::infinity(); + int min_index = -1; + int max_index = -1; + + std::default_random_engine re(std::random_device{}()); + std::size_t startIdx = + std::uniform_int_distribution(0, filter.size() - 1)(re); + + for (std::vector::size_type i = startIdx; i < filter.size(); ++i) { + if (filter[i] < min) { + min_index = i; + min = filter[i]; + } + if (filter[i] > max) { + max_index = i; + max = filter[i]; + } + } + for (std::vector::size_type i = 0; i < startIdx; ++i) { + if (filter[i] < min) { + min_index = i; + min = filter[i]; + } + if (filter[i] > max) { + max_index = i; + max = filter[i]; + } + } + + return {min_index, max_index}; +} + +inline int get_one_or_zero(const std::vector &pbp, bool get_one, int idx, + int width, int height) { + std::queue checking_indices; + + auto xy = utility::oneToTwo(idx, width); + int count = 0; + int loops = 0; + enum { D_DOWN = 0, D_LEFT = 1, D_UP = 2, D_RIGHT = 3 } dir = D_RIGHT; + int next; + + while (true) { + if (count == 0) { + switch (dir) { + case D_RIGHT: + xy.first = (xy.first + 1) % width; + ++loops; + count = loops * 2 - 1; + dir = D_DOWN; + break; + case D_DOWN: + xy.first = (xy.first + width - 1) % width; + count = loops * 2 - 1; + dir = D_LEFT; + break; + case D_LEFT: + xy.second = (xy.second + height - 1) % height; + count = loops * 2 - 1; + dir = D_UP; + break; + case D_UP: + xy.first = (xy.first + 1) % width; + count = loops * 2 - 1; + dir = D_RIGHT; + break; + } + } else { + switch (dir) { + case D_DOWN: + xy.second = (xy.second + 1) % height; + --count; + break; + case D_LEFT: + xy.first = (xy.first + width - 1) % width; + --count; + break; + case D_UP: + xy.second = (xy.second + height - 1) % height; + --count; + break; + case D_RIGHT: + xy.first = (xy.first + 1) % width; + --count; + break; + } + } + next = utility::twoToOne(xy.first, xy.second, width, height); + if ((get_one && pbp[next]) || (!get_one && !pbp[next])) { + return next; + } + } + return idx; +} + +inline void write_filter(const std::vector &filter, int width, + const char *filename) { + int min, max; + std::tie(min, max) = filter_abs_minmax(filter); + + printf("Writing to %s, min is %.3f, max is %.3f\n", filename, filter[min], + filter[max]); + FILE *filter_image = fopen(filename, "w"); + fprintf(filter_image, "P2\n%d %d\n255\n", width, (int)filter.size() / width); + for (std::vector::size_type i = 0; i < filter.size(); ++i) { + fprintf(filter_image, "%d ", + (int)(((filter[i] - filter[min]) / (filter[max] - filter[min])) * + 255.0f)); + if ((i + 1) % width == 0) { + fputc('\n', filter_image); + } + } + fclose(filter_image); +} + +inline image::Bl toBl(const std::vector &pbp, int width) { + image::Bl bwImage(width, pbp.size() / width); + assert((unsigned long)bwImage.getSize() >= pbp.size() && + "New image::Bl size too small (pbp's size is not a multiple of " + "width)"); + + for (unsigned int i = 0; i < pbp.size(); ++i) { + bwImage.getData()[i] = pbp[i] ? 255 : 0; + } + + return bwImage; +} + +inline image::Bl rangeToBl(const std::vector &values, int width) { + int min = std::numeric_limits::max(); + int max = std::numeric_limits::min(); + + for (int value : values) { + if (value < min) { + min = value; + } + if (value > max) { + max = value; + } + } #ifndef NDEBUG - std::cout << "rangeToBl: Got min == " << min << " and max == " << max << std::endl; + std::cout << "rangeToBl: Got min == " << min << " and max == " << max + << std::endl; #endif - max -= min; + max -= min; - image::Bl grImage(width, values.size() / width); - assert((unsigned long)grImage.getSize() >= values.size() - && "New image::Bl size too small (values' size is not a multiple of width)"); + image::Bl grImage(width, values.size() / width); + assert((unsigned long)grImage.getSize() >= values.size() && + "New image::Bl size too small (values' size is not a multiple of " + "width)"); - for(unsigned int i = 0; i < values.size(); ++i) { - grImage.getData()[i] = std::round(((float)((int)(values[i]) - min) / (float)max) * 255.0F); - } + for (unsigned int i = 0; i < values.size(); ++i) { + grImage.getData()[i] = + std::round(((float)((int)(values[i]) - min) / (float)max) * 255.0F); + } - return grImage; + return grImage; +} + +inline std::pair filter_minmax_in_range( + int start, int width, int height, int range, + const std::vector &vec) { + float max = -std::numeric_limits::infinity(); + float min = std::numeric_limits::infinity(); + + int maxIdx = -1; + int minIdx = -1; + + auto startXY = utility::oneToTwo(start, width); + for (int y = startXY.second - range / 2; y <= startXY.second + range / 2; + ++y) { + for (int x = startXY.first - range / 2; x <= startXY.first + range / 2; + ++x) { + int idx = utility::twoToOne(x, y, width, height); + if (idx == start) { + continue; + } + + if (vec[idx] < min) { + min = vec[idx]; + minIdx = idx; + } + + if (vec[idx] > max) { + max = vec[idx]; + maxIdx = idx; + } } + } - inline std::pair filter_minmax_in_range(int start, int width, - int height, - int range, - const std::vector &vec) { - float max = -std::numeric_limits::infinity(); - float min = std::numeric_limits::infinity(); + if (minIdx < 0) { + throw std::runtime_error("Invalid minIdx value"); + } else if (maxIdx < 0) { + throw std::runtime_error("Invalid maxIdx value"); + } + return {minIdx, maxIdx}; +} +} // namespace internal - int maxIdx = -1; - int minIdx = -1; - - auto startXY = utility::oneToTwo(start, width); - for(int y = startXY.second - range / 2; y <= startXY.second + range / 2; ++y) { - for(int x = startXY.first - range / 2; x <= startXY.first + range / 2; ++x) { - int idx = utility::twoToOne(x, y, width, height); - if(idx == start) { - continue; - } - - if(vec[idx] < min) { - min = vec[idx]; - minIdx = idx; - } - - if(vec[idx] > max) { - max = vec[idx]; - maxIdx = idx; - } - } - } - - if(minIdx < 0) { - throw std::runtime_error("Invalid minIdx value"); - } else if(maxIdx < 0) { - throw std::runtime_error("Invalid maxIdx value"); - } - return {minIdx, maxIdx}; - } -} // namespace dither::internal - -} // namespace dither +} // namespace dither #endif diff --git a/src/image.cpp b/src/image.cpp index 81485a7..4dd2281 100644 --- a/src/image.cpp +++ b/src/image.cpp @@ -1,241 +1,221 @@ #include "image.hpp" -#include -#include -#include - #include +#include +#include +#include + bool image::Base::isValid() const { - return getWidth() > 0 && getHeight() > 0 && getSize() > 0; + return getWidth() > 0 && getHeight() > 0 && getSize() > 0; } -image::Bl::Bl() : -data(), -width(0), -height(0) -{} +image::Bl::Bl() : data(), width(0), height(0) {} -image::Bl::Bl(int width, int height) : -data(width * height), -width(width), -height(height) -{} +image::Bl::Bl(int width, int height) + : data(width * height), width(width), height(height) {} -image::Bl::Bl(const std::vector &data, int width) : -data(data), -width(width), -height(data.size() / width) -{} +image::Bl::Bl(const std::vector &data, int width) + : data(data), width(width), height(data.size() / width) {} -image::Bl::Bl(std::vector &&data, int width) : -data(std::move(data)), -width(width), -height(data.size() / width) -{} +image::Bl::Bl(std::vector &&data, int width) + : data(std::move(data)), width(width), height(data.size() / width) {} -image::Bl::Bl(const std::vector &data, int width) : - data{}, - width(width), - height(data.size() / width) -{ - for(float gspixel : data) { - this->data.push_back(static_cast(255.0F * gspixel)); - } +image::Bl::Bl(const std::vector &data, int width) + : data{}, width(width), height(data.size() / width) { + for (float gspixel : data) { + this->data.push_back(static_cast(255.0F * gspixel)); + } } void image::Bl::randomize() { - if(!isValid()) { - return; - } + if (!isValid()) { + return; + } - std::default_random_engine re(std::random_device{}()); - std::uniform_int_distribution dist; + std::default_random_engine re(std::random_device{}()); + std::uniform_int_distribution dist; - for(unsigned int i = 0; i < data.size(); ++i) { - data[i] = i < data.size() / 2 ? 255 : 0; - } + for (unsigned int i = 0; i < data.size(); ++i) { + data[i] = i < data.size() / 2 ? 255 : 0; + } - for(unsigned int i = 0; i < data.size() - 1; ++i) { - int ridx = dist(re, decltype(dist)::param_type{i+1, (unsigned int)data.size()-1}); - uint8_t temp = data[i]; - data[i] = data[ridx]; - data[ridx] = temp; - } + for (unsigned int i = 0; i < data.size() - 1; ++i) { + int ridx = dist( + re, decltype(dist)::param_type{i + 1, (unsigned int)data.size() - 1}); + uint8_t temp = data[i]; + data[i] = data[ridx]; + data[ridx] = temp; + } } -unsigned int image::Bl::getSize() const { - return data.size(); +unsigned int image::Bl::getSize() const { return data.size(); } + +uint8_t *image::Bl::getData() { + if (!isValid()) { + return nullptr; + } + return &data[0]; } -uint8_t* image::Bl::getData() { - if(!isValid()) { - return nullptr; - } - return &data[0]; +const uint8_t *image::Bl::getDataC() const { + if (!isValid()) { + return nullptr; + } + return &data[0]; } -const uint8_t* image::Bl::getDataC() const { - if(!isValid()) { - return nullptr; - } - return &data[0]; -} +unsigned int image::Bl::getWidth() const { return width; } -unsigned int image::Bl::getWidth() const { - return width; -} - -unsigned int image::Bl::getHeight() const { - return height; -} +unsigned int image::Bl::getHeight() const { return height; } bool image::Bl::canWriteFile(file_type type) { - if(!isValid()) { - std::cout << "Cannot write image because isValid() is false\n"; - return false; - } - switch(type) { + if (!isValid()) { + std::cout << "Cannot write image because isValid() is false\n"; + return false; + } + switch (type) { case file_type::PBM: case file_type::PGM: case file_type::PPM: case file_type::PNG: - return true; + return true; default: - std::cout << "Cannot write image because received invalid file_type\n"; - return false; - } + std::cout << "Cannot write image because received invalid " + "file_type\n"; + return false; + } } -bool image::Bl::writeToFile(file_type type, bool canOverwrite, const char *filename) { - if(!isValid() || !canWriteFile(type)) { - std::cout << "ERROR: Image is not valid or cannot write file type\n"; - return false; +bool image::Bl::writeToFile(file_type type, bool canOverwrite, + const char *filename) { + if (!isValid() || !canWriteFile(type)) { + std::cout << "ERROR: Image is not valid or cannot write file type\n"; + return false; + } + + FILE *file = fopen(filename, "r"); + if (file && !canOverwrite) { + fclose(file); + std::cout << "ERROR: Will not overwite existing file \"" << filename << "\"" + << std::endl; + return false; + } + + if (file) { + fclose(file); + } + + if (type == file_type::PNG) { + FILE *outfile = fopen(filename, "wb"); + if (outfile == nullptr) { + std::cout << "ERROR: Failed to open file for writing (png)\n"; + return false; + } + const static auto pngErrorLFn = [](png_structp /* unused */, + png_const_charp message) { + std::cerr << "WARNING [libpng]: " << message << std::endl; + }; + const static auto pngWarnLFn = [](png_structp /* unused */, + png_const_charp message) { + std::cerr << "ERROR [libpng]: " << message << std::endl; + }; + + png_structp png_ptr = png_create_write_struct( + PNG_LIBPNG_VER_STRING, nullptr, pngErrorLFn, pngWarnLFn); + + if (png_ptr == nullptr) { + fclose(outfile); + std::cout << "ERROR: Failed to set up writing png file (png_ptr)\n"; + return false; } - FILE *file = fopen(filename, "r"); - if(file && !canOverwrite) { - fclose(file); - std::cout << "ERROR: Will not overwite existing file \"" << filename - << "\"" << std::endl; - return false; + png_infop info_ptr = png_create_info_struct(png_ptr); + if (info_ptr == nullptr) { + png_destroy_write_struct(&png_ptr, nullptr); + fclose(outfile); + std::cout << "ERROR: Failed to set up writing png file (png_infop)\n"; + return false; } - if(file) { - fclose(file); + if (setjmp(png_jmpbuf(png_ptr))) { + png_destroy_write_struct(&png_ptr, &info_ptr); + fclose(outfile); + std::cout << "ERROR: Failed to write image file (png error)\n"; + return false; } - if(type == file_type::PNG) { - FILE *outfile = fopen(filename, "wb"); - if (outfile == nullptr) { - std::cout << "ERROR: Failed to open file for writing (png)\n"; - return false; - } - const static auto pngErrorLFn = [] (png_structp /* unused */, - png_const_charp message) { - std::cerr << "WARNING [libpng]: " << message << std::endl; - }; - const static auto pngWarnLFn = [] (png_structp /* unused */, - png_const_charp message) { - std::cerr << "ERROR [libpng]: " << message << std::endl; - }; + png_init_io(png_ptr, outfile); - png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, - nullptr, - pngErrorLFn, - pngWarnLFn); + png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_GRAY, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); - if (png_ptr == nullptr) { - fclose(outfile); - std::cout << "ERROR: Failed to set up writing png file (png_ptr)\n"; - return false; - } + png_write_info(png_ptr, info_ptr); - png_infop info_ptr = png_create_info_struct(png_ptr); - if (info_ptr == nullptr) { - png_destroy_write_struct(&png_ptr, nullptr); - fclose(outfile); - std::cout << "ERROR: Failed to set up writing png file (png_infop)\n"; - return false; - } + // png_set_filler(png_ptr, 0, PNG_FILLER_AFTER); - if (setjmp(png_jmpbuf(png_ptr))) { - png_destroy_write_struct(&png_ptr, &info_ptr); - fclose(outfile); - std::cout << "ERROR: Failed to write image file (png error)\n"; - return false; - } - - png_init_io(png_ptr, outfile); - - png_set_IHDR(png_ptr, info_ptr, width, height, 8, PNG_COLOR_TYPE_GRAY, - PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, - PNG_FILTER_TYPE_DEFAULT); - - png_write_info(png_ptr, info_ptr); - - //png_set_filler(png_ptr, 0, PNG_FILLER_AFTER); - - for (unsigned int j = 0; j < this->data.size() / this->width; ++j) { - unsigned char *dataPtr = &this->data.at(j * this->width); - png_write_rows(png_ptr, &dataPtr, 1); - } - - png_write_end(png_ptr, nullptr); - - png_destroy_write_struct(&png_ptr, &info_ptr); - - fclose(outfile); - return true; + for (unsigned int j = 0; j < this->data.size() / this->width; ++j) { + unsigned char *dataPtr = &this->data.at(j * this->width); + png_write_rows(png_ptr, &dataPtr, 1); } - switch(type) { + png_write_end(png_ptr, nullptr); + + png_destroy_write_struct(&png_ptr, &info_ptr); + + fclose(outfile); + return true; + } + + switch (type) { case file_type::PBM: - file = fopen(filename, "w"); - fprintf(file, "P1\n%d %d", width, height); - break; + file = fopen(filename, "w"); + fprintf(file, "P1\n%d %d", width, height); + break; case file_type::PGM: - file = fopen(filename, "wb"); - fprintf(file, "P5\n%d %d\n255\n", width, height); - break; + file = fopen(filename, "wb"); + fprintf(file, "P5\n%d %d\n255\n", width, height); + break; case file_type::PPM: - file = fopen(filename, "wb"); - fprintf(file, "P6\n%d %d\n255\n", width, height); - break; + file = fopen(filename, "wb"); + fprintf(file, "P6\n%d %d\n255\n", width, height); + break; default: + fclose(file); + std::cout << "ERROR: Cannot write image file, invalid type\n"; + return false; + } + for (unsigned int i = 0; i < data.size(); ++i) { + if (type == file_type::PBM && i % width == 0) { + fprintf(file, "\n"); + } + switch (type) { + case file_type::PBM: + fprintf(file, "%d ", data[i] == 0 ? 0 : 1); + break; + case file_type::PGM: + // fprintf(file, "%c ", data[i]); + fputc(data[i], file); + break; + case file_type::PPM: + // fprintf(file, "%c %c %c ", data[i], data[i], data[i]); + fputc(data[i], file); + fputc(data[i], file); + fputc(data[i], file); + break; + default: fclose(file); std::cout << "ERROR: Cannot write image file, invalid type\n"; return false; } - for(unsigned int i = 0; i < data.size(); ++i) { - if(type == file_type::PBM && i % width == 0) { - fprintf(file, "\n"); - } - switch(type) { - case file_type::PBM: - fprintf(file, "%d ", data[i] == 0 ? 0 : 1); - break; - case file_type::PGM: - //fprintf(file, "%c ", data[i]); - fputc(data[i], file); - break; - case file_type::PPM: - //fprintf(file, "%c %c %c ", data[i], data[i], data[i]); - fputc(data[i], file); - fputc(data[i], file); - fputc(data[i], file); - break; - default: - fclose(file); - std::cout << "ERROR: Cannot write image file, invalid type\n"; - return false; - } - } + } - fclose(file); - return true; + fclose(file); + return true; } -bool image::Bl::writeToFile(file_type type, bool canOverwrite, const std::string &filename) { - return writeToFile(type, canOverwrite, filename.c_str()); +bool image::Bl::writeToFile(file_type type, bool canOverwrite, + const std::string &filename) { + return writeToFile(type, canOverwrite, filename.c_str()); } diff --git a/src/image.hpp b/src/image.hpp index f976f6f..f354f09 100644 --- a/src/image.hpp +++ b/src/image.hpp @@ -2,91 +2,96 @@ #define DITHERING_IMAGE_HPP #include -#include #include +#include namespace image { - enum class color_type { - Black, - Red, - Green, - Blue, - Alpha, - }; +enum class color_type { + Black, + Red, + Green, + Blue, + Alpha, +}; - enum class file_type { - PBM, - PGM, - PPM, - PNG, - }; +enum class file_type { + PBM, + PGM, + PPM, + PNG, +}; - class Base { - public: - Base() = default; - virtual ~Base() {} +class Base { + public: + Base() = default; + virtual ~Base() {} - Base(const Base &other) = default; - Base(Base &&other) = default; + Base(const Base &other) = default; + Base(Base &&other) = default; - Base& operator=(const Base &other) = default; - Base& operator=(Base &&other) = default; + Base &operator=(const Base &other) = default; + Base &operator=(Base &&other) = default; - virtual void randomize() = 0; + virtual void randomize() = 0; - virtual unsigned int getSize() const = 0; - virtual uint8_t* getData() = 0; - virtual const uint8_t* getDataC() const = 0; + virtual unsigned int getSize() const = 0; + virtual uint8_t *getData() = 0; + virtual const uint8_t *getDataC() const = 0; - virtual unsigned int getWidth() const = 0; - virtual unsigned int getHeight() const = 0; + virtual unsigned int getWidth() const = 0; + virtual unsigned int getHeight() const = 0; - virtual int getTypesCount() = 0; - virtual std::vector getTypes() = 0; - virtual int getTypeStride(color_type type) = 0; + virtual int getTypesCount() = 0; + virtual std::vector getTypes() = 0; + virtual int getTypeStride(color_type type) = 0; - virtual bool canWriteFile(file_type type) = 0; - virtual bool writeToFile(file_type type, bool canOverwrite, const char *filename) = 0; - virtual bool writeToFile(file_type type, bool canOverwrite, const std::string &filename) = 0; - bool isValid() const; - }; + virtual bool canWriteFile(file_type type) = 0; + virtual bool writeToFile(file_type type, bool canOverwrite, + const char *filename) = 0; + virtual bool writeToFile(file_type type, bool canOverwrite, + const std::string &filename) = 0; + bool isValid() const; +}; - class Bl : public Base { - public: - Bl(); - Bl(int width, int height); - Bl(const std::vector &data, int width); - Bl(std::vector &&data, int width); - Bl(const std::vector &data, int width); - virtual ~Bl() {} +class Bl : public Base { + public: + Bl(); + Bl(int width, int height); + Bl(const std::vector &data, int width); + Bl(std::vector &&data, int width); + Bl(const std::vector &data, int width); + virtual ~Bl() {} - Bl(const Bl &other) = default; - Bl(Bl &&other) = default; + Bl(const Bl &other) = default; + Bl(Bl &&other) = default; - Bl& operator=(const Bl &other) = default; - Bl& operator=(Bl &&other) = default; + Bl &operator=(const Bl &other) = default; + Bl &operator=(Bl &&other) = default; - void randomize() override; + void randomize() override; - unsigned int getSize() const override; - uint8_t* getData() override; - const uint8_t* getDataC() const override; + unsigned int getSize() const override; + uint8_t *getData() override; + const uint8_t *getDataC() const override; - unsigned int getWidth() const override; - unsigned int getHeight() const override; + unsigned int getWidth() const override; + unsigned int getHeight() const override; - int getTypesCount() override { return 1; } - std::vector getTypes() override { return { color_type::Black }; } - int getTypeStride(color_type) override { return 0; } + int getTypesCount() override { return 1; } + std::vector getTypes() override { return {color_type::Black}; } + int getTypeStride(color_type) override { return 0; } - bool canWriteFile(file_type type) override; - bool writeToFile(file_type type, bool canOverwrite, const char *filename) override; - bool writeToFile(file_type type, bool canOverwrite, const std::string &filename) override; - private: - std::vector data; - int width; - int height; - }; -} + bool canWriteFile(file_type type) override; + bool writeToFile(file_type type, bool canOverwrite, + const char *filename) override; + bool writeToFile(file_type type, bool canOverwrite, + const std::string &filename) override; + + private: + std::vector data; + int width; + int height; +}; +} // namespace image #endif diff --git a/src/main.cpp b/src/main.cpp index 2c0c471..71d5f9a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,52 +1,52 @@ -#include #include +#include #include "arg_parse.hpp" #include "blue_noise.hpp" int main(int argc, char **argv) { - Args args; - if(args.ParseArgs(argc, argv)) { - return 0; - } - - // validation - if (args.generate_blue_noise_) { - if (args.output_filename_.empty()) { - std::cout << "ERROR: Cannot generate blue-noise, output filename is not specified" - << std::endl; - Args::DisplayHelp(); - return 1; - } else if (args.blue_noise_size_ < 16) { - std::cout << "ERROR: blue-noise size is too small" - << std::endl; - Args::DisplayHelp(); - return 1; - } else if (!args.overwrite_file_) { - FILE *file = std::fopen(args.output_filename_.c_str(), "r"); - if (file) { - std::fclose(file); - std::cout << "ERROR: overwrite not specified, but filename exists" - << std::endl; - Args::DisplayHelp(); - return 1; - } - } - } else { - std::cout << "ERROR: No operation specified\n"; - Args::DisplayHelp(); - } - - if (args.generate_blue_noise_) { - std::cout << "Generating blue_noise..." << std::endl; - image::Bl bl = dither::blue_noise(args.blue_noise_size_, - args.blue_noise_size_, - args.threads_, - args.use_opencl_); - if(!bl.writeToFile(image::file_type::PNG, args.overwrite_file_, args.output_filename_)) { - std::cout << "ERROR: Failed to write blue-noise to file\n"; - } - } - + Args args; + if (args.ParseArgs(argc, argv)) { return 0; + } + + // validation + if (args.generate_blue_noise_) { + if (args.output_filename_.empty()) { + std::cout << "ERROR: Cannot generate blue-noise, output filename " + "is not specified" + << std::endl; + Args::DisplayHelp(); + return 1; + } else if (args.blue_noise_size_ < 16) { + std::cout << "ERROR: blue-noise size is too small" << std::endl; + Args::DisplayHelp(); + return 1; + } else if (!args.overwrite_file_) { + FILE *file = std::fopen(args.output_filename_.c_str(), "r"); + if (file) { + std::fclose(file); + std::cout << "ERROR: overwrite not specified, but filename exists" + << std::endl; + Args::DisplayHelp(); + return 1; + } + } + } else { + std::cout << "ERROR: No operation specified\n"; + Args::DisplayHelp(); + } + + if (args.generate_blue_noise_) { + std::cout << "Generating blue_noise..." << std::endl; + image::Bl bl = + dither::blue_noise(args.blue_noise_size_, args.blue_noise_size_, + args.threads_, args.use_opencl_); + if (!bl.writeToFile(image::file_type::PNG, args.overwrite_file_, + args.output_filename_)) { + std::cout << "ERROR: Failed to write blue-noise to file\n"; + } + } + + return 0; } diff --git a/src/utility.hpp b/src/utility.hpp index 17b3000..b28feef 100644 --- a/src/utility.hpp +++ b/src/utility.hpp @@ -1,33 +1,33 @@ #ifndef DITHERING_UTILITY_HPP #define DITHERING_UTILITY_HPP -#include #include +#include namespace utility { - inline int twoToOne(int x, int y, int width, int height) { - while(x < 0) { - x += width; - } - while(y < 0) { - y += height; - } - x = x % width; - y = y % height; - return x + y * width; - } - - inline std::pair oneToTwo(int i, int width) { - return {i % width, i / width}; - } - - inline float dist(int a, int b, int width) { - auto axy = utility::oneToTwo(a, width); - auto bxy = utility::oneToTwo(b, width); - float dx = axy.first - bxy.first; - float dy = axy.second - bxy.second; - return std::sqrt(dx * dx + dy * dy); - } +inline int twoToOne(int x, int y, int width, int height) { + while (x < 0) { + x += width; + } + while (y < 0) { + y += height; + } + x = x % width; + y = y % height; + return x + y * width; } +inline std::pair oneToTwo(int i, int width) { + return {i % width, i / width}; +} + +inline float dist(int a, int b, int width) { + auto axy = utility::oneToTwo(a, width); + auto bxy = utility::oneToTwo(b, width); + float dx = axy.first - bxy.first; + float dy = axy.second - bxy.second; + return std::sqrt(dx * dx + dy * dy); +} +} // namespace utility + #endif