]> git.seodisparate.com - blue_noise_generation/commitdiff
Impl use of OpenCL for generating blue noise
authorStephen Seo <seo.disparate@gmail.com>
Sat, 23 Jan 2021 07:42:45 +0000 (16:42 +0900)
committerStephen Seo <seo.disparate@gmail.com>
Sat, 23 Jan 2021 07:42:45 +0000 (16:42 +0900)
Makefile
src/blue_noise.cl [new file with mode: 0644]
src/blue_noise.cpp
src/blue_noise.hpp
src/main.cpp

index 449bec920cf3bc2f941e9ed63884381435e10832..7eb90139f0a1b00a1ad64a2b876a5fb6d765d196 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
 
-COMMON_FLAGS=-Wall -Wextra -Wpedantic -std=c++17 -lpthread
+COMMON_FLAGS=-Wall -Wextra -Wpedantic -std=c++17
+LINKER_FLAGS=-lpthread -lOpenCL
 ifdef DEBUG
        CPPFLAGS=${COMMON_FLAGS} -g -O0
 else
@@ -14,7 +15,7 @@ OBJECTS=${subst .cpp,.o,${SOURCES}}
 all: Dithering
 
 Dithering: ${OBJECTS}
-       ${CXX} ${CPPFLAGS} -o Dithering $^
+       ${CXX} ${CPPFLAGS} ${LINKER_FLAGS} -o Dithering $^
 
 .PHONY:
 
diff --git a/src/blue_noise.cl b/src/blue_noise.cl
new file mode 100644 (file)
index 0000000..48a94b5
--- /dev/null
@@ -0,0 +1,25 @@
+__kernel void do_filter(
+        __global float *filter_out, __global float *precomputed,
+        __global int *pbp, int width, int height, int filter_size) {
+    int i = get_global_id(0);
+    if(i < 0 || i >= width * height) {
+        return;
+    }
+    int x = i % width;
+    int y = i / width;
+
+    float sum = 0.0f;
+    for(int q = 0; q < filter_size; ++q) {
+        int q_prime = (height + filter_size / 2 + y - q) % height;
+        for(int p = 0; p < filter_size; ++p) {
+            int p_prime = (width + filter_size / 2 + x - p) % width;
+            if(pbp[p_prime + q_prime * width] != 0) {
+                sum += precomputed[p + q * filter_size];
+            }
+        }
+    }
+
+    filter_out[i] = sum;
+}
+
+// vim: syntax=c
index c5c1f32203a8c25ff8d7082eda0f31a1a2012fe9..1f0aea93fe04ffd55075fb4eaa45eb7a01f266e3 100644 (file)
 #include <random>
 #include <cassert>
 #include <iostream>
+#include <fstream>
 #include <memory>
 
+#include <CL/opencl.h>
+
 #ifndef NDEBUG
 # include <cstdio>
 #endif
 
 std::vector<bool> dither::blue_noise(int width, int height, int threads) {
-    int count = width * height;
-    std::vector<float> filter_out;
-    filter_out.resize(count);
 
-    std::vector<bool> pbp; // Prototype Binary Pattern
-    pbp.resize(count);
+    bool use_opencl = false;
 
-    std::default_random_engine re(std::random_device{}());
-    std::uniform_int_distribution<int> dist(0, count - 1);
+    // try to use OpenCL
+    do {
+        cl_device_id device;
+        cl_context context;
+        cl_program program;
+        cl_int err;
 
-    const int pixel_count = count * 4 / 10;
+        cl_platform_id platform;
 
-    // initialize pbp
-    for(int i = 0; i < count; ++i) {
-        if(i < pixel_count) {
-            pbp[i] = true;
-        } else {
-            pbp[i] = false;
+        int filter_size = (width + height) / 2;
+
+        err = clGetPlatformIDs(1, &platform, nullptr);
+        if(err != CL_SUCCESS) {
+            std::cerr << "OpenCL: Failed to identify a platform\n";
+            break;
         }
+
+        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr);
+        if(err != CL_SUCCESS) {
+            std::cerr << "OpenCL: Failed to get a device\n";
+            break;
+        }
+
+        context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
+
+        {
+            char buf[1024];
+            std::ifstream program_file("src/blue_noise.cl");
+            std::string program_string;
+            while(program_file.good()) {
+                program_file.read(buf, 1024);
+                if(int read_count = program_file.gcount(); read_count > 0) {
+                    program_string.append(buf, read_count);
+                }
+            }
+
+            const char *string_ptr = program_string.c_str();
+            std::size_t program_size = program_string.size();
+            program = clCreateProgramWithSource(context, 1, (const char**)&string_ptr, &program_size, &err);
+            if(err != CL_SUCCESS) {
+                std::cerr << "OpenCL: Failed to create the program\n";
+                clReleaseContext(context);
+                break;
+            }
+
+            err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+            if(err != CL_SUCCESS) {
+                std::cerr << "OpenCL: Failed to build the program\n";
+
+                std::size_t log_size;
+                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
+                std::unique_ptr<char[]> log = std::make_unique<char[]>(log_size + 1);
+                log[log_size] = 0;
+                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, log_size, log.get(), nullptr);
+                std::cerr << log.get() << std::endl;
+
+                clReleaseProgram(program);
+                clReleaseContext(context);
+                break;
+            }
+        }
+
+        std::cout << "OpenCL: Initialized, trying cl_impl..." << std::endl;
+        std::vector<bool> result = internal::blue_noise_cl_impl(
+            width, height, filter_size, context, device, program);
+
+        clReleaseProgram(program);
+        clReleaseContext(context);
+
+        if(!result.empty()) {
+            return result;
+        }
+    } while (false);
+
+    if(!use_opencl) {
+        std::cout << "OpenCL: Failed to setup/use, using regular impl..." << std::endl;
+        return internal::blue_noise_impl(width, height, threads);
     }
-    // randomize pbp
-    for(int i = 0; i < count-1; ++i) {
-        decltype(dist)::param_type range{i+1, count-1};
-        int ridx = dist(re, range);
-        // probably can't use std::swap since using std::vector<bool>
-        bool temp = pbp[i];
-        pbp[i] = pbp[ridx];
-        pbp[ridx] = temp;
-    }
+
+    return {};
+}
+
+std::vector<bool> dither::internal::blue_noise_impl(int width, int height, int threads) {
+    int count = width * height;
+    std::vector<float> filter_out;
+    filter_out.resize(count);
+
+    int pixel_count = count * 4 / 10;
+    std::vector<bool> pbp = random_noise(count, count * 4 / 10);
+    pbp.resize(count);
+
 //#ifndef NDEBUG
     printf("Inserting %d pixels into image of max count %d\n", pixel_count, count);
     // generate image from randomized pbp
@@ -167,3 +235,335 @@ std::vector<bool> dither::blue_noise(int width, int height, int threads) {
 
     return pbp;
 }
+
+std::vector<bool> dither::internal::blue_noise_cl_impl(
+        int width, int height, int filter_size, cl_context context, cl_device_id device, cl_program program) {
+    cl_int err;
+    cl_kernel kernel;
+    cl_command_queue queue;
+    cl_mem d_filter_out, d_precomputed, d_pbp;
+    std::size_t global_size, local_size;
+
+    std::vector<float> precomputed = precompute_gaussian(filter_size);
+
+    int count = width * height;
+    int pixel_count = count * 4 / 10;
+    std::vector<bool> pbp = random_noise(count, pixel_count);
+    std::vector<int> pbp_i(pbp.size());
+
+    queue = clCreateCommandQueueWithProperties(context, device, nullptr, &err);
+
+    d_filter_out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, count * sizeof(float), nullptr, nullptr);
+    d_precomputed = clCreateBuffer(context, CL_MEM_READ_ONLY, filter_size * filter_size * sizeof(float), nullptr, nullptr);
+    d_pbp = clCreateBuffer(context, CL_MEM_READ_ONLY, count * sizeof(int), nullptr, nullptr);
+
+    err = clEnqueueWriteBuffer(queue, d_precomputed, CL_TRUE, 0, filter_size * filter_size * sizeof(float), &precomputed[0], 0, nullptr, nullptr);
+    if(err != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to write to d_precomputed buffer\n";
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+
+    /*
+    err = clEnqueueWriteBuffer(queue, d_pbp, CL_TRUE, 0, count * sizeof(int), &pbp_i[0], 0, nullptr, nullptr);
+    if(err != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to write to d_pbp buffer\n";
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    */
+
+    kernel = clCreateKernel(program, "do_filter", &err);
+    if(err != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to create kernel: ";
+        switch(err) {
+        case CL_INVALID_PROGRAM:
+            std::cerr << "invalid program\n";
+            break;
+        case CL_INVALID_PROGRAM_EXECUTABLE:
+            std::cerr << "invalid program executable\n";
+            break;
+        case CL_INVALID_KERNEL_NAME:
+            std::cerr << "invalid kernel name\n";
+            break;
+        case CL_INVALID_KERNEL_DEFINITION:
+            std::cerr << "invalid kernel definition\n";
+            break;
+        case CL_INVALID_VALUE:
+            std::cerr << "invalid value\n";
+            break;
+        case CL_OUT_OF_RESOURCES:
+            std::cerr << "out of resources\n";
+            break;
+        case CL_OUT_OF_HOST_MEMORY:
+            std::cerr << "out of host memory\n";
+            break;
+        default:
+            std::cerr << "unknown error\n";
+            break;
+        }
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+
+    if(clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_filter_out) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to set kernel arg 0\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    if(clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_precomputed) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to set kernel arg 1\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    if(clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_pbp) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to set kernel arg 2\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    if(clSetKernelArg(kernel, 3, sizeof(int), &width) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to set kernel arg 3\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    if(clSetKernelArg(kernel, 4, sizeof(int), &height) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to set kernel arg 4\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    if(clSetKernelArg(kernel, 5, sizeof(int), &filter_size) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to set kernel arg 4\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+
+    if(clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(std::size_t), &local_size, nullptr) != CL_SUCCESS) {
+        std::cerr << "OpenCL: Failed to get work group size\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    }
+    global_size = (std::size_t)std::ceil(count / (float)local_size) * local_size;
+
+    std::cout << "OpenCL: global = " << global_size << ", local = " << local_size
+        << std::endl;
+
+    std::vector<float> filter(count);
+
+    const auto get_filter = [&queue, &kernel, &global_size, &local_size,
+            &d_filter_out, &d_pbp, &pbp, &pbp_i, &count, &filter, &err] () -> bool {
+        for(unsigned int i = 0; i < pbp.size(); ++i) {
+            pbp_i[i] = pbp[i] ? 1 : 0;
+        }
+        if(clEnqueueWriteBuffer(queue, d_pbp, CL_TRUE, 0, count * sizeof(int), &pbp_i[0], 0, nullptr, nullptr) != CL_SUCCESS) {
+            std::cerr << "OpenCL: Failed to write to d_pbp buffer\n";
+            return false;
+        }
+
+        if(err = clEnqueueNDRangeKernel(
+                queue, kernel, 1, nullptr, &global_size, &local_size,
+                0, nullptr, nullptr); err != CL_SUCCESS) {
+            std::cerr << "OpenCL: Failed to enqueue task: ";
+            switch(err) {
+            case CL_INVALID_PROGRAM_EXECUTABLE:
+                std::cerr << "invalid program executable\n";
+                break;
+            case CL_INVALID_COMMAND_QUEUE:
+                std::cerr << "invalid command queue\n";
+                break;
+            case CL_INVALID_KERNEL:
+                std::cerr << "invalid kernel\n";
+                break;
+            case CL_INVALID_CONTEXT:
+                std::cerr << "invalid context\n";
+                break;
+            case CL_INVALID_KERNEL_ARGS:
+                std::cerr << "invalid kernel args\n";
+                break;
+            case CL_INVALID_WORK_DIMENSION:
+                std::cerr << "invalid work dimension\n";
+                break;
+            case CL_INVALID_GLOBAL_WORK_SIZE:
+                std::cerr << "invalid global work size\n";
+                break;
+            case CL_INVALID_GLOBAL_OFFSET:
+                std::cerr << "invalid global offset\n";
+                break;
+            case CL_INVALID_WORK_GROUP_SIZE:
+                std::cerr << "invalid work group size\n";
+                break;
+            case CL_INVALID_WORK_ITEM_SIZE:
+                std::cerr << "invalid work item size\n";
+                break;
+            case CL_MISALIGNED_SUB_BUFFER_OFFSET:
+                std::cerr << "misaligned sub buffer offset\n";
+                break;
+            default:
+                std::cerr << "Unknown\n";
+                break;
+            }
+            return false;
+        }
+
+        clFinish(queue);
+
+        clEnqueueReadBuffer(queue, d_filter_out, CL_TRUE, 0, count * sizeof(float), &filter[0], 0, nullptr, nullptr);
+
+        return true;
+    };
+
+    {
+        printf("Inserting %d pixels into image of max count %d\n", pixel_count, count);
+        // generate image from randomized pbp
+        FILE *random_noise_image = fopen("random_noise.pbm", "w");
+        fprintf(random_noise_image, "P1\n%d %d\n", width, height);
+        for(int y = 0; y < height; ++y) {
+            for(int x = 0; x < width; ++x) {
+                fprintf(random_noise_image, "%d ", pbp[internal::twoToOne(x, y, width)] ? 1 : 0);
+            }
+            fputc('\n', random_noise_image);
+        }
+        fclose(random_noise_image);
+    }
+
+    if(!get_filter()) {
+        std::cerr << "OpenCL: Failed to execute do_filter (at start)\n";
+        clReleaseKernel(kernel);
+        clReleaseMemObject(d_pbp);
+        clReleaseMemObject(d_precomputed);
+        clReleaseMemObject(d_filter_out);
+        clReleaseCommandQueue(queue);
+        return {};
+    } else {
+        internal::write_filter(filter, width, "filter_out_start.pgm");
+    }
+
+    int iterations = 0;
+
+    while(true) {
+        printf("Iteration %d\n", ++iterations);
+
+        if(!get_filter()) {
+            std::cerr << "OpenCL: Failed to execute do_filter\n";
+            break;
+        }
+
+        int min, max, min_zero, max_one;
+        std::tie(min, max) = internal::filter_minmax(filter);
+        if(!pbp[max]) {
+            max_one = internal::get_one_or_zero(pbp, true, max, width, height);
+        } else {
+            max_one = max;
+        }
+        if(!pbp[max_one]) {
+            std::cerr << "ERROR: Failed to find pbp[max] one" << std::endl;
+            break;
+        }
+
+        if(pbp[min]) {
+            min_zero = internal::get_one_or_zero(pbp, false, min, width, height);
+        } else {
+            min_zero = min;
+        }
+        if(pbp[min_zero]) {
+            std::cerr << "ERROR: Failed to find pbp[min] zero" << std::endl;
+            break;
+        }
+
+        pbp[max_one] = false;
+
+        if(!get_filter()) {
+            std::cerr << "OpenCL: Failed to execute do_filter\n";
+            break;
+        }
+
+        // get second buffer's min
+        int second_min;
+        std::tie(second_min, std::ignore) = internal::filter_minmax(filter);
+        if(pbp[second_min]) {
+            second_min = internal::get_one_or_zero(pbp, false, second_min, width, height);
+            if(pbp[second_min]) {
+                std::cerr << "ERROR: Failed to find pbp[second_min] zero" << std::endl;
+                break;
+            }
+        }
+
+        if(internal::dist(max_one, second_min, width) < 1.5f) {
+            pbp[max_one] = true;
+            break;
+        } else {
+            pbp[min_zero] = true;
+        }
+
+        if(iterations % 100 == 0) {
+            // generate blue_noise image from pbp
+            FILE *blue_noise_image = fopen("blue_noise.pbm", "w");
+            fprintf(blue_noise_image, "P1\n%d %d\n", width, height);
+            for(int y = 0; y < height; ++y) {
+                for(int x = 0; x < width; ++x) {
+                    fprintf(blue_noise_image, "%d ", pbp[internal::twoToOne(x, y, width)] ? 1 : 0);
+                }
+                fputc('\n', blue_noise_image);
+            }
+            fclose(blue_noise_image);
+        }
+    }
+
+    if(!get_filter()) {
+        std::cerr << "OpenCL: Failed to execute do_filter (at end)\n";
+    } else {
+        internal::write_filter(filter, width, "filter_out_final.pgm");
+        FILE *blue_noise_image = fopen("blue_noise.pbm", "w");
+        fprintf(blue_noise_image, "P1\n%d %d\n", width, height);
+        for(int y = 0; y < height; ++y) {
+            for(int x = 0; x < width; ++x) {
+                fprintf(blue_noise_image, "%d ", pbp[internal::twoToOne(x, y, width)] ? 1 : 0);
+            }
+            fputc('\n', blue_noise_image);
+        }
+        fclose(blue_noise_image);
+    }
+
+    clReleaseKernel(kernel);
+    clReleaseMemObject(d_pbp);
+    clReleaseMemObject(d_precomputed);
+    clReleaseMemObject(d_filter_out);
+    clReleaseCommandQueue(queue);
+    return pbp;
+}
index fff6e1b6a6cfc4aff06a4201c5a3f06154e236ff..41ca3837e890b23ebcce4933a01f38c1ceeab2e4 100644 (file)
 #include <chrono>
 #include <cstdio>
 #include <queue>
+#include <random>
+
+#include <CL/opencl.h>
 
 namespace dither {
 
 std::vector<bool> blue_noise(int width, int height, int threads = 1);
 
 namespace internal {
+    std::vector<bool> blue_noise_impl(int width, int height, int threads = 1);
+    std::vector<bool> blue_noise_cl_impl(
+        int width, int height, int filter_size,
+        cl_context context, cl_device_id device, cl_program program);
+
+    inline std::vector<bool> random_noise(int size, int subsize) {
+        std::vector<bool> pbp(size);
+        std::default_random_engine re(std::random_device{}());
+        std::uniform_int_distribution<int> dist(0, size - 1);
+
+        // initialize pbp
+        for(int i = 0; i < size; ++i) {
+            if(i < subsize) {
+                pbp[i] = true;
+            } else {
+                pbp[i] = false;
+            }
+        }
+        // randomize pbp
+        for(int i = 0; i < size-1; ++i) {
+            decltype(dist)::param_type range{i+1, size-1};
+            int ridx = dist(re, range);
+            // probably can't use std::swap since using std::vector<bool>
+            bool temp = pbp[i];
+            pbp[i] = pbp[ridx];
+            pbp[ridx] = temp;
+        }
+
+        return pbp;
+    }
+
     inline int twoToOne(int x, int y, int width) {
         return x + y * width;
     }
index d86e9c7d7353e588d7a6c1be3483fbc3446df1b9..51e17aa840ec50a2f76f2b5321cfbf173ddd719c 100644 (file)
@@ -5,7 +5,7 @@
 int main(int argc, char **argv) {
 //#ifndef NDEBUG
     std::cout << "Trying blue_noise..." << std::endl;
-    dither::blue_noise(70, 70, 8);
+    dither::blue_noise(100, 100, 8);
 //#endif
 
     return 0;