diff --git a/src/blue_noise.cpp b/src/blue_noise.cpp index 1a018c0..c5c1f32 100644 --- a/src/blue_noise.cpp +++ b/src/blue_noise.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #ifndef NDEBUG # include @@ -58,8 +59,10 @@ std::vector dither::blue_noise(int width, int height, int threads) { int filter_size = (width + height) / 2; + std::unique_ptr> precomputed = std::make_unique>(internal::precompute_gaussian(filter_size)); + internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, threads); + filter_out, precomputed.get(), threads); internal::write_filter(filter_out, width, "filter_out_start.pgm"); while(true) { //#ifndef NDEBUG @@ -69,7 +72,7 @@ std::vector dither::blue_noise(int width, int height, int threads) { //#endif // get filter values internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, threads); + filter_out, precomputed.get(), threads); #ifndef NDEBUG // for(int i = 0; i < count; ++i) { @@ -112,7 +115,7 @@ std::vector dither::blue_noise(int width, int height, int threads) { // get filter values again internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, threads); + filter_out, precomputed.get(), threads); // get second buffer's min int second_min; @@ -146,7 +149,7 @@ std::vector dither::blue_noise(int width, int height, int threads) { } } internal::compute_filter(pbp, width, height, count, filter_size, - filter_out, threads); + filter_out, precomputed.get(), threads); internal::write_filter(filter_out, width, "filter_out_final.pgm"); //#ifndef NDEBUG diff --git a/src/blue_noise.hpp b/src/blue_noise.hpp index 548d7b1..fff6e1b 100644 --- a/src/blue_noise.hpp +++ b/src/blue_noise.hpp @@ -32,6 +32,19 @@ namespace internal { return std::exp(-(x*x + y*y)/(2*mu_squared)); } + inline std::vector precompute_gaussian(int size) { + std::vector precomputed; + precomputed.reserve(size * size); + + for(int i = 0; i < size * size; ++i) { + auto xy = oneToTwo(i, size); + precomputed.push_back(gaussian( + (float)xy.first - size / 2.0f, (float)xy.second - size / 2.0f)); + } + + return precomputed; + } + inline float filter( const std::vector& pbp, int x, int y, @@ -46,8 +59,7 @@ namespace internal { int q_prime = (height + filter_size / 2 + y - q) % height; for(int p = 0; p < filter_size; ++p) { int p_prime = (width + filter_size / 2 + x - p) % width; - bool pbp_value = pbp[twoToOne(p_prime, q_prime, width)]; - if(pbp_value) { + if(pbp[twoToOne(p_prime, q_prime, width)]) { sum += gaussian((float)p - filter_size/2.0f, (float)q - filter_size/2.0f); } } @@ -56,15 +68,46 @@ namespace internal { return sum; } + inline float filter_with_precomputed( + const std::vector& pbp, + int x, int y, + int width, int height, int filter_size, + const std::vector &precomputed) { + float sum = 0.0f; + + for(int q = 0; q < filter_size; ++q) { + int q_prime = (height + filter_size / 2 + y - q) % height; + for(int p = 0; p < filter_size; ++p) { + int p_prime = (width + filter_size / 2 + x - p) % width; + if(pbp[twoToOne(p_prime, q_prime, width)]) { + sum += precomputed[twoToOne(p, q, filter_size)]; + } + } + } + + return sum; + } + inline void compute_filter( const std::vector &pbp, int width, int height, int count, int filter_size, std::vector &filter_out, + const std::vector *precomputed = nullptr, int threads = 1) { if(threads == 1) { - for(int y = 0; y < height; ++y) { - for(int x = 0; x < width; ++x) { - filter_out[internal::twoToOne(x, y, width)] = - internal::filter(pbp, x, y, width, height, filter_size); + if(precomputed) { + for(int y = 0; y < height; ++y) { + for(int x = 0; x < width; ++x) { + filter_out[internal::twoToOne(x, y, width)] = + internal::filter_with_precomputed( + pbp, x, y, width, height, filter_size, *precomputed); + } + } + } else { + for(int y = 0; y < height; ++y) { + for(int x = 0; x < width; ++x) { + filter_out[internal::twoToOne(x, y, width)] = + internal::filter(pbp, x, y, width, height, filter_size); + } } } } else { @@ -74,39 +117,62 @@ namespace internal { int active_count = 0; std::mutex cv_mutex; std::condition_variable cv; - for(int i = 0; i < count; ++i) { - { - std::unique_lock lock(cv_mutex); - active_count += 1; - } - std::thread t([] (int *ac, std::mutex *cvm, - std::condition_variable *cv, int i, - const std::vector *pbp, int width, - int height, int filter_size, - std::vector *fout) { - int x, y; - std::tie(x, y) = internal::oneToTwo(i, width); - (*fout)[i] = internal::filter( - *pbp, x, y, width, height, filter_size); - std::unique_lock lock(*cvm); - *ac -= 1; - cv->notify_all(); - }, - &active_count, &cv_mutex, &cv, i, &pbp, width, height, - filter_size, &filter_out); - t.detach(); + if(precomputed) { + for(int i = 0; i < count; ++i) { + { + std::unique_lock lock(cv_mutex); + active_count += 1; + } + std::thread t([] (int *ac, std::mutex *cvm, + std::condition_variable *cv, int i, + const std::vector *pbp, int width, + int height, int filter_size, + std::vector *fout, + const std::vector *precomputed) { + int x, y; + std::tie(x, y) = internal::oneToTwo(i, width); + (*fout)[i] = internal::filter_with_precomputed( + *pbp, x, y, width, height, filter_size, *precomputed); + std::unique_lock lock(*cvm); + *ac -= 1; + cv->notify_all(); + }, + &active_count, &cv_mutex, &cv, i, &pbp, width, height, + filter_size, &filter_out, precomputed); + t.detach(); - std::unique_lock lock(cv_mutex); - while(active_count >= threads) { -#ifndef NDEBUG -// std::cout << "0, active_count = " << active_count -// << ", pre wait_for" << std::endl; -#endif - cv.wait_for(lock, std::chrono::seconds(1)); -#ifndef NDEBUG -// std::cout << "0, active_count = " << active_count -// << ", post wait_for" << std::endl; -#endif + std::unique_lock lock(cv_mutex); + while(active_count >= threads) { + cv.wait_for(lock, std::chrono::seconds(1)); + } + } + } else { + for(int i = 0; i < count; ++i) { + { + std::unique_lock lock(cv_mutex); + active_count += 1; + } + std::thread t([] (int *ac, std::mutex *cvm, + std::condition_variable *cv, int i, + const std::vector *pbp, int width, + int height, int filter_size, + std::vector *fout) { + int x, y; + std::tie(x, y) = internal::oneToTwo(i, width); + (*fout)[i] = internal::filter( + *pbp, x, y, width, height, filter_size); + std::unique_lock lock(*cvm); + *ac -= 1; + cv->notify_all(); + }, + &active_count, &cv_mutex, &cv, i, &pbp, width, height, + filter_size, &filter_out); + t.detach(); + + std::unique_lock lock(cv_mutex); + while(active_count >= threads) { + cv.wait_for(lock, std::chrono::seconds(1)); + } } } std::unique_lock lock(cv_mutex);