From 11f48592bf5b507b981806df2f9fd543ac0ba1e6 Mon Sep 17 00:00:00 2001 From: Stephen Seo Date: Tue, 30 Nov 2021 16:02:51 +0900 Subject: [PATCH] Impl convert video frames to color dithered pngs --- CMakeLists.txt | 3 +- src/image.cc | 4 + src/image.h | 2 +- src/main.cc | 25 ++--- src/video.cc | 251 ++++++++++++++++++++++++++++++++++--------------- src/video.h | 19 +++- 6 files changed, 203 insertions(+), 101 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1518449..368bc1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,7 +26,8 @@ find_package(OpenCL REQUIRED) find_package(PNG REQUIRED) find_package(PkgConfig REQUIRED) -pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED libavcodec libavformat libavutil) +pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED + libavcodec libavformat libavutil libswscale) target_include_directories(DitheringProject PUBLIC ${OpenCL_INCLUDE_DIRS} diff --git a/src/image.cc b/src/image.cc index c391a0e..23c1c37 100644 --- a/src/image.cc +++ b/src/image.cc @@ -817,6 +817,10 @@ OpenCLHandle::Ptr Image::GetOpenCLHandle() { void Image::DecodePNG(const std::string &filename) { FILE *file = std::fopen(filename.c_str(), "rb"); + if (!file) { + std::cout << "ERROR: Failed to open \"" << filename << '"' << std::endl; + return; + } // Check header of file to check if it is actually a png file. { diff --git a/src/image.h b/src/image.h index 8671ea4..2ccc8e5 100644 --- a/src/image.h +++ b/src/image.h @@ -135,7 +135,7 @@ class Image { static const std::array dither_bw_palette_; static const std::array dither_color_palette_; OpenCLHandle::Ptr opencl_handle_; - /// Internally holds rgba + /// Internally holds rgba or grayscale (1 channel) std::vector data_; unsigned int width_; unsigned int height_; diff --git a/src/main.cc b/src/main.cc index 3172e32..90e7ba5 100644 --- a/src/main.cc +++ b/src/main.cc @@ -1,30 +1,19 @@ #include #include "image.h" +#include "video.h" int main(int argc, char **argv) { - // Image image("testin.ppm"); - // image.SaveAsPNG("testout.png", true); - - Image input("input.png"); - if (!input.IsValid()) { - std::cout << "ERROR: input.png is invalid" << std::endl; + Image blue_noise("bluenoise.png"); + if (!blue_noise.IsValid()) { + std::cout << "ERROR: Invalid bluenoise.png" << std::endl; return 1; } - - Image bluenoise("bluenoise.png"); - if (!bluenoise.IsValid()) { - std::cout << "ERROR: bluenoise.png is invalid" << std::endl; + Video video("input.mp4"); + if (!video.DitherVideo("output.mp4", &blue_noise)) { + std::cout << "ERROR: Failed to dither video" << std::endl; return 1; } - // auto output = input.ToGrayscaleDitheredWithBlueNoise(&bluenoise); - auto output = input.ToColorDitheredWithBlueNoise(&bluenoise); - if (!output || !output->IsValid()) { - std::cout << "ERROR: output Image is invalid" << std::endl; - return 1; - } - output->SaveAsPNG("output.png", true); - return 0; } diff --git a/src/video.cc b/src/video.cc index ac86500..bcf8795 100644 --- a/src/video.cc +++ b/src/video.cc @@ -1,5 +1,6 @@ #include "video.h" +#include #include #include #include @@ -11,18 +12,28 @@ extern "C" { Video::Video(const char *video_filename) : Video(std::string(video_filename)) {} Video::Video(const std::string &video_filename) - : image(), input_filename(video_filename) {} + : image_(), + input_filename_(video_filename), + sws_context_(nullptr), + frame_count_(0), + packet_count_(0) {} -bool Video::DitherGrayscale(const char *output_filename) { - return DitherGrayscale(std::string(output_filename)); +Video::~Video() { + if (sws_context_ != nullptr) { + sws_freeContext(sws_context_); + } } -bool Video::DitherGrayscale(const std::string &output_filename) { - // determine input file format +bool Video::DitherVideo(const char *output_filename, Image *blue_noise, + bool grayscale) { + return DitherVideo(std::string(output_filename), blue_noise, grayscale); +} +bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, + bool grayscale) { // Get AVFormatContext for input file AVFormatContext *avf_context = nullptr; - std::string url = std::string("file:") + input_filename; + std::string url = std::string("file:") + input_filename_; int return_value = avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr); if (return_value != 0) { @@ -49,42 +60,43 @@ bool Video::DitherGrayscale(const std::string &output_filename) { avformat_close_input(&avf_context); return false; } - - // cleanup AVFormatContext as it is no longer needed - avformat_close_input(&avf_context); - - // Init required objects for decoding - - // Init parser - AVCodecParserContext *parser = av_parser_init(avcodec->id); - if (!parser) { - std::cout << "ERROR: Failed to init codec parser" << std::endl; - return false; - } + int video_stream_idx = return_value; // Alloc codec context AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec); if (!codec_ctx) { std::cout << "ERROR: Failed to alloc codec context" << std::endl; - av_parser_close(parser); + avformat_close_input(&avf_context); + return false; + } + + // Set codec parameters from input stream + return_value = avcodec_parameters_to_context( + codec_ctx, avf_context->streams[video_stream_idx]->codecpar); + if (return_value < 0) { + std::cout << "ERROR: Failed to set codec parameters from input stream" + << std::endl; + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_context); return false; } // Init codec context return_value = avcodec_open2(codec_ctx, avcodec, nullptr); - if (return_value == 0) { + if (return_value < 0) { std::cout << "ERROR: Failed to init codec context" << std::endl; avcodec_free_context(&codec_ctx); - av_parser_close(parser); + avformat_close_input(&avf_context); return false; } + av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0); + // Alloc a packet object for reading packets AVPacket *pkt = av_packet_alloc(); if (!pkt) { std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl; avcodec_free_context(&codec_ctx); - av_parser_close(parser); return false; } @@ -93,76 +105,161 @@ bool Video::DitherGrayscale(const std::string &output_filename) { if (!frame) { std::cout << "ERROR: Failed to alloc video frame object" << std::endl; av_packet_free(&pkt); - av_parser_close(parser); avcodec_free_context(&codec_ctx); return false; } - // Now the file will be opened for decoding the "best" video stream - std::ifstream ifs(input_filename); - if (!ifs.is_open() || !ifs.good()) { - std::cout << "ERROR: Failed to open input file \"" << input_filename << '"' - << std::endl; - av_frame_free(&frame); - av_packet_free(&pkt); - avcodec_free_context(&codec_ctx); - av_parser_close(parser); - return false; - } - - // Set up buffer to read from input file - std::array buf; - // Fill end of buffer with 0 to avoid possible overreading (as shown in - // example code) - std::memset(buf.data() + kReadBufSize, 0, kReadBufPaddingSize); - - std::streamsize read_count; - uint8_t *data_ptr; - while (ifs.good()) { - ifs.read(reinterpret_cast(buf.data()), kReadBufSize); - read_count = ifs.gcount(); - data_ptr = buf.data(); - if (read_count == 0) { - // read 0 bytes, probably reached exactly EOF - break; - } - - while (read_count > 0) { - return_value = - av_parser_parse2(parser, codec_ctx, &pkt->data, &pkt->size, data_ptr, - read_count, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0); - if (return_value < 0) { - std::cout << "ERROR: Failed to parse input file" << std::endl; - av_frame_free(&frame); - av_packet_free(&pkt); - avcodec_free_context(&codec_ctx); - av_parser_close(parser); + // read frames + while (av_read_frame(avf_context, pkt) >= 0) { + if (pkt->stream_index == video_stream_idx) { + if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale)) { return false; } - data_ptr += return_value; - read_count -= return_value; - - if (pkt->size) { - // TODO use packet - } } } - if (ifs.fail()) { - std::cout << "ERROR: Read error on input file" << std::endl; - av_frame_free(&frame); - av_packet_free(&pkt); - avcodec_free_context(&codec_ctx); - av_parser_close(parser); + // flush decoders + if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale)) { return false; } - // TODO flush decoder - // cleanup av_frame_free(&frame); av_packet_free(&pkt); avcodec_free_context(&codec_ctx); - av_parser_close(parser); + avformat_close_input(&avf_context); + return true; +} + +bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, + AVFrame *frame, Image *blue_noise, + bool grayscale) { + int return_value = avcodec_send_packet(codec_ctx, pkt); + if (return_value < 0) { + std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')' + << std::endl; + return false; + } + + return_value = 0; + while (return_value >= 0) { + return_value = avcodec_receive_frame(codec_ctx, frame); + if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) { + return true; + } else if (return_value < 0) { + std::cout << "ERROR: Failed to get frame from decoded packet(s)" + << std::endl; + return false; + } + ++frame_count_; + + std::cout << "Frame " << frame_count_ << std::endl; // TODO DEBUG + + // output buffer info for converting pixel format to RGBA + uint8_t *dst[AV_NUM_DATA_POINTERS]; + dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16, + sizeof(uint8_t)); + for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) { + dst[i] = nullptr; + } + std::array dst_strides = { + frame->width * (grayscale ? 1 : 4), 0, 0, 0, 0, 0, 0, 0}; + + unsigned int line_count = 0; + for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { + if (frame->linesize[i] > 0) { + ++line_count; + } + } + + if (line_count == 0) { + std::cout << "ERROR: Invalid number of picture planes" << std::endl; + for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { + free(dst[i]); + } + return false; + } + + // Convert colors to RGBA + if (sws_context_ == nullptr) { + sws_context_ = sws_getContext(frame->width, frame->height, + (AVPixelFormat)frame->format, frame->width, + frame->height, + grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8 + : AVPixelFormat::AV_PIX_FMT_RGBA, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (sws_context_ == nullptr) { + std::cout << "ERROR: Failed to init sws_context_" << std::endl; + for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { + free(dst[i]); + } + return false; + } + } + + return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0, + frame->height, dst, dst_strides.data()); + if (return_value < 0) { + std::cout << "ERROR: Failed to convert pixel format of frame" + << std::endl; + for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { + free(dst[i]); + } + return false; + } + + // put RGBA data into image + image_.width_ = frame->width; + image_.height_ = frame->height; + if (grayscale) { + image_.is_grayscale_ = true; + image_.data_.resize(frame->width * frame->height); + for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) { + image_.data_.at(i) = dst[0][i]; + } + } else { + image_.is_grayscale_ = false; + image_.data_.resize(frame->width * frame->height * 4); + for (unsigned int y = 0; (int)y < frame->height; ++y) { + for (unsigned int x = 0; (int)x < frame->width; ++x) { + image_.data_.at(x * 4 + y * 4 * frame->width) = + dst[0][x * 4 + y * 4 * frame->width]; + image_.data_.at(1 + x * 4 + y * 4 * frame->width) = + dst[0][1 + x * 4 + y * 4 * frame->width]; + image_.data_.at(2 + x * 4 + y * 4 * frame->width) = + dst[0][2 + x * 4 + y * 4 * frame->width]; + image_.data_.at(3 + x * 4 + y * 4 * frame->width) = + dst[0][3 + x * 4 + y * 4 * frame->width]; + } + } + } + + std::unique_ptr dithered_image; + if (grayscale) { + dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise); + } else { + dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise); + } + + std::string out_name = "output_"; + if (frame_count_ < 10) { + out_name += "000" + std::to_string(frame_count_); + } else if (frame_count_ < 100) { + out_name += "00" + std::to_string(frame_count_); + } else if (frame_count_ < 1000) { + out_name += "0" + std::to_string(frame_count_); + } else { + out_name += std::to_string(frame_count_); + } + out_name += ".png"; + dithered_image->SaveAsPNG(out_name, false); + // TODO encode video with dithered_image + + // cleanup + for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { + free(dst[i]); + } + } + return true; } diff --git a/src/video.h b/src/video.h index ac0af82..77f2c93 100644 --- a/src/video.h +++ b/src/video.h @@ -3,6 +3,7 @@ extern "C" { #include +#include } #include "image.h" @@ -17,12 +18,22 @@ class Video { explicit Video(const char *video_filename); explicit Video(const std::string &video_filename); - bool DitherGrayscale(const char *output_filename); - bool DitherGrayscale(const std::string &output_filename); + ~Video(); + + bool DitherVideo(const char *output_filename, Image *blue_noise, + bool grayscale = false); + bool DitherVideo(const std::string &output_filename, Image *blue_noise, + bool grayscale = false); private: - Image image; - std::string input_filename; + Image image_; + std::string input_filename_; + SwsContext *sws_context_; + unsigned int frame_count_; + unsigned int packet_count_; + + bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, + AVFrame *frame, Image *blue_noise, bool grayscale); }; #endif