Impl convert video frames to color dithered pngs

This commit is contained in:
Stephen Seo 2021-11-30 16:02:51 +09:00
parent 6677fba89c
commit 11f48592bf
6 changed files with 203 additions and 101 deletions

View file

@ -26,7 +26,8 @@ find_package(OpenCL REQUIRED)
find_package(PNG REQUIRED) find_package(PNG REQUIRED)
find_package(PkgConfig REQUIRED) find_package(PkgConfig REQUIRED)
pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED libavcodec libavformat libavutil) pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED
libavcodec libavformat libavutil libswscale)
target_include_directories(DitheringProject PUBLIC target_include_directories(DitheringProject PUBLIC
${OpenCL_INCLUDE_DIRS} ${OpenCL_INCLUDE_DIRS}

View file

@ -817,6 +817,10 @@ OpenCLHandle::Ptr Image::GetOpenCLHandle() {
void Image::DecodePNG(const std::string &filename) { void Image::DecodePNG(const std::string &filename) {
FILE *file = std::fopen(filename.c_str(), "rb"); FILE *file = std::fopen(filename.c_str(), "rb");
if (!file) {
std::cout << "ERROR: Failed to open \"" << filename << '"' << std::endl;
return;
}
// Check header of file to check if it is actually a png file. // Check header of file to check if it is actually a png file.
{ {

View file

@ -135,7 +135,7 @@ class Image {
static const std::array<png_color, 2> dither_bw_palette_; static const std::array<png_color, 2> dither_bw_palette_;
static const std::array<png_color, 8> dither_color_palette_; static const std::array<png_color, 8> dither_color_palette_;
OpenCLHandle::Ptr opencl_handle_; OpenCLHandle::Ptr opencl_handle_;
/// Internally holds rgba /// Internally holds rgba or grayscale (1 channel)
std::vector<uint8_t> data_; std::vector<uint8_t> data_;
unsigned int width_; unsigned int width_;
unsigned int height_; unsigned int height_;

View file

@ -1,30 +1,19 @@
#include <iostream> #include <iostream>
#include "image.h" #include "image.h"
#include "video.h"
int main(int argc, char **argv) { int main(int argc, char **argv) {
// Image image("testin.ppm"); Image blue_noise("bluenoise.png");
// image.SaveAsPNG("testout.png", true); if (!blue_noise.IsValid()) {
std::cout << "ERROR: Invalid bluenoise.png" << std::endl;
Image input("input.png");
if (!input.IsValid()) {
std::cout << "ERROR: input.png is invalid" << std::endl;
return 1; return 1;
} }
Video video("input.mp4");
Image bluenoise("bluenoise.png"); if (!video.DitherVideo("output.mp4", &blue_noise)) {
if (!bluenoise.IsValid()) { std::cout << "ERROR: Failed to dither video" << std::endl;
std::cout << "ERROR: bluenoise.png is invalid" << std::endl;
return 1; return 1;
} }
// auto output = input.ToGrayscaleDitheredWithBlueNoise(&bluenoise);
auto output = input.ToColorDitheredWithBlueNoise(&bluenoise);
if (!output || !output->IsValid()) {
std::cout << "ERROR: output Image is invalid" << std::endl;
return 1;
}
output->SaveAsPNG("output.png", true);
return 0; return 0;
} }

View file

@ -1,5 +1,6 @@
#include "video.h" #include "video.h"
#include <cstdlib>
#include <cstring> #include <cstring>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
@ -11,18 +12,28 @@ extern "C" {
Video::Video(const char *video_filename) : Video(std::string(video_filename)) {} Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}
Video::Video(const std::string &video_filename) Video::Video(const std::string &video_filename)
: image(), input_filename(video_filename) {} : image_(),
input_filename_(video_filename),
sws_context_(nullptr),
frame_count_(0),
packet_count_(0) {}
bool Video::DitherGrayscale(const char *output_filename) { Video::~Video() {
return DitherGrayscale(std::string(output_filename)); if (sws_context_ != nullptr) {
sws_freeContext(sws_context_);
}
} }
bool Video::DitherGrayscale(const std::string &output_filename) { bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
// determine input file format bool grayscale) {
return DitherVideo(std::string(output_filename), blue_noise, grayscale);
}
bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
bool grayscale) {
// Get AVFormatContext for input file // Get AVFormatContext for input file
AVFormatContext *avf_context = nullptr; AVFormatContext *avf_context = nullptr;
std::string url = std::string("file:") + input_filename; std::string url = std::string("file:") + input_filename_;
int return_value = int return_value =
avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr); avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
if (return_value != 0) { if (return_value != 0) {
@ -49,42 +60,43 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
avformat_close_input(&avf_context); avformat_close_input(&avf_context);
return false; return false;
} }
int video_stream_idx = return_value;
// cleanup AVFormatContext as it is no longer needed
avformat_close_input(&avf_context);
// Init required objects for decoding
// Init parser
AVCodecParserContext *parser = av_parser_init(avcodec->id);
if (!parser) {
std::cout << "ERROR: Failed to init codec parser" << std::endl;
return false;
}
// Alloc codec context // Alloc codec context
AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec); AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
if (!codec_ctx) { if (!codec_ctx) {
std::cout << "ERROR: Failed to alloc codec context" << std::endl; std::cout << "ERROR: Failed to alloc codec context" << std::endl;
av_parser_close(parser); avformat_close_input(&avf_context);
return false;
}
// Set codec parameters from input stream
return_value = avcodec_parameters_to_context(
codec_ctx, avf_context->streams[video_stream_idx]->codecpar);
if (return_value < 0) {
std::cout << "ERROR: Failed to set codec parameters from input stream"
<< std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context);
return false; return false;
} }
// Init codec context // Init codec context
return_value = avcodec_open2(codec_ctx, avcodec, nullptr); return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
if (return_value == 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to init codec context" << std::endl; std::cout << "ERROR: Failed to init codec context" << std::endl;
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
av_parser_close(parser); avformat_close_input(&avf_context);
return false; return false;
} }
av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0);
// Alloc a packet object for reading packets // Alloc a packet object for reading packets
AVPacket *pkt = av_packet_alloc(); AVPacket *pkt = av_packet_alloc();
if (!pkt) { if (!pkt) {
std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl; std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
av_parser_close(parser);
return false; return false;
} }
@ -93,76 +105,161 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
if (!frame) { if (!frame) {
std::cout << "ERROR: Failed to alloc video frame object" << std::endl; std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
av_packet_free(&pkt); av_packet_free(&pkt);
av_parser_close(parser);
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
return false; return false;
} }
// Now the file will be opened for decoding the "best" video stream // read frames
std::ifstream ifs(input_filename); while (av_read_frame(avf_context, pkt) >= 0) {
if (!ifs.is_open() || !ifs.good()) { if (pkt->stream_index == video_stream_idx) {
std::cout << "ERROR: Failed to open input file \"" << input_filename << '"' if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale)) {
<< std::endl;
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
av_parser_close(parser);
return false;
}
// Set up buffer to read from input file
std::array<uint8_t, kReadBufSizeWithPadding> buf;
// Fill end of buffer with 0 to avoid possible overreading (as shown in
// example code)
std::memset(buf.data() + kReadBufSize, 0, kReadBufPaddingSize);
std::streamsize read_count;
uint8_t *data_ptr;
while (ifs.good()) {
ifs.read(reinterpret_cast<char *>(buf.data()), kReadBufSize);
read_count = ifs.gcount();
data_ptr = buf.data();
if (read_count == 0) {
// read 0 bytes, probably reached exactly EOF
break;
}
while (read_count > 0) {
return_value =
av_parser_parse2(parser, codec_ctx, &pkt->data, &pkt->size, data_ptr,
read_count, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (return_value < 0) {
std::cout << "ERROR: Failed to parse input file" << std::endl;
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
av_parser_close(parser);
return false; return false;
} }
data_ptr += return_value;
read_count -= return_value;
if (pkt->size) {
// TODO use packet
}
} }
} }
if (ifs.fail()) { // flush decoders
std::cout << "ERROR: Read error on input file" << std::endl; if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale)) {
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
av_parser_close(parser);
return false; return false;
} }
// TODO flush decoder
// cleanup // cleanup
av_frame_free(&frame); av_frame_free(&frame);
av_packet_free(&pkt); av_packet_free(&pkt);
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
av_parser_close(parser); avformat_close_input(&avf_context);
return true;
}
/// Sends one packet to the decoder and processes every frame it yields.
///
/// Each decoded frame is converted with libswscale to RGBA (or 8-bit gray
/// when `grayscale` is true), copied into `image_`, dithered against
/// `blue_noise`, and written to "output_NNNN.png", where NNNN is the running
/// frame count left-padded with zeros to at least four digits.
///
/// @param codec_ctx  Decoder context the packet is sent to.
/// @param pkt        Packet to decode; the caller passes nullptr to flush.
/// @param frame      Scratch frame that receives each decoded picture.
/// @param blue_noise Blue-noise image forwarded to the dithering routines.
/// @param grayscale  Selects grayscale instead of color dithering.
/// @return true once the decoder reports EAGAIN or EOF (nothing more to
///         read for this packet); false on any decode, conversion or
///         dithering failure.
bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
                                 AVFrame *frame, Image *blue_noise,
                                 bool grayscale) {
  // NOTE(review): packet_count_ is only read here, never incremented in this
  // function; confirm the caller maintains it.
  int return_value = avcodec_send_packet(codec_ctx, pkt);
  if (return_value < 0) {
    std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
              << std::endl;
    return false;
  }

  // One packet may yield zero or more frames; drain until the decoder asks
  // for more input (EAGAIN) or signals end of stream (EOF).
  for (;;) {
    return_value = avcodec_receive_frame(codec_ctx, frame);
    if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
      return true;
    } else if (return_value < 0) {
      std::cout << "ERROR: Failed to get frame from decoded packet(s)"
                << std::endl;
      return false;
    }
    ++frame_count_;
    std::cout << "Frame " << frame_count_ << std::endl;  // TODO DEBUG

    if (frame->width <= 0 || frame->height <= 0) {
      std::cout << "ERROR: Decoded frame has invalid dimensions" << std::endl;
      return false;
    }

    // The frame must carry at least one populated picture plane.
    unsigned int line_count = 0;
    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
      if (frame->linesize[i] > 0) {
        ++line_count;
      }
    }
    if (line_count == 0) {
      std::cout << "ERROR: Invalid number of picture planes" << std::endl;
      return false;
    }

    // Sizes are computed in size_t so large frames cannot overflow int.
    const std::size_t width = static_cast<std::size_t>(frame->width);
    const std::size_t height = static_cast<std::size_t>(frame->height);
    const std::size_t channel_count = grayscale ? 1 : 4;
    const std::size_t pixel_bytes = width * height * channel_count;

    // Zero-filled output buffer for the pixel format conversion. The
    // allocation size (4 bytes per pixel plus 16 bytes of slack) is kept
    // from the original code; the vector releases it on every exit path.
    std::vector<uint8_t> converted(4 * width * height + 16);

    // Packed output: only the first plane/stride is used.
    uint8_t *dst[AV_NUM_DATA_POINTERS] = {converted.data()};
    std::array<int, AV_NUM_DATA_POINTERS> dst_strides{};
    dst_strides[0] = frame->width * (grayscale ? 1 : 4);

    // Reuses the stored context while the frame geometry, source format and
    // output format are unchanged, and rebuilds it otherwise (e.g. when
    // `grayscale` differs from a previous call on the same Video).
    sws_context_ = sws_getCachedContext(
        sws_context_, frame->width, frame->height,
        static_cast<AVPixelFormat>(frame->format), frame->width, frame->height,
        grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
                  : AVPixelFormat::AV_PIX_FMT_RGBA,
        SWS_BILINEAR, nullptr, nullptr, nullptr);
    if (sws_context_ == nullptr) {
      std::cout << "ERROR: Failed to init sws_context_" << std::endl;
      return false;
    }

    return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0,
                             frame->height, dst, dst_strides.data());
    if (return_value < 0) {
      std::cout << "ERROR: Failed to convert pixel format of frame"
                << std::endl;
      return false;
    }

    // Put the converted pixels into image_ (rows are tightly packed because
    // the destination stride equals width * channel_count).
    image_.width_ = frame->width;
    image_.height_ = frame->height;
    image_.is_grayscale_ = grayscale;
    image_.data_.assign(converted.data(), converted.data() + pixel_bytes);

    std::unique_ptr<Image> dithered_image;
    if (grayscale) {
      dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
    } else {
      dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
    }
    // The dithering calls can hand back a null or invalid image; bail out
    // instead of dereferencing it.
    if (!dithered_image || !dithered_image->IsValid()) {
      std::cout << "ERROR: Failed to dither frame " << frame_count_
                << std::endl;
      return false;
    }

    // Zero-pad the frame number to at least four digits: output_0001.png
    std::string frame_number = std::to_string(frame_count_);
    if (frame_number.size() < 4) {
      frame_number.insert(0, 4 - frame_number.size(), '0');
    }
    const std::string out_name = "output_" + frame_number + ".png";
    dithered_image->SaveAsPNG(out_name, false);
    // TODO encode video with dithered_image
  }
}

View file

@ -3,6 +3,7 @@
extern "C" { extern "C" {
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libswscale/swscale.h>
} }
#include "image.h" #include "image.h"
@ -17,12 +18,22 @@ class Video {
explicit Video(const char *video_filename); explicit Video(const char *video_filename);
explicit Video(const std::string &video_filename); explicit Video(const std::string &video_filename);
bool DitherGrayscale(const char *output_filename); ~Video();
bool DitherGrayscale(const std::string &output_filename);
bool DitherVideo(const char *output_filename, Image *blue_noise,
bool grayscale = false);
bool DitherVideo(const std::string &output_filename, Image *blue_noise,
bool grayscale = false);
private: private:
Image image; Image image_;
std::string input_filename; std::string input_filename_;
SwsContext *sws_context_;
unsigned int frame_count_;
unsigned int packet_count_;
bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
AVFrame *frame, Image *blue_noise, bool grayscale);
}; };
#endif #endif