Impl output to video, update program args

Turns out output to video suffers from loss-of-quality problems when in
color (not grayscale). It may be better in some cases to just output
each frame as a png and combine them later with ffmpeg, as is
mentioned here: https://trac.ffmpeg.org/wiki/Slideshow . Grayscale video
is noticeably better, but both cases result in large video sizes, so care
may be needed if free disk space is scarce.
This commit is contained in:
Stephen Seo 2021-12-03 20:09:56 +09:00
parent 8191a51d9b
commit 788ce5e490
5 changed files with 489 additions and 126 deletions

View file

@ -14,7 +14,8 @@ void Args::PrintUsage() {
std::cout std::cout
<< "Usage: [-h | --help] [-i <filename> | --input <filename>] [-o " << "Usage: [-h | --help] [-i <filename> | --input <filename>] [-o "
"<filename> | --output <filename>] [-b <filename> | --blue " "<filename> | --output <filename>] [-b <filename> | --blue "
"<filename>] [-g | --gray] [--image] [--video] [--overwrite]\n" "<filename>] [-g | --gray] [--image] [--video] [--video-pngs] "
"[--overwrite]\n"
" -h | --help\t\t\t\tPrint this usage text\n" " -h | --help\t\t\t\tPrint this usage text\n"
" -i <filename> | --input <filename>\tSet input filename\n" " -i <filename> | --input <filename>\tSet input filename\n"
" -o <filename> | --output <filename>\tSet output filename\n" " -o <filename> | --output <filename>\tSet output filename\n"
@ -22,6 +23,7 @@ void Args::PrintUsage() {
" -g | --gray\t\t\t\tDither output in grayscale\n" " -g | --gray\t\t\t\tDither output in grayscale\n"
" --image\t\t\t\tDither a single image\n" " --image\t\t\t\tDither a single image\n"
" --video\t\t\t\tDither frames in a video\n" " --video\t\t\t\tDither frames in a video\n"
" --video-pngs\t\t\t\tDither frames but output as individual pngs\n"
" --overwrite\t\t\t\tAllow overwriting existing files\n" " --overwrite\t\t\t\tAllow overwriting existing files\n"
<< std::endl; << std::endl;
} }
@ -56,6 +58,9 @@ bool Args::ParseArgs(int argc, char **argv) {
do_dither_image_ = true; do_dither_image_ = true;
} else if (std::strcmp(argv[0], "--video") == 0) { } else if (std::strcmp(argv[0], "--video") == 0) {
do_dither_image_ = false; do_dither_image_ = false;
} else if (std::strcmp(argv[0], "--video-pngs") == 0) {
do_dither_image_ = false;
do_video_pngs_ = true;
} else if (std::strcmp(argv[0], "--overwrite") == 0) { } else if (std::strcmp(argv[0], "--overwrite") == 0) {
do_overwrite_ = true; do_overwrite_ = true;
} else { } else {

View file

@ -14,6 +14,7 @@ struct Args {
bool do_dither_image_; bool do_dither_image_;
bool do_dither_grayscaled_; bool do_dither_grayscaled_;
bool do_overwrite_; bool do_overwrite_;
bool do_video_pngs_;
std::string input_filename; std::string input_filename;
std::string output_filename; std::string output_filename;
std::string blue_noise_filename; std::string blue_noise_filename;

View file

@ -60,7 +60,8 @@ int main(int argc, char **argv) {
} else { } else {
Video video(args.input_filename); Video video(args.input_filename);
if (!video.DitherVideo(args.output_filename, &blue_noise, if (!video.DitherVideo(args.output_filename, &blue_noise,
args.do_dither_grayscaled_, args.do_overwrite_)) { args.do_dither_grayscaled_, args.do_overwrite_,
args.do_video_pngs_)) {
std::cout << "ERROR: Failed to dither frames from input video \"" std::cout << "ERROR: Failed to dither frames from input video \""
<< args.input_filename << '"' << std::endl; << args.input_filename << '"' << std::endl;
Args::PrintUsage(); Args::PrintUsage();

View file

@ -1,42 +1,63 @@
#include "video.h" #include "video.h"
#include <cmath>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
extern "C" {
#include <libavformat/avformat.h>
}
Video::Video(const char *video_filename) : Video(std::string(video_filename)) {} Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}
Video::Video(const std::string &video_filename) Video::Video(const std::string &video_filename)
: image_(), : image_(),
input_filename_(video_filename), input_filename_(video_filename),
sws_context_(nullptr), sws_dec_context_(nullptr),
sws_enc_context_(nullptr),
frame_count_(0), frame_count_(0),
packet_count_(0) {} packet_count_(0),
was_grayscale_(false) {}
Video::~Video() { Video::~Video() {
if (sws_context_ != nullptr) { if (sws_dec_context_ != nullptr) {
sws_freeContext(sws_context_); sws_freeContext(sws_dec_context_);
} }
} }
bool Video::DitherVideo(const char *output_filename, Image *blue_noise, bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
bool grayscale, bool overwrite) { bool grayscale, bool overwrite, bool output_as_pngs) {
return DitherVideo(std::string(output_filename), blue_noise, grayscale, return DitherVideo(std::string(output_filename), blue_noise, grayscale,
overwrite); overwrite, output_as_pngs);
} }
bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
bool grayscale, bool overwrite) { bool grayscale, bool overwrite, bool output_as_pngs) {
if (!overwrite && !output_as_pngs) {
// check if output_file exists
std::ifstream ifs(output_filename);
if (ifs.is_open()) {
std::cout << "ERROR: output file \"" << output_filename
<< "\" exists "
"and overwrite is disabled"
<< std::endl;
return false;
}
}
frame_count_ = 0;
bool color_changed = false;
if (was_grayscale_ != grayscale) {
color_changed = true;
}
was_grayscale_ = grayscale;
// set up decoding
// Get AVFormatContext for input file // Get AVFormatContext for input file
AVFormatContext *avf_context = nullptr; AVFormatContext *avf_dec_context = nullptr;
std::string url = std::string("file:") + input_filename_; std::string url = std::string("file:") + input_filename_;
int return_value = int return_value =
avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr); avformat_open_input(&avf_dec_context, url.c_str(), nullptr, nullptr);
if (return_value != 0) { if (return_value != 0) {
std::cout << "ERROR: Failed to open input file to determine format" std::cout << "ERROR: Failed to open input file to determine format"
<< std::endl; << std::endl;
@ -44,60 +65,70 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
} }
// Read from input file to fill in info in AVFormatContext // Read from input file to fill in info in AVFormatContext
return_value = avformat_find_stream_info(avf_context, nullptr); return_value = avformat_find_stream_info(avf_dec_context, nullptr);
if (return_value < 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to determine input file stream info" std::cout << "ERROR: Failed to determine input file stream info"
<< std::endl; << std::endl;
avformat_close_input(&avf_context); avformat_close_input(&avf_dec_context);
return false; return false;
} }
// Get "best" video stream // Get "best" video stream
AVCodec *avcodec = nullptr; AVCodec *dec_codec = nullptr;
return_value = av_find_best_stream( return_value = av_find_best_stream(
avf_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &avcodec, 0); avf_dec_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &dec_codec, 0);
if (return_value < 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to get video stream in input file" << std::endl; std::cout << "ERROR: Failed to get video stream in input file" << std::endl;
avformat_close_input(&avf_context); avformat_close_input(&avf_dec_context);
return false; return false;
} }
int video_stream_idx = return_value; int video_stream_idx = return_value;
// Alloc codec context // Alloc codec context
AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec); AVCodecContext *codec_ctx = avcodec_alloc_context3(dec_codec);
if (!codec_ctx) { if (!codec_ctx) {
std::cout << "ERROR: Failed to alloc codec context" << std::endl; std::cout << "ERROR: Failed to alloc codec context" << std::endl;
avformat_close_input(&avf_context); avformat_close_input(&avf_dec_context);
return false; return false;
} }
// Set codec parameters from input stream // Set codec parameters from input stream
return_value = avcodec_parameters_to_context( return_value = avcodec_parameters_to_context(
codec_ctx, avf_context->streams[video_stream_idx]->codecpar); codec_ctx, avf_dec_context->streams[video_stream_idx]->codecpar);
if (return_value < 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to set codec parameters from input stream" std::cout << "ERROR: Failed to set codec parameters from input stream"
<< std::endl; << std::endl;
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context); avformat_close_input(&avf_dec_context);
return false; return false;
} }
// Init codec context // Init codec context
return_value = avcodec_open2(codec_ctx, avcodec, nullptr); return_value = avcodec_open2(codec_ctx, dec_codec, nullptr);
if (return_value < 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to init codec context" << std::endl; std::cout << "ERROR: Failed to init codec context" << std::endl;
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context); avformat_close_input(&avf_dec_context);
return false; return false;
} }
av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0); std::cout << "Dumping input video format info..." << std::endl;
av_dump_format(avf_dec_context, video_stream_idx, input_filename_.c_str(), 0);
// get input stream info
unsigned int width =
avf_dec_context->streams[video_stream_idx]->codecpar->width;
unsigned int height =
avf_dec_context->streams[video_stream_idx]->codecpar->height;
auto r_frame_rate = avf_dec_context->streams[video_stream_idx]->r_frame_rate;
decltype(r_frame_rate) time_base = {r_frame_rate.den, r_frame_rate.num};
// Alloc a packet object for reading packets // Alloc a packet object for reading packets
AVPacket *pkt = av_packet_alloc(); AVPacket *pkt = av_packet_alloc();
if (!pkt) { if (!pkt) {
std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl; std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false; return false;
} }
@ -107,162 +138,473 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
std::cout << "ERROR: Failed to alloc video frame object" << std::endl; std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
av_packet_free(&pkt); av_packet_free(&pkt);
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false; return false;
} }
// read frames // Set up encoding
while (av_read_frame(avf_context, pkt) >= 0) {
if (pkt->stream_index == video_stream_idx) { // alloc/init encoding AVFormatContext
if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale, AVFormatContext *avf_enc_context = nullptr;
overwrite)) { if (!output_as_pngs) {
return_value = avformat_alloc_output_context2(
&avf_enc_context, nullptr, nullptr, output_filename.c_str());
if (return_value < 0) {
std::cout << "ERROR: Failed to alloc/init avf_enc_context" << std::endl;
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false; return false;
} }
} }
// set output video codec (h264)
AVCodecContext *enc_codec_context = nullptr;
AVCodec *enc_codec = nullptr;
// get H264 codec
if (!output_as_pngs) {
enc_codec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_H264);
if (enc_codec == nullptr) {
std::cout << "ERROR: Failed to get H264 codec for encoding" << std::endl;
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
}
// create new video stream
AVStream *enc_stream = nullptr;
if (!output_as_pngs) {
enc_stream = avformat_new_stream(avf_enc_context, enc_codec);
if (enc_stream == nullptr) {
std::cout << "ERROR: Failed to create encoding stream" << std::endl;
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// assign its id
enc_stream->id = avf_enc_context->nb_streams - 1;
// alloc enc AVCodecContext
enc_codec_context = avcodec_alloc_context3(enc_codec);
if (enc_codec_context == nullptr) {
std::cout << "ERROR: Failed to create AVCodecContext for encoding"
<< std::endl;
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// set values on enc_codec_context
enc_codec_context->codec_id = AVCodecID::AV_CODEC_ID_H264;
enc_codec_context->bit_rate = kOutputBitrate;
enc_codec_context->width = width;
enc_codec_context->height = height;
enc_stream->time_base = time_base;
enc_codec_context->time_base = time_base;
enc_codec_context->gop_size = 12;
enc_codec_context->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV444P;
if (avf_enc_context->oformat->flags & AVFMT_GLOBALHEADER) {
enc_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
// more init on enc_codec_context
return_value = avcodec_open2(enc_codec_context, enc_codec, nullptr);
if (return_value != 0) {
std::cout << "ERROR: Failed to init enc_codec_context" << std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
return_value = avcodec_parameters_from_context(enc_stream->codecpar,
enc_codec_context);
if (return_value < 0) {
std::cout << "ERROR: Failed to set encoding codec parameters in stream"
<< std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
std::cout << "Dumping output video format info..." << std::endl;
av_dump_format(avf_enc_context, enc_stream->id, output_filename.c_str(), 1);
// open output file if needed
if (!(avf_enc_context->oformat->flags & AVFMT_NOFILE)) {
return_value = avio_open(&avf_enc_context->pb, output_filename.c_str(),
AVIO_FLAG_WRITE);
if (return_value < 0) {
std::cout << "ERROR: Failed to open file \"" << output_filename
<< "\" for writing" << std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
}
// write header
return_value = avformat_write_header(avf_enc_context, nullptr);
if (return_value < 0) {
std::cout << "ERROR: Failed to write header in output video file"
<< std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
} // if (!output_as_pngs)
// do decoding, then encoding per frame
// read frames
while (av_read_frame(avf_dec_context, pkt) >= 0) {
if (pkt->stream_index == video_stream_idx) {
auto ret_tuple =
HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale,
color_changed, output_as_pngs);
if (!std::get<0>(ret_tuple)) {
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
} else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) {
for (auto *yuv_frame : std::get<1>(ret_tuple)) {
if (!HandleEncodingFrame(avf_enc_context, enc_codec_context,
yuv_frame, enc_stream)) {
av_frame_free(&yuv_frame);
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
av_frame_free(&yuv_frame);
}
}
}
} }
// flush decoders // flush decoders
if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale, auto ret_tuple =
overwrite)) { HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale,
return false; color_changed, output_as_pngs);
} if (!std::get<0>(ret_tuple)) {
avcodec_close(enc_codec_context);
// cleanup avformat_free_context(avf_enc_context);
av_frame_free(&frame); av_frame_free(&frame);
av_packet_free(&pkt); av_packet_free(&pkt);
avcodec_free_context(&codec_ctx); avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context); avformat_close_input(&avf_dec_context);
return false;
} else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) {
for (auto *yuv_frame : std::get<1>(ret_tuple)) {
if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, yuv_frame,
enc_stream)) {
av_frame_free(&yuv_frame);
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
av_frame_free(&yuv_frame);
}
}
if (!output_as_pngs) {
// flush encoder
if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, nullptr,
enc_stream)) {
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// finish encoding
av_write_trailer(avf_enc_context);
}
// cleanup
if (enc_codec_context) {
avcodec_close(enc_codec_context);
}
if (!output_as_pngs && !(avf_enc_context->oformat->flags & AVFMT_NOFILE)) {
avio_closep(&avf_enc_context->pb);
}
if (avf_enc_context) {
avformat_free_context(avf_enc_context);
}
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return true; return true;
} }
bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, std::tuple<bool, std::vector<AVFrame *>> Video::HandleDecodingPacket(
AVFrame *frame, Image *blue_noise, AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, Image *blue_noise,
bool grayscale, bool overwrite) { bool grayscale, bool color_changed, bool output_as_pngs) {
int return_value = avcodec_send_packet(codec_ctx, pkt); int return_value = avcodec_send_packet(codec_ctx, pkt);
if (return_value < 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')' std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
<< std::endl; << std::endl;
return false; return {false, {}};
} }
return_value = 0; return_value = 0;
std::vector<AVFrame *> return_frames{};
while (return_value >= 0) { while (return_value >= 0) {
return_value = avcodec_receive_frame(codec_ctx, frame); return_value = avcodec_receive_frame(codec_ctx, frame);
if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) { if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
return true; return {true, return_frames};
} else if (return_value < 0) { } else if (return_value < 0) {
std::cout << "ERROR: Failed to get frame from decoded packet(s)" std::cout << "ERROR: Failed to get frame from decoded packet(s)"
<< std::endl; << std::endl;
return false; return {false, {}};
} }
++frame_count_; ++frame_count_;
std::cout << "Frame " << frame_count_ << std::endl; // TODO DEBUG std::cout << "Frame " << frame_count_ << std::endl; // TODO DEBUG
// output buffer info for converting pixel format to RGBA AVFrame *temp_frame = av_frame_alloc();
uint8_t *dst[AV_NUM_DATA_POINTERS]; temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA;
dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16, temp_frame->width = frame->width;
sizeof(uint8_t)); temp_frame->height = frame->height;
for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) { return_value = av_frame_get_buffer(temp_frame, 0);
dst[i] = nullptr; if (return_value != 0) {
} std::cout << "ERROR: Failed to init temp_frame to receive RGBA data"
std::array<int, AV_NUM_DATA_POINTERS> dst_strides = { << std::endl;
frame->width * (grayscale ? 1 : 4), 0, 0, 0, 0, 0, 0, 0}; av_frame_free(&temp_frame);
return {false, {}};
unsigned int line_count = 0;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
if (frame->linesize[i] > 0) {
++line_count;
}
}
if (line_count == 0) {
std::cout << "ERROR: Invalid number of picture planes" << std::endl;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
free(dst[i]);
}
return false;
} }
// Convert colors to RGBA // Convert colors to RGBA
if (sws_context_ == nullptr) { if (sws_dec_context_ == nullptr) {
sws_context_ = sws_getContext(frame->width, frame->height, sws_dec_context_ = sws_getContext(
(AVPixelFormat)frame->format, frame->width, frame->width, frame->height, (AVPixelFormat)frame->format,
frame->height, frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_RGBA,
grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
: AVPixelFormat::AV_PIX_FMT_RGBA,
SWS_BILINEAR, nullptr, nullptr, nullptr); SWS_BILINEAR, nullptr, nullptr, nullptr);
if (sws_context_ == nullptr) { if (sws_dec_context_ == nullptr) {
std::cout << "ERROR: Failed to init sws_context_" << std::endl; std::cout << "ERROR: Failed to init sws_dec_context_" << std::endl;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { av_frame_free(&temp_frame);
free(dst[i]); return {false, {}};
}
return false;
} }
} }
return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0, return_value =
frame->height, dst, dst_strides.data()); sws_scale(sws_dec_context_, frame->data, frame->linesize, 0,
frame->height, temp_frame->data, temp_frame->linesize);
if (return_value < 0) { if (return_value < 0) {
std::cout << "ERROR: Failed to convert pixel format of frame" std::cout << "ERROR: Failed to convert pixel format of frame"
<< std::endl; << std::endl;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { av_frame_free(&temp_frame);
free(dst[i]); return {false, {}};
}
return false;
} }
// put RGBA data into image // put RGBA data into image
image_.width_ = frame->width; image_.width_ = frame->width;
image_.height_ = frame->height; image_.height_ = frame->height;
if (grayscale) {
image_.is_grayscale_ = true;
image_.data_.resize(frame->width * frame->height);
for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) {
image_.data_.at(i) = dst[0][i];
}
} else {
image_.is_grayscale_ = false; image_.is_grayscale_ = false;
image_.data_.resize(frame->width * frame->height * 4); image_.data_.resize(frame->width * frame->height * 4);
for (unsigned int y = 0; (int)y < frame->height; ++y) { for (unsigned int y = 0; (int)y < frame->height; ++y) {
for (unsigned int x = 0; (int)x < frame->width; ++x) { for (unsigned int x = 0; (int)x < frame->width; ++x) {
image_.data_.at(x * 4 + y * 4 * frame->width) = image_.data_.at(x * 4 + y * 4 * frame->width) =
dst[0][x * 4 + y * 4 * frame->width]; temp_frame->data[0][x * 4 + y * 4 * frame->width];
image_.data_.at(1 + x * 4 + y * 4 * frame->width) = image_.data_.at(1 + x * 4 + y * 4 * frame->width) =
dst[0][1 + x * 4 + y * 4 * frame->width]; temp_frame->data[0][1 + x * 4 + y * 4 * frame->width];
image_.data_.at(2 + x * 4 + y * 4 * frame->width) = image_.data_.at(2 + x * 4 + y * 4 * frame->width) =
dst[0][2 + x * 4 + y * 4 * frame->width]; temp_frame->data[0][2 + x * 4 + y * 4 * frame->width];
image_.data_.at(3 + x * 4 + y * 4 * frame->width) = image_.data_.at(3 + x * 4 + y * 4 * frame->width) =
dst[0][3 + x * 4 + y * 4 * frame->width]; temp_frame->data[0][3 + x * 4 + y * 4 * frame->width];
}
} }
} }
av_frame_unref(temp_frame);
std::unique_ptr<Image> dithered_image; std::unique_ptr<Image> dithered_image;
if (grayscale) { if (grayscale) {
dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise); dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
} else { } else {
dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise); dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
} }
if (!dithered_image) {
std::cout << "ERROR: Failed to dither video frame" << std::endl;
return {false, {}};
}
if (output_as_pngs) {
std::string out_name = "output_"; std::string out_name = "output_";
if (frame_count_ < 10) { for (unsigned int i = 0; i < 9; ++i) {
out_name += "000" + std::to_string(frame_count_); if (frame_count_ < (unsigned int)std::pow(10, i)) {
} else if (frame_count_ < 100) { out_name += "0";
out_name += "00" + std::to_string(frame_count_); }
} else if (frame_count_ < 1000) { }
out_name += "0" + std::to_string(frame_count_);
} else {
out_name += std::to_string(frame_count_); out_name += std::to_string(frame_count_);
}
out_name += ".png"; out_name += ".png";
if (!dithered_image->SaveAsPNG(out_name, overwrite)) { if (!dithered_image->SaveAsPNG(out_name, true)) {
return false; return {false, {}};
}
return {true, {}};
}
// convert grayscale/RGBA to YUV444p
if (sws_enc_context_ != nullptr && color_changed) {
// switched between grayscale/RGBA, context needs to be recreated
sws_freeContext(sws_enc_context_);
sws_enc_context_ = nullptr;
}
if (sws_enc_context_ == nullptr) {
sws_enc_context_ = sws_getContext(
frame->width, frame->height,
grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
: AVPixelFormat::AV_PIX_FMT_RGBA,
frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_YUV444P,
SWS_BILINEAR, nullptr, nullptr, nullptr);
if (sws_enc_context_ == nullptr) {
std::cout << "ERROR: Failed to init sws_enc_context_" << std::endl;
return {false, {}};
}
}
// rgba data info
if (grayscale) {
av_frame_free(&temp_frame);
temp_frame = av_frame_alloc();
temp_frame->format = AVPixelFormat::AV_PIX_FMT_GRAY8;
temp_frame->width = frame->width;
temp_frame->height = frame->height;
return_value = av_frame_get_buffer(temp_frame, 0);
if (return_value != 0) {
std::cout
<< "ERROR: Failed to init temp_frame for conversion from grayscale"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
std::memcpy(temp_frame->data[0], dithered_image->data_.data(),
frame->width * frame->height);
} else {
temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA;
temp_frame->width = frame->width;
temp_frame->height = frame->height;
return_value = av_frame_get_buffer(temp_frame, 0);
if (return_value != 0) {
std::cout << "ERROR: Failed to init temp_frame for conversion from RGBA"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
std::memcpy(temp_frame->data[0], dithered_image->data_.data(),
4 * frame->width * frame->height);
}
AVFrame *yuv_frame = av_frame_alloc();
if (frame == nullptr) {
std::cout
<< "ERROR: Failed to alloc AVFrame for receiving YUV444p from RGBA"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
yuv_frame->format = AVPixelFormat::AV_PIX_FMT_YUV444P;
yuv_frame->width = frame->width;
yuv_frame->height = frame->height;
return_value = av_frame_get_buffer(yuv_frame, 0);
return_value =
sws_scale(sws_enc_context_, temp_frame->data, temp_frame->linesize, 0,
frame->height, yuv_frame->data, yuv_frame->linesize);
if (return_value <= 0) {
std::cout << "ERROR: Failed to convert RGBA to YUV444p with sws_scale"
<< std::endl;
av_frame_free(&yuv_frame);
av_frame_free(&temp_frame);
return {false, {}};
} }
// TODO encode video with dithered_image
// cleanup // cleanup
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { av_frame_free(&temp_frame);
free(dst[i]); yuv_frame->pts = frame_count_ - 1;
yuv_frame->pkt_duration = 1;
return_frames.push_back(yuv_frame);
}
return {true, return_frames};
}
bool Video::HandleEncodingFrame(AVFormatContext *enc_format_ctx,
AVCodecContext *enc_codec_ctx,
AVFrame *yuv_frame, AVStream *video_stream) {
int return_value;
return_value = avcodec_send_frame(enc_codec_ctx, yuv_frame);
if (return_value < 0) {
std::cout << "ERROR: Failed to send frame to encoder" << std::endl;
return false;
}
AVPacket pkt;
std::memset(&pkt, 0, sizeof(AVPacket));
while (return_value >= 0) {
std::memset(&pkt, 0, sizeof(AVPacket));
return_value = avcodec_receive_packet(enc_codec_ctx, &pkt);
if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
break;
} else if (return_value < 0) {
std::cout << "ERROR: Failed to encode a frame" << std::endl;
return false;
}
// rescale timing fields (timestamps / durations)
av_packet_rescale_ts(&pkt, enc_codec_ctx->time_base,
video_stream->time_base);
pkt.stream_index = video_stream->index;
// write frame
return_value = av_interleaved_write_frame(enc_format_ctx, &pkt);
av_packet_unref(&pkt);
if (return_value < 0) {
std::cout << "ERROR: Failed to write encoding packet" << std::endl;
return false;
} }
} }

View file

@ -1,8 +1,11 @@
#ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_ #ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_
#define IGPUP_DITHERING_PROJECT_VIDEO_H_ #define IGPUP_DITHERING_PROJECT_VIDEO_H_
#include <tuple>
extern "C" { extern "C" {
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h> #include <libswscale/swscale.h>
} }
@ -13,6 +16,8 @@ constexpr unsigned int kReadBufPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE;
constexpr unsigned int kReadBufSizeWithPadding = constexpr unsigned int kReadBufSizeWithPadding =
kReadBufSize + kReadBufPaddingSize; kReadBufSize + kReadBufPaddingSize;
constexpr unsigned int kOutputBitrate = 40000000;
/*! /*!
* \brief Helper class that uses Image and OpenCLHandle to dither video frames. * \brief Helper class that uses Image and OpenCLHandle to dither video frames.
* *
@ -35,30 +40,39 @@ class Video {
/// Same as DitherVideo(const std::string&, Image*, bool, bool) /// Same as DitherVideo(const std::string&, Image*, bool, bool)
bool DitherVideo(const char *output_filename, Image *blue_noise, bool DitherVideo(const char *output_filename, Image *blue_noise,
bool grayscale = false, bool overwrite = false); bool grayscale = false, bool overwrite = false,
bool output_as_pngs = false);
/*! /*!
* \brief Dithers the frames in the input video. * \brief Dithers the frames in the input video.
* *
* Currently, the program doesn't create the output video, but instead outputs * If output_as_pngs is true, then the output will be individual PNGs of each
* each frame as an individual image in the current directory. If things go * frame instead of a video file. This may be desirable because the output
* well, the expected behavior will be implemented soon. * video struggles to maintain video quality.
* *
* \return True on success. * \return True on success.
*/ */
bool DitherVideo(const std::string &output_filename, Image *blue_noise, bool DitherVideo(const std::string &output_filename, Image *blue_noise,
bool grayscale = false, bool overwrite = false); bool grayscale = false, bool overwrite = false,
bool output_as_pngs = false);
private: private:
Image image_; Image image_;
std::string input_filename_; std::string input_filename_;
SwsContext *sws_context_; SwsContext *sws_dec_context_;
SwsContext *sws_enc_context_;
unsigned int frame_count_; unsigned int frame_count_;
unsigned int packet_count_; unsigned int packet_count_;
bool was_grayscale_;
bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, std::tuple<bool, std::vector<AVFrame *>> HandleDecodingPacket(
AVFrame *frame, Image *blue_noise, bool grayscale, AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame,
bool overwrite); Image *blue_noise, bool grayscale, bool color_changed,
bool output_as_pngs);
bool HandleEncodingFrame(AVFormatContext *enc_format_ctx,
AVCodecContext *enc_codec_ctx, AVFrame *yuv_frame,
AVStream *video_stream);
}; };
#endif #endif