Impl output to video, update program args

Turns out output to video suffers from loss of quality problems when in
color (not grayscale). It may be better in some cases to just output
each frame as a png and combining them later with ffmpeg like how it is
mentioned here: https://trac.ffmpeg.org/wiki/Slideshow . Grayscale video
is noticeably better, but both cases result in large video sizes, so care
may be needed if free disk space is scarce.
This commit is contained in:
Stephen Seo 2021-12-03 20:09:56 +09:00
parent 8191a51d9b
commit 788ce5e490
5 changed files with 489 additions and 126 deletions

View file

@ -14,7 +14,8 @@ void Args::PrintUsage() {
std::cout
<< "Usage: [-h | --help] [-i <filename> | --input <filename>] [-o "
"<filename> | --output <filename>] [-b <filename> | --blue "
"<filename>] [-g | --gray] [--image] [--video] [--overwrite]\n"
"<filename>] [-g | --gray] [--image] [--video] [--video-pngs] "
"[--overwrite]\n"
" -h | --help\t\t\t\tPrint this usage text\n"
" -i <filename> | --input <filename>\tSet input filename\n"
" -o <filename> | --output <filename>\tSet output filename\n"
@ -22,6 +23,7 @@ void Args::PrintUsage() {
" -g | --gray\t\t\t\tDither output in grayscale\n"
" --image\t\t\t\tDither a single image\n"
" --video\t\t\t\tDither frames in a video\n"
" --video-pngs\t\t\t\tDither frames but output as individual pngs\n"
" --overwrite\t\t\t\tAllow overwriting existing files\n"
<< std::endl;
}
@ -56,6 +58,9 @@ bool Args::ParseArgs(int argc, char **argv) {
do_dither_image_ = true;
} else if (std::strcmp(argv[0], "--video") == 0) {
do_dither_image_ = false;
} else if (std::strcmp(argv[0], "--video-pngs") == 0) {
do_dither_image_ = false;
do_video_pngs_ = true;
} else if (std::strcmp(argv[0], "--overwrite") == 0) {
do_overwrite_ = true;
} else {

View file

@ -14,6 +14,7 @@ struct Args {
bool do_dither_image_;
bool do_dither_grayscaled_;
bool do_overwrite_;
bool do_video_pngs_;
std::string input_filename;
std::string output_filename;
std::string blue_noise_filename;

View file

@ -60,7 +60,8 @@ int main(int argc, char **argv) {
} else {
Video video(args.input_filename);
if (!video.DitherVideo(args.output_filename, &blue_noise,
args.do_dither_grayscaled_, args.do_overwrite_)) {
args.do_dither_grayscaled_, args.do_overwrite_,
args.do_video_pngs_)) {
std::cout << "ERROR: Failed to dither frames from input video \""
<< args.input_filename << '"' << std::endl;
Args::PrintUsage();

View file

@ -1,42 +1,63 @@
#include "video.h"
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
extern "C" {
#include <libavformat/avformat.h>
}
Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}
Video::Video(const std::string &video_filename)
: image_(),
input_filename_(video_filename),
sws_context_(nullptr),
sws_dec_context_(nullptr),
sws_enc_context_(nullptr),
frame_count_(0),
packet_count_(0) {}
packet_count_(0),
was_grayscale_(false) {}
Video::~Video() {
if (sws_context_ != nullptr) {
sws_freeContext(sws_context_);
if (sws_dec_context_ != nullptr) {
sws_freeContext(sws_dec_context_);
}
}
bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
bool grayscale, bool overwrite) {
bool grayscale, bool overwrite, bool output_as_pngs) {
return DitherVideo(std::string(output_filename), blue_noise, grayscale,
overwrite);
overwrite, output_as_pngs);
}
bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
bool grayscale, bool overwrite) {
bool grayscale, bool overwrite, bool output_as_pngs) {
if (!overwrite && !output_as_pngs) {
// check if output_file exists
std::ifstream ifs(output_filename);
if (ifs.is_open()) {
std::cout << "ERROR: output file \"" << output_filename
<< "\" exists "
"and overwrite is disabled"
<< std::endl;
return false;
}
}
frame_count_ = 0;
bool color_changed = false;
if (was_grayscale_ != grayscale) {
color_changed = true;
}
was_grayscale_ = grayscale;
// set up decoding
// Get AVFormatContext for input file
AVFormatContext *avf_context = nullptr;
AVFormatContext *avf_dec_context = nullptr;
std::string url = std::string("file:") + input_filename_;
int return_value =
avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
avformat_open_input(&avf_dec_context, url.c_str(), nullptr, nullptr);
if (return_value != 0) {
std::cout << "ERROR: Failed to open input file to determine format"
<< std::endl;
@ -44,60 +65,70 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
}
// Read from input file to fill in info in AVFormatContext
return_value = avformat_find_stream_info(avf_context, nullptr);
return_value = avformat_find_stream_info(avf_dec_context, nullptr);
if (return_value < 0) {
std::cout << "ERROR: Failed to determine input file stream info"
<< std::endl;
avformat_close_input(&avf_context);
avformat_close_input(&avf_dec_context);
return false;
}
// Get "best" video stream
AVCodec *avcodec = nullptr;
AVCodec *dec_codec = nullptr;
return_value = av_find_best_stream(
avf_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &avcodec, 0);
avf_dec_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &dec_codec, 0);
if (return_value < 0) {
std::cout << "ERROR: Failed to get video stream in input file" << std::endl;
avformat_close_input(&avf_context);
avformat_close_input(&avf_dec_context);
return false;
}
int video_stream_idx = return_value;
// Alloc codec context
AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
AVCodecContext *codec_ctx = avcodec_alloc_context3(dec_codec);
if (!codec_ctx) {
std::cout << "ERROR: Failed to alloc codec context" << std::endl;
avformat_close_input(&avf_context);
avformat_close_input(&avf_dec_context);
return false;
}
// Set codec parameters from input stream
return_value = avcodec_parameters_to_context(
codec_ctx, avf_context->streams[video_stream_idx]->codecpar);
codec_ctx, avf_dec_context->streams[video_stream_idx]->codecpar);
if (return_value < 0) {
std::cout << "ERROR: Failed to set codec parameters from input stream"
<< std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context);
avformat_close_input(&avf_dec_context);
return false;
}
// Init codec context
return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
return_value = avcodec_open2(codec_ctx, dec_codec, nullptr);
if (return_value < 0) {
std::cout << "ERROR: Failed to init codec context" << std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context);
avformat_close_input(&avf_dec_context);
return false;
}
av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0);
std::cout << "Dumping input video format info..." << std::endl;
av_dump_format(avf_dec_context, video_stream_idx, input_filename_.c_str(), 0);
// get input stream info
unsigned int width =
avf_dec_context->streams[video_stream_idx]->codecpar->width;
unsigned int height =
avf_dec_context->streams[video_stream_idx]->codecpar->height;
auto r_frame_rate = avf_dec_context->streams[video_stream_idx]->r_frame_rate;
decltype(r_frame_rate) time_base = {r_frame_rate.den, r_frame_rate.num};
// Alloc a packet object for reading packets
AVPacket *pkt = av_packet_alloc();
if (!pkt) {
std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
@ -107,162 +138,473 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// read frames
while (av_read_frame(avf_context, pkt) >= 0) {
if (pkt->stream_index == video_stream_idx) {
if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale,
overwrite)) {
// Set up encoding
// alloc/init encoding AVFormatContext
AVFormatContext *avf_enc_context = nullptr;
if (!output_as_pngs) {
return_value = avformat_alloc_output_context2(
&avf_enc_context, nullptr, nullptr, output_filename.c_str());
if (return_value < 0) {
std::cout << "ERROR: Failed to alloc/init avf_enc_context" << std::endl;
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
}
// set output video codec (h264)
AVCodecContext *enc_codec_context = nullptr;
AVCodec *enc_codec = nullptr;
// get H264 codec
if (!output_as_pngs) {
enc_codec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_H264);
if (enc_codec == nullptr) {
std::cout << "ERROR: Failed to get H264 codec for encoding" << std::endl;
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
}
// create new video stream
AVStream *enc_stream = nullptr;
if (!output_as_pngs) {
enc_stream = avformat_new_stream(avf_enc_context, enc_codec);
if (enc_stream == nullptr) {
std::cout << "ERROR: Failed to create encoding stream" << std::endl;
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// assign its id
enc_stream->id = avf_enc_context->nb_streams - 1;
// alloc enc AVCodecContext
enc_codec_context = avcodec_alloc_context3(enc_codec);
if (enc_codec_context == nullptr) {
std::cout << "ERROR: Failed to create AVCodecContext for encoding"
<< std::endl;
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// set values on enc_codec_context
enc_codec_context->codec_id = AVCodecID::AV_CODEC_ID_H264;
enc_codec_context->bit_rate = kOutputBitrate;
enc_codec_context->width = width;
enc_codec_context->height = height;
enc_stream->time_base = time_base;
enc_codec_context->time_base = time_base;
enc_codec_context->gop_size = 12;
enc_codec_context->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV444P;
if (avf_enc_context->oformat->flags & AVFMT_GLOBALHEADER) {
enc_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
// more init on enc_codec_context
return_value = avcodec_open2(enc_codec_context, enc_codec, nullptr);
if (return_value != 0) {
std::cout << "ERROR: Failed to init enc_codec_context" << std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
return_value = avcodec_parameters_from_context(enc_stream->codecpar,
enc_codec_context);
if (return_value < 0) {
std::cout << "ERROR: Failed to set encoding codec parameters in stream"
<< std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
std::cout << "Dumping output video format info..." << std::endl;
av_dump_format(avf_enc_context, enc_stream->id, output_filename.c_str(), 1);
// open output file if needed
if (!(avf_enc_context->oformat->flags & AVFMT_NOFILE)) {
return_value = avio_open(&avf_enc_context->pb, output_filename.c_str(),
AVIO_FLAG_WRITE);
if (return_value < 0) {
std::cout << "ERROR: Failed to open file \"" << output_filename
<< "\" for writing" << std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
}
// write header
return_value = avformat_write_header(avf_enc_context, nullptr);
if (return_value < 0) {
std::cout << "ERROR: Failed to write header in output video file"
<< std::endl;
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
} // if (!output_as_pngs)
// do decoding, then encoding per frame
// read frames
while (av_read_frame(avf_dec_context, pkt) >= 0) {
if (pkt->stream_index == video_stream_idx) {
auto ret_tuple =
HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale,
color_changed, output_as_pngs);
if (!std::get<0>(ret_tuple)) {
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
} else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) {
for (auto *yuv_frame : std::get<1>(ret_tuple)) {
if (!HandleEncodingFrame(avf_enc_context, enc_codec_context,
yuv_frame, enc_stream)) {
av_frame_free(&yuv_frame);
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
av_frame_free(&yuv_frame);
}
}
}
}
// flush decoders
if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale,
overwrite)) {
auto ret_tuple =
HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale,
color_changed, output_as_pngs);
if (!std::get<0>(ret_tuple)) {
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
} else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) {
for (auto *yuv_frame : std::get<1>(ret_tuple)) {
if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, yuv_frame,
enc_stream)) {
av_frame_free(&yuv_frame);
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
av_frame_free(&yuv_frame);
}
}
if (!output_as_pngs) {
// flush encoder
if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, nullptr,
enc_stream)) {
avcodec_close(enc_codec_context);
avformat_free_context(avf_enc_context);
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_dec_context);
return false;
}
// finish encoding
av_write_trailer(avf_enc_context);
}
// cleanup
if (enc_codec_context) {
avcodec_close(enc_codec_context);
}
if (!output_as_pngs && !(avf_enc_context->oformat->flags & AVFMT_NOFILE)) {
avio_closep(&avf_enc_context->pb);
}
if (avf_enc_context) {
avformat_free_context(avf_enc_context);
}
av_frame_free(&frame);
av_packet_free(&pkt);
avcodec_free_context(&codec_ctx);
avformat_close_input(&avf_context);
avformat_close_input(&avf_dec_context);
return true;
}
bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
AVFrame *frame, Image *blue_noise,
bool grayscale, bool overwrite) {
std::tuple<bool, std::vector<AVFrame *>> Video::HandleDecodingPacket(
AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, Image *blue_noise,
bool grayscale, bool color_changed, bool output_as_pngs) {
int return_value = avcodec_send_packet(codec_ctx, pkt);
if (return_value < 0) {
std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
<< std::endl;
return false;
return {false, {}};
}
return_value = 0;
std::vector<AVFrame *> return_frames{};
while (return_value >= 0) {
return_value = avcodec_receive_frame(codec_ctx, frame);
if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
return true;
return {true, return_frames};
} else if (return_value < 0) {
std::cout << "ERROR: Failed to get frame from decoded packet(s)"
<< std::endl;
return false;
return {false, {}};
}
++frame_count_;
std::cout << "Frame " << frame_count_ << std::endl; // TODO DEBUG
// output buffer info for converting pixel format to RGBA
uint8_t *dst[AV_NUM_DATA_POINTERS];
dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16,
sizeof(uint8_t));
for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) {
dst[i] = nullptr;
}
std::array<int, AV_NUM_DATA_POINTERS> dst_strides = {
frame->width * (grayscale ? 1 : 4), 0, 0, 0, 0, 0, 0, 0};
unsigned int line_count = 0;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
if (frame->linesize[i] > 0) {
++line_count;
}
}
if (line_count == 0) {
std::cout << "ERROR: Invalid number of picture planes" << std::endl;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
free(dst[i]);
}
return false;
AVFrame *temp_frame = av_frame_alloc();
temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA;
temp_frame->width = frame->width;
temp_frame->height = frame->height;
return_value = av_frame_get_buffer(temp_frame, 0);
if (return_value != 0) {
std::cout << "ERROR: Failed to init temp_frame to receive RGBA data"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
// Convert colors to RGBA
if (sws_context_ == nullptr) {
sws_context_ = sws_getContext(frame->width, frame->height,
(AVPixelFormat)frame->format, frame->width,
frame->height,
grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
: AVPixelFormat::AV_PIX_FMT_RGBA,
SWS_BILINEAR, nullptr, nullptr, nullptr);
if (sws_context_ == nullptr) {
std::cout << "ERROR: Failed to init sws_context_" << std::endl;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
free(dst[i]);
}
return false;
if (sws_dec_context_ == nullptr) {
sws_dec_context_ = sws_getContext(
frame->width, frame->height, (AVPixelFormat)frame->format,
frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_RGBA,
SWS_BILINEAR, nullptr, nullptr, nullptr);
if (sws_dec_context_ == nullptr) {
std::cout << "ERROR: Failed to init sws_dec_context_" << std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
}
return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0,
frame->height, dst, dst_strides.data());
return_value =
sws_scale(sws_dec_context_, frame->data, frame->linesize, 0,
frame->height, temp_frame->data, temp_frame->linesize);
if (return_value < 0) {
std::cout << "ERROR: Failed to convert pixel format of frame"
<< std::endl;
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
free(dst[i]);
}
return false;
av_frame_free(&temp_frame);
return {false, {}};
}
// put RGBA data into image
image_.width_ = frame->width;
image_.height_ = frame->height;
if (grayscale) {
image_.is_grayscale_ = true;
image_.data_.resize(frame->width * frame->height);
for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) {
image_.data_.at(i) = dst[0][i];
}
} else {
image_.is_grayscale_ = false;
image_.data_.resize(frame->width * frame->height * 4);
for (unsigned int y = 0; (int)y < frame->height; ++y) {
for (unsigned int x = 0; (int)x < frame->width; ++x) {
image_.data_.at(x * 4 + y * 4 * frame->width) =
dst[0][x * 4 + y * 4 * frame->width];
image_.data_.at(1 + x * 4 + y * 4 * frame->width) =
dst[0][1 + x * 4 + y * 4 * frame->width];
image_.data_.at(2 + x * 4 + y * 4 * frame->width) =
dst[0][2 + x * 4 + y * 4 * frame->width];
image_.data_.at(3 + x * 4 + y * 4 * frame->width) =
dst[0][3 + x * 4 + y * 4 * frame->width];
}
image_.is_grayscale_ = false;
image_.data_.resize(frame->width * frame->height * 4);
for (unsigned int y = 0; (int)y < frame->height; ++y) {
for (unsigned int x = 0; (int)x < frame->width; ++x) {
image_.data_.at(x * 4 + y * 4 * frame->width) =
temp_frame->data[0][x * 4 + y * 4 * frame->width];
image_.data_.at(1 + x * 4 + y * 4 * frame->width) =
temp_frame->data[0][1 + x * 4 + y * 4 * frame->width];
image_.data_.at(2 + x * 4 + y * 4 * frame->width) =
temp_frame->data[0][2 + x * 4 + y * 4 * frame->width];
image_.data_.at(3 + x * 4 + y * 4 * frame->width) =
temp_frame->data[0][3 + x * 4 + y * 4 * frame->width];
}
}
av_frame_unref(temp_frame);
std::unique_ptr<Image> dithered_image;
if (grayscale) {
dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
} else {
dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
}
if (!dithered_image) {
std::cout << "ERROR: Failed to dither video frame" << std::endl;
return {false, {}};
}
std::string out_name = "output_";
if (frame_count_ < 10) {
out_name += "000" + std::to_string(frame_count_);
} else if (frame_count_ < 100) {
out_name += "00" + std::to_string(frame_count_);
} else if (frame_count_ < 1000) {
out_name += "0" + std::to_string(frame_count_);
} else {
if (output_as_pngs) {
std::string out_name = "output_";
for (unsigned int i = 0; i < 9; ++i) {
if (frame_count_ < (unsigned int)std::pow(10, i)) {
out_name += "0";
}
}
out_name += std::to_string(frame_count_);
out_name += ".png";
if (!dithered_image->SaveAsPNG(out_name, true)) {
return {false, {}};
}
return {true, {}};
}
out_name += ".png";
if (!dithered_image->SaveAsPNG(out_name, overwrite)) {
return false;
// convert grayscale/RGBA to YUV444p
if (sws_enc_context_ != nullptr && color_changed) {
// switched between grayscale/RGBA, context needs to be recreated
sws_freeContext(sws_enc_context_);
sws_enc_context_ = nullptr;
}
if (sws_enc_context_ == nullptr) {
sws_enc_context_ = sws_getContext(
frame->width, frame->height,
grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
: AVPixelFormat::AV_PIX_FMT_RGBA,
frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_YUV444P,
SWS_BILINEAR, nullptr, nullptr, nullptr);
if (sws_enc_context_ == nullptr) {
std::cout << "ERROR: Failed to init sws_enc_context_" << std::endl;
return {false, {}};
}
}
// rgba data info
if (grayscale) {
av_frame_free(&temp_frame);
temp_frame = av_frame_alloc();
temp_frame->format = AVPixelFormat::AV_PIX_FMT_GRAY8;
temp_frame->width = frame->width;
temp_frame->height = frame->height;
return_value = av_frame_get_buffer(temp_frame, 0);
if (return_value != 0) {
std::cout
<< "ERROR: Failed to init temp_frame for conversion from grayscale"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
std::memcpy(temp_frame->data[0], dithered_image->data_.data(),
frame->width * frame->height);
} else {
temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA;
temp_frame->width = frame->width;
temp_frame->height = frame->height;
return_value = av_frame_get_buffer(temp_frame, 0);
if (return_value != 0) {
std::cout << "ERROR: Failed to init temp_frame for conversion from RGBA"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
std::memcpy(temp_frame->data[0], dithered_image->data_.data(),
4 * frame->width * frame->height);
}
AVFrame *yuv_frame = av_frame_alloc();
if (frame == nullptr) {
std::cout
<< "ERROR: Failed to alloc AVFrame for receiving YUV444p from RGBA"
<< std::endl;
av_frame_free(&temp_frame);
return {false, {}};
}
yuv_frame->format = AVPixelFormat::AV_PIX_FMT_YUV444P;
yuv_frame->width = frame->width;
yuv_frame->height = frame->height;
return_value = av_frame_get_buffer(yuv_frame, 0);
return_value =
sws_scale(sws_enc_context_, temp_frame->data, temp_frame->linesize, 0,
frame->height, yuv_frame->data, yuv_frame->linesize);
if (return_value <= 0) {
std::cout << "ERROR: Failed to convert RGBA to YUV444p with sws_scale"
<< std::endl;
av_frame_free(&yuv_frame);
av_frame_free(&temp_frame);
return {false, {}};
}
// TODO encode video with dithered_image
// cleanup
for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
free(dst[i]);
av_frame_free(&temp_frame);
yuv_frame->pts = frame_count_ - 1;
yuv_frame->pkt_duration = 1;
return_frames.push_back(yuv_frame);
}
return {true, return_frames};
}
bool Video::HandleEncodingFrame(AVFormatContext *enc_format_ctx,
AVCodecContext *enc_codec_ctx,
AVFrame *yuv_frame, AVStream *video_stream) {
int return_value;
return_value = avcodec_send_frame(enc_codec_ctx, yuv_frame);
if (return_value < 0) {
std::cout << "ERROR: Failed to send frame to encoder" << std::endl;
return false;
}
AVPacket pkt;
std::memset(&pkt, 0, sizeof(AVPacket));
while (return_value >= 0) {
std::memset(&pkt, 0, sizeof(AVPacket));
return_value = avcodec_receive_packet(enc_codec_ctx, &pkt);
if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
break;
} else if (return_value < 0) {
std::cout << "ERROR: Failed to encode a frame" << std::endl;
return false;
}
// rescale timing fields (timestamps / durations)
av_packet_rescale_ts(&pkt, enc_codec_ctx->time_base,
video_stream->time_base);
pkt.stream_index = video_stream->index;
// write frame
return_value = av_interleaved_write_frame(enc_format_ctx, &pkt);
av_packet_unref(&pkt);
if (return_value < 0) {
std::cout << "ERROR: Failed to write encoding packet" << std::endl;
return false;
}
}

View file

@ -1,8 +1,11 @@
#ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_
#define IGPUP_DITHERING_PROJECT_VIDEO_H_
#include <tuple>
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
}
@ -13,6 +16,8 @@ constexpr unsigned int kReadBufPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE;
constexpr unsigned int kReadBufSizeWithPadding =
kReadBufSize + kReadBufPaddingSize;
constexpr unsigned int kOutputBitrate = 40000000;
/*!
* \brief Helper class that uses Image and OpenCLHandle to dither video frames.
*
@ -35,30 +40,39 @@ class Video {
/// Same as DitherVideo(const std::string&, Image*, bool, bool, bool)
bool DitherVideo(const char *output_filename, Image *blue_noise,
bool grayscale = false, bool overwrite = false);
bool grayscale = false, bool overwrite = false,
bool output_as_pngs = false);
/*!
* \brief Dithers the frames in the input video.
*
* Currently, the program doesn't create the output video, but instead outputs
* each frame as an individual image in the current directory. If things go
* well, the expected behavior will be implemented soon.
* If output_as_pngs is true, then the output will be individual PNGs of each
* frame instead of a video file. This may be desirable because the output
* video struggles to maintain video quality.
*
* \return True on success.
*/
bool DitherVideo(const std::string &output_filename, Image *blue_noise,
bool grayscale = false, bool overwrite = false);
bool grayscale = false, bool overwrite = false,
bool output_as_pngs = false);
private:
Image image_;
std::string input_filename_;
SwsContext *sws_context_;
SwsContext *sws_dec_context_;
SwsContext *sws_enc_context_;
unsigned int frame_count_;
unsigned int packet_count_;
bool was_grayscale_;
bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
AVFrame *frame, Image *blue_noise, bool grayscale,
bool overwrite);
std::tuple<bool, std::vector<AVFrame *>> HandleDecodingPacket(
AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame,
Image *blue_noise, bool grayscale, bool color_changed,
bool output_as_pngs);
bool HandleEncodingFrame(AVFormatContext *enc_format_ctx,
AVCodecContext *enc_codec_ctx, AVFrame *yuv_frame,
AVStream *video_stream);
};
#endif