diff --git a/src/arg_parse.cc b/src/arg_parse.cc index b61c9fa..67f904c 100644 --- a/src/arg_parse.cc +++ b/src/arg_parse.cc @@ -14,7 +14,8 @@ void Args::PrintUsage() { std::cout << "Usage: [-h | --help] [-i | --input ] [-o " " | --output ] [-b | --blue " - "] [-g | --gray] [--image] [--video] [--overwrite]\n" + "] [-g | --gray] [--image] [--video] [--video-pngs] " + "[--overwrite]\n" " -h | --help\t\t\t\tPrint this usage text\n" " -i | --input \tSet input filename\n" " -o | --output \tSet output filename\n" @@ -22,6 +23,7 @@ void Args::PrintUsage() { " -g | --gray\t\t\t\tDither output in grayscale\n" " --image\t\t\t\tDither a single image\n" " --video\t\t\t\tDither frames in a video\n" + " --video-pngs\t\t\t\tDither frames but output as individual pngs\n" " --overwrite\t\t\t\tAllow overwriting existing files\n" << std::endl; } @@ -56,6 +58,9 @@ bool Args::ParseArgs(int argc, char **argv) { do_dither_image_ = true; } else if (std::strcmp(argv[0], "--video") == 0) { do_dither_image_ = false; + } else if (std::strcmp(argv[0], "--video-pngs") == 0) { + do_dither_image_ = false; + do_video_pngs_ = true; } else if (std::strcmp(argv[0], "--overwrite") == 0) { do_overwrite_ = true; } else { diff --git a/src/arg_parse.h b/src/arg_parse.h index 5f62494..0679d24 100644 --- a/src/arg_parse.h +++ b/src/arg_parse.h @@ -14,6 +14,7 @@ struct Args { bool do_dither_image_; bool do_dither_grayscaled_; bool do_overwrite_; + bool do_video_pngs_; std::string input_filename; std::string output_filename; std::string blue_noise_filename; diff --git a/src/main.cc b/src/main.cc index e6c1720..0724fd3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -60,7 +60,8 @@ int main(int argc, char **argv) { } else { Video video(args.input_filename); if (!video.DitherVideo(args.output_filename, &blue_noise, - args.do_dither_grayscaled_, args.do_overwrite_)) { + args.do_dither_grayscaled_, args.do_overwrite_, + args.do_video_pngs_)) { std::cout << "ERROR: Failed to dither frames from input video \"" << 
args.input_filename << '"' << std::endl; Args::PrintUsage(); diff --git a/src/video.cc b/src/video.cc index a8984aa..aac4f59 100644 --- a/src/video.cc +++ b/src/video.cc @@ -1,42 +1,63 @@ #include "video.h" +#include #include #include #include #include -extern "C" { -#include -} - Video::Video(const char *video_filename) : Video(std::string(video_filename)) {} Video::Video(const std::string &video_filename) : image_(), input_filename_(video_filename), - sws_context_(nullptr), + sws_dec_context_(nullptr), + sws_enc_context_(nullptr), frame_count_(0), - packet_count_(0) {} + packet_count_(0), + was_grayscale_(false) {} Video::~Video() { - if (sws_context_ != nullptr) { - sws_freeContext(sws_context_); + if (sws_dec_context_ != nullptr) { + sws_freeContext(sws_dec_context_); } } bool Video::DitherVideo(const char *output_filename, Image *blue_noise, - bool grayscale, bool overwrite) { + bool grayscale, bool overwrite, bool output_as_pngs) { return DitherVideo(std::string(output_filename), blue_noise, grayscale, - overwrite); + overwrite, output_as_pngs); } bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, - bool grayscale, bool overwrite) { + bool grayscale, bool overwrite, bool output_as_pngs) { + if (!overwrite && !output_as_pngs) { + // check if output_file exists + std::ifstream ifs(output_filename); + if (ifs.is_open()) { + std::cout << "ERROR: output file \"" << output_filename + << "\" exists " + "and overwrite is disabled" + << std::endl; + return false; + } + } + + frame_count_ = 0; + + bool color_changed = false; + if (was_grayscale_ != grayscale) { + color_changed = true; + } + was_grayscale_ = grayscale; + + // set up decoding + // Get AVFormatContext for input file - AVFormatContext *avf_context = nullptr; + AVFormatContext *avf_dec_context = nullptr; std::string url = std::string("file:") + input_filename_; int return_value = - avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr); + 
avformat_open_input(&avf_dec_context, url.c_str(), nullptr, nullptr); if (return_value != 0) { std::cout << "ERROR: Failed to open input file to determine format" << std::endl; @@ -44,60 +65,70 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, } // Read from input file to fill in info in AVFormatContext - return_value = avformat_find_stream_info(avf_context, nullptr); + return_value = avformat_find_stream_info(avf_dec_context, nullptr); if (return_value < 0) { std::cout << "ERROR: Failed to determine input file stream info" << std::endl; - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } // Get "best" video stream - AVCodec *avcodec = nullptr; + AVCodec *dec_codec = nullptr; return_value = av_find_best_stream( - avf_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &avcodec, 0); + avf_dec_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &dec_codec, 0); if (return_value < 0) { std::cout << "ERROR: Failed to get video stream in input file" << std::endl; - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } int video_stream_idx = return_value; // Alloc codec context - AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec); + AVCodecContext *codec_ctx = avcodec_alloc_context3(dec_codec); if (!codec_ctx) { std::cout << "ERROR: Failed to alloc codec context" << std::endl; - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } // Set codec parameters from input stream return_value = avcodec_parameters_to_context( - codec_ctx, avf_context->streams[video_stream_idx]->codecpar); + codec_ctx, avf_dec_context->streams[video_stream_idx]->codecpar); if (return_value < 0) { std::cout << "ERROR: Failed to set codec parameters from input stream" << std::endl; avcodec_free_context(&codec_ctx); - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } // Init codec context - 
return_value = avcodec_open2(codec_ctx, avcodec, nullptr); + return_value = avcodec_open2(codec_ctx, dec_codec, nullptr); if (return_value < 0) { std::cout << "ERROR: Failed to init codec context" << std::endl; avcodec_free_context(&codec_ctx); - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } - av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0); + std::cout << "Dumping input video format info..." << std::endl; + av_dump_format(avf_dec_context, video_stream_idx, input_filename_.c_str(), 0); + + // get input stream info + unsigned int width = + avf_dec_context->streams[video_stream_idx]->codecpar->width; + unsigned int height = + avf_dec_context->streams[video_stream_idx]->codecpar->height; + auto r_frame_rate = avf_dec_context->streams[video_stream_idx]->r_frame_rate; + decltype(r_frame_rate) time_base = {r_frame_rate.den, r_frame_rate.num}; // Alloc a packet object for reading packets AVPacket *pkt = av_packet_alloc(); if (!pkt) { std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl; avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; } @@ -107,162 +138,473 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, std::cout << "ERROR: Failed to alloc video frame object" << std::endl; av_packet_free(&pkt); avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; } - // read frames - while (av_read_frame(avf_context, pkt) >= 0) { - if (pkt->stream_index == video_stream_idx) { - if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale, - overwrite)) { + // Set up encoding + + // alloc/init encoding AVFormatContext + AVFormatContext *avf_enc_context = nullptr; + if (!output_as_pngs) { + return_value = avformat_alloc_output_context2( + &avf_enc_context, nullptr, nullptr, output_filename.c_str()); + if (return_value < 0) { + std::cout << "ERROR: Failed to alloc/init avf_enc_context" << 
std::endl; + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + } + + // set output video codec (h264) + AVCodecContext *enc_codec_context = nullptr; + AVCodec *enc_codec = nullptr; + + // get H264 codec + if (!output_as_pngs) { + enc_codec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_H264); + if (enc_codec == nullptr) { + std::cout << "ERROR: Failed to get H264 codec for encoding" << std::endl; + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + } + + // create new video stream + AVStream *enc_stream = nullptr; + if (!output_as_pngs) { + enc_stream = avformat_new_stream(avf_enc_context, enc_codec); + if (enc_stream == nullptr) { + std::cout << "ERROR: Failed to create encoding stream" << std::endl; + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + // assign its id + enc_stream->id = avf_enc_context->nb_streams - 1; + // alloc enc AVCodecContext + enc_codec_context = avcodec_alloc_context3(enc_codec); + if (enc_codec_context == nullptr) { + std::cout << "ERROR: Failed to create AVCodecContext for encoding" + << std::endl; + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + // set values on enc_codec_context + enc_codec_context->codec_id = AVCodecID::AV_CODEC_ID_H264; + enc_codec_context->bit_rate = kOutputBitrate; + enc_codec_context->width = width; + enc_codec_context->height = height; + enc_stream->time_base = time_base; + enc_codec_context->time_base = time_base; + enc_codec_context->gop_size = 12; + enc_codec_context->pix_fmt = 
AVPixelFormat::AV_PIX_FMT_YUV444P; + if (avf_enc_context->oformat->flags & AVFMT_GLOBALHEADER) { + enc_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + // more init on enc_codec_context + return_value = avcodec_open2(enc_codec_context, enc_codec, nullptr); + if (return_value != 0) { + std::cout << "ERROR: Failed to init enc_codec_context" << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + return_value = avcodec_parameters_from_context(enc_stream->codecpar, + enc_codec_context); + if (return_value < 0) { + std::cout << "ERROR: Failed to set encoding codec parameters in stream" + << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + std::cout << "Dumping output video format info..." 
<< std::endl; + av_dump_format(avf_enc_context, enc_stream->id, output_filename.c_str(), 1); + + // open output file if needed + if (!(avf_enc_context->oformat->flags & AVFMT_NOFILE)) { + return_value = avio_open(&avf_enc_context->pb, output_filename.c_str(), + AVIO_FLAG_WRITE); + if (return_value < 0) { + std::cout << "ERROR: Failed to open file \"" << output_filename + << "\" for writing" << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; } } + + // write header + return_value = avformat_write_header(avf_enc_context, nullptr); + if (return_value < 0) { + std::cout << "ERROR: Failed to write header in output video file" + << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + } // if (!output_as_pngs) + + // do decoding, then encoding per frame + + // read frames + while (av_read_frame(avf_dec_context, pkt) >= 0) { + if (pkt->stream_index == video_stream_idx) { + auto ret_tuple = + HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale, + color_changed, output_as_pngs); + if (!std::get<0>(ret_tuple)) { + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) { + for (auto *yuv_frame : std::get<1>(ret_tuple)) { + if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, + yuv_frame, enc_stream)) { + av_frame_free(&yuv_frame); + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + 
avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + av_frame_free(&yuv_frame); + } + } + } } // flush decoders - if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale, - overwrite)) { + auto ret_tuple = + HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale, + color_changed, output_as_pngs); + if (!std::get<0>(ret_tuple)) { + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; + } else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) { + for (auto *yuv_frame : std::get<1>(ret_tuple)) { + if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, yuv_frame, + enc_stream)) { + av_frame_free(&yuv_frame); + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + av_frame_free(&yuv_frame); + } + } + + if (!output_as_pngs) { + // flush encoder + if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, nullptr, + enc_stream)) { + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + // finish encoding + av_write_trailer(avf_enc_context); } // cleanup + if (enc_codec_context) { + avcodec_close(enc_codec_context); + } + if (!output_as_pngs && !(avf_enc_context->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&avf_enc_context->pb); + } + if (avf_enc_context) { + avformat_free_context(avf_enc_context); + } av_frame_free(&frame); av_packet_free(&pkt); avcodec_free_context(&codec_ctx); - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return true; } -bool 
Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, - AVFrame *frame, Image *blue_noise, - bool grayscale, bool overwrite) { +std::tuple> Video::HandleDecodingPacket( + AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, Image *blue_noise, + bool grayscale, bool color_changed, bool output_as_pngs) { int return_value = avcodec_send_packet(codec_ctx, pkt); if (return_value < 0) { std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')' << std::endl; - return false; + return {false, {}}; } return_value = 0; + std::vector return_frames{}; + while (return_value >= 0) { return_value = avcodec_receive_frame(codec_ctx, frame); if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) { - return true; + return {true, return_frames}; } else if (return_value < 0) { std::cout << "ERROR: Failed to get frame from decoded packet(s)" << std::endl; - return false; + return {false, {}}; } ++frame_count_; std::cout << "Frame " << frame_count_ << std::endl; // TODO DEBUG - // output buffer info for converting pixel format to RGBA - uint8_t *dst[AV_NUM_DATA_POINTERS]; - dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16, - sizeof(uint8_t)); - for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) { - dst[i] = nullptr; - } - std::array dst_strides = { - frame->width * (grayscale ? 
1 : 4), 0, 0, 0, 0, 0, 0, 0}; - - unsigned int line_count = 0; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - if (frame->linesize[i] > 0) { - ++line_count; - } - } - - if (line_count == 0) { - std::cout << "ERROR: Invalid number of picture planes" << std::endl; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); - } - return false; + AVFrame *temp_frame = av_frame_alloc(); + temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA; + temp_frame->width = frame->width; + temp_frame->height = frame->height; + return_value = av_frame_get_buffer(temp_frame, 0); + if (return_value != 0) { + std::cout << "ERROR: Failed to init temp_frame to receive RGBA data" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; } // Convert colors to RGBA - if (sws_context_ == nullptr) { - sws_context_ = sws_getContext(frame->width, frame->height, - (AVPixelFormat)frame->format, frame->width, - frame->height, - grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8 - : AVPixelFormat::AV_PIX_FMT_RGBA, - SWS_BILINEAR, nullptr, nullptr, nullptr); - if (sws_context_ == nullptr) { - std::cout << "ERROR: Failed to init sws_context_" << std::endl; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); - } - return false; + if (sws_dec_context_ == nullptr) { + sws_dec_context_ = sws_getContext( + frame->width, frame->height, (AVPixelFormat)frame->format, + frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_RGBA, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (sws_dec_context_ == nullptr) { + std::cout << "ERROR: Failed to init sws_dec_context_" << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; } } - return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0, - frame->height, dst, dst_strides.data()); + return_value = + sws_scale(sws_dec_context_, frame->data, frame->linesize, 0, + frame->height, temp_frame->data, temp_frame->linesize); if (return_value < 0) { std::cout << "ERROR: Failed to convert pixel 
format of frame" << std::endl; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); - } - return false; + av_frame_free(&temp_frame); + return {false, {}}; } // put RGBA data into image image_.width_ = frame->width; image_.height_ = frame->height; - if (grayscale) { - image_.is_grayscale_ = true; - image_.data_.resize(frame->width * frame->height); - for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) { - image_.data_.at(i) = dst[0][i]; - } - } else { - image_.is_grayscale_ = false; - image_.data_.resize(frame->width * frame->height * 4); - for (unsigned int y = 0; (int)y < frame->height; ++y) { - for (unsigned int x = 0; (int)x < frame->width; ++x) { - image_.data_.at(x * 4 + y * 4 * frame->width) = - dst[0][x * 4 + y * 4 * frame->width]; - image_.data_.at(1 + x * 4 + y * 4 * frame->width) = - dst[0][1 + x * 4 + y * 4 * frame->width]; - image_.data_.at(2 + x * 4 + y * 4 * frame->width) = - dst[0][2 + x * 4 + y * 4 * frame->width]; - image_.data_.at(3 + x * 4 + y * 4 * frame->width) = - dst[0][3 + x * 4 + y * 4 * frame->width]; - } + image_.is_grayscale_ = false; + image_.data_.resize(frame->width * frame->height * 4); + for (unsigned int y = 0; (int)y < frame->height; ++y) { + for (unsigned int x = 0; (int)x < frame->width; ++x) { + image_.data_.at(x * 4 + y * 4 * frame->width) = + temp_frame->data[0][x * 4 + y * 4 * frame->width]; + image_.data_.at(1 + x * 4 + y * 4 * frame->width) = + temp_frame->data[0][1 + x * 4 + y * 4 * frame->width]; + image_.data_.at(2 + x * 4 + y * 4 * frame->width) = + temp_frame->data[0][2 + x * 4 + y * 4 * frame->width]; + image_.data_.at(3 + x * 4 + y * 4 * frame->width) = + temp_frame->data[0][3 + x * 4 + y * 4 * frame->width]; } } + av_frame_unref(temp_frame); + std::unique_ptr dithered_image; if (grayscale) { dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise); } else { dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise); } + if (!dithered_image) { + std::cout << 
"ERROR: Failed to dither video frame" << std::endl; + return {false, {}}; + } - std::string out_name = "output_"; - if (frame_count_ < 10) { - out_name += "000" + std::to_string(frame_count_); - } else if (frame_count_ < 100) { - out_name += "00" + std::to_string(frame_count_); - } else if (frame_count_ < 1000) { - out_name += "0" + std::to_string(frame_count_); - } else { + if (output_as_pngs) { + std::string out_name = "output_"; + for (unsigned int i = 0; i < 9; ++i) { + if (frame_count_ < (unsigned int)std::pow(10, i)) { + out_name += "0"; + } + } out_name += std::to_string(frame_count_); + out_name += ".png"; + if (!dithered_image->SaveAsPNG(out_name, true)) { + return {false, {}}; + } + return {true, {}}; } - out_name += ".png"; - if (!dithered_image->SaveAsPNG(out_name, overwrite)) { - return false; + + // convert grayscale/RGBA to YUV444p + if (sws_enc_context_ != nullptr && color_changed) { + // switched between grayscale/RGBA, context needs to be recreated + sws_freeContext(sws_enc_context_); + sws_enc_context_ = nullptr; + } + if (sws_enc_context_ == nullptr) { + sws_enc_context_ = sws_getContext( + frame->width, frame->height, + grayscale ? 
AVPixelFormat::AV_PIX_FMT_GRAY8 + : AVPixelFormat::AV_PIX_FMT_RGBA, + frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_YUV444P, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (sws_enc_context_ == nullptr) { + std::cout << "ERROR: Failed to init sws_enc_context_" << std::endl; + return {false, {}}; + } + } + + // rgba data info + if (grayscale) { + av_frame_free(&temp_frame); + temp_frame = av_frame_alloc(); + temp_frame->format = AVPixelFormat::AV_PIX_FMT_GRAY8; + temp_frame->width = frame->width; + temp_frame->height = frame->height; + return_value = av_frame_get_buffer(temp_frame, 0); + if (return_value != 0) { + std::cout + << "ERROR: Failed to init temp_frame for conversion from grayscale" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; + } + std::memcpy(temp_frame->data[0], dithered_image->data_.data(), + frame->width * frame->height); + } else { + temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA; + temp_frame->width = frame->width; + temp_frame->height = frame->height; + return_value = av_frame_get_buffer(temp_frame, 0); + if (return_value != 0) { + std::cout << "ERROR: Failed to init temp_frame for conversion from RGBA" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; + } + std::memcpy(temp_frame->data[0], dithered_image->data_.data(), + 4 * frame->width * frame->height); + } + + AVFrame *yuv_frame = av_frame_alloc(); + if (frame == nullptr) { + std::cout + << "ERROR: Failed to alloc AVFrame for receiving YUV444p from RGBA" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; + } + yuv_frame->format = AVPixelFormat::AV_PIX_FMT_YUV444P; + yuv_frame->width = frame->width; + yuv_frame->height = frame->height; + return_value = av_frame_get_buffer(yuv_frame, 0); + + return_value = + sws_scale(sws_enc_context_, temp_frame->data, temp_frame->linesize, 0, + frame->height, yuv_frame->data, yuv_frame->linesize); + if (return_value <= 0) { + std::cout << "ERROR: Failed to convert RGBA to YUV444p with 
sws_scale" + << std::endl; + av_frame_free(&yuv_frame); + av_frame_free(&temp_frame); + return {false, {}}; } - // TODO encode video with dithered_image // cleanup - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); + av_frame_free(&temp_frame); + yuv_frame->pts = frame_count_ - 1; + yuv_frame->pkt_duration = 1; + return_frames.push_back(yuv_frame); + } + + return {true, return_frames}; +} + +bool Video::HandleEncodingFrame(AVFormatContext *enc_format_ctx, + AVCodecContext *enc_codec_ctx, + AVFrame *yuv_frame, AVStream *video_stream) { + int return_value; + + return_value = avcodec_send_frame(enc_codec_ctx, yuv_frame); + if (return_value < 0) { + std::cout << "ERROR: Failed to send frame to encoder" << std::endl; + return false; + } + + AVPacket pkt; + std::memset(&pkt, 0, sizeof(AVPacket)); + while (return_value >= 0) { + std::memset(&pkt, 0, sizeof(AVPacket)); + + return_value = avcodec_receive_packet(enc_codec_ctx, &pkt); + if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) { + break; + } else if (return_value < 0) { + std::cout << "ERROR: Failed to encode a frame" << std::endl; + return false; + } + + // rescale timing fields (timestamps / durations) + av_packet_rescale_ts(&pkt, enc_codec_ctx->time_base, + video_stream->time_base); + pkt.stream_index = video_stream->index; + + // write frame + return_value = av_interleaved_write_frame(enc_format_ctx, &pkt); + av_packet_unref(&pkt); + if (return_value < 0) { + std::cout << "ERROR: Failed to write encoding packet" << std::endl; + return false; } } diff --git a/src/video.h b/src/video.h index 3e6c2e7..ebfc7d0 100644 --- a/src/video.h +++ b/src/video.h @@ -1,8 +1,11 @@ #ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_ #define IGPUP_DITHERING_PROJECT_VIDEO_H_ +#include + extern "C" { #include +#include #include } @@ -13,6 +16,8 @@ constexpr unsigned int kReadBufPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE; constexpr unsigned int kReadBufSizeWithPadding = kReadBufSize + 
kReadBufPaddingSize; +constexpr unsigned int kOutputBitrate = 40000000; + /*! * \brief Helper class that uses Image and OpenCLHandle to dither video frames. * @@ -35,30 +40,39 @@ class Video { /// Same as DitherVideo(const std::string&, Image*, bool, bool) bool DitherVideo(const char *output_filename, Image *blue_noise, - bool grayscale = false, bool overwrite = false); + bool grayscale = false, bool overwrite = false, + bool output_as_pngs = false); /*! * \brief Dithers the frames in the input video. * - * Currently, the program doesn't create the output video, but instead outputs - * each frame as an individual image in the current directory. If things go - * well, the expected behavior will be implemented soon. + * If output_as_pngs is true, then the output will be individual PNGs of each + * frame instead of a video file. This may be desirable because the output + * video struggles to maintain video quality. * * \return True on success. */ bool DitherVideo(const std::string &output_filename, Image *blue_noise, - bool grayscale = false, bool overwrite = false); + bool grayscale = false, bool overwrite = false, + bool output_as_pngs = false); private: Image image_; std::string input_filename_; - SwsContext *sws_context_; + SwsContext *sws_dec_context_; + SwsContext *sws_enc_context_; unsigned int frame_count_; unsigned int packet_count_; + bool was_grayscale_; - bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, - AVFrame *frame, Image *blue_noise, bool grayscale, - bool overwrite); + std::tuple> HandleDecodingPacket( + AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, + Image *blue_noise, bool grayscale, bool color_changed, + bool output_as_pngs); + + bool HandleEncodingFrame(AVFormatContext *enc_format_ctx, + AVCodecContext *enc_codec_ctx, AVFrame *yuv_frame, + AVStream *video_stream); }; #endif