Impl output to video, update program args

author Stephen Seo <seo.disparate@gmail.com>

Fri, 3 Dec 2021 11:09:56 +0000 (20:09 +0900)

committer Stephen Seo <seo.disparate@gmail.com>

Fri, 3 Dec 2021 11:09:56 +0000 (20:09 +0900)
author Stephen Seo <seo.disparate@gmail.com>
Fri, 3 Dec 2021 11:09:56 +0000 (20:09 +0900)
committer Stephen Seo <seo.disparate@gmail.com>
Fri, 3 Dec 2021 11:09:56 +0000 (20:09 +0900)
diff --git a/src/arg_parse.cc b/src/arg_parse.cc

index b61c9fa1764b1139c606e23bf127784b284ee124..67f904ce6f2451172751b767d9a75515424d3c87 100644 (file)
--- a/src/arg_parse.cc
+++ b/src/arg_parse.cc
@@ -14,7 +14,8 @@ void Args::PrintUsage() {
    std::cout
        << "Usage: [-h | --help] [-i <filename> | --input <filename>] [-o "
           "<filename> | --output <filename>] [-b <filename> | --blue "
-         "<filename>] [-g | --gray] [--image] [--video] [--overwrite]\n"
+         "<filename>] [-g | --gray] [--image] [--video] [--video-pngs] "
+         "[--overwrite]\n"
           "  -h | --help\t\t\t\tPrint this usage text\n"
           "  -i <filename> | --input <filename>\tSet input filename\n"
           "  -o <filename> | --output <filename>\tSet output filename\n"
@@ -22,6 +23,7 @@ void Args::PrintUsage() {
           "  -g | --gray\t\t\t\tDither output in grayscale\n"
           "  --image\t\t\t\tDither a single image\n"
           "  --video\t\t\t\tDither frames in a video\n"
+         "  --video-pngs\t\t\t\tDither frames but output as individual pngs\n"
           "  --overwrite\t\t\t\tAllow overwriting existing files\n"
        << std::endl;
  }
@@ -56,6 +58,9 @@ bool Args::ParseArgs(int argc, char **argv) {
        do_dither_image_ = true;
      } else if (std::strcmp(argv[0], "--video") == 0) {
        do_dither_image_ = false;
+    } else if (std::strcmp(argv[0], "--video-pngs") == 0) {
+      do_dither_image_ = false;
+      do_video_pngs_ = true;
      } else if (std::strcmp(argv[0], "--overwrite") == 0) {
        do_overwrite_ = true;
      } else {
diff --git a/src/arg_parse.h b/src/arg_parse.h

index 5f624946a94d8442f362f2c92e63bdf618c50509..0679d241f51b72678906ec04adf92936849b4eb4 100644 (file)
--- a/src/arg_parse.h
+++ b/src/arg_parse.h
@@ -14,6 +14,7 @@ struct Args {
    bool do_dither_image_;
    bool do_dither_grayscaled_;
    bool do_overwrite_;
+  bool do_video_pngs_;
    std::string input_filename;
    std::string output_filename;
    std::string blue_noise_filename;
diff --git a/src/main.cc b/src/main.cc

index e6c172082bdd2d5e1257c326c9f7d4f7213ca177..0724fd382e382c0940b49ef088faddc3db7d92b5 100644 (file)
--- a/src/main.cc
+++ b/src/main.cc
@@ -60,7 +60,8 @@ int main(int argc, char **argv) {
    } else {
      Video video(args.input_filename);
      if (!video.DitherVideo(args.output_filename, &blue_noise,
-                           args.do_dither_grayscaled_, args.do_overwrite_)) {
+                           args.do_dither_grayscaled_, args.do_overwrite_,
+                           args.do_video_pngs_)) {
        std::cout << "ERROR: Failed to dither frames from input video \""
                  << args.input_filename << '"' << std::endl;
        Args::PrintUsage();
diff --git a/src/video.cc b/src/video.cc

index a8984aa646005bce96793d6fe4b7f65519fa7afd..aac4f59d92b27f9f411b085f33c32bfcfe519175 100644 (file)
--- a/src/video.cc
+++ b/src/video.cc
@@ -1,42 +1,63 @@
  #include "video.h"
  
+#include <cmath>
  #include <cstdlib>
  #include <cstring>
  #include <fstream>
  #include <iostream>
  
-extern "C" {
-#include <libavformat/avformat.h>
-}
-
  Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}
  
  Video::Video(const std::string &video_filename)
      : image_(),
        input_filename_(video_filename),
-      sws_context_(nullptr),
+      sws_dec_context_(nullptr),
+      sws_enc_context_(nullptr),
        frame_count_(0),
-      packet_count_(0) {}
+      packet_count_(0),
+      was_grayscale_(false) {}
  
  Video::~Video() {
-  if (sws_context_ != nullptr) {
-    sws_freeContext(sws_context_);
+  if (sws_dec_context_ != nullptr) {
+    sws_freeContext(sws_dec_context_);
    }
  }
  
  bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
-                        bool grayscale, bool overwrite) {
+                        bool grayscale, bool overwrite, bool output_as_pngs) {
    return DitherVideo(std::string(output_filename), blue_noise, grayscale,
-                     overwrite);
+                     overwrite, output_as_pngs);
  }
  
  bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
-                        bool grayscale, bool overwrite) {
+                        bool grayscale, bool overwrite, bool output_as_pngs) {
+  if (!overwrite && !output_as_pngs) {
+    // check if output_file exists
+    std::ifstream ifs(output_filename);
+    if (ifs.is_open()) {
+      std::cout << "ERROR: output file \"" << output_filename
+                << "\" exists "
+                   "and overwrite is disabled"
+                << std::endl;
+      return false;
+    }
+  }
+
+  frame_count_ = 0;
+
+  bool color_changed = false;
+  if (was_grayscale_ != grayscale) {
+    color_changed = true;
+  }
+  was_grayscale_ = grayscale;
+
+  // set up decoding
+
    // Get AVFormatContext for input file
-  AVFormatContext *avf_context = nullptr;
+  AVFormatContext *avf_dec_context = nullptr;
    std::string url = std::string("file:") + input_filename_;
    int return_value =
-      avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
+      avformat_open_input(&avf_dec_context, url.c_str(), nullptr, nullptr);
    if (return_value != 0) {
      std::cout << "ERROR: Failed to open input file to determine format"
                << std::endl;
@@ -44,60 +65,70 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
    }
  
    // Read from input file to fill in info in AVFormatContext
-  return_value = avformat_find_stream_info(avf_context, nullptr);
+  return_value = avformat_find_stream_info(avf_dec_context, nullptr);
    if (return_value < 0) {
      std::cout << "ERROR: Failed to determine input file stream info"
                << std::endl;
-    avformat_close_input(&avf_context);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
  
    // Get "best" video stream
-  AVCodec *avcodec = nullptr;
+  AVCodec *dec_codec = nullptr;
    return_value = av_find_best_stream(
-      avf_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &avcodec, 0);
+      avf_dec_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &dec_codec, 0);
    if (return_value < 0) {
      std::cout << "ERROR: Failed to get video stream in input file" << std::endl;
-    avformat_close_input(&avf_context);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
    int video_stream_idx = return_value;
  
    // Alloc codec context
-  AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
+  AVCodecContext *codec_ctx = avcodec_alloc_context3(dec_codec);
    if (!codec_ctx) {
      std::cout << "ERROR: Failed to alloc codec context" << std::endl;
-    avformat_close_input(&avf_context);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
  
    // Set codec parameters from input stream
    return_value = avcodec_parameters_to_context(
-      codec_ctx, avf_context->streams[video_stream_idx]->codecpar);
+      codec_ctx, avf_dec_context->streams[video_stream_idx]->codecpar);
    if (return_value < 0) {
      std::cout << "ERROR: Failed to set codec parameters from input stream"
                << std::endl;
      avcodec_free_context(&codec_ctx);
-    avformat_close_input(&avf_context);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
  
    // Init codec context
-  return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
+  return_value = avcodec_open2(codec_ctx, dec_codec, nullptr);
    if (return_value < 0) {
      std::cout << "ERROR: Failed to init codec context" << std::endl;
      avcodec_free_context(&codec_ctx);
-    avformat_close_input(&avf_context);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
  
-  av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0);
+  std::cout << "Dumping input video format info..." << std::endl;
+  av_dump_format(avf_dec_context, video_stream_idx, input_filename_.c_str(), 0);
+
+  // get input stream info
+  unsigned int width =
+      avf_dec_context->streams[video_stream_idx]->codecpar->width;
+  unsigned int height =
+      avf_dec_context->streams[video_stream_idx]->codecpar->height;
+  auto r_frame_rate = avf_dec_context->streams[video_stream_idx]->r_frame_rate;
+  decltype(r_frame_rate) time_base = {r_frame_rate.den, r_frame_rate.num};
  
    // Alloc a packet object for reading packets
    AVPacket *pkt = av_packet_alloc();
    if (!pkt) {
      std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
      avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
  
@@ -107,162 +138,473 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
      std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
      av_packet_free(&pkt);
      avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_dec_context);
      return false;
    }
  
+  // Set up encoding
+
+  // alloc/init encoding AVFormatContext
+  AVFormatContext *avf_enc_context = nullptr;
+  if (!output_as_pngs) {
+    return_value = avformat_alloc_output_context2(
+        &avf_enc_context, nullptr, nullptr, output_filename.c_str());
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to alloc/init avf_enc_context" << std::endl;
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+  }
+
+  // set output video codec (h264)
+  AVCodecContext *enc_codec_context = nullptr;
+  AVCodec *enc_codec = nullptr;
+
+  // get H264 codec
+  if (!output_as_pngs) {
+    enc_codec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_H264);
+    if (enc_codec == nullptr) {
+      std::cout << "ERROR: Failed to get H264 codec for encoding" << std::endl;
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+  }
+
+  // create new video stream
+  AVStream *enc_stream = nullptr;
+  if (!output_as_pngs) {
+    enc_stream = avformat_new_stream(avf_enc_context, enc_codec);
+    if (enc_stream == nullptr) {
+      std::cout << "ERROR: Failed to create encoding stream" << std::endl;
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+    // assign its id
+    enc_stream->id = avf_enc_context->nb_streams - 1;
+    // alloc enc AVCodecContext
+    enc_codec_context = avcodec_alloc_context3(enc_codec);
+    if (enc_codec_context == nullptr) {
+      std::cout << "ERROR: Failed to create AVCodecContext for encoding"
+                << std::endl;
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+
+    // set values on enc_codec_context
+    enc_codec_context->codec_id = AVCodecID::AV_CODEC_ID_H264;
+    enc_codec_context->bit_rate = kOutputBitrate;
+    enc_codec_context->width = width;
+    enc_codec_context->height = height;
+    enc_stream->time_base = time_base;
+    enc_codec_context->time_base = time_base;
+    enc_codec_context->gop_size = 12;
+    enc_codec_context->pix_fmt = AVPixelFormat::AV_PIX_FMT_YUV444P;
+    if (avf_enc_context->oformat->flags & AVFMT_GLOBALHEADER) {
+      enc_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+    }
+
+    // more init on enc_codec_context
+    return_value = avcodec_open2(enc_codec_context, enc_codec, nullptr);
+    if (return_value != 0) {
+      std::cout << "ERROR: Failed to init enc_codec_context" << std::endl;
+      avcodec_close(enc_codec_context);
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+
+    return_value = avcodec_parameters_from_context(enc_stream->codecpar,
+                                                   enc_codec_context);
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to set encoding codec parameters in stream"
+                << std::endl;
+      avcodec_close(enc_codec_context);
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+
+    std::cout << "Dumping output video format info..." << std::endl;
+    av_dump_format(avf_enc_context, enc_stream->id, output_filename.c_str(), 1);
+
+    // open output file if needed
+    if (!(avf_enc_context->oformat->flags & AVFMT_NOFILE)) {
+      return_value = avio_open(&avf_enc_context->pb, output_filename.c_str(),
+                               AVIO_FLAG_WRITE);
+      if (return_value < 0) {
+        std::cout << "ERROR: Failed to open file \"" << output_filename
+                  << "\" for writing" << std::endl;
+        avcodec_close(enc_codec_context);
+        avformat_free_context(avf_enc_context);
+        av_frame_free(&frame);
+        av_packet_free(&pkt);
+        avcodec_free_context(&codec_ctx);
+        avformat_close_input(&avf_dec_context);
+        return false;
+      }
+    }
+
+    // write header
+    return_value = avformat_write_header(avf_enc_context, nullptr);
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to write header in output video file"
+                << std::endl;
+      avcodec_close(enc_codec_context);
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+  }  // if (!output_as_pngs)
+
+  // do decoding, then encoding per frame
+
    // read frames
-  while (av_read_frame(avf_context, pkt) >= 0) {
+  while (av_read_frame(avf_dec_context, pkt) >= 0) {
      if (pkt->stream_index == video_stream_idx) {
-      if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale,
-                                overwrite)) {
+      auto ret_tuple =
+          HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale,
+                               color_changed, output_as_pngs);
+      if (!std::get<0>(ret_tuple)) {
+        avcodec_close(enc_codec_context);
+        avformat_free_context(avf_enc_context);
+        av_frame_free(&frame);
+        av_packet_free(&pkt);
+        avcodec_free_context(&codec_ctx);
+        avformat_close_input(&avf_dec_context);
          return false;
+      } else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) {
+        for (auto *yuv_frame : std::get<1>(ret_tuple)) {
+          if (!HandleEncodingFrame(avf_enc_context, enc_codec_context,
+                                   yuv_frame, enc_stream)) {
+            av_frame_free(&yuv_frame);
+            avcodec_close(enc_codec_context);
+            avformat_free_context(avf_enc_context);
+            av_frame_free(&frame);
+            av_packet_free(&pkt);
+            avcodec_free_context(&codec_ctx);
+            avformat_close_input(&avf_dec_context);
+            return false;
+          }
+          av_frame_free(&yuv_frame);
+        }
        }
      }
    }
  
    // flush decoders
-  if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale,
-                            overwrite)) {
+  auto ret_tuple =
+      HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale,
+                           color_changed, output_as_pngs);
+  if (!std::get<0>(ret_tuple)) {
+    avcodec_close(enc_codec_context);
+    avformat_free_context(avf_enc_context);
+    av_frame_free(&frame);
+    av_packet_free(&pkt);
+    avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_dec_context);
      return false;
+  } else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) {
+    for (auto *yuv_frame : std::get<1>(ret_tuple)) {
+      if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, yuv_frame,
+                               enc_stream)) {
+        av_frame_free(&yuv_frame);
+        avcodec_close(enc_codec_context);
+        avformat_free_context(avf_enc_context);
+        av_frame_free(&frame);
+        av_packet_free(&pkt);
+        avcodec_free_context(&codec_ctx);
+        avformat_close_input(&avf_dec_context);
+        return false;
+      }
+      av_frame_free(&yuv_frame);
+    }
+  }
+
+  if (!output_as_pngs) {
+    // flush encoder
+    if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, nullptr,
+                             enc_stream)) {
+      avcodec_close(enc_codec_context);
+      avformat_free_context(avf_enc_context);
+      av_frame_free(&frame);
+      av_packet_free(&pkt);
+      avcodec_free_context(&codec_ctx);
+      avformat_close_input(&avf_dec_context);
+      return false;
+    }
+
+    // finish encoding
+    av_write_trailer(avf_enc_context);
    }
  
    // cleanup
+  if (enc_codec_context) {
+    avcodec_close(enc_codec_context);
+  }
+  if (!output_as_pngs && !(avf_enc_context->oformat->flags & AVFMT_NOFILE)) {
+    avio_closep(&avf_enc_context->pb);
+  }
+  if (avf_enc_context) {
+    avformat_free_context(avf_enc_context);
+  }
    av_frame_free(&frame);
    av_packet_free(&pkt);
    avcodec_free_context(&codec_ctx);
-  avformat_close_input(&avf_context);
+  avformat_close_input(&avf_dec_context);
    return true;
  }
  
-bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
-                                 AVFrame *frame, Image *blue_noise,
-                                 bool grayscale, bool overwrite) {
+std::tuple<bool, std::vector<AVFrame *>> Video::HandleDecodingPacket(
+    AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, Image *blue_noise,
+    bool grayscale, bool color_changed, bool output_as_pngs) {
    int return_value = avcodec_send_packet(codec_ctx, pkt);
    if (return_value < 0) {
      std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
                << std::endl;
-    return false;
+    return {false, {}};
    }
  
    return_value = 0;
+  std::vector<AVFrame *> return_frames{};
+
    while (return_value >= 0) {
      return_value = avcodec_receive_frame(codec_ctx, frame);
      if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
-      return true;
+      return {true, return_frames};
      } else if (return_value < 0) {
        std::cout << "ERROR: Failed to get frame from decoded packet(s)"
                  << std::endl;
-      return false;
+      return {false, {}};
      }
      ++frame_count_;
  
      std::cout << "Frame " << frame_count_ << std::endl;  // TODO DEBUG
  
-    // output buffer info for converting pixel format to RGBA
-    uint8_t *dst[AV_NUM_DATA_POINTERS];
-    dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16,
-                               sizeof(uint8_t));
-    for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) {
-      dst[i] = nullptr;
-    }
-    std::array<int, AV_NUM_DATA_POINTERS> dst_strides = {
-        frame->width * (grayscale ? 1 : 4), 0, 0, 0, 0, 0, 0, 0};
-
-    unsigned int line_count = 0;
-    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
-      if (frame->linesize[i] > 0) {
-        ++line_count;
-      }
-    }
-
-    if (line_count == 0) {
-      std::cout << "ERROR: Invalid number of picture planes" << std::endl;
-      for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
-        free(dst[i]);
-      }
-      return false;
+    AVFrame *temp_frame = av_frame_alloc();
+    temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA;
+    temp_frame->width = frame->width;
+    temp_frame->height = frame->height;
+    return_value = av_frame_get_buffer(temp_frame, 0);
+    if (return_value != 0) {
+      std::cout << "ERROR: Failed to init temp_frame to receive RGBA data"
+                << std::endl;
+      av_frame_free(&temp_frame);
+      return {false, {}};
      }
  
      // Convert colors to RGBA
-    if (sws_context_ == nullptr) {
-      sws_context_ = sws_getContext(frame->width, frame->height,
-                                    (AVPixelFormat)frame->format, frame->width,
-                                    frame->height,
-                                    grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
-                                              : AVPixelFormat::AV_PIX_FMT_RGBA,
-                                    SWS_BILINEAR, nullptr, nullptr, nullptr);
-      if (sws_context_ == nullptr) {
-        std::cout << "ERROR: Failed to init sws_context_" << std::endl;
-        for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
-          free(dst[i]);
-        }
-        return false;
+    if (sws_dec_context_ == nullptr) {
+      sws_dec_context_ = sws_getContext(
+          frame->width, frame->height, (AVPixelFormat)frame->format,
+          frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_RGBA,
+          SWS_BILINEAR, nullptr, nullptr, nullptr);
+      if (sws_dec_context_ == nullptr) {
+        std::cout << "ERROR: Failed to init sws_dec_context_" << std::endl;
+        av_frame_free(&temp_frame);
+        return {false, {}};
        }
      }
  
-    return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0,
-                             frame->height, dst, dst_strides.data());
+    return_value =
+        sws_scale(sws_dec_context_, frame->data, frame->linesize, 0,
+                  frame->height, temp_frame->data, temp_frame->linesize);
      if (return_value < 0) {
        std::cout << "ERROR: Failed to convert pixel format of frame"
                  << std::endl;
-      for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
-        free(dst[i]);
-      }
-      return false;
+      av_frame_free(&temp_frame);
+      return {false, {}};
      }
  
      // put RGBA data into image
      image_.width_ = frame->width;
      image_.height_ = frame->height;
-    if (grayscale) {
-      image_.is_grayscale_ = true;
-      image_.data_.resize(frame->width * frame->height);
-      for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) {
-        image_.data_.at(i) = dst[0][i];
-      }
-    } else {
-      image_.is_grayscale_ = false;
-      image_.data_.resize(frame->width * frame->height * 4);
-      for (unsigned int y = 0; (int)y < frame->height; ++y) {
-        for (unsigned int x = 0; (int)x < frame->width; ++x) {
-          image_.data_.at(x * 4 + y * 4 * frame->width) =
-              dst[0][x * 4 + y * 4 * frame->width];
-          image_.data_.at(1 + x * 4 + y * 4 * frame->width) =
-              dst[0][1 + x * 4 + y * 4 * frame->width];
-          image_.data_.at(2 + x * 4 + y * 4 * frame->width) =
-              dst[0][2 + x * 4 + y * 4 * frame->width];
-          image_.data_.at(3 + x * 4 + y * 4 * frame->width) =
-              dst[0][3 + x * 4 + y * 4 * frame->width];
-        }
+    image_.is_grayscale_ = false;
+    image_.data_.resize(frame->width * frame->height * 4);
+    for (unsigned int y = 0; (int)y < frame->height; ++y) {
+      for (unsigned int x = 0; (int)x < frame->width; ++x) {
+        image_.data_.at(x * 4 + y * 4 * frame->width) =
+            temp_frame->data[0][x * 4 + y * 4 * frame->width];
+        image_.data_.at(1 + x * 4 + y * 4 * frame->width) =
+            temp_frame->data[0][1 + x * 4 + y * 4 * frame->width];
+        image_.data_.at(2 + x * 4 + y * 4 * frame->width) =
+            temp_frame->data[0][2 + x * 4 + y * 4 * frame->width];
+        image_.data_.at(3 + x * 4 + y * 4 * frame->width) =
+            temp_frame->data[0][3 + x * 4 + y * 4 * frame->width];
        }
      }
  
+    av_frame_unref(temp_frame);
+
      std::unique_ptr<Image> dithered_image;
      if (grayscale) {
        dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
      } else {
        dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
      }
+    if (!dithered_image) {
+      std::cout << "ERROR: Failed to dither video frame" << std::endl;
+      return {false, {}};
+    }
  
-    std::string out_name = "output_";
-    if (frame_count_ < 10) {
-      out_name += "000" + std::to_string(frame_count_);
-    } else if (frame_count_ < 100) {
-      out_name += "00" + std::to_string(frame_count_);
-    } else if (frame_count_ < 1000) {
-      out_name += "0" + std::to_string(frame_count_);
-    } else {
+    if (output_as_pngs) {
+      std::string out_name = "output_";
+      for (unsigned int i = 0; i < 9; ++i) {
+        if (frame_count_ < (unsigned int)std::pow(10, i)) {
+          out_name += "0";
+        }
+      }
        out_name += std::to_string(frame_count_);
+      out_name += ".png";
+      if (!dithered_image->SaveAsPNG(out_name, true)) {
+        return {false, {}};
+      }
+      return {true, {}};
      }
-    out_name += ".png";
-    if (!dithered_image->SaveAsPNG(out_name, overwrite)) {
-      return false;
+
+    // convert grayscale/RGBA to YUV444p
+    if (sws_enc_context_ != nullptr && color_changed) {
+      // switched between grayscale/RGBA, context needs to be recreated
+      sws_freeContext(sws_enc_context_);
+      sws_enc_context_ = nullptr;
+    }
+    if (sws_enc_context_ == nullptr) {
+      sws_enc_context_ = sws_getContext(
+          frame->width, frame->height,
+          grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
+                    : AVPixelFormat::AV_PIX_FMT_RGBA,
+          frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_YUV444P,
+          SWS_BILINEAR, nullptr, nullptr, nullptr);
+      if (sws_enc_context_ == nullptr) {
+        std::cout << "ERROR: Failed to init sws_enc_context_" << std::endl;
+        return {false, {}};
+      }
+    }
+
+    // rgba data info
+    if (grayscale) {
+      av_frame_free(&temp_frame);
+      temp_frame = av_frame_alloc();
+      temp_frame->format = AVPixelFormat::AV_PIX_FMT_GRAY8;
+      temp_frame->width = frame->width;
+      temp_frame->height = frame->height;
+      return_value = av_frame_get_buffer(temp_frame, 0);
+      if (return_value != 0) {
+        std::cout
+            << "ERROR: Failed to init temp_frame for conversion from grayscale"
+            << std::endl;
+        av_frame_free(&temp_frame);
+        return {false, {}};
+      }
+      std::memcpy(temp_frame->data[0], dithered_image->data_.data(),
+                  frame->width * frame->height);
+    } else {
+      temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA;
+      temp_frame->width = frame->width;
+      temp_frame->height = frame->height;
+      return_value = av_frame_get_buffer(temp_frame, 0);
+      if (return_value != 0) {
+        std::cout << "ERROR: Failed to init temp_frame for conversion from RGBA"
+                  << std::endl;
+        av_frame_free(&temp_frame);
+        return {false, {}};
+      }
+      std::memcpy(temp_frame->data[0], dithered_image->data_.data(),
+                  4 * frame->width * frame->height);
+    }
+
+    AVFrame *yuv_frame = av_frame_alloc();
+    if (frame == nullptr) {
+      std::cout
+          << "ERROR: Failed to alloc AVFrame for receiving YUV444p from RGBA"
+          << std::endl;
+      av_frame_free(&temp_frame);
+      return {false, {}};
+    }
+    yuv_frame->format = AVPixelFormat::AV_PIX_FMT_YUV444P;
+    yuv_frame->width = frame->width;
+    yuv_frame->height = frame->height;
+    return_value = av_frame_get_buffer(yuv_frame, 0);
+
+    return_value =
+        sws_scale(sws_enc_context_, temp_frame->data, temp_frame->linesize, 0,
+                  frame->height, yuv_frame->data, yuv_frame->linesize);
+    if (return_value <= 0) {
+      std::cout << "ERROR: Failed to convert RGBA to YUV444p with sws_scale"
+                << std::endl;
+      av_frame_free(&yuv_frame);
+      av_frame_free(&temp_frame);
+      return {false, {}};
      }
-    // TODO encode video with dithered_image
  
      // cleanup
-    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
-      free(dst[i]);
+    av_frame_free(&temp_frame);
+    yuv_frame->pts = frame_count_ - 1;
+    yuv_frame->pkt_duration = 1;
+    return_frames.push_back(yuv_frame);
+  }
+
+  return {true, return_frames};
+}
+
+bool Video::HandleEncodingFrame(AVFormatContext *enc_format_ctx,
+                                AVCodecContext *enc_codec_ctx,
+                                AVFrame *yuv_frame, AVStream *video_stream) {
+  int return_value;
+
+  return_value = avcodec_send_frame(enc_codec_ctx, yuv_frame);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to send frame to encoder" << std::endl;
+    return false;
+  }
+
+  AVPacket pkt;
+  std::memset(&pkt, 0, sizeof(AVPacket));
+  while (return_value >= 0) {
+    std::memset(&pkt, 0, sizeof(AVPacket));
+
+    return_value = avcodec_receive_packet(enc_codec_ctx, &pkt);
+    if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
+      break;
+    } else if (return_value < 0) {
+      std::cout << "ERROR: Failed to encode a frame" << std::endl;
+      return false;
+    }
+
+    // rescale timing fields (timestamps / durations)
+    av_packet_rescale_ts(&pkt, enc_codec_ctx->time_base,
+                         video_stream->time_base);
+    pkt.stream_index = video_stream->index;
+
+    // write frame
+    return_value = av_interleaved_write_frame(enc_format_ctx, &pkt);
+    av_packet_unref(&pkt);
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to write encoding packet" << std::endl;
+      return false;
      }
    }
  
diff --git a/src/video.h b/src/video.h

index 3e6c2e707c5055531ac405818af6118e1da01961..ebfc7d0b87da628679b65a442c8a94445f7e654b 100644 (file)
--- a/src/video.h
+++ b/src/video.h
@@ -1,8 +1,11 @@
  #ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_
  #define IGPUP_DITHERING_PROJECT_VIDEO_H_
  
+#include <tuple>
+
  extern "C" {
  #include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
  #include <libswscale/swscale.h>
  }
  
@@ -13,6 +16,8 @@ constexpr unsigned int kReadBufPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE;
  constexpr unsigned int kReadBufSizeWithPadding =
      kReadBufSize + kReadBufPaddingSize;
  
+constexpr unsigned int kOutputBitrate = 40000000;
+
  /*!
   * \brief Helper class that uses Image and OpenCLHandle to dither video frames.
   *
@@ -35,30 +40,39 @@ class Video {
  
    /// Same as DitherVideo(const std::string&, Image*, bool, bool)
    bool DitherVideo(const char *output_filename, Image *blue_noise,
-                   bool grayscale = false, bool overwrite = false);
+                   bool grayscale = false, bool overwrite = false,
+                   bool output_as_pngs = false);
  
    /*!
     * \brief Dithers the frames in the input video.
     *
-   * Currently, the program doesn't create the output video, but instead outputs
-   * each frame as an individual image in the current directory. If things go
-   * well, the expected behavior will be implemented soon.
+   * If output_as_pngs is true, then the output will be individual PNGs of each
+   * frame instead of a video file. This may be desirable because the output
+   * video struggles to maintain video quality.
     *
     * \return True on success.
     */
    bool DitherVideo(const std::string &output_filename, Image *blue_noise,
-                   bool grayscale = false, bool overwrite = false);
+                   bool grayscale = false, bool overwrite = false,
+                   bool output_as_pngs = false);
  
   private:
    Image image_;
    std::string input_filename_;
-  SwsContext *sws_context_;
+  SwsContext *sws_dec_context_;
+  SwsContext *sws_enc_context_;
    unsigned int frame_count_;
    unsigned int packet_count_;
+  bool was_grayscale_;
+
+  std::tuple<bool, std::vector<AVFrame *>> HandleDecodingPacket(
+      AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame,
+      Image *blue_noise, bool grayscale, bool color_changed,
+      bool output_as_pngs);
  
-  bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
-                            AVFrame *frame, Image *blue_noise, bool grayscale,
-                            bool overwrite);
+  bool HandleEncodingFrame(AVFormatContext *enc_format_ctx,
+                           AVCodecContext *enc_codec_ctx, AVFrame *yuv_frame,
+                           AVStream *video_stream);
  };
  
  #endif
author	Stephen Seo <seo.disparate@gmail.com>
	Fri, 3 Dec 2021 11:09:56 +0000 (20:09 +0900)
committer	Stephen Seo <seo.disparate@gmail.com>
	Fri, 3 Dec 2021 11:09:56 +0000 (20:09 +0900)
src/arg_parse.cc		patch \| blob \| history
src/arg_parse.h		patch \| blob \| history
src/main.cc		patch \| blob \| history
src/video.cc		patch \| blob \| history
src/video.h		patch \| blob \| history