Impl convert video frames to color dithered pngs

parent 6677fba89c
commit 11f48592bf

6 changed files with 203 additions and 101 deletions
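Note: the new public entry point is Video::DitherVideo(), which decodes every frame of the input with libavcodec/libavformat, converts it to RGBA (or GRAY8) with libswscale, dithers it against a blue-noise Image, and writes each result as output_NNNN.png; re-encoding the dithered frames into a video is still a TODO in this commit. A minimal usage sketch mirroring the updated src/main.cc (the file names are the ones hard-coded there):

    #include "image.h"
    #include "video.h"

    int main() {
      Image blue_noise("bluenoise.png");  // blue-noise texture used for dithering
      if (!blue_noise.IsValid()) {
        return 1;
      }
      Video video("input.mp4");
      // grayscale defaults to false, so frames are color dithered
      return video.DitherVideo("output.mp4", &blue_noise) ? 0 : 1;
    }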
@@ -26,7 +26,8 @@ find_package(OpenCL REQUIRED)
 find_package(PNG REQUIRED)

 find_package(PkgConfig REQUIRED)
-pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED libavcodec libavformat libavutil)
+pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED
+    libavcodec libavformat libavutil libswscale)

 target_include_directories(DitheringProject PUBLIC
     ${OpenCL_INCLUDE_DIRS}
@@ -817,6 +817,10 @@ OpenCLHandle::Ptr Image::GetOpenCLHandle() {

 void Image::DecodePNG(const std::string &filename) {
   FILE *file = std::fopen(filename.c_str(), "rb");
+  if (!file) {
+    std::cout << "ERROR: Failed to open \"" << filename << '"' << std::endl;
+    return;
+  }

   // Check header of file to check if it is actually a png file.
   {
@@ -135,7 +135,7 @@ class Image {
   static const std::array<png_color, 2> dither_bw_palette_;
   static const std::array<png_color, 8> dither_color_palette_;
   OpenCLHandle::Ptr opencl_handle_;
-  /// Internally holds rgba
+  /// Internally holds rgba or grayscale (1 channel)
   std::vector<uint8_t> data_;
   unsigned int width_;
   unsigned int height_;
25 src/main.cc

@@ -1,30 +1,19 @@
 #include <iostream>

 #include "image.h"
+#include "video.h"

 int main(int argc, char **argv) {
-  // Image image("testin.ppm");
-  // image.SaveAsPNG("testout.png", true);
-  Image input("input.png");
-  if (!input.IsValid()) {
-    std::cout << "ERROR: input.png is invalid" << std::endl;
+  Image blue_noise("bluenoise.png");
+  if (!blue_noise.IsValid()) {
+    std::cout << "ERROR: Invalid bluenoise.png" << std::endl;
     return 1;
   }
-  Image bluenoise("bluenoise.png");
-  if (!bluenoise.IsValid()) {
-    std::cout << "ERROR: bluenoise.png is invalid" << std::endl;
+  Video video("input.mp4");
+  if (!video.DitherVideo("output.mp4", &blue_noise)) {
+    std::cout << "ERROR: Failed to dither video" << std::endl;
     return 1;
   }
-
-  // auto output = input.ToGrayscaleDitheredWithBlueNoise(&bluenoise);
-  auto output = input.ToColorDitheredWithBlueNoise(&bluenoise);
-  if (!output || !output->IsValid()) {
-    std::cout << "ERROR: output Image is invalid" << std::endl;
-    return 1;
-  }
-  output->SaveAsPNG("output.png", true);

   return 0;
 }
253 src/video.cc

@@ -1,5 +1,6 @@
 #include "video.h"

+#include <cstdlib>
 #include <cstring>
 #include <fstream>
 #include <iostream>
@@ -11,18 +12,28 @@ extern "C" {
 Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}

 Video::Video(const std::string &video_filename)
-    : image(), input_filename(video_filename) {}
+    : image_(),
+      input_filename_(video_filename),
+      sws_context_(nullptr),
+      frame_count_(0),
+      packet_count_(0) {}

-bool Video::DitherGrayscale(const char *output_filename) {
-  return DitherGrayscale(std::string(output_filename));
+Video::~Video() {
+  if (sws_context_ != nullptr) {
+    sws_freeContext(sws_context_);
+  }
 }

-bool Video::DitherGrayscale(const std::string &output_filename) {
-  // determine input file format
+bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
+                        bool grayscale) {
+  return DitherVideo(std::string(output_filename), blue_noise, grayscale);
+}
+
+bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
+                        bool grayscale) {
   // Get AVFormatContext for input file
   AVFormatContext *avf_context = nullptr;
-  std::string url = std::string("file:") + input_filename;
+  std::string url = std::string("file:") + input_filename_;
   int return_value =
       avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
   if (return_value != 0) {
@@ -49,42 +60,43 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
     avformat_close_input(&avf_context);
     return false;
   }
+  int video_stream_idx = return_value;

-  // cleanup AVFormatContext as it is no longer needed
-  avformat_close_input(&avf_context);
-
-  // Init required objects for decoding
-
-  // Init parser
-  AVCodecParserContext *parser = av_parser_init(avcodec->id);
-  if (!parser) {
-    std::cout << "ERROR: Failed to init codec parser" << std::endl;
-    return false;
-  }
-
   // Alloc codec context
   AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
   if (!codec_ctx) {
     std::cout << "ERROR: Failed to alloc codec context" << std::endl;
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
+    return false;
+  }
+
+  // Set codec parameters from input stream
+  return_value = avcodec_parameters_to_context(
+      codec_ctx, avf_context->streams[video_stream_idx]->codecpar);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to set codec parameters from input stream"
+              << std::endl;
+    avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_context);
     return false;
   }

   // Init codec context
   return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
-  if (return_value == 0) {
+  if (return_value < 0) {
     std::cout << "ERROR: Failed to init codec context" << std::endl;
     avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
     return false;
   }

+  av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0);
+
   // Alloc a packet object for reading packets
   AVPacket *pkt = av_packet_alloc();
   if (!pkt) {
     std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
     avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
     return false;
   }

@@ -93,76 +105,161 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
   if (!frame) {
     std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
     av_packet_free(&pkt);
-    av_parser_close(parser);
     avcodec_free_context(&codec_ctx);
     return false;
   }

-  // Now the file will be opened for decoding the "best" video stream
-  std::ifstream ifs(input_filename);
-  if (!ifs.is_open() || !ifs.good()) {
-    std::cout << "ERROR: Failed to open input file \"" << input_filename << '"'
-              << std::endl;
-    av_frame_free(&frame);
-    av_packet_free(&pkt);
-    avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
-    return false;
-  }
-
-  // Set up buffer to read from input file
-  std::array<uint8_t, kReadBufSizeWithPadding> buf;
-  // Fill end of buffer with 0 to avoid possible overreading (as shown in
-  // example code)
-  std::memset(buf.data() + kReadBufSize, 0, kReadBufPaddingSize);
-
-  std::streamsize read_count;
-  uint8_t *data_ptr;
-  while (ifs.good()) {
-    ifs.read(reinterpret_cast<char *>(buf.data()), kReadBufSize);
-    read_count = ifs.gcount();
-    data_ptr = buf.data();
-    if (read_count == 0) {
-      // read 0 bytes, probably reached exactly EOF
-      break;
-    }
-
-    while (read_count > 0) {
-      return_value =
-          av_parser_parse2(parser, codec_ctx, &pkt->data, &pkt->size, data_ptr,
-                           read_count, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
-      if (return_value < 0) {
-        std::cout << "ERROR: Failed to parse input file" << std::endl;
-        av_frame_free(&frame);
-        av_packet_free(&pkt);
-        avcodec_free_context(&codec_ctx);
-        av_parser_close(parser);
-        return false;
-      }
-      data_ptr += return_value;
-      read_count -= return_value;
-
-      if (pkt->size) {
-        // TODO use packet
-      }
-    }
-  }
-
-  if (ifs.fail()) {
-    std::cout << "ERROR: Read error on input file" << std::endl;
-    av_frame_free(&frame);
-    av_packet_free(&pkt);
-    avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
-    return false;
-  }
-
-  // TODO flush decoder
+  // read frames
+  while (av_read_frame(avf_context, pkt) >= 0) {
+    if (pkt->stream_index == video_stream_idx) {
+      if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale)) {
+        return false;
+      }
+    }
+  }
+
+  // flush decoders
+  if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale)) {
+    return false;
+  }

   // cleanup
   av_frame_free(&frame);
   av_packet_free(&pkt);
   avcodec_free_context(&codec_ctx);
-  av_parser_close(parser);
+  avformat_close_input(&avf_context);
+  return true;
+}
+
+bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
+                                 AVFrame *frame, Image *blue_noise,
+                                 bool grayscale) {
+  int return_value = avcodec_send_packet(codec_ctx, pkt);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
+              << std::endl;
+    return false;
+  }
+
+  return_value = 0;
+  while (return_value >= 0) {
+    return_value = avcodec_receive_frame(codec_ctx, frame);
+    if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
+      return true;
+    } else if (return_value < 0) {
+      std::cout << "ERROR: Failed to get frame from decoded packet(s)"
+                << std::endl;
+      return false;
+    }
+    ++frame_count_;
+
+    std::cout << "Frame " << frame_count_ << std::endl;  // TODO DEBUG
+
+    // output buffer info for converting pixel format to RGBA
+    uint8_t *dst[AV_NUM_DATA_POINTERS];
+    dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16,
+                               sizeof(uint8_t));
+    for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) {
+      dst[i] = nullptr;
+    }
+    std::array<int, AV_NUM_DATA_POINTERS> dst_strides = {
+        frame->width * (grayscale ? 1 : 4), 0, 0, 0, 0, 0, 0, 0};
+
+    unsigned int line_count = 0;
+    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+      if (frame->linesize[i] > 0) {
+        ++line_count;
+      }
+    }
+
+    if (line_count == 0) {
+      std::cout << "ERROR: Invalid number of picture planes" << std::endl;
+      for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+        free(dst[i]);
+      }
+      return false;
+    }
+
+    // Convert colors to RGBA
+    if (sws_context_ == nullptr) {
+      sws_context_ = sws_getContext(frame->width, frame->height,
+                                    (AVPixelFormat)frame->format, frame->width,
+                                    frame->height,
+                                    grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
+                                              : AVPixelFormat::AV_PIX_FMT_RGBA,
+                                    SWS_BILINEAR, nullptr, nullptr, nullptr);
+      if (sws_context_ == nullptr) {
+        std::cout << "ERROR: Failed to init sws_context_" << std::endl;
+        for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+          free(dst[i]);
+        }
+        return false;
+      }
+    }
+
+    return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0,
+                             frame->height, dst, dst_strides.data());
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to convert pixel format of frame"
+                << std::endl;
+      for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+        free(dst[i]);
+      }
+      return false;
+    }
+
+    // put RGBA data into image
+    image_.width_ = frame->width;
+    image_.height_ = frame->height;
+    if (grayscale) {
+      image_.is_grayscale_ = true;
+      image_.data_.resize(frame->width * frame->height);
+      for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) {
+        image_.data_.at(i) = dst[0][i];
+      }
+    } else {
+      image_.is_grayscale_ = false;
+      image_.data_.resize(frame->width * frame->height * 4);
+      for (unsigned int y = 0; (int)y < frame->height; ++y) {
+        for (unsigned int x = 0; (int)x < frame->width; ++x) {
+          image_.data_.at(x * 4 + y * 4 * frame->width) =
+              dst[0][x * 4 + y * 4 * frame->width];
+          image_.data_.at(1 + x * 4 + y * 4 * frame->width) =
+              dst[0][1 + x * 4 + y * 4 * frame->width];
+          image_.data_.at(2 + x * 4 + y * 4 * frame->width) =
+              dst[0][2 + x * 4 + y * 4 * frame->width];
+          image_.data_.at(3 + x * 4 + y * 4 * frame->width) =
+              dst[0][3 + x * 4 + y * 4 * frame->width];
+        }
+      }
+    }
+
+    std::unique_ptr<Image> dithered_image;
+    if (grayscale) {
+      dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
+    } else {
+      dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
+    }
+
+    std::string out_name = "output_";
+    if (frame_count_ < 10) {
+      out_name += "000" + std::to_string(frame_count_);
+    } else if (frame_count_ < 100) {
+      out_name += "00" + std::to_string(frame_count_);
+    } else if (frame_count_ < 1000) {
+      out_name += "0" + std::to_string(frame_count_);
+    } else {
+      out_name += std::to_string(frame_count_);
+    }
+    out_name += ".png";
+    dithered_image->SaveAsPNG(out_name, false);
+    // TODO encode video with dithered_image
+
+    // cleanup
+    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+      free(dst[i]);
+    }
+  }

   return true;
 }
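Note on the pixel copy in HandleDecodingPacket above: dst_strides[0] is set to frame->width * channels and image_.data_ is resized to exactly width * height * channels, so the converted plane in dst[0] is tightly packed and the per-byte loops amount to one contiguous copy. A sketch of an equivalent bulk copy, assuming the packing stays as configured above:

    // Assumes dst[0] is tightly packed (stride == frame->width * channels,
    // exactly as set in dst_strides), so one memcpy matches the per-pixel loops.
    const std::size_t channels = grayscale ? 1 : 4;
    const std::size_t byte_count =
        static_cast<std::size_t>(frame->width) * frame->height * channels;
    image_.data_.resize(byte_count);
    std::memcpy(image_.data_.data(), dst[0], byte_count);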
19 src/video.h

@@ -3,6 +3,7 @@

 extern "C" {
 #include <libavcodec/avcodec.h>
+#include <libswscale/swscale.h>
 }

 #include "image.h"
@@ -17,12 +18,22 @@ class Video {
   explicit Video(const char *video_filename);
   explicit Video(const std::string &video_filename);

-  bool DitherGrayscale(const char *output_filename);
-  bool DitherGrayscale(const std::string &output_filename);
+  ~Video();
+
+  bool DitherVideo(const char *output_filename, Image *blue_noise,
+                   bool grayscale = false);
+  bool DitherVideo(const std::string &output_filename, Image *blue_noise,
+                   bool grayscale = false);

  private:
-  Image image;
-  std::string input_filename;
+  Image image_;
+  std::string input_filename_;
+  SwsContext *sws_context_;
+  unsigned int frame_count_;
+  unsigned int packet_count_;
+
+  bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
+                            AVFrame *frame, Image *blue_noise, bool grayscale);
 };

 #endif
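Note: the grayscale parameter added to DitherVideo defaults to false, so callers get the color-dithered path unless they opt in to the single-channel (GRAY8) path handled in HandleDecodingPacket. A short sketch, reusing the video and blue_noise objects from the main.cc example above:

    // Color dithering (default):
    video.DitherVideo("output.mp4", &blue_noise);
    // Grayscale dithering (frames converted to GRAY8 before dithering):
    video.DitherVideo("output.mp4", &blue_noise, /*grayscale=*/true);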