Impl convert video frames to color dithered pngs

author Stephen Seo <seo.disparate@gmail.com>

Tue, 30 Nov 2021 07:02:51 +0000 (16:02 +0900)

committer Stephen Seo <seo.disparate@gmail.com>

Tue, 30 Nov 2021 07:02:51 +0000 (16:02 +0900)
author Stephen Seo <seo.disparate@gmail.com>
Tue, 30 Nov 2021 07:02:51 +0000 (16:02 +0900)
committer Stephen Seo <seo.disparate@gmail.com>
Tue, 30 Nov 2021 07:02:51 +0000 (16:02 +0900)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 1518449c5a80b8d122ce9e1ad266f21629ed2fd9..368bc1a7ee3bc68472188027055a194eaad748c5 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,8 @@ find_package(OpenCL REQUIRED)
  find_package(PNG REQUIRED)
  
  find_package(PkgConfig REQUIRED)
-pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED libavcodec libavformat libavutil)
+pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED
+    libavcodec libavformat libavutil libswscale)
  
  target_include_directories(DitheringProject PUBLIC
    ${OpenCL_INCLUDE_DIRS}
diff --git a/src/image.cc b/src/image.cc

index c391a0eff2b60fcf67186712e30f69cf8d358d2f..23c1c379a993a8946467f8b6357dcd1ddcced53a 100644 (file)
--- a/src/image.cc
+++ b/src/image.cc
@@ -817,6 +817,10 @@ OpenCLHandle::Ptr Image::GetOpenCLHandle() {
  
  void Image::DecodePNG(const std::string &filename) {
    FILE *file = std::fopen(filename.c_str(), "rb");
+  if (!file) {
+    std::cout << "ERROR: Failed to open \"" << filename << '"' << std::endl;
+    return;
+  }
  
    // Check header of file to check if it is actually a png file.
    {
diff --git a/src/image.h b/src/image.h

index 8671ea43390bdfcd1fc2db1fbb0c7cfc72fd318a..2ccc8e5ad8dd90d29b36fd56eacec7d93e5be74d 100644 (file)
--- a/src/image.h
+++ b/src/image.h
@@ -135,7 +135,7 @@ class Image {
    static const std::array<png_color, 2> dither_bw_palette_;
    static const std::array<png_color, 8> dither_color_palette_;
    OpenCLHandle::Ptr opencl_handle_;
-  /// Internally holds rgba
+  /// Internally holds rgba or grayscale (1 channel)
    std::vector<uint8_t> data_;
    unsigned int width_;
    unsigned int height_;
diff --git a/src/main.cc b/src/main.cc

index 3172e3285f2c48779434e01fa22967873203ae8c..90e7ba513e55ee48de63aba485d963599c8b1bc1 100644 (file)
--- a/src/main.cc
+++ b/src/main.cc
@@ -1,30 +1,19 @@
  #include <iostream>
  
  #include "image.h"
+#include "video.h"
  
  int main(int argc, char **argv) {
-  // Image image("testin.ppm");
-  // image.SaveAsPNG("testout.png", true);
-
-  Image input("input.png");
-  if (!input.IsValid()) {
-    std::cout << "ERROR: input.png is invalid" << std::endl;
+  Image blue_noise("bluenoise.png");
+  if (!blue_noise.IsValid()) {
+    std::cout << "ERROR: Invalid bluenoise.png" << std::endl;
      return 1;
    }
-
-  Image bluenoise("bluenoise.png");
-  if (!bluenoise.IsValid()) {
-    std::cout << "ERROR: bluenoise.png is invalid" << std::endl;
-    return 1;
-  }
-
-  // auto output = input.ToGrayscaleDitheredWithBlueNoise(&bluenoise);
-  auto output = input.ToColorDitheredWithBlueNoise(&bluenoise);
-  if (!output || !output->IsValid()) {
-    std::cout << "ERROR: output Image is invalid" << std::endl;
+  Video video("input.mp4");
+  if (!video.DitherVideo("output.mp4", &blue_noise)) {
+    std::cout << "ERROR: Failed to dither video" << std::endl;
      return 1;
    }
-  output->SaveAsPNG("output.png", true);
  
    return 0;
  }
diff --git a/src/video.cc b/src/video.cc

index ac865003976323229cce570d6395a63cc4630a8d..bcf8795d3bb5d7ea417630b9fd254da676cda360 100644 (file)
--- a/src/video.cc
+++ b/src/video.cc
@@ -1,5 +1,6 @@
  #include "video.h"
  
+#include <cstdlib>
  #include <cstring>
  #include <fstream>
  #include <iostream>
@@ -11,18 +12,28 @@ extern "C" {
  Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}
  
  Video::Video(const std::string &video_filename)
-    : image(), input_filename(video_filename) {}
+    : image_(),
+      input_filename_(video_filename),
+      sws_context_(nullptr),
+      frame_count_(0),
+      packet_count_(0) {}
  
-bool Video::DitherGrayscale(const char *output_filename) {
-  return DitherGrayscale(std::string(output_filename));
+Video::~Video() {
+  if (sws_context_ != nullptr) {
+    sws_freeContext(sws_context_);
+  }
  }
  
-bool Video::DitherGrayscale(const std::string &output_filename) {
-  // determine input file format
+bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
+                        bool grayscale) {
+  return DitherVideo(std::string(output_filename), blue_noise, grayscale);
+}
  
+bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
+                        bool grayscale) {
    // Get AVFormatContext for input file
    AVFormatContext *avf_context = nullptr;
-  std::string url = std::string("file:") + input_filename;
+  std::string url = std::string("file:") + input_filename_;
    int return_value =
        avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
    if (return_value != 0) {
@@ -49,42 +60,44 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
      avformat_close_input(&avf_context);
      return false;
    }
-
-  // cleanup AVFormatContext as it is no longer needed
-  avformat_close_input(&avf_context);
-
-  // Init required objects for decoding
-
-  // Init parser
-  AVCodecParserContext *parser = av_parser_init(avcodec->id);
-  if (!parser) {
-    std::cout << "ERROR: Failed to init codec parser" << std::endl;
-    return false;
-  }
+  int video_stream_idx = return_value;
  
    // Alloc codec context
    AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
    if (!codec_ctx) {
      std::cout << "ERROR: Failed to alloc codec context" << std::endl;
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
+    return false;
+  }
+
+  // Set codec parameters from input stream
+  return_value = avcodec_parameters_to_context(
+      codec_ctx, avf_context->streams[video_stream_idx]->codecpar);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to set codec parameters from input stream"
+              << std::endl;
+    avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_context);
      return false;
    }
  
    // Init codec context
    return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
-  if (return_value == 0) {
+  if (return_value < 0) {
      std::cout << "ERROR: Failed to init codec context" << std::endl;
      avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
      return false;
    }
  
+  av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0);
+
    // Alloc a packet object for reading packets
    AVPacket *pkt = av_packet_alloc();
    if (!pkt) {
      std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
      avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
      return false;
    }
  
@@ -93,76 +106,151 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
    if (!frame) {
      std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
      av_packet_free(&pkt);
-    av_parser_close(parser);
      avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_context);
      return false;
    }
  
-  // Now the file will be opened for decoding the "best" video stream
-  std::ifstream ifs(input_filename);
-  if (!ifs.is_open() || !ifs.good()) {
-    std::cout << "ERROR: Failed to open input file \"" << input_filename << '"'
+  // read frames
+  bool decode_ok = true;
+  while (decode_ok && av_read_frame(avf_context, pkt) >= 0) {
+    if (pkt->stream_index == video_stream_idx) {
+      decode_ok =
+          HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale);
+    }
+    // av_read_frame() fills pkt with a new reference on each successful
+    // call; unref it so the packet buffers are not leaked.
+    av_packet_unref(pkt);
+  }
+
+  // flush decoders (skipped when a packet already failed to decode)
+  if (decode_ok) {
+    decode_ok =
+        HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale);
+  }
+
+  // cleanup; reached on failure as well so nothing allocated above leaks
+  av_frame_free(&frame);
+  av_packet_free(&pkt);
+  avcodec_free_context(&codec_ctx);
+  avformat_close_input(&avf_context);
+  return decode_ok;
+}
+
+// Sends pkt to the decoder (nullptr requests a flush) and handles every
+// frame the decoder returns: the frame is converted with swscale, dithered
+// with blue_noise and saved as "output_NNNN.png".
+// Returns true once the decoder needs more input or is drained, false on
+// any decode, conversion or dithering error.
+bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
+                                 AVFrame *frame, Image *blue_noise,
+                                 bool grayscale) {
+  // A null pkt is the flush request, so only real packets are counted.
+  if (pkt != nullptr) {
+    ++packet_count_;
+  }
+
+  int return_value = avcodec_send_packet(codec_ctx, pkt);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
                << std::endl;
-    av_frame_free(&frame);
-    av_packet_free(&pkt);
-    avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
      return false;
    }
  
-  // Set up buffer to read from input file
-  std::array<uint8_t, kReadBufSizeWithPadding> buf;
-  // Fill end of buffer with 0 to avoid possible overreading (as shown in
-  // example code)
-  std::memset(buf.data() + kReadBufSize, 0, kReadBufPaddingSize);
-
-  std::streamsize read_count;
-  uint8_t *data_ptr;
-  while (ifs.good()) {
-    ifs.read(reinterpret_cast<char *>(buf.data()), kReadBufSize);
-    read_count = ifs.gcount();
-    data_ptr = buf.data();
-    if (read_count == 0) {
-      // read 0 bytes, probably reached exactly EOF
-      break;
+  return_value = 0;
+  while (return_value >= 0) {
+    return_value = avcodec_receive_frame(codec_ctx, frame);
+    if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
+      // the decoder needs more input or has been fully drained
+      return true;
+    } else if (return_value < 0) {
+      std::cout << "ERROR: Failed to get frame from decoded packet(s)"
+                << std::endl;
+      return false;
      }
+    ++frame_count_;
  
-    while (read_count > 0) {
-      return_value =
-          av_parser_parse2(parser, codec_ctx, &pkt->data, &pkt->size, data_ptr,
-                           read_count, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
-      if (return_value < 0) {
-        std::cout << "ERROR: Failed to parse input file" << std::endl;
-        av_frame_free(&frame);
-        av_packet_free(&pkt);
-        avcodec_free_context(&codec_ctx);
-        av_parser_close(parser);
+    std::cout << "Frame " << frame_count_ << std::endl;  // TODO DEBUG
+
+    const int channels = grayscale ? 1 : 4;
+    const std::size_t pixel_bytes = static_cast<std::size_t>(frame->width) *
+                                    static_cast<std::size_t>(frame->height) *
+                                    static_cast<std::size_t>(channels);
+
+    // Destination buffer for the converted pixels. A std::vector owns it so
+    // every return below releases it. 16 bytes of slack follow the last row
+    // (presumably headroom for sws_scale -- TODO confirm).
+    std::vector<uint8_t> converted(pixel_bytes + 16, 0);
+    uint8_t *dst[AV_NUM_DATA_POINTERS] = {nullptr};
+    dst[0] = converted.data();
+    std::array<int, AV_NUM_DATA_POINTERS> dst_strides{};
+    dst_strides[0] = frame->width * channels;
+
+    unsigned int line_count = 0;
+    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+      if (frame->linesize[i] > 0) {
+        ++line_count;
+      }
+    }
+
+    if (line_count == 0) {
+      std::cout << "ERROR: Invalid number of picture planes" << std::endl;
+      return false;
+    }
+
+    // Convert colors to RGBA (or 8-bit gray). sws_getCachedContext() keeps
+    // sws_context_ while the parameters match and replaces it when the frame
+    // size, pixel format or grayscale choice differs from the previous call.
+    sws_context_ = sws_getCachedContext(
+        sws_context_, frame->width, frame->height,
+        static_cast<AVPixelFormat>(frame->format), frame->width, frame->height,
+        grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8
+                  : AVPixelFormat::AV_PIX_FMT_RGBA,
+        SWS_BILINEAR, nullptr, nullptr, nullptr);
+    if (sws_context_ == nullptr) {
+      std::cout << "ERROR: Failed to init sws_context_" << std::endl;
+      return false;
+    }
-        return false;
-      }
-      data_ptr += return_value;
-      read_count -= return_value;
  
-      if (pkt->size) {
-        // TODO use packet
+    return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0,
+                             frame->height, dst, dst_strides.data());
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to convert pixel format of frame"
+                << std::endl;
-      }
+      return false;
      }
-  }
  
-  if (ifs.fail()) {
-    std::cout << "ERROR: Read error on input file" << std::endl;
-    av_frame_free(&frame);
-    av_packet_free(&pkt);
-    avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
-    return false;
-  }
+    // Give the converted pixels to image_: drop the slack, then swap buffers
+    // so the frame is not copied a second time.
+    converted.resize(pixel_bytes);
+    image_.width_ = frame->width;
+    image_.height_ = frame->height;
+    image_.is_grayscale_ = grayscale;
+    image_.data_.swap(converted);
  
-  // TODO flush decoder
+    std::unique_ptr<Image> dithered_image;
+    if (grayscale) {
+      dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
+    } else {
+      dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
+    }
+    if (!dithered_image || !dithered_image->IsValid()) {
+      std::cout << "ERROR: Failed to dither frame " << frame_count_
+                << std::endl;
+      return false;
+    }
+
+    // zero-pad the frame number to at least four digits (output_0001.png)
+    const std::string frame_number = std::to_string(frame_count_);
+    std::string out_name = "output_";
+    if (frame_number.size() < 4) {
+      out_name.append(4 - frame_number.size(), '0');
+    }
+    out_name += frame_number;
+    out_name += ".png";
+    dithered_image->SaveAsPNG(out_name, false);
+    // TODO encode video with dithered_image
+  }
  
-  // cleanup
-  av_frame_free(&frame);
-  av_packet_free(&pkt);
-  avcodec_free_context(&codec_ctx);
-  av_parser_close(parser);
    return true;
  }
diff --git a/src/video.h b/src/video.h

index ac0af820d73ae2003eefb51ed5dbf69b92e8d8ca..77f2c93d84c8a9454d6e71f6cd588481dc1572ae 100644 (file)
--- a/src/video.h
+++ b/src/video.h
@@ -3,6 +3,7 @@
  
  extern "C" {
  #include <libavcodec/avcodec.h>
+#include <libswscale/swscale.h>
  }
  
  #include "image.h"
@@ -17,12 +18,30 @@ class Video {
    explicit Video(const char *video_filename);
    explicit Video(const std::string &video_filename);
  
-  bool DitherGrayscale(const char *output_filename);
-  bool DitherGrayscale(const std::string &output_filename);
+  ~Video();
+
+  // sws_context_ is an owning raw pointer that ~Video() frees, so a copied
+  // Video would free the same context twice; copying is therefore disabled.
+  Video(const Video &) = delete;
+  Video &operator=(const Video &) = delete;
+
+  /// Decodes the input video and dithers its frames using blue_noise
+  /// (grayscale dithering when grayscale is true, color otherwise).
+  /// Returns false if any step fails.
+  bool DitherVideo(const char *output_filename, Image *blue_noise,
+                   bool grayscale = false);
+  bool DitherVideo(const std::string &output_filename, Image *blue_noise,
+                   bool grayscale = false);
  
   private:
-  Image image;
-  std::string input_filename;
+  Image image_;
+  std::string input_filename_;
+  SwsContext *sws_context_;
+  unsigned int frame_count_;
+  unsigned int packet_count_;
+
+  bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
+                            AVFrame *frame, Image *blue_noise, bool grayscale);
  };
  
  #endif
author	Stephen Seo <seo.disparate@gmail.com>
	Tue, 30 Nov 2021 07:02:51 +0000 (16:02 +0900)
committer	Stephen Seo <seo.disparate@gmail.com>
	Tue, 30 Nov 2021 07:02:51 +0000 (16:02 +0900)
CMakeLists.txt		patch \| blob \| history
src/image.cc		patch \| blob \| history
src/image.h		patch \| blob \| history
src/main.cc		patch \| blob \| history
src/video.cc		patch \| blob \| history
src/video.h		patch \| blob \| history