From 11f48592bf5b507b981806df2f9fd543ac0ba1e6 Mon Sep 17 00:00:00 2001
From: Stephen Seo <seo.disparate@gmail.com>
Date: Tue, 30 Nov 2021 16:02:51 +0900
Subject: [PATCH] Impl convert video frames to color dithered pngs

---
 CMakeLists.txt |   3 +-
 src/image.cc   |   4 +
 src/image.h    |   2 +-
 src/main.cc    |  25 ++---
 src/video.cc   | 251 ++++++++++++++++++++++++++++++++++---------------
 src/video.h    |  19 +++-
 6 files changed, 203 insertions(+), 101 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1518449..368bc1a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,7 +26,8 @@ find_package(OpenCL REQUIRED)
 find_package(PNG REQUIRED)
 
 find_package(PkgConfig REQUIRED)
-pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED libavcodec libavformat libavutil)
+pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED
+    libavcodec libavformat libavutil libswscale)
 
 target_include_directories(DitheringProject PUBLIC
   ${OpenCL_INCLUDE_DIRS}
diff --git a/src/image.cc b/src/image.cc
index c391a0e..23c1c37 100644
--- a/src/image.cc
+++ b/src/image.cc
@@ -817,6 +817,10 @@ OpenCLHandle::Ptr Image::GetOpenCLHandle() {
 
 void Image::DecodePNG(const std::string &filename) {
   FILE *file = std::fopen(filename.c_str(), "rb");
+  if (!file) {
+    std::cout << "ERROR: Failed to open \"" << filename << '"' << std::endl;
+    return;
+  }
 
   // Check header of file to check if it is actually a png file.
   {
diff --git a/src/image.h b/src/image.h
index 8671ea4..2ccc8e5 100644
--- a/src/image.h
+++ b/src/image.h
@@ -135,7 +135,7 @@ class Image {
   static const std::array<png_color, 2> dither_bw_palette_;
   static const std::array<png_color, 8> dither_color_palette_;
   OpenCLHandle::Ptr opencl_handle_;
-  /// Internally holds rgba
+  /// Internally holds rgba or grayscale (1 channel)
   std::vector<uint8_t> data_;
   unsigned int width_;
   unsigned int height_;
diff --git a/src/main.cc b/src/main.cc
index 3172e32..90e7ba5 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -1,30 +1,19 @@
 #include <iostream>
 
 #include "image.h"
+#include "video.h"
 
 int main(int argc, char **argv) {
-  // Image image("testin.ppm");
-  // image.SaveAsPNG("testout.png", true);
-
-  Image input("input.png");
-  if (!input.IsValid()) {
-    std::cout << "ERROR: input.png is invalid" << std::endl;
+  Image blue_noise("bluenoise.png");  // noise image passed to DitherVideo
+  if (!blue_noise.IsValid()) {
+    std::cout << "ERROR: Invalid bluenoise.png" << std::endl;
     return 1;
   }
-
-  Image bluenoise("bluenoise.png");
-  if (!bluenoise.IsValid()) {
-    std::cout << "ERROR: bluenoise.png is invalid" << std::endl;
+  Video video("input.mp4");  // input path is fixed at compile time
+  if (!video.DitherVideo("output.mp4", &blue_noise)) {  // color by default
+    std::cout << "ERROR: Failed to dither video" << std::endl;
     return 1;
   }
 
-  // auto output = input.ToGrayscaleDitheredWithBlueNoise(&bluenoise);
-  auto output = input.ToColorDitheredWithBlueNoise(&bluenoise);
-  if (!output || !output->IsValid()) {
-    std::cout << "ERROR: output Image is invalid" << std::endl;
-    return 1;
-  }
-  output->SaveAsPNG("output.png", true);
-
   return 0;
 }
diff --git a/src/video.cc b/src/video.cc
index ac86500..bcf8795 100644
--- a/src/video.cc
+++ b/src/video.cc
@@ -1,5 +1,6 @@
 #include "video.h"
 
+#include <cstdlib>
 #include <cstring>
 #include <fstream>
 #include <iostream>
@@ -11,18 +12,28 @@ extern "C" {
 Video::Video(const char *video_filename) : Video(std::string(video_filename)) {}
 
 Video::Video(const std::string &video_filename)
-    : image(), input_filename(video_filename) {}
+    : image_(),
+      input_filename_(video_filename),
+      sws_context_(nullptr),  // created on demand while decoding frames
+      frame_count_(0),  // frames decoded so far; also names the output PNGs
+      packet_count_(0) {}  // reported when a packet fails to decode
 
-bool Video::DitherGrayscale(const char *output_filename) {
-  return DitherGrayscale(std::string(output_filename));
+Video::~Video() {
+  // Nothing to release unless a swscale context was actually created.
+  if (sws_context_ == nullptr) return;
+  sws_freeContext(sws_context_);
 }
 
-bool Video::DitherGrayscale(const std::string &output_filename) {
-  // determine input file format
+bool Video::DitherVideo(const char *output_filename, Image *blue_noise,
+                        bool grayscale) {  // forwards to std::string overload
+  return DitherVideo(std::string(output_filename), blue_noise, grayscale);
+}
 
+bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise,
+                        bool grayscale) {
   // Get AVFormatContext for input file
   AVFormatContext *avf_context = nullptr;
-  std::string url = std::string("file:") + input_filename;
+  std::string url = std::string("file:") + input_filename_;
   int return_value =
       avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
   if (return_value != 0) {
@@ -49,42 +60,43 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
     avformat_close_input(&avf_context);
     return false;
   }
-
-  // cleanup AVFormatContext as it is no longer needed
-  avformat_close_input(&avf_context);
-
-  // Init required objects for decoding
-
-  // Init parser
-  AVCodecParserContext *parser = av_parser_init(avcodec->id);
-  if (!parser) {
-    std::cout << "ERROR: Failed to init codec parser" << std::endl;
-    return false;
-  }
+  int video_stream_idx = return_value;
 
   // Alloc codec context
   AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
   if (!codec_ctx) {
     std::cout << "ERROR: Failed to alloc codec context" << std::endl;
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
+    return false;
+  }
+
+  // Set codec parameters from input stream
+  return_value = avcodec_parameters_to_context(
+      codec_ctx, avf_context->streams[video_stream_idx]->codecpar);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to set codec parameters from input stream"
+              << std::endl;
+    avcodec_free_context(&codec_ctx);
+    avformat_close_input(&avf_context);
     return false;
   }
 
   // Init codec context
   return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
-  if (return_value == 0) {
+  if (return_value < 0) {
     std::cout << "ERROR: Failed to init codec context" << std::endl;
     avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
+    avformat_close_input(&avf_context);
     return false;
   }
 
+  av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0);
+
   // Alloc a packet object for reading packets
   AVPacket *pkt = av_packet_alloc();
   if (!pkt) {
     std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
     avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
     return false;
   }
 
@@ -93,76 +105,161 @@ bool Video::DitherGrayscale(const std::string &output_filename) {
   if (!frame) {
     std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
     av_packet_free(&pkt);
-    av_parser_close(parser);
     avcodec_free_context(&codec_ctx);
     return false;
   }
 
-  // Now the file will be opened for decoding the "best" video stream
-  std::ifstream ifs(input_filename);
-  if (!ifs.is_open() || !ifs.good()) {
-    std::cout << "ERROR: Failed to open input file \"" << input_filename << '"'
-              << std::endl;
-    av_frame_free(&frame);
-    av_packet_free(&pkt);
-    avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
-    return false;
-  }
-
-  // Set up buffer to read from input file
-  std::array<uint8_t, kReadBufSizeWithPadding> buf;
-  // Fill end of buffer with 0 to avoid possible overreading (as shown in
-  // example code)
-  std::memset(buf.data() + kReadBufSize, 0, kReadBufPaddingSize);
-
-  std::streamsize read_count;
-  uint8_t *data_ptr;
-  while (ifs.good()) {
-    ifs.read(reinterpret_cast<char *>(buf.data()), kReadBufSize);
-    read_count = ifs.gcount();
-    data_ptr = buf.data();
-    if (read_count == 0) {
-      // read 0 bytes, probably reached exactly EOF
-      break;
-    }
-
-    while (read_count > 0) {
-      return_value =
-          av_parser_parse2(parser, codec_ctx, &pkt->data, &pkt->size, data_ptr,
-                           read_count, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
-      if (return_value < 0) {
-        std::cout << "ERROR: Failed to parse input file" << std::endl;
-        av_frame_free(&frame);
-        av_packet_free(&pkt);
-        avcodec_free_context(&codec_ctx);
-        av_parser_close(parser);
+  // read frames
+  while (av_read_frame(avf_context, pkt) >= 0) {
+    if (pkt->stream_index == video_stream_idx) {
+      if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale)) {
         return false;
       }
-      data_ptr += return_value;
-      read_count -= return_value;
-
-      if (pkt->size) {
-        // TODO use packet
-      }
     }
   }
 
-  if (ifs.fail()) {
-    std::cout << "ERROR: Read error on input file" << std::endl;
-    av_frame_free(&frame);
-    av_packet_free(&pkt);
-    avcodec_free_context(&codec_ctx);
-    av_parser_close(parser);
+  // flush decoders
+  if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale)) {
     return false;
   }
 
-  // TODO flush decoder
-
   // cleanup
   av_frame_free(&frame);
   av_packet_free(&pkt);
   avcodec_free_context(&codec_ctx);
-  av_parser_close(parser);
+  avformat_close_input(&avf_context);
+  return true;
+}
+
+// Decodes one packet and writes every frame it yields as a dithered PNG.
+//
+// Each decoded frame is converted with swscale to RGBA (or 8-bit gray when
+// grayscale is set), copied into image_, dithered with blue_noise and saved
+// as "output_NNNN.png", where NNNN is the running frame number.
+//
+//   codec_ctx   decoder context that receives the packet.
+//   pkt         packet to decode; nullptr is forwarded to flush the decoder.
+//   frame       caller-owned frame reused for every decoded picture.
+//   blue_noise  noise image forwarded to the Image dithering routines.
+//   grayscale   true for single-channel output, false for RGBA output.
+//
+// Returns true once the decoder wants more input or reports end of stream,
+// and false on any decoding, conversion or dithering failure.
+//
+// Side effects: bumps packet_count_ per non-null packet and frame_count_ per
+// decoded frame, overwrites image_, and (re)creates sws_context_, which the
+// destructor releases.
+bool Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
+                                 AVFrame *frame, Image *blue_noise,
+                                 bool grayscale) {
+  // Count real packets only, so the message below names the failing packet.
+  if (pkt != nullptr) {
+    ++packet_count_;
+  }
+
+  int return_value = avcodec_send_packet(codec_ctx, pkt);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')'
+              << std::endl;
+    return false;
+  }
+
+  // One packet can yield zero, one or several frames; drain all of them.
+  while (return_value >= 0) {
+    return_value = avcodec_receive_frame(codec_ctx, frame);
+    if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) {
+      return true;
+    } else if (return_value < 0) {
+      std::cout << "ERROR: Failed to get frame from decoded packet(s)"
+                << std::endl;
+      return false;
+    }
+    ++frame_count_;
+
+    std::cout << "Frame " << frame_count_ << std::endl;  // TODO DEBUG
+
+    // A usable picture exposes at least one plane with a positive line size.
+    bool has_plane = false;
+    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) {
+      has_plane = has_plane || frame->linesize[i] > 0;
+    }
+    if (!has_plane) {
+      std::cout << "ERROR: Invalid number of picture planes" << std::endl;
+      return false;
+    }
+
+    // Guard the size arithmetic below against nonsensical dimensions.
+    if (frame->width <= 0 || frame->height <= 0) {
+      std::cout << "ERROR: Invalid frame dimensions" << std::endl;
+      return false;
+    }
+
+    // Output layout: 1 byte per pixel for gray, 4 (RGBA) for color.
+    const int channels = grayscale ? 1 : 4;
+    const std::size_t pixel_count =
+        static_cast<std::size_t>(frame->width) * frame->height;
+
+    // Scratch buffer that swscale writes into. A std::vector is released on
+    // every exit path, which the manual calloc/free pairs had to repeat by
+    // hand, and a failed allocation throws instead of handing swscale a null
+    // pointer. The size (4 bytes per pixel plus 16 spare bytes) is unchanged.
+    std::vector<uint8_t> converted(4 * pixel_count + 16, 0);
+    uint8_t *dst[AV_NUM_DATA_POINTERS] = {converted.data()};
+    const int dst_strides[AV_NUM_DATA_POINTERS] = {frame->width * channels};
+
+    // sws_getCachedContext keeps sws_context_ while the frame size, source
+    // format and target format are unchanged and rebuilds it otherwise, so a
+    // mid-stream resolution change or a different grayscale flag can no
+    // longer scale through a stale context.
+    sws_context_ = sws_getCachedContext(
+        sws_context_, frame->width, frame->height,
+        static_cast<AVPixelFormat>(frame->format), frame->width, frame->height,
+        grayscale ? AV_PIX_FMT_GRAY8 : AV_PIX_FMT_RGBA, SWS_BILINEAR, nullptr,
+        nullptr, nullptr);
+    if (sws_context_ == nullptr) {
+      std::cout << "ERROR: Failed to init sws_context_" << std::endl;
+      return false;
+    }
+
+    // Convert the decoded picture into the scratch buffer.
+    return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0,
+                             frame->height, dst, dst_strides);
+    if (return_value < 0) {
+      std::cout << "ERROR: Failed to convert pixel format of frame"
+                << std::endl;
+      return false;
+    }
+
+    // The converted rows are tightly packed (stride == width * channels), so
+    // the pixel payload can be copied into image_ in one step.
+    image_.width_ = frame->width;
+    image_.height_ = frame->height;
+    image_.is_grayscale_ = grayscale;
+    image_.data_.assign(converted.data(),
+                        converted.data() + channels * pixel_count);
+
+    // Dither the frame now held by image_.
+    std::unique_ptr<Image> dithered_image;
+    if (grayscale) {
+      dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise);
+    } else {
+      dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise);
+    }
+    // The dithering routines return a pointer; refuse an empty or invalid
+    // result instead of dereferencing it.
+    if (!dithered_image || !dithered_image->IsValid()) {
+      std::cout << "ERROR: Failed to dither frame " << frame_count_
+                << std::endl;
+      return false;
+    }
+
+    // Zero-pad the frame number to at least four digits (output_0001.png).
+    const std::string digits = std::to_string(frame_count_);
+    const std::string padding(digits.size() < 4 ? 4 - digits.size() : 0, '0');
+    const std::string out_name = "output_" + padding + digits + ".png";
+    dithered_image->SaveAsPNG(out_name, false);
+    // TODO encode video with dithered_image
+  }
+
+  return true;
+}
diff --git a/src/video.h b/src/video.h
index ac0af82..77f2c93 100644
--- a/src/video.h
+++ b/src/video.h
@@ -3,6 +3,7 @@
 
 extern "C" {
 #include <libavcodec/avcodec.h>
+#include <libswscale/swscale.h>
 }
 
 #include "image.h"
@@ -17,12 +18,22 @@ class Video {
   explicit Video(const char *video_filename);
   explicit Video(const std::string &video_filename);
 
-  bool DitherGrayscale(const char *output_filename);
-  bool DitherGrayscale(const std::string &output_filename);
+  ~Video();
+
+  bool DitherVideo(const char *output_filename, Image *blue_noise,
+                   bool grayscale = false);
+  bool DitherVideo(const std::string &output_filename, Image *blue_noise,
+                   bool grayscale = false);
 
  private:
-  Image image;
-  std::string input_filename;
+  Image image_;
+  std::string input_filename_;
+  SwsContext *sws_context_;
+  unsigned int frame_count_;
+  unsigned int packet_count_;
+
+  bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt,
+                            AVFrame *frame, Image *blue_noise, bool grayscale);
 };
 
 #endif