From cfdd411b99426b41c18f9c6fbc3be1f9aa8c89a7 Mon Sep 17 00:00:00 2001
From: Stephen Seo <seo.disparate@gmail.com>
Date: Mon, 29 Nov 2021 19:13:26 +0900
Subject: [PATCH] WIP Use ffmpeg to decode/encode dithered frames

---
 CMakeLists.txt |   5 ++
 src/image.h    |   2 +
 src/video.cc   | 168 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/video.h    |  26 ++++++++
 4 files changed, 201 insertions(+)
 create mode 100644 src/video.cc
 create mode 100644 src/video.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index df5fd65..f995bb5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,11 +24,16 @@ find_package(OpenCL REQUIRED)
 
 find_package(PNG REQUIRED)
 
+# video.cc also calls libavformat and libavutil, so resolve all three together.
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(FFMPEG_LIBAVCODEC REQUIRED libavcodec libavformat libavutil)
 target_include_directories(DitheringProject PUBLIC
   ${OpenCL_INCLUDE_DIRS}
   ${PNG_INCLUDE_DIRS}
+  ${FFMPEG_LIBAVCODEC_INCLUDE_DIRS}
 )
 target_link_libraries(DitheringProject PUBLIC
   ${OpenCL_LIBRARIES}
   ${PNG_LIBRARIES}
+  ${FFMPEG_LIBAVCODEC_LINK_LIBRARIES}
 )
diff --git a/src/image.h b/src/image.h
index 0cbe886..8671ea4 100644
--- a/src/image.h
+++ b/src/image.h
@@ -128,6 +128,8 @@ class Image {
   OpenCLHandle::Ptr GetOpenCLHandle();
 
  private:
+  friend class Video;
+
   static const char *opencl_grayscale_kernel_;
   static const char *opencl_color_kernel_;
   static const std::array<png_color, 2> dither_bw_palette_;
diff --git a/src/video.cc b/src/video.cc
new file mode 100644
index 0000000..fcac248
--- /dev/null
+++ b/src/video.cc
@@ -0,0 +1,168 @@
+#include "video.h"
+
+#include <cstring>
+#include <fstream>
+#include <iostream>
+extern "C" {  // FFmpeg headers are plain C and have no C++ linkage guards
+#include <libavcodec/avcodec.h>
+#include <libavcodec/packet.h>
+#include <libavformat/avformat.h>
+}
+Video::Video(const char *video_filename) : Video(std::string{video_filename}) {}
+// Records the input path; the file is first opened by DitherGrayscale().
+Video::Video(const std::string &video_filename)
+    : image{}, input_filename{video_filename} {}
+
+bool Video::DitherGrayscale(const char *output_filename) {
+  return DitherGrayscale(std::string{output_filename});  // string overload
+}
+
+bool Video::DitherGrayscale(const std::string &output_filename) {
+  // Probes the input with libavformat to pick the best video stream's decoder,
+  // then runs the file through that codec's parser. Returns true on success,
+  // false after printing an error. NOTE: output_filename is not used yet.
+  AVFormatContext *avf_context = nullptr;
+  std::string url = std::string("file:") + input_filename;
+  int return_value =
+      avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr);
+  if (return_value != 0) {
+    std::cout << "ERROR: Failed to open input file to determine format"
+              << std::endl;
+    return false;
+  }
+
+  // Read from input file to fill in info in AVFormatContext
+  return_value = avformat_find_stream_info(avf_context, nullptr);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to determine input file stream info"
+              << std::endl;
+    avformat_close_input(&avf_context);
+    return false;
+  }
+
+  // Get "best" video stream
+  AVCodec *avcodec = nullptr;
+  return_value = av_find_best_stream(
+      avf_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &avcodec, 0);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to get video stream in input file" << std::endl;
+    avformat_close_input(&avf_context);
+    return false;
+  }
+
+  // cleanup AVFormatContext as it is no longer needed
+  avformat_close_input(&avf_context);
+
+  // Init required objects for decoding
+
+  // Init parser
+  AVCodecParserContext *parser = av_parser_init(avcodec->id);
+  if (!parser) {
+    std::cout << "ERROR: Failed to init codec parser" << std::endl;
+    return false;
+  }
+
+  // Alloc codec context
+  AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec);
+  if (!codec_ctx) {
+    std::cout << "ERROR: Failed to alloc codec context" << std::endl;
+    av_parser_close(parser);
+    return false;
+  }
+
+  // Init codec context; avcodec_open2 returns a negative value on failure
+  return_value = avcodec_open2(codec_ctx, avcodec, nullptr);
+  if (return_value < 0) {
+    std::cout << "ERROR: Failed to init codec context" << std::endl;
+    avcodec_free_context(&codec_ctx);
+    av_parser_close(parser);
+    return false;
+  }
+
+  // Alloc a packet object for reading packets
+  AVPacket *pkt = av_packet_alloc();
+  if (!pkt) {
+    std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl;
+    avcodec_free_context(&codec_ctx);
+    av_parser_close(parser);
+    return false;
+  }
+
+  // Alloc a frame object for reading frames
+  AVFrame *frame = av_frame_alloc();
+  if (!frame) {
+    std::cout << "ERROR: Failed to alloc video frame object" << std::endl;
+    av_packet_free(&pkt);
+    av_parser_close(parser);
+    avcodec_free_context(&codec_ctx);
+    return false;
+  }
+
+  // Reopen the input as a raw byte stream (binary mode) to feed the parser
+  std::ifstream ifs(input_filename, std::ios::binary);
+  if (!ifs.is_open() || !ifs.good()) {
+    std::cout << "ERROR: Failed to open input file \"" << input_filename << '"'
+              << std::endl;
+    av_frame_free(&frame);
+    av_packet_free(&pkt);
+    avcodec_free_context(&codec_ctx);
+    av_parser_close(parser);
+    return false;
+  }
+
+  // Set up buffer to read from input file
+  std::array<uint8_t, kReadBufSizeWithPadding> buf;
+  // Fill end of buffer with 0 to avoid possible overreading (as shown in
+  // example code)
+  std::memset(buf.data() + kReadBufSize, 0, kReadBufPaddingSize);
+
+  std::streamsize read_count;
+  uint8_t *data_ptr;
+  while (ifs.good()) {
+    ifs.read(reinterpret_cast<char *>(buf.data()), kReadBufSize);
+    read_count = ifs.gcount();
+    data_ptr = buf.data();
+    if (read_count == 0) {
+      // read 0 bytes, probably reached exactly EOF
+      break;
+    }
+
+    while (read_count > 0) {
+      return_value =
+          av_parser_parse2(parser, codec_ctx, &pkt->data, &pkt->size, data_ptr,
+                           read_count, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
+      if (return_value < 0) {
+        std::cout << "ERROR: Failed to parse input file" << std::endl;
+        av_frame_free(&frame);
+        av_packet_free(&pkt);
+        avcodec_free_context(&codec_ctx);
+        av_parser_close(parser);
+        return false;
+      }
+      data_ptr += return_value;
+      read_count -= return_value;
+
+      if (pkt->size) {
+        // TODO use packet
+      }
+    }
+  }
+
+  if (ifs.bad()) {  // failbit is also set at EOF; only badbit is an error
+    std::cout << "ERROR: Read error on input file" << std::endl;
+    av_frame_free(&frame);
+    av_packet_free(&pkt);
+    avcodec_free_context(&codec_ctx);
+    av_parser_close(parser);
+    return false;
+  }
+
+  // TODO flush decoder
+
+  // cleanup
+  av_frame_free(&frame);
+  av_packet_free(&pkt);
+  avcodec_free_context(&codec_ctx);
+  av_parser_close(parser);
+  return true;
+}
diff --git a/src/video.h b/src/video.h
new file mode 100644
index 0000000..7e0e283
--- /dev/null
+++ b/src/video.h
@@ -0,0 +1,26 @@
+#ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_
+#define IGPUP_DITHERING_PROJECT_VIDEO_H_
+
+extern "C" {  // FFmpeg headers are plain C and have no C++ linkage guards
+#include <libavcodec/avcodec.h>
+}
+#include "image.h"
+
+constexpr unsigned int kReadBufSize = 4096;  // bytes read from the file per chunk
+constexpr unsigned int kReadBufPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE;
+constexpr auto kReadBufSizeWithPadding = kReadBufSize + kReadBufPaddingSize;
+
+class Video {
+ public:
+  explicit Video(const char *video_filename);
+  explicit Video(const std::string &video_filename);
+  // Returns true on success, or false after printing an error message.
+  bool DitherGrayscale(const char *output_filename);
+  bool DitherGrayscale(const std::string &output_filename);
+
+ private:
+  Image image;
+  std::string input_filename;  // path passed to the constructor
+};
+
+#endif