diff --git a/src/arg_parse.cc b/src/arg_parse.cc index b61c9fa..67f904c 100644 --- a/src/arg_parse.cc +++ b/src/arg_parse.cc @@ -14,7 +14,8 @@ void Args::PrintUsage() { std::cout << "Usage: [-h | --help] [-i | --input ] [-o " " | --output ] [-b | --blue " - "] [-g | --gray] [--image] [--video] [--overwrite]\n" + "] [-g | --gray] [--image] [--video] [--video-pngs] " + "[--overwrite]\n" " -h | --help\t\t\t\tPrint this usage text\n" " -i | --input \tSet input filename\n" " -o | --output \tSet output filename\n" @@ -22,6 +23,7 @@ void Args::PrintUsage() { " -g | --gray\t\t\t\tDither output in grayscale\n" " --image\t\t\t\tDither a single image\n" " --video\t\t\t\tDither frames in a video\n" + " --video-pngs\t\t\t\tDither frames but output as individual pngs\n" " --overwrite\t\t\t\tAllow overwriting existing files\n" << std::endl; } @@ -56,6 +58,9 @@ bool Args::ParseArgs(int argc, char **argv) { do_dither_image_ = true; } else if (std::strcmp(argv[0], "--video") == 0) { do_dither_image_ = false; + } else if (std::strcmp(argv[0], "--video-pngs") == 0) { + do_dither_image_ = false; + do_video_pngs_ = true; } else if (std::strcmp(argv[0], "--overwrite") == 0) { do_overwrite_ = true; } else { diff --git a/src/arg_parse.h b/src/arg_parse.h index 5f62494..0679d24 100644 --- a/src/arg_parse.h +++ b/src/arg_parse.h @@ -14,6 +14,7 @@ struct Args { bool do_dither_image_; bool do_dither_grayscaled_; bool do_overwrite_; + bool do_video_pngs_; std::string input_filename; std::string output_filename; std::string blue_noise_filename; diff --git a/src/main.cc b/src/main.cc index e6c1720..0724fd3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -60,7 +60,8 @@ int main(int argc, char **argv) { } else { Video video(args.input_filename); if (!video.DitherVideo(args.output_filename, &blue_noise, - args.do_dither_grayscaled_, args.do_overwrite_)) { + args.do_dither_grayscaled_, args.do_overwrite_, + args.do_video_pngs_)) { std::cout << "ERROR: Failed to dither frames from input video \"" << 
args.input_filename << '"' << std::endl; Args::PrintUsage(); diff --git a/src/video.cc b/src/video.cc index a8984aa..aac4f59 100644 --- a/src/video.cc +++ b/src/video.cc @@ -1,42 +1,63 @@ #include "video.h" +#include #include #include #include #include -extern "C" { -#include -} - Video::Video(const char *video_filename) : Video(std::string(video_filename)) {} Video::Video(const std::string &video_filename) : image_(), input_filename_(video_filename), - sws_context_(nullptr), + sws_dec_context_(nullptr), + sws_enc_context_(nullptr), frame_count_(0), - packet_count_(0) {} + packet_count_(0), + was_grayscale_(false) {} Video::~Video() { - if (sws_context_ != nullptr) { - sws_freeContext(sws_context_); + if (sws_dec_context_ != nullptr) { + sws_freeContext(sws_dec_context_); } } bool Video::DitherVideo(const char *output_filename, Image *blue_noise, - bool grayscale, bool overwrite) { + bool grayscale, bool overwrite, bool output_as_pngs) { return DitherVideo(std::string(output_filename), blue_noise, grayscale, - overwrite); + overwrite, output_as_pngs); } bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, - bool grayscale, bool overwrite) { + bool grayscale, bool overwrite, bool output_as_pngs) { + if (!overwrite && !output_as_pngs) { + // check if output_file exists + std::ifstream ifs(output_filename); + if (ifs.is_open()) { + std::cout << "ERROR: output file \"" << output_filename + << "\" exists " + "and overwrite is disabled" + << std::endl; + return false; + } + } + + frame_count_ = 0; + + bool color_changed = false; + if (was_grayscale_ != grayscale) { + color_changed = true; + } + was_grayscale_ = grayscale; + + // set up decoding + // Get AVFormatContext for input file - AVFormatContext *avf_context = nullptr; + AVFormatContext *avf_dec_context = nullptr; std::string url = std::string("file:") + input_filename_; int return_value = - avformat_open_input(&avf_context, url.c_str(), nullptr, nullptr); + 
avformat_open_input(&avf_dec_context, url.c_str(), nullptr, nullptr); if (return_value != 0) { std::cout << "ERROR: Failed to open input file to determine format" << std::endl; @@ -44,60 +65,70 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, } // Read from input file to fill in info in AVFormatContext - return_value = avformat_find_stream_info(avf_context, nullptr); + return_value = avformat_find_stream_info(avf_dec_context, nullptr); if (return_value < 0) { std::cout << "ERROR: Failed to determine input file stream info" << std::endl; - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } // Get "best" video stream - AVCodec *avcodec = nullptr; + AVCodec *dec_codec = nullptr; return_value = av_find_best_stream( - avf_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &avcodec, 0); + avf_dec_context, AVMediaType::AVMEDIA_TYPE_VIDEO, -1, -1, &dec_codec, 0); if (return_value < 0) { std::cout << "ERROR: Failed to get video stream in input file" << std::endl; - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } int video_stream_idx = return_value; // Alloc codec context - AVCodecContext *codec_ctx = avcodec_alloc_context3(avcodec); + AVCodecContext *codec_ctx = avcodec_alloc_context3(dec_codec); if (!codec_ctx) { std::cout << "ERROR: Failed to alloc codec context" << std::endl; - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } // Set codec parameters from input stream return_value = avcodec_parameters_to_context( - codec_ctx, avf_context->streams[video_stream_idx]->codecpar); + codec_ctx, avf_dec_context->streams[video_stream_idx]->codecpar); if (return_value < 0) { std::cout << "ERROR: Failed to set codec parameters from input stream" << std::endl; avcodec_free_context(&codec_ctx); - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } // Init codec context - 
return_value = avcodec_open2(codec_ctx, avcodec, nullptr); + return_value = avcodec_open2(codec_ctx, dec_codec, nullptr); if (return_value < 0) { std::cout << "ERROR: Failed to init codec context" << std::endl; avcodec_free_context(&codec_ctx); - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return false; } - av_dump_format(avf_context, video_stream_idx, input_filename_.c_str(), 0); + std::cout << "Dumping input video format info..." << std::endl; + av_dump_format(avf_dec_context, video_stream_idx, input_filename_.c_str(), 0); + + // get input stream info + unsigned int width = + avf_dec_context->streams[video_stream_idx]->codecpar->width; + unsigned int height = + avf_dec_context->streams[video_stream_idx]->codecpar->height; + auto r_frame_rate = avf_dec_context->streams[video_stream_idx]->r_frame_rate; + decltype(r_frame_rate) time_base = {r_frame_rate.den, r_frame_rate.num}; // Alloc a packet object for reading packets AVPacket *pkt = av_packet_alloc(); if (!pkt) { std::cout << "ERROR: Failed to alloc an AVPacket" << std::endl; avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; } @@ -107,162 +138,473 @@ bool Video::DitherVideo(const std::string &output_filename, Image *blue_noise, std::cout << "ERROR: Failed to alloc video frame object" << std::endl; av_packet_free(&pkt); avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; } - // read frames - while (av_read_frame(avf_context, pkt) >= 0) { - if (pkt->stream_index == video_stream_idx) { - if (!HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale, - overwrite)) { + // Set up encoding + + // alloc/init encoding AVFormatContext + AVFormatContext *avf_enc_context = nullptr; + if (!output_as_pngs) { + return_value = avformat_alloc_output_context2( + &avf_enc_context, nullptr, nullptr, output_filename.c_str()); + if (return_value < 0) { + std::cout << "ERROR: Failed to alloc/init avf_enc_context" << 
std::endl; + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + } + + // set output video codec (h264) + AVCodecContext *enc_codec_context = nullptr; + AVCodec *enc_codec = nullptr; + + // get H264 codec + if (!output_as_pngs) { + enc_codec = avcodec_find_encoder(AVCodecID::AV_CODEC_ID_H264); + if (enc_codec == nullptr) { + std::cout << "ERROR: Failed to get H264 codec for encoding" << std::endl; + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + } + + // create new video stream + AVStream *enc_stream = nullptr; + if (!output_as_pngs) { + enc_stream = avformat_new_stream(avf_enc_context, enc_codec); + if (enc_stream == nullptr) { + std::cout << "ERROR: Failed to create encoding stream" << std::endl; + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + // assign its id + enc_stream->id = avf_enc_context->nb_streams - 1; + // alloc enc AVCodecContext + enc_codec_context = avcodec_alloc_context3(enc_codec); + if (enc_codec_context == nullptr) { + std::cout << "ERROR: Failed to create AVCodecContext for encoding" + << std::endl; + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + // set values on enc_codec_context + enc_codec_context->codec_id = AVCodecID::AV_CODEC_ID_H264; + enc_codec_context->bit_rate = kOutputBitrate; + enc_codec_context->width = width; + enc_codec_context->height = height; + enc_stream->time_base = time_base; + enc_codec_context->time_base = time_base; + enc_codec_context->gop_size = 12; + enc_codec_context->pix_fmt = 
AVPixelFormat::AV_PIX_FMT_YUV444P; + if (avf_enc_context->oformat->flags & AVFMT_GLOBALHEADER) { + enc_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + // more init on enc_codec_context + return_value = avcodec_open2(enc_codec_context, enc_codec, nullptr); + if (return_value != 0) { + std::cout << "ERROR: Failed to init enc_codec_context" << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + return_value = avcodec_parameters_from_context(enc_stream->codecpar, + enc_codec_context); + if (return_value < 0) { + std::cout << "ERROR: Failed to set encoding codec parameters in stream" + << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + std::cout << "Dumping output video format info..." 
<< std::endl; + av_dump_format(avf_enc_context, enc_stream->id, output_filename.c_str(), 1); + + // open output file if needed + if (!(avf_enc_context->oformat->flags & AVFMT_NOFILE)) { + return_value = avio_open(&avf_enc_context->pb, output_filename.c_str(), + AVIO_FLAG_WRITE); + if (return_value < 0) { + std::cout << "ERROR: Failed to open file \"" << output_filename + << "\" for writing" << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; } } + + // write header + return_value = avformat_write_header(avf_enc_context, nullptr); + if (return_value < 0) { + std::cout << "ERROR: Failed to write header in output video file" + << std::endl; + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + } // if (!output_as_pngs) + + // do decoding, then encoding per frame + + // read frames + while (av_read_frame(avf_dec_context, pkt) >= 0) { + if (pkt->stream_index == video_stream_idx) { + auto ret_tuple = + HandleDecodingPacket(codec_ctx, pkt, frame, blue_noise, grayscale, + color_changed, output_as_pngs); + if (!std::get<0>(ret_tuple)) { + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) { + for (auto *yuv_frame : std::get<1>(ret_tuple)) { + if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, + yuv_frame, enc_stream)) { + av_frame_free(&yuv_frame); + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + 
avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + av_frame_free(&yuv_frame); + } + } + } } // flush decoders - if (!HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale, - overwrite)) { + auto ret_tuple = + HandleDecodingPacket(codec_ctx, nullptr, frame, blue_noise, grayscale, + color_changed, output_as_pngs); + if (!std::get<0>(ret_tuple)) { + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); return false; + } else if (!output_as_pngs && !std::get<1>(ret_tuple).empty()) { + for (auto *yuv_frame : std::get<1>(ret_tuple)) { + if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, yuv_frame, + enc_stream)) { + av_frame_free(&yuv_frame); + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + av_frame_free(&yuv_frame); + } + } + + if (!output_as_pngs) { + // flush encoder + if (!HandleEncodingFrame(avf_enc_context, enc_codec_context, nullptr, + enc_stream)) { + avcodec_close(enc_codec_context); + avformat_free_context(avf_enc_context); + av_frame_free(&frame); + av_packet_free(&pkt); + avcodec_free_context(&codec_ctx); + avformat_close_input(&avf_dec_context); + return false; + } + + // finish encoding + av_write_trailer(avf_enc_context); } // cleanup + if (enc_codec_context) { + avcodec_close(enc_codec_context); + } + if (!output_as_pngs && !(avf_enc_context->oformat->flags & AVFMT_NOFILE)) { + avio_closep(&avf_enc_context->pb); + } + if (avf_enc_context) { + avformat_free_context(avf_enc_context); + } av_frame_free(&frame); av_packet_free(&pkt); avcodec_free_context(&codec_ctx); - avformat_close_input(&avf_context); + avformat_close_input(&avf_dec_context); return true; } -bool 
Video::HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, - AVFrame *frame, Image *blue_noise, - bool grayscale, bool overwrite) { +std::tuple> Video::HandleDecodingPacket( + AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, Image *blue_noise, + bool grayscale, bool color_changed, bool output_as_pngs) { int return_value = avcodec_send_packet(codec_ctx, pkt); if (return_value < 0) { std::cout << "ERROR: Failed to decode packet (" << packet_count_ << ')' << std::endl; - return false; + return {false, {}}; } return_value = 0; + std::vector return_frames{}; + while (return_value >= 0) { return_value = avcodec_receive_frame(codec_ctx, frame); if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) { - return true; + return {true, return_frames}; } else if (return_value < 0) { std::cout << "ERROR: Failed to get frame from decoded packet(s)" << std::endl; - return false; + return {false, {}}; } ++frame_count_; std::cout << "Frame " << frame_count_ << std::endl; // TODO DEBUG - // output buffer info for converting pixel format to RGBA - uint8_t *dst[AV_NUM_DATA_POINTERS]; - dst[0] = (uint8_t *)calloc(4 * frame->width * frame->height + 16, - sizeof(uint8_t)); - for (unsigned int i = 1; i < AV_NUM_DATA_POINTERS; ++i) { - dst[i] = nullptr; - } - std::array dst_strides = { - frame->width * (grayscale ? 
1 : 4), 0, 0, 0, 0, 0, 0, 0}; - - unsigned int line_count = 0; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - if (frame->linesize[i] > 0) { - ++line_count; - } - } - - if (line_count == 0) { - std::cout << "ERROR: Invalid number of picture planes" << std::endl; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); - } - return false; + AVFrame *temp_frame = av_frame_alloc(); + temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA; + temp_frame->width = frame->width; + temp_frame->height = frame->height; + return_value = av_frame_get_buffer(temp_frame, 0); + if (return_value != 0) { + std::cout << "ERROR: Failed to init temp_frame to receive RGBA data" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; } // Convert colors to RGBA - if (sws_context_ == nullptr) { - sws_context_ = sws_getContext(frame->width, frame->height, - (AVPixelFormat)frame->format, frame->width, - frame->height, - grayscale ? AVPixelFormat::AV_PIX_FMT_GRAY8 - : AVPixelFormat::AV_PIX_FMT_RGBA, - SWS_BILINEAR, nullptr, nullptr, nullptr); - if (sws_context_ == nullptr) { - std::cout << "ERROR: Failed to init sws_context_" << std::endl; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); - } - return false; + if (sws_dec_context_ == nullptr) { + sws_dec_context_ = sws_getContext( + frame->width, frame->height, (AVPixelFormat)frame->format, + frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_RGBA, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (sws_dec_context_ == nullptr) { + std::cout << "ERROR: Failed to init sws_dec_context_" << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; } } - return_value = sws_scale(sws_context_, frame->data, frame->linesize, 0, - frame->height, dst, dst_strides.data()); + return_value = + sws_scale(sws_dec_context_, frame->data, frame->linesize, 0, + frame->height, temp_frame->data, temp_frame->linesize); if (return_value < 0) { std::cout << "ERROR: Failed to convert pixel 
format of frame" << std::endl; - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); - } - return false; + av_frame_free(&temp_frame); + return {false, {}}; } // put RGBA data into image image_.width_ = frame->width; image_.height_ = frame->height; - if (grayscale) { - image_.is_grayscale_ = true; - image_.data_.resize(frame->width * frame->height); - for (unsigned int i = 0; (int)i < frame->width * frame->height; ++i) { - image_.data_.at(i) = dst[0][i]; - } - } else { - image_.is_grayscale_ = false; - image_.data_.resize(frame->width * frame->height * 4); - for (unsigned int y = 0; (int)y < frame->height; ++y) { - for (unsigned int x = 0; (int)x < frame->width; ++x) { - image_.data_.at(x * 4 + y * 4 * frame->width) = - dst[0][x * 4 + y * 4 * frame->width]; - image_.data_.at(1 + x * 4 + y * 4 * frame->width) = - dst[0][1 + x * 4 + y * 4 * frame->width]; - image_.data_.at(2 + x * 4 + y * 4 * frame->width) = - dst[0][2 + x * 4 + y * 4 * frame->width]; - image_.data_.at(3 + x * 4 + y * 4 * frame->width) = - dst[0][3 + x * 4 + y * 4 * frame->width]; - } + image_.is_grayscale_ = false; + image_.data_.resize(frame->width * frame->height * 4); + for (unsigned int y = 0; (int)y < frame->height; ++y) { + for (unsigned int x = 0; (int)x < frame->width; ++x) { + image_.data_.at(x * 4 + y * 4 * frame->width) = + temp_frame->data[0][x * 4 + y * 4 * frame->width]; + image_.data_.at(1 + x * 4 + y * 4 * frame->width) = + temp_frame->data[0][1 + x * 4 + y * 4 * frame->width]; + image_.data_.at(2 + x * 4 + y * 4 * frame->width) = + temp_frame->data[0][2 + x * 4 + y * 4 * frame->width]; + image_.data_.at(3 + x * 4 + y * 4 * frame->width) = + temp_frame->data[0][3 + x * 4 + y * 4 * frame->width]; } } + av_frame_unref(temp_frame); + std::unique_ptr dithered_image; if (grayscale) { dithered_image = image_.ToGrayscaleDitheredWithBlueNoise(blue_noise); } else { dithered_image = image_.ToColorDitheredWithBlueNoise(blue_noise); } + if (!dithered_image) { + std::cout << 
"ERROR: Failed to dither video frame" << std::endl; + return {false, {}}; + } - std::string out_name = "output_"; - if (frame_count_ < 10) { - out_name += "000" + std::to_string(frame_count_); - } else if (frame_count_ < 100) { - out_name += "00" + std::to_string(frame_count_); - } else if (frame_count_ < 1000) { - out_name += "0" + std::to_string(frame_count_); - } else { + if (output_as_pngs) { + std::string out_name = "output_"; + for (unsigned int i = 0; i < 9; ++i) { + if (frame_count_ < (unsigned int)std::pow(10, i)) { + out_name += "0"; + } + } out_name += std::to_string(frame_count_); + out_name += ".png"; + if (!dithered_image->SaveAsPNG(out_name, true)) { + return {false, {}}; + } + return {true, {}}; } - out_name += ".png"; - if (!dithered_image->SaveAsPNG(out_name, overwrite)) { - return false; + + // convert grayscale/RGBA to YUV444p + if (sws_enc_context_ != nullptr && color_changed) { + // switched between grayscale/RGBA, context needs to be recreated + sws_freeContext(sws_enc_context_); + sws_enc_context_ = nullptr; + } + if (sws_enc_context_ == nullptr) { + sws_enc_context_ = sws_getContext( + frame->width, frame->height, + grayscale ? 
AVPixelFormat::AV_PIX_FMT_GRAY8 + : AVPixelFormat::AV_PIX_FMT_RGBA, + frame->width, frame->height, AVPixelFormat::AV_PIX_FMT_YUV444P, + SWS_BILINEAR, nullptr, nullptr, nullptr); + if (sws_enc_context_ == nullptr) { + std::cout << "ERROR: Failed to init sws_enc_context_" << std::endl; + return {false, {}}; + } + } + + // rgba data info + if (grayscale) { + av_frame_free(&temp_frame); + temp_frame = av_frame_alloc(); + temp_frame->format = AVPixelFormat::AV_PIX_FMT_GRAY8; + temp_frame->width = frame->width; + temp_frame->height = frame->height; + return_value = av_frame_get_buffer(temp_frame, 0); + if (return_value != 0) { + std::cout + << "ERROR: Failed to init temp_frame for conversion from grayscale" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; + } + std::memcpy(temp_frame->data[0], dithered_image->data_.data(), + frame->width * frame->height); + } else { + temp_frame->format = AVPixelFormat::AV_PIX_FMT_RGBA; + temp_frame->width = frame->width; + temp_frame->height = frame->height; + return_value = av_frame_get_buffer(temp_frame, 0); + if (return_value != 0) { + std::cout << "ERROR: Failed to init temp_frame for conversion from RGBA" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; + } + std::memcpy(temp_frame->data[0], dithered_image->data_.data(), + 4 * frame->width * frame->height); + } + + AVFrame *yuv_frame = av_frame_alloc(); + if (frame == nullptr) { + std::cout + << "ERROR: Failed to alloc AVFrame for receiving YUV444p from RGBA" + << std::endl; + av_frame_free(&temp_frame); + return {false, {}}; + } + yuv_frame->format = AVPixelFormat::AV_PIX_FMT_YUV444P; + yuv_frame->width = frame->width; + yuv_frame->height = frame->height; + return_value = av_frame_get_buffer(yuv_frame, 0); + + return_value = + sws_scale(sws_enc_context_, temp_frame->data, temp_frame->linesize, 0, + frame->height, yuv_frame->data, yuv_frame->linesize); + if (return_value <= 0) { + std::cout << "ERROR: Failed to convert RGBA to YUV444p with 
sws_scale" + << std::endl; + av_frame_free(&yuv_frame); + av_frame_free(&temp_frame); + return {false, {}}; } - // TODO encode video with dithered_image // cleanup - for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; ++i) { - free(dst[i]); + av_frame_free(&temp_frame); + yuv_frame->pts = frame_count_ - 1; + yuv_frame->pkt_duration = 1; + return_frames.push_back(yuv_frame); + } + + return {true, return_frames}; +} + +bool Video::HandleEncodingFrame(AVFormatContext *enc_format_ctx, + AVCodecContext *enc_codec_ctx, + AVFrame *yuv_frame, AVStream *video_stream) { + int return_value; + + return_value = avcodec_send_frame(enc_codec_ctx, yuv_frame); + if (return_value < 0) { + std::cout << "ERROR: Failed to send frame to encoder" << std::endl; + return false; + } + + AVPacket pkt; + std::memset(&pkt, 0, sizeof(AVPacket)); + while (return_value >= 0) { + std::memset(&pkt, 0, sizeof(AVPacket)); + + return_value = avcodec_receive_packet(enc_codec_ctx, &pkt); + if (return_value == AVERROR(EAGAIN) || return_value == AVERROR_EOF) { + break; + } else if (return_value < 0) { + std::cout << "ERROR: Failed to encode a frame" << std::endl; + return false; + } + + // rescale timing fields (timestamps / durations) + av_packet_rescale_ts(&pkt, enc_codec_ctx->time_base, + video_stream->time_base); + pkt.stream_index = video_stream->index; + + // write frame + return_value = av_interleaved_write_frame(enc_format_ctx, &pkt); + av_packet_unref(&pkt); + if (return_value < 0) { + std::cout << "ERROR: Failed to write encoding packet" << std::endl; + return false; } } diff --git a/src/video.h b/src/video.h index 3e6c2e7..ebfc7d0 100644 --- a/src/video.h +++ b/src/video.h @@ -1,8 +1,11 @@ #ifndef IGPUP_DITHERING_PROJECT_VIDEO_H_ #define IGPUP_DITHERING_PROJECT_VIDEO_H_ +#include + extern "C" { #include +#include #include } @@ -13,6 +16,8 @@ constexpr unsigned int kReadBufPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE; constexpr unsigned int kReadBufSizeWithPadding = kReadBufSize + 
kReadBufPaddingSize; +constexpr unsigned int kOutputBitrate = 40000000; + /*! * \brief Helper class that uses Image and OpenCLHandle to dither video frames. * @@ -35,30 +40,39 @@ class Video { /// Same as DitherVideo(const std::string&, Image*, bool, bool) bool DitherVideo(const char *output_filename, Image *blue_noise, - bool grayscale = false, bool overwrite = false); + bool grayscale = false, bool overwrite = false, + bool output_as_pngs = false); /*! * \brief Dithers the frames in the input video. * - * Currently, the program doesn't create the output video, but instead outputs - * each frame as an individual image in the current directory. If things go - * well, the expected behavior will be implemented soon. + * If output_as_pngs is true, then the output will be individual PNGs of each + * frame instead of a video file. This may be desirable because the output + * video struggles to maintain video quality. * * \return True on success. */ bool DitherVideo(const std::string &output_filename, Image *blue_noise, - bool grayscale = false, bool overwrite = false); + bool grayscale = false, bool overwrite = false, + bool output_as_pngs = false); private: Image image_; std::string input_filename_; - SwsContext *sws_context_; + SwsContext *sws_dec_context_; + SwsContext *sws_enc_context_; unsigned int frame_count_; unsigned int packet_count_; + bool was_grayscale_; - bool HandleDecodingPacket(AVCodecContext *codec_ctx, AVPacket *pkt, - AVFrame *frame, Image *blue_noise, bool grayscale, - bool overwrite); + std::tuple> HandleDecodingPacket( + AVCodecContext *codec_ctx, AVPacket *pkt, AVFrame *frame, + Image *blue_noise, bool grayscale, bool color_changed, + bool output_as_pngs); + + bool HandleEncodingFrame(AVFormatContext *enc_format_ctx, + AVCodecContext *enc_codec_ctx, AVFrame *yuv_frame, + AVStream *video_stream); }; #endif