Merge "Move buffer from extend_and_predict into TileWorkerData"
diff --git a/test/simple_encode_test.cc b/test/simple_encode_test.cc
new file mode 100644
index 0000000..82d0963
--- /dev/null
+++ b/test/simple_encode_test.cc
@@ -0,0 +1,112 @@
+#include <math.h>
+#include <memory>
+#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vp9/simple_encode.h"
+
+namespace vp9 {
+namespace {
+
+// TODO(angiebird): Find a better way to construct encode info
+const int w = 352;
+const int h = 288;
+const int frame_rate_num = 30;
+const int frame_rate_den = 1;
+const int target_bitrate = 1000;
+const int num_frames = 17;
+const char infile_path[] = "bus_352x288_420_f20_b8.yuv";
+
+double GetBitrateInKbps(size_t bit_size, int num_frames, int frame_rate_num,
+                        int frame_rate_den) {
+  return static_cast<double>(bit_size) / num_frames * frame_rate_num /
+         frame_rate_den / 1000.;
+}
+
+TEST(SimpleEncode, ComputeFirstPassStats) {
+  SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den,
+                             target_bitrate, num_frames, infile_path);
+  simple_encode.ComputeFirstPassStats();
+  std::vector<std::vector<double>> frame_stats =
+      simple_encode.ObserveFirstPassStats();
+  EXPECT_EQ(frame_stats.size(), static_cast<size_t>(num_frames));
+  size_t data_num = frame_stats[0].size();
+  // Read ObserveFirstPassStats before changing FIRSTPASS_STATS.
+  EXPECT_EQ(data_num, static_cast<size_t>(25));
+  for (size_t i = 0; i < frame_stats.size(); ++i) {
+    EXPECT_EQ(frame_stats[i].size(), data_num);
+    // FIRSTPASS_STATS's first element is frame
+    EXPECT_EQ(frame_stats[i][0], i);
+    // FIRSTPASS_STATS's last element is count, and the count is 1 for single
+    // frame stats
+    EXPECT_EQ(frame_stats[i][data_num - 1], 1);
+  }
+}
+
+TEST(SimpleEncode, GetCodingFrameNum) {
+  SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den,
+                             target_bitrate, num_frames, infile_path);
+  simple_encode.ComputeFirstPassStats();
+  int num_coding_frames = simple_encode.GetCodingFrameNum();
+  EXPECT_EQ(num_coding_frames, 19);
+}
+
+TEST(SimpleEncode, EncodeFrame) {
+  SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den,
+                             target_bitrate, num_frames, infile_path);
+  simple_encode.ComputeFirstPassStats();
+  int num_coding_frames = simple_encode.GetCodingFrameNum();
+  EXPECT_GE(num_coding_frames, num_frames);
+  // The coding frames include actual show frames and alternate reference
+  // frames, i.e. frames that are not shown.
+  int ref_num_alternate_refereces = num_coding_frames - num_frames;
+  int num_alternate_refereces = 0;
+  simple_encode.StartEncode();
+  size_t total_data_bit_size = 0;
+  for (int i = 0; i < num_coding_frames; ++i) {
+    EncodeFrameResult encode_frame_result;
+    simple_encode.EncodeFrame(&encode_frame_result);
+    if (i == 0) {
+      EXPECT_EQ(encode_frame_result.show_idx, 0);
+      EXPECT_EQ(encode_frame_result.frame_type, kKeyFrame)
+          << "The first coding frame should be key frame";
+    }
+    if (encode_frame_result.frame_type == kAlternateReference) {
+      ++num_alternate_refereces;
+    }
+    EXPECT_GE(encode_frame_result.show_idx, 0);
+    EXPECT_LT(encode_frame_result.show_idx, num_frames);
+    if (i == num_coding_frames - 1) {
+      EXPECT_EQ(encode_frame_result.show_idx, num_frames - 1)
+          << "The last coding frame should be the last display order";
+    }
+    EXPECT_GE(encode_frame_result.psnr, 34)
+        << "The psnr is supposed to be greater than 34 given the "
+           "target_bitrate 1000 kbps";
+    total_data_bit_size += encode_frame_result.coding_data_bit_size;
+  }
+  EXPECT_EQ(num_alternate_refereces, ref_num_alternate_refereces);
+  const double bitrate = GetBitrateInKbps(total_data_bit_size, num_frames,
+                                          frame_rate_num, frame_rate_den);
+  const double off_target_threshold = 150;
+  EXPECT_LE(fabs(target_bitrate - bitrate), off_target_threshold);
+  simple_encode.EndEncode();
+}
+
+TEST(SimpleEncode, EncodeFrameWithQuantizeIndex) {
+  SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den,
+                             target_bitrate, num_frames, infile_path);
+  simple_encode.ComputeFirstPassStats();
+  int num_coding_frames = simple_encode.GetCodingFrameNum();
+  simple_encode.StartEncode();
+  for (int i = 0; i < num_coding_frames; ++i) {
+    const int assigned_quantize_index = 100 + i;
+    EncodeFrameResult encode_frame_result;
+    simple_encode.EncodeFrameWithQuantizeIndex(&encode_frame_result,
+                                               assigned_quantize_index);
+    EXPECT_EQ(encode_frame_result.quantize_index, assigned_quantize_index);
+  }
+  simple_encode.EndEncode();
+}
+}  // namespace
+
+}  // namespace vp9
diff --git a/test/test-data.mk b/test/test-data.mk
index 27a9557..905f013 100644
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -26,6 +26,7 @@
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv
+LIBVPX_TEST_DATA-$(CONFIG_RATE_CTRL) += bus_352x288_420_f20_b8.yuv
 
 # Test vectors
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
diff --git a/test/test-data.sha1 b/test/test-data.sha1
index 88f1e10..8f0084c 100644
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -1,3 +1,4 @@
+3eaf216d9fc8b4b9bb8c3956311f49a85974806c *bus_352x288_420_f20_b8.yuv
 d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv
 b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
 76024eb753cdac6a5e5703aaea189d35c3c30ac7 *invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf
diff --git a/test/test.mk b/test/test.mk
index 88bd2a3..b4a5ea0 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -58,6 +58,7 @@
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.h
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_end_to_end_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += timestamp_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_RATE_CTRL)   += simple_encode_test.cc
 
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 3c7db4a..8ca7eb6 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -460,8 +460,8 @@
 #endif  // !CONFIG_REALTIME_ONLY
 
 // Test for whether to calculate metrics for the frame.
-static int is_psnr_calc_enabled(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
+static int is_psnr_calc_enabled(const VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
 
   return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame;
@@ -1468,7 +1468,7 @@
   }
 }
 
-static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
+static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
 
   cpi->oxcf = *oxcf;
@@ -2171,7 +2171,100 @@
   }
 }
 
-VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
+static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
+                                 int subsampling_x, int subsampling_y) {
+  VP9_COMMON *const cm = &cpi->common;
+#if !CONFIG_VP9_HIGHBITDEPTH
+  (void)use_highbitdepth;
+  assert(use_highbitdepth == 0);
+#endif
+
+  if (!cpi->initial_width ||
+#if CONFIG_VP9_HIGHBITDEPTH
+      cm->use_highbitdepth != use_highbitdepth ||
+#endif
+      cm->subsampling_x != subsampling_x ||
+      cm->subsampling_y != subsampling_y) {
+    cm->subsampling_x = subsampling_x;
+    cm->subsampling_y = subsampling_y;
+#if CONFIG_VP9_HIGHBITDEPTH
+    cm->use_highbitdepth = use_highbitdepth;
+#endif
+
+    cpi->initial_width = cm->width;
+    cpi->initial_height = cm->height;
+    cpi->initial_mbs = cm->MBs;
+  }
+}
+
+// TODO(angiebird): Check whether we can move this function to vpx_image.c
+static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt,
+                                              unsigned int *subsampling_x,
+                                              unsigned int *subsampling_y) {
+  switch (fmt) {
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_I422:
+    case VPX_IMG_FMT_I42016:
+    case VPX_IMG_FMT_I42216: *subsampling_x = 1; break;
+    default: *subsampling_x = 0; break;
+  }
+
+  switch (fmt) {
+    case VPX_IMG_FMT_I420:
+    case VPX_IMG_FMT_I440:
+    case VPX_IMG_FMT_YV12:
+    case VPX_IMG_FMT_I42016:
+    case VPX_IMG_FMT_I44016: *subsampling_y = 1; break;
+    default: *subsampling_y = 0; break;
+  }
+}
+
+// TODO(angiebird): Check whether we can move this function to vpx_image.c
+static INLINE int vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) {
+  return fmt & VPX_IMG_FMT_HIGHBITDEPTH;
+}
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+static void setup_denoiser_buffer(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  if (cpi->oxcf.noise_sensitivity > 0 &&
+      !cpi->denoiser.frame_buffer_initialized) {
+    if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
+                           cpi->oxcf.noise_sensitivity, cm->width, cm->height,
+                           cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                           cm->use_highbitdepth,
+#endif
+                           VP9_ENC_BORDER_IN_PIXELS))
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Failed to allocate denoiser");
+  }
+}
+#endif
+
+void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) {
+  const VP9EncoderConfig *oxcf = &cpi->oxcf;
+  unsigned int subsampling_x, subsampling_y;
+  const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt);
+  vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y);
+
+  update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+  setup_denoiser_buffer(cpi);
+#endif
+
+  assert(cpi->lookahead == NULL);
+  cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x,
+                                      subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+                                      use_highbitdepth,
+#endif
+                                      oxcf->lag_in_frames);
+  alloc_raw_frame_buffers(cpi);
+}
+
+VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
                                 BufferPool *const pool) {
   unsigned int i;
   VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
@@ -2554,6 +2647,10 @@
 
   cm->error.setjmp = 0;
 
+#if CONFIG_RATE_CTRL
+  encode_command_init(&cpi->encode_command);
+#endif
+
   return cpi;
 }
 
@@ -2761,30 +2858,19 @@
 #endif
 }
 
-static void generate_psnr_packet(VP9_COMP *cpi) {
-  struct vpx_codec_cx_pkt pkt;
-  int i;
-  PSNR_STATS psnr;
+int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
+  if (is_psnr_calc_enabled(cpi)) {
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr,
-                       cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
+    vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr,
+                         cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
 #else
-  vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, &psnr);
+    vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
 #endif
-
-  for (i = 0; i < 4; ++i) {
-    pkt.data.psnr.samples[i] = psnr.samples[i];
-    pkt.data.psnr.sse[i] = psnr.sse[i];
-    pkt.data.psnr.psnr[i] = psnr.psnr[i];
+    return 1;
+  } else {
+    vp9_zero(*psnr);
+    return 0;
   }
-  pkt.kind = VPX_CODEC_PSNR_PKT;
-  if (cpi->use_svc)
-    cpi->svc
-        .layer_context[cpi->svc.spatial_layer_id *
-                       cpi->svc.number_temporal_layers]
-        .psnr_pkt = pkt.data.psnr;
-  else
-    vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
 }
 
 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
@@ -3620,24 +3706,6 @@
 #endif  // CONFIG_VP9_POSTPROC
 }
 
-#if CONFIG_VP9_TEMPORAL_DENOISING
-static void setup_denoiser_buffer(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if (cpi->oxcf.noise_sensitivity > 0 &&
-      !cpi->denoiser.frame_buffer_initialized) {
-    if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
-                           cpi->oxcf.noise_sensitivity, cm->width, cm->height,
-                           cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
-                           cm->use_highbitdepth,
-#endif
-                           VP9_ENC_BORDER_IN_PIXELS))
-      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
-                         "Failed to allocate denoiser");
-  }
-}
-#endif
-
 static void init_motion_estimation(VP9_COMP *cpi) {
   int y_stride = cpi->scaled_source.y_stride;
 
@@ -4218,6 +4286,14 @@
       vp9_scale_references(cpi);
     }
 
+#if CONFIG_RATE_CTRL
+    // TODO(angiebird): This is a hack for making sure the encoder use the
+    // external_quantize_index exactly. Avoid this kind of hack later.
+    if (cpi->encode_command.use_external_quantize_index) {
+      q = cpi->encode_command.external_quantize_index;
+    }
+#endif
+
     vp9_set_quantizer(cm, q);
 
     if (loop_count == 0) setup_frame(cpi);
@@ -4256,6 +4332,16 @@
       if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
     }
 
+#if CONFIG_RATE_CTRL
+    // This part needs to be after save_coding_context() because
+    // restore_coding_context may be called in the end of this function.
+    // TODO(angiebird): This is a hack for making sure the encoder use the
+    // external_quantize_index exactly. Avoid this kind of hack later.
+    if (cpi->encode_command.use_external_quantize_index) {
+      break;
+    }
+#endif
+
     if (oxcf->rc_mode == VPX_Q) {
       loop = 0;
     } else {
@@ -4799,17 +4885,6 @@
   }
 }
 
-// Implementation and modifications of C. Yeo, H. L. Tan, and Y. H. Tan, "On
-// rate distortion optimization using SSIM," Circuits and Systems for Video
-// Technology, IEEE Transactions on, vol. 23, no. 7, pp. 1170-1181, 2013.
-// SSIM_VAR_SCALE defines the strength of the bias towards SSIM in RDO.
-// Some sample values are:
-// (for midres test set)
-// SSIM_VAR_SCALE  avg_psnr   ssim   ms_ssim
-//      8.0          9.421   -5.537  -6.898
-//     16.0          4.703   -5.378  -6.238
-//     32.0          1.929   -4.308  -4.807
-#define SSIM_VAR_SCALE 16.0
 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
   ThreadData *td = &cpi->td;
@@ -4826,8 +4901,6 @@
   double log_sum = 0.0;
   int row, col;
 
-  const double c2 = 58.5225 * SSIM_VAR_SCALE;  // 58.5225 = (.03*255)^2
-
   // Loop through each 64x64 block.
   for (row = 0; row < num_rows; ++row) {
     for (col = 0; col < num_cols; ++col) {
@@ -4861,7 +4934,10 @@
         }
       }
       var = var / num_of_var / 64.0;
-      var = 2.0 * var + c2;
+
+      // Curve fitting with an exponential model on all 16x16 blocks from the
+      // Midres dataset.
+      var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
       cpi->mi_ssim_rdmult_scaling_factors[index] = var;
       log_sum += log(var);
     }
@@ -5310,37 +5386,9 @@
   mismatch_move_frame_idx_w();
 #endif
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
-
-  vp9_twopass_postencode_update(cpi);
 }
 #endif  // !CONFIG_REALTIME_ONLY
 
-static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth,
-                                 int subsampling_x, int subsampling_y) {
-  VP9_COMMON *const cm = &cpi->common;
-#if !CONFIG_VP9_HIGHBITDEPTH
-  (void)use_highbitdepth;
-  assert(use_highbitdepth == 0);
-#endif
-
-  if (!cpi->initial_width ||
-#if CONFIG_VP9_HIGHBITDEPTH
-      cm->use_highbitdepth != use_highbitdepth ||
-#endif
-      cm->subsampling_x != subsampling_x ||
-      cm->subsampling_y != subsampling_y) {
-    cm->subsampling_x = subsampling_x;
-    cm->subsampling_y = subsampling_y;
-#if CONFIG_VP9_HIGHBITDEPTH
-    cm->use_highbitdepth = use_highbitdepth;
-#endif
-
-    cpi->initial_width = cm->width;
-    cpi->initial_height = cm->height;
-    cpi->initial_mbs = cm->MBs;
-  }
-}
-
 int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
                           int64_t end_time) {
@@ -5365,10 +5413,7 @@
   vpx_usec_timer_start(&timer);
 
   if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
-#if CONFIG_VP9_HIGHBITDEPTH
-                         use_highbitdepth,
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-                         frame_flags))
+                         use_highbitdepth, frame_flags))
     res = -1;
   vpx_usec_timer_mark(&timer);
   cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
@@ -7052,9 +7097,39 @@
 #endif  // CONFIG_NON_GREEDY_MV
 }
 
+static void init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
+  encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
+}
+
+#if !CONFIG_REALTIME_ONLY
+static void update_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result,
+                                       int show_idx,
+                                       FRAME_UPDATE_TYPE update_type,
+                                       const YV12_BUFFER_CONFIG *source_frame,
+                                       const YV12_BUFFER_CONFIG *coded_frame,
+                                       int quantize_index, uint32_t bit_depth,
+                                       uint32_t input_bit_depth) {
+  PSNR_STATS psnr;
+#if CONFIG_VP9_HIGHBITDEPTH
+  vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
+                       input_bit_depth);
+#else
+  (void)bit_depth;
+  (void)input_bit_depth;
+  vpx_calc_psnr(source_frame, coded_frame, &psnr);
+#endif
+  encode_frame_result->psnr = psnr.psnr[0];
+  encode_frame_result->sse = psnr.sse[0];
+  encode_frame_result->show_idx = show_idx;
+  encode_frame_result->update_type = update_type;
+  encode_frame_result->quantize_index = quantize_index;
+}
+#endif  // !CONFIG_REALTIME_ONLY
+
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest, int64_t *time_stamp,
-                            int64_t *time_end, int flush) {
+                            int64_t *time_end, int flush,
+                            ENCODE_FRAME_RESULT *encode_frame_result) {
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   VP9_COMMON *const cm = &cpi->common;
   BufferPool *const pool = cm->buffer_pool;
@@ -7066,6 +7141,7 @@
   int arf_src_index;
   const int gf_group_index = cpi->twopass.gf_group.index;
   int i;
+  init_encode_frame_result(encode_frame_result);
 
   if (is_one_pass_cbr_svc(cpi)) {
     vp9_one_pass_cbr_svc_start_layer(cpi);
@@ -7201,12 +7277,6 @@
     *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
   } else {
     *size = 0;
-#if !CONFIG_REALTIME_ONLY
-    if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
-      vp9_end_first_pass(cpi); /* get last stats packet */
-      cpi->twopass.first_pass_done = 1;
-    }
-#endif  // !CONFIG_REALTIME_ONLY
     return -1;
   }
 
@@ -7333,6 +7403,25 @@
     vp9_first_pass(cpi, source);
   } else if (oxcf->pass == 2 && !cpi->use_svc) {
     Pass2Encode(cpi, size, dest, frame_flags);
+    // update_encode_frame_result() requires that twopass.gf_group.index,
+    // cm->new_fb_idx and cpi->Source have been updated for the current frame
+    // and have not yet been updated for the next frame.
+    // The update locations are as follows.
+    // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero()
+    // for the first frame in the gf_group and is updated for the next frame at
+    // vp9_twopass_postencode_update().
+    // 2) cpi->Source is updated at the beginning of this function, i.e.
+    // vp9_get_compressed_data()
+    // 3) cm->new_fb_idx is updated at the beginning of this function by
+    // get_free_fb(cm)
+    // TODO(angiebird): Improve the codebase to make the update of frame
+    // dependent variables more robust.
+    update_encode_frame_result(
+        encode_frame_result, source->show_idx,
+        cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+        cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi),
+        cm->bit_depth, cpi->oxcf.input_bit_depth);
+    vp9_twopass_postencode_update(cpi);
   } else if (cpi->use_svc) {
     SvcEncode(cpi, size, dest, frame_flags);
   } else {
@@ -7365,9 +7454,6 @@
   vpx_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
 
-  // Should we calculate metrics for the frame.
-  if (is_psnr_calc_enabled(cpi)) generate_psnr_packet(cpi);
-
   if (cpi->keep_level_stats && oxcf->pass != 1)
     update_level_info(cpi, size, arf_src_index);
 
@@ -7641,7 +7727,7 @@
   return;
 }
 
-int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; }
+int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; }
 
 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
   if (flags &
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 0de47e6..0a8623e 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -20,8 +20,10 @@
 #include "vpx_dsp/ssim.h"
 #endif
 #include "vpx_dsp/variance.h"
+#include "vpx_dsp/psnr.h"
 #include "vpx_ports/system_state.h"
 #include "vpx_util/vpx_thread.h"
+#include "vpx_util/vpx_timestamp.h"
 
 #include "vp9/common/vp9_alloccommon.h"
 #include "vp9/common/vp9_ppflags.h"
@@ -152,7 +154,10 @@
   int height;                    // height of data passed to the compressor
   unsigned int input_bit_depth;  // Input bit depth.
   double init_framerate;         // set to passed in framerate
-  int64_t target_bandwidth;      // bandwidth to be used in bits per second
+  vpx_rational_t g_timebase;  // equivalent to g_timebase in vpx_codec_enc_cfg_t
+  vpx_rational64_t g_timebase_in_ts;  // g_timebase * TICKS_PER_SEC
+
+  int64_t target_bandwidth;  // bandwidth to be used in bits per second
 
   int noise_sensitivity;  // pre processing blur: recommendation 0
   int sharpness;          // sharpening output: recommendation 0:
@@ -259,7 +264,6 @@
   unsigned int target_level;
 
   vpx_fixed_buf_t two_pass_stats_in;
-  struct vpx_codec_pkt_list *output_pkt_list;
 
 #if CONFIG_FP_MB_STATS
   vpx_fixed_buf_t firstpass_mb_stats_in;
@@ -512,6 +516,31 @@
   int group_idx;
 } KMEANS_DATA;
 
+#if CONFIG_RATE_CTRL
+typedef struct ENCODE_COMMAND {
+  int use_external_quantize_index;
+  int external_quantize_index;
+} ENCODE_COMMAND;
+
+static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) {
+  vp9_zero(*encode_command);
+  encode_command->use_external_quantize_index = 0;
+  encode_command->external_quantize_index = -1;
+}
+
+static INLINE void encode_command_set_external_quantize_index(
+    ENCODE_COMMAND *encode_command, int quantize_index) {
+  encode_command->use_external_quantize_index = 1;
+  encode_command->external_quantize_index = quantize_index;
+}
+
+static INLINE void encode_command_reset_external_quantize_index(
+    ENCODE_COMMAND *encode_command) {
+  encode_command->use_external_quantize_index = 0;
+  encode_command->external_quantize_index = -1;
+}
+#endif  // CONFIG_RATE_CTRL
+
 typedef struct VP9_COMP {
   FRAME_INFO frame_info;
   QUANTS quants;
@@ -816,11 +845,23 @@
 
   int multi_layer_arf;
   vpx_roi_map_t roi;
+#if CONFIG_RATE_CTRL
+  ENCODE_COMMAND encode_command;
+#endif
 } VP9_COMP;
 
+typedef struct ENCODE_FRAME_RESULT {
+  int show_idx;
+  FRAME_UPDATE_TYPE update_type;
+  double psnr;
+  uint64_t sse;
+  int quantize_index;
+} ENCODE_FRAME_RESULT;
+
 void vp9_initialize_enc(void);
 
-struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
+void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt);
+struct VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
                                        BufferPool *const pool);
 void vp9_remove_compressor(VP9_COMP *cpi);
 
@@ -834,7 +875,8 @@
 
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest, int64_t *time_stamp,
-                            int64_t *time_end, int flush);
+                            int64_t *time_end, int flush,
+                            ENCODE_FRAME_RESULT *encode_frame_result);
 
 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                               vp9_ppflags_t *flags);
@@ -886,7 +928,7 @@
   for (idx = 0; idx < length; ++idx) stack[idx] = -1;
 }
 
-int vp9_get_quantizer(struct VP9_COMP *cpi);
+int vp9_get_quantizer(const VP9_COMP *cpi);
 
 static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
   return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
@@ -1059,6 +1101,8 @@
 
 void vp9_set_row_mt(VP9_COMP *cpi);
 
+int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr);
+
 #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
 
 #ifdef __cplusplus
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index ee2294f..acc4be3 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -84,14 +84,8 @@
   return 1;
 }
 
-static void output_stats(FIRSTPASS_STATS *stats,
-                         struct vpx_codec_pkt_list *pktlist) {
-  struct vpx_codec_cx_pkt pkt;
-  pkt.kind = VPX_CODEC_STATS_PKT;
-  pkt.data.twopass_stats.buf = stats;
-  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
-  vpx_codec_pkt_list_add(pktlist, &pkt);
-
+static void output_stats(FIRSTPASS_STATS *stats) {
+  (void)stats;
 // TEMP debug code
 #if OUTPUT_FPF
   {
@@ -319,7 +313,8 @@
 }
 
 void vp9_end_first_pass(VP9_COMP *cpi) {
-  output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
+  output_stats(&cpi->twopass.total_stats);
+  cpi->twopass.first_pass_done = 1;
   vpx_free(cpi->twopass.fp_mb_float_stats);
   cpi->twopass.fp_mb_float_stats = NULL;
 }
@@ -1428,7 +1423,7 @@
 
     // Don't want to do output stats with a stack variable!
     twopass->this_frame_stats = fps;
-    output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
+    output_stats(&twopass->this_frame_stats);
     accumulate_stats(&twopass->total_stats, &fps);
 
 #if CONFIG_FP_MB_STATS
@@ -3690,3 +3685,10 @@
   return coding_frame_num;
 }
 #endif
+
+FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) {
+  return twopass->this_frame_stats;
+}
+FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *twopass) {
+  return twopass->total_stats;
+}
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index b1bdf17..408ff3a 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -256,6 +256,9 @@
                              int multi_layer_arf, int allow_alt_ref);
 #endif
 
+FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *two_pass);
+FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *two_pass);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index 392cd5d..97838c3 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -64,6 +64,7 @@
     unsigned int i;
     ctx->max_sz = depth;
     ctx->buf = calloc(depth, sizeof(*ctx->buf));
+    ctx->next_show_idx = 0;
     if (!ctx->buf) goto bail;
     for (i = 0; i < depth; i++)
       if (vpx_alloc_frame_buffer(
@@ -81,12 +82,16 @@
 }
 
 #define USE_PARTIAL_COPY 0
+int vp9_lookahead_full(const struct lookahead_ctx *ctx) {
+  return ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz;
+}
+
+int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx) {
+  return ctx->next_show_idx;
+}
 
 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
-                       int64_t ts_start, int64_t ts_end,
-#if CONFIG_VP9_HIGHBITDEPTH
-                       int use_highbitdepth,
-#endif
+                       int64_t ts_start, int64_t ts_end, int use_highbitdepth,
                        vpx_enc_frame_flags_t flags) {
   struct lookahead_entry *buf;
 #if USE_PARTIAL_COPY
@@ -101,8 +106,12 @@
   int subsampling_x = src->subsampling_x;
   int subsampling_y = src->subsampling_y;
   int larger_dimensions, new_dimensions;
+#if !CONFIG_VP9_HIGHBITDEPTH
+  (void)use_highbitdepth;
+  assert(use_highbitdepth == 0);
+#endif
 
-  if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1;
+  if (vp9_lookahead_full(ctx)) return 1;
   ctx->sz++;
   buf = pop(ctx, &ctx->write_idx);
 
@@ -184,6 +193,8 @@
   buf->ts_start = ts_start;
   buf->ts_end = ts_end;
   buf->flags = flags;
+  buf->show_idx = ctx->next_show_idx;
+  ++ctx->next_show_idx;
   return 0;
 }
 
diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h
index c627bed..dbbe3af 100644
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -25,6 +25,7 @@
   YV12_BUFFER_CONFIG img;
   int64_t ts_start;
   int64_t ts_end;
+  int show_idx; /* The show_idx of this frame. */
   vpx_enc_frame_flags_t flags;
 };
 
@@ -32,10 +33,12 @@
 #define MAX_PRE_FRAMES 1
 
 struct lookahead_ctx {
-  int max_sz;                  /* Absolute size of the queue */
-  int sz;                      /* Number of buffers currently in the queue */
-  int read_idx;                /* Read index */
-  int write_idx;               /* Write index */
+  int max_sz;        /* Absolute size of the queue */
+  int sz;            /* Number of buffers currently in the queue */
+  int read_idx;      /* Read index */
+  int write_idx;     /* Write index */
+  int next_show_idx; /* The show_idx that will be assigned to the next frame
+                        being pushed in the queue*/
   struct lookahead_entry *buf; /* Buffer list */
 };
 
@@ -57,6 +60,23 @@
  */
 void vp9_lookahead_destroy(struct lookahead_ctx *ctx);
 
+/**\brief Check if lookahead is full
+ *
+ * \param[in] ctx         Pointer to the lookahead context
+ *
+ * Return 1 if lookahead is full, otherwise return 0.
+ */
+int vp9_lookahead_full(const struct lookahead_ctx *ctx);
+
+/**\brief Return the next_show_idx
+ *
+ * \param[in] ctx         Pointer to the lookahead context
+ *
+ * Return the show_idx that will be assigned to the next
+ * frame pushed by vp9_lookahead_push()
+ */
+int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx);
+
 /**\brief Enqueue a source buffer
  *
  * This function will copy the source image into a new framebuffer with
@@ -73,10 +93,7 @@
  * \param[in] active_map  Map that specifies which macroblock is active
  */
 int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
-                       int64_t ts_start, int64_t ts_end,
-#if CONFIG_VP9_HIGHBITDEPTH
-                       int use_highbitdepth,
-#endif
+                       int64_t ts_start, int64_t ts_end, int use_highbitdepth,
                        vpx_enc_frame_flags_t flags);
 
 /**\brief Get the next source buffer to encode
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 4b20963..f1ba779 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -58,7 +58,6 @@
   int gold_ref_idx;
   int has_alt_frame;
   size_t layer_size;
-  struct vpx_psnr_pkt psnr_pkt;
   // Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame.
   // TODO(jianj/marpan): Is it better to use the full cyclic refresh struct.
   int sb_index;
diff --git a/vp9/simple_encode.cc b/vp9/simple_encode.cc
new file mode 100644
index 0000000..6bf55c9
--- /dev/null
+++ b/vp9/simple_encode.cc
@@ -0,0 +1,306 @@
+#include <vector>
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/vp9_iface_common.h"
+#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/simple_encode.h"
+#include "vp9/vp9_cx_iface.h"
+
+namespace vp9 {
+
+// TODO(angiebird): Merge this function with vpx_img_plane_width()
+static int img_plane_width(const vpx_image_t *img, int plane) {
+  if (plane > 0 && img->x_chroma_shift > 0)
+    return (img->d_w + 1) >> img->x_chroma_shift;
+  else
+    return img->d_w;
+}
+
+// TODO(angiebird): Merge this function with vpx_img_plane_height()
+static int img_plane_height(const vpx_image_t *img, int plane) {
+  if (plane > 0 && img->y_chroma_shift > 0)
+    return (img->d_h + 1) >> img->y_chroma_shift;
+  else
+    return img->d_h;
+}
+
+// TODO(angiebird): Merge this function with vpx_img_read()
+static int img_read(vpx_image_t *img, FILE *file) {
+  int plane;
+
+  for (plane = 0; plane < 3; ++plane) {
+    unsigned char *buf = img->planes[plane];
+    const int stride = img->stride[plane];
+    const int w = img_plane_width(img, plane) *
+                  ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1);
+    const int h = img_plane_height(img, plane);
+    int y;
+
+    for (y = 0; y < h; ++y) {
+      if (fread(buf, 1, w, file) != (size_t)w) return 0;
+      buf += stride;
+    }
+  }
+
+  return 1;
+}
+
+class SimpleEncode::EncodeImpl {
+ public:
+  VP9_COMP *cpi;
+  vpx_img_fmt_t img_fmt;
+  vpx_image_t tmp_img;
+  std::vector<FIRSTPASS_STATS> first_pass_stats;
+};
+
+static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf,
+                              vpx_img_fmt_t img_fmt) {
+  VP9_COMP *cpi;
+  BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool));
+  vp9_initialize_enc();
+  cpi = vp9_create_compressor(oxcf, buffer_pool);
+  vp9_update_compressor_with_img_fmt(cpi, img_fmt);
+  return cpi;
+}
+
+static void free_encoder(VP9_COMP *cpi) {
+  BufferPool *buffer_pool = cpi->common.buffer_pool;
+  vp9_remove_compressor(cpi);
+  // buffer_pool needs to be freed after cpi because buffer_pool contains
+  // allocated buffers that will be freed in vp9_remove_compressor()
+  vpx_free(buffer_pool);
+}
+
+static INLINE vpx_rational_t make_vpx_rational(int num, int den) {
+  vpx_rational_t v;
+  v.num = num;
+  v.den = den;
+  return v;
+}
+
+static INLINE FrameType
+get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) {
+  // TODO(angiebird): Figure out if we need frame type other than key frame,
+  // alternate reference and inter frame
+  switch (update_type) {
+    case KF_UPDATE: return kKeyFrame; break;
+    case ARF_UPDATE: return kAlternateReference; break;
+    default: return kInterFrame; break;
+  }
+}
+
+static void update_encode_frame_result(
+    EncodeFrameResult *encode_frame_result,
+    const ENCODE_FRAME_RESULT *encode_frame_info) {
+  encode_frame_result->coding_data_bit_size =
+      encode_frame_result->coding_data_byte_size * 8;
+  encode_frame_result->show_idx = encode_frame_info->show_idx;
+  encode_frame_result->frame_type =
+      get_frame_type_from_update_type(encode_frame_info->update_type);
+  encode_frame_result->psnr = encode_frame_info->psnr;
+  encode_frame_result->sse = encode_frame_info->sse;
+  encode_frame_result->quantize_index = encode_frame_info->quantize_index;
+}
+
+SimpleEncode::SimpleEncode(int frame_width, int frame_height,
+                           int frame_rate_num, int frame_rate_den,
+                           int target_bitrate, int num_frames,
+                           const char *infile_path) {
+  impl_ptr_ = std::unique_ptr<EncodeImpl>(new EncodeImpl());
+  frame_width_ = frame_width;
+  frame_height_ = frame_height;
+  frame_rate_num_ = frame_rate_num;
+  frame_rate_den_ = frame_rate_den;
+  target_bitrate_ = target_bitrate;
+  num_frames_ = num_frames;
+  // TODO(angirbid): Should we keep a file pointer here or keep the file_path?
+  file_ = fopen(infile_path, "r");
+  impl_ptr_->cpi = NULL;
+  impl_ptr_->img_fmt = VPX_IMG_FMT_I420;
+}
+
+void SimpleEncode::ComputeFirstPassStats() {
+  vpx_rational_t frame_rate =
+      make_vpx_rational(frame_rate_num_, frame_rate_den_);
+  const VP9EncoderConfig oxcf =
+      vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+                             target_bitrate_, VPX_RC_FIRST_PASS);
+  VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+  struct lookahead_ctx *lookahead = cpi->lookahead;
+  int i;
+  int use_highbitdepth = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+  use_highbitdepth = cpi->common.use_highbitdepth;
+#endif
+  vpx_image_t img;
+  vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
+  rewind(file_);
+  impl_ptr_->first_pass_stats.clear();
+  for (i = 0; i < num_frames_; ++i) {
+    assert(!vp9_lookahead_full(lookahead));
+    if (img_read(&img, file_)) {
+      int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
+      int64_t ts_start =
+          timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx);
+      int64_t ts_end =
+          timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1);
+      YV12_BUFFER_CONFIG sd;
+      image2yuvconfig(&img, &sd);
+      vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
+      {
+        int64_t time_stamp;
+        int64_t time_end;
+        int flush = 1;  // Makes vp9_get_compressed_data process a frame
+        size_t size;
+        unsigned int frame_flags = 0;
+        ENCODE_FRAME_RESULT encode_frame_info;
+        // TODO(angiebird): Call vp9_first_pass directly
+        vp9_get_compressed_data(cpi, &frame_flags, &size, NULL, &time_stamp,
+                                &time_end, flush, &encode_frame_info);
+        // vp9_get_compressed_data only generates first pass stats; it does
+        // not compress data
+        assert(size == 0);
+      }
+      impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
+    }
+  }
+  vp9_end_first_pass(cpi);
+  // TODO(angiebird): Store the total_stats apart from first_pass_stats
+  impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass));
+  free_encoder(cpi);
+  rewind(file_);
+  vpx_img_free(&img);
+}
+
+std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
+  std::vector<std::vector<double>> output_stats;
+  // TODO(angiebird): This function makes several assumptions about
+  // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the
+  // last one. 2) The last entry of first_pass_stats is the total_stats.
+  // Change the code structure, so that we don't have to make these assumptions
+
+  // Note the last entry of first_pass_stats is the total_stats, we don't need
+  // it.
+  for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) {
+    double *buf_start =
+        reinterpret_cast<double *>(&impl_ptr_->first_pass_stats[i]);
+    // We use - 1 here because the last member in FIRSTPASS_STATS is not double
+    double *buf_end =
+        buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) -
+        1;
+    std::vector<double> this_stats(buf_start, buf_end);
+    output_stats.push_back(this_stats);
+  }
+  return output_stats;
+}
+
+void SimpleEncode::StartEncode() {
+  assert(impl_ptr_->first_pass_stats.size() > 0);
+  vpx_rational_t frame_rate =
+      make_vpx_rational(frame_rate_num_, frame_rate_den_);
+  VP9EncoderConfig oxcf =
+      vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+                             target_bitrate_, VPX_RC_LAST_PASS);
+  vpx_fixed_buf_t stats;
+  stats.buf = impl_ptr_->first_pass_stats.data();
+  stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) *
+             impl_ptr_->first_pass_stats.size();
+
+  vp9_set_first_pass_stats(&oxcf, &stats);
+  assert(impl_ptr_->cpi == NULL);
+  impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+  vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_,
+                frame_height_, 1);
+  rewind(file_);
+}
+
+void SimpleEncode::EndEncode() {
+  free_encoder(impl_ptr_->cpi);
+  impl_ptr_->cpi = nullptr;
+  vpx_img_free(&impl_ptr_->tmp_img);
+  rewind(file_);
+}
+
+void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) {
+  VP9_COMP *cpi = impl_ptr_->cpi;
+  struct lookahead_ctx *lookahead = cpi->lookahead;
+  int use_highbitdepth = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+  use_highbitdepth = cpi->common.use_highbitdepth;
+#endif
+  // The lookahead's size is set to oxcf->lag_in_frames.
+  // We want to fill lookahead to its max capacity if possible so that the
+  // encoder can construct alt ref frame in time.
+  // In other words, we hope vp9_get_compressed_data encodes a frame
+  // every time in the function
+  while (!vp9_lookahead_full(lookahead)) {
+    // TODO(angiebird): Check whether we can move this file read logics to
+    // lookahead
+    if (img_read(&impl_ptr_->tmp_img, file_)) {
+      int next_show_idx = vp9_lookahead_next_show_idx(lookahead);
+      int64_t ts_start =
+          timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx);
+      int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts,
+                                               next_show_idx + 1);
+      YV12_BUFFER_CONFIG sd;
+      image2yuvconfig(&impl_ptr_->tmp_img, &sd);
+      vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0);
+    } else {
+      break;
+    }
+  }
+  assert(encode_frame_result->coding_data.get() == nullptr);
+  const size_t max_coding_data_byte_size = frame_width_ * frame_height_ * 3;
+  encode_frame_result->coding_data = std::move(
+      std::unique_ptr<uint8_t[]>(new uint8_t[max_coding_data_byte_size]));
+  int64_t time_stamp;
+  int64_t time_end;
+  int flush = 1;  // Make vp9_get_compressed_data encode a frame
+  unsigned int frame_flags = 0;
+  ENCODE_FRAME_RESULT encode_frame_info;
+  vp9_get_compressed_data(cpi, &frame_flags,
+                          &encode_frame_result->coding_data_byte_size,
+                          encode_frame_result->coding_data.get(), &time_stamp,
+                          &time_end, flush, &encode_frame_info);
+  // vp9_get_compressed_data is expected to encode a frame every time, so the
+  // data size should be greater than zero.
+  assert(encode_frame_result->coding_data_byte_size > 0);
+  assert(encode_frame_result->coding_data_byte_size <
+         max_coding_data_byte_size);
+
+  update_encode_frame_result(encode_frame_result, &encode_frame_info);
+}
+
+void SimpleEncode::EncodeFrameWithQuantizeIndex(
+    EncodeFrameResult *encode_frame_result, int quantize_index) {
+  encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command,
+                                             quantize_index);
+  EncodeFrame(encode_frame_result);
+  encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command);
+}
+
+int SimpleEncode::GetCodingFrameNum() {
+  assert(impl_ptr_->first_pass_stats.size() - 1 > 0);
+  // These are the default settings for now.
+  const int multi_layer_arf = 0;
+  const int allow_alt_ref = 1;
+  vpx_rational_t frame_rate =
+      make_vpx_rational(frame_rate_num_, frame_rate_den_);
+  const VP9EncoderConfig oxcf =
+      vp9_get_encoder_config(frame_width_, frame_height_, frame_rate,
+                             target_bitrate_, VPX_RC_LAST_PASS);
+  FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
+  FIRST_PASS_INFO first_pass_info;
+  fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(),
+                           num_frames_);
+  return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,
+                                  multi_layer_arf, allow_alt_ref);
+}
+
+SimpleEncode::~SimpleEncode() {
+  if (this->file_ != NULL) {
+    fclose(this->file_);
+  }
+}
+
+}  // namespace vp9
diff --git a/vp9/simple_encode.h b/vp9/simple_encode.h
new file mode 100644
index 0000000..5c1bd20
--- /dev/null
+++ b/vp9/simple_encode.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_SIMPLE_ENCODE_H_
+#define VPX_VP9_SIMPLE_ENCODE_H_
+
+#include <cstdio>
+#include <memory>
+#include <vector>
+
+namespace vp9 {
+
+enum FrameType {
+  kKeyFrame = 0,
+  kInterFrame,
+  kAlternateReference,
+};
+
+struct EncodeFrameResult {
+  int show_idx;
+  FrameType frame_type;
+  size_t coding_data_bit_size;
+  size_t coding_data_byte_size;
+  // The EncodeFrame will allocate a buffer, write the coding data into the
+  // buffer and give the ownership of the buffer to coding_data.
+  std::unique_ptr<unsigned char[]> coding_data;
+  double psnr;
+  uint64_t sse;
+  int quantize_index;
+};
+
+class SimpleEncode {
+ public:
+  SimpleEncode(int frame_width, int frame_height, int frame_rate_num,
+               int frame_rate_den, int target_bitrate, int num_frames,
+               const char *infile_path);
+  ~SimpleEncode();
+  SimpleEncode(SimpleEncode &) = delete;
+  SimpleEncode &operator=(const SimpleEncode &) = delete;
+
+  // Makes encoder compute the first pass stats and store it internally for
+  // future encode.
+  void ComputeFirstPassStats();
+
+  // Outputs the first pass stats.
+  std::vector<std::vector<double>> ObserveFirstPassStats();
+
+  // Initializes the encoder for actual encoding.
+  // This function should be called after ComputeFirstPassStats().
+  void StartEncode();
+
+  // Frees the encoder.
+  // This function should be called after StartEncode() or EncodeFrame().
+  void EndEncode();
+
+  // Encodes a frame
+  // This function should be called after StartEncode() and before EndEncode().
+  void EncodeFrame(EncodeFrameResult *encode_frame_result);
+
+  // Encodes a frame with a specific quantize index.
+  // This function should be called after StartEncode() and before EndEncode().
+  void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result,
+                                    int quantize_index);
+
+  // Gets the number of coding frames for the video. The coding frames include
+  // show frame and no show frame.
+  // This function should be called after ComputeFirstPassStats().
+  int GetCodingFrameNum();
+
+ private:
+  class EncodeImpl;
+  int frame_width_;
+  int frame_height_;
+  int frame_rate_num_;
+  int frame_rate_den_;
+  int target_bitrate_;
+  int num_frames_;
+  std::FILE *file_;
+  std::unique_ptr<EncodeImpl> impl_ptr_;
+};
+
+}  // namespace vp9
+
+#endif  // VPX_VP9_SIMPLE_ENCODE_H_
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index c9a5566..5ef2f89 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -10,6 +10,7 @@
 
 VP9_COMMON_SRCS-yes += vp9_common.mk
 VP9_COMMON_SRCS-yes += vp9_iface_common.h
+VP9_COMMON_SRCS-yes += vp9_iface_common.c
 VP9_COMMON_SRCS-yes += common/vp9_ppflags.h
 VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c
 VP9_COMMON_SRCS-yes += common/vp9_blockd.c
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index e452be9..3f51708 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -13,6 +13,7 @@
 
 #include "./vpx_config.h"
 #include "vpx/vpx_encoder.h"
+#include "vpx_dsp/psnr.h"
 #include "vpx_ports/vpx_once.h"
 #include "vpx_ports/system_state.h"
 #include "vpx_util/vpx_timestamp.h"
@@ -21,7 +22,9 @@
 #include "vp9/encoder/vp9_encoder.h"
 #include "vpx/vp8cx.h"
 #include "vp9/common/vp9_alloccommon.h"
+#include "vp9/vp9_cx_iface.h"
 #include "vp9/encoder/vp9_firstpass.h"
+#include "vp9/encoder/vp9_lookahead.h"
 #include "vp9/vp9_cx_iface.h"
 #include "vp9/vp9_iface_common.h"
 
@@ -468,6 +471,15 @@
   }
 }
 
+static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) {
+  vpx_rational64_t g_timebase_in_ts;
+  g_timebase_in_ts.den = g_timebase.den;
+  g_timebase_in_ts.num = g_timebase.num;
+  g_timebase_in_ts.num *= TICKS_PER_SEC;
+  reduce_ratio(&g_timebase_in_ts);
+  return g_timebase_in_ts;
+}
+
 static vpx_codec_err_t set_encoder_config(
     VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg,
     const struct vp9_extracfg *extra_cfg) {
@@ -479,9 +491,13 @@
   oxcf->height = cfg->g_h;
   oxcf->bit_depth = cfg->g_bit_depth;
   oxcf->input_bit_depth = cfg->g_input_bit_depth;
+  // TODO(angiebird): Figure out if we can just use g_timebase to indicate the
+  // inverse of framerate
   // guess a frame rate if out of whack, use 30
   oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num;
   if (oxcf->init_framerate > 180) oxcf->init_framerate = 30;
+  oxcf->g_timebase = cfg->g_timebase;
+  oxcf->g_timebase_in_ts = get_g_timebase_in_ts(oxcf->g_timebase);
 
   oxcf->mode = GOOD;
 
@@ -550,7 +566,7 @@
   }
   oxcf->sharpness = extra_cfg->sharpness;
 
-  oxcf->two_pass_stats_in = cfg->rc_twopass_stats_in;
+  vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in);
 
 #if CONFIG_FP_MB_STATS
   oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
@@ -943,10 +959,9 @@
 
     if (res == VPX_CODEC_OK) {
       priv->pts_offset_initialized = 0;
-      priv->timestamp_ratio.den = priv->cfg.g_timebase.den;
-      priv->timestamp_ratio.num = (int64_t)priv->cfg.g_timebase.num;
-      priv->timestamp_ratio.num *= TICKS_PER_SEC;
-      reduce_ratio(&priv->timestamp_ratio);
+      // TODO(angiebird): Replace priv->timestamp_ratio by
+      // oxcf->g_timebase_in_ts
+      priv->timestamp_ratio = get_g_timebase_in_ts(priv->cfg.g_timebase);
 
       set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg);
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -954,10 +969,7 @@
           (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
 #endif
       priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool);
-      if (priv->cpi == NULL)
-        res = VPX_CODEC_MEM_ERROR;
-      else
-        priv->cpi->output_pkt_list = &priv->pkt_list.head;
+      if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR;
     }
   }
 
@@ -1075,18 +1087,6 @@
   return index_sz;
 }
 
-static int64_t timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio,
-                                       int64_t n) {
-  return n * timestamp_ratio->num / timestamp_ratio->den;
-}
-
-static int64_t ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio,
-                                       int64_t n) {
-  int64_t round = timestamp_ratio->num / 2;
-  if (round > 0) --round;
-  return (n * timestamp_ratio->den + round) / timestamp_ratio->num;
-}
-
 static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
                                                    unsigned int lib_flags) {
   vpx_codec_frame_flags_t flags = lib_flags << 16;
@@ -1104,6 +1104,27 @@
   return flags;
 }
 
+static INLINE vpx_codec_cx_pkt_t get_psnr_pkt(const PSNR_STATS *psnr) {
+  vpx_codec_cx_pkt_t pkt;
+  pkt.kind = VPX_CODEC_PSNR_PKT;
+  pkt.data.psnr = *psnr;
+  return pkt;
+}
+
+#if !CONFIG_REALTIME_ONLY
+static INLINE vpx_codec_cx_pkt_t
+get_first_pass_stats_pkt(FIRSTPASS_STATS *stats) {
+  // WARNING: This function assumes that stats will
+  // exist and not be changed until the packet is processed
+  // TODO(angiebird): Refactor the code to avoid using the assumption.
+  vpx_codec_cx_pkt_t pkt;
+  pkt.kind = VPX_CODEC_STATS_PKT;
+  pkt.data.twopass_stats.buf = stats;
+  pkt.data.twopass_stats.sz = sizeof(*stats);
+  return pkt;
+}
+#endif
+
 const size_t kMinCompressedSize = 8192;
 static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
                                       const vpx_image_t *img,
@@ -1223,90 +1244,135 @@
       }
     }
 
-    while (cx_data_sz >= ctx->cx_data_sz / 2 &&
-           -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
-                                         &dst_time_stamp, &dst_end_time_stamp,
-                                         !img)) {
-      if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
-        // Pack invisible frames with the next visible frame
-        if (!cpi->common.show_frame ||
-            (cpi->use_svc &&
-             cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) {
-          if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
-          ctx->pending_cx_data_sz += size;
-          if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-          ctx->pending_frame_magnitude |= size;
-          cx_data += size;
-          cx_data_sz -= size;
+    if (cpi->oxcf.pass == 1 && !cpi->use_svc) {
+#if !CONFIG_REALTIME_ONLY
+      // compute first pass stats
+      if (img) {
+        int ret;
+        ENCODE_FRAME_RESULT encode_frame_result;
+        vpx_codec_cx_pkt_t fps_pkt;
+        // TODO(angiebird): Call vp9_first_pass directly
+        ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                      &dst_time_stamp, &dst_end_time_stamp,
+                                      !img, &encode_frame_result);
+        assert(size == 0);  // There is no compressed data in the first pass
+        (void)ret;
+        assert(ret == 0);
+        fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.this_frame_stats);
+        vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+      } else {
+        if (!cpi->twopass.first_pass_done) {
+          vpx_codec_cx_pkt_t fps_pkt;
+          vp9_end_first_pass(cpi);
+          fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.total_stats);
+          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt);
+        }
+      }
+#else   // !CONFIG_REALTIME_ONLY
+      assert(0);
+#endif  // !CONFIG_REALTIME_ONLY
+    } else {
+      ENCODE_FRAME_RESULT encode_frame_result;
+      while (cx_data_sz >= ctx->cx_data_sz / 2 &&
+             -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
+                                           &dst_time_stamp, &dst_end_time_stamp,
+                                           !img, &encode_frame_result)) {
+        // Pack psnr pkt
+        if (size > 0 && !cpi->use_svc) {
+          // TODO(angiebird): Figure out why we don't need a psnr pkt when
+          // use_svc is on
+          PSNR_STATS psnr;
+          if (vp9_get_psnr(cpi, &psnr)) {
+            vpx_codec_cx_pkt_t psnr_pkt = get_psnr_pkt(&psnr);
+            vpx_codec_pkt_list_add(&ctx->pkt_list.head, &psnr_pkt);
+          }
+        }
+
+        if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
+          // Pack invisible frames with the next visible frame
+          if (!cpi->common.show_frame ||
+              (cpi->use_svc && cpi->svc.spatial_layer_id <
+                                   cpi->svc.number_spatial_layers - 1)) {
+            if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
+            ctx->pending_cx_data_sz += size;
+            if (size)
+              ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+            ctx->pending_frame_magnitude |= size;
+            cx_data += size;
+            cx_data_sz -= size;
+            pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+            pkt.data.frame.height[cpi->svc.spatial_layer_id] =
+                cpi->common.height;
+            pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+                1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
+
+            if (ctx->output_cx_pkt_cb.output_cx_pkt) {
+              pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+              pkt.data.frame.pts =
+                  ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+                  ctx->pts_offset;
+              pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+                  timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
+              pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+              pkt.data.frame.buf = ctx->pending_cx_data;
+              pkt.data.frame.sz = size;
+              ctx->pending_cx_data = NULL;
+              ctx->pending_cx_data_sz = 0;
+              ctx->pending_frame_count = 0;
+              ctx->pending_frame_magnitude = 0;
+              ctx->output_cx_pkt_cb.output_cx_pkt(
+                  &pkt, ctx->output_cx_pkt_cb.user_priv);
+            }
+            continue;
+          }
+
+          // Add the frame packet to the list of returned packets.
+          pkt.kind = VPX_CODEC_CX_FRAME_PKT;
+          pkt.data.frame.pts =
+              ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
+              ctx->pts_offset;
+          pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
+              timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
+          pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
           pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
           pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
           pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
               1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
 
-          if (ctx->output_cx_pkt_cb.output_cx_pkt) {
-            pkt.kind = VPX_CODEC_CX_FRAME_PKT;
-            pkt.data.frame.pts =
-                ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
-                ctx->pts_offset;
-            pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
-                timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
-            pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+          if (ctx->pending_cx_data) {
+            if (size)
+              ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+            ctx->pending_frame_magnitude |= size;
+            ctx->pending_cx_data_sz += size;
+            // write the superframe index only when no output callback is set
+            if (!ctx->output_cx_pkt_cb.output_cx_pkt)
+              size += write_superframe_index(ctx);
             pkt.data.frame.buf = ctx->pending_cx_data;
-            pkt.data.frame.sz = size;
+            pkt.data.frame.sz = ctx->pending_cx_data_sz;
             ctx->pending_cx_data = NULL;
             ctx->pending_cx_data_sz = 0;
             ctx->pending_frame_count = 0;
             ctx->pending_frame_magnitude = 0;
+          } else {
+            pkt.data.frame.buf = cx_data;
+            pkt.data.frame.sz = size;
+          }
+          pkt.data.frame.partition_id = -1;
+
+          if (ctx->output_cx_pkt_cb.output_cx_pkt)
             ctx->output_cx_pkt_cb.output_cx_pkt(
                 &pkt, ctx->output_cx_pkt_cb.user_priv);
+          else
+            vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
+
+          cx_data += size;
+          cx_data_sz -= size;
+          if (is_one_pass_cbr_svc(cpi) &&
+              (cpi->svc.spatial_layer_id ==
+               cpi->svc.number_spatial_layers - 1)) {
+            // Encoded all spatial layers; exit loop.
+            break;
           }
-          continue;
-        }
-
-        // Add the frame packet to the list of returned packets.
-        pkt.kind = VPX_CODEC_CX_FRAME_PKT;
-        pkt.data.frame.pts =
-            ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
-            ctx->pts_offset;
-        pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
-            timestamp_ratio, dst_end_time_stamp - dst_time_stamp);
-        pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
-        pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
-        pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
-        pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
-            1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
-
-        if (ctx->pending_cx_data) {
-          if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
-          ctx->pending_frame_magnitude |= size;
-          ctx->pending_cx_data_sz += size;
-          // write the superframe only for the case when
-          if (!ctx->output_cx_pkt_cb.output_cx_pkt)
-            size += write_superframe_index(ctx);
-          pkt.data.frame.buf = ctx->pending_cx_data;
-          pkt.data.frame.sz = ctx->pending_cx_data_sz;
-          ctx->pending_cx_data = NULL;
-          ctx->pending_cx_data_sz = 0;
-          ctx->pending_frame_count = 0;
-          ctx->pending_frame_magnitude = 0;
-        } else {
-          pkt.data.frame.buf = cx_data;
-          pkt.data.frame.sz = size;
-        }
-        pkt.data.frame.partition_id = -1;
-
-        if (ctx->output_cx_pkt_cb.output_cx_pkt)
-          ctx->output_cx_pkt_cb.output_cx_pkt(&pkt,
-                                              ctx->output_cx_pkt_cb.user_priv);
-        else
-          vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
-
-        cx_data += size;
-        cx_data_sz -= size;
-        if (is_one_pass_cbr_svc(cpi) &&
-            (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
-          // Encoded all spatial layers; exit loop.
-          break;
         }
       }
     }
@@ -1831,6 +1897,7 @@
 };
 
 static vpx_codec_enc_cfg_t get_enc_cfg(int frame_width, int frame_height,
+                                       vpx_rational_t frame_rate,
                                        int target_bitrate,
                                        vpx_enc_pass enc_pass) {
   vpx_codec_enc_cfg_t enc_cfg = encoder_usage_cfg_map[0].cfg;
@@ -1838,10 +1905,9 @@
   enc_cfg.g_h = frame_height;
   enc_cfg.rc_target_bitrate = target_bitrate;
   enc_cfg.g_pass = enc_pass;
-  // Use the same default setting as the one used in vpxenc.c
-  // The default unit time for the encoder is 1/1000 s.
-  enc_cfg.g_timebase.num = 1;
-  enc_cfg.g_timebase.den = 1000;
+  // g_timebase is the inverse of frame_rate
+  enc_cfg.g_timebase.num = frame_rate.den;
+  enc_cfg.g_timebase.den = frame_rate.num;
   return enc_cfg;
 }
 
@@ -1855,12 +1921,13 @@
 }
 
 VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+                                        vpx_rational_t frame_rate,
                                         int target_bitrate,
                                         vpx_enc_pass enc_pass) {
   VP9EncoderConfig oxcf;
   vp9_extracfg extra_cfg = get_extra_cfg();
-  vpx_codec_enc_cfg_t enc_cfg =
-      get_enc_cfg(frame_width, frame_height, target_bitrate, enc_pass);
+  vpx_codec_enc_cfg_t enc_cfg = get_enc_cfg(
+      frame_width, frame_height, frame_rate, target_bitrate, enc_pass);
   set_encoder_config(&oxcf, &enc_cfg, &extra_cfg);
   return oxcf;
 }
@@ -1880,3 +1947,8 @@
   // TODO(angiebird): Figure out how to get subsampling_x/y here
   return frame_info;
 }
+
+void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf,
+                              const vpx_fixed_buf_t *stats) {
+  oxcf->two_pass_stats_in = *stats;
+}
diff --git a/vp9/vp9_cx_iface.h b/vp9/vp9_cx_iface.h
index 74c6ff3..59d8642 100644
--- a/vp9/vp9_cx_iface.h
+++ b/vp9/vp9_cx_iface.h
@@ -18,10 +18,26 @@
 #endif
 
 VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height,
+                                        vpx_rational_t frame_rate,
                                         int target_bitrate,
                                         vpx_enc_pass enc_pass);
 FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf);
 
+static INLINE int64_t
+timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, int64_t n) {
+  return n * timestamp_ratio->num / timestamp_ratio->den;
+}
+
+static INLINE int64_t
+ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, int64_t n) {
+  int64_t round = timestamp_ratio->num / 2;
+  if (round > 0) --round;
+  return (n * timestamp_ratio->den + round) / timestamp_ratio->num;
+}
+
+void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf,
+                              const vpx_fixed_buf_t *stats);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/vp9_iface_common.c b/vp9/vp9_iface_common.c
new file mode 100644
index 0000000..74d08a5
--- /dev/null
+++ b/vp9/vp9_iface_common.c
@@ -0,0 +1,131 @@
+/*
+ *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed  by a BSD-style license that can be
+ *  found in the LICENSE file in the root of the source tree. An additional
+ *  intellectual property  rights grant can  be found in the  file PATENTS.
+ *  All contributing  project authors may be  found in the AUTHORS  file in
+ *  the root of the source tree.
+ */
+
+#include "vp9/vp9_iface_common.h"
+void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+                     void *user_priv) {
+  /** vpx_img_wrap() doesn't allow specifying independent strides for
+   * the Y, U, and V planes, nor other alignment adjustments that
+   * might be representable by a YV12_BUFFER_CONFIG, so we just
+   * initialize all the fields.*/
+  int bps;
+  if (!yv12->subsampling_y) {
+    if (!yv12->subsampling_x) {
+      img->fmt = VPX_IMG_FMT_I444;
+      bps = 24;
+    } else {
+      img->fmt = VPX_IMG_FMT_I422;
+      bps = 16;
+    }
+  } else {
+    if (!yv12->subsampling_x) {
+      img->fmt = VPX_IMG_FMT_I440;
+      bps = 16;
+    } else {
+      img->fmt = VPX_IMG_FMT_I420;
+      bps = 12;
+    }
+  }
+  img->cs = yv12->color_space;
+  img->range = yv12->color_range;
+  img->bit_depth = 8;
+  img->w = yv12->y_stride;
+  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
+  img->d_w = yv12->y_crop_width;
+  img->d_h = yv12->y_crop_height;
+  img->r_w = yv12->render_width;
+  img->r_h = yv12->render_height;
+  img->x_chroma_shift = yv12->subsampling_x;
+  img->y_chroma_shift = yv12->subsampling_y;
+  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
+  img->planes[VPX_PLANE_U] = yv12->u_buffer;
+  img->planes[VPX_PLANE_V] = yv12->v_buffer;
+  img->planes[VPX_PLANE_ALPHA] = NULL;
+  img->stride[VPX_PLANE_Y] = yv12->y_stride;
+  img->stride[VPX_PLANE_U] = yv12->uv_stride;
+  img->stride[VPX_PLANE_V] = yv12->uv_stride;
+  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
+    // vpx_image_t uses byte strides and a pointer to the first byte
+    // of the image.
+    img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
+    img->bit_depth = yv12->bit_depth;
+    img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
+    img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
+    img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
+    img->planes[VPX_PLANE_ALPHA] = NULL;
+    img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
+    img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  img->bps = bps;
+  img->user_priv = user_priv;
+  img->img_data = yv12->buffer_alloc;
+  img->img_data_owner = 0;
+  img->self_allocd = 0;
+}
+
+vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                YV12_BUFFER_CONFIG *yv12) {
+  yv12->y_buffer = img->planes[VPX_PLANE_Y];
+  yv12->u_buffer = img->planes[VPX_PLANE_U];
+  yv12->v_buffer = img->planes[VPX_PLANE_V];
+
+  yv12->y_crop_width = img->d_w;
+  yv12->y_crop_height = img->d_h;
+  yv12->render_width = img->r_w;
+  yv12->render_height = img->r_h;
+  yv12->y_width = img->d_w;
+  yv12->y_height = img->d_h;
+
+  yv12->uv_width =
+      img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
+  yv12->uv_height =
+      img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
+  yv12->uv_crop_width = yv12->uv_width;
+  yv12->uv_crop_height = yv12->uv_height;
+
+  yv12->y_stride = img->stride[VPX_PLANE_Y];
+  yv12->uv_stride = img->stride[VPX_PLANE_U];
+  yv12->color_space = img->cs;
+  yv12->color_range = img->range;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
+    // In vpx_image_t
+    //     planes point to uint8 address of start of data
+    //     stride counts uint8s to reach next row
+    // In YV12_BUFFER_CONFIG
+    //     y_buffer, u_buffer, v_buffer point to uint16 address of data
+    //     stride and border counts in uint16s
+    // This means that all the address calculations in the main body of code
+    // should work correctly.
+    // However, before we do any pixel operations we need to cast the address
+    // to a uint16 pointer and double its value.
+    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
+    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
+    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
+    yv12->y_stride >>= 1;
+    yv12->uv_stride >>= 1;
+    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
+  } else {
+    yv12->flags = 0;
+  }
+  yv12->border = (yv12->y_stride - img->w) / 2;
+#else
+  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  yv12->subsampling_x = img->x_chroma_shift;
+  yv12->subsampling_y = img->y_chroma_shift;
+  return VPX_CODEC_OK;
+}
diff --git a/vp9/vp9_iface_common.h b/vp9/vp9_iface_common.h
index a1921db..e646917 100644
--- a/vp9/vp9_iface_common.h
+++ b/vp9/vp9_iface_common.h
@@ -10,130 +10,24 @@
 #ifndef VPX_VP9_VP9_IFACE_COMMON_H_
 #define VPX_VP9_VP9_IFACE_COMMON_H_
 
+#include <assert.h>
 #include "vpx_ports/mem.h"
+#include "vpx/vp8.h"
+#include "vpx_scale/yv12config.h"
+#include "common/vp9_enums.h"
 
-static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
-                            void *user_priv) {
-  /** vpx_img_wrap() doesn't allow specifying independent strides for
-   * the Y, U, and V planes, nor other alignment adjustments that
-   * might be representable by a YV12_BUFFER_CONFIG, so we just
-   * initialize all the fields.*/
-  int bps;
-  if (!yv12->subsampling_y) {
-    if (!yv12->subsampling_x) {
-      img->fmt = VPX_IMG_FMT_I444;
-      bps = 24;
-    } else {
-      img->fmt = VPX_IMG_FMT_I422;
-      bps = 16;
-    }
-  } else {
-    if (!yv12->subsampling_x) {
-      img->fmt = VPX_IMG_FMT_I440;
-      bps = 16;
-    } else {
-      img->fmt = VPX_IMG_FMT_I420;
-      bps = 12;
-    }
-  }
-  img->cs = yv12->color_space;
-  img->range = yv12->color_range;
-  img->bit_depth = 8;
-  img->w = yv12->y_stride;
-  img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
-  img->d_w = yv12->y_crop_width;
-  img->d_h = yv12->y_crop_height;
-  img->r_w = yv12->render_width;
-  img->r_h = yv12->render_height;
-  img->x_chroma_shift = yv12->subsampling_x;
-  img->y_chroma_shift = yv12->subsampling_y;
-  img->planes[VPX_PLANE_Y] = yv12->y_buffer;
-  img->planes[VPX_PLANE_U] = yv12->u_buffer;
-  img->planes[VPX_PLANE_V] = yv12->v_buffer;
-  img->planes[VPX_PLANE_ALPHA] = NULL;
-  img->stride[VPX_PLANE_Y] = yv12->y_stride;
-  img->stride[VPX_PLANE_U] = yv12->uv_stride;
-  img->stride[VPX_PLANE_V] = yv12->uv_stride;
-  img->stride[VPX_PLANE_ALPHA] = yv12->y_stride;
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) {
-    // vpx_image_t uses byte strides and a pointer to the first byte
-    // of the image.
-    img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
-    img->bit_depth = yv12->bit_depth;
-    img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer);
-    img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer);
-    img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer);
-    img->planes[VPX_PLANE_ALPHA] = NULL;
-    img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
-    img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
-    img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
-    img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  img->bps = bps;
-  img->user_priv = user_priv;
-  img->img_data = yv12->buffer_alloc;
-  img->img_data_owner = 0;
-  img->self_allocd = 0;
-}
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
-                                       YV12_BUFFER_CONFIG *yv12) {
-  yv12->y_buffer = img->planes[VPX_PLANE_Y];
-  yv12->u_buffer = img->planes[VPX_PLANE_U];
-  yv12->v_buffer = img->planes[VPX_PLANE_V];
+void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
+                     void *user_priv);
 
-  yv12->y_crop_width = img->d_w;
-  yv12->y_crop_height = img->d_h;
-  yv12->render_width = img->r_w;
-  yv12->render_height = img->r_h;
-  yv12->y_width = img->d_w;
-  yv12->y_height = img->d_h;
+vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                YV12_BUFFER_CONFIG *yv12);
 
-  yv12->uv_width =
-      img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width;
-  yv12->uv_height =
-      img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height;
-  yv12->uv_crop_width = yv12->uv_width;
-  yv12->uv_crop_height = yv12->uv_height;
-
-  yv12->y_stride = img->stride[VPX_PLANE_Y];
-  yv12->uv_stride = img->stride[VPX_PLANE_U];
-  yv12->color_space = img->cs;
-  yv12->color_range = img->range;
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
-    // In vpx_image_t
-    //     planes point to uint8 address of start of data
-    //     stride counts uint8s to reach next row
-    // In YV12_BUFFER_CONFIG
-    //     y_buffer, u_buffer, v_buffer point to uint16 address of data
-    //     stride and border counts in uint16s
-    // This means that all the address calculations in the main body of code
-    // should work correctly.
-    // However, before we do any pixel operations we need to cast the address
-    // to a uint16 ponter and double its value.
-    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
-    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
-    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
-    yv12->y_stride >>= 1;
-    yv12->uv_stride >>= 1;
-    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
-  } else {
-    yv12->flags = 0;
-  }
-  yv12->border = (yv12->y_stride - img->w) / 2;
-#else
-  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  yv12->subsampling_x = img->x_chroma_shift;
-  yv12->subsampling_y = img->y_chroma_shift;
-  return VPX_CODEC_OK;
-}
-
-static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
+static INLINE VP9_REFFRAME
+ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
   switch (frame) {
     case VP8_LAST_FRAME: return VP9_LAST_FLAG;
     case VP8_GOLD_FRAME: return VP9_GOLD_FLAG;
@@ -142,4 +36,9 @@
   assert(0 && "Invalid Reference Frame");
   return VP9_LAST_FLAG;
 }
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
 #endif  // VPX_VP9_VP9_IFACE_COMMON_H_
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 666f228..ad77450 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -18,6 +18,9 @@
 VP9_CX_SRCS-yes += vp9_cx_iface.c
 VP9_CX_SRCS-yes += vp9_cx_iface.h
 
+VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc
+VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h
+
 VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
 VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
diff --git a/vpx_dsp/psnr.h b/vpx_dsp/psnr.h
index a556355..9ebb64d 100644
--- a/vpx_dsp/psnr.h
+++ b/vpx_dsp/psnr.h
@@ -12,6 +12,7 @@
 #define VPX_VPX_DSP_PSNR_H_
 
 #include "vpx_scale/yv12config.h"
+#include "vpx/vpx_encoder.h"
 
 #define MAX_PSNR 100.0
 
@@ -19,11 +20,7 @@
 extern "C" {
 #endif
 
-typedef struct {
-  double psnr[4];       // total/y/u/v
-  uint64_t sse[4];      // total/y/u/v
-  uint32_t samples[4];  // total/y/u/v
-} PSNR_STATS;
+typedef struct vpx_psnr_pkt PSNR_STATS;
 
 // TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t