Merge "PATENTS: fix a typo: constitutes -> constitute"
diff --git a/configure b/configure
index e05dd69..43969c2 100755
--- a/configure
+++ b/configure
@@ -40,7 +40,6 @@
   ${toggle_vp8}                   VP8 codec support
   ${toggle_vp9}                   VP9 codec support
   ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
-  ${toggle_mem_tracker}           track memory usage
   ${toggle_postproc}              postprocessing
   ${toggle_vp9_postproc}          vp9 specific postprocessing
   ${toggle_multithread}           multithreaded encoding and decoding
@@ -296,9 +295,6 @@
     codec_srcs
     debug_libs
     fast_unaligned
-    mem_manager
-    mem_tracker
-    mem_checks
 
     dequant_tokens
     dc_recon
@@ -373,7 +369,6 @@
     ${CODECS}
     ${CODEC_FAMILIES}
     static_msvcrt
-    mem_tracker
     spatial_resampling
     realtime_only
     onthefly_bitpacking
diff --git a/test/consistency_test.cc b/test/consistency_test.cc
new file mode 100644
index 0000000..66f694c
--- /dev/null
+++ b/test/consistency_test.cc
@@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_mem/vpx_mem.h"
+
+extern "C"
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                            uint8_t *img2, int img2_pitch,
+                            int width, int height,
+                            Ssimv *sv2, Metrics *m,
+                            int do_inconsistency);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class ConsistencyTestBase : public ::testing::Test {
+ public:
+  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    source_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    ssim_array_ = new Ssimv[kDataBufferSize / 16];
+  }
+
+  static void ClearSsim() {
+    memset(ssim_array_, 0, kDataBufferSize / 16);
+  }
+  static void TearDownTestCase() {
+    vpx_free(source_data_[0]);
+    source_data_[0] = NULL;
+    vpx_free(reference_data_[0]);
+    reference_data_[0] = NULL;
+    vpx_free(source_data_[1]);
+    source_data_[1] = NULL;
+    vpx_free(reference_data_[1]);
+    reference_data_[1] = NULL;
+
+    delete ssim_array_;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle frames up to 640x480
+  static const int kDataAlignment = 16;
+  static const int kDataBufferSize = 640*480;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  void FillRandom(uint8_t *data, int stride, int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = rnd_.Rand8();
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    FillRandom(data, stride, width_, height_);
+  }
+
+  void Copy(uint8_t *reference, uint8_t *source) {
+    memcpy(reference, source, kDataBufferSize);
+  }
+
+  void Blur(uint8_t *data, int stride, int taps) {
+    int sum = 0;
+    int half_taps = taps / 2;
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < taps; ++w) {
+        sum += data[w + h * stride];
+      }
+      for (int w = taps; w < width_; ++w) {
+        sum += data[w + h * stride] - data[w - taps + h * stride];
+        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
+      }
+    }
+    for (int w = 0; w < width_; ++w) {
+      for (int h = 0; h < taps; ++h) {
+        sum += data[h + w * stride];
+      }
+      for (int h = taps; h < height_; ++h) {
+        sum += data[w + h * stride] - data[(h - taps) * stride + w];
+        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
+      }
+    }
+  }
+  int width_, height_;
+  static uint8_t* source_data_[2];
+  int source_stride_;
+  static uint8_t* reference_data_[2];
+  int reference_stride_;
+  static Ssimv *ssim_array_;
+  Metrics metrics_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_VP9_ENCODER
+typedef std::tr1::tuple<int, int> ConsistencyParam;
+class ConsistencyVP9Test
+    : public ConsistencyTestBase,
+      public ::testing::WithParamInterface<ConsistencyParam> {
+ public:
+  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  double CheckConsistency(int frame) {
+    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
+    return
+        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
+                             reference_data_[frame], reference_stride_,
+                             width_, height_, ssim_array_, &metrics_, 1);
+  }
+};
+#endif  // CONFIG_VP9_ENCODER
+
+uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
+uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
+Ssimv* ConsistencyTestBase::ssim_array_ = NULL;
+
+#if CONFIG_VP9_ENCODER
+TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
+  FillRandom(source_data_[0], source_stride_);
+  Copy(source_data_[1], source_data_[0]);
+  Copy(reference_data_[0], source_data_[0]);
+  Blur(reference_data_[0], reference_stride_, 3);
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 3);
+
+  double inconsistency = CheckConsistency(1);
+  inconsistency = CheckConsistency(0);
+  EXPECT_EQ(inconsistency, 0.0)
+      << "Should have 0 inconsistency if they are exactly the same.";
+
+  // If the sources are not consistent, the reference-frame inconsistency
+  // should be less than when the source is consistent.
+  FillRandom(source_data_[0], source_stride_);
+  FillRandom(source_data_[1], source_stride_);
+  FillRandom(reference_data_[0], reference_stride_);
+  FillRandom(reference_data_[1], reference_stride_);
+  CheckConsistency(0);
+  inconsistency = CheckConsistency(1);
+
+  Copy(source_data_[1], source_data_[0]);
+  CheckConsistency(0);
+  double inconsistency2 = CheckConsistency(1);
+  EXPECT_LT(inconsistency, inconsistency2)
+      << "Should have less inconsistency if source itself is inconsistent.";
+
+  // Less of a blur should be less inconsistent than more blur coming off
+  // a frame with no blur.
+  ClearSsim();
+  FillRandom(source_data_[0], source_stride_);
+  Copy(source_data_[1], source_data_[0]);
+  Copy(reference_data_[0], source_data_[0]);
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 4);
+  CheckConsistency(0);
+  inconsistency = CheckConsistency(1);
+  ClearSsim();
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 8);
+  CheckConsistency(0);
+  inconsistency2 = CheckConsistency(1);
+
+  EXPECT_LT(inconsistency, inconsistency2)
+      << "Stronger Blur should produce more inconsistency.";
+}
+#endif  // CONFIG_VP9_ENCODER
+
+
+using std::tr1::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+
+#if CONFIG_VP9_ENCODER
+const ConsistencyParam c_vp9_tests[] = {
+  make_tuple(320, 240),
+  make_tuple(318, 242),
+  make_tuple(318, 238),
+};
+INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
+                        ::testing::ValuesIn(c_vp9_tests));
+#endif
+
+}  // namespace
diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 681887e..23f1553 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -1818,9 +1818,9 @@
 #if HAVE_MSA
 const ConvolveFunctions convolve8_msa(
     vp9_convolve_copy_c, vp9_convolve_avg_c,
-    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
+    vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_c,
     vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_c,
-    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
+    vp9_convolve8_msa, vp9_convolve8_avg_c, 0);
 
 INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
     make_tuple(4, 4, &convolve8_msa),
diff --git a/test/test.mk b/test/test.mk
index 5baf234..91a93f1 100644
--- a/test/test.mk
+++ b/test/test.mk
@@ -151,6 +151,8 @@
 ifeq ($(CONFIG_VP9_ENCODER),yes)
 LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
+
 endif
 
 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
diff --git a/vp9/common/arm/neon/vp9_avg_neon.c b/vp9/common/arm/neon/vp9_convolve_avg_neon.c
similarity index 100%
rename from vp9/common/arm/neon/vp9_avg_neon.c
rename to vp9/common/arm/neon/vp9_convolve_avg_neon.c
diff --git a/vp9/common/arm/neon/vp9_avg_neon_asm.asm b/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm
similarity index 100%
rename from vp9/common/arm/neon/vp9_avg_neon_asm.asm
rename to vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm
diff --git a/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c b/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
new file mode 100644
index 0000000..e224743
--- /dev/null
+++ b/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
@@ -0,0 +1,1045 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+
+static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter) {
+  v16i8 filt0, filt1, filt2, filt3;
+  v16i8 src0, src1, src2, src3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[16]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+
+  XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+  HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+                             filt0, filt1, filt2, filt3, out0, out1);
+
+  out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+  out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+
+  PCKEV_2B_XORI128_STORE_4_BYTES_4(out0, out1, dst, dst_stride);
+}
+
+static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter) {
+  v16i8 filt0, filt1, filt2, filt3;
+  v16i8 src0, src1, src2, src3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1, out2, out3;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[16]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+  src += (4 * src_stride);
+
+  XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+  HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+                             filt0, filt1, filt2, filt3, out0, out1);
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+
+  XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+  HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+                             filt0, filt1, filt2, filt3, out2, out3);
+
+  out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+  out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+  out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+  out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+  PCKEV_2B_XORI128_STORE_4_BYTES_4(out0, out1, dst, dst_stride);
+  dst += (4 * dst_stride);
+  PCKEV_2B_XORI128_STORE_4_BYTES_4(out2, out3, dst, dst_stride);
+}
+
+static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int8_t *filter, int32_t height) {
+  if (4 == height) {
+    common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter);
+  } else if (8 == height) {
+    common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter);
+  }
+}
+
+static void common_hz_8t_8x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter) {
+  v16i8 filt0, filt1, filt2, filt3;
+  v16i8 src0, src1, src2, src3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1, out2, out3;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+
+  XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+  HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+                             filt0, filt1, filt2, filt3, out0, out1, out2,
+                             out3);
+
+  out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+  out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+  out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+  out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+  PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride);
+}
+
+static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int8_t *filter, int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 filt0, filt1, filt2, filt3;
+  v16i8 src0, src1, src2, src3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1, out2, out3;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  for (loop_cnt = (height >> 2); loop_cnt--;) {
+    LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+    HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1,
+                               out2, out3);
+
+    out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+    out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+    out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+    out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+    PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride);
+    dst += (4 * dst_stride);
+  }
+}
+
+static void common_hz_8t_8w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int8_t *filter, int32_t height) {
+  if (4 == height) {
+    common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter);
+  } else {
+    common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
+  }
+}
+
+static void common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter, int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3;
+  v16i8 filt0, filt1, filt2, filt3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1, out2, out3;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  for (loop_cnt = (height >> 1); loop_cnt--;) {
+    src0 = LOAD_SB(src);
+    src1 = LOAD_SB(src + 8);
+    src += src_stride;
+    src2 = LOAD_SB(src);
+    src3 = LOAD_SB(src + 8);
+    src += src_stride;
+
+    XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+    HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1,
+                               out2, out3);
+
+    out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+    out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+    out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+    out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+    PCKEV_B_XORI128_STORE_VEC(out1, out0, dst);
+    dst += dst_stride;
+    PCKEV_B_XORI128_STORE_VEC(out3, out2, dst);
+    dst += dst_stride;
+  }
+}
+
+static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter, int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3;
+  v16i8 filt0, filt1, filt2, filt3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1, out2, out3;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  for (loop_cnt = (height >> 1); loop_cnt--;) {
+    src0 = LOAD_SB(src);
+    src2 = LOAD_SB(src + 16);
+    src3 = LOAD_SB(src + 24);
+    src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8);
+    src += src_stride;
+
+    XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+    HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1,
+                               out2, out3);
+
+    out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+    out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+    out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+    out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+    src0 = LOAD_SB(src);
+    src2 = LOAD_SB(src + 16);
+    src3 = LOAD_SB(src + 24);
+    src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8);
+
+    PCKEV_B_XORI128_STORE_VEC(out1, out0, dst);
+    PCKEV_B_XORI128_STORE_VEC(out3, out2, (dst + 16));
+    dst += dst_stride;
+
+    XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+    HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                               mask3, filt0, filt1, filt2, filt3, out0, out1,
+                               out2, out3);
+
+    out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+    out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+    out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+    out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+    PCKEV_B_XORI128_STORE_VEC(out1, out0, dst);
+    PCKEV_B_XORI128_STORE_VEC(out3, out2, (dst + 16));
+
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter, int32_t height) {
+  uint32_t loop_cnt, cnt;
+  v16i8 src0, src1, src2, src3;
+  v16i8 filt0, filt1, filt2, filt3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt, out0, out1, out2, out3;
+
+  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+
+  src -= 3;
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter);
+  filt0 = (v16i8)__msa_splati_h(filt, 0);
+  filt1 = (v16i8)__msa_splati_h(filt, 1);
+  filt2 = (v16i8)__msa_splati_h(filt, 2);
+  filt3 = (v16i8)__msa_splati_h(filt, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  for (loop_cnt = height; loop_cnt--;) {
+    for (cnt = 0; cnt < 2; ++cnt) {
+      src0 = LOAD_SB(&src[cnt << 5]);
+      src2 = LOAD_SB(&src[16 + (cnt << 5)]);
+      src3 = LOAD_SB(&src[24 + (cnt << 5)]);
+      src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8);
+
+      XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
+
+      HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+                                 mask3, filt0, filt1, filt2, filt3, out0, out1,
+                                 out2, out3);
+
+      out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
+      out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
+      out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
+      out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
+
+      PCKEV_B_XORI128_STORE_VEC(out1, out0, &dst[cnt << 5]);
+      PCKEV_B_XORI128_STORE_VEC(out3, out2, &dst[16 + (cnt << 5)]);
+    }
+
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter) {
+  uint32_t out0, out1, out2, out3;
+  v16i8 src0, src1, src2, src3, mask;
+  v16u8 vec0, vec1, filt0;
+  v16i8 res0, res1;
+  v8u16 vec2, vec3, filt, const255;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[16]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+
+  vec0 = (v16u8)__msa_vshf_b(mask, src1, src0);
+  vec1 = (v16u8)__msa_vshf_b(mask, src3, src2);
+
+  vec2 = __msa_dotp_u_h(vec0, filt0);
+  vec3 = __msa_dotp_u_h(vec1, filt0);
+
+  vec2 = (v8u16)__msa_srari_h((v8i16)vec2, FILTER_BITS);
+  vec3 = (v8u16)__msa_srari_h((v8i16)vec3, FILTER_BITS);
+
+  vec2 = __msa_min_u_h(vec2, const255);
+  vec3 = __msa_min_u_h(vec3, const255);
+
+  res0 = __msa_pckev_b((v16i8)vec2, (v16i8)vec2);
+  res1 = __msa_pckev_b((v16i8)vec3, (v16i8)vec3);
+
+  out0 = __msa_copy_u_w((v4i32)res0, 0);
+  out1 = __msa_copy_u_w((v4i32)res0, 1);
+  out2 = __msa_copy_u_w((v4i32)res1, 0);
+  out3 = __msa_copy_u_w((v4i32)res1, 1);
+
+  STORE_WORD(dst, out0);
+  dst += dst_stride;
+  STORE_WORD(dst, out1);
+  dst += dst_stride;
+  STORE_WORD(dst, out2);
+  dst += dst_stride;
+  STORE_WORD(dst, out3);
+}
+
+static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter) {
+  uint32_t out0, out1, out2, out3;
+  v16u8 filt0;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+  v16u8 vec0, vec1, vec2, vec3;
+  v8u16 vec4, vec5, vec6, vec7;
+  v16i8 res0, res1, res2, res3;
+  v8u16 filt, const255;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[16]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  LOAD_8VECS_SB(src, src_stride,
+                src0, src1, src2, src3, src4, src5, src6, src7);
+
+  vec0 = (v16u8)__msa_vshf_b(mask, src1, src0);
+  vec1 = (v16u8)__msa_vshf_b(mask, src3, src2);
+  vec2 = (v16u8)__msa_vshf_b(mask, src5, src4);
+  vec3 = (v16u8)__msa_vshf_b(mask, src7, src6);
+
+  vec4 = __msa_dotp_u_h(vec0, filt0);
+  vec5 = __msa_dotp_u_h(vec1, filt0);
+  vec6 = __msa_dotp_u_h(vec2, filt0);
+  vec7 = __msa_dotp_u_h(vec3, filt0);
+
+  vec4 = (v8u16)__msa_srari_h((v8i16)vec4, FILTER_BITS);
+  vec5 = (v8u16)__msa_srari_h((v8i16)vec5, FILTER_BITS);
+  vec6 = (v8u16)__msa_srari_h((v8i16)vec6, FILTER_BITS);
+  vec7 = (v8u16)__msa_srari_h((v8i16)vec7, FILTER_BITS);
+
+  vec4 = __msa_min_u_h(vec4, const255);
+  vec5 = __msa_min_u_h(vec5, const255);
+  vec6 = __msa_min_u_h(vec6, const255);
+  vec7 = __msa_min_u_h(vec7, const255);
+
+  res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4);
+  res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5);
+  res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6);
+  res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7);
+
+  out0 = __msa_copy_u_w((v4i32)res0, 0);
+  out1 = __msa_copy_u_w((v4i32)res0, 1);
+  out2 = __msa_copy_u_w((v4i32)res1, 0);
+  out3 = __msa_copy_u_w((v4i32)res1, 1);
+
+  STORE_WORD(dst, out0);
+  dst += dst_stride;
+  STORE_WORD(dst, out1);
+  dst += dst_stride;
+  STORE_WORD(dst, out2);
+  dst += dst_stride;
+  STORE_WORD(dst, out3);
+  dst += dst_stride;
+
+  out0 = __msa_copy_u_w((v4i32)res2, 0);
+  out1 = __msa_copy_u_w((v4i32)res2, 1);
+  out2 = __msa_copy_u_w((v4i32)res3, 0);
+  out3 = __msa_copy_u_w((v4i32)res3, 1);
+
+  STORE_WORD(dst, out0);
+  dst += dst_stride;
+  STORE_WORD(dst, out1);
+  dst += dst_stride;
+  STORE_WORD(dst, out2);
+  dst += dst_stride;
+  STORE_WORD(dst, out3);
+}
+
+static void common_hz_2t_4w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int8_t *filter, int32_t height) {
+  if (4 == height) {
+    common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter);
+  } else if (8 == height) {
+    common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter);
+  }
+}
+
+static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter) {
+  v16u8 filt0;
+  v16i8 src0, src1, src2, src3, mask;
+  v8u16 vec0, vec1, vec2, vec3;
+  v8u16 out0, out1, out2, out3;
+  v8u16 const255, filt;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+
+  vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
+  vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
+  vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
+  vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
+
+  vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
+  vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
+  vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
+  vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
+
+  SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS);
+
+  out0 = __msa_min_u_h(vec0, const255);
+  out1 = __msa_min_u_h(vec1, const255);
+  out2 = __msa_min_u_h(vec2, const255);
+  out3 = __msa_min_u_h(vec3, const255);
+
+  PCKEV_B_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride);
+}
+
+static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int8_t *filter, int32_t height) {
+  v16u8 filt0;
+  v16i8 src0, src1, src2, src3, mask;
+  v8u16 vec0, vec1, vec2, vec3;
+  v8u16 filt, const255;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+  src += (4 * src_stride);
+
+  vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
+  vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
+  vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
+  vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
+
+  vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
+  vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
+  vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
+  vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
+
+  SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS);
+
+  vec0 = __msa_min_u_h(vec0, const255);
+  vec1 = __msa_min_u_h(vec1, const255);
+  vec2 = __msa_min_u_h(vec2, const255);
+  vec3 = __msa_min_u_h(vec3, const255);
+
+  LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+  src += (4 * src_stride);
+
+  PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+  dst += (4 * dst_stride);
+
+  vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
+  vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
+  vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
+  vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
+
+  vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
+  vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
+  vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
+  vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
+
+  SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS);
+
+  vec0 = __msa_min_u_h(vec0, const255);
+  vec1 = __msa_min_u_h(vec1, const255);
+  vec2 = __msa_min_u_h(vec2, const255);
+  vec3 = __msa_min_u_h(vec3, const255);
+
+  PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+  dst += (4 * dst_stride);
+
+  if (16 == height) {
+    LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
+    vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
+    vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
+    vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
+
+    vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
+    vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
+    vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
+    vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
+
+    SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3,
+                     vec0, vec1, vec2, vec3, FILTER_BITS);
+
+    vec0 = __msa_min_u_h(vec0, const255);
+    vec1 = __msa_min_u_h(vec1, const255);
+    vec2 = __msa_min_u_h(vec2, const255);
+    vec3 = __msa_min_u_h(vec3, const255);
+
+    LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+    src += (4 * src_stride);
+
+    PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
+    vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
+    vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
+    vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
+
+    vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
+    vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
+    vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
+    vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
+
+    SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3,
+                     vec0, vec1, vec2, vec3, FILTER_BITS);
+
+    vec0 = __msa_min_u_h(vec0, const255);
+    vec1 = __msa_min_u_h(vec1, const255);
+    vec2 = __msa_min_u_h(vec2, const255);
+    vec3 = __msa_min_u_h(vec3, const255);
+
+    PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+  }
+}
+
+/* Width-8 dispatcher for the 2-tap (bilinear) horizontal filter:
+ * a height of exactly 4 rows goes to the dedicated 8x4 kernel, any
+ * other height to the multiple-of-rows kernel. */
+static void common_hz_2t_8w_msa(const uint8_t *src, int32_t src_stride,
+                                uint8_t *dst, int32_t dst_stride,
+                                int8_t *filter, int32_t height) {
+  if (height != 4) {
+    common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height);
+  } else {
+    common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter);
+  }
+}
+
+/* 2-tap (bilinear) horizontal filter, 16 pixels wide.
+ *
+ * src, src_stride : source pixels and row stride
+ * dst, dst_stride : destination pixels and row stride
+ * filter          : filter taps; only tap 0 is used (splatted into filt0)
+ * height          : row count; the first 4 rows are unrolled ahead of the
+ *                   loop, so height is assumed to be a multiple of 4
+ *
+ * Each row is processed as two 16-byte loads (src, src + 8); the shuffle
+ * mask gathers adjacent pixel pairs, which are dot-producted against the
+ * splatted tap, rounded down by FILTER_BITS, clamped to 255, then packed
+ * back to bytes and stored one full 16-byte row at a time.
+ */
+static void common_hz_2t_16w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter, int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+  v16u8 filt0;
+  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+  v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
+  v8u16 filt, const255;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* minus one because the first group of 4 rows is handled before the loop */
+  loop_cnt = (height >> 2) - 1;
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  /* load 4 rows, two vectors per row (low / high 8 output pixels) */
+  src0 = LOAD_SB(src);
+  src1 = LOAD_SB(src + 8);
+  src += src_stride;
+  src2 = LOAD_SB(src);
+  src3 = LOAD_SB(src + 8);
+  src += src_stride;
+  src4 = LOAD_SB(src);
+  src5 = LOAD_SB(src + 8);
+  src += src_stride;
+  src6 = LOAD_SB(src);
+  src7 = LOAD_SB(src + 8);
+  src += src_stride;
+
+  /* gather adjacent pixel pairs for the 2-tap dot product */
+  vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
+  vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
+  vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
+  vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
+  vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
+  vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
+  vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
+  vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
+
+  /* unsigned dot product with the splatted filter tap */
+  out0 = __msa_dotp_u_h(vec0, filt0);
+  out1 = __msa_dotp_u_h(vec1, filt0);
+  out2 = __msa_dotp_u_h(vec2, filt0);
+  out3 = __msa_dotp_u_h(vec3, filt0);
+  out4 = __msa_dotp_u_h(vec4, filt0);
+  out5 = __msa_dotp_u_h(vec5, filt0);
+  out6 = __msa_dotp_u_h(vec6, filt0);
+  out7 = __msa_dotp_u_h(vec7, filt0);
+
+  /* round and downshift by FILTER_BITS */
+  out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
+  out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
+  out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
+  out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
+  out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
+  out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
+  out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
+  out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
+
+  /* clamp to the 8-bit pixel range */
+  out0 = __msa_min_u_h(out0, const255);
+  out1 = __msa_min_u_h(out1, const255);
+  out2 = __msa_min_u_h(out2, const255);
+  out3 = __msa_min_u_h(out3, const255);
+  out4 = __msa_min_u_h(out4, const255);
+  out5 = __msa_min_u_h(out5, const255);
+  out6 = __msa_min_u_h(out6, const255);
+  out7 = __msa_min_u_h(out7, const255);
+
+  /* pack even bytes of each half-row pair and store one 16-byte row each */
+  PCKEV_B_STORE_VEC(out1, out0, dst);
+  dst += dst_stride;
+  PCKEV_B_STORE_VEC(out3, out2, dst);
+  dst += dst_stride;
+  PCKEV_B_STORE_VEC(out5, out4, dst);
+  dst += dst_stride;
+  PCKEV_B_STORE_VEC(out7, out6, dst);
+  dst += dst_stride;
+
+  /* same pipeline for each remaining group of 4 rows */
+  for (; loop_cnt--;) {
+    src0 = LOAD_SB(src);
+    src1 = LOAD_SB(src + 8);
+    src += src_stride;
+    src2 = LOAD_SB(src);
+    src3 = LOAD_SB(src + 8);
+    src += src_stride;
+    src4 = LOAD_SB(src);
+    src5 = LOAD_SB(src + 8);
+    src += src_stride;
+    src6 = LOAD_SB(src);
+    src7 = LOAD_SB(src + 8);
+    src += src_stride;
+
+    vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
+    vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
+    vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
+    vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
+    vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
+    vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
+    vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
+    vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
+
+    out0 = __msa_dotp_u_h(vec0, filt0);
+    out1 = __msa_dotp_u_h(vec1, filt0);
+    out2 = __msa_dotp_u_h(vec2, filt0);
+    out3 = __msa_dotp_u_h(vec3, filt0);
+    out4 = __msa_dotp_u_h(vec4, filt0);
+    out5 = __msa_dotp_u_h(vec5, filt0);
+    out6 = __msa_dotp_u_h(vec6, filt0);
+    out7 = __msa_dotp_u_h(vec7, filt0);
+
+    out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
+    out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
+    out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
+    out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
+    out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
+    out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
+    out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
+    out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
+
+    out0 = __msa_min_u_h(out0, const255);
+    out1 = __msa_min_u_h(out1, const255);
+    out2 = __msa_min_u_h(out2, const255);
+    out3 = __msa_min_u_h(out3, const255);
+    out4 = __msa_min_u_h(out4, const255);
+    out5 = __msa_min_u_h(out5, const255);
+    out6 = __msa_min_u_h(out6, const255);
+    out7 = __msa_min_u_h(out7, const255);
+
+    PCKEV_B_STORE_VEC(out1, out0, dst);
+    dst += dst_stride;
+    PCKEV_B_STORE_VEC(out3, out2, dst);
+    dst += dst_stride;
+    PCKEV_B_STORE_VEC(out5, out4, dst);
+    dst += dst_stride;
+    PCKEV_B_STORE_VEC(out7, out6, dst);
+    dst += dst_stride;
+  }
+}
+
+/* 2-tap (bilinear) horizontal filter, 32 pixels wide, 2 rows per
+ * iteration.  Per row: three 16-byte loads at offsets 0/16/24 plus one
+ * overlapping window at byte offset 8 built with sld.b from the
+ * concatenation of neighbouring loads (presumably bytes 8..23 of the
+ * row -- confirm against the MSA sld.b spec).  Filtering is the same
+ * dotp / srari / clamp / pckev pipeline as the other 2-tap kernels.
+ * height is assumed to be a multiple of 2.
+ */
+static void common_hz_2t_32w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter, int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+  v16u8 filt0;
+  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+  v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
+  v8u16 filt, const255;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  for (loop_cnt = height >> 1; loop_cnt--;) {
+    /* two rows of 32 pixels; src1/src5 are overlapping mid windows */
+    src0 = LOAD_SB(src);
+    src2 = LOAD_SB(src + 16);
+    src3 = LOAD_SB(src + 24);
+    src1 = __msa_sld_b(src2, src0, 8);
+    src += src_stride;
+    src4 = LOAD_SB(src);
+    src6 = LOAD_SB(src + 16);
+    src7 = LOAD_SB(src + 24);
+    src5 = __msa_sld_b(src6, src4, 8);
+    src += src_stride;
+
+    /* gather adjacent pixel pairs for the 2-tap dot product */
+    vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
+    vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
+    vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
+    vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
+    vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
+    vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
+    vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
+    vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
+
+    out0 = __msa_dotp_u_h(vec0, filt0);
+    out1 = __msa_dotp_u_h(vec1, filt0);
+    out2 = __msa_dotp_u_h(vec2, filt0);
+    out3 = __msa_dotp_u_h(vec3, filt0);
+    out4 = __msa_dotp_u_h(vec4, filt0);
+    out5 = __msa_dotp_u_h(vec5, filt0);
+    out6 = __msa_dotp_u_h(vec6, filt0);
+    out7 = __msa_dotp_u_h(vec7, filt0);
+
+    /* round, downshift, clamp to 8 bits */
+    out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
+    out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
+    out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
+    out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
+    out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
+    out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
+    out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
+    out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
+
+    out0 = __msa_min_u_h(out0, const255);
+    out1 = __msa_min_u_h(out1, const255);
+    out2 = __msa_min_u_h(out2, const255);
+    out3 = __msa_min_u_h(out3, const255);
+    out4 = __msa_min_u_h(out4, const255);
+    out5 = __msa_min_u_h(out5, const255);
+    out6 = __msa_min_u_h(out6, const255);
+    out7 = __msa_min_u_h(out7, const255);
+
+    /* two 16-byte stores per 32-pixel row */
+    PCKEV_B_STORE_VEC(out1, out0, dst);
+    PCKEV_B_STORE_VEC(out3, out2, dst + 16);
+    dst += dst_stride;
+    PCKEV_B_STORE_VEC(out5, out4, dst);
+    PCKEV_B_STORE_VEC(out7, out6, dst + 16);
+    dst += dst_stride;
+  }
+}
+
+/* 2-tap (bilinear) horizontal filter, 64 pixels wide, 1 row per
+ * iteration.  Loads at offsets 0/16/32/48/56; the windows at byte
+ * offsets 8/24/40 are formed with sld.b from adjacent loads.
+ * NOTE(review): the load at src + 56 reads up to byte 71 of the row --
+ * presumably the frame buffer provides right-border padding; confirm
+ * against the caller's buffer layout.
+ */
+static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
+                                 uint8_t *dst, int32_t dst_stride,
+                                 int8_t *filter, int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+  v16u8 filt0;
+  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+  v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
+  v8u16 filt, const255;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter);
+  filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  const255 = (v8u16)__msa_ldi_h(255);
+
+  for (loop_cnt = height; loop_cnt--;) {
+    /* one 64-pixel row: direct loads plus overlapping sld.b windows */
+    src0 = LOAD_SB(src);
+    src2 = LOAD_SB(src + 16);
+    src4 = LOAD_SB(src + 32);
+    src6 = LOAD_SB(src + 48);
+    src7 = LOAD_SB(src + 56);
+    src1 = __msa_sld_b(src2, src0, 8);
+    src3 = __msa_sld_b(src4, src2, 8);
+    src5 = __msa_sld_b(src6, src4, 8);
+    src += src_stride;
+
+    /* gather adjacent pixel pairs for the 2-tap dot product */
+    vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
+    vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
+    vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
+    vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
+    vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
+    vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
+    vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
+    vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
+
+    out0 = __msa_dotp_u_h(vec0, filt0);
+    out1 = __msa_dotp_u_h(vec1, filt0);
+    out2 = __msa_dotp_u_h(vec2, filt0);
+    out3 = __msa_dotp_u_h(vec3, filt0);
+    out4 = __msa_dotp_u_h(vec4, filt0);
+    out5 = __msa_dotp_u_h(vec5, filt0);
+    out6 = __msa_dotp_u_h(vec6, filt0);
+    out7 = __msa_dotp_u_h(vec7, filt0);
+
+    /* round, downshift, clamp to 8 bits */
+    out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
+    out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
+    out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
+    out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
+    out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
+    out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
+    out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
+    out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
+
+    out0 = __msa_min_u_h(out0, const255);
+    out1 = __msa_min_u_h(out1, const255);
+    out2 = __msa_min_u_h(out2, const255);
+    out3 = __msa_min_u_h(out3, const255);
+    out4 = __msa_min_u_h(out4, const255);
+    out5 = __msa_min_u_h(out5, const255);
+    out6 = __msa_min_u_h(out6, const255);
+    out7 = __msa_min_u_h(out7, const255);
+
+    /* four 16-byte stores cover the 64-pixel output row */
+    PCKEV_B_STORE_VEC(out1, out0, dst);
+    PCKEV_B_STORE_VEC(out3, out2, dst + 16);
+    PCKEV_B_STORE_VEC(out5, out4, dst + 32);
+    PCKEV_B_STORE_VEC(out7, out6, dst + 48);
+    dst += dst_stride;
+  }
+}
+
+/* MSA entry point for horizontal 8-tap convolution.
+ *
+ * Falls back to the generic C implementation when the horizontal step
+ * is not the unit step (x_step_q4 != 16) or when the width is not one
+ * of 4/8/16/32/64.  Otherwise dispatches by width, choosing the 2-tap
+ * (bilinear) kernels when the outer filter taps are zero and the full
+ * 8-tap kernels otherwise.
+ */
+void vp9_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
+                             uint8_t *dst, ptrdiff_t dst_stride,
+                             const int16_t *filter_x, int x_step_q4,
+                             const int16_t *filter_y, int y_step_q4,
+                             int w, int h) {
+  int8_t cnt, filt_hor[8];
+
+  /* only unit-step (no subpel scaling) is accelerated here */
+  if (16 != x_step_q4) {
+    vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+                          filter_x, x_step_q4, filter_y, y_step_q4,
+                          w, h);
+    return;
+  }
+
+  /* NOTE(review): reinterpreting the int16_t tap array as int32_t is
+   * type punning that assumes little-endian layout; word 1 reading
+   * 0x800000 corresponds to taps [2]=0, [3]=128, i.e. the identity
+   * filter, so a straight copy suffices.  Matches existing libvpx
+   * convention -- confirm behavior on big-endian targets. */
+  if (((const int32_t *)filter_x)[1] == 0x800000) {
+    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h);
+    return;
+  }
+
+  /* narrow the taps to int8 for the MSA kernels; tap values are
+   * assumed to fit in 8 bits (the 128 identity tap is handled above) */
+  for (cnt = 0; cnt < 8; ++cnt) {
+    filt_hor[cnt] = filter_x[cnt];
+  }
+
+  /* word 0 zero => taps 0 and 1 are zero; presumably taps 5..7 are too
+   * (bilinear filter), so the 2-tap kernels run on filt_hor[3..4] */
+  if (((const int32_t *)filter_x)[0] == 0) {
+    switch (w) {
+      case 4:
+        common_hz_2t_4w_msa(src, (int32_t)src_stride,
+                            dst, (int32_t)dst_stride,
+                            &filt_hor[3], h);
+        break;
+      case 8:
+        common_hz_2t_8w_msa(src, (int32_t)src_stride,
+                            dst, (int32_t)dst_stride,
+                            &filt_hor[3], h);
+        break;
+      case 16:
+        common_hz_2t_16w_msa(src, (int32_t)src_stride,
+                             dst, (int32_t)dst_stride,
+                             &filt_hor[3], h);
+        break;
+      case 32:
+        common_hz_2t_32w_msa(src, (int32_t)src_stride,
+                             dst, (int32_t)dst_stride,
+                             &filt_hor[3], h);
+        break;
+      case 64:
+        common_hz_2t_64w_msa(src, (int32_t)src_stride,
+                             dst, (int32_t)dst_stride,
+                             &filt_hor[3], h);
+        break;
+      default:
+        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+                              filter_x, x_step_q4, filter_y, y_step_q4,
+                              w, h);
+        break;
+    }
+  } else {
+    switch (w) {
+      case 4:
+        common_hz_8t_4w_msa(src, (int32_t)src_stride,
+                            dst, (int32_t)dst_stride,
+                            filt_hor, h);
+        break;
+      case 8:
+        common_hz_8t_8w_msa(src, (int32_t)src_stride,
+                            dst, (int32_t)dst_stride,
+                            filt_hor, h);
+        break;
+      case 16:
+        common_hz_8t_16w_msa(src, (int32_t)src_stride,
+                             dst, (int32_t)dst_stride,
+                             filt_hor, h);
+        break;
+      case 32:
+        common_hz_8t_32w_msa(src, (int32_t)src_stride,
+                             dst, (int32_t)dst_stride,
+                             filt_hor, h);
+        break;
+      case 64:
+        common_hz_8t_64w_msa(src, (int32_t)src_stride,
+                             dst, (int32_t)dst_stride,
+                             filt_hor, h);
+        break;
+      default:
+        vp9_convolve8_horiz_c(src, src_stride, dst, dst_stride,
+                              filter_x, x_step_q4, filter_y, y_step_q4,
+                              w, h);
+        break;
+    }
+  }
+}
diff --git a/vp9/common/mips/msa/vp9_convolve8_msa.c b/vp9/common/mips/msa/vp9_convolve8_msa.c
new file mode 100644
index 0000000..d0c3746
--- /dev/null
+++ b/vp9/common/mips/msa/vp9_convolve8_msa.c
@@ -0,0 +1,880 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+
+/* Byte-shuffle masks used with vshf.b to gather horizontally adjacent
+ * pixel pairs for the MSA convolution kernels.  Indices >= 16 presumably
+ * select bytes from the second shuffle operand -- verify against the
+ * MSA vshf.b specification. */
+const uint8_t mc_filt_mask_arr[16 * 3] = {
+  /* 8 width cases */
+  0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+  /* 4 width cases */
+  0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20,
+  /* 4 width cases */
+  8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28
+};
+
+/* 8-tap horizontal followed by 8-tap vertical filter, 4 pixels wide.
+ *
+ * The source pointer is backed up 3 columns and 3 rows so the 8-tap
+ * window is centred; 7 rows are horizontally pre-filtered before the
+ * loop, which then produces 4 output rows per iteration (height is
+ * assumed to be a multiple of 4).  Pixels are biased to the signed
+ * range (xor 128) before filtering and un-biased on store.
+ */
+static void common_hv_8ht_8vt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int8_t *filter_horiz, int8_t *filter_vert,
+                                     int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+  v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 filt_horiz;
+  v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4;
+  v8i16 horiz_out5, horiz_out6, horiz_out7, horiz_out8, horiz_out9;
+  v8i16 tmp0, tmp1, out0, out1, out2, out3, out4;
+  v8i16 filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3;
+
+  /* 4-width shuffle mask (second block of mc_filt_mask_arr) */
+  mask0 = LOAD_UB(&mc_filt_mask_arr[16]);
+
+  /* centre the 8-tap window: back up 3 columns and 3 rows */
+  src -= (3 + 3 * src_stride);
+
+  /* rearranging filter */
+  filt_horiz = LOAD_SH(filter_horiz);
+  filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0);
+  filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1);
+  filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2);
+  filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  /* pre-filter the 7 history rows needed by the vertical 8-tap */
+  LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+  src += (7 * src_stride);
+
+  /* bias unsigned pixels into the signed range (x ^ 128) */
+  XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
+                  src0, src1, src2, src3, src4, src5, src6, 128);
+
+  horiz_out0 = HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3,
+                                     filt_horiz0, filt_horiz1, filt_horiz2,
+                                     filt_horiz3);
+  horiz_out2 = HORIZ_8TAP_FILT_2VECS(src2, src3, mask0, mask1, mask2, mask3,
+                                     filt_horiz0, filt_horiz1, filt_horiz2,
+                                     filt_horiz3);
+  horiz_out4 = HORIZ_8TAP_FILT_2VECS(src4, src5, mask0, mask1, mask2, mask3,
+                                     filt_horiz0, filt_horiz1, filt_horiz2,
+                                     filt_horiz3);
+  horiz_out5 = HORIZ_8TAP_FILT_2VECS(src5, src6, mask0, mask1, mask2, mask3,
+                                     filt_horiz0, filt_horiz1, filt_horiz2,
+                                     filt_horiz3);
+  /* realign odd rows from the packed pair outputs */
+  horiz_out1 = (v8i16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
+  horiz_out3 = (v8i16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8);
+
+  filt = LOAD_SH(filter_vert);
+  filt_vert0 = __msa_splati_h(filt, 0);
+  filt_vert1 = __msa_splati_h(filt, 1);
+  filt_vert2 = __msa_splati_h(filt, 2);
+  filt_vert3 = __msa_splati_h(filt, 3);
+
+  /* interleave consecutive horizontal outputs for the vertical dotp */
+  out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+  out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
+  out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
+
+  for (loop_cnt = (height >> 2); loop_cnt--;) {
+    LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
+    src += (4 * src_stride);
+
+    XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);
+
+    horiz_out7 = HORIZ_8TAP_FILT_2VECS(src7, src8, mask0, mask1, mask2, mask3,
+                                       filt_horiz0, filt_horiz1, filt_horiz2,
+                                       filt_horiz3);
+    horiz_out6 = (v8i16)__msa_sldi_b((v16i8)horiz_out7, (v16i8)horiz_out5, 8);
+
+    out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
+
+    /* vertical 8-tap over the last 8 horizontal outputs */
+    tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1,
+                               filt_vert2, filt_vert3);
+
+    horiz_out9 = HORIZ_8TAP_FILT_2VECS(src9, src10, mask0, mask1, mask2, mask3,
+                                       filt_horiz0, filt_horiz1, filt_horiz2,
+                                       filt_horiz3);
+    horiz_out8 = (v8i16)__msa_sldi_b((v16i8)horiz_out9, (v16i8)horiz_out7, 8);
+
+    out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8);
+
+    tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vert0, filt_vert1,
+                               filt_vert2, filt_vert3);
+    tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7);
+    tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7);
+
+    /* un-bias (xor 128) and store 4 rows of 4 bytes */
+    PCKEV_2B_XORI128_STORE_4_BYTES_4(tmp0, tmp1, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    /* slide the filter history window for the next iteration */
+    horiz_out5 = horiz_out9;
+
+    out0 = out2;
+    out1 = out3;
+    out2 = out4;
+  }
+}
+
+/* 8-tap horizontal followed by 8-tap vertical filter, 8 pixels wide.
+ *
+ * Same structure as the 4-wide variant: back up 3 columns/rows for the
+ * centred 8-tap window, horizontally pre-filter 7 history rows, then
+ * emit 4 output rows per loop iteration (height assumed multiple of 4).
+ * Two interleavings (out0..3 and out4..7) cover the even/odd lane
+ * pairings needed by the vertical dot product at width 8.
+ */
+static void common_hv_8ht_8vt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int8_t *filter_horiz, int8_t *filter_vert,
+                                     int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+  v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3;
+  v8i16 filt_horiz, filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3;
+  v16u8 mask0, mask1, mask2, mask3;
+  v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
+  v8i16 horiz_out4, horiz_out5, horiz_out6, horiz_out7;
+  v8i16 horiz_out8, horiz_out9, horiz_out10;
+  v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9;
+  v8i16 tmp0, tmp1, tmp2, tmp3;
+
+  /* 8-width shuffle mask (first block of mc_filt_mask_arr) */
+  mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+
+  /* centre the 8-tap window: back up 3 columns and 3 rows */
+  src -= (3 + 3 * src_stride);
+
+  /* rearranging filter */
+  filt_horiz = LOAD_SH(filter_horiz);
+  filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0);
+  filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1);
+  filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2);
+  filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3);
+
+  mask1 = mask0 + 2;
+  mask2 = mask0 + 4;
+  mask3 = mask0 + 6;
+
+  /* pre-filter the 7 history rows needed by the vertical 8-tap */
+  LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+  src += (7 * src_stride);
+
+  /* bias unsigned pixels into the signed range (x ^ 128) */
+  XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
+                  src0, src1, src2, src3, src4, src5, src6, 128);
+
+  horiz_out0 = HORIZ_8TAP_FILT(src0, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+  horiz_out1 = HORIZ_8TAP_FILT(src1, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+  horiz_out2 = HORIZ_8TAP_FILT(src2, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+  horiz_out3 = HORIZ_8TAP_FILT(src3, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+  horiz_out4 = HORIZ_8TAP_FILT(src4, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+  horiz_out5 = HORIZ_8TAP_FILT(src5, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+  horiz_out6 = HORIZ_8TAP_FILT(src6, mask0, mask1, mask2, mask3, filt_horiz0,
+                               filt_horiz1, filt_horiz2, filt_horiz3);
+
+  filt = LOAD_SH(filter_vert);
+  filt_vert0 = __msa_splati_h(filt, 0);
+  filt_vert1 = __msa_splati_h(filt, 1);
+  filt_vert2 = __msa_splati_h(filt, 2);
+  filt_vert3 = __msa_splati_h(filt, 3);
+
+  /* interleave consecutive horizontal outputs for the vertical dotp;
+     out0..2 pair rows (0,1)(2,3)(4,5), out4..6 pair rows (1,2)(3,4)(5,6) */
+  out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+  out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
+  out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
+  out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out1);
+  out5 = (v8i16)__msa_ilvev_b((v16i8)horiz_out4, (v16i8)horiz_out3);
+  out6 = (v8i16)__msa_ilvev_b((v16i8)horiz_out6, (v16i8)horiz_out5);
+
+  for (loop_cnt = (height >> 2); loop_cnt--;) {
+    LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
+    src += (4 * src_stride);
+
+    XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);
+
+    horiz_out7 = HORIZ_8TAP_FILT(src7, mask0, mask1, mask2, mask3, filt_horiz0,
+                                 filt_horiz1, filt_horiz2, filt_horiz3);
+
+    out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
+    tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1,
+                               filt_vert2, filt_vert3);
+    tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7);
+
+    horiz_out8 = HORIZ_8TAP_FILT(src8, mask0, mask1, mask2, mask3, filt_horiz0,
+                                 filt_horiz1, filt_horiz2, filt_horiz3);
+
+    out7 = (v8i16)__msa_ilvev_b((v16i8)horiz_out8, (v16i8)horiz_out7);
+    tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vert0, filt_vert1,
+                               filt_vert2, filt_vert3);
+    tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7);
+
+    horiz_out9 = HORIZ_8TAP_FILT(src9, mask0, mask1, mask2, mask3, filt_horiz0,
+                                 filt_horiz1, filt_horiz2, filt_horiz3);
+
+    out8 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8);
+    tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vert0, filt_vert1,
+                               filt_vert2, filt_vert3);
+    tmp2 = SRARI_SATURATE_SIGNED_H(tmp2, FILTER_BITS, 7);
+
+    horiz_out10 = HORIZ_8TAP_FILT(src10, mask0, mask1, mask2, mask3,
+                                  filt_horiz0, filt_horiz1, filt_horiz2,
+                                  filt_horiz3);
+
+    out9 = (v8i16)__msa_ilvev_b((v16i8)horiz_out10, (v16i8)horiz_out9);
+    tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vert0, filt_vert1,
+                               filt_vert2, filt_vert3);
+    tmp3 = SRARI_SATURATE_SIGNED_H(tmp3, FILTER_BITS, 7);
+
+    /* un-bias (xor 128) and store 4 rows of 8 bytes */
+    PCKEV_B_4_XORI128_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    /* slide the filter history windows for the next iteration */
+    horiz_out6 = horiz_out10;
+
+    out0 = out2;
+    out1 = out3;
+    out2 = out8;
+    out4 = out6;
+    out5 = out7;
+    out6 = out9;
+  }
+}
+
+/* 16-wide 8-tap H+V filter, computed as two adjacent 8-wide columns. */
+static void common_hv_8ht_8vt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz, int8_t *filter_vert,
+                                      int32_t height) {
+  int32_t col;
+
+  for (col = 0; col < 2; ++col) {
+    common_hv_8ht_8vt_8w_msa(src + col * 8, src_stride, dst + col * 8,
+                             dst_stride, filter_horiz, filter_vert, height);
+  }
+}
+
+/* 32-wide 8-tap H+V filter, computed as four adjacent 8-wide columns. */
+static void common_hv_8ht_8vt_32w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz, int8_t *filter_vert,
+                                      int32_t height) {
+  int32_t col;
+
+  for (col = 0; col < 4; ++col) {
+    common_hv_8ht_8vt_8w_msa(src + col * 8, src_stride, dst + col * 8,
+                             dst_stride, filter_horiz, filter_vert, height);
+  }
+}
+
+/* 64-wide 8-tap H+V filter, computed as eight adjacent 8-wide columns. */
+static void common_hv_8ht_8vt_64w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz, int8_t *filter_vert,
+                                      int32_t height) {
+  int32_t col;
+
+  for (col = 0; col < 8; ++col) {
+    common_hv_8ht_8vt_8w_msa(src + col * 8, src_stride, dst + col * 8,
+                             dst_stride, filter_horiz, filter_vert, height);
+  }
+}
+
+/* 2-tap horizontal + 2-tap vertical (bilinear) filter for a 4x4 block.
+ *
+ * Loads 5 rows (one extra row for the vertical 2-tap), horizontally
+ * filters two rows per vector, vertically filters the interleaved
+ * horizontal outputs, then stores the 4 output rows one 32-bit word
+ * at a time.
+ */
+static void common_hv_2ht_2vt_4x4_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz,
+                                      int8_t *filter_vert) {
+  uint32_t out0, out1, out2, out3;
+  v16i8 src0, src1, src2, src3, src4, mask;
+  v16u8 res0, res1, horiz_vec;
+  v16u8 filt_vert, filt_horiz, vec0, vec1;
+  v8u16 filt, tmp0, tmp1;
+  v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4;
+
+  /* 4-width shuffle mask (second block of mc_filt_mask_arr) */
+  mask = LOAD_SB(&mc_filt_mask_arr[16]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter_horiz);
+  filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  filt = LOAD_UH(filter_vert);
+  filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
+
+  /* horizontal pass: two 4-wide rows filtered per vector */
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0);
+  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2);
+  horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
+  horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7);
+
+  /* realign rows 1 and 3 next to their successors for the vertical tap */
+  horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
+  horiz_out3 = (v8u16)__msa_pckod_d((v2i64)horiz_out4, (v2i64)horiz_out2);
+
+  /* vertical pass: 2-tap dot product over interleaved row pairs */
+  vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+  vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
+
+  tmp0 = __msa_dotp_u_h(vec0, filt_vert);
+  tmp1 = __msa_dotp_u_h(vec1, filt_vert);
+  tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
+  tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+
+  /* pack to bytes and store 4 rows of 4 bytes */
+  res0 = (v16u8)__msa_pckev_b((v16i8)tmp0, (v16i8)tmp0);
+  res1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp1);
+
+  out0 = __msa_copy_u_w((v4i32)res0, 0);
+  out1 = __msa_copy_u_w((v4i32)res0, 1);
+  out2 = __msa_copy_u_w((v4i32)res1, 0);
+  out3 = __msa_copy_u_w((v4i32)res1, 1);
+
+  STORE_WORD(dst, out0);
+  dst += dst_stride;
+  STORE_WORD(dst, out1);
+  dst += dst_stride;
+  STORE_WORD(dst, out2);
+  dst += dst_stride;
+  STORE_WORD(dst, out3);
+}
+
+/* 2-tap horizontal + 2-tap vertical (bilinear) filter for a 4x8 block.
+ *
+ * Loads 9 rows (one extra for the vertical 2-tap), horizontally filters
+ * two rows per vector, vertically filters the interleaved outputs, and
+ * stores the 8 output rows one 32-bit word at a time.
+ */
+static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz,
+                                      int8_t *filter_vert) {
+  uint32_t out0, out1, out2, out3;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
+  v16u8 filt_horiz, filt_vert, horiz_vec;
+  v16u8 vec0, vec1, vec2, vec3;
+  v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
+  v8u16 vec4, vec5, vec6, vec7, filt;
+  v8u16 horiz_out4, horiz_out5, horiz_out6, horiz_out7, horiz_out8;
+  v16i8 res0, res1, res2, res3;
+
+  /* 4-width shuffle mask (second block of mc_filt_mask_arr) */
+  mask = LOAD_SB(&mc_filt_mask_arr[16]);
+
+  /* rearranging filter */
+  filt = LOAD_UH(filter_horiz);
+  filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  filt = LOAD_UH(filter_vert);
+  filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+  /* 8 rows plus the extra 9th row needed by the vertical 2-tap */
+  LOAD_8VECS_SB(src, src_stride,
+                src0, src1, src2, src3, src4, src5, src6, src7);
+  src += (8 * src_stride);
+  src8 = LOAD_SB(src);
+
+  /* horizontal pass: two 4-wide rows filtered per vector */
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0);
+  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2);
+  horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src4);
+  horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src6);
+  horiz_out6 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out6 = SRARI_SATURATE_UNSIGNED_H(horiz_out6, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src8, src8);
+  horiz_out8 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out8 = SRARI_SATURATE_UNSIGNED_H(horiz_out8, FILTER_BITS, 7);
+
+  /* realign odd rows next to their successors for the vertical tap */
+  horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
+  horiz_out3 = (v8u16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8);
+  horiz_out5 = (v8u16)__msa_sldi_b((v16i8)horiz_out6, (v16i8)horiz_out4, 8);
+  horiz_out7 = (v8u16)__msa_pckod_d((v2i64)horiz_out8, (v2i64)horiz_out6);
+
+  /* vertical pass: 2-tap dot product over interleaved row pairs */
+  vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+  vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
+  vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
+  vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
+
+  vec4 = __msa_dotp_u_h(vec0, filt_vert);
+  vec5 = __msa_dotp_u_h(vec1, filt_vert);
+  vec6 = __msa_dotp_u_h(vec2, filt_vert);
+  vec7 = __msa_dotp_u_h(vec3, filt_vert);
+
+  vec4 = SRARI_SATURATE_UNSIGNED_H(vec4, FILTER_BITS, 7);
+  vec5 = SRARI_SATURATE_UNSIGNED_H(vec5, FILTER_BITS, 7);
+  vec6 = SRARI_SATURATE_UNSIGNED_H(vec6, FILTER_BITS, 7);
+  vec7 = SRARI_SATURATE_UNSIGNED_H(vec7, FILTER_BITS, 7);
+
+  /* pack to bytes and store 8 rows of 4 bytes */
+  res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4);
+  res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5);
+  res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6);
+  res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7);
+
+  out0 = __msa_copy_u_w((v4i32)res0, 0);
+  out1 = __msa_copy_u_w((v4i32)res0, 1);
+  out2 = __msa_copy_u_w((v4i32)res1, 0);
+  out3 = __msa_copy_u_w((v4i32)res1, 1);
+
+  STORE_WORD(dst, out0);
+  dst += dst_stride;
+  STORE_WORD(dst, out1);
+  dst += dst_stride;
+  STORE_WORD(dst, out2);
+  dst += dst_stride;
+  STORE_WORD(dst, out3);
+  dst += dst_stride;
+
+  out0 = __msa_copy_u_w((v4i32)res2, 0);
+  out1 = __msa_copy_u_w((v4i32)res2, 1);
+  out2 = __msa_copy_u_w((v4i32)res3, 0);
+  out3 = __msa_copy_u_w((v4i32)res3, 1);
+
+  STORE_WORD(dst, out0);
+  dst += dst_stride;
+  STORE_WORD(dst, out1);
+  dst += dst_stride;
+  STORE_WORD(dst, out2);
+  dst += dst_stride;
+  STORE_WORD(dst, out3);
+}
+
+static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int8_t *filter_horiz,
+                                     int8_t *filter_vert,
+                                     int32_t height) {
+  if (4 == height) {
+    common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
+                              filter_horiz, filter_vert);
+  } else if (8 == height) {
+    common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
+                              filter_horiz, filter_vert);
+  }
+}
+
+static void common_hv_2ht_2vt_8x4_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz,
+                                      int8_t *filter_vert) {
+  v16i8 src0, src1, src2, src3, src4, mask;
+  v16u8 filt_horiz, filt_vert, horiz_vec;
+  v16u8 vec0, vec1, vec2, vec3;
+  v8u16 horiz_out0, horiz_out1;
+  v8u16 tmp0, tmp1, tmp2, tmp3;
+  v8i16 filt;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter_horiz);
+  filt_horiz = (v16u8)__msa_splati_h(filt, 0);
+
+  filt = LOAD_SH(filter_vert);
+  filt_vert = (v16u8)__msa_splati_h(filt, 0);
+
+  LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
+  src += (5 * src_stride);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
+  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
+  horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+
+  vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+  tmp0 = __msa_dotp_u_h(vec0, filt_vert);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
+  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+  vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+  tmp1 = __msa_dotp_u_h(vec1, filt_vert);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
+  horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+
+  vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+  tmp2 = __msa_dotp_u_h(vec2, filt_vert);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
+  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+  vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+  tmp3 = __msa_dotp_u_h(vec3, filt_vert);
+
+  tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
+  tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+  tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+  tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
+
+  PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src,
+                                          int32_t src_stride,
+                                          uint8_t *dst,
+                                          int32_t dst_stride,
+                                          int8_t *filter_horiz,
+                                          int8_t *filter_vert,
+                                          int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, mask;
+  v16u8 filt_horiz, filt_vert, vec0, horiz_vec;
+  v8u16 horiz_out0, horiz_out1;
+  v8u16 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+  v8i16 filt;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter_horiz);
+  filt_horiz = (v16u8)__msa_splati_h(filt, 0);
+
+  filt = LOAD_SH(filter_vert);
+  filt_vert = (v16u8)__msa_splati_h(filt, 0);
+
+  src0 = LOAD_SB(src);
+  src += src_stride;
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
+  horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+  for (loop_cnt = (height >> 3); loop_cnt--;) {
+    LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4);
+    src += (4 * src_stride);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
+    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
+    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+    tmp2 = (v8u16)__msa_dotp_u_h(vec0, filt_vert);
+
+    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
+    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+    tmp3 = __msa_dotp_u_h(vec0, filt_vert);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
+    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+    LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4);
+    src += (4 * src_stride);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+    tmp4 = __msa_dotp_u_h(vec0, filt_vert);
+
+    tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
+    tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);
+
+    PCKEV_B_STORE_8_BYTES_4(tmp1, tmp2, tmp3, tmp4, dst, dst_stride);
+    dst += (4 * dst_stride);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
+    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+    tmp5 = __msa_dotp_u_h(vec0, filt_vert);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
+    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+    tmp6 = __msa_dotp_u_h(vec0, filt_vert);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
+    horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+    tmp7 = __msa_dotp_u_h(vec0, filt_vert);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
+    horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+    tmp8 = __msa_dotp_u_h(vec0, filt_vert);
+
+    tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);
+    tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
+    tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);
+    tmp8 = SRARI_SATURATE_UNSIGNED_H(tmp8, FILTER_BITS, 7);
+
+    PCKEV_B_STORE_8_BYTES_4(tmp5, tmp6, tmp7, tmp8, dst, dst_stride);
+    dst += (4 * dst_stride);
+  }
+}
+
+static void common_hv_2ht_2vt_8w_msa(const uint8_t *src, int32_t src_stride,
+                                     uint8_t *dst, int32_t dst_stride,
+                                     int8_t *filter_horiz, int8_t *filter_vert,
+                                     int32_t height) {
+  if (4 == height) {
+    common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride, filter_horiz,
+                              filter_vert);
+  } else {
+    common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride,
+                                  filter_horiz, filter_vert, height);
+  }
+}
+
+static void common_hv_2ht_2vt_16w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz, int8_t *filter_vert,
+                                      int32_t height) {
+  uint32_t loop_cnt;
+  v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+  v16u8 filt_horiz, filt_vert, vec0, horiz_vec;
+  v8u16 horiz_vec0, horiz_vec1, tmp1, tmp2;
+  v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
+  v8i16 filt;
+
+  mask = LOAD_SB(&mc_filt_mask_arr[0]);
+
+  /* rearranging filter */
+  filt = LOAD_SH(filter_horiz);
+  filt_horiz = (v16u8)__msa_splati_h(filt, 0);
+
+  filt = LOAD_SH(filter_vert);
+  filt_vert = (v16u8)__msa_splati_h(filt, 0);
+
+  src0 = LOAD_SB(src);
+  src1 = LOAD_SB(src + 8);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
+  horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
+
+  horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
+  horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+  horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
+
+  src += src_stride;
+
+  for (loop_cnt = (height >> 2); loop_cnt--;) {
+    LOAD_4VECS_SB(src, src_stride, src0, src2, src4, src6);
+    LOAD_4VECS_SB(src + 8, src_stride, src1, src3, src5, src7);
+    src += (4 * src_stride);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
+    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
+    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
+    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
+    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+
+    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+    dst += dst_stride;
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
+    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
+    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3);
+    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
+    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+
+    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+    dst += dst_stride;
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
+    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src5);
+    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
+    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
+    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
+    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+
+    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+    dst += dst_stride;
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src6, src6);
+    horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
+
+    horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src7);
+    horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
+    horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
+
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
+    tmp1 = __msa_dotp_u_h(vec0, filt_vert);
+    vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3);
+    tmp2 = __msa_dotp_u_h(vec0, filt_vert);
+    tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
+    tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+
+    PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+    dst += dst_stride;
+  }
+}
+
+static void common_hv_2ht_2vt_32w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz, int8_t *filter_vert,
+                                      int32_t height) {
+  int32_t multiple8_cnt;
+  for (multiple8_cnt = 2; multiple8_cnt--;) {
+    common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, filter_horiz,
+                              filter_vert, height);
+    src += 16;
+    dst += 16;
+  }
+}
+
+static void common_hv_2ht_2vt_64w_msa(const uint8_t *src, int32_t src_stride,
+                                      uint8_t *dst, int32_t dst_stride,
+                                      int8_t *filter_horiz, int8_t *filter_vert,
+                                      int32_t height) {
+  int32_t multiple8_cnt;
+  for (multiple8_cnt = 4; multiple8_cnt--;) {
+    common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, filter_horiz,
+                              filter_vert, height);
+    src += 16;
+    dst += 16;
+  }
+}
+
+void vp9_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride,
+                       uint8_t *dst, ptrdiff_t dst_stride,
+                       const int16_t *filter_x, int32_t x_step_q4,
+                       const int16_t *filter_y, int32_t y_step_q4,
+                       int32_t w, int32_t h) {
+  int8_t cnt, filt_hor[8], filt_ver[8];
+
+  if (16 != x_step_q4 || 16 != y_step_q4) {
+    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+                    filter_x, x_step_q4, filter_y, y_step_q4,
+                    w, h);
+    return;
+  }
+
+  if (((const int32_t *)filter_x)[1] == 0x800000 &&
+      ((const int32_t *)filter_y)[1] == 0x800000) {
+    vp9_convolve_copy(src, src_stride, dst, dst_stride,
+                      filter_x, x_step_q4, filter_y, y_step_q4,
+                      w, h);
+    return;
+  }
+
+  for (cnt = 0; cnt < 8; ++cnt) {
+    filt_hor[cnt] = filter_x[cnt];
+    filt_ver[cnt] = filter_y[cnt];
+  }
+
+  if (((const int32_t *)filter_x)[0] == 0 &&
+      ((const int32_t *)filter_y)[0] == 0) {
+    switch (w) {
+      case 4:
+        common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 &filt_hor[3], &filt_ver[3], (int32_t)h);
+        break;
+      case 8:
+        common_hv_2ht_2vt_8w_msa(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 &filt_hor[3], &filt_ver[3], (int32_t)h);
+        break;
+      case 16:
+        common_hv_2ht_2vt_16w_msa(src, (int32_t)src_stride,
+                                  dst, (int32_t)dst_stride,
+                                  &filt_hor[3], &filt_ver[3], (int32_t)h);
+        break;
+      case 32:
+        common_hv_2ht_2vt_32w_msa(src, (int32_t)src_stride,
+                                  dst, (int32_t)dst_stride,
+                                  &filt_hor[3], &filt_ver[3], (int32_t)h);
+        break;
+      case 64:
+        common_hv_2ht_2vt_64w_msa(src, (int32_t)src_stride,
+                                  dst, (int32_t)dst_stride,
+                                  &filt_hor[3], &filt_ver[3], (int32_t)h);
+        break;
+      default:
+        vp9_convolve8_c(src, src_stride, dst, dst_stride,
+                        filter_x, x_step_q4, filter_y, y_step_q4,
+                        w, h);
+        break;
+    }
+  } else if (((const int32_t *)filter_x)[0] == 0 ||
+             ((const int32_t *)filter_y)[0] == 0) {
+    vp9_convolve8_c(src, src_stride, dst, dst_stride,
+                    filter_x, x_step_q4, filter_y, y_step_q4,
+                    w, h);
+  } else {
+    switch (w) {
+      case 4:
+        common_hv_8ht_8vt_4w_msa(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filt_hor, filt_ver, (int32_t)h);
+        break;
+      case 8:
+        common_hv_8ht_8vt_8w_msa(src, (int32_t)src_stride,
+                                 dst, (int32_t)dst_stride,
+                                 filt_hor, filt_ver, (int32_t)h);
+        break;
+      case 16:
+        common_hv_8ht_8vt_16w_msa(src, (int32_t)src_stride,
+                                  dst, (int32_t)dst_stride,
+                                  filt_hor, filt_ver, (int32_t)h);
+        break;
+      case 32:
+        common_hv_8ht_8vt_32w_msa(src, (int32_t)src_stride,
+                                  dst, (int32_t)dst_stride,
+                                  filt_hor, filt_ver, (int32_t)h);
+        break;
+      case 64:
+        common_hv_8ht_8vt_64w_msa(src, (int32_t)src_stride,
+                                  dst, (int32_t)dst_stride,
+                                  filt_hor, filt_ver, (int32_t)h);
+        break;
+      default:
+        vp9_convolve8_c(src, src_stride, dst, dst_stride,
+                        filter_x, x_step_q4, filter_y, y_step_q4,
+                        w, h);
+        break;
+    }
+  }
+}
diff --git a/vp9/common/mips/msa/vp9_convolve_msa.h b/vp9/common/mips/msa/vp9_convolve_msa.h
index b74fd1b..b109a40 100644
--- a/vp9/common/mips/msa/vp9_convolve_msa.h
+++ b/vp9/common/mips/msa/vp9_convolve_msa.h
@@ -14,7 +14,7 @@
 #include "vp9/common/vp9_filter.h"
 #include "vp9/common/mips/msa/vp9_macros_msa.h"
 
-extern uint8_t mc_filt_mask_arr[16 * 3];
+extern const uint8_t mc_filt_mask_arr[16 * 3];
 
 #define HORIZ_8TAP_FILT(src, mask0, mask1, mask2, mask3,                   \
                         filt_h0, filt_h1, filt_h2, filt_h3) ({             \
diff --git a/vp9/common/vp9_blockd.c b/vp9/common/vp9_blockd.c
index 3cd9f44..05329e5 100644
--- a/vp9/common/vp9_blockd.c
+++ b/vp9/common/vp9_blockd.c
@@ -40,7 +40,7 @@
     const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
     foreach_transformed_block_visitor visit, void *arg) {
   const struct macroblockd_plane *const pd = &xd->plane[plane];
-  const MB_MODE_INFO* mbmi = &xd->mi[0].src_mi->mbmi;
+  const MB_MODE_INFO* mbmi = &xd->mi[0]->mbmi;
   // block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
   // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
   // transform size varies per plane, look it up in a common way.
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 1240576..018a9c2 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -130,7 +130,6 @@
 } MB_MODE_INFO;
 
 typedef struct MODE_INFO {
-  struct MODE_INFO *src_mi;
   MB_MODE_INFO mbmi;
   b_mode_info bmi[4];
 } MODE_INFO;
@@ -191,7 +190,7 @@
 
   int mi_stride;
 
-  MODE_INFO *mi;
+  MODE_INFO **mi;
   MODE_INFO *left_mi;
   MODE_INFO *above_mi;
   MB_MODE_INFO *left_mbmi;
@@ -245,7 +244,7 @@
 
 static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
                                   const MACROBLOCKD *xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 
   if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi))
     return DCT_DCT;
@@ -255,7 +254,7 @@
 
 static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
                                       const MACROBLOCKD *xd, int ib) {
-  const MODE_INFO *const mi = xd->mi[0].src_mi;
+  const MODE_INFO *const mi = xd->mi[0];
 
   if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi))
     return DCT_DCT;
diff --git a/vp9/common/vp9_debugmodes.c b/vp9/common/vp9_debugmodes.c
index e96bc4f..03fded0 100644
--- a/vp9/common/vp9_debugmodes.c
+++ b/vp9/common/vp9_debugmodes.c
@@ -35,7 +35,7 @@
     fprintf(file, "%c ", prefix);
     for (mi_col = 0; mi_col < cols; mi_col++) {
       fprintf(file, "%2d ",
-              *((int*) ((char *) (&mi->src_mi->mbmi) +
+              *((int*) ((char *) (&mi->mbmi) +
                                   member_offset)));
       mi++;
     }
@@ -64,7 +64,7 @@
   for (mi_row = 0; mi_row < rows; mi_row++) {
     fprintf(mvs, "S ");
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%2d ", mi->src_mi->mbmi.skip);
+      fprintf(mvs, "%2d ", mi->mbmi.skip);
       mi++;
     }
     fprintf(mvs, "\n");
@@ -78,8 +78,8 @@
   for (mi_row = 0; mi_row < rows; mi_row++) {
     fprintf(mvs, "V ");
     for (mi_col = 0; mi_col < cols; mi_col++) {
-      fprintf(mvs, "%4d:%4d ", mi->src_mi->mbmi.mv[0].as_mv.row,
-                               mi->src_mi->mbmi.mv[0].as_mv.col);
+      fprintf(mvs, "%4d:%4d ", mi->mbmi.mv[0].as_mv.row,
+                               mi->mbmi.mv[0].as_mv.col);
       mi++;
     }
     fprintf(mvs, "\n");
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index eee096f..3e8f774 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -216,7 +216,7 @@
 
 static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
                                          PLANE_TYPE type, int block_idx) {
-  const MODE_INFO *const mi = xd->mi[0].src_mi;
+  const MODE_INFO *const mi = xd->mi[0];
 
   if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
     return &vp9_default_scan_orders[tx_size];
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index e464d45..a226ff1 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -826,12 +826,12 @@
 // by mi_row, mi_col.
 // TODO(JBB): This function only works for yv12.
 void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
-                    MODE_INFO *mi, const int mode_info_stride,
+                    MODE_INFO **mi, const int mode_info_stride,
                     LOOP_FILTER_MASK *lfm) {
   int idx_32, idx_16, idx_8;
   const loop_filter_info_n *const lfi_n = &cm->lf_info;
-  MODE_INFO *mip = mi;
-  MODE_INFO *mip2 = mi;
+  MODE_INFO **mip = mi;
+  MODE_INFO **mip2 = mi;
 
   // These are offsets to the next mi in the 64x64 block. It is what gets
   // added to the mi ptr as we go through each loop. It helps us to avoid
@@ -859,28 +859,28 @@
                         cm->mi_cols - mi_col : MI_BLOCK_SIZE);
 
   vp9_zero(*lfm);
-  assert(mip != NULL);
+  assert(mip[0] != NULL);
 
   // TODO(jimbankoski): Try moving most of the following code into decode
   // loop and storing lfm in the mbmi structure so that we don't have to go
   // through the recursive loop structure multiple times.
-  switch (mip->mbmi.sb_type) {
+  switch (mip[0]->mbmi.sb_type) {
     case BLOCK_64X64:
-      build_masks(lfi_n, mip , 0, 0, lfm);
+      build_masks(lfi_n, mip[0] , 0, 0, lfm);
       break;
     case BLOCK_64X32:
-      build_masks(lfi_n, mip, 0, 0, lfm);
+      build_masks(lfi_n, mip[0], 0, 0, lfm);
       mip2 = mip + mode_info_stride * 4;
       if (4 >= max_rows)
         break;
-      build_masks(lfi_n, mip2, 32, 8, lfm);
+      build_masks(lfi_n, mip2[0], 32, 8, lfm);
       break;
     case BLOCK_32X64:
-      build_masks(lfi_n, mip, 0, 0, lfm);
+      build_masks(lfi_n, mip[0], 0, 0, lfm);
       mip2 = mip + 4;
       if (4 >= max_cols)
         break;
-      build_masks(lfi_n, mip2, 4, 2, lfm);
+      build_masks(lfi_n, mip2[0], 4, 2, lfm);
       break;
     default:
       for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) {
@@ -890,23 +890,23 @@
         const int mi_32_row_offset = ((idx_32 >> 1) << 2);
         if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows)
           continue;
-        switch (mip->mbmi.sb_type) {
+        switch (mip[0]->mbmi.sb_type) {
           case BLOCK_32X32:
-            build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
             break;
           case BLOCK_32X16:
-            build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
             if (mi_32_row_offset + 2 >= max_rows)
               continue;
             mip2 = mip + mode_info_stride * 2;
-            build_masks(lfi_n, mip2, shift_y + 16, shift_uv + 4, lfm);
+            build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm);
             break;
           case BLOCK_16X32:
-            build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+            build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
             if (mi_32_col_offset + 2 >= max_cols)
               continue;
             mip2 = mip + 2;
-            build_masks(lfi_n, mip2, shift_y + 2, shift_uv + 1, lfm);
+            build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm);
             break;
           default:
             for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) {
@@ -920,29 +920,29 @@
               if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows)
                 continue;
 
-              switch (mip->mbmi.sb_type) {
+              switch (mip[0]->mbmi.sb_type) {
                 case BLOCK_16X16:
-                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                   break;
                 case BLOCK_16X8:
-                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                   if (mi_16_row_offset + 1 >= max_rows)
                     continue;
                   mip2 = mip + mode_info_stride;
-                  build_y_mask(lfi_n, mip2, shift_y+8, lfm);
+                  build_y_mask(lfi_n, mip2[0], shift_y+8, lfm);
                   break;
                 case BLOCK_8X16:
-                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                   if (mi_16_col_offset +1 >= max_cols)
                     continue;
                   mip2 = mip + 1;
-                  build_y_mask(lfi_n, mip2, shift_y+1, lfm);
+                  build_y_mask(lfi_n, mip2[0], shift_y+1, lfm);
                   break;
                 default: {
                   const int shift_y = shift_32_y[idx_32] +
                                       shift_16_y[idx_16] +
                                       shift_8_y[0];
-                  build_masks(lfi_n, mip, shift_y, shift_uv, lfm);
+                  build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm);
                   mip += offset[0];
                   for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) {
                     const int shift_y = shift_32_y[idx_32] +
@@ -956,7 +956,7 @@
                     if (mi_8_col_offset >= max_cols ||
                         mi_8_row_offset >= max_rows)
                       continue;
-                    build_y_mask(lfi_n, mip, shift_y, lfm);
+                    build_y_mask(lfi_n, mip[0], shift_y, lfm);
                   }
                   break;
                 }
@@ -1151,7 +1151,7 @@
 
 void vp9_filter_block_plane_non420(VP9_COMMON *cm,
                                    struct macroblockd_plane *plane,
-                                   MODE_INFO *mi_8x8,
+                                   MODE_INFO **mi_8x8,
                                    int mi_row, int mi_col) {
   const int ss_x = plane->subsampling_x;
   const int ss_y = plane->subsampling_y;
@@ -1175,7 +1175,7 @@
 
     // Determine the vertical edges that need filtering
     for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
-      const MODE_INFO *mi = mi_8x8[c].src_mi;
+      const MODE_INFO *mi = mi_8x8[c];
       const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
       const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
       // left edge of current unit is block/partition edge -> no skip
@@ -1545,7 +1545,7 @@
     path = LF_PATH_SLOW;
 
   for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
-    MODE_INFO *mi = cm->mi + mi_row * cm->mi_stride;
+    MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
       int plane;
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index ab3b179..f7cbde6 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -95,7 +95,7 @@
 // by mi_row, mi_col.
 void vp9_setup_mask(struct VP9Common *const cm,
                     const int mi_row, const int mi_col,
-                    MODE_INFO *mi_8x8, const int mode_info_stride,
+                    MODE_INFO **mi_8x8, const int mode_info_stride,
                     LOOP_FILTER_MASK *lfm);
 
 void vp9_filter_block_plane_ss00(struct VP9Common *const cm,
@@ -110,7 +110,7 @@
 
 void vp9_filter_block_plane_non420(struct VP9Common *cm,
                                    struct macroblockd_plane *plane,
-                                   MODE_INFO *mi_8x8,
+                                   MODE_INFO **mi_8x8,
                                    int mi_row, int mi_col);
 
 void vp9_loop_filter_init(struct VP9Common *cm);
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 52a7313..436be6c 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -37,7 +37,7 @@
     const POSITION *const mv_ref = &mv_ref_search[i];
     if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
       const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
-                                                   xd->mi_stride].src_mi;
+                                                   xd->mi_stride];
       const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
       // Keep counts for entropy encoding.
       context_counter += mode_2_counter[candidate->mode];
@@ -59,7 +59,7 @@
     const POSITION *const mv_ref = &mv_ref_search[i];
     if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
       const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
-                                                    xd->mi_stride].src_mi->mbmi;
+                                                    xd->mi_stride]->mbmi;
       different_ref_found = 1;
 
       if (candidate->ref_frame[0] == ref_frame)
@@ -101,7 +101,7 @@
       const POSITION *mv_ref = &mv_ref_search[i];
       if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
         const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
-                                              * xd->mi_stride].src_mi->mbmi;
+                                              * xd->mi_stride]->mbmi;
 
         // If the candidate is INTRA we don't want to consider its mv.
         IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
@@ -183,7 +183,7 @@
                                    int block, int ref, int mi_row, int mi_col,
                                    int_mv *nearest_mv, int_mv *near_mv) {
   int_mv mv_list[MAX_MV_REF_CANDIDATES];
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   b_mode_info *bmi = mi->bmi;
   int n;
 
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 62e4ee7..d722620 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -201,6 +201,12 @@
   void (*free_mi)(struct VP9Common *cm);
   void (*setup_mi)(struct VP9Common *cm);
 
+  // Grid of pointers to 8x8 MODE_INFO structs.  Any 8x8 not in the visible
+  // area will be NULL.
+  MODE_INFO **mi_grid_base;
+  MODE_INFO **mi_grid_visible;
+  MODE_INFO **prev_mi_grid_base;
+  MODE_INFO **prev_mi_grid_visible;
 
   // Whether to use previous frame's motion vectors for prediction.
   int use_prev_frame_mvs;
@@ -371,7 +377,7 @@
   xd->up_available    = (mi_row != 0);
   xd->left_available  = (mi_col > tile->mi_col_start);
   if (xd->up_available) {
-    xd->above_mi = xd->mi[-xd->mi_stride].src_mi;
+    xd->above_mi = xd->mi[-xd->mi_stride];
     xd->above_mbmi = xd->above_mi ? &xd->above_mi->mbmi : NULL;
   } else {
     xd->above_mi = NULL;
@@ -379,7 +385,7 @@
   }
 
   if (xd->left_available) {
-    xd->left_mi = xd->mi[-1].src_mi;
+    xd->left_mi = xd->mi[-1];
     xd->left_mbmi = xd->left_mi ? &xd->left_mi->mbmi : NULL;
   } else {
     xd->left_mi = NULL;
diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c
index fd735f4..0aac4a9 100644
--- a/vp9/common/vp9_pred_common.c
+++ b/vp9/common/vp9_pred_common.c
@@ -344,7 +344,7 @@
 // left of the entries corresponding to real blocks.
 // The prediction flags in these dummy entries are initialized to 0.
 int vp9_get_tx_size_context(const MACROBLOCKD *xd) {
-  const int max_tx_size = max_txsize_lookup[xd->mi[0].src_mi->mbmi.sb_type];
+  const int max_tx_size = max_txsize_lookup[xd->mi[0]->mbmi.sb_type];
   const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
   const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
   const int has_above = xd->up_available;
diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index c496299..11eaf2e 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -172,7 +172,7 @@
                                    int x, int y, int w, int h,
                                    int mi_x, int mi_y) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  const MODE_INFO *mi = xd->mi[0].src_mi;
+  const MODE_INFO *mi = xd->mi[0];
   const int is_compound = has_second_ref(&mi->mbmi);
   const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
   int ref;
@@ -246,7 +246,7 @@
     const int bw = 4 * num_4x4_w;
     const int bh = 4 * num_4x4_h;
 
-    if (xd->mi[0].src_mi->mbmi.sb_type < BLOCK_8X8) {
+    if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
       int i = 0, x, y;
       assert(bsize == BLOCK_8X8);
       for (y = 0; y < num_4x4_h; ++y)
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index e1aecd8..1589975 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -292,10 +292,10 @@
 specialize qw/vp9_convolve_avg neon dspr2/, "$sse2_x86inc";
 
 add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon dspr2/, "$avx2_ssse3";
+specialize qw/vp9_convolve8 sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
 
 add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon dspr2/, "$avx2_ssse3";
+specialize qw/vp9_convolve8_horiz sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
 
 add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
 specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c
index 830a517..71bc740 100644
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -107,7 +107,7 @@
 
   for (mi_row = start; mi_row < stop;
        mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
-    MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;
+    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
       const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 1a3b946..b6f51ab 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -304,7 +304,7 @@
   VP9_COMMON *const cm = args->cm;
   MACROBLOCKD *const xd = args->xd;
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   const PREDICTION_MODE mode = (plane == 0) ? get_y_mode(mi, block)
                                             : mi->mbmi.uv_mode;
   const int16_t *const dequant = (plane == 0) ? args->y_dequant
@@ -367,13 +367,12 @@
   const int offset = mi_row * cm->mi_stride + mi_col;
   int x, y;
 
-  xd->mi = cm->mi + offset;
-  xd->mi[0].src_mi = &xd->mi[0];  // Point to self.
-  xd->mi[0].mbmi.sb_type = bsize;
-
+  xd->mi = cm->mi_grid_visible + offset;
+  xd->mi[0] = &cm->mi[offset];
+  xd->mi[0]->mbmi.sb_type = bsize;
   for (y = 0; y < y_mis; ++y)
     for (x = !y; x < x_mis; ++x) {
-      xd->mi[y * cm->mi_stride + x].src_mi = &xd->mi[0];
+      xd->mi[y * cm->mi_stride + x] = xd->mi[0];
     }
 
   set_skip_context(xd, mi_row, mi_col);
@@ -383,7 +382,7 @@
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
 
   vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
-  return &xd->mi[0].mbmi;
+  return &xd->mi[0]->mbmi;
 }
 
 static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
@@ -1992,7 +1991,7 @@
   int plane;
   const int mi_x = mi_col * MI_SIZE;
   const int mi_y = mi_row * MI_SIZE;
-  const MODE_INFO *mi = xd->mi[0].src_mi;
+  const MODE_INFO *mi = xd->mi[0];
   const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
   const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
   const int is_compound = has_second_ref(&mi->mbmi);
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 072f6b7..6cf4f1e 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -81,7 +81,7 @@
                             FRAME_COUNTS *counts,
                             int allow_select, vp9_reader *r) {
   TX_MODE tx_mode = cm->tx_mode;
-  BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
   if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
     return read_selected_tx_size(cm, xd, counts, max_tx_size, r);
@@ -145,7 +145,7 @@
 static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
                                  int mi_row, int mi_col, vp9_reader *r) {
   struct segmentation *const seg = &cm->seg;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   int predicted_segment_id, segment_id;
 
@@ -191,10 +191,10 @@
                                        MACROBLOCKD *const xd,
                                        FRAME_COUNTS *counts,
                                        int mi_row, int mi_col, vp9_reader *r) {
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
-  const MODE_INFO *above_mi = xd->mi[-cm->mi_stride].src_mi;
-  const MODE_INFO *left_mi  = xd->left_available ? xd->mi[-1].src_mi : NULL;
+  const MODE_INFO *above_mi = xd->mi[-cm->mi_stride];
+  const MODE_INFO *left_mi  = xd->left_available ? xd->mi[-1] : NULL;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   int i;
 
@@ -566,7 +566,7 @@
                                        const TileInfo *const tile,
                                        int mi_row, int mi_col, vp9_reader *r) {
   VP9_COMMON *const cm = &pbi->common;
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   int inter_block;
 
@@ -588,7 +588,7 @@
                         const TileInfo *const tile,
                         int mi_row, int mi_col, vp9_reader *r) {
   VP9_COMMON *const cm = &pbi->common;
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
   const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
   const int x_mis = MIN(bw, cm->mi_cols - mi_col);
@@ -605,10 +605,10 @@
     MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
     for (w = 0; w < x_mis; ++w) {
       MV_REF *const mv = frame_mv + w;
-      mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0];
-      mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1];
-      mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int;
-      mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int;
+      mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+      mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+      mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+      mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
     }
   }
 }
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 5480222..54921dc 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -49,6 +49,9 @@
 static void vp9_dec_setup_mi(VP9_COMMON *cm) {
   cm->mi = cm->mip + cm->mi_stride + 1;
   vpx_memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
+  cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
+  vpx_memset(cm->mi_grid_base, 0,
+             cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 }
 
 static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
@@ -56,12 +59,17 @@
   if (!cm->mip)
     return 1;
   cm->mi_alloc_size = mi_size;
+  cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*));
+  if (!cm->mi_grid_base)
+    return 1;
   return 0;
 }
 
 static void vp9_dec_free_mi(VP9_COMMON *cm) {
   vpx_free(cm->mip);
   cm->mip = NULL;
+  vpx_free(cm->mi_grid_base);
+  cm->mi_grid_base = NULL;
 }
 
 VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index fd40875..f0bb2ce 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -52,7 +52,7 @@
                         vp9_reader *r) {
   const int max_eob = 16 << (tx_size << 1);
   const FRAME_CONTEXT *const fc = cm->fc;
-  const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi);
+  const int ref = is_inter_block(&xd->mi[0]->mbmi);
   int band, c = 0;
   const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
       fc->coef_probs[tx_size][type][ref];
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index d67d1f4..2594ac2 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -78,8 +78,8 @@
 
 static void write_selected_tx_size(const VP9_COMMON *cm,
                                    const MACROBLOCKD *xd, vp9_writer *w) {
-  TX_SIZE tx_size = xd->mi[0].src_mi->mbmi.tx_size;
-  BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
+  TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
+  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
   const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
                                                  &cm->fc->tx_probs);
@@ -201,7 +201,7 @@
 // This function encodes the reference frame
 static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              vp9_writer *w) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int is_compound = has_second_ref(mbmi);
   const int segment_id = mbmi->segment_id;
 
@@ -338,9 +338,9 @@
 }
 
 static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
-                              MODE_INFO *mi_8x8, vp9_writer *w) {
+                              MODE_INFO **mi_8x8, vp9_writer *w) {
   const struct segmentation *const seg = &cm->seg;
-  const MODE_INFO *const mi = mi_8x8;
+  const MODE_INFO *const mi = mi_8x8[0];
   const MODE_INFO *const above_mi = xd->above_mi;
   const MODE_INFO *const left_mi = xd->left_mi;
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
@@ -381,8 +381,8 @@
   MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
   MODE_INFO *m;
 
-  xd->mi = cm->mi + (mi_row * cm->mi_stride + mi_col);
-  m = xd->mi;
+  xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
+  m = xd->mi[0];
 
   set_mi_row_col(xd, tile,
                  mi_row, num_8x8_blocks_high_lookup[m->mbmi.sb_type],
@@ -436,7 +436,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  m = cm->mi[mi_row * cm->mi_stride + mi_col].src_mi;
+  m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col];
 
   partition = partition_lookup[bsl][m->mbmi.sb_type];
   write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index cf67e11..6ac205a 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -203,7 +203,7 @@
   int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
   MV_REFERENCE_FRAME frame;
   MACROBLOCKD *filter_mbd = &mb->e_mbd;
-  MB_MODE_INFO *mbmi = &filter_mbd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &filter_mbd->mi[0]->mbmi;
   MB_MODE_INFO saved_mbmi;
   int i, j;
   struct buf_2d saved_dst[MAX_MB_PLANE];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index d428175..4d1964d 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -172,8 +172,8 @@
                                          int mi_row,
                                          int mi_col) {
   const int idx_str = xd->mi_stride * mi_row + mi_col;
-  xd->mi = cm->mi + idx_str;
-  xd->mi[0].src_mi = &xd->mi[0];
+  xd->mi = cm->mi_grid_visible + idx_str;
+  xd->mi[0] = cm->mi + idx_str;
 }
 
 static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
@@ -190,7 +190,7 @@
 
   set_mode_info_offsets(cm, xd, mi_row, mi_col);
 
-  mbmi = &xd->mi[0].src_mi->mbmi;
+  mbmi = &xd->mi[0]->mbmi;
 
   // Set up destination pointers.
   vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
@@ -239,7 +239,7 @@
   for (j = 0; j < block_height; ++j)
     for (i = 0; i < block_width; ++i) {
       if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols)
-        xd->mi[j * xd->mi_stride + i].src_mi = &xd->mi[0];
+        xd->mi[j * xd->mi_stride + i] = xd->mi[0];
     }
 }
 
@@ -249,7 +249,7 @@
                            BLOCK_SIZE bsize) {
   if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
     set_mode_info_offsets(&cpi->common, xd, mi_row, mi_col);
-    xd->mi[0].src_mi->mbmi.sb_type = bsize;
+    xd->mi[0]->mbmi.sb_type = bsize;
   }
 }
 
@@ -432,11 +432,12 @@
 
     // Check vertical split.
     if (mi_row + block_height / 2 < cm->mi_rows) {
+      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
       get_variance(&vt.part_variances->vert[0]);
       get_variance(&vt.part_variances->vert[1]);
       if (vt.part_variances->vert[0].variance < threshold &&
-          vt.part_variances->vert[1].variance < threshold) {
-        BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
+          vt.part_variances->vert[1].variance < threshold &&
+          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
         set_block_size(cpi, xd, mi_row, mi_col, subsize);
         set_block_size(cpi, xd, mi_row, mi_col + block_width / 2, subsize);
         return 1;
@@ -444,11 +445,12 @@
     }
     // Check horizontal split.
     if (mi_col + block_width / 2 < cm->mi_cols) {
+      BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
       get_variance(&vt.part_variances->horz[0]);
       get_variance(&vt.part_variances->horz[1]);
       if (vt.part_variances->horz[0].variance < threshold &&
-          vt.part_variances->horz[1].variance < threshold) {
-        BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
+          vt.part_variances->horz[1].variance < threshold &&
+          get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
         set_block_size(cpi, xd, mi_row, mi_col, subsize);
         set_block_size(cpi, xd, mi_row + block_height / 2, mi_col, subsize);
         return 1;
@@ -691,7 +693,7 @@
   sp = x->plane[0].src.stride;
 
   if (!is_key_frame) {
-    MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+    MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
     unsigned int uv_sad;
     const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
 
@@ -955,8 +957,8 @@
   struct macroblock_plane *const p = x->plane;
   struct macroblockd_plane *const pd = xd->plane;
   MODE_INFO *mi = &ctx->mic;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
-  MODE_INFO *mi_addr = &xd->mi[0];
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  MODE_INFO *mi_addr = xd->mi[0];
   const struct segmentation *const seg = &cm->seg;
   const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
   const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
@@ -974,7 +976,6 @@
   assert(mi->mbmi.sb_type == bsize);
 
   *mi_addr = *mi;
-  mi_addr->src_mi = mi_addr;
 
   // If segmentation in use
   if (seg->enabled) {
@@ -988,7 +989,7 @@
     // Else for cyclic refresh mode update the segment map, set the segment id
     // and then update the quantizer.
     if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
-      vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0].src_mi->mbmi, mi_row,
+      vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
                                         mi_col, bsize, ctx->rate, ctx->dist,
                                         x->skip);
     }
@@ -1015,7 +1016,7 @@
     for (x_idx = 0; x_idx < mi_width; x_idx++)
       if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx
         && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
-        xd->mi[x_idx + y * mis].src_mi = mi_addr;
+        xd->mi[x_idx + y * mis] = mi_addr;
       }
 
   if (cpi->oxcf.aq_mode)
@@ -1088,10 +1089,10 @@
     MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
     for (w = 0; w < x_mis; ++w) {
       MV_REF *const mv = frame_mv + w;
-      mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0];
-      mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1];
-      mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int;
-      mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int;
+      mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+      mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+      mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+      mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
     }
   }
 }
@@ -1114,13 +1115,13 @@
 static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
                                    RD_COST *rd_cost, BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   INTERP_FILTER filter_ref;
 
   if (xd->up_available)
-    filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
+    filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
   else if (xd->left_available)
-    filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter;
+    filter_ref = xd->mi[-1]->mbmi.interp_filter;
   else
     filter_ref = EIGHTTAP;
 
@@ -1135,7 +1136,7 @@
   mbmi->mv[0].as_int = 0;
   mbmi->interp_filter = filter_ref;
 
-  xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = 0;
+  xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
   x->skip = 1;
 
   vp9_rd_cost_init(rd_cost);
@@ -1174,7 +1175,7 @@
   x->use_lp32x32fdct = 1;
 
   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-  mbmi = &xd->mi[0].src_mi->mbmi;
+  mbmi = &xd->mi[0]->mbmi;
   mbmi->sb_type = bsize;
 
   for (i = 0; i < MAX_MB_PLANE; ++i) {
@@ -1274,7 +1275,7 @@
 static void update_stats(VP9_COMMON *cm, ThreadData *td) {
   const MACROBLOCK *x = &td->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
-  const MODE_INFO *const mi = xd->mi[0].src_mi;
+  const MODE_INFO *const mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
   const BLOCK_SIZE bsize = mbmi->sb_type;
 
@@ -1504,15 +1505,15 @@
 
 static void set_partial_b64x64_partition(MODE_INFO *mi, int mis,
     int bh_in, int bw_in, int row8x8_remaining, int col8x8_remaining,
-    BLOCK_SIZE bsize, MODE_INFO *mi_8x8) {
+    BLOCK_SIZE bsize, MODE_INFO **mi_8x8) {
   int bh = bh_in;
   int r, c;
   for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
     int bw = bw_in;
     for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
       const int index = r * mis + c;
-      mi_8x8[index].src_mi = mi + index;
-      mi_8x8[index].src_mi->mbmi.sb_type = find_partition_size(bsize,
+      mi_8x8[index] = mi + index;
+      mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize,
           row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
     }
   }
@@ -1524,7 +1525,7 @@
 // may not be allowed in which case this code attempts to choose the largest
 // allowable partition.
 static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
-                                   MODE_INFO *mi_8x8, int mi_row, int mi_col,
+                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   const int mis = cm->mi_stride;
@@ -1543,8 +1544,8 @@
     for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
       for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
         int index = block_row * mis + block_col;
-        mi_8x8[index].src_mi = mi_upper_left + index;
-        mi_8x8[index].src_mi->mbmi.sb_type = bsize;
+        mi_8x8[index] = mi_upper_left + index;
+        mi_8x8[index]->mbmi.sb_type = bsize;
       }
     }
   } else {
@@ -1571,7 +1572,7 @@
 static void set_source_var_based_partition(VP9_COMP *cpi,
                                            const TileInfo *const tile,
                                            MACROBLOCK *const x,
-                                           MODE_INFO *mi_8x8,
+                                           MODE_INFO **mi_8x8,
                                            int mi_row, int mi_col) {
   VP9_COMMON *const cm = &cpi->common;
   const int mis = cm->mi_stride;
@@ -1608,8 +1609,8 @@
         d16[j] = cpi->source_diff_var + offset + boffset;
 
         index = b_mi_row * mis + b_mi_col;
-        mi_8x8[index].src_mi = mi_upper_left + index;
-        mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_16X16;
+        mi_8x8[index] = mi_upper_left + index;
+        mi_8x8[index]->mbmi.sb_type = BLOCK_16X16;
 
         // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
         // size to further improve quality.
@@ -1630,8 +1631,8 @@
         d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10);
 
         index = coord_lookup[i*4].row * mis + coord_lookup[i*4].col;
-        mi_8x8[index].src_mi = mi_upper_left + index;
-        mi_8x8[index].src_mi->mbmi.sb_type = BLOCK_32X32;
+        mi_8x8[index] = mi_upper_left + index;
+        mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;
       }
     }
 
@@ -1642,8 +1643,8 @@
 
       // Use 64x64 partition
       if (is_larger_better) {
-        mi_8x8[0].src_mi = mi_upper_left;
-        mi_8x8[0].src_mi->mbmi.sb_type = BLOCK_64X64;
+        mi_8x8[0] = mi_upper_left;
+        mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
       }
     }
   } else {   // partial in-image SB64
@@ -1660,16 +1661,15 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *const mi = xd->mi[0].src_mi;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MODE_INFO *const mi = xd->mi[0];
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
   const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
   const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
   const int x_mis = MIN(bw, cm->mi_cols - mi_col);
   const int y_mis = MIN(bh, cm->mi_rows - mi_row);
 
-  xd->mi[0] = ctx->mic;
-  xd->mi[0].src_mi = &xd->mi[0];
+  *(xd->mi[0]) = ctx->mic;
 
   if (seg->enabled && cpi->oxcf.aq_mode) {
     // For in frame complexity AQ or variance AQ, copy segment_id from
@@ -1709,10 +1709,10 @@
       MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
       for (w = 0; w < x_mis; ++w) {
         MV_REF *const mv = frame_mv + w;
-        mv->ref_frame[0] = mi->src_mi->mbmi.ref_frame[0];
-        mv->ref_frame[1] = mi->src_mi->mbmi.ref_frame[1];
-        mv->mv[0].as_int = mi->src_mi->mbmi.mv[0].as_int;
-        mv->mv[1].as_int = mi->src_mi->mbmi.mv[1].as_int;
+        mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+        mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+        mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+        mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
       }
     }
   }
@@ -1764,9 +1764,9 @@
 
   if (bsize >= BLOCK_8X8) {
     const int idx_str = xd->mi_stride * mi_row + mi_col;
-    MODE_INFO *mi_8x8 = cm->mi[idx_str].src_mi;
+    MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str;
     ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
-    subsize = mi_8x8[0].src_mi->mbmi.sb_type;
+    subsize = mi_8x8[0]->mbmi.sb_type;
   } else {
     ctx = 0;
     subsize = BLOCK_4X4;
@@ -1820,7 +1820,7 @@
 static void rd_use_partition(VP9_COMP *cpi,
                              ThreadData *td,
                              TileDataEnc *tile_data,
-                             MODE_INFO *mi_8x8, TOKENEXTRA **tp,
+                             MODE_INFO **mi_8x8, TOKENEXTRA **tp,
                              int mi_row, int mi_col,
                              BLOCK_SIZE bsize,
                              int *rate, int64_t *dist,
@@ -1841,7 +1841,7 @@
   RD_COST last_part_rdc, none_rdc, chosen_rdc;
   BLOCK_SIZE sub_subsize = BLOCK_4X4;
   int splits_below = 0;
-  BLOCK_SIZE bs_type = mi_8x8[0].src_mi->mbmi.sb_type;
+  BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
   int do_partition_search = 1;
   PICK_MODE_CONTEXT *ctx = &pc_tree->none;
 
@@ -1875,7 +1875,7 @@
       splits_below = 1;
       for (i = 0; i < 4; i++) {
         int jj = i >> 1, ii = i & 0x01;
-        MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss].src_mi;
+        MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
         if (this_mi && this_mi->mbmi.sb_type >= sub_subsize) {
           splits_below = 0;
         }
@@ -1900,7 +1900,7 @@
       }
 
       restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-      mi_8x8[0].src_mi->mbmi.sb_type = bs_type;
+      mi_8x8[0]->mbmi.sb_type = bs_type;
       pc_tree->partitioning = partition;
     }
   }
@@ -2058,7 +2058,7 @@
 
   // If last_part is better set the partitioning to that.
   if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
-    mi_8x8[0].src_mi->mbmi.sb_type = bsize;
+    mi_8x8[0]->mbmi.sb_type = bsize;
     if (bsize >= BLOCK_8X8)
       pc_tree->partitioning = partition;
     chosen_rdc = last_part_rdc;
@@ -2110,7 +2110,7 @@
 //
 // The min and max are assumed to have been initialized prior to calling this
 // function so repeat calls can accumulate a min and max of more than one sb64.
-static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO *mi_8x8,
+static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
                                         BLOCK_SIZE *min_block_size,
                                         BLOCK_SIZE *max_block_size,
                                         int bs_hist[BLOCK_SIZES]) {
@@ -2122,7 +2122,7 @@
   // Check the sb_type for each block that belongs to this region.
   for (i = 0; i < sb_height_in_blocks; ++i) {
     for (j = 0; j < sb_width_in_blocks; ++j) {
-      MODE_INFO *mi = mi_8x8[index+j].src_mi;
+      MODE_INFO *mi = mi_8x8[index+j];
       BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
       bs_hist[sb_type]++;
       *min_block_size = MIN(*min_block_size, sb_type);
@@ -2149,9 +2149,9 @@
                                     BLOCK_SIZE *min_block_size,
                                     BLOCK_SIZE *max_block_size) {
   VP9_COMMON *const cm = &cpi->common;
-  MODE_INFO *mi = xd->mi[0].src_mi;
-  const int left_in_image = xd->left_available && mi[-1].src_mi;
-  const int above_in_image = xd->up_available && mi[-xd->mi_stride].src_mi;
+  MODE_INFO **mi = xd->mi;
+  const int left_in_image = xd->left_available && mi[-1];
+  const int above_in_image = xd->up_available && mi[-xd->mi_stride];
   const int row8x8_remaining = tile->mi_row_end - mi_row;
   const int col8x8_remaining = tile->mi_col_end - mi_col;
   int bh, bw;
@@ -2170,20 +2170,19 @@
     // passed in values for min and max as a starting point.
     // Find the min and max partition used in previous frame at this location
     if (cm->frame_type != KEY_FRAME) {
-      MODE_INFO *prev_mi =
-          cm->prev_mip + cm->mi_stride + 1 + mi_row * xd->mi_stride + mi_col;
-
+      MODE_INFO **prev_mi =
+          &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col];
       get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist);
     }
     // Find the min and max partition sizes used in the left SB64
     if (left_in_image) {
-      MODE_INFO *left_sb64_mi = mi[-MI_BLOCK_SIZE].src_mi;
+      MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE];
       get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size,
                                   bs_hist);
     }
     // Find the min and max partition sizes used in the above SB64.
     if (above_in_image) {
-      MODE_INFO *above_sb64_mi = mi[-xd->mi_stride * MI_BLOCK_SIZE].src_mi;
+      MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE];
       get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size,
                                   bs_hist);
     }
@@ -2245,10 +2244,9 @@
                                  BLOCK_SIZE *min_block_size,
                                  BLOCK_SIZE *max_block_size) {
   VP9_COMMON *const cm = &cpi->common;
-  MODE_INFO *mi_8x8 = xd->mi;
-  const int left_in_image = xd->left_available && mi_8x8[-1].src_mi;
-  const int above_in_image = xd->up_available &&
-                             mi_8x8[-xd->mi_stride].src_mi;
+  MODE_INFO **mi_8x8 = xd->mi;
+  const int left_in_image = xd->left_available && mi_8x8[-1];
+  const int above_in_image = xd->up_available && mi_8x8[-xd->mi_stride];
   int row8x8_remaining = tile->mi_row_end - mi_row;
   int col8x8_remaining = tile->mi_col_end - mi_col;
   int bh, bw;
@@ -2261,15 +2259,15 @@
   if (search_range_ctrl &&
       (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) {
     int block;
-    MODE_INFO *mi;
+    MODE_INFO **mi;
     BLOCK_SIZE sb_type;
 
     // Find the min and max partition sizes used in the left SB64.
     if (left_in_image) {
       MODE_INFO *cur_mi;
-      mi = mi_8x8[-1].src_mi;
+      mi = &mi_8x8[-1];
       for (block = 0; block < MI_BLOCK_SIZE; ++block) {
-        cur_mi = mi[block * xd->mi_stride].src_mi;
+        cur_mi = mi[block * xd->mi_stride];
         sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0;
         min_size = MIN(min_size, sb_type);
         max_size = MAX(max_size, sb_type);
@@ -2277,9 +2275,9 @@
     }
     // Find the min and max partition sizes used in the above SB64.
     if (above_in_image) {
-      mi = mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE].src_mi;
+      mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];
       for (block = 0; block < MI_BLOCK_SIZE; ++block) {
-        sb_type = mi[block].src_mi ? mi[block].src_mi->mbmi.sb_type : 0;
+        sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0;
         min_size = MIN(min_size, sb_type);
         max_size = MAX(max_size, sb_type);
       }
@@ -2310,9 +2308,7 @@
 
   MODE_INFO *mi;
   const int idx_str = cm->mi_stride * mi_row + mi_col;
-  MODE_INFO *prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
-
-
+  MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str];
   BLOCK_SIZE bs, min_size, max_size;
 
   min_size = BLOCK_64X64;
@@ -2321,7 +2317,7 @@
   if (prev_mi) {
     for (idy = 0; idy < mi_height; ++idy) {
       for (idx = 0; idx < mi_width; ++idx) {
-        mi = prev_mi[idy * cm->mi_stride + idx].src_mi;
+        mi = prev_mi[idy * cm->mi_stride + idx];
         bs = mi ? mi->mbmi.sb_type : bsize;
         min_size = MIN(min_size, bs);
         max_size = MAX(max_size, bs);
@@ -2331,7 +2327,7 @@
 
   if (xd->left_available) {
     for (idy = 0; idy < mi_height; ++idy) {
-      mi = xd->mi[idy * cm->mi_stride - 1].src_mi;
+      mi = xd->mi[idy * cm->mi_stride - 1];
       bs = mi ? mi->mbmi.sb_type : bsize;
       min_size = MIN(min_size, bs);
       max_size = MAX(max_size, bs);
@@ -2340,7 +2336,7 @@
 
   if (xd->up_available) {
     for (idx = 0; idx < mi_width; ++idx) {
-      mi = xd->mi[idx - cm->mi_stride].src_mi;
+      mi = xd->mi[idx - cm->mi_stride];
       bs = mi ? mi->mbmi.sb_type : bsize;
       min_size = MIN(min_size, bs);
       max_size = MAX(max_size, bs);
@@ -2850,7 +2846,7 @@
     int seg_skip = 0;
 
     const int idx_str = cm->mi_stride * mi_row + mi_col;
-    MODE_INFO *mi = cm->mi + idx_str;
+    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
 
     if (sf->adaptive_pred_interp_filter) {
       for (i = 0; i < 64; ++i)
@@ -2942,12 +2938,12 @@
 static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
   int mi_row, mi_col;
   const int mis = cm->mi_stride;
-  MODE_INFO *mi_ptr = cm->mi;
+  MODE_INFO **mi_ptr = cm->mi_grid_visible;
 
   for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
     for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
-      if (mi_ptr[mi_col].src_mi->mbmi.tx_size > max_tx_size)
-        mi_ptr[mi_col].src_mi->mbmi.tx_size = max_tx_size;
+      if (mi_ptr[mi_col]->mbmi.tx_size > max_tx_size)
+        mi_ptr[mi_col]->mbmi.tx_size = max_tx_size;
     }
   }
 }
@@ -2997,7 +2993,7 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *mbmi;
   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
-  mbmi = &xd->mi[0].src_mi->mbmi;
+  mbmi = &xd->mi[0]->mbmi;
   mbmi->sb_type = bsize;
 
   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
@@ -3041,27 +3037,27 @@
   switch (partition) {
     case PARTITION_NONE:
       set_mode_info_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0].src_mi) = pc_tree->none.mic;
+      *(xd->mi[0]) = pc_tree->none.mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
       break;
     case PARTITION_VERT:
       set_mode_info_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0].src_mi) = pc_tree->vertical[0].mic;
+      *(xd->mi[0]) = pc_tree->vertical[0].mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
 
       if (mi_col + hbs < cm->mi_cols) {
         set_mode_info_offsets(cm, xd, mi_row, mi_col + hbs);
-        *(xd->mi[0].src_mi) = pc_tree->vertical[1].mic;
+        *(xd->mi[0]) = pc_tree->vertical[1].mic;
         duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
       }
       break;
     case PARTITION_HORZ:
       set_mode_info_offsets(cm, xd, mi_row, mi_col);
-      *(xd->mi[0].src_mi) = pc_tree->horizontal[0].mic;
+      *(xd->mi[0]) = pc_tree->horizontal[0].mic;
       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
       if (mi_row + hbs < cm->mi_rows) {
         set_mode_info_offsets(cm, xd, mi_row + hbs, mi_col);
-        *(xd->mi[0].src_mi) = pc_tree->horizontal[1].mic;
+        *(xd->mi[0]) = pc_tree->horizontal[1].mic;
         duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
       }
       break;
@@ -3161,7 +3157,7 @@
   if (partition_none_allowed) {
     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col,
                         &this_rdc, bsize, ctx);
-    ctx->mic.mbmi = xd->mi[0].src_mi->mbmi;
+    ctx->mic.mbmi = xd->mi[0]->mbmi;
     ctx->skip_txfm[0] = x->skip_txfm[0];
     ctx->skip = x->skip;
 
@@ -3243,7 +3239,7 @@
     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                         &pc_tree->horizontal[0]);
 
-    pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+    pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
     pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
     pc_tree->horizontal[0].skip = x->skip;
 
@@ -3254,7 +3250,7 @@
                           &this_rdc, subsize,
                           &pc_tree->horizontal[1]);
 
-      pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+      pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
       pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->horizontal[1].skip = x->skip;
 
@@ -3286,7 +3282,7 @@
     pc_tree->vertical[0].pred_pixel_ready = 1;
     nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                         &pc_tree->vertical[0]);
-    pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+    pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
     pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
     pc_tree->vertical[0].skip = x->skip;
 
@@ -3296,7 +3292,7 @@
       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms,
                           &this_rdc, subsize,
                           &pc_tree->vertical[1]);
-      pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+      pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
       pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->vertical[1].skip = x->skip;
 
@@ -3348,7 +3344,7 @@
 static void nonrd_select_partition(VP9_COMP *cpi,
                                    ThreadData *td,
                                    TileDataEnc *tile_data,
-                                   MODE_INFO *mi,
+                                   MODE_INFO **mi,
                                    TOKENEXTRA **tp,
                                    int mi_row, int mi_col,
                                    BLOCK_SIZE bsize, int output_enabled,
@@ -3367,7 +3363,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4;
+  subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
   partition = partition_lookup[bsl][subsize];
 
   if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
@@ -3387,7 +3383,7 @@
         pc_tree->none.pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                             subsize, &pc_tree->none);
-        pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
         pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
         pc_tree->none.skip = x->skip;
         break;
@@ -3395,14 +3391,14 @@
         pc_tree->vertical[0].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                             subsize, &pc_tree->vertical[0]);
-        pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
         pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->vertical[0].skip = x->skip;
         if (mi_col + hbs < cm->mi_cols) {
           pc_tree->vertical[1].pred_pixel_ready = 1;
           nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                               &this_rdc, subsize, &pc_tree->vertical[1]);
-          pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+          pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
           pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
           pc_tree->vertical[1].skip = x->skip;
           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
@@ -3416,14 +3412,14 @@
         pc_tree->horizontal[0].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                             subsize, &pc_tree->horizontal[0]);
-        pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
         pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->horizontal[0].skip = x->skip;
         if (mi_row + hbs < cm->mi_rows) {
           pc_tree->horizontal[1].pred_pixel_ready = 1;
           nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                               &this_rdc, subsize, &pc_tree->horizontal[1]);
-          pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+          pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
           pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
           pc_tree->horizontal[1].skip = x->skip;
           if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
@@ -3477,7 +3473,7 @@
 static void nonrd_use_partition(VP9_COMP *cpi,
                                 ThreadData *td,
                                 TileDataEnc *tile_data,
-                                MODE_INFO *mi,
+                                MODE_INFO **mi,
                                 TOKENEXTRA **tp,
                                 int mi_row, int mi_col,
                                 BLOCK_SIZE bsize, int output_enabled,
@@ -3494,7 +3490,7 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4;
+  subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
   partition = partition_lookup[bsl][subsize];
 
   if (output_enabled && bsize != BLOCK_4X4) {
@@ -3507,7 +3503,7 @@
       pc_tree->none.pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                           subsize, &pc_tree->none);
-      pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
+      pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
       pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
       pc_tree->none.skip = x->skip;
       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
@@ -3517,7 +3513,7 @@
       pc_tree->vertical[0].pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                           subsize, &pc_tree->vertical[0]);
-      pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+      pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
       pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->vertical[0].skip = x->skip;
       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
@@ -3526,7 +3522,7 @@
         pc_tree->vertical[1].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                             dummy_cost, subsize, &pc_tree->vertical[1]);
-        pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
         pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->vertical[1].skip = x->skip;
         encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
@@ -3537,7 +3533,7 @@
       pc_tree->horizontal[0].pred_pixel_ready = 1;
       nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                           subsize, &pc_tree->horizontal[0]);
-      pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+      pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
       pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
       pc_tree->horizontal[0].skip = x->skip;
       encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
@@ -3547,7 +3543,7 @@
         pc_tree->horizontal[1].pred_pixel_ready = 1;
         nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                             dummy_cost, subsize, &pc_tree->horizontal[1]);
-        pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
         pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
         pc_tree->horizontal[1].skip = x->skip;
         encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
@@ -3607,7 +3603,7 @@
     const struct segmentation *const seg = &cm->seg;
     RD_COST dummy_rdc;
     const int idx_str = cm->mi_stride * mi_row + mi_col;
-    MODE_INFO *mi = cm->mi + idx_str;
+    MODE_INFO **mi = cm->mi_grid_visible + idx_str;
     PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
     BLOCK_SIZE bsize = BLOCK_64X64;
     int seg_skip = 0;
@@ -3653,7 +3649,7 @@
       case REFERENCE_PARTITION:
         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
         if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
-            xd->mi[0].src_mi->mbmi.segment_id) {
+            xd->mi[0]->mbmi.segment_id) {
           x->max_partition_size = BLOCK_64X64;
           x->min_partition_size = BLOCK_8X8;
           nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
@@ -3898,8 +3894,8 @@
   MACROBLOCKD *const xd = &x->e_mbd;
   RD_COUNTS *const rdc = &cpi->td.rd_counts;
 
-  xd->mi = cm->mi;
-  xd->mi[0].src_mi = &xd->mi[0];
+  xd->mi = cm->mi_grid_visible;
+  xd->mi[0] = cm->mi;
 
   vp9_zero(*td->counts);
   vp9_zero(rdc->coef_counts);
@@ -4176,8 +4172,8 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *mi_8x8 = xd->mi;
-  MODE_INFO *mi = mi_8x8;
+  MODE_INFO **mi_8x8 = xd->mi;
+  MODE_INFO *mi = mi_8x8[0];
   MB_MODE_INFO *mbmi = &mi->mbmi;
   const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                              SEG_LVL_SKIP);
@@ -4251,7 +4247,7 @@
       for (y = 0; y < mi_height; y++)
         for (x = 0; x < mi_width; x++)
           if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
-            mi_8x8[mis * y + x].src_mi->mbmi.tx_size = tx_size;
+            mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
     }
     ++td->counts->tx.tx_totals[mbmi->tx_size];
     ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 65e2997..e6afd47 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -128,7 +128,7 @@
   MACROBLOCKD *const xd = &mb->e_mbd;
   struct macroblock_plane *const p = &mb->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
-  const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi);
+  const int ref = is_inter_block(&xd->mi[0]->mbmi);
   vp9_token_state tokens[1025][2];
   unsigned best_index[1025][2];
   uint8_t token_cache[1024];
@@ -773,7 +773,7 @@
 void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   struct optimize_ctx ctx;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct encode_b_args arg = {x, &ctx, &mbmi->skip};
   int plane;
 
@@ -803,7 +803,7 @@
   struct encode_b_args* const args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct macroblock_plane *const p = &x->plane[plane];
   struct macroblockd_plane *const pd = &xd->plane[plane];
   tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block);
@@ -895,7 +895,7 @@
       case TX_4X4:
         tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
         scan_order = &vp9_scan_orders[TX_4X4][tx_type];
-        mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode;
+        mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
         vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                                 x->skip_encode ? src : dst,
                                 x->skip_encode ? src_stride : dst_stride,
@@ -998,7 +998,7 @@
     case TX_4X4:
       tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
       scan_order = &vp9_scan_orders[TX_4X4][tx_type];
-      mode = plane == 0 ? get_y_mode(xd->mi[0].src_mi, block) : mbmi->uv_mode;
+      mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode;
       vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                               x->skip_encode ? src : dst,
                               x->skip_encode ? src_stride : dst_stride,
@@ -1037,7 +1037,7 @@
 
 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   const MACROBLOCKD *const xd = &x->e_mbd;
-  struct encode_b_args arg = {x, NULL, &xd->mi[0].src_mi->mbmi.skip};
+  struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip};
 
   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
                                          vp9_encode_block_intra, &arg);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index f2c4efc..af73fcb 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -243,7 +243,7 @@
 
 void vp9_update_mv_count(ThreadData *td) {
   const MACROBLOCKD *xd = &td->mb.e_mbd;
-  const MODE_INFO *mi = xd->mi[0].src_mi;
+  const MODE_INFO *mi = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
   if (mbmi->sb_type < BLOCK_8X8) {
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index c6bc6aa..bc09a16 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -258,6 +258,12 @@
   // Clear left border column
   for (i = 1; i < cm->mi_rows + 1; ++i)
     vpx_memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip));
+
+  cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
+  cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
+
+  vpx_memset(cm->mi_grid_base, 0,
+             cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
 }
 
 static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
@@ -268,6 +274,14 @@
   if (!cm->prev_mip)
     return 1;
   cm->mi_alloc_size = mi_size;
+
+  cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*));
+  if (!cm->mi_grid_base)
+    return 1;
+  cm->prev_mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*));
+  if (!cm->prev_mi_grid_base)
+    return 1;
+
   return 0;
 }
 
@@ -276,10 +290,15 @@
   cm->mip = NULL;
   vpx_free(cm->prev_mip);
   cm->prev_mip = NULL;
+  vpx_free(cm->mi_grid_base);
+  cm->mi_grid_base = NULL;
+  vpx_free(cm->prev_mi_grid_base);
+  cm->prev_mi_grid_base = NULL;
 }
 
 static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
   // Current mip will be the prev_mip for the next frame.
+  MODE_INFO **temp_base = cm->prev_mi_grid_base;
   MODE_INFO *temp = cm->prev_mip;
   cm->prev_mip = cm->mip;
   cm->mip = temp;
@@ -287,6 +306,11 @@
   // Update the upper left visible macroblock ptrs.
   cm->mi = cm->mip + cm->mi_stride + 1;
   cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
+
+  cm->prev_mi_grid_base = cm->mi_grid_base;
+  cm->mi_grid_base = temp_base;
+  cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1;
+  cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1;
 }
 
 void vp9_initialize_enc(void) {
@@ -567,15 +591,15 @@
 
 static void update_reference_segmentation_map(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
-  MODE_INFO *mi_8x8_ptr = cm->mi;
+  MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible;
   uint8_t *cache_ptr = cm->last_frame_seg_map;
   int row, col;
 
   for (row = 0; row < cm->mi_rows; row++) {
-    MODE_INFO *mi_8x8 = mi_8x8_ptr;
+    MODE_INFO **mi_8x8 = mi_8x8_ptr;
     uint8_t *cache = cache_ptr;
     for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++)
-      cache[0] = mi_8x8[0].src_mi->mbmi.segment_id;
+      cache[0] = mi_8x8[0]->mbmi.segment_id;
     mi_8x8_ptr += cm->mi_stride;
     cache_ptr += cm->mi_cols;
   }
@@ -1618,23 +1642,18 @@
 #if CONFIG_INTERNAL_STATS
   cpi->b_calculate_ssimg = 0;
   cpi->b_calculate_blockiness = 1;
-
+  cpi->b_calculate_consistency = 1;
+  cpi->total_inconsistency = 0;
+  cpi->psnr.worst = 100.0;
+  cpi->worst_ssim = 100.0;
 
   cpi->count = 0;
   cpi->bytes = 0;
 
   if (cpi->b_calculate_psnr) {
-    cpi->total_y = 0.0;
-    cpi->total_u = 0.0;
-    cpi->total_v = 0.0;
-    cpi->total = 0.0;
     cpi->total_sq_error = 0;
     cpi->total_samples = 0;
 
-    cpi->totalp_y = 0.0;
-    cpi->totalp_u = 0.0;
-    cpi->totalp_v = 0.0;
-    cpi->totalp = 0.0;
     cpi->totalp_sq_error = 0;
     cpi->totalp_samples = 0;
 
@@ -1646,27 +1665,20 @@
   }
 
   if (cpi->b_calculate_ssimg) {
-    cpi->total_ssimg_y = 0;
-    cpi->total_ssimg_u = 0;
-    cpi->total_ssimg_v = 0;
-    cpi->total_ssimg_all = 0;
+    cpi->ssimg.worst = 100.0;
   }
-  cpi->total_fastssim_y = 0;
-  cpi->total_fastssim_u = 0;
-  cpi->total_fastssim_v = 0;
-  cpi->total_fastssim_all = 0;
+  cpi->fastssim.worst = 100.0;
 
-  cpi->total_psnrhvs_y = 0;
-  cpi->total_psnrhvs_u = 0;
-  cpi->total_psnrhvs_v = 0;
-  cpi->total_psnrhvs_all = 0;
+  cpi->psnrhvs.worst = 100.0;
 
   if (cpi->b_calculate_blockiness) {
     cpi->total_blockiness = 0;
+    cpi->worst_blockiness = 0.0;
   }
 
-  if (cpi->b_calculate_blockiness) {
-    cpi->total_blockiness = 0;
+  if (cpi->b_calculate_consistency) {
+    cpi->ssim_vars = vpx_malloc(sizeof(*cpi->ssim_vars)*720*480);
+    cpi->worst_consistency = 100.0;
   }
 
 #endif
@@ -1865,6 +1877,11 @@
 
   return cpi;
 }
+#define SNPRINT(H, T) \
+  snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T))
+
+#define SNPRINT2(H, T, V) \
+  snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V))
 
 void vp9_remove_compressor(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
@@ -1878,8 +1895,9 @@
 #if CONFIG_INTERNAL_STATS
     vp9_clear_system_state();
 
-    // printf("\n8x8-4x4:%d-%d\n", cpi->t8x8_count, cpi->t4x4_count);
     if (cpi->oxcf.pass != 1) {
+      char headings[512] = {0};
+      char results[512] = {0};
       FILE *f = fopen("opsnr.stt", "a");
       double time_encoded = (cpi->last_end_time_stamp_seen
                              - cpi->first_time_stamp_ever) / 10000000.000;
@@ -1897,39 +1915,50 @@
             vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
                             (double)cpi->totalp_sq_error);
         const double total_ssim = 100 * pow(cpi->summed_quality /
-                                                cpi->summed_weights, 8.0);
+                                            cpi->summed_weights, 8.0);
+        const double totalp_ssim = 100 * pow(cpi->summedp_quality /
+                                             cpi->summedp_weights, 8.0);
+
+        snprintf(headings, sizeof(headings),
+                 "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
+                 "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
+                 "WstPsnr\tWstSsim\tWstFast\tWstHVS");
+        snprintf(results, sizeof(results),
+                 "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+                 "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+                 "%7.3f\t%7.3f\t%7.3f\t%7.3f",
+                 dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr,
+                 cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr,
+                 total_ssim, totalp_ssim,
+                 cpi->fastssim.stat[ALL] / cpi->count,
+                 cpi->psnrhvs.stat[ALL] / cpi->count,
+                 cpi->psnr.worst, cpi->worst_ssim, cpi->fastssim.worst,
+                 cpi->psnrhvs.worst);
+
         if (cpi->b_calculate_blockiness) {
-          fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
-                "VPXSSIM\tVPSSIMP\tFASTSSIM\tPSNRHVS\tTime(ms)\n");
-          fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
-                "%7.3f\t%7.3f\t%8.0f\n",
-                  dr, cpi->total / cpi->count, total_psnr,
-                  cpi->totalp / cpi->count, totalp_psnr, total_ssim,
-                cpi->total_fastssim_all / cpi->count,
-                cpi->total_psnrhvs_all / cpi->count,
-                total_encode_time);
-        } else {
-          fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
-                "VPXSSIM\tVPSSIMP\tBlockiness\tFASTSSIM\tPSNRHVS\tTime(ms)\n");
-          fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
-                  "%7.3f\t%7.3f\t%7.3f\t%8.0f\n",
-                  dr, cpi->total / cpi->count, total_psnr,
-                  cpi->totalp / cpi->count, totalp_psnr, total_ssim,
-                  cpi->total_blockiness / cpi->count,
-                  cpi->total_fastssim_all / cpi->count,
-                  cpi->total_psnrhvs_all / cpi->count,
-                  total_encode_time);
+          SNPRINT(headings, "\t  Block\tWstBlck");
+          SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count);
+          SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
         }
-      }
 
+        if (cpi->b_calculate_consistency) {
+          double consistency =
+              vpx_sse_to_psnr((double)cpi->totalp_samples, peak,
+                              (double)cpi->total_inconsistency);
 
-      if (cpi->b_calculate_ssimg) {
-        fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t  Time(ms)\n");
-        fprintf(f, "%7.2f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr,
-                cpi->total_ssimg_y / cpi->count,
-                cpi->total_ssimg_u / cpi->count,
-                cpi->total_ssimg_v / cpi->count,
-                cpi->total_ssimg_all / cpi->count, total_encode_time);
+          SNPRINT(headings, "\tConsist\tWstCons");
+          SNPRINT2(results, "\t%7.3f", consistency);
+          SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
+        }
+
+        if (cpi->b_calculate_ssimg) {
+          SNPRINT(headings, "\t  SSIMG\tWtSSIMG");
+          SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count);
+          SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);
+        }
+
+        fprintf(f, "%s\t    Time\n", headings);
+        fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
       }
 
       fclose(f);
@@ -3845,6 +3874,14 @@
                                  int width, int height);
 #endif
 
+void adjust_image_stat(double y, double u, double v, double all, ImageStat *s) {
+  s->stat[Y] += y;
+  s->stat[U] += u;
+  s->stat[V] += v;
+  s->stat[ALL] += all;
+  s->worst = MIN(s->worst, all);
+}
+
 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest,
                             int64_t *time_stamp, int64_t *time_end, int flush) {
@@ -4097,6 +4134,7 @@
 #if CONFIG_INTERNAL_STATS
 
   if (oxcf->pass != 1) {
+    double samples;
     cpi->bytes += (int)(*size);
 
     if (cm->show_frame) {
@@ -4114,12 +4152,11 @@
         calc_psnr(orig, recon, &psnr);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-        cpi->total += psnr.psnr[0];
-        cpi->total_y += psnr.psnr[1];
-        cpi->total_u += psnr.psnr[2];
-        cpi->total_v += psnr.psnr[3];
+        adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],
+                          psnr.psnr[0], &cpi->psnr);
         cpi->total_sq_error += psnr.sse[0];
         cpi->total_samples += psnr.samples[0];
+        samples = psnr.samples[0];
 
         {
           PSNR_STATS psnr2;
@@ -4149,12 +4186,10 @@
           calc_psnr(orig, pp, &psnr2);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-          cpi->totalp += psnr2.psnr[0];
-          cpi->totalp_y += psnr2.psnr[1];
-          cpi->totalp_u += psnr2.psnr[2];
-          cpi->totalp_v += psnr2.psnr[3];
           cpi->totalp_sq_error += psnr2.sse[0];
           cpi->totalp_samples += psnr2.samples[0];
+          adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3],
+                            psnr2.psnr[0], &cpi->psnrp);
 
 #if CONFIG_VP9_HIGHBITDEPTH
           if (cm->use_highbitdepth) {
@@ -4167,6 +4202,7 @@
           frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
+          cpi->worst_ssim = MIN(cpi->worst_ssim, frame_ssim2);
           cpi->summed_quality += frame_ssim2 * weight;
           cpi->summed_weights += weight;
 
@@ -4194,12 +4230,33 @@
 #endif
         }
       }
-      if (cpi->b_calculate_blockiness)
-        cpi->total_blockiness +=
-            vp9_get_blockiness(cpi->Source->y_buffer, cpi->Source->y_stride,
-                               cm->frame_to_show->y_buffer,
-                               cm->frame_to_show->y_stride,
-                               cpi->Source->y_width, cpi->Source->y_height);
+      if (cpi->b_calculate_blockiness) {
+        double frame_blockiness = vp9_get_blockiness(
+            cpi->Source->y_buffer, cpi->Source->y_stride,
+            cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+            cpi->Source->y_width, cpi->Source->y_height);
+        cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness);
+        cpi->total_blockiness += frame_blockiness;
+      }
+
+      if (cpi->b_calculate_consistency) {
+        double this_inconsistency = vp9_get_ssim_metrics(
+            cpi->Source->y_buffer, cpi->Source->y_stride,
+            cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+            cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
+            &cpi->metrics, 1);
+
+        const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
+
+
+        double consistency = vpx_sse_to_psnr(samples, peak,
+                                             (double)cpi->total_inconsistency);
+
+        if (consistency > 0.0)
+          cpi->worst_consistency = MIN(cpi->worst_consistency,
+                                       consistency);
+        cpi->total_inconsistency += this_inconsistency;
+      }
 
       if (cpi->b_calculate_ssimg) {
         double y, u, v, frame_all;
@@ -4214,30 +4271,19 @@
 #else
         frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-        cpi->total_ssimg_y += y;
-        cpi->total_ssimg_u += u;
-        cpi->total_ssimg_v += v;
-        cpi->total_ssimg_all += frame_all;
+        adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);
       }
       {
         double y, u, v, frame_all;
         frame_all = vp9_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
                                       &v);
-
-        cpi->total_fastssim_y += y;
-        cpi->total_fastssim_u += u;
-        cpi->total_fastssim_v += v;
-        cpi->total_fastssim_all += frame_all;
+        adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
         /* TODO(JBB): add 10/12 bit support */
       }
       {
         double y, u, v, frame_all;
         frame_all = vp9_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
-
-        cpi->total_psnrhvs_y += y;
-        cpi->total_psnrhvs_u += u;
-        cpi->total_psnrhvs_v += v;
-        cpi->total_psnrhvs_all += frame_all;
+        adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
       }
     }
   }
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 267c796..41f1c13 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -34,6 +34,9 @@
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rd.h"
+#if CONFIG_INTERNAL_STATS
+#include "vp9/encoder/vp9_ssim.h"
+#endif
 #include "vp9/encoder/vp9_speed_features.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
 #include "vp9/encoder/vp9_tokenize.h"
@@ -272,6 +275,18 @@
   unsigned char *map;
 } ActiveMap;
 
+typedef enum {
+  Y,
+  U,
+  V,
+  ALL
+} STAT_TYPE;
+
+typedef struct IMAGE_STAT {
+  double stat[ALL+1];
+  double worst;
+} ImageStat;
+
 typedef struct VP9_COMP {
   QUANTS quants;
   ThreadData td;
@@ -388,21 +403,16 @@
   unsigned int mode_chosen_counts[MAX_MODES];
 
   int    count;
-  double total_y;
-  double total_u;
-  double total_v;
-  double total;
   uint64_t total_sq_error;
   uint64_t total_samples;
+  ImageStat psnr;
 
-  double totalp_y;
-  double totalp_u;
-  double totalp_v;
-  double totalp;
   uint64_t totalp_sq_error;
   uint64_t totalp_samples;
+  ImageStat psnrp;
 
   double total_blockiness;
+  double worst_blockiness;
 
   int    bytes;
   double summed_quality;
@@ -410,25 +420,21 @@
   double summedp_quality;
   double summedp_weights;
   unsigned int tot_recode_hits;
+  double worst_ssim;
 
-
-  double total_ssimg_y;
-  double total_ssimg_u;
-  double total_ssimg_v;
-  double total_ssimg_all;
-
-  double total_fastssim_y;
-  double total_fastssim_u;
-  double total_fastssim_v;
-  double total_fastssim_all;
-
-  double total_psnrhvs_y;
-  double total_psnrhvs_u;
-  double total_psnrhvs_v;
-  double total_psnrhvs_all;
+  ImageStat ssimg;
+  ImageStat fastssim;
+  ImageStat psnrhvs;
 
   int b_calculate_ssimg;
   int b_calculate_blockiness;
+
+  int b_calculate_consistency;
+
+  double total_inconsistency;
+  double worst_consistency;
+  Ssimv *ssim_vars;
+  Metrics metrics;
 #endif
   int b_calculate_psnr;
 
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index bf9e500..c8f1313 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -358,7 +358,7 @@
   MV tmp_mv = {0, 0};
   MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3};
   int num00, tmp_err, n;
-  const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
   const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY;
 
@@ -567,8 +567,8 @@
     vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
   }
 
-  xd->mi = cm->mi;
-  xd->mi[0].src_mi = &xd->mi[0];
+  xd->mi = cm->mi_grid_visible;
+  xd->mi[0] = cm->mi;
 
   vp9_frame_init_quantizer(cpi);
 
@@ -621,8 +621,8 @@
       xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
       xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
       xd->left_available = (mb_col != 0);
-      xd->mi[0].src_mi->mbmi.sb_type = bsize;
-      xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME;
+      xd->mi[0]->mbmi.sb_type = bsize;
+      xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
       set_mi_row_col(xd, &tile,
                      mb_row << 1, num_8x8_blocks_high_lookup[bsize],
                      mb_col << 1, num_8x8_blocks_wide_lookup[bsize],
@@ -630,8 +630,8 @@
 
       // Do intra 16x16 prediction.
       x->skip_encode = 0;
-      xd->mi[0].src_mi->mbmi.mode = DC_PRED;
-      xd->mi[0].src_mi->mbmi.tx_size = use_dc_pred ?
+      xd->mi[0]->mbmi.mode = DC_PRED;
+      xd->mi[0]->mbmi.tx_size = use_dc_pred ?
          (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
       vp9_encode_intra_block_plane(x, bsize, 0);
       this_error = vp9_get_mb_ss(x->plane[0].src_diff);
@@ -843,11 +843,11 @@
           mv.row *= 8;
           mv.col *= 8;
           this_error = motion_error;
-          xd->mi[0].src_mi->mbmi.mode = NEWMV;
-          xd->mi[0].src_mi->mbmi.mv[0].as_mv = mv;
-          xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
-          xd->mi[0].src_mi->mbmi.ref_frame[0] = LAST_FRAME;
-          xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE;
+          xd->mi[0]->mbmi.mode = NEWMV;
+          xd->mi[0]->mbmi.mv[0].as_mv = mv;
+          xd->mi[0]->mbmi.tx_size = TX_4X4;
+          xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME;
+          xd->mi[0]->mbmi.ref_frame[1] = NONE;
           vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
           vp9_encode_sby_pass1(x, bsize);
           sum_mvr += mv.row;
@@ -1852,6 +1852,8 @@
       active_max_gf_interval = 12 + MIN(4, (int_lbq / 6));
       if (active_max_gf_interval > rc->max_gf_interval)
         active_max_gf_interval = rc->max_gf_interval;
+      if (active_max_gf_interval < active_min_gf_interval)
+        active_max_gf_interval = active_min_gf_interval;
     }
   }
 
@@ -2047,29 +2049,61 @@
   }
 }
 
-// TODO(PGW) Re-examine the use of II ration in this code in the light of#
-// changes elsewhere
+// Threshold for use of the lagging second reference frame. High second ref
+// usage may point to a transient event like a flash or occlusion rather than
+// a real scene cut.
+#define SECOND_REF_USEAGE_THRESH 0.1
+// Minimum % intra coding observed in first pass (1.0 = 100%)
+#define MIN_INTRA_LEVEL 0.25
+// Minimum ratio between the % of intra coding and inter coding in the first
+// pass after discounting neutral blocks (discounting neutral blocks in this
+// way helps catch scene cuts in clips with very flat areas or letter box
+// format clips with image padding).
+#define INTRA_VS_INTER_THRESH 2.0
+// Hard threshold where the first pass chooses intra for almost all blocks.
+// In such a case even if the frame is not a scene cut coding a key frame
+// may be a good option.
+#define VERY_LOW_INTER_THRESH 0.05
+// Maximum threshold for the relative ratio of intra error score vs best
+// inter error score.
+#define KF_II_ERR_THRESHOLD 2.5
+// In real scene cuts there is almost always a sharp change in the intra
+// or inter error score.
+#define ERR_CHANGE_THRESHOLD 0.4
+// For real scene cuts we expect an improvement in the intra inter error
+// ratio in the next frame.
+#define II_IMPROVEMENT_THRESHOLD 3.5
 #define KF_II_MAX 128.0
+
 static int test_candidate_kf(TWO_PASS *twopass,
                              const FIRSTPASS_STATS *last_frame,
                              const FIRSTPASS_STATS *this_frame,
                              const FIRSTPASS_STATS *next_frame) {
   int is_viable_kf = 0;
+  double pcnt_intra = 1.0 - this_frame->pcnt_inter;
+  double modified_pcnt_inter =
+    this_frame->pcnt_inter - this_frame->pcnt_neutral;
 
   // Does the frame satisfy the primary criteria of a key frame?
+  // See above for an explanation of the test criteria.
   // If so, then examine how well it predicts subsequent frames.
-  if ((this_frame->pcnt_second_ref < 0.10) &&
-      (next_frame->pcnt_second_ref < 0.10) &&
-      ((this_frame->pcnt_inter < 0.05) ||
-       (((this_frame->pcnt_inter - this_frame->pcnt_neutral) < 0.35) &&
+  if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
+      (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
+      ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
+       ((pcnt_intra > MIN_INTRA_LEVEL) &&
+        (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
         ((this_frame->intra_error /
-          DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) &&
+          DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
+          KF_II_ERR_THRESHOLD) &&
         ((fabs(last_frame->coded_error - this_frame->coded_error) /
-              DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > 0.40) ||
+          DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
+          ERR_CHANGE_THRESHOLD) ||
          (fabs(last_frame->intra_error - this_frame->intra_error) /
-              DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > 0.40) ||
+          DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
+          ERR_CHANGE_THRESHOLD) ||
          ((next_frame->intra_error /
-           DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) {
+          DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) >
+          II_IMPROVEMENT_THRESHOLD))))) {
     int i;
     const FIRSTPASS_STATS *start_pos = twopass->stats_in;
     FIRSTPASS_STATS local_next_frame = *next_frame;
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index b3a8df9..031f77f 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -63,8 +63,8 @@
         &distortion, &sse, NULL, 0, 0);
   }
 
-  xd->mi[0].src_mi->mbmi.mode = NEWMV;
-  xd->mi[0].src_mi->mbmi.mv[0].as_mv = *dst_mv;
+  xd->mi[0]->mbmi.mode = NEWMV;
+  xd->mi[0]->mbmi.mv[0].as_mv = *dst_mv;
 
   vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
 
@@ -141,7 +141,7 @@
   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
     unsigned int err;
 
-    xd->mi[0].src_mi->mbmi.mode = mode;
+    xd->mi[0]->mbmi.mode = mode;
     vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode,
                             x->plane[0].src.buf, x->plane[0].src.stride,
                             xd->plane[0].dst.buf, xd->plane[0].dst.stride,
@@ -247,7 +247,7 @@
   xd->plane[0].dst.stride  = buf->y_stride;
   xd->plane[0].pre[0].stride  = buf->y_stride;
   xd->plane[1].dst.stride = buf->uv_stride;
-  xd->mi[0].src_mi = &mi_local;
+  xd->mi[0] = &mi_local;
   mi_local.mbmi.sb_type = BLOCK_16X16;
   mi_local.mbmi.ref_frame[0] = LAST_FRAME;
   mi_local.mbmi.ref_frame[1] = NONE;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 1f5f08a..ec089f1 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1802,7 +1802,7 @@
   const int src_stride = x->plane[0].src.stride;
   const int ref_stride = xd->plane[0].pre[0].stride;
   uint8_t const *ref_buf, *src_buf;
-  MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv;
+  MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv;
   unsigned int best_sad, tmp_sad, this_sad[4];
   MV this_mv;
   const int norm_factor = 3 + (bw >> 5);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 416f679..5a16d9f 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -60,7 +60,7 @@
     const POSITION *const mv_ref = &mv_ref_search[i];
     if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
       const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row *
-                                                   xd->mi_stride].src_mi;
+                                                   xd->mi_stride];
       const MB_MODE_INFO *const candidate = &candidate_mi->mbmi;
       // Keep counts for entropy encoding.
       context_counter += mode_2_counter[candidate->mode];
@@ -81,7 +81,7 @@
     const POSITION *const mv_ref = &mv_ref_search[i];
     if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
       const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row *
-                                                    xd->mi_stride].src_mi->mbmi;
+                                                    xd->mi_stride]->mbmi;
       different_ref_found = 1;
 
       if (candidate->ref_frame[0] == ref_frame)
@@ -97,7 +97,7 @@
       const POSITION *mv_ref = &mv_ref_search[i];
       if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
         const MB_MODE_INFO *const candidate = &xd->mi[mv_ref->col + mv_ref->row
-                                              * xd->mi_stride].src_mi->mbmi;
+                                              * xd->mi_stride]->mbmi;
 
         // If the candidate is INTRA we don't want to consider its mv.
         IF_DIFF_REF_FRAME_ADD_MV(candidate, ref_frame, ref_sign_bias,
@@ -122,7 +122,7 @@
                                   int_mv *tmp_mv, int *rate_mv,
                                   int64_t best_rd_sofar) {
   MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   const int step_param = cpi->sf.mv.fullpel_search_step_param;
   const int sadpb = x->sadperbit16;
@@ -296,7 +296,7 @@
 
     if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
-          cyclic_refresh_segment_id_boosted(xd->mi[0].src_mi->mbmi.segment_id))
+          cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
         tx_size = TX_8X8;
       else if (tx_size > TX_16X16)
         tx_size = TX_16X16;
@@ -307,7 +307,7 @@
   }
 
   assert(tx_size >= TX_8X8);
-  xd->mi[0].src_mi->mbmi.tx_size = tx_size;
+  xd->mi[0]->mbmi.tx_size = tx_size;
 
   // Evaluate if the partition block is a skippable block in Y plane.
   {
@@ -376,7 +376,7 @@
     for (i = 1; i <= 2; i++) {
       struct macroblock_plane *const p = &x->plane[i];
       struct macroblockd_plane *const pd = &xd->plane[i];
-      const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0].src_mi->mbmi, pd);
+      const TX_SIZE uv_tx_size = get_uv_tx_size(&xd->mi[0]->mbmi, pd);
       const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
       const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
       const int uv_bw = b_width_log2_lookup[uv_bsize];
@@ -472,21 +472,21 @@
 
   if (cpi->common.tx_mode == TX_MODE_SELECT) {
     if (sse > (var << 2))
-      xd->mi[0].src_mi->mbmi.tx_size =
+      xd->mi[0]->mbmi.tx_size =
           MIN(max_txsize_lookup[bsize],
               tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
     else
-      xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
+      xd->mi[0]->mbmi.tx_size = TX_8X8;
 
     if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
       if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
-          cyclic_refresh_segment_id_boosted(xd->mi[0].src_mi->mbmi.segment_id))
-        xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
-      else if (xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
-        xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
+          cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
+        xd->mi[0]->mbmi.tx_size = TX_8X8;
+      else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
+        xd->mi[0]->mbmi.tx_size = TX_16X16;
     }
   } else {
-    xd->mi[0].src_mi->mbmi.tx_size =
+    xd->mi[0]->mbmi.tx_size =
         MIN(max_txsize_lookup[bsize],
             tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
   }
@@ -494,7 +494,7 @@
   // Evaluate if the partition block is a skippable block in Y plane.
   {
     const BLOCK_SIZE unit_size =
-        txsize_to_bsize[xd->mi[0].src_mi->mbmi.tx_size];
+        txsize_to_bsize[xd->mi[0]->mbmi.tx_size];
     const unsigned int num_blk_log2 =
         (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
         (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
@@ -776,7 +776,7 @@
                                  struct buf_2d yv12_mb[][MAX_MB_PLANE],
                                  int *rate, int64_t *dist) {
   MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 
   const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
   unsigned int var = var_y, sse = sse_y;
@@ -935,17 +935,17 @@
 void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   RD_COST this_rdc, best_rdc;
   PREDICTION_MODE this_mode;
   struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
   const TX_SIZE intra_tx_size =
       MIN(max_txsize_lookup[bsize],
           tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
-  MODE_INFO *const mic = xd->mi[0].src_mi;
+  MODE_INFO *const mic = xd->mi[0];
   int *bmode_costs;
-  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
-  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
+  const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
+  const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
   const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
   const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
   bmode_costs = cpi->y_mode_costs[A][L];
@@ -1027,7 +1027,7 @@
   VP9_COMMON *const cm = &cpi->common;
   TileInfo *const tile_info = &tile_data->tile_info;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   struct macroblockd_plane *const pd = &xd->plane[0];
   PREDICTION_MODE best_mode = ZEROMV;
   MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
@@ -1105,9 +1105,9 @@
   x->skip = 0;
 
   if (xd->up_available)
-    filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
+    filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
   else if (xd->left_available)
-    filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter;
+    filter_ref = xd->mi[-1]->mbmi.interp_filter;
   else
     filter_ref = cm->interp_filter;
 
@@ -1145,11 +1145,11 @@
                            sf, sf);
 
       if (cm->use_prev_frame_mvs)
-        vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
+        vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
                          candidates, mi_row, mi_col, NULL, NULL);
       else
         const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
-                                             xd->mi[0].src_mi,
+                                             xd->mi[0],
                                              ref_frame, candidates,
                                              mi_row, mi_col);
 
@@ -1337,7 +1337,7 @@
 
       // For large partition blocks, extra testing is done.
       if (bsize > BLOCK_32X32 &&
-        !cyclic_refresh_segment_id_boosted(xd->mi[0].src_mi->mbmi.segment_id) &&
+        !cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id) &&
         cm->base_qindex) {
         model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
                                 &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col,
@@ -1450,7 +1450,7 @@
   mbmi->tx_size       = best_tx_size;
   mbmi->ref_frame[0]  = best_ref_frame;
   mbmi->mv[0].as_int  = frame_mv[best_mode][best_ref_frame].as_int;
-  xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+  xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
   x->skip_txfm[0] = best_mode_skip_txfm;
 
   // Perform intra prediction search, if the best SAD is above a certain
@@ -1580,7 +1580,7 @@
   TileInfo *const tile_info = &tile_data->tile_info;
   SPEED_FEATURES *const sf = &cpi->sf;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
   MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
   MV_REFERENCE_FRAME best_ref_frame = NONE;
@@ -1609,7 +1609,7 @@
                              &cm->frame_refs[ref_frame - 1].sf;
       vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                            sf, sf);
-      vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
+      vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
                        candidates, mi_row, mi_col, NULL, NULL);
 
       vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
@@ -1688,7 +1688,7 @@
 
         for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
           int b_rate = 0;
-          xd->mi[0].bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int;
+          xd->mi[0]->bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int;
 
           if (this_mode == NEWMV) {
             const int step_param = cpi->sf.mv.fullpel_search_step_param;
@@ -1705,8 +1705,8 @@
               mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3;
               mvp_full.col = b_mv[NEARESTMV].as_mv.col >> 3;
             } else {
-              mvp_full.row = xd->mi[0].bmi[0].as_mv[0].as_mv.row >> 3;
-              mvp_full.col = xd->mi[0].bmi[0].as_mv[0].as_mv.col >> 3;
+              mvp_full.row = xd->mi[0]->bmi[0].as_mv[0].as_mv.row >> 3;
+              mvp_full.col = xd->mi[0]->bmi[0].as_mv[0].as_mv.col >> 3;
             }
 
             vp9_set_mv_search_range(x, &mbmi->ref_mvs[0]->as_mv);
@@ -1748,7 +1748,7 @@
                                          &dummy_dist,
                                          &x->pred_sse[ref_frame], NULL, 0, 0);
 
-            xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv;
+            xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
           } else {
             b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
                                           [INTER_OFFSET(this_mode)];
@@ -1758,7 +1758,7 @@
           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
             vp9_highbd_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
                                     pd->dst.buf, pd->dst.stride,
-                                    &xd->mi[0].bmi[i].as_mv[0].as_mv,
+                                    &xd->mi[0]->bmi[i].as_mv[0].as_mv,
                                     &xd->block_refs[0]->sf,
                                     4 * num_4x4_blocks_wide,
                                     4 * num_4x4_blocks_high, 0,
@@ -1770,7 +1770,7 @@
 #endif
             vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
                                      pd->dst.buf, pd->dst.stride,
-                                     &xd->mi[0].bmi[i].as_mv[0].as_mv,
+                                     &xd->mi[0]->bmi[i].as_mv[0].as_mv,
                                      &xd->block_refs[0]->sf,
                                      4 * num_4x4_blocks_wide,
                                      4 * num_4x4_blocks_high, 0,
@@ -1792,7 +1792,7 @@
           if (this_rdc.rdcost < b_best_rd) {
             b_best_rd = this_rdc.rdcost;
             bsi[ref_frame][i].as_mode = this_mode;
-            bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0].bmi[i].as_mv[0].as_mv;
+            bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0]->bmi[i].as_mv[0].as_mv;
           }
         }  // mode search
 
@@ -1802,11 +1802,11 @@
         pd->dst = orig_dst;
         this_rd += b_best_rd;
 
-        xd->mi[0].bmi[i] = bsi[ref_frame][i];
+        xd->mi[0]->bmi[i] = bsi[ref_frame][i];
         if (num_4x4_blocks_wide > 1)
-          xd->mi[0].bmi[i + 1] = xd->mi[0].bmi[i];
+          xd->mi[0]->bmi[i + 1] = xd->mi[0]->bmi[i];
         if (num_4x4_blocks_high > 1)
-          xd->mi[0].bmi[i + 2] = xd->mi[0].bmi[i];
+          xd->mi[0]->bmi[i + 2] = xd->mi[0]->bmi[i];
       }
     }  // loop through sub8x8 blocks
 
@@ -1821,15 +1821,15 @@
   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
       const int block = idy * 2 + idx;
-      xd->mi[0].bmi[block] = bsi[best_ref_frame][block];
+      xd->mi[0]->bmi[block] = bsi[best_ref_frame][block];
       if (num_4x4_blocks_wide > 1)
-        xd->mi[0].bmi[block + 1] = bsi[best_ref_frame][block];
+        xd->mi[0]->bmi[block + 1] = bsi[best_ref_frame][block];
       if (num_4x4_blocks_high > 1)
-        xd->mi[0].bmi[block + 2] = bsi[best_ref_frame][block];
+        xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block];
     }
   }
-  mbmi->mode = xd->mi[0].bmi[3].as_mode;
-  ctx->mic = *(xd->mi[0].src_mi);
+  mbmi->mode = xd->mi[0]->bmi[3].as_mode;
+  ctx->mic = *(xd->mi[0]);
   ctx->skip_txfm[0] = 0;
   ctx->skip = 0;
   // Dummy assignment for speed -5. No effect in speed -6.
diff --git a/vp9/encoder/vp9_psnrhvs.c b/vp9/encoder/vp9_psnrhvs.c
index 6c034aa..e10e028 100644
--- a/vp9/encoder/vp9_psnrhvs.c
+++ b/vp9/encoder/vp9_psnrhvs.c
@@ -23,11 +23,7 @@
 #endif
 #include <string.h>
 
-typedef int16_t od_coeff;
-typedef int16_t tran_low_t;
-extern void vp9_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride);
-
-void od_bin_fdct8x8(od_coeff *y, int ystride, const od_coeff *x, int xstride) {
+void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) {
   (void) xstride;
   vp9_fdct8x8_c(x, y, ystride);
 }
@@ -95,8 +91,8 @@
                            double _par, int _w, int _h, int _step,
                            float _csf[8][8]) {
   float ret;
-  od_coeff dct_s[8 * 8];
-  od_coeff dct_d[8 * 8];
+  int16_t dct_s[8 * 8], dct_d[8 * 8];
+  tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];
   float mask[8][8];
   int pixels;
   int x;
@@ -177,14 +173,14 @@
         s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;
       if (d_gvar > 0)
         d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;
-      od_bin_fdct8x8(dct_s, 8, dct_s, 8);
-      od_bin_fdct8x8(dct_d, 8, dct_d, 8);
+      od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
+      od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
       for (i = 0; i < 8; i++)
         for (j = (i == 0); j < 8; j++)
-          s_mask += dct_s[i * 8 + j] * dct_s[i * 8 + j] * mask[i][j];
+          s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
       for (i = 0; i < 8; i++)
         for (j = (i == 0); j < 8; j++)
-          d_mask += dct_d[i * 8 + j] * dct_d[i * 8 + j] * mask[i][j];
+          d_mask += dct_d_coef[i * 8 + j] * dct_d_coef[i * 8 + j] * mask[i][j];
       s_mask = sqrt(s_mask * s_gvar) / 32.f;
       d_mask = sqrt(d_mask * d_gvar) / 32.f;
       if (d_mask > s_mask)
@@ -192,7 +188,7 @@
       for (i = 0; i < 8; i++) {
         for (j = 0; j < 8; j++) {
           float err;
-          err = fabs(dct_s[i * 8 + j] - dct_d[i * 8 + j]);
+          err = fabs(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]);
           if (i != 0 || j != 0)
             err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
           ret += (err * _csf[i][j]) * (err * _csf[i][j]);
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 29b54b0..9c08c5c 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -646,7 +646,7 @@
   const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   QUANTS *const quants = &cpi->quants;
-  const int segment_id = xd->mi[0].src_mi->mbmi.segment_id;
+  const int segment_id = xd->mi[0]->mbmi.segment_id;
   const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
   const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
   int i;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 7783f7b..8713caa 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1612,8 +1612,11 @@
 void vp9_rc_set_gf_max_interval(const VP9_COMP *const cpi,
                                 RATE_CONTROL *const rc) {
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  // Set Maximum gf/arf interval
-  rc->max_gf_interval = 16;
+  // Set Maximum gf/arf interval.
+  rc->max_gf_interval =
+    MIN(16, (int)(cpi->framerate / 2.0));
+  // Round up to next even number if odd.
+  rc->max_gf_interval += (rc->max_gf_interval & 0x01);
 
   // Extended interval for genuinely static scenes
   rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index bbf70ea..a2a4808 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -450,7 +450,7 @@
                  uint8_t *ref_y_buffer, int ref_y_stride,
                  int ref_frame, BLOCK_SIZE block_size) {
   MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   int i;
   int zero_seen = 0;
   int best_index = 0;
@@ -550,7 +550,7 @@
 }
 
 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
-  const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int ctx = vp9_get_pred_context_switchable_interp(xd);
   return SWITCHABLE_INTERP_RATE_FACTOR *
              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 166535b..56ecd4e 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -164,7 +164,7 @@
   int i;
   int64_t rate_sum = 0;
   int64_t dist_sum = 0;
-  const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
+  const int ref = xd->mi[0]->mbmi.ref_frame[0];
   unsigned int sse;
   unsigned int var = 0;
   unsigned int sum_sse = 0;
@@ -347,7 +347,7 @@
                        const int16_t *scan, const int16_t *nb,
                        int use_fast_coef_costing) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   const struct macroblock_plane *p = &x->plane[plane];
   const struct macroblockd_plane *pd = &xd->plane[plane];
   const PLANE_TYPE type = pd->plane_type;
@@ -452,7 +452,7 @@
 #endif  // CONFIG_VP9_HIGHBITDEPTH
   args->sse  = this_sse >> shift;
 
-  if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
+  if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
     // TODO(jingning): tune the model to better capture the distortion.
     int64_t p = (pd->dequant[1] * pd->dequant[1] *
                     (1 << ss_txfrm_size)) >> (shift + 2);
@@ -482,7 +482,7 @@
   struct rdcost_block_args *args = arg;
   MACROBLOCK *const x = args->x;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   int64_t rd1, rd2, rd;
 
   if (args->skip)
@@ -588,7 +588,7 @@
   args.use_fast_coef_costing = use_fast_coef_casting;
 
   if (plane == 0)
-    xd->mi[0].src_mi->mbmi.tx_size = tx_size;
+    xd->mi[0]->mbmi.tx_size = tx_size;
 
   vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 
@@ -618,7 +618,7 @@
   VP9_COMMON *const cm = &cpi->common;
   const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 
   mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
 
@@ -638,7 +638,7 @@
   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
   int r[TX_SIZES][2], s[TX_SIZES];
   int64_t d[TX_SIZES], sse[TX_SIZES];
@@ -725,7 +725,7 @@
   int64_t sse;
   int64_t *ret_sse = psse ? psse : &sse;
 
-  assert(bs == xd->mi[0].src_mi->mbmi.sb_type);
+  assert(bs == xd->mi[0]->mbmi.sb_type);
 
   if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
@@ -792,7 +792,7 @@
 
   vpx_memcpy(ta, a, sizeof(ta));
   vpx_memcpy(tl, l, sizeof(tl));
-  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;
+  xd->mi[0]->mbmi.tx_size = TX_4X4;
 
 #if CONFIG_VP9_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -824,7 +824,7 @@
                                                                   block,
                                                                   p->src_diff);
           tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
-          xd->mi[0].src_mi->bmi[block].as_mode = mode;
+          xd->mi[0]->bmi[block].as_mode = mode;
           vp9_predict_intra_block(xd, block, 1,
                                   TX_4X4, mode,
                                   x->skip_encode ? src : dst,
@@ -924,7 +924,7 @@
         int16_t *const src_diff =
             vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
         tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
-        xd->mi[0].src_mi->bmi[block].as_mode = mode;
+        xd->mi[0]->bmi[block].as_mode = mode;
         vp9_predict_intra_block(xd, block, 1,
                                 TX_4X4, mode,
                                 x->skip_encode ? src : dst,
@@ -997,10 +997,10 @@
                                             int64_t best_rd) {
   int i, j;
   const MACROBLOCKD *const xd = &mb->e_mbd;
-  MODE_INFO *const mic = xd->mi[0].src_mi;
+  MODE_INFO *const mic = xd->mi[0];
   const MODE_INFO *above_mi = xd->above_mi;
   const MODE_INFO *left_mi = xd->left_mi;
-  const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
+  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
   int idx, idy;
@@ -1068,7 +1068,7 @@
   PREDICTION_MODE mode;
   PREDICTION_MODE mode_selected = DC_PRED;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *const mic = xd->mi[0].src_mi;
+  MODE_INFO *const mic = xd->mi[0];
   int this_rate, this_rate_tokenonly, s;
   int64_t this_distortion, this_rd;
   TX_SIZE best_tx = TX_4X4;
@@ -1143,7 +1143,7 @@
                             int64_t *sse, BLOCK_SIZE bsize,
                             int64_t ref_best_rd) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
   int plane;
   int pnrate = 0, pnskip = 1;
@@ -1206,7 +1206,7 @@
     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
       continue;
 
-    xd->mi[0].src_mi->mbmi.uv_mode = mode;
+    xd->mi[0]->mbmi.uv_mode = mode;
 
     if (!super_block_uvrd(cpi, x, &this_rate_tokenonly,
                           &this_distortion, &s, &this_sse, bsize, best_rd))
@@ -1227,7 +1227,7 @@
     }
   }
 
-  xd->mi[0].src_mi->mbmi.uv_mode = mode_selected;
+  xd->mi[0]->mbmi.uv_mode = mode_selected;
   return best_rd;
 }
 
@@ -1238,7 +1238,7 @@
   const VP9_COMMON *cm = &cpi->common;
   int64_t unused;
 
-  x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;
+  x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
   vpx_memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
                    skippable, &unused, bsize, INT64_MAX);
@@ -1264,7 +1264,7 @@
                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                             bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
   }
-  *mode_uv = x->e_mbd.mi[0].src_mi->mbmi.uv_mode;
+  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
 }
 
 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
@@ -1279,7 +1279,7 @@
                                 int_mv seg_mvs[MAX_REF_FRAMES],
                                 int_mv *best_ref_mv[2], const int *mvjcost,
                                 int *mvcost[2]) {
-  MODE_INFO *const mic = xd->mi[0].src_mi;
+  MODE_INFO *const mic = xd->mi[0];
   const MB_MODE_INFO *const mbmi = &mic->mbmi;
   int thismvcost = 0;
   int idx, idy;
@@ -1341,7 +1341,7 @@
   MACROBLOCKD *xd = &x->e_mbd;
   struct macroblockd_plane *const pd = &xd->plane[0];
   struct macroblock_plane *const p = &x->plane[0];
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
@@ -1482,7 +1482,7 @@
 }
 
 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
-  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
   struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
 
@@ -1498,7 +1498,7 @@
 
 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                   struct buf_2d orig_pre[2]) {
-  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
   x->plane[0].src = orig_src;
   x->e_mbd.plane[0].pre[0] = orig_pre[0];
   if (has_second_ref(mbmi))
@@ -1556,7 +1556,7 @@
   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
   MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   const int refs[2] = {mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]};
   int_mv ref_mv[2];
@@ -1745,7 +1745,7 @@
   int i;
   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
   MACROBLOCKD *xd = &x->e_mbd;
-  MODE_INFO *mi = xd->mi[0].src_mi;
+  MODE_INFO *mi = xd->mi[0];
   MB_MODE_INFO *mbmi = &mi->mbmi;
   int mode_idx;
   int k, br = 0, idx, idy;
@@ -2186,7 +2186,7 @@
   ctx->skip = x->skip;
   ctx->skippable = skippable;
   ctx->best_mode_index = mode_index;
-  ctx->mic = *xd->mi[0].src_mi;
+  ctx->mic = *xd->mi[0];
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
@@ -2207,7 +2207,7 @@
   const VP9_COMMON *cm = &cpi->common;
   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
   MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *const mi = xd->mi[0].src_mi;
+  MODE_INFO *const mi = xd->mi[0];
   int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
 
@@ -2240,7 +2240,7 @@
                                  int_mv *tmp_mv, int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
   const VP9_COMMON *cm = &cpi->common;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
   int bestsme = INT_MAX;
   int step_param;
@@ -2412,7 +2412,7 @@
                                  int64_t filter_cache[]) {
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   const int is_comp_pred = has_second_ref(mbmi);
   const int this_mode = mbmi->mode;
   int_mv *frame_mv = mode_mv[this_mode];
@@ -2457,9 +2457,9 @@
   if (pred_filter_search) {
     INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
     if (xd->up_available)
-      af = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
+      af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
     if (xd->left_available)
-      lf = xd->mi[-1].src_mi->mbmi.interp_filter;
+      lf = xd->mi[-1]->mbmi.interp_filter;
 
     if ((this_mode != NEWMV) || (af == lf))
       best_filter = af;
@@ -2504,7 +2504,7 @@
         return INT64_MAX;
 
       frame_mv[refs[0]].as_int =
-          xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
+          xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
       single_newmv[refs[0]].as_int = tmp_mv.as_int;
 
       // Estimate the rate implications of a new mv but discount this
@@ -2782,8 +2782,8 @@
   TX_SIZE max_uv_tx_size;
   x->skip_encode = 0;
   ctx->skip = 0;
-  xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME;
-  xd->mi[0].src_mi->mbmi.ref_frame[1] = NONE;
+  xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
+  xd->mi[0]->mbmi.ref_frame[1] = NONE;
 
   if (bsize >= BLOCK_8X8) {
     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
@@ -2800,7 +2800,7 @@
       return;
     }
   }
-  max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
+  max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
                                        pd[1].subsampling_x,
                                        pd[1].subsampling_y);
   rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
@@ -2826,7 +2826,7 @@
       }
   }
 
-  ctx->mic = *xd->mi[0].src_mi;
+  ctx->mic = *xd->mi[0];
   rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
 }
 
@@ -2901,7 +2901,7 @@
   RD_OPT *const rd_opt = &cpi->rd;
   SPEED_FEATURES *const sf = &cpi->sf;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
   PREDICTION_MODE this_mode;
   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
@@ -3150,10 +3150,10 @@
       ref_mv.as_int = INVALID_MV;
 
       if ((mi_row - 1) >= tile_info->mi_row_start) {
-        ref_mv = xd->mi[-xd->mi_stride].src_mi->mbmi.mv[0];
-        rf = xd->mi[-xd->mi_stride].src_mi->mbmi.ref_frame[0];
+        ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0];
+        rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0];
         for (i = 0; i < mi_width; ++i) {
-          ref_mbmi = &xd->mi[-xd->mi_stride + i].src_mi->mbmi;
+          ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi;
           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
                           (ref_frame == ref_mbmi->ref_frame[0]);
           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
@@ -3162,11 +3162,11 @@
 
       if ((mi_col - 1) >= tile_info->mi_col_start) {
         if (ref_mv.as_int == INVALID_MV)
-          ref_mv = xd->mi[-1].src_mi->mbmi.mv[0];
+          ref_mv = xd->mi[-1]->mbmi.mv[0];
         if (rf == NONE)
-          rf = xd->mi[-1].src_mi->mbmi.ref_frame[0];
+          rf = xd->mi[-1]->mbmi.ref_frame[0];
         for (i = 0; i < mi_height; ++i) {
-          ref_mbmi = &xd->mi[i * xd->mi_stride - 1].src_mi->mbmi;
+          ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi;
           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
                           (ref_frame == ref_mbmi->ref_frame[0]);
           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
@@ -3580,7 +3580,7 @@
   if (!x->skip && !x->select_tx_size) {
     int has_high_freq_coeff = 0;
     int plane;
-    int max_plane = is_inter_block(&xd->mi[0].src_mi->mbmi)
+    int max_plane = is_inter_block(&xd->mi[0]->mbmi)
                         ? MAX_MB_PLANE : 1;
     for (plane = 0; plane < max_plane; ++plane) {
       x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
@@ -3610,7 +3610,7 @@
                                         int64_t best_rd_so_far) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   unsigned char segment_id = mbmi->segment_id;
   const int comp_pred = 0;
   int i;
@@ -3716,7 +3716,7 @@
   RD_OPT *const rd_opt = &cpi->rd;
   SPEED_FEATURES *const sf = &cpi->sf;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
   unsigned char segment_id = mbmi->segment_id;
@@ -4007,7 +4007,7 @@
               tmp_best_skippable = skippable;
               tmp_best_mbmode = *mbmi;
               for (i = 0; i < 4; i++) {
-                tmp_best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
+                tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
               }
               pred_exists = 1;
@@ -4051,7 +4051,7 @@
         skippable = tmp_best_skippable;
         *mbmi = tmp_best_mbmode;
         for (i = 0; i < 4; i++)
-          xd->mi[0].src_mi->bmi[i] = tmp_best_bmodes[i];
+          xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
       }
 
       rate2 += rate;
@@ -4160,7 +4160,7 @@
                    sizeof(uint8_t) * ctx->num_4x4_blk);
 
         for (i = 0; i < 4; i++)
-          best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
+          best_bmodes[i] = xd->mi[0]->bmi[i];
 
         // TODO(debargha): enhance this test with a better distortion prediction
         // based on qp, activity mask and history
@@ -4278,14 +4278,14 @@
   x->skip |= best_skip2;
   if (!is_inter_block(&best_mbmode)) {
     for (i = 0; i < 4; i++)
-      xd->mi[0].src_mi->bmi[i].as_mode = best_bmodes[i].as_mode;
+      xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
   } else {
     for (i = 0; i < 4; ++i)
-      vpx_memcpy(&xd->mi[0].src_mi->bmi[i], &best_bmodes[i],
+      vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i],
                  sizeof(b_mode_info));
 
-    mbmi->mv[0].as_int = xd->mi[0].src_mi->bmi[3].as_mv[0].as_int;
-    mbmi->mv[1].as_int = xd->mi[0].src_mi->bmi[3].as_mv[1].as_int;
+    mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
+    mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
   }
 
   for (i = 0; i < REFERENCE_MODES; ++i) {
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index c9874f7..f66de51 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -107,7 +107,7 @@
 }
 
 static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd,
-                       const TileInfo *tile, MODE_INFO *mi,
+                       const TileInfo *tile, MODE_INFO **mi,
                        int *no_pred_segcounts,
                        int (*temporal_predictor_count)[2],
                        int *t_unpred_seg_counts,
@@ -118,7 +118,7 @@
     return;
 
   xd->mi = mi;
-  segment_id = xd->mi[0].src_mi->mbmi.segment_id;
+  segment_id = xd->mi[0]->mbmi.segment_id;
 
   set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
 
@@ -127,7 +127,7 @@
 
   // Temporal prediction not allowed on key frames
   if (cm->frame_type != KEY_FRAME) {
-    const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
+    const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
     // Test to see if the segment id matches the predicted value.
     const int pred_segment_id = vp9_get_segment_id(cm, cm->last_frame_seg_map,
                                                    bsize, mi_row, mi_col);
@@ -136,7 +136,7 @@
 
     // Store the prediction status for this mb and update counts
     // as appropriate
-    xd->mi[0].src_mi->mbmi.seg_id_predicted = pred_flag;
+    xd->mi[0]->mbmi.seg_id_predicted = pred_flag;
     temporal_predictor_count[pred_context][pred_flag]++;
 
     // Update the "unpredicted" segment count
@@ -146,7 +146,7 @@
 }
 
 static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd,
-                          const TileInfo *tile, MODE_INFO *mi,
+                          const TileInfo *tile, MODE_INFO **mi,
                           int *no_pred_segcounts,
                           int (*temporal_predictor_count)[2],
                           int *t_unpred_seg_counts,
@@ -159,8 +159,8 @@
   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
     return;
 
-  bw = num_8x8_blocks_wide_lookup[mi[0].src_mi->mbmi.sb_type];
-  bh = num_8x8_blocks_high_lookup[mi[0].src_mi->mbmi.sb_type];
+  bw = num_8x8_blocks_wide_lookup[mi[0]->mbmi.sb_type];
+  bh = num_8x8_blocks_high_lookup[mi[0]->mbmi.sb_type];
 
   if (bw == bs && bh == bs) {
     count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count,
@@ -220,13 +220,13 @@
   // predicts this one
   for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) {
     TileInfo tile;
-    MODE_INFO *mi_ptr;
+    MODE_INFO **mi_ptr;
     vp9_tile_init(&tile, cm, 0, tile_col);
 
-    mi_ptr = cm->mi + tile.mi_col_start;
+    mi_ptr = cm->mi_grid_visible + tile.mi_col_start;
     for (mi_row = 0; mi_row < cm->mi_rows;
          mi_row += 8, mi_ptr += 8 * cm->mi_stride) {
-      MODE_INFO *mi = mi_ptr;
+      MODE_INFO **mi = mi_ptr;
       for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
            mi_col += 8, mi += 8)
         count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts,
diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c
index 5dbfbf5..88db5dd 100644
--- a/vp9/encoder/vp9_ssim.c
+++ b/vp9/encoder/vp9_ssim.c
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <math.h>
 #include "./vp9_rtcd.h"
-
 #include "vp9/encoder/vp9_ssim.h"
 
 void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
@@ -201,6 +201,251 @@
   return ssim_all;
 }
 
+// traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity
+//
+// Re working out the math ->
+//
+// ssim(x,y) =  (2*mean(x)*mean(y) + c1)*(2*cov(x,y)+c2) /
+//   ((mean(x)^2+mean(y)^2+c1)*(var(x)+var(y)+c2))
+//
+// mean(x) = sum(x) / n
+//
+// cov(x,y) = (n*sum(xi*yi)-sum(x)*sum(y))/(n*n)
+//
+// var(x) = (n*sum(xi*xi)-sum(xi)*sum(xi))/(n*n)
+//
+// ssim(x,y) =
+//   (2*sum(x)*sum(y)/(n*n) + c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))/(n*n)+c2) /
+//   (((sum(x)*sum(x)+sum(y)*sum(y))/(n*n) +c1) *
+//    ((n*sum(xi*xi) - sum(xi)*sum(xi))/(n*n)+
+//     (n*sum(yi*yi) - sum(yi)*sum(yi))/(n*n)+c2)))
+//
+// factoring out n*n
+//
+// ssim(x,y) =
+//   (2*sum(x)*sum(y) + n*n*c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))+n*n*c2) /
+//   (((sum(x)*sum(x)+sum(y)*sum(y)) + n*n*c1) *
+//    (n*sum(xi*xi)-sum(xi)*sum(xi)+n*sum(yi*yi)-sum(yi)*sum(yi)+n*n*c2))
+//
+// Replace c1 with n*n * c1 for the final step that leads to this code:
+// The final step scales by 12 bits so we don't lose precision in the constants.
+
+double ssimv_similarity(Ssimv *sv, int64_t n) {
+  // Scale the constants by number of pixels.
+  const int64_t c1 = (cc1 * n * n) >> 12;
+  const int64_t c2 = (cc2 * n * n) >> 12;
+
+  const double l = 1.0 * (2 * sv->sum_s * sv->sum_r + c1) /
+      (sv->sum_s * sv->sum_s + sv->sum_r * sv->sum_r + c1);
+
+  // Since these variables are unsigned sums, convert to double so
+  // math is done in double arithmetic.
+  const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2)
+      / (n * sv->sum_sq_s - sv->sum_s * sv->sum_s + n * sv->sum_sq_r
+         - sv->sum_r * sv->sum_r + c2);
+
+  return l * v;
+}
+
+// The first term of the ssim metric is a luminance factor.
+//
+// (2*mean(x)*mean(y) + c1)/ (mean(x)^2+mean(y)^2+c1)
+//
+// This luminance factor is super sensitive to the dark side of luminance
+// values and completely insensitive on the white side.  check out 2 sets
+// (1,3) and (250,252) the term gives ( 2*1*3/(1+9) = .60
+// 2*250*252/ (250^2+252^2) => .99999997
+//
+// As a result, this tweaked version of the calculation takes the
+// luminance as a percentage off from the peak possible value:
+//
+// 255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count
+//
+double ssimv_similarity2(Ssimv *sv, int64_t n) {
+  // Scale the constants by number of pixels.
+  const int64_t c1 = (cc1 * n * n) >> 12;
+  const int64_t c2 = (cc2 * n * n) >> 12;
+
+  const double mean_diff = (1.0 * sv->sum_s - sv->sum_r) / n;
+  const double l = (255 * 255 - mean_diff * mean_diff + c1) / (255 * 255 + c1);
+
+      // Since these variables are unsigned sums, convert to double so
+  // math is done in double arithmetic.
+  const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2)
+      / (n * sv->sum_sq_s - sv->sum_s * sv->sum_s +
+         n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2);
+
+  return l * v;
+}
+void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch,
+                 Ssimv *sv) {
+  vp9_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch,
+                     &sv->sum_s, &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r,
+                     &sv->sum_sxr);
+}
+
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                            uint8_t *img2, int img2_pitch,
+                            int width, int height,
+                            Ssimv *sv2, Metrics *m,
+                            int do_inconsistency) {
+  double dssim_total = 0;
+  double ssim_total = 0;
+  double ssim2_total = 0;
+  double inconsistency_total = 0;
+  int i, j;
+  int c = 0;
+  double norm;
+  double old_ssim_total = 0;
+  vp9_clear_system_state();
+  // We can sample points as frequently as we like; start with 1 per 4x4.
+  for (i = 0; i < height; i += 4,
+       img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
+    for (j = 0; j < width; j += 4, ++c) {
+      Ssimv sv = {0};
+      double ssim;
+      double ssim2;
+      double dssim;
+      uint32_t var_new;
+      uint32_t var_old;
+      uint32_t mean_new;
+      uint32_t mean_old;
+      double ssim_new;
+      double ssim_old;
+
+      // Not sure there's a great way to handle the edge pixels
+      // in ssim when using a window. Seems biased against edge pixels
+      // however you handle this. This uses only samples that are
+      // fully in the frame.
+      if (j + 8 <= width && i + 8 <= height) {
+        ssimv_parms(img1 + j, img1_pitch, img2 + j, img2_pitch, &sv);
+      }
+
+      ssim = ssimv_similarity(&sv, 64);
+      ssim2 = ssimv_similarity2(&sv, 64);
+
+      sv.ssim = ssim2;
+
+      // dssim is calculated to use as an actual error metric and
+      // is scaled up to the same range as sum square error.
+      // Since we are subsampling every 16th point maybe this should be
+      // *16 ?
+      dssim = 255 * 255 * (1 - ssim2) / 2;
+
+      // Here I introduce a new error metric: consistency-weighted
+      // SSIM-inconsistency.  This metric isolates frames where the
+      // SSIM 'suddenly' changes, e.g. if one frame in every 8 is much
+      // sharper or blurrier than the others. Higher values indicate a
+      // temporally inconsistent SSIM. There are two ideas at work:
+      //
+      // 1) 'SSIM-inconsistency': the total inconsistency value
+      // reflects how much SSIM values are changing between this
+      // source / reference frame pair and the previous pair.
+      //
+      // 2) 'consistency-weighted': weights de-emphasize areas in the
+      // frame where the scene content has changed. Changes in scene
+      // content are detected via changes in local variance and local
+      // mean.
+      //
+      // Thus the overall measure reflects how inconsistent the SSIM
+      // values are, over consistent regions of the frame.
+      //
+      // The metric has three terms:
+      //
+      // term 1 -> uses change in scene Variance to weight error score
+      //  2 * var(Fi)*var(Fi-1) / (var(Fi)^2+var(Fi-1)^2)
+      //  larger changes from one frame to the next mean we care
+      //  less about consistency.
+      //
+      // term 2 -> uses change in local scene luminance to weight error
+      //  2 * avg(Fi)*avg(Fi-1) / (avg(Fi)^2+avg(Fi-1)^2)
+      //  larger changes from one frame to the next mean we care
+      //  less about consistency.
+      //
+      // term3 -> measures inconsistency in ssim scores between frames
+      //   1 - (2 * ssim(Fi)*ssim(Fi-1)/(ssim(Fi)^2+ssim(Fi-1)^2)).
+      //
+      // This term compares the ssim score for the same location in 2
+      // subsequent frames.
+      var_new = sv.sum_sq_s - sv.sum_s * sv.sum_s / 64;
+      var_old = sv2[c].sum_sq_s - sv2[c].sum_s * sv2[c].sum_s / 64;
+      mean_new = sv.sum_s;
+      mean_old = sv2[c].sum_s;
+      ssim_new = sv.ssim;
+      ssim_old = sv2[c].ssim;
+
+      if (do_inconsistency) {
+        // We do the metric once for every 4x4 block in the image. Since
+        // we are scaling the error to SSE for use in a psnr calculation
+        // 1.0 = 4x4x255x255 the worst error we can possibly have.
+        static const double kScaling = 4. * 4 * 255 * 255;
+
+        // The constants have to be non 0 to avoid potential divide by 0
+        // issues other than that they affect kind of a weighting between
+        // the terms.  No testing of what the right terms should be has been
+        // done.
+        static const double c1 = 1, c2 = 1, c3 = 1;
+
+        // This measures how much consistent variance is in two consecutive
+        // source frames. 1.0 means they have exactly the same variance.
+        const double variance_term = (2.0 * var_old * var_new + c1) /
+            (1.0 * var_old * var_old + 1.0 * var_new * var_new + c1);
+
+        // This measures how consistent the local mean are between two
+        // consecutive frames. 1.0 means they have exactly the same mean.
+        const double mean_term = (2.0 * mean_old * mean_new + c2) /
+            (1.0 * mean_old * mean_old + 1.0 * mean_new * mean_new + c2);
+
+        // This measures how consistent the ssims of two
+        // consecutive frames is. 1.0 means they are exactly the same.
+        double ssim_term = pow((2.0 * ssim_old * ssim_new + c3) /
+                               (ssim_old * ssim_old + ssim_new * ssim_new + c3),
+                               5);
+
+        double this_inconsistency;
+
+        // Floating point math sometimes makes this > 1 by a tiny bit.
+        // We want the metric to scale between 0 and 1.0 so we can convert
+        // it to an snr scaled value.
+        if (ssim_term > 1)
+          ssim_term = 1;
+
+        // This converts the consistency metric to an inconsistency metric
+        // (so we can scale it like psnr to something like sum square error).
+        // The reason for the variance and mean terms is the assumption that
+        // if there are big changes in the source we should penalize
+        // inconsistency in ssim scores a bit less, as it will be less visible
+        // to the user.
+        this_inconsistency = (1 - ssim_term) * variance_term * mean_term;
+
+        this_inconsistency *= kScaling;
+        inconsistency_total += this_inconsistency;
+      }
+      sv2[c] = sv;
+      ssim_total += ssim;
+      ssim2_total += ssim2;
+      dssim_total += dssim;
+
+      old_ssim_total += ssim_old;
+    }
+    old_ssim_total += 0;
+  }
+
+  norm = 1. / (width / 4) / (height / 4);
+  ssim_total *= norm;
+  ssim2_total *= norm;
+  m->ssim2 = ssim2_total;
+  m->ssim = ssim_total;
+  if (old_ssim_total == 0)
+    inconsistency_total = 0;
+
+  m->ssimc = inconsistency_total;
+
+  m->dssim = dssim_total;
+  return inconsistency_total;
+}
+
+
 #if CONFIG_VP9_HIGHBITDEPTH
 double vp9_highbd_calc_ssim(YV12_BUFFER_CONFIG *source,
                             YV12_BUFFER_CONFIG *dest,
diff --git a/vp9/encoder/vp9_ssim.h b/vp9/encoder/vp9_ssim.h
index ed1bb83..10f14c4 100644
--- a/vp9/encoder/vp9_ssim.h
+++ b/vp9/encoder/vp9_ssim.h
@@ -17,6 +17,52 @@
 
 #include "vpx_scale/yv12config.h"
 
+// metrics used for calculating ssim, ssim2, dssim, and ssimc
+typedef struct {
+  // source sum ( over 8x8 region )
+  uint64_t sum_s;
+
+  // reference sum (over 8x8 region )
+  uint64_t sum_r;
+
+  // source sum squared ( over 8x8 region )
+  uint64_t sum_sq_s;
+
+  // reference sum squared (over 8x8 region )
+  uint64_t sum_sq_r;
+
+  // sum of source times reference (over 8x8 region)
+  uint64_t sum_sxr;
+
+  // calculated ssim score between source and reference
+  double ssim;
+} Ssimv;
+
+// metrics collected on a frame basis
+typedef struct {
+  // ssim consistency error metric ( see code for explanation )
+  double ssimc;
+
+  // standard ssim
+  double ssim;
+
+  // revised ssim ( see code for explanation)
+  double ssim2;
+
+  // ssim restated as an error metric like sse
+  double dssim;
+
+  // dssim converted to decibels
+  double dssimd;
+
+  // ssimc converted to decibels
+  double ssimcd;
+} Metrics;
+
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
+                      int img2_pitch, int width, int height, Ssimv *sv2,
+                      Metrics *m, int do_inconsistency);
+
 double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                      double *weight);
 
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 424cc08..e62e4ab 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -44,7 +44,7 @@
   const int which_mv = 0;
   const MV mv = { mv_row, mv_col };
   const InterpKernel *const kernel =
-    vp9_get_interp_kernel(xd->mi[0].src_mi->mbmi.interp_filter);
+    vp9_get_interp_kernel(xd->mi[0]->mbmi.interp_filter);
 
   enum mv_precision mv_precision_uv;
   int uv_stride;
@@ -225,7 +225,7 @@
 
   MV best_ref_mv1 = {0, 0};
   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
-  MV *ref_mv = &x->e_mbd.mi[0].src_mi->bmi[0].as_mv[0].as_mv;
+  MV *ref_mv = &x->e_mbd.mi[0]->bmi[0].as_mv[0].as_mv;
 
   // Save input state
   struct buf_2d src = x->plane[0].src;
@@ -343,8 +343,8 @@
         if (frames[frame] == NULL)
           continue;
 
-        mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.row = 0;
-        mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.col = 0;
+        mbd->mi[0]->bmi[0].as_mv[0].as_mv.row = 0;
+        mbd->mi[0]->bmi[0].as_mv[0].as_mv.col = 0;
 
         if (frame == alt_ref_index) {
           filter_weight = 2;
@@ -370,8 +370,8 @@
               frames[frame]->v_buffer + mb_uv_offset,
               frames[frame]->y_stride,
               mb_uv_width, mb_uv_height,
-              mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.row,
-              mbd->mi[0].src_mi->bmi[0].as_mv[0].as_mv.col,
+              mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
+              mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
               predictor, scale,
               mb_col * 16, mb_row * 16);
 
@@ -722,8 +722,8 @@
         }
       }
       cm->mi = cm->mip + cm->mi_stride + 1;
-      xd->mi = cm->mi;
-      xd->mi[0].src_mi = &xd->mi[0];
+      xd->mi = cm->mi_grid_visible;
+      xd->mi[0] = cm->mi;
     } else {
       // ARF is produced at the native frame size and resized when coded.
 #if CONFIG_VP9_HIGHBITDEPTH
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 9d2595b..862be4d 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -497,7 +497,7 @@
   uint8_t token_cache[32 * 32];
   struct macroblock_plane *p = &x->plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
-  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   int pt; /* near block/prev token context index */
   int c;
   TOKENEXTRA *t = *tp;        /* store tokens starting here */
@@ -612,7 +612,7 @@
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const int ctx = vp9_get_skip_context(xd);
   const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                               SEG_LVL_SKIP);
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 652971d..5b210ee 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -131,6 +131,8 @@
 
 # common (msa)
 VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_macros_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_horiz_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_msa.c
 VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_vert_msa.c
 VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
 
@@ -155,9 +157,9 @@
 # neon with assembly and intrinsics implementations. If both are available
 # prefer assembly.
 ifeq ($(HAVE_NEON_ASM), yes)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_avg_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon_asm$(ASM)
+VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_avg_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon_asm$(ASM)
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon_asm$(ASM)
@@ -173,9 +175,9 @@
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon_asm$(ASM)
 else
 ifeq ($(HAVE_NEON), yes)
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_avg_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_avg_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve8_neon.c
+VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_avg_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_convolve_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_copy_neon.c
 VP9_COMMON_SRCS-yes += common/arm/neon/vp9_idct16x16_1_add_neon.c
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 225a3ba..c4dd785 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -13,35 +13,6 @@
 #define VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_
 #include "./vpx_config.h"
 
-#ifndef CONFIG_MEM_MANAGER
-# if defined(VXWORKS)
-#  define CONFIG_MEM_MANAGER  1 /*include heap manager functionality,*/
-/*default: enabled on vxworks*/
-# else
-#  define CONFIG_MEM_MANAGER  0 /*include heap manager functionality*/
-# endif
-#endif /*CONFIG_MEM_MANAGER*/
-
-#ifndef CONFIG_MEM_TRACKER
-# define CONFIG_MEM_TRACKER     1 /*include xvpx_* calls in the lib*/
-#endif
-
-#ifndef CONFIG_MEM_CHECKS
-# define CONFIG_MEM_CHECKS      0 /*include some basic safety checks in
-vpx_memcpy, _memset, and _memmove*/
-#endif
-
-#ifndef USE_GLOBAL_FUNCTION_POINTERS
-# define USE_GLOBAL_FUNCTION_POINTERS   0  /*use function pointers instead of compiled functions.*/
-#endif
-
-#if CONFIG_MEM_TRACKER
-# include "vpx_mem_tracker.h"
-# if VPX_MEM_TRACKER_VERSION_CHIEF != 2 || VPX_MEM_TRACKER_VERSION_MAJOR != 5
-#  error "vpx_mem requires memory tracker version 2.5 to track memory usage"
-# endif
-#endif
-
 #define ADDRESS_STORAGE_SIZE      sizeof(size_t)
 
 #ifndef DEFAULT_ALIGNMENT
@@ -54,41 +25,6 @@
 # endif
 #endif
 
-#if CONFIG_MEM_TRACKER
-# define TRY_BOUNDS_CHECK         1        /*when set to 1 pads each allocation,
-integrity can be checked using
-vpx_memory_tracker_check_integrity
-or on free by defining*/
-/*TRY_BOUNDS_CHECK_ON_FREE*/
-#else
-# define TRY_BOUNDS_CHECK         0
-#endif /*CONFIG_MEM_TRACKER*/
-
-#if TRY_BOUNDS_CHECK
-# define TRY_BOUNDS_CHECK_ON_FREE 0          /*checks mem integrity on every
-free, very expensive*/
-# define BOUNDS_CHECK_VALUE       0xdeadbeef /*value stored before/after ea.
-mem addr for bounds checking*/
-# define BOUNDS_CHECK_PAD_SIZE    32         /*size of the padding before and
-after ea allocation to be filled
-with BOUNDS_CHECK_VALUE.
-this should be a multiple of 4*/
-#else
-# define BOUNDS_CHECK_VALUE       0
-# define BOUNDS_CHECK_PAD_SIZE    0
-#endif /*TRY_BOUNDS_CHECK*/
-
-#ifndef REMOVE_PRINTFS
-# define REMOVE_PRINTFS 0
-#endif
-
-/* Should probably use a vpx_mem logger function. */
-#if REMOVE_PRINTFS
-# define _P(x)
-#else
-# define _P(x) x
-#endif
-
 /*returns an addr aligned to the byte boundary specified by align*/
 #define align_addr(addr,align) (void*)(((size_t)(addr) + ((align) - 1)) & (size_t)-(align))
 
diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h
deleted file mode 100644
index 1335e00..0000000
--- a/vpx_mem/include/vpx_mem_tracker.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VPX_MEM_INCLUDE_VPX_MEM_TRACKER_H_
-#define VPX_MEM_INCLUDE_VPX_MEM_TRACKER_H_
-
-/* vpx_mem_tracker version info */
-#define vpx_mem_tracker_version "2.5.1.1"
-
-#define VPX_MEM_TRACKER_VERSION_CHIEF 2
-#define VPX_MEM_TRACKER_VERSION_MAJOR 5
-#define VPX_MEM_TRACKER_VERSION_MINOR 1
-#define VPX_MEM_TRACKER_VERSION_PATCH 1
-/* END - vpx_mem_tracker version info */
-
-#include <stdarg.h>
-
-struct mem_block {
-  size_t addr;
-  unsigned int size,
-           line;
-  char *file;
-  struct mem_block *prev,
-      * next;
-
-  int padded; // This mem_block has padding for integrity checks.
-  // As of right now, this should only be 0 if
-  // using vpx_mem_alloc to allocate cache memory.
-  // 2005-01-11 tjf
-};
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
-  /*
-      vpx_memory_tracker_init(int padding_size, int pad_value)
-        padding_size - the size of the padding before and after each mem addr.
-                       Values > 0 indicate that integrity checks can be performed
-                       by inspecting these areas.
-        pad_value - the initial value within the padding area before and after
-                    each mem addr.
-
-      Initializes the memory tracker interface. Should be called before any
-      other calls to the memory tracker.
-  */
-  int vpx_memory_tracker_init(int padding_size, int pad_value);
-
-  /*
-      vpx_memory_tracker_destroy()
-      Deinitializes the memory tracker interface
-  */
-  void vpx_memory_tracker_destroy();
-
-  /*
-      vpx_memory_tracker_add(size_t addr, unsigned int size,
-                           char * file, unsigned int line)
-        addr - memory address to be added to list
-        size - size of addr
-        file - the file addr was referenced from
-        line - the line in file addr was referenced from
-      Adds memory address addr, it's size, file and line it came from
-      to the memory tracker allocation table
-  */
-  void vpx_memory_tracker_add(size_t addr, unsigned int size,
-                              char *file, unsigned int line,
-                              int padded);
-
-  /*
-      vpx_memory_tracker_add(size_t addr, unsigned int size, char * file, unsigned int line)
-        addr - memory address to be added to be removed
-        padded - if 0, disables bounds checking on this memory block even if bounds
-        checking is enabled. (for example, when allocating cache memory, we still want
-        to check for memory leaks, but we do not waste cache space for bounds check padding)
-      Removes the specified address from the memory tracker's allocation
-      table
-      Return:
-        0: on success
-        -1: if memory allocation table's mutex could not be locked
-        -2: if the addr was not found in the list
-  */
-  int vpx_memory_tracker_remove(size_t addr);
-
-  /*
-      vpx_memory_tracker_find(unsigned int addr)
-        addr - address to be found in the memory tracker's
-               allocation table
-      Return:
-          If found, pointer to the memory block that matches addr
-          NULL otherwise
-  */
-  struct mem_block *vpx_memory_tracker_find(size_t addr);
-
-  /*
-      vpx_memory_tracker_dump()
-      Dumps the current contents of the memory
-      tracker allocation table
-  */
-  void vpx_memory_tracker_dump();
-
-  /*
-      vpx_memory_tracker_check_integrity()
-      If a padding_size was provided to vpx_memory_tracker_init()
-      This function will verify that the region before and after each
-      memory address contains the specified pad_value. Should the check
-      fail, the filename and line of the check will be printed out.
-  */
-  void vpx_memory_tracker_check_integrity(char *file, unsigned int line);
-
-  /*
-      vpx_memory_tracker_set_log_type
-        type - value representing the logging type to use
-        option - type specific option. This will be interpreted differently
-                 based on the type.
-      Sets the logging type for the memory tracker.
-      Values currently supported:
-        0: if option is NULL, log to stderr, otherwise interpret option as a
-           filename and attempt to open it.
-        1: Use output_debug_string (WIN32 only), option ignored
-      Return:
-        0: on success
-        -1: if the logging type could not be set, because the value was invalid
-            or because a file could not be opened
-  */
-  int vpx_memory_tracker_set_log_type(int type, char *option);
-
-  /*
-      vpx_memory_tracker_set_log_func
-        userdata - ptr to be passed to the supplied logfunc, can be NULL
-        logfunc - the logging function to be used to output data from
-                  vpx_memory_track_dump/check_integrity
-      Sets a logging function to be used by the memory tracker.
-      Return:
-        0: on success
-        -1: if the logging type could not be set because logfunc was NULL
-  */
-  int vpx_memory_tracker_set_log_func(void *userdata,
-                                      void(*logfunc)(void *userdata,
-                                                     const char *fmt, va_list args));
-
-  /* Wrappers to standard library functions. */
-  typedef void *(* mem_track_malloc_func)(size_t);
-  typedef void *(* mem_track_calloc_func)(size_t, size_t);
-  typedef void *(* mem_track_realloc_func)(void *, size_t);
-  typedef void (* mem_track_free_func)(void *);
-  typedef void *(* mem_track_memcpy_func)(void *, const void *, size_t);
-  typedef void *(* mem_track_memset_func)(void *, int, size_t);
-  typedef void *(* mem_track_memmove_func)(void *, const void *, size_t);
-
-  /*
-      vpx_memory_tracker_set_functions
-
-      Sets the function pointers for the standard library functions.
-
-      Return:
-        0: on success
-        -1: if the use global function pointers is not set.
-  */
-  int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l
-, mem_track_calloc_func g_calloc_l
-, mem_track_realloc_func g_realloc_l
-, mem_track_free_func g_free_l
-, mem_track_memcpy_func g_memcpy_l
-, mem_track_memset_func g_memset_l
-, mem_track_memmove_func g_memmove_l);
-
-#if defined(__cplusplus)
-}
-#endif
-
-#endif  // VPX_MEM_INCLUDE_VPX_MEM_TRACKER_H_
diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c
deleted file mode 100644
index ab3562d..0000000
--- a/vpx_mem/memory_manager/hmm_alloc.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-void *U(alloc)(U(descriptor) *desc, U(size_aau) n) {
-#ifdef HMM_AUDIT_FAIL
-
-  if (desc->avl_tree_root)
-    AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
-#endif
-
-    if (desc->last_freed) {
-#ifdef HMM_AUDIT_FAIL
-      AUDIT_BLOCK(desc->last_freed)
-#endif
-
-      U(into_free_collection)(desc, (head_record *)(desc->last_freed));
-
-      desc->last_freed = 0;
-    }
-
-  /* Add space for block header. */
-  n += HEAD_AAUS;
-
-  /* Convert n from number of address alignment units to block alignment
-  ** units. */
-  n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
-
-  if (n < MIN_BLOCK_BAUS)
-    n = MIN_BLOCK_BAUS;
-
-  {
-    /* Search for the first node of the bin containing the smallest
-    ** block big enough to satisfy request. */
-    ptr_record *ptr_rec_ptr =
-      U(avl_search)(
-        (U(avl_avl) *) & (desc->avl_tree_root), (U(size_bau)) n,
-        AVL_GREATER_EQUAL);
-
-    /* If an approprate bin is found, satisfy the allocation request,
-    ** otherwise return null pointer. */
-    return(ptr_rec_ptr ?
-           U(alloc_from_bin)(desc, ptr_rec_ptr, (U(size_bau)) n) : 0);
-  }
-}
diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c
deleted file mode 100644
index 0eff59d..0000000
--- a/vpx_mem/memory_manager/hmm_base.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-void U(init)(U(descriptor) *desc) {
-  desc->avl_tree_root = 0;
-  desc->last_freed = 0;
-}
-
-/* Remove a free block from a bin's doubly-linked list when it is not,
-** the first block in the bin.
-*/
-void U(dll_remove)(
-  /* Pointer to pointer record in the block to be removed. */
-  ptr_record *to_remove) {
-  to_remove->prev->next = to_remove->next;
-
-  if (to_remove->next)
-    to_remove->next->prev = to_remove->prev;
-}
-
-/* Put a block into the free collection of a heap.
-*/
-void U(into_free_collection)(
-  /* Pointer to heap descriptor. */
-  U(descriptor) *desc,
-  /* Pointer to head record of block. */
-  head_record *head_ptr) {
-  ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
-
-  ptr_record *bin_front_ptr =
-    U(avl_insert)((U(avl_avl) *) & (desc->avl_tree_root), ptr_rec_ptr);
-
-  if (bin_front_ptr != ptr_rec_ptr) {
-    /* The block was not inserted into the AVL tree because there is
-    ** already a bin for the size of the block. */
-
-    MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(head_ptr)
-    ptr_rec_ptr->self = ptr_rec_ptr;
-
-    /* Make the block the new second block in the bin's doubly-linked
-    ** list. */
-    ptr_rec_ptr->prev = bin_front_ptr;
-    ptr_rec_ptr->next = bin_front_ptr->next;
-    bin_front_ptr->next = ptr_rec_ptr;
-
-    if (ptr_rec_ptr->next)
-      ptr_rec_ptr->next->prev = ptr_rec_ptr;
-  } else
-    /* Block is first block in new bin. */
-    ptr_rec_ptr->next = 0;
-}
-
-/* Allocate a block from a given bin.  Returns a pointer to the payload
-** of the removed block.  The "last freed" pointer must be null prior
-** to calling this function.
-*/
-void *U(alloc_from_bin)(
-  /* Pointer to heap descriptor. */
-  U(descriptor) *desc,
-  /* Pointer to pointer record of first block in bin. */
-  ptr_record *bin_front_ptr,
-  /* Number of BAUs needed in the allocated block.  If the block taken
-  ** from the bin is significantly larger than the number of BAUs needed,
-  ** the "extra" BAUs are split off to form a new free block. */
-  U(size_bau) n_baus) {
-  head_record *head_ptr;
-  U(size_bau) rem_baus;
-
-  if (bin_front_ptr->next) {
-    /* There are multiple blocks in this bin.  Use the 2nd block in
-    ** the bin to avoid needless change to the AVL tree.
-    */
-
-    ptr_record *ptr_rec_ptr = bin_front_ptr->next;
-    head_ptr = PTR_REC_TO_HEAD(ptr_rec_ptr);
-
-#ifdef AUDIT_FAIL
-    AUDIT_BLOCK(head_ptr)
-#endif
-
-    U(dll_remove)(ptr_rec_ptr);
-  } else {
-    /* There is only one block in the bin, so it has to be removed
-    ** from the AVL tree.
-    */
-
-    head_ptr = PTR_REC_TO_HEAD(bin_front_ptr);
-
-    U(avl_remove)(
-      (U(avl_avl) *) & (desc->avl_tree_root), BLOCK_BAUS(head_ptr));
-  }
-
-  MARK_BLOCK_ALLOCATED(head_ptr)
-
-  rem_baus = BLOCK_BAUS(head_ptr) - n_baus;
-
-  if (rem_baus >= MIN_BLOCK_BAUS) {
-    /* Since there are enough "extra" BAUs, split them off to form
-    ** a new free block.
-    */
-
-    head_record *rem_head_ptr =
-      (head_record *) BAUS_FORWARD(head_ptr, n_baus);
-
-    /* Change the next block's header to reflect the fact that the
-    ** block preceeding it is now smaller.
-    */
-    SET_PREV_BLOCK_BAUS(
-      BAUS_FORWARD(head_ptr, head_ptr->block_size), rem_baus)
-
-    head_ptr->block_size = n_baus;
-
-    rem_head_ptr->previous_block_size = n_baus;
-    rem_head_ptr->block_size = rem_baus;
-
-    desc->last_freed = rem_head_ptr;
-  }
-
-  return(HEAD_TO_PTR_REC(head_ptr));
-}
-
-/* Take a block out of the free collection.
-*/
-void U(out_of_free_collection)(
-  /* Descriptor of heap that block is in. */
-  U(descriptor) *desc,
-  /* Pointer to head of block to take out of free collection. */
-  head_record *head_ptr) {
-  ptr_record *ptr_rec_ptr = HEAD_TO_PTR_REC(head_ptr);
-
-  if (ptr_rec_ptr->self == ptr_rec_ptr)
-    /* Block is not the front block in its bin, so all we have to
-    ** do is take it out of the bin's doubly-linked list. */
-    U(dll_remove)(ptr_rec_ptr);
-  else {
-    ptr_record *next = ptr_rec_ptr->next;
-
-    if (next)
-      /* Block is the front block in its bin, and there is at least
-      ** one other block in the bin.  Substitute the next block for
-      ** the front block. */
-      U(avl_subst)((U(avl_avl) *) & (desc->avl_tree_root), next);
-    else
-      /* Block is the front block in its bin, but there is no other
-      ** block in the bin.  Eliminate the bin. */
-      U(avl_remove)(
-        (U(avl_avl) *) & (desc->avl_tree_root), BLOCK_BAUS(head_ptr));
-  }
-}
-
-void U(free)(U(descriptor) *desc, void *payload_ptr) {
-  /* Flags if coalesce with adjacent block. */
-  int coalesce;
-
-  head_record *fwd_head_ptr;
-  head_record *free_head_ptr = PTR_REC_TO_HEAD(payload_ptr);
-
-  desc->num_baus_can_shrink = 0;
-
-#ifdef HMM_AUDIT_FAIL
-
-  AUDIT_BLOCK(free_head_ptr)
-
-  /* Make sure not freeing an already free block. */
-  if (!IS_BLOCK_ALLOCATED(free_head_ptr))
-    HMM_AUDIT_FAIL
-
-    if (desc->avl_tree_root)
-      /* Audit root block in AVL tree. */
-      AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
-
-#endif
-
-      fwd_head_ptr =
-        (head_record *) BAUS_FORWARD(free_head_ptr, free_head_ptr->block_size);
-
-  if (free_head_ptr->previous_block_size) {
-    /* Coalesce with backward block if possible. */
-
-    head_record *bkwd_head_ptr =
-      (head_record *) BAUS_BACKWARD(
-        free_head_ptr, free_head_ptr->previous_block_size);
-
-#ifdef HMM_AUDIT_FAIL
-    AUDIT_BLOCK(bkwd_head_ptr)
-#endif
-
-    if (bkwd_head_ptr == (head_record *)(desc->last_freed)) {
-      desc->last_freed = 0;
-      coalesce = 1;
-    } else if (IS_BLOCK_ALLOCATED(bkwd_head_ptr))
-      coalesce = 0;
-    else {
-      U(out_of_free_collection)(desc, bkwd_head_ptr);
-      coalesce = 1;
-    }
-
-    if (coalesce) {
-      bkwd_head_ptr->block_size += free_head_ptr->block_size;
-      SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(bkwd_head_ptr))
-      free_head_ptr = bkwd_head_ptr;
-    }
-  }
-
-  if (fwd_head_ptr->block_size == 0) {
-    /* Block to be freed is last block before dummy end-of-chunk block. */
-    desc->end_of_shrinkable_chunk =
-      BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
-    desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
-
-    if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
-      /* Free block is the entire chunk, so shrinking can eliminate
-      ** entire chunk including dummy end block. */
-      desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
-  } else {
-    /* Coalesce with forward block if possible. */
-
-#ifdef HMM_AUDIT_FAIL
-    AUDIT_BLOCK(fwd_head_ptr)
-#endif
-
-    if (fwd_head_ptr == (head_record *)(desc->last_freed)) {
-      desc->last_freed = 0;
-      coalesce = 1;
-    } else if (IS_BLOCK_ALLOCATED(fwd_head_ptr))
-      coalesce = 0;
-    else {
-      U(out_of_free_collection)(desc, fwd_head_ptr);
-      coalesce = 1;
-    }
-
-    if (coalesce) {
-      free_head_ptr->block_size += fwd_head_ptr->block_size;
-
-      fwd_head_ptr =
-        (head_record *) BAUS_FORWARD(
-          fwd_head_ptr, BLOCK_BAUS(fwd_head_ptr));
-
-      SET_PREV_BLOCK_BAUS(fwd_head_ptr, BLOCK_BAUS(free_head_ptr))
-
-      if (fwd_head_ptr->block_size == 0) {
-        /* Coalesced block to be freed is last block before dummy
-        ** end-of-chunk block. */
-        desc->end_of_shrinkable_chunk =
-          BAUS_FORWARD(fwd_head_ptr, DUMMY_END_BLOCK_BAUS);
-        desc->num_baus_can_shrink = BLOCK_BAUS(free_head_ptr);
-
-        if (PREV_BLOCK_BAUS(free_head_ptr) == 0)
-          /* Free block is the entire chunk, so shrinking can
-          ** eliminate entire chunk including dummy end block. */
-          desc->num_baus_can_shrink += DUMMY_END_BLOCK_BAUS;
-      }
-    }
-  }
-
-  if (desc->last_freed) {
-    /* There is a last freed block, but it is not adjacent to the
-    ** block being freed by this call to free, so put the last
-    ** freed block into the free collection.
-    */
-
-#ifdef HMM_AUDIT_FAIL
-    AUDIT_BLOCK(desc->last_freed)
-#endif
-
-    U(into_free_collection)(desc, (head_record *)(desc->last_freed));
-  }
-
-  desc->last_freed = free_head_ptr;
-}
-
-void U(new_chunk)(U(descriptor) *desc, void *start, U(size_bau) n_baus) {
-#ifdef HMM_AUDIT_FAIL
-
-  if (desc->avl_tree_root)
-    /* Audit root block in AVL tree. */
-    AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
-#endif
-
-#undef HEAD_PTR
-#define HEAD_PTR ((head_record *) start)
-
-    /* Make the chunk one big free block followed by a dummy end block.
-    */
-
-    n_baus -= DUMMY_END_BLOCK_BAUS;
-
-  HEAD_PTR->previous_block_size = 0;
-  HEAD_PTR->block_size = n_baus;
-
-  U(into_free_collection)(desc, HEAD_PTR);
-
-  /* Set up the dummy end block. */
-  start = BAUS_FORWARD(start, n_baus);
-  HEAD_PTR->previous_block_size = n_baus;
-  HEAD_PTR->block_size = 0;
-
-#undef HEAD_PTR
-}
-
-#ifdef HMM_AUDIT_FAIL
-
-/* Function that does audit fail actions defined my preprocessor symbol,
-** and returns a dummy integer value.
-*/
-int U(audit_block_fail_dummy_return)(void) {
-  HMM_AUDIT_FAIL
-
-  /* Dummy return. */
-  return(0);
-}
-
-#endif
-
-/* AVL Tree instantiation. */
-
-#ifdef HMM_AUDIT_FAIL
-
-/* The AVL tree generic package passes an ACCESS of 1 when it "touches"
-** a child node for the first time during a particular operation.  I use
-** this feature to audit only one time (per operation) the free blocks
-** that are tree nodes.  Since the root node is not a child node, it has
-** to be audited directly.
-*/
-
-/* The pain you feel while reading these macros will not be in vain.  It
-** will remove all doubt from you mind that C++ inline functions are
-** a very good thing.
-*/
-
-#define AVL_GET_LESS(H, ACCESS) \
-  (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->self)
-#define AVL_GET_GREATER(H, ACCESS) \
-  (((ACCESS) ? AUDIT_BLOCK_AS_EXPR(PTR_REC_TO_HEAD(H)) : 0), (H)->prev)
-
-#else
-
-#define AVL_GET_LESS(H, ACCESS) ((H)->self)
-#define AVL_GET_GREATER(H, ACCESS) ((H)->prev)
-
-#endif
-
-#define AVL_SET_LESS(H, LH) (H)->self = (LH);
-#define AVL_SET_GREATER(H, GH) (H)->prev = (GH);
-
-/*  high bit of high bit of
-**  block_size  previous_block_size balance factor
-**  ----------- ------------------- --------------
-**  0       0           n/a (block allocated)
-**  0       1           1
-**  1       0           -1
-**  1       1           0
-*/
-
-#define AVL_GET_BALANCE_FACTOR(H) \
-  ((((head_record *) (PTR_REC_TO_HEAD(H)))->block_size & \
-    HIGH_BIT_BAU_SIZE) ? \
-   (((head_record *) (PTR_REC_TO_HEAD(H)))->previous_block_size & \
-    HIGH_BIT_BAU_SIZE ? 0 : -1) : 1)
-
-#define AVL_SET_BALANCE_FACTOR(H, BF) \
-  {                         \
-    register head_record *p =               \
-                                            (head_record *) PTR_REC_TO_HEAD(H);       \
-    register int bal_f = (BF);              \
-    \
-    if (bal_f <= 0)                 \
-      p->block_size |= HIGH_BIT_BAU_SIZE;       \
-    else                        \
-      p->block_size &= ~HIGH_BIT_BAU_SIZE;      \
-    if (bal_f >= 0)                 \
-      p->previous_block_size |= HIGH_BIT_BAU_SIZE;  \
-    else                        \
-      p->previous_block_size &= ~HIGH_BIT_BAU_SIZE; \
-  }
-
-#define COMPARE_KEY_KEY(K1, K2) ((K1) == (K2) ? 0 : ((K1) > (K2) ? 1 : -1))
-
-#define AVL_COMPARE_KEY_NODE(K, H) \
-  COMPARE_KEY_KEY(K, BLOCK_BAUS(PTR_REC_TO_HEAD(H)))
-
-#define AVL_COMPARE_NODE_NODE(H1, H2) \
-  COMPARE_KEY_KEY(BLOCK_BAUS(PTR_REC_TO_HEAD(H1)), \
-                  BLOCK_BAUS(PTR_REC_TO_HEAD(H2)))
-
-#define AVL_NULL ((ptr_record *) 0)
-
-#define AVL_IMPL_MASK \
-  ( AVL_IMPL_INSERT | AVL_IMPL_SEARCH | AVL_IMPL_REMOVE | AVL_IMPL_SUBST )
-
-#include "cavl_impl.h"
diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c
deleted file mode 100644
index 51c3cc2..0000000
--- a/vpx_mem/memory_manager/hmm_dflt_abort.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-/* The function in this file performs default actions if self-auditing
-** finds heap corruption.  Don't rely on this code to handle the
-** case where HMM is being used to implement the malloc and free standard
-** library functions.  Rewrite the function if necessary to avoid using
-** I/O and execution termination functions that call malloc or free.
-** In Unix, for example, you would replace the fputs calls with calls
-** to the write system call using file handle number 2.
-*/
-#include "hmm_intrnl.h"
-#include <stdio.h>
-#include <stdlib.h>
-
-static int entered = 0;
-
-/* Print abort message, file and line.  Terminate execution.
-*/
-void hmm_dflt_abort(const char *file, const char *line) {
-  /* Avoid use of printf(), which is more likely to use heap. */
-
-  if (entered)
-
-    /* The standard I/O functions called a heap function and caused
-    ** an indirect recursive call to this function.  So we'll have
-    ** to just exit without printing a message.  */
-    while (1);
-
-  entered = 1;
-
-  fputs("\n_abort - Heap corruption\n" "File: ", stderr);
-  fputs(file, stderr);
-  fputs("  Line: ", stderr);
-  fputs(line, stderr);
-  fputs("\n\n", stderr);
-  fputs("hmm_dflt_abort: while(1)!!!\n", stderr);
-  fflush(stderr);
-
-  while (1);
-}
diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c
deleted file mode 100644
index 0e86373..0000000
--- a/vpx_mem/memory_manager/hmm_grow.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-void U(grow_chunk)(U(descriptor) *desc, void *end, U(size_bau) n_baus) {
-#undef HEAD_PTR
-#define HEAD_PTR ((head_record *) end)
-
-  end = BAUS_BACKWARD(end, DUMMY_END_BLOCK_BAUS);
-
-#ifdef HMM_AUDIT_FAIL
-
-  if (HEAD_PTR->block_size != 0)
-    /* Chunk does not have valid dummy end block. */
-    HMM_AUDIT_FAIL
-
-#endif
-
-    /* Create a new block that absorbs the old dummy end block. */
-    HEAD_PTR->block_size = n_baus;
-
-  /* Set up the new dummy end block. */
-  {
-    head_record *dummy = (head_record *) BAUS_FORWARD(end, n_baus);
-    dummy->previous_block_size = n_baus;
-    dummy->block_size = 0;
-  }
-
-  /* Simply free the new block, allowing it to coalesce with any
-  ** free block at that was the last block in the chunk prior to
-  ** growth.
-  */
-  U(free)(desc, HEAD_TO_PTR_REC(end));
-
-#undef HEAD_PTR
-}
diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c
deleted file mode 100644
index 192758d..0000000
--- a/vpx_mem/memory_manager/hmm_largest.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-U(size_aau) U(largest_available)(U(descriptor) *desc) {
-  U(size_bau) largest;
-
-  if (!(desc->avl_tree_root))
-    largest = 0;
-  else {
-#ifdef HMM_AUDIT_FAIL
-    /* Audit root block in AVL tree. */
-    AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
-#endif
-
-    largest =
-      BLOCK_BAUS(
-        PTR_REC_TO_HEAD(
-          U(avl_search)(
-            (U(avl_avl) *) & (desc->avl_tree_root),
-            (U(size_bau)) ~(U(size_bau)) 0, AVL_LESS)));
-  }
-
-  if (desc->last_freed) {
-    /* Size of last freed block. */
-    register U(size_bau) lf_size;
-
-#ifdef HMM_AUDIT_FAIL
-    AUDIT_BLOCK(desc->last_freed)
-#endif
-
-    lf_size = BLOCK_BAUS(desc->last_freed);
-
-    if (lf_size > largest)
-      largest = lf_size;
-  }
-
-  /* Convert largest size to AAUs and subract head size leaving payload
-  ** size.
-  */
-  return(largest ?
-         ((largest * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) - HEAD_AAUS) :
-         0);
-}
diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c
deleted file mode 100644
index baa5a8f..0000000
--- a/vpx_mem/memory_manager/hmm_resize.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-int U(resize)(U(descriptor) *desc, void *mem, U(size_aau) n) {
-  U(size_aau) i;
-  head_record *next_head_ptr;
-  head_record *head_ptr = PTR_REC_TO_HEAD(mem);
-
-  /* Flag. */
-  int next_block_free;
-
-  /* Convert n from desired block size in AAUs to BAUs. */
-  n += HEAD_AAUS;
-  n = DIV_ROUND_UP(n, HMM_BLOCK_ALIGN_UNIT);
-
-  if (n < MIN_BLOCK_BAUS)
-    n = MIN_BLOCK_BAUS;
-
-#ifdef HMM_AUDIT_FAIL
-
-  AUDIT_BLOCK(head_ptr)
-
-  if (!IS_BLOCK_ALLOCATED(head_ptr))
-    HMM_AUDIT_FAIL
-
-    if (desc->avl_tree_root)
-      AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
-
-#endif
-
-      i = head_ptr->block_size;
-
-  next_head_ptr =
-    (head_record *) BAUS_FORWARD(head_ptr, head_ptr->block_size);
-
-  next_block_free =
-    (next_head_ptr == desc->last_freed) ||
-    !IS_BLOCK_ALLOCATED(next_head_ptr);
-
-  if (next_block_free)
-    /* Block can expand into next free block. */
-    i += BLOCK_BAUS(next_head_ptr);
-
-  if (n > i)
-    /* Not enough room for block to expand. */
-    return(-1);
-
-  if (next_block_free) {
-#ifdef HMM_AUDIT_FAIL
-    AUDIT_BLOCK(next_head_ptr)
-#endif
-
-    if (next_head_ptr == desc->last_freed)
-      desc->last_freed = 0;
-    else
-      U(out_of_free_collection)(desc, next_head_ptr);
-
-    next_head_ptr =
-      (head_record *) BAUS_FORWARD(head_ptr, (U(size_bau)) i);
-  }
-
-  /* Set i to number of "extra" BAUs. */
-  i -= n;
-
-  if (i < MIN_BLOCK_BAUS)
-    /* Not enough extra BAUs to be a block on their own, so just keep them
-    ** in the block being resized.
-    */
-  {
-    n += i;
-    i = n;
-  } else {
-    /* There are enough "leftover" BAUs in the next block to
-    ** form a remainder block. */
-
-    head_record *rem_head_ptr;
-
-    rem_head_ptr = (head_record *) BAUS_FORWARD(head_ptr, n);
-
-    rem_head_ptr->previous_block_size = (U(size_bau)) n;
-    rem_head_ptr->block_size = (U(size_bau)) i;
-
-    if (desc->last_freed) {
-#ifdef HMM_AUDIT_FAIL
-      AUDIT_BLOCK(desc->last_freed)
-#endif
-
-      U(into_free_collection)(desc, (head_record *)(desc->last_freed));
-
-      desc->last_freed = 0;
-    }
-
-    desc->last_freed = rem_head_ptr;
-  }
-
-  head_ptr->block_size = (U(size_bau)) n;
-  next_head_ptr->previous_block_size = (U(size_bau)) i;
-
-  return(0);
-}
diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c
deleted file mode 100644
index f80aeea..0000000
--- a/vpx_mem/memory_manager/hmm_shrink.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-void U(shrink_chunk)(U(descriptor) *desc, U(size_bau) n_baus_to_shrink) {
-  head_record *dummy_end_block = (head_record *)
-                                 BAUS_BACKWARD(desc->end_of_shrinkable_chunk, DUMMY_END_BLOCK_BAUS);
-
-#ifdef HMM_AUDIT_FAIL
-
-  if (dummy_end_block->block_size != 0)
-    /* Chunk does not have valid dummy end block. */
-    HMM_AUDIT_FAIL
-
-#endif
-
-    if (n_baus_to_shrink) {
-      head_record *last_block = (head_record *)
-                                BAUS_BACKWARD(
-                                  dummy_end_block, dummy_end_block->previous_block_size);
-
-#ifdef HMM_AUDIT_FAIL
-      AUDIT_BLOCK(last_block)
-#endif
-
-      if (last_block == desc->last_freed) {
-        U(size_bau) bs = BLOCK_BAUS(last_block);
-
-        /* Chunk will not be shrunk out of existence if
-        ** 1.  There is at least one allocated block in the chunk
-        **     and the amount to shrink is exactly the size of the
-        **     last block, OR
-        ** 2.  After the last block is shrunk, there will be enough
-        **     BAUs left in it to form a minimal size block. */
-        int chunk_will_survive =
-          (PREV_BLOCK_BAUS(last_block) && (n_baus_to_shrink == bs)) ||
-          (n_baus_to_shrink <= (U(size_bau))(bs - MIN_BLOCK_BAUS));
-
-        if (chunk_will_survive ||
-            (!PREV_BLOCK_BAUS(last_block) &&
-             (n_baus_to_shrink ==
-              (U(size_bau))(bs + DUMMY_END_BLOCK_BAUS)))) {
-          desc->last_freed = 0;
-
-          if (chunk_will_survive) {
-            bs -= n_baus_to_shrink;
-
-            if (bs) {
-              /* The last (non-dummy) block was not completely
-              ** eliminated by the shrink. */
-
-              last_block->block_size = bs;
-
-              /* Create new dummy end record.
-              */
-              dummy_end_block =
-                (head_record *) BAUS_FORWARD(last_block, bs);
-              dummy_end_block->previous_block_size = bs;
-              dummy_end_block->block_size = 0;
-
-#ifdef HMM_AUDIT_FAIL
-
-              if (desc->avl_tree_root)
-                AUDIT_BLOCK(PTR_REC_TO_HEAD(desc->avl_tree_root))
-#endif
-
-                U(into_free_collection)(desc, last_block);
-            } else {
-              /* The last (non-dummy) block was completely
-              ** eliminated by the shrink.  Make its head
-              ** the new dummy end block.
-              */
-              last_block->block_size = 0;
-              last_block->previous_block_size &= ~HIGH_BIT_BAU_SIZE;
-            }
-          }
-        }
-
-#ifdef HMM_AUDIT_FAIL
-        else
-          HMM_AUDIT_FAIL
-#endif
-        }
-
-#ifdef HMM_AUDIT_FAIL
-      else
-        HMM_AUDIT_FAIL
-#endif
-      }
-}
diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c
deleted file mode 100644
index 4428c3e..0000000
--- a/vpx_mem/memory_manager/hmm_true.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#include "hmm_intrnl.h"
-
-U(size_aau) U(true_size)(void *payload_ptr) {
-  register  head_record *head_ptr = PTR_REC_TO_HEAD(payload_ptr);
-
-#ifdef HMM_AUDIT_FAIL
-  AUDIT_BLOCK(head_ptr)
-#endif
-
-  /* Convert block size from BAUs to AAUs.  Subtract head size, leaving
-  ** payload size.
-  */
-  return(
-          (BLOCK_BAUS(head_ptr) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT)) -
-          HEAD_AAUS);
-}
diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h
deleted file mode 100644
index a5ced8b..0000000
--- a/vpx_mem/memory_manager/include/cavl_if.h
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_MEM_MEMORY_MANAGER_INCLUDE_CAVL_IF_H_
-#define VPX_MEM_MEMORY_MANAGER_INCLUDE_CAVL_IF_H_
-
-/* Abstract AVL Tree Generic C Package.
-** Interface generation header file.
-**
-** This code is in the public domain.  See cavl_tree.html for interface
-** documentation.
-**
-** Version: 1.5  Author: Walt Karas
-*/
-
-/* This header contains the definition of CHAR_BIT (number of bits in a
-** char). */
-#include <limits.h>
-
-#undef L_
-#undef L_EST_LONG_BIT
-#undef L_SIZE
-#undef L_SC
-#undef L_LONG_BIT
-#undef L_BIT_ARR_DEFN
-
-#ifndef AVL_SEARCH_TYPE_DEFINED_
-#define AVL_SEARCH_TYPE_DEFINED_
-
-typedef enum {
-  AVL_EQUAL = 1,
-  AVL_LESS = 2,
-  AVL_GREATER = 4,
-  AVL_LESS_EQUAL = AVL_EQUAL | AVL_LESS,
-  AVL_GREATER_EQUAL = AVL_EQUAL | AVL_GREATER
-}
-avl_search_type;
-
-#endif
-
-#ifdef AVL_UNIQUE
-
-#define L_ AVL_UNIQUE
-
-#else
-
-#define L_(X) X
-
-#endif
-
-/* Determine storage class for function prototypes. */
-#ifdef AVL_PRIVATE
-
-#define L_SC static
-
-#else
-
-#define L_SC extern
-
-#endif
-
-#ifdef AVL_SIZE
-
-#define L_SIZE AVL_SIZE
-
-#else
-
-#define L_SIZE unsigned long
-
-#endif
-
-typedef struct {
-#ifdef AVL_INSIDE_STRUCT
-
-  AVL_INSIDE_STRUCT
-
-#endif
-
-  AVL_HANDLE root;
-}
-L_(avl);
-
-/* Function prototypes. */
-
-L_SC void L_(init)(L_(avl) *tree);
-
-L_SC int L_(is_empty)(L_(avl) *tree);
-
-L_SC AVL_HANDLE L_(insert)(L_(avl) *tree, AVL_HANDLE h);
-
-L_SC AVL_HANDLE L_(search)(L_(avl) *tree, AVL_KEY k, avl_search_type st);
-
-L_SC AVL_HANDLE L_(search_least)(L_(avl) *tree);
-
-L_SC AVL_HANDLE L_(search_greatest)(L_(avl) *tree);
-
-L_SC AVL_HANDLE L_(remove)(L_(avl) *tree, AVL_KEY k);
-
-L_SC AVL_HANDLE L_(subst)(L_(avl) *tree, AVL_HANDLE new_node);
-
-#ifdef AVL_BUILD_ITER_TYPE
-
-L_SC int L_(build)(
-  L_(avl) *tree, AVL_BUILD_ITER_TYPE p, L_SIZE num_nodes);
-
-#endif
-
-/* ANSI C/ISO C++ require that a long have at least 32 bits.  Set
-** L_EST_LONG_BIT to be the greatest multiple of 8 in the range
-** 32 - 64 (inclusive) that is less than or equal to the number of
-** bits in a long.
-*/
-
-#if (((LONG_MAX >> 31) >> 7) == 0)
-
-#define L_EST_LONG_BIT 32
-
-#elif (((LONG_MAX >> 31) >> 15) == 0)
-
-#define L_EST_LONG_BIT 40
-
-#elif (((LONG_MAX >> 31) >> 23) == 0)
-
-#define L_EST_LONG_BIT 48
-
-#elif (((LONG_MAX >> 31) >> 31) == 0)
-
-#define L_EST_LONG_BIT 56
-
-#else
-
-#define L_EST_LONG_BIT 64
-
-#endif
-
-/* Number of bits in a long. */
-#define L_LONG_BIT (sizeof(long) * CHAR_BIT)
-
-/* The macro L_BIT_ARR_DEFN defines a bit array whose index is a (0-based)
-** node depth.  The definition depends on whether the maximum depth is more
-** or less than the number of bits in a single long.
-*/
-
-#if ((AVL_MAX_DEPTH) > L_EST_LONG_BIT)
-
-/* Maximum depth may be more than number of bits in a long. */
-
-#define L_BIT_ARR_DEFN(NAME) \
-  unsigned long NAME[((AVL_MAX_DEPTH) + L_LONG_BIT - 1) / L_LONG_BIT];
-
-#else
-
-/* Maximum depth is definitely less than number of bits in a long. */
-
-#define L_BIT_ARR_DEFN(NAME) unsigned long NAME;
-
-#endif
-
-/* Iterator structure. */
-typedef struct {
-  /* Tree being iterated over. */
-  L_(avl) *tree_;
-
-  /* Records a path into the tree.  If bit n is true, indicates
-  ** take greater branch from the nth node in the path, otherwise
-  ** take the less branch.  bit 0 gives branch from root, and
-  ** so on. */
-  L_BIT_ARR_DEFN(branch)
-
-  /* Zero-based depth of path into tree. */
-  unsigned depth;
-
-  /* Handles of nodes in path from root to current node (returned by *). */
-  AVL_HANDLE path_h[(AVL_MAX_DEPTH) - 1];
-}
-L_(iter);
-
-/* Iterator function prototypes. */
-
-L_SC void L_(start_iter)(
-  L_(avl) *tree, L_(iter) *iter, AVL_KEY k, avl_search_type st);
-
-L_SC void L_(start_iter_least)(L_(avl) *tree, L_(iter) *iter);
-
-L_SC void L_(start_iter_greatest)(L_(avl) *tree, L_(iter) *iter);
-
-L_SC AVL_HANDLE L_(get_iter)(L_(iter) *iter);
-
-L_SC void L_(incr_iter)(L_(iter) *iter);
-
-L_SC void L_(decr_iter)(L_(iter) *iter);
-
-L_SC void L_(init_iter)(L_(iter) *iter);
-
-#define AVL_IMPL_INIT           1
-#define AVL_IMPL_IS_EMPTY       (1 << 1)
-#define AVL_IMPL_INSERT         (1 << 2)
-#define AVL_IMPL_SEARCH         (1 << 3)
-#define AVL_IMPL_SEARCH_LEAST       (1 << 4)
-#define AVL_IMPL_SEARCH_GREATEST    (1 << 5)
-#define AVL_IMPL_REMOVE         (1 << 6)
-#define AVL_IMPL_BUILD          (1 << 7)
-#define AVL_IMPL_START_ITER     (1 << 8)
-#define AVL_IMPL_START_ITER_LEAST   (1 << 9)
-#define AVL_IMPL_START_ITER_GREATEST    (1 << 10)
-#define AVL_IMPL_GET_ITER       (1 << 11)
-#define AVL_IMPL_INCR_ITER      (1 << 12)
-#define AVL_IMPL_DECR_ITER      (1 << 13)
-#define AVL_IMPL_INIT_ITER      (1 << 14)
-#define AVL_IMPL_SUBST          (1 << 15)
-
-#define AVL_IMPL_ALL            (~0)
-
-#undef L_
-#undef L_EST_LONG_BIT
-#undef L_SIZE
-#undef L_SC
-#undef L_LONG_BIT
-#undef L_BIT_ARR_DEFN
-
-#endif  // VPX_MEM_MEMORY_MANAGER_INCLUDE_CAVL_IF_H_
diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h
deleted file mode 100644
index 8b9ae27..0000000
--- a/vpx_mem/memory_manager/include/cavl_impl.h
+++ /dev/null
@@ -1,1152 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_MEM_MEMORY_MANAGER_INCLUDE_CAVL_IMPL_H_
-#define VPX_MEM_MEMORY_MANAGER_INCLUDE_CAVL_IMPL_H_
-
-/* Abstract AVL Tree Generic C Package.
-** Implementation generation header file.
-**
-** This code is in the public domain.  See cavl_tree.html for interface
-** documentation.
-**
-** Version: 1.5  Author: Walt Karas
-*/
-
-#undef L_
-#undef L_EST_LONG_BIT
-#undef L_SIZE
-#undef l_tree
-#undef L_MASK_HIGH_BIT
-#undef L_LONG_BIT
-#undef L_BIT_ARR_DEFN
-#undef L_BIT_ARR_VAL
-#undef L_BIT_ARR_0
-#undef L_BIT_ARR_1
-#undef L_BIT_ARR_ALL
-#undef L_BIT_ARR_LONGS
-#undef L_IMPL_MASK
-#undef L_CHECK_READ_ERROR
-#undef L_CHECK_READ_ERROR_INV_DEPTH
-#undef L_SC
-#undef L_BALANCE_PARAM_PREFIX
-
-#ifdef AVL_UNIQUE
-
-#define L_ AVL_UNIQUE
-
-#else
-
-#define L_(X) X
-
-#endif
-
-/* Determine correct storage class for functions */
-#ifdef AVL_PRIVATE
-
-#define L_SC static
-
-#else
-
-#define L_SC
-
-#endif
-
-#ifdef AVL_SIZE
-
-#define L_SIZE AVL_SIZE
-
-#else
-
-#define L_SIZE unsigned long
-
-#endif
-
-#define L_MASK_HIGH_BIT ((int) ~ ((~ (unsigned) 0) >> 1))
-
-/* ANSI C/ISO C++ require that a long have at least 32 bits.  Set
-** L_EST_LONG_BIT to be the greatest multiple of 8 in the range
-** 32 - 64 (inclusive) that is less than or equal to the number of
-** bits in a long.
-*/
-
-#if (((LONG_MAX >> 31) >> 7) == 0)
-
-#define L_EST_LONG_BIT 32
-
-#elif (((LONG_MAX >> 31) >> 15) == 0)
-
-#define L_EST_LONG_BIT 40
-
-#elif (((LONG_MAX >> 31) >> 23) == 0)
-
-#define L_EST_LONG_BIT 48
-
-#elif (((LONG_MAX >> 31) >> 31) == 0)
-
-#define L_EST_LONG_BIT 56
-
-#else
-
-#define L_EST_LONG_BIT 64
-
-#endif
-
-#define L_LONG_BIT (sizeof(long) * CHAR_BIT)
-
-#if ((AVL_MAX_DEPTH) > L_EST_LONG_BIT)
-
-/* The maximum depth may be greater than the number of bits in a long,
-** so multiple longs are needed to hold a bit array indexed by node
-** depth. */
-
-#define L_BIT_ARR_LONGS (((AVL_MAX_DEPTH) + L_LONG_BIT - 1) / L_LONG_BIT)
-
-#define L_BIT_ARR_DEFN(NAME) unsigned long NAME[L_BIT_ARR_LONGS];
-
-#define L_BIT_ARR_VAL(BIT_ARR, BIT_NUM) \
-  ((BIT_ARR)[(BIT_NUM) / L_LONG_BIT] & (1L << ((BIT_NUM) % L_LONG_BIT)))
-
-#define L_BIT_ARR_0(BIT_ARR, BIT_NUM) \
-  (BIT_ARR)[(BIT_NUM) / L_LONG_BIT] &= ~(1L << ((BIT_NUM) % L_LONG_BIT));
-
-#define L_BIT_ARR_1(BIT_ARR, BIT_NUM) \
-  (BIT_ARR)[(BIT_NUM) / L_LONG_BIT] |= 1L << ((BIT_NUM) % L_LONG_BIT);
-
-#define L_BIT_ARR_ALL(BIT_ARR, BIT_VAL) \
-  { int i = L_BIT_ARR_LONGS; do (BIT_ARR)[--i] = 0L - (BIT_VAL); while(i); }
-
-#else /* The bit array can definitely fit in one long */
-
-#define L_BIT_ARR_DEFN(NAME) unsigned long NAME;
-
-#define L_BIT_ARR_VAL(BIT_ARR, BIT_NUM) ((BIT_ARR) & (1L << (BIT_NUM)))
-
-#define L_BIT_ARR_0(BIT_ARR, BIT_NUM) (BIT_ARR) &= ~(1L << (BIT_NUM));
-
-#define L_BIT_ARR_1(BIT_ARR, BIT_NUM) (BIT_ARR) |= 1L << (BIT_NUM);
-
-#define L_BIT_ARR_ALL(BIT_ARR, BIT_VAL) (BIT_ARR) = 0L - (BIT_VAL);
-
-#endif
-
-#ifdef AVL_READ_ERRORS_HAPPEN
-
-#define L_CHECK_READ_ERROR(ERROR_RETURN) \
-  { if (AVL_READ_ERROR) return(ERROR_RETURN); }
-
-#else
-
-#define L_CHECK_READ_ERROR(ERROR_RETURN)
-
-#endif
-
-/* The presumed reason that an instantiation places additional fields
-** inside the AVL tree structure is that the SET_ and GET_ macros
-** need these fields.  The "balance" function does not explicitly use
-** any fields in the AVL tree structure, so only pass an AVL tree
-** structure pointer to "balance" if it has instantiation-specific
-** fields that are (presumably) needed by the SET_/GET_ calls within
-** "balance".
-*/
-#ifdef AVL_INSIDE_STRUCT
-
-#define L_BALANCE_PARAM_CALL_PREFIX l_tree,
-#define L_BALANCE_PARAM_DECL_PREFIX L_(avl) *l_tree,
-
-#else
-
-#define L_BALANCE_PARAM_CALL_PREFIX
-#define L_BALANCE_PARAM_DECL_PREFIX
-
-#endif
-
-#ifdef AVL_IMPL_MASK
-
-#define L_IMPL_MASK (AVL_IMPL_MASK)
-
-#else
-
-/* Define all functions. */
-#define L_IMPL_MASK AVL_IMPL_ALL
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_INIT)
-
-L_SC void L_(init)(L_(avl) *l_tree) {
-  l_tree->root = AVL_NULL;
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_IS_EMPTY)
-
-L_SC int L_(is_empty)(L_(avl) *l_tree) {
-  return(l_tree->root == AVL_NULL);
-}
-
-#endif
-
-/* Put the private balance function in the same compilation module as
-** the insert function.  */
-#if (L_IMPL_MASK & AVL_IMPL_INSERT)
-
-/* Balances subtree, returns handle of root node of subtree after balancing.
-*/
-L_SC AVL_HANDLE L_(balance)(L_BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h) {
-  AVL_HANDLE deep_h;
-
-  /* Either the "greater than" or the "less than" subtree of
-  ** this node has to be 2 levels deeper (or else it wouldn't
-  ** need balancing).
-  */
-  if (AVL_GET_BALANCE_FACTOR(bal_h) > 0) {
-    /* "Greater than" subtree is deeper. */
-
-    deep_h = AVL_GET_GREATER(bal_h, 1);
-
-    L_CHECK_READ_ERROR(AVL_NULL)
-
-    if (AVL_GET_BALANCE_FACTOR(deep_h) < 0) {
-      int bf;
-
-      AVL_HANDLE old_h = bal_h;
-      bal_h = AVL_GET_LESS(deep_h, 1);
-      L_CHECK_READ_ERROR(AVL_NULL)
-      AVL_SET_GREATER(old_h, AVL_GET_LESS(bal_h, 1))
-      AVL_SET_LESS(deep_h, AVL_GET_GREATER(bal_h, 1))
-      AVL_SET_LESS(bal_h, old_h)
-      AVL_SET_GREATER(bal_h, deep_h)
-
-      bf = AVL_GET_BALANCE_FACTOR(bal_h);
-
-      if (bf != 0) {
-        if (bf > 0) {
-          AVL_SET_BALANCE_FACTOR(old_h, -1)
-          AVL_SET_BALANCE_FACTOR(deep_h, 0)
-        } else {
-          AVL_SET_BALANCE_FACTOR(deep_h, 1)
-          AVL_SET_BALANCE_FACTOR(old_h, 0)
-        }
-
-        AVL_SET_BALANCE_FACTOR(bal_h, 0)
-      } else {
-        AVL_SET_BALANCE_FACTOR(old_h, 0)
-        AVL_SET_BALANCE_FACTOR(deep_h, 0)
-      }
-    } else {
-      AVL_SET_GREATER(bal_h, AVL_GET_LESS(deep_h, 0))
-      AVL_SET_LESS(deep_h, bal_h)
-
-      if (AVL_GET_BALANCE_FACTOR(deep_h) == 0) {
-        AVL_SET_BALANCE_FACTOR(deep_h, -1)
-        AVL_SET_BALANCE_FACTOR(bal_h, 1)
-      } else {
-        AVL_SET_BALANCE_FACTOR(deep_h, 0)
-        AVL_SET_BALANCE_FACTOR(bal_h, 0)
-      }
-
-      bal_h = deep_h;
-    }
-  } else {
-    /* "Less than" subtree is deeper. */
-
-    deep_h = AVL_GET_LESS(bal_h, 1);
-    L_CHECK_READ_ERROR(AVL_NULL)
-
-    if (AVL_GET_BALANCE_FACTOR(deep_h) > 0) {
-      int bf;
-      AVL_HANDLE old_h = bal_h;
-      bal_h = AVL_GET_GREATER(deep_h, 1);
-      L_CHECK_READ_ERROR(AVL_NULL)
-      AVL_SET_LESS(old_h, AVL_GET_GREATER(bal_h, 0))
-      AVL_SET_GREATER(deep_h, AVL_GET_LESS(bal_h, 0))
-      AVL_SET_GREATER(bal_h, old_h)
-      AVL_SET_LESS(bal_h, deep_h)
-
-      bf = AVL_GET_BALANCE_FACTOR(bal_h);
-
-      if (bf != 0) {
-        if (bf < 0) {
-          AVL_SET_BALANCE_FACTOR(old_h, 1)
-          AVL_SET_BALANCE_FACTOR(deep_h, 0)
-        } else {
-          AVL_SET_BALANCE_FACTOR(deep_h, -1)
-          AVL_SET_BALANCE_FACTOR(old_h, 0)
-        }
-
-        AVL_SET_BALANCE_FACTOR(bal_h, 0)
-      } else {
-        AVL_SET_BALANCE_FACTOR(old_h, 0)
-        AVL_SET_BALANCE_FACTOR(deep_h, 0)
-      }
-    } else {
-      AVL_SET_LESS(bal_h, AVL_GET_GREATER(deep_h, 0))
-      AVL_SET_GREATER(deep_h, bal_h)
-
-      if (AVL_GET_BALANCE_FACTOR(deep_h) == 0) {
-        AVL_SET_BALANCE_FACTOR(deep_h, 1)
-        AVL_SET_BALANCE_FACTOR(bal_h, -1)
-      } else {
-        AVL_SET_BALANCE_FACTOR(deep_h, 0)
-        AVL_SET_BALANCE_FACTOR(bal_h, 0)
-      }
-
-      bal_h = deep_h;
-    }
-  }
-
-  return(bal_h);
-}
-
-L_SC AVL_HANDLE L_(insert)(L_(avl) *l_tree, AVL_HANDLE h) {
-  AVL_SET_LESS(h, AVL_NULL)
-  AVL_SET_GREATER(h, AVL_NULL)
-  AVL_SET_BALANCE_FACTOR(h, 0)
-
-  if (l_tree->root == AVL_NULL)
-    l_tree->root = h;
-  else {
-    /* Last unbalanced node encountered in search for insertion point. */
-    AVL_HANDLE unbal = AVL_NULL;
-    /* Parent of last unbalanced node. */
-    AVL_HANDLE parent_unbal = AVL_NULL;
-    /* Balance factor of last unbalanced node. */
-    int unbal_bf;
-
-    /* Zero-based depth in tree. */
-    unsigned depth = 0, unbal_depth = 0;
-
-    /* Records a path into the tree.  If bit n is true, indicates
-    ** take greater branch from the nth node in the path, otherwise
-    ** take the less branch.  bit 0 gives branch from root, and
-    ** so on. */
-    L_BIT_ARR_DEFN(branch)
-
-    AVL_HANDLE hh = l_tree->root;
-    AVL_HANDLE parent = AVL_NULL;
-    int cmp;
-
-    do {
-      if (AVL_GET_BALANCE_FACTOR(hh) != 0) {
-        unbal = hh;
-        parent_unbal = parent;
-        unbal_depth = depth;
-      }
-
-      cmp = AVL_COMPARE_NODE_NODE(h, hh);
-
-      if (cmp == 0)
-        /* Duplicate key. */
-        return(hh);
-
-      parent = hh;
-
-      if (cmp > 0) {
-        hh = AVL_GET_GREATER(hh, 1);
-        L_BIT_ARR_1(branch, depth)
-      } else {
-        hh = AVL_GET_LESS(hh, 1);
-        L_BIT_ARR_0(branch, depth)
-      }
-
-      L_CHECK_READ_ERROR(AVL_NULL)
-      depth++;
-    } while (hh != AVL_NULL);
-
-    /*  Add node to insert as leaf of tree. */
-    if (cmp < 0)
-      AVL_SET_LESS(parent, h)
-      else
-        AVL_SET_GREATER(parent, h)
-
-        depth = unbal_depth;
-
-    if (unbal == AVL_NULL)
-      hh = l_tree->root;
-    else {
-      cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
-      depth++;
-      unbal_bf = AVL_GET_BALANCE_FACTOR(unbal);
-
-      if (cmp < 0)
-        unbal_bf--;
-      else  /* cmp > 0 */
-        unbal_bf++;
-
-      hh = cmp < 0 ? AVL_GET_LESS(unbal, 1) : AVL_GET_GREATER(unbal, 1);
-      L_CHECK_READ_ERROR(AVL_NULL)
-
-      if ((unbal_bf != -2) && (unbal_bf != 2)) {
-        /* No rebalancing of tree is necessary. */
-        AVL_SET_BALANCE_FACTOR(unbal, unbal_bf)
-        unbal = AVL_NULL;
-      }
-    }
-
-    if (hh != AVL_NULL)
-      while (h != hh) {
-        cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
-        depth++;
-
-        if (cmp < 0) {
-          AVL_SET_BALANCE_FACTOR(hh, -1)
-          hh = AVL_GET_LESS(hh, 1);
-        } else { /* cmp > 0 */
-          AVL_SET_BALANCE_FACTOR(hh, 1)
-          hh = AVL_GET_GREATER(hh, 1);
-        }
-
-        L_CHECK_READ_ERROR(AVL_NULL)
-      }
-
-    if (unbal != AVL_NULL) {
-      unbal = L_(balance)(L_BALANCE_PARAM_CALL_PREFIX unbal);
-      L_CHECK_READ_ERROR(AVL_NULL)
-
-      if (parent_unbal == AVL_NULL)
-        l_tree->root = unbal;
-      else {
-        depth = unbal_depth - 1;
-        cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
-
-        if (cmp < 0)
-          AVL_SET_LESS(parent_unbal, unbal)
-          else  /* cmp > 0 */
-            AVL_SET_GREATER(parent_unbal, unbal)
-          }
-    }
-
-  }
-
-  return(h);
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_SEARCH)
-
-L_SC AVL_HANDLE L_(search)(L_(avl) *l_tree, AVL_KEY k, avl_search_type st) {
-  int cmp, target_cmp;
-  AVL_HANDLE match_h = AVL_NULL;
-  AVL_HANDLE h = l_tree->root;
-
-  if (st & AVL_LESS)
-    target_cmp = 1;
-  else if (st & AVL_GREATER)
-    target_cmp = -1;
-  else
-    target_cmp = 0;
-
-  while (h != AVL_NULL) {
-    cmp = AVL_COMPARE_KEY_NODE(k, h);
-
-    if (cmp == 0) {
-      if (st & AVL_EQUAL) {
-        match_h = h;
-        break;
-      }
-
-      cmp = -target_cmp;
-    } else if (target_cmp != 0)
-      if (!((cmp ^ target_cmp) & L_MASK_HIGH_BIT))
-        /* cmp and target_cmp are both positive or both negative. */
-        match_h = h;
-
-    h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
-    L_CHECK_READ_ERROR(AVL_NULL)
-  }
-
-  return(match_h);
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_SEARCH_LEAST)
-
-L_SC AVL_HANDLE L_(search_least)(L_(avl) *l_tree) {
-  AVL_HANDLE h = l_tree->root;
-  AVL_HANDLE parent = AVL_NULL;
-
-  while (h != AVL_NULL) {
-    parent = h;
-    h = AVL_GET_LESS(h, 1);
-    L_CHECK_READ_ERROR(AVL_NULL)
-  }
-
-  return(parent);
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_SEARCH_GREATEST)
-
-L_SC AVL_HANDLE L_(search_greatest)(L_(avl) *l_tree) {
-  AVL_HANDLE h = l_tree->root;
-  AVL_HANDLE parent = AVL_NULL;
-
-  while (h != AVL_NULL) {
-    parent = h;
-    h = AVL_GET_GREATER(h, 1);
-    L_CHECK_READ_ERROR(AVL_NULL)
-  }
-
-  return(parent);
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_REMOVE)
-
-/* Prototype of balance function (called by remove) in case not in
-** same compilation unit.
-*/
-L_SC AVL_HANDLE L_(balance)(L_BALANCE_PARAM_DECL_PREFIX AVL_HANDLE bal_h);
-
-L_SC AVL_HANDLE L_(remove)(L_(avl) *l_tree, AVL_KEY k) {
-  /* Zero-based depth in tree. */
-  unsigned depth = 0, rm_depth;
-
-  /* Records a path into the tree.  If bit n is true, indicates
-  ** take greater branch from the nth node in the path, otherwise
-  ** take the less branch.  bit 0 gives branch from root, and
-  ** so on. */
-  L_BIT_ARR_DEFN(branch)
-
-  AVL_HANDLE h = l_tree->root;
-  AVL_HANDLE parent = AVL_NULL;
-  AVL_HANDLE child;
-  AVL_HANDLE path;
-  int cmp, cmp_shortened_sub_with_path;
-  int reduced_depth;
-  int bf;
-  AVL_HANDLE rm;
-  AVL_HANDLE parent_rm;
-
-  for (;;) {
-    if (h == AVL_NULL)
-      /* No node in tree with given key. */
-      return(AVL_NULL);
-
-    cmp = AVL_COMPARE_KEY_NODE(k, h);
-
-    if (cmp == 0)
-      /* Found node to remove. */
-      break;
-
-    parent = h;
-
-    if (cmp > 0) {
-      h = AVL_GET_GREATER(h, 1);
-      L_BIT_ARR_1(branch, depth)
-    } else {
-      h = AVL_GET_LESS(h, 1);
-      L_BIT_ARR_0(branch, depth)
-    }
-
-    L_CHECK_READ_ERROR(AVL_NULL)
-    depth++;
-    cmp_shortened_sub_with_path = cmp;
-  }
-
-  rm = h;
-  parent_rm = parent;
-  rm_depth = depth;
-
-  /* If the node to remove is not a leaf node, we need to get a
-  ** leaf node, or a node with a single leaf as its child, to put
-  ** in the place of the node to remove.  We will get the greatest
-  ** node in the less subtree (of the node to remove), or the least
-  ** node in the greater subtree.  We take the leaf node from the
-  ** deeper subtree, if there is one. */
-
-  if (AVL_GET_BALANCE_FACTOR(h) < 0) {
-    child = AVL_GET_LESS(h, 1);
-    L_BIT_ARR_0(branch, depth)
-    cmp = -1;
-  } else {
-    child = AVL_GET_GREATER(h, 1);
-    L_BIT_ARR_1(branch, depth)
-    cmp = 1;
-  }
-
-  L_CHECK_READ_ERROR(AVL_NULL)
-  depth++;
-
-  if (child != AVL_NULL) {
-    cmp = -cmp;
-
-    do {
-      parent = h;
-      h = child;
-
-      if (cmp < 0) {
-        child = AVL_GET_LESS(h, 1);
-        L_BIT_ARR_0(branch, depth)
-      } else {
-        child = AVL_GET_GREATER(h, 1);
-        L_BIT_ARR_1(branch, depth)
-      }
-
-      L_CHECK_READ_ERROR(AVL_NULL)
-      depth++;
-    } while (child != AVL_NULL);
-
-    if (parent == rm)
-      /* Only went through do loop once.  Deleted node will be replaced
-      ** in the tree structure by one of its immediate children. */
-      cmp_shortened_sub_with_path = -cmp;
-    else
-      cmp_shortened_sub_with_path = cmp;
-
-    /* Get the handle of the opposite child, which may not be null. */
-    child = cmp > 0 ? AVL_GET_LESS(h, 0) : AVL_GET_GREATER(h, 0);
-  }
-
-  if (parent == AVL_NULL)
-    /* There were only 1 or 2 nodes in this tree. */
-    l_tree->root = child;
-  else if (cmp_shortened_sub_with_path < 0)
-    AVL_SET_LESS(parent, child)
-    else
-      AVL_SET_GREATER(parent, child)
-
-      /* "path" is the parent of the subtree being eliminated or reduced
-      ** from a depth of 2 to 1.  If "path" is the node to be removed, we
-      ** set path to the node we're about to poke into the position of the
-      ** node to be removed. */
-      path = parent == rm ? h : parent;
-
-  if (h != rm) {
-    /* Poke in the replacement for the node to be removed. */
-    AVL_SET_LESS(h, AVL_GET_LESS(rm, 0))
-    AVL_SET_GREATER(h, AVL_GET_GREATER(rm, 0))
-    AVL_SET_BALANCE_FACTOR(h, AVL_GET_BALANCE_FACTOR(rm))
-
-    if (parent_rm == AVL_NULL)
-      l_tree->root = h;
-    else {
-      depth = rm_depth - 1;
-
-      if (L_BIT_ARR_VAL(branch, depth))
-        AVL_SET_GREATER(parent_rm, h)
-        else
-          AVL_SET_LESS(parent_rm, h)
-        }
-  }
-
-  if (path != AVL_NULL) {
-    /* Create a temporary linked list from the parent of the path node
-    ** to the root node. */
-    h = l_tree->root;
-    parent = AVL_NULL;
-    depth = 0;
-
-    while (h != path) {
-      if (L_BIT_ARR_VAL(branch, depth)) {
-        child = AVL_GET_GREATER(h, 1);
-        AVL_SET_GREATER(h, parent)
-      } else {
-        child = AVL_GET_LESS(h, 1);
-        AVL_SET_LESS(h, parent)
-      }
-
-      L_CHECK_READ_ERROR(AVL_NULL)
-      depth++;
-      parent = h;
-      h = child;
-    }
-
-    /* Climb from the path node to the root node using the linked
-    ** list, restoring the tree structure and rebalancing as necessary.
-    */
-    reduced_depth = 1;
-    cmp = cmp_shortened_sub_with_path;
-
-    for (;;) {
-      if (reduced_depth) {
-        bf = AVL_GET_BALANCE_FACTOR(h);
-
-        if (cmp < 0)
-          bf++;
-        else  /* cmp > 0 */
-          bf--;
-
-        if ((bf == -2) || (bf == 2)) {
-          h = L_(balance)(L_BALANCE_PARAM_CALL_PREFIX h);
-          L_CHECK_READ_ERROR(AVL_NULL)
-          bf = AVL_GET_BALANCE_FACTOR(h);
-        } else
-          AVL_SET_BALANCE_FACTOR(h, bf)
-          reduced_depth = (bf == 0);
-      }
-
-      if (parent == AVL_NULL)
-        break;
-
-      child = h;
-      h = parent;
-      depth--;
-      cmp = L_BIT_ARR_VAL(branch, depth) ? 1 : -1;
-
-      if (cmp < 0) {
-        parent = AVL_GET_LESS(h, 1);
-        AVL_SET_LESS(h, child)
-      } else {
-        parent = AVL_GET_GREATER(h, 1);
-        AVL_SET_GREATER(h, child)
-      }
-
-      L_CHECK_READ_ERROR(AVL_NULL)
-    }
-
-    l_tree->root = h;
-  }
-
-  return(rm);
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_SUBST)
-
-L_SC AVL_HANDLE L_(subst)(L_(avl) *l_tree, AVL_HANDLE new_node) {
-  AVL_HANDLE h = l_tree->root;
-  AVL_HANDLE parent = AVL_NULL;
-  int cmp, last_cmp;
-
-  /* Search for node already in tree with same key. */
-  for (;;) {
-    if (h == AVL_NULL)
-      /* No node in tree with same key as new node. */
-      return(AVL_NULL);
-
-    cmp = AVL_COMPARE_NODE_NODE(new_node, h);
-
-    if (cmp == 0)
-      /* Found the node to substitute new one for. */
-      break;
-
-    last_cmp = cmp;
-    parent = h;
-    h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
-    L_CHECK_READ_ERROR(AVL_NULL)
-  }
-
-  /* Copy tree housekeeping fields from node in tree to new node. */
-  AVL_SET_LESS(new_node, AVL_GET_LESS(h, 0))
-  AVL_SET_GREATER(new_node, AVL_GET_GREATER(h, 0))
-  AVL_SET_BALANCE_FACTOR(new_node, AVL_GET_BALANCE_FACTOR(h))
-
-  if (parent == AVL_NULL)
-    /* New node is also new root. */
-    l_tree->root = new_node;
-  else {
-    /* Make parent point to new node. */
-    if (last_cmp < 0)
-      AVL_SET_LESS(parent, new_node)
-      else
-        AVL_SET_GREATER(parent, new_node)
-      }
-
-  return(h);
-}
-
-#endif
-
-#ifdef AVL_BUILD_ITER_TYPE
-
-#if (L_IMPL_MASK & AVL_IMPL_BUILD)
-
-L_SC int L_(build)(
-  L_(avl) *l_tree, AVL_BUILD_ITER_TYPE p, L_SIZE num_nodes) {
-  /* Gives path to subtree being built.  If bit n is false, branch
-  ** less from the node at depth n, if true branch greater. */
-  L_BIT_ARR_DEFN(branch)
-
-  /* If bit n is true, then for the current subtree at depth n, its
-  ** greater subtree has one more node than its less subtree. */
-  L_BIT_ARR_DEFN(rem)
-
-  /* Depth of root node of current subtree. */
-  unsigned depth = 0;
-
-  /* Number of nodes in current subtree. */
-  L_SIZE num_sub = num_nodes;
-
-  /* The algorithm relies on a stack of nodes whose less subtree has
-  ** been built, but whose greater subtree has not yet been built.
-  ** The stack is implemented as linked list.  The nodes are linked
-  ** together by having the "greater" handle of a node set to the
-  ** next node in the list.  "less_parent" is the handle of the first
-  ** node in the list. */
-  AVL_HANDLE less_parent = AVL_NULL;
-
-  /* h is root of current subtree, child is one of its children. */
-  AVL_HANDLE h;
-  AVL_HANDLE child;
-
-  if (num_nodes == 0) {
-    l_tree->root = AVL_NULL;
-    return(1);
-  }
-
-  for (;;) {
-    while (num_sub > 2) {
-      /* Subtract one for root of subtree. */
-      num_sub--;
-
-      if (num_sub & 1)
-        L_BIT_ARR_1(rem, depth)
-        else
-          L_BIT_ARR_0(rem, depth)
-          L_BIT_ARR_0(branch, depth)
-          depth++;
-
-      num_sub >>= 1;
-    }
-
-    if (num_sub == 2) {
-      /* Build a subtree with two nodes, slanting to greater.
-      ** I arbitrarily chose to always have the extra node in the
-      ** greater subtree when there is an odd number of nodes to
-      ** split between the two subtrees. */
-
-      h = AVL_BUILD_ITER_VAL(p);
-      L_CHECK_READ_ERROR(0)
-      AVL_BUILD_ITER_INCR(p)
-      child = AVL_BUILD_ITER_VAL(p);
-      L_CHECK_READ_ERROR(0)
-      AVL_BUILD_ITER_INCR(p)
-      AVL_SET_LESS(child, AVL_NULL)
-      AVL_SET_GREATER(child, AVL_NULL)
-      AVL_SET_BALANCE_FACTOR(child, 0)
-      AVL_SET_GREATER(h, child)
-      AVL_SET_LESS(h, AVL_NULL)
-      AVL_SET_BALANCE_FACTOR(h, 1)
-    } else { /* num_sub == 1 */
-      /* Build a subtree with one node. */
-
-      h = AVL_BUILD_ITER_VAL(p);
-      L_CHECK_READ_ERROR(0)
-      AVL_BUILD_ITER_INCR(p)
-      AVL_SET_LESS(h, AVL_NULL)
-      AVL_SET_GREATER(h, AVL_NULL)
-      AVL_SET_BALANCE_FACTOR(h, 0)
-    }
-
-    while (depth) {
-      depth--;
-
-      if (!L_BIT_ARR_VAL(branch, depth))
-        /* We've completed a less subtree. */
-        break;
-
-      /* We've completed a greater subtree, so attach it to
-      ** its parent (that is less than it).  We pop the parent
-      ** off the stack of less parents. */
-      child = h;
-      h = less_parent;
-      less_parent = AVL_GET_GREATER(h, 1);
-      L_CHECK_READ_ERROR(0)
-      AVL_SET_GREATER(h, child)
-      /* num_sub = 2 * (num_sub - rem[depth]) + rem[depth] + 1 */
-      num_sub <<= 1;
-      num_sub += L_BIT_ARR_VAL(rem, depth) ? 0 : 1;
-
-      if (num_sub & (num_sub - 1))
-        /* num_sub is not a power of 2. */
-        AVL_SET_BALANCE_FACTOR(h, 0)
-        else
-          /* num_sub is a power of 2. */
-          AVL_SET_BALANCE_FACTOR(h, 1)
-        }
-
-    if (num_sub == num_nodes)
-      /* We've completed the full tree. */
-      break;
-
-    /* The subtree we've completed is the less subtree of the
-    ** next node in the sequence. */
-
-    child = h;
-    h = AVL_BUILD_ITER_VAL(p);
-    L_CHECK_READ_ERROR(0)
-    AVL_BUILD_ITER_INCR(p)
-    AVL_SET_LESS(h, child)
-
-    /* Put h into stack of less parents. */
-    AVL_SET_GREATER(h, less_parent)
-    less_parent = h;
-
-    /* Proceed to creating greater than subtree of h. */
-    L_BIT_ARR_1(branch, depth)
-    num_sub += L_BIT_ARR_VAL(rem, depth) ? 1 : 0;
-    depth++;
-
-  } /* end for (;; ) */
-
-  l_tree->root = h;
-
-  return(1);
-}
-
-#endif
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_INIT_ITER)
-
-/* Initialize depth to invalid value, to indicate iterator is
-** invalid.   (Depth is zero-base.)  It's not necessary to initialize
-** iterators prior to passing them to the "start" function.
-*/
-L_SC void L_(init_iter)(L_(iter) *iter) {
-  iter->depth = ~0;
-}
-
-#endif
-
-#ifdef AVL_READ_ERRORS_HAPPEN
-
-#define L_CHECK_READ_ERROR_INV_DEPTH \
-  { if (AVL_READ_ERROR) { iter->depth = ~0; return; } }
-
-#else
-
-#define L_CHECK_READ_ERROR_INV_DEPTH
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_START_ITER)
-
-L_SC void L_(start_iter)(
-  L_(avl) *l_tree, L_(iter) *iter, AVL_KEY k, avl_search_type st) {
-  AVL_HANDLE h = l_tree->root;
-  unsigned d = 0;
-  int cmp, target_cmp;
-
-  /* Save the tree that we're going to iterate through in a
-  ** member variable. */
-  iter->tree_ = l_tree;
-
-  iter->depth = ~0;
-
-  if (h == AVL_NULL)
-    /* Tree is empty. */
-    return;
-
-  if (st & AVL_LESS)
-    /* Key can be greater than key of starting node. */
-    target_cmp = 1;
-  else if (st & AVL_GREATER)
-    /* Key can be less than key of starting node. */
-    target_cmp = -1;
-  else
-    /* Key must be same as key of starting node. */
-    target_cmp = 0;
-
-  for (;;) {
-    cmp = AVL_COMPARE_KEY_NODE(k, h);
-
-    if (cmp == 0) {
-      if (st & AVL_EQUAL) {
-        /* Equal node was sought and found as starting node. */
-        iter->depth = d;
-        break;
-      }
-
-      cmp = -target_cmp;
-    } else if (target_cmp != 0)
-      if (!((cmp ^ target_cmp) & L_MASK_HIGH_BIT))
-        /* cmp and target_cmp are both negative or both positive. */
-        iter->depth = d;
-
-    h = cmp < 0 ? AVL_GET_LESS(h, 1) : AVL_GET_GREATER(h, 1);
-    L_CHECK_READ_ERROR_INV_DEPTH
-
-    if (h == AVL_NULL)
-      break;
-
-    if (cmp > 0)
-      L_BIT_ARR_1(iter->branch, d)
-      else
-        L_BIT_ARR_0(iter->branch, d)
-        iter->path_h[d++] = h;
-  }
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_START_ITER_LEAST)
-
-L_SC void L_(start_iter_least)(L_(avl) *l_tree, L_(iter) *iter) {
-  AVL_HANDLE h = l_tree->root;
-
-  iter->tree_ = l_tree;
-
-  iter->depth = ~0;
-
-  L_BIT_ARR_ALL(iter->branch, 0)
-
-  while (h != AVL_NULL) {
-    if (iter->depth != ~0)
-      iter->path_h[iter->depth] = h;
-
-    iter->depth++;
-    h = AVL_GET_LESS(h, 1);
-    L_CHECK_READ_ERROR_INV_DEPTH
-  }
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_START_ITER_GREATEST)
-
-L_SC void L_(start_iter_greatest)(L_(avl) *l_tree, L_(iter) *iter) {
-  AVL_HANDLE h = l_tree->root;
-
-  iter->tree_ = l_tree;
-
-  iter->depth = ~0;
-
-  L_BIT_ARR_ALL(iter->branch, 1)
-
-  while (h != AVL_NULL) {
-    if (iter->depth != ~0)
-      iter->path_h[iter->depth] = h;
-
-    iter->depth++;
-    h = AVL_GET_GREATER(h, 1);
-    L_CHECK_READ_ERROR_INV_DEPTH
-  }
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_GET_ITER)
-
-L_SC AVL_HANDLE L_(get_iter)(L_(iter) *iter) {
-  if (iter->depth == ~0)
-    return(AVL_NULL);
-
-  return(iter->depth == 0 ?
-         iter->tree_->root : iter->path_h[iter->depth - 1]);
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_INCR_ITER)
-
-L_SC void L_(incr_iter)(L_(iter) *iter) {
-#define l_tree (iter->tree_)
-
-  if (iter->depth != ~0) {
-    AVL_HANDLE h =
-      AVL_GET_GREATER((iter->depth == 0 ?
-                       iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
-    L_CHECK_READ_ERROR_INV_DEPTH
-
-    if (h == AVL_NULL)
-      do {
-        if (iter->depth == 0) {
-          iter->depth = ~0;
-          break;
-        }
-
-        iter->depth--;
-      } while (L_BIT_ARR_VAL(iter->branch, iter->depth));
-    else {
-      L_BIT_ARR_1(iter->branch, iter->depth)
-      iter->path_h[iter->depth++] = h;
-
-      for (;;) {
-        h = AVL_GET_LESS(h, 1);
-        L_CHECK_READ_ERROR_INV_DEPTH
-
-        if (h == AVL_NULL)
-          break;
-
-        L_BIT_ARR_0(iter->branch, iter->depth)
-        iter->path_h[iter->depth++] = h;
-      }
-    }
-  }
-
-#undef l_tree
-}
-
-#endif
-
-#if (L_IMPL_MASK & AVL_IMPL_DECR_ITER)
-
-L_SC void L_(decr_iter)(L_(iter) *iter) {
-#define l_tree (iter->tree_)
-
-  if (iter->depth != ~0) {
-    AVL_HANDLE h =
-      AVL_GET_LESS((iter->depth == 0 ?
-                    iter->tree_->root : iter->path_h[iter->depth - 1]), 1);
-    L_CHECK_READ_ERROR_INV_DEPTH
-
-    if (h == AVL_NULL)
-      do {
-        if (iter->depth == 0) {
-          iter->depth = ~0;
-          break;
-        }
-
-        iter->depth--;
-      } while (!L_BIT_ARR_VAL(iter->branch, iter->depth));
-    else {
-      L_BIT_ARR_0(iter->branch, iter->depth)
-      iter->path_h[iter->depth++] = h;
-
-      for (;;) {
-        h = AVL_GET_GREATER(h, 1);
-        L_CHECK_READ_ERROR_INV_DEPTH
-
-        if (h == AVL_NULL)
-          break;
-
-        L_BIT_ARR_1(iter->branch, iter->depth)
-        iter->path_h[iter->depth++] = h;
-      }
-    }
-  }
-
-#undef l_tree
-}
-
-#endif
-
-/* Tidy up the preprocessor symbol name space. */
-#undef L_
-#undef L_EST_LONG_BIT
-#undef L_SIZE
-#undef L_MASK_HIGH_BIT
-#undef L_LONG_BIT
-#undef L_BIT_ARR_DEFN
-#undef L_BIT_ARR_VAL
-#undef L_BIT_ARR_0
-#undef L_BIT_ARR_1
-#undef L_BIT_ARR_ALL
-#undef L_CHECK_READ_ERROR
-#undef L_CHECK_READ_ERROR_INV_DEPTH
-#undef L_BIT_ARR_LONGS
-#undef L_IMPL_MASK
-#undef L_CHECK_READ_ERROR
-#undef L_CHECK_READ_ERROR_INV_DEPTH
-#undef L_SC
-#undef L_BALANCE_PARAM_CALL_PREFIX
-#undef L_BALANCE_PARAM_DECL_PREFIX
-
-#endif  // VPX_MEM_MEMORY_MANAGER_INCLUDE_CAVL_IMPL_H_
diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h
deleted file mode 100644
index d584b19..0000000
--- a/vpx_mem/memory_manager/include/heapmm.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_MEM_MEMORY_MANAGER_INCLUDE_HEAPMM_H_
-#define VPX_MEM_MEMORY_MANAGER_INCLUDE_HEAPMM_H_
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-/* External header file for Heap Memory Manager.  See documentation in
-** heapmm.html.
-*/
-
-#undef HMM_PROCESS
-
-/* Include once per configuration in a particular translation unit. */
-
-#ifndef HMM_CNFG_NUM
-
-/* Default configuration. */
-
-#ifndef HMM_INC_CNFG_DFLT
-#define HMM_INC_CNFG_DFLT
-#define HMM_PROCESS
-#endif
-
-#elif HMM_CNFG_NUM == 0
-
-/* Test configuration. */
-
-#ifndef HMM_INC_CNFG_0
-#define HMM_INC_CNFG_0
-#define HMM_PROCESS
-#endif
-
-#elif HMM_CNFG_NUM == 1
-
-#ifndef HMM_INC_CNFG_1
-#define HMM_INC_CNFG_1
-#define HMM_PROCESS
-#endif
-
-#elif HMM_CNFG_NUM == 2
-
-#ifndef HMM_INC_CNFG_2
-#define HMM_INC_CNFG_2
-#define HMM_PROCESS
-#endif
-
-#elif HMM_CNFG_NUM == 3
-
-#ifndef HMM_INC_CNFG_3
-#define HMM_INC_CNFG_3
-#define HMM_PROCESS
-#endif
-
-#elif HMM_CNFG_NUM == 4
-
-#ifndef HMM_INC_CNFG_4
-#define HMM_INC_CNFG_4
-#define HMM_PROCESS
-#endif
-
-#elif HMM_CNFG_NUM == 5
-
-#ifndef HMM_INC_CNFG_5
-#define HMM_INC_CNFG_5
-#define HMM_PROCESS
-#endif
-
-#endif
-
-#ifdef HMM_PROCESS
-
-#include "hmm_cnfg.h"
-
-/* Heap descriptor. */
-typedef struct HMM_UNIQUE(structure) {
-  /* private: */
-
-  /* Pointer to (payload of) root node in AVL tree.  This field should
-  ** really be the AVL tree descriptor (type avl_avl).  But (in the
-  ** instantiation of the AVL tree generic package used in package) the
-  ** AVL tree descriptor simply contains a pointer to the root.  So,
-  ** whenever a pointer to the AVL tree descriptor is needed, I use the
-  ** cast:
-  **
-  ** (avl_avl *) &(heap_desc->avl_tree_root)
-  **
-  ** (where heap_desc is a pointer to a heap descriptor).  This trick
-  ** allows me to avoid including cavl_if.h in this external header. */
-  void *avl_tree_root;
-
-  /* Pointer to first byte of last block freed, after any coalescing. */
-  void *last_freed;
-
-  /* public: */
-
-  HMM_UNIQUE(size_bau) num_baus_can_shrink;
-  void *end_of_shrinkable_chunk;
-}
-HMM_UNIQUE(descriptor);
-
-/* Prototypes for externally-callable functions. */
-
-void HMM_UNIQUE(init)(HMM_UNIQUE(descriptor) *desc);
-
-void *HMM_UNIQUE(alloc)(
-  HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) num_addr_align_units);
-
-/* NOT YET IMPLEMENTED */
-void *HMM_UNIQUE(greedy_alloc)(
-  HMM_UNIQUE(descriptor) *desc, HMM_UNIQUE(size_aau) needed_addr_align_units,
-  HMM_UNIQUE(size_aau) coveted_addr_align_units);
-
-int HMM_UNIQUE(resize)(
-  HMM_UNIQUE(descriptor) *desc, void *mem,
-  HMM_UNIQUE(size_aau) num_addr_align_units);
-
-/* NOT YET IMPLEMENTED */
-int HMM_UNIQUE(greedy_resize)(
-  HMM_UNIQUE(descriptor) *desc, void *mem,
-  HMM_UNIQUE(size_aau) needed_addr_align_units,
-  HMM_UNIQUE(size_aau) coveted_addr_align_units);
-
-void HMM_UNIQUE(free)(HMM_UNIQUE(descriptor) *desc, void *mem);
-
-HMM_UNIQUE(size_aau) HMM_UNIQUE(true_size)(void *mem);
-
-HMM_UNIQUE(size_aau) HMM_UNIQUE(largest_available)(
-  HMM_UNIQUE(descriptor) *desc);
-
-void HMM_UNIQUE(new_chunk)(
-  HMM_UNIQUE(descriptor) *desc, void *start_of_chunk,
-  HMM_UNIQUE(size_bau) num_block_align_units);
-
-void HMM_UNIQUE(grow_chunk)(
-  HMM_UNIQUE(descriptor) *desc, void *end_of_chunk,
-  HMM_UNIQUE(size_bau) num_block_align_units);
-
-/* NOT YET IMPLEMENTED */
-void HMM_UNIQUE(shrink_chunk)(
-  HMM_UNIQUE(descriptor) *desc,
-  HMM_UNIQUE(size_bau) num_block_align_units);
-
-#endif /* defined HMM_PROCESS */
-#endif  // VPX_MEM_MEMORY_MANAGER_INCLUDE_HEAPMM_H_
diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h
deleted file mode 100644
index caa8713..0000000
--- a/vpx_mem/memory_manager/include/hmm_cnfg.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef VPX_MEM_MEMORY_MANAGER_INCLUDE_HMM_CNFG_H_
-#define VPX_MEM_MEMORY_MANAGER_INCLUDE_HMM_CNFG_H_
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-/* Configure Heap Memory Manager for processor architecture, compiler,
-** and desired performance characteristics.  This file is included
-** by heapmm.h, so these definitions can be used by code external to
-** HMM.  You can change the default configuration, and/or create alternate
-** configuration(s).
-*/
-
-/* To allow for multiple configurations of HMM to be used in the same
-** compilation unit, undefine all preprocessor symbols that will be
-** defined below.
-*/
-#undef HMM_ADDR_ALIGN_UNIT
-#undef HMM_BLOCK_ALIGN_UNIT
-#undef HMM_UNIQUE
-#undef HMM_DESC_PARAM
-#undef HMM_SYM_TO_STRING
-#undef HMM_SYM_TO_STRING
-#undef HMM_AUDIT_FAIL
-
-/* Turn X into a string after one macro expansion pass of X.  This trick
-** works with both GCC and Visual C++. */
-#define HMM_SYM_TO_STRING(X) HMM_SYM_TO_STRING(X)
-#define HMM_SYM_TO_STRING(X) #X
-
-#ifndef HMM_CNFG_NUM
-
-/* Default configuration. */
-
-/* Use hmm_ prefix to avoid identifier conflicts. */
-#define HMM_UNIQUE(BASE) hmm_ ## BASE
-
-/* Number of bytes in an Address Alignment Unit (AAU). */
-// fwg
-// #define HMM_ADDR_ALIGN_UNIT sizeof(int)
-#define HMM_ADDR_ALIGN_UNIT 32
-
-/* Number of AAUs in a Block Alignment Unit (BAU). */
-#define HMM_BLOCK_ALIGN_UNIT 1
-
-/* Type of unsigned integer big enough to hold the size of a Block in AAUs. */
-typedef unsigned long HMM_UNIQUE(size_aau);
-
-/* Type of unsigned integer big enough to hold the size of a Block/Chunk
-** in BAUs.  The high bit will be robbed. */
-typedef unsigned long HMM_UNIQUE(size_bau);
-
-void hmm_dflt_abort(const char *, const char *);
-
-/* Actions upon a self-audit failure.  Must expand to a single complete
-** statement.  If you remove the definition of this macro, no self-auditing
-** will be performed. */
-#define HMM_AUDIT_FAIL \
-  hmm_dflt_abort(__FILE__, HMM_SYM_TO_STRING(__LINE__));
-
-#elif HMM_CNFG_NUM == 0
-
-/* Definitions for testing. */
-
-#define HMM_UNIQUE(BASE) thmm_ ## BASE
-
-#define HMM_ADDR_ALIGN_UNIT sizeof(int)
-
-#define HMM_BLOCK_ALIGN_UNIT 3
-
-typedef unsigned HMM_UNIQUE(size_aau);
-
-typedef unsigned short HMM_UNIQUE(size_bau);
-
-/* Under this test setup, a long jump is done if there is a self-audit
-** failure.
-*/
-
-extern jmp_buf HMM_UNIQUE(jmp_buf);
-extern const char *HMM_UNIQUE(fail_file);
-extern unsigned HMM_UNIQUE(fail_line);
-
-#define HMM_AUDIT_FAIL \
-  { HMM_UNIQUE(fail_file) = __FILE__; HMM_UNIQUE(fail_line) = __LINE__; \
-    longjmp(HMM_UNIQUE(jmp_buf), 1); }
-
-#elif HMM_CNFG_NUM == 1
-
-/* Put configuration 1 definitions here (if there is a configuration 1). */
-
-#elif HMM_CNFG_NUM == 2
-
-/* Put configuration 2 definitions here. */
-
-#elif HMM_CNFG_NUM == 3
-
-/* Put configuration 3 definitions here. */
-
-#elif HMM_CNFG_NUM == 4
-
-/* Put configuration 4 definitions here. */
-
-#elif HMM_CNFG_NUM == 5
-
-/* Put configuration 5 definitions here. */
-
-#endif
-
-#endif  // VPX_MEM_MEMORY_MANAGER_INCLUDE_HMM_CNFG_H_
diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h
deleted file mode 100644
index 7302aa2..0000000
--- a/vpx_mem/memory_manager/include/hmm_intrnl.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This code is in the public domain.
-** Version: 1.1  Author: Walt Karas
-*/
-
-#ifndef VPX_MEM_MEMORY_MANAGER_INCLUDE_HMM_INTRNL_H_
-#define VPX_MEM_MEMORY_MANAGER_INCLUDE_HMM_INTRNL_H_
-
-#ifdef __uClinux__
-# include <lddk.h>
-#endif
-
-#include "heapmm.h"
-
-#define U(BASE) HMM_UNIQUE(BASE)
-
-/* Mask of high bit of variable of size_bau type. */
-#define HIGH_BIT_BAU_SIZE \
-  ((U(size_bau)) ~ (((U(size_bau)) ~ (U(size_bau)) 0) >> 1))
-
-/* Add a given number of AAUs to pointer. */
-#define AAUS_FORWARD(PTR, AAU_OFFSET) \
-  (((char *) (PTR)) + ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
-
-/* Subtract a given number of AAUs from pointer. */
-#define AAUS_BACKWARD(PTR, AAU_OFFSET) \
-  (((char *) (PTR)) - ((AAU_OFFSET) * ((U(size_aau)) HMM_ADDR_ALIGN_UNIT)))
-
-/* Add a given number of BAUs to a pointer. */
-#define BAUS_FORWARD(PTR, BAU_OFFSET) \
-  AAUS_FORWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
-
-/* Subtract a given number of BAUs to a pointer. */
-#define BAUS_BACKWARD(PTR, BAU_OFFSET) \
-  AAUS_BACKWARD((PTR), (BAU_OFFSET) * ((U(size_aau)) HMM_BLOCK_ALIGN_UNIT))
-
-typedef struct head_struct {
-  /* Sizes in Block Alignment Units. */
-  HMM_UNIQUE(size_bau) previous_block_size, block_size;
-}
-head_record;
-
-typedef struct ptr_struct {
-  struct ptr_struct *self, *prev, *next;
-}
-ptr_record;
-
-/* Divide and round up any fraction to the next whole number. */
-#define DIV_ROUND_UP(NUMER, DENOM) (((NUMER) + (DENOM) - 1) / (DENOM))
-
-/* Number of AAUs in a block head. */
-#define HEAD_AAUS DIV_ROUND_UP(sizeof(head_record), HMM_ADDR_ALIGN_UNIT)
-
-/* Number of AAUs in a block pointer record. */
-#define PTR_RECORD_AAUS DIV_ROUND_UP(sizeof(ptr_record), HMM_ADDR_ALIGN_UNIT)
-
-/* Number of BAUs in a dummy end record (at end of chunk). */
-#define DUMMY_END_BLOCK_BAUS DIV_ROUND_UP(HEAD_AAUS, HMM_BLOCK_ALIGN_UNIT)
-
-/* Minimum number of BAUs in a block (allowing room for the pointer record. */
-#define MIN_BLOCK_BAUS \
-  DIV_ROUND_UP(HEAD_AAUS + PTR_RECORD_AAUS, HMM_BLOCK_ALIGN_UNIT)
-
-/* Return number of BAUs in block (masking off high bit containing block
-** status). */
-#define BLOCK_BAUS(HEAD_PTR) \
-  (((head_record *) (HEAD_PTR))->block_size & ~HIGH_BIT_BAU_SIZE)
-
-/* Return number of BAUs in previous block (masking off high bit containing
-** block status). */
-#define PREV_BLOCK_BAUS(HEAD_PTR) \
-  (((head_record *) (HEAD_PTR))->previous_block_size & ~HIGH_BIT_BAU_SIZE)
-
-/* Set number of BAUs in previous block, preserving high bit containing
-** block status. */
-#define SET_PREV_BLOCK_BAUS(HEAD_PTR, N_BAUS) \
-  { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
-    h_ptr->previous_block_size &= HIGH_BIT_BAU_SIZE; \
-    h_ptr->previous_block_size |= (N_BAUS); }
-
-/* Convert pointer to pointer record of block to pointer to block's head
-** record. */
-#define PTR_REC_TO_HEAD(PTR_REC_PTR) \
-  ((head_record *) AAUS_BACKWARD(PTR_REC_PTR, HEAD_AAUS))
-
-/* Convert pointer to block head to pointer to block's pointer record. */
-#define HEAD_TO_PTR_REC(HEAD_PTR) \
-  ((ptr_record *) AAUS_FORWARD(HEAD_PTR, HEAD_AAUS))
-
-/* Returns non-zero if block is allocated. */
-#define IS_BLOCK_ALLOCATED(HEAD_PTR) \
-  (((((head_record *) (HEAD_PTR))->block_size | \
-     ((head_record *) (HEAD_PTR))->previous_block_size) & \
-    HIGH_BIT_BAU_SIZE) == 0)
-
-#define MARK_BLOCK_ALLOCATED(HEAD_PTR) \
-  { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
-    h_ptr->block_size &= ~HIGH_BIT_BAU_SIZE; \
-    h_ptr->previous_block_size &= ~HIGH_BIT_BAU_SIZE; }
-
-/* Mark a block as free when it is not the first block in a bin (and
-** therefore not a node in the AVL tree). */
-#define MARK_SUCCESSIVE_BLOCK_IN_FREE_BIN(HEAD_PTR) \
-  { register head_record *h_ptr = (head_record *) (HEAD_PTR); \
-    h_ptr->block_size |= HIGH_BIT_BAU_SIZE; }
-
-/* Prototypes for internal functions implemented in one file and called in
-** another.
-*/
-
-void U(into_free_collection)(U(descriptor) *desc, head_record *head_ptr);
-
-void U(out_of_free_collection)(U(descriptor) *desc, head_record *head_ptr);
-
-void *U(alloc_from_bin)(
-  U(descriptor) *desc, ptr_record *bin_front_ptr, U(size_bau) n_baus);
-
-#ifdef HMM_AUDIT_FAIL
-
-/* Simply contains a reference to the HMM_AUDIT_FAIL macro and a
-** dummy return. */
-int U(audit_block_fail_dummy_return)(void);
-
-
-/* Auditing a block consists of checking that the size in its head
-** matches the previous block size in the head of the next block. */
-#define AUDIT_BLOCK_AS_EXPR(HEAD_PTR) \
-  ((BLOCK_BAUS(HEAD_PTR) == \
-    PREV_BLOCK_BAUS(BAUS_FORWARD(HEAD_PTR, BLOCK_BAUS(HEAD_PTR)))) ? \
-   0 : U(audit_block_fail_dummy_return)())
-
-#define AUDIT_BLOCK(HEAD_PTR) \
-  { void *h_ptr = (HEAD_PTR); AUDIT_BLOCK_AS_EXPR(h_ptr); }
-
-#endif
-
-/* Interface to AVL tree generic package instantiation. */
-
-#define AVL_UNIQUE(BASE) U(avl_ ## BASE)
-
-#define AVL_HANDLE ptr_record *
-
-#define AVL_KEY U(size_bau)
-
-#define AVL_MAX_DEPTH 64
-
-#include "cavl_if.h"
-
-#endif  // VPX_MEM_MEMORY_MANAGER_INCLUDE_HMM_INTRNL_H_
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index da61642..0eb3f7a 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -18,113 +18,11 @@
 #include "include/vpx_mem_intrnl.h"
 #include "vpx/vpx_integer.h"
 
-#if CONFIG_MEM_TRACKER
-#ifndef VPX_NO_GLOBALS
-static unsigned long g_alloc_count = 0;
-#else
-#include "vpx_global_handling.h"
-#define g_alloc_count vpxglobalm(vpxmem,g_alloc_count)
-#endif
-#endif
-
-#if CONFIG_MEM_MANAGER
-# include "heapmm.h"
-# include "hmm_intrnl.h"
-
-# define SHIFT_HMM_ADDR_ALIGN_UNIT 5
-# define TOTAL_MEMORY_TO_ALLOCATE  20971520 /* 20 * 1024 * 1024 */
-
-# define MM_DYNAMIC_MEMORY 1
-# if MM_DYNAMIC_MEMORY
-static unsigned char *g_p_mng_memory_raw = NULL;
-static unsigned char *g_p_mng_memory     = NULL;
-# else
-static unsigned char g_p_mng_memory[TOTAL_MEMORY_TO_ALLOCATE];
-# endif
-
-static size_t g_mm_memory_size = TOTAL_MEMORY_TO_ALLOCATE;
-
-static hmm_descriptor hmm_d;
-static int g_mng_memory_allocated = 0;
-
-static int vpx_mm_create_heap_memory();
-static void *vpx_mm_realloc(void *memblk, size_t size);
-#endif /*CONFIG_MEM_MANAGER*/
-
-#if USE_GLOBAL_FUNCTION_POINTERS
-struct GLOBAL_FUNC_POINTERS {
-  g_malloc_func g_malloc;
-  g_calloc_func g_calloc;
-  g_realloc_func g_realloc;
-  g_free_func g_free;
-  g_memcpy_func g_memcpy;
-  g_memset_func g_memset;
-  g_memmove_func g_memmove;
-} *g_func = NULL;
-
-# define VPX_MALLOC_L  g_func->g_malloc
-# define VPX_REALLOC_L g_func->g_realloc
-# define VPX_FREE_L    g_func->g_free
-# define VPX_MEMCPY_L  g_func->g_memcpy
-# define VPX_MEMSET_L  g_func->g_memset
-# define VPX_MEMMOVE_L g_func->g_memmove
-#else
-# define VPX_MALLOC_L  malloc
-# define VPX_REALLOC_L realloc
-# define VPX_FREE_L    free
-# define VPX_MEMCPY_L  memcpy
-# define VPX_MEMSET_L  memset
-# define VPX_MEMMOVE_L memmove
-#endif /* USE_GLOBAL_FUNCTION_POINTERS */
-
-unsigned int vpx_mem_get_version() {
-  unsigned int ver = ((unsigned int)(unsigned char)VPX_MEM_VERSION_CHIEF << 24 |
-                      (unsigned int)(unsigned char)VPX_MEM_VERSION_MAJOR << 16 |
-                      (unsigned int)(unsigned char)VPX_MEM_VERSION_MINOR << 8  |
-                      (unsigned int)(unsigned char)VPX_MEM_VERSION_PATCH);
-  return ver;
-}
-
-int vpx_mem_set_heap_size(size_t size) {
-  int ret = -1;
-
-#if CONFIG_MEM_MANAGER
-#if MM_DYNAMIC_MEMORY
-
-  if (!g_mng_memory_allocated && size) {
-    g_mm_memory_size = size;
-    ret = 0;
-  } else
-    ret = -3;
-
-#else
-  ret = -2;
-#endif
-#else
-  (void)size;
-#endif
-
-  return ret;
-}
-
 void *vpx_memalign(size_t align, size_t size) {
   void *addr,
        * x = NULL;
 
-#if CONFIG_MEM_MANAGER
-  int number_aau;
-
-  if (vpx_mm_create_heap_memory() < 0) {
-    _P(printf("[vpx][mm] ERROR vpx_memalign() Couldn't create memory for Heap.\n");)
-  }
-
-  number_aau = ((size + align - 1 + ADDRESS_STORAGE_SIZE) >>
-                SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
-
-  addr = hmm_alloc(&hmm_d, number_aau);
-#else
-  addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE);
-#endif /*CONFIG_MEM_MANAGER*/
+  addr = malloc(size + align - 1 + ADDRESS_STORAGE_SIZE);
 
   if (addr) {
     x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, (int)align);
@@ -145,7 +43,7 @@
   x = vpx_memalign(DEFAULT_ALIGNMENT, num * size);
 
   if (x)
-    VPX_MEMSET_L(x, 0, num * size);
+    memset(x, 0, num * size);
 
   return x;
 }
@@ -171,11 +69,7 @@
     addr   = (void *)(((size_t *)memblk)[-1]);
     memblk = NULL;
 
-#if CONFIG_MEM_MANAGER
-    new_addr = vpx_mm_realloc(addr, size + align + ADDRESS_STORAGE_SIZE);
-#else
-    new_addr = VPX_REALLOC_L(addr, size + align + ADDRESS_STORAGE_SIZE);
-#endif
+    new_addr = realloc(addr, size + align + ADDRESS_STORAGE_SIZE);
 
     if (new_addr) {
       addr = new_addr;
@@ -193,280 +87,20 @@
 void vpx_free(void *memblk) {
   if (memblk) {
     void *addr = (void *)(((size_t *)memblk)[-1]);
-#if CONFIG_MEM_MANAGER
-    hmm_free(&hmm_d, addr);
-#else
-    VPX_FREE_L(addr);
-#endif
+    free(addr);
   }
 }
 
-#if CONFIG_MEM_TRACKER
-void *xvpx_memalign(size_t align, size_t size, char *file, int line) {
-#if TRY_BOUNDS_CHECK
-  unsigned char *x_bounds;
-#endif
-
-  void *x;
-
-  if (g_alloc_count == 0) {
-#if TRY_BOUNDS_CHECK
-    int i_rv = vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE);
-#else
-    int i_rv = vpx_memory_tracker_init(0, 0);
-#endif
-
-    if (i_rv < 0) {
-      _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");)
-    }
-  }
-
-#if TRY_BOUNDS_CHECK
-  {
-    int i;
-    unsigned int tempme = BOUNDS_CHECK_VALUE;
-
-    x_bounds = vpx_memalign(align, size + (BOUNDS_CHECK_PAD_SIZE * 2));
-
-    if (x_bounds) {
-      /*we're aligning the address twice here but to keep things
-        consistent we want to have the padding come before the stored
-        address so no matter what free function gets called we will
-        attempt to free the correct address*/
-      x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]);
-      x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE,
-                     (int)align);
-      /* save the actual malloc address */
-      ((size_t *)x)[-1] = (size_t)x_bounds;
-
-      for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) {
-        VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int));
-        VPX_MEMCPY_L((unsigned char *)x + size + i,
-                     &tempme, sizeof(unsigned int));
-      }
-    } else
-      x = NULL;
-  }
-#else
-  x = vpx_memalign(align, size);
-#endif /*TRY_BOUNDS_CHECK*/
-
-  g_alloc_count++;
-
-  vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1);
-
-  return x;
-}
-
-void *xvpx_malloc(size_t size, char *file, int line) {
-  return xvpx_memalign(DEFAULT_ALIGNMENT, size, file, line);
-}
-
-void *xvpx_calloc(size_t num, size_t size, char *file, int line) {
-  void *x = xvpx_memalign(DEFAULT_ALIGNMENT, num * size, file, line);
-
-  if (x)
-    VPX_MEMSET_L(x, 0, num * size);
-
-  return x;
-}
-
-void *xvpx_realloc(void *memblk, size_t size, char *file, int line) {
-  struct mem_block *p = NULL;
-  int orig_size = 0,
-      orig_line = 0;
-  char *orig_file = NULL;
-
-#if TRY_BOUNDS_CHECK
-  unsigned char *x_bounds = memblk ?
-                            (unsigned char *)(((size_t *)memblk)[-1]) :
-                            NULL;
-#endif
-
-  void *x;
-
-  if (g_alloc_count == 0) {
-#if TRY_BOUNDS_CHECK
-
-    if (!vpx_memory_tracker_init(BOUNDS_CHECK_PAD_SIZE, BOUNDS_CHECK_VALUE))
-#else
-    if (!vpx_memory_tracker_init(0, 0))
-#endif
-    {
-      _P(printf("ERROR xvpx_malloc MEM_TRACK_USAGE error vpx_memory_tracker_init().\n");)
-    }
-  }
-
-  if ((p = vpx_memory_tracker_find((size_t)memblk))) {
-    orig_size = p->size;
-    orig_file = p->file;
-    orig_line = p->line;
-  }
-
-#if TRY_BOUNDS_CHECK_ON_FREE
-  vpx_memory_tracker_check_integrity(file, line);
-#endif
-
-  /* have to do this regardless of success, because
-   * the memory that does get realloc'd may change
-   * the bounds values of this block
-   */
-  vpx_memory_tracker_remove((size_t)memblk);
-
-#if TRY_BOUNDS_CHECK
-  {
-    int i;
-    unsigned int tempme = BOUNDS_CHECK_VALUE;
-
-    x_bounds = vpx_realloc(memblk, size + (BOUNDS_CHECK_PAD_SIZE * 2));
-
-    if (x_bounds) {
-      x_bounds = (unsigned char *)(((size_t *)x_bounds)[-1]);
-      x = align_addr(x_bounds + BOUNDS_CHECK_PAD_SIZE + ADDRESS_STORAGE_SIZE,
-                     (int)DEFAULT_ALIGNMENT);
-      /* save the actual malloc address */
-      ((size_t *)x)[-1] = (size_t)x_bounds;
-
-      for (i = 0; i < BOUNDS_CHECK_PAD_SIZE; i += sizeof(unsigned int)) {
-        VPX_MEMCPY_L(x_bounds + i, &tempme, sizeof(unsigned int));
-        VPX_MEMCPY_L((unsigned char *)x + size + i,
-                     &tempme, sizeof(unsigned int));
-      }
-    } else
-      x = NULL;
-  }
-#else
-  x = vpx_realloc(memblk, size);
-#endif /*TRY_BOUNDS_CHECK*/
-
-  if (!memblk) ++g_alloc_count;
-
-  if (x)
-    vpx_memory_tracker_add((size_t)x, (unsigned int)size, file, line, 1);
-  else
-    vpx_memory_tracker_add((size_t)memblk, orig_size, orig_file, orig_line, 1);
-
-  return x;
-}
-
-void xvpx_free(void *p_address, char *file, int line) {
-#if TRY_BOUNDS_CHECK
-  unsigned char *p_bounds_address = (unsigned char *)p_address;
-  /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/
-#endif
-
-#if !TRY_BOUNDS_CHECK_ON_FREE
-  (void)file;
-  (void)line;
-#endif
-
-  if (p_address) {
-#if TRY_BOUNDS_CHECK_ON_FREE
-    vpx_memory_tracker_check_integrity(file, line);
-#endif
-
-    /* if the addr isn't found in the list, assume it was allocated via
-     * vpx_ calls not xvpx_, therefore it does not contain any padding
-     */
-    if (vpx_memory_tracker_remove((size_t)p_address) == -2) {
-      p_bounds_address = p_address;
-      _P(fprintf(stderr, "[vpx_mem][xvpx_free] addr: %p not found in"
-                 " list; freed from file:%s"
-                 " line:%d\n", p_address, file, line));
-    } else
-      --g_alloc_count;
-
-#if TRY_BOUNDS_CHECK
-    vpx_free(p_bounds_address);
-#else
-    vpx_free(p_address);
-#endif
-
-    if (!g_alloc_count)
-      vpx_memory_tracker_destroy();
-  }
-}
-
-#endif /*CONFIG_MEM_TRACKER*/
-
-#if CONFIG_MEM_CHECKS
-#if defined(VXWORKS)
-#include <task_lib.h> /*for task_delay()*/
-/* This function is only used to get a stack trace of the player
-object so we can se where we are having a problem. */
-static int get_my_tt(int task) {
-  tt(task);
-
-  return 0;
-}
-
-static void vx_sleep(int msec) {
-  int ticks_to_sleep = 0;
-
-  if (msec) {
-    int msec_per_tick = 1000 / sys_clk_rate_get();
-
-    if (msec < msec_per_tick)
-      ticks_to_sleep++;
-    else
-      ticks_to_sleep = msec / msec_per_tick;
-  }
-
-  task_delay(ticks_to_sleep);
-}
-#endif
-#endif
-
 void *vpx_memcpy(void *dest, const void *source, size_t length) {
-#if CONFIG_MEM_CHECKS
-
-  if (((int)dest < 0x4000) || ((int)source < 0x4000)) {
-    _P(printf("WARNING: vpx_memcpy dest:0x%x source:0x%x len:%d\n", (int)dest, (int)source, length);)
-
-#if defined(VXWORKS)
-    sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
-
-    vx_sleep(10000);
-#endif
-  }
-
-#endif
-
-  return VPX_MEMCPY_L(dest, source, length);
+  return memcpy(dest, source, length);
 }
 
 void *vpx_memset(void *dest, int val, size_t length) {
-#if CONFIG_MEM_CHECKS
-
-  if ((int)dest < 0x4000) {
-    _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n", (int)dest, val, length);)
-
-#if defined(VXWORKS)
-    sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
-
-    vx_sleep(10000);
-#endif
-  }
-
-#endif
-
-  return VPX_MEMSET_L(dest, val, length);
+  return memset(dest, val, length);
 }
 
 #if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
 void *vpx_memset16(void *dest, int val, size_t length) {
-#if CONFIG_MEM_CHECKS
-  if ((int)dest < 0x4000) {
-    _P(printf("WARNING: vpx_memset dest:0x%x val:%d len:%d\n",
-              (int)dest, val, length);)
-
-#if defined(VXWORKS)
-    sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
-
-    vx_sleep(10000);
-#endif
-  }
-#endif
   int i;
   void *orig = dest;
   uint16_t *dest16 = dest;
@@ -477,205 +111,5 @@
 #endif  // CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
 
 void *vpx_memmove(void *dest, const void *src, size_t count) {
-#if CONFIG_MEM_CHECKS
-
-  if (((int)dest < 0x4000) || ((int)src < 0x4000)) {
-    _P(printf("WARNING: vpx_memmove dest:0x%x src:0x%x count:%d\n", (int)dest, (int)src, count);)
-
-#if defined(VXWORKS)
-    sp(get_my_tt, task_id_self(), 0, 0, 0, 0, 0, 0, 0, 0);
-
-    vx_sleep(10000);
-#endif
-  }
-
-#endif
-
-  return VPX_MEMMOVE_L(dest, src, count);
-}
-
-#if CONFIG_MEM_MANAGER
-
-static int vpx_mm_create_heap_memory() {
-  int i_rv = 0;
-
-  if (!g_mng_memory_allocated) {
-#if MM_DYNAMIC_MEMORY
-    g_p_mng_memory_raw =
-      (unsigned char *)malloc(g_mm_memory_size + HMM_ADDR_ALIGN_UNIT);
-
-    if (g_p_mng_memory_raw) {
-      g_p_mng_memory = (unsigned char *)((((unsigned int)g_p_mng_memory_raw) +
-                                          HMM_ADDR_ALIGN_UNIT - 1) &
-                                         -(int)HMM_ADDR_ALIGN_UNIT);
-
-      _P(printf("[vpx][mm] total memory size:%d g_p_mng_memory_raw:0x%x g_p_mng_memory:0x%x\n"
-, g_mm_memory_size + HMM_ADDR_ALIGN_UNIT
-, (unsigned int)g_p_mng_memory_raw
-, (unsigned int)g_p_mng_memory);)
-    } else {
-      _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n"
-, g_mm_memory_size);)
-
-      i_rv = -1;
-    }
-
-    if (g_p_mng_memory)
-#endif
-    {
-      int chunk_size = 0;
-
-      g_mng_memory_allocated = 1;
-
-      hmm_init(&hmm_d);
-
-      chunk_size = g_mm_memory_size >> SHIFT_HMM_ADDR_ALIGN_UNIT;
-
-      chunk_size -= DUMMY_END_BLOCK_BAUS;
-
-      _P(printf("[vpx][mm] memory size:%d for vpx memory manager. g_p_mng_memory:0x%x  chunk_size:%d\n"
-, g_mm_memory_size
-, (unsigned int)g_p_mng_memory
-, chunk_size);)
-
-      hmm_new_chunk(&hmm_d, (void *)g_p_mng_memory, chunk_size);
-    }
-
-#if MM_DYNAMIC_MEMORY
-    else {
-      _P(printf("[vpx][mm] Couldn't allocate memory:%d for vpx memory manager.\n"
-, g_mm_memory_size);)
-
-      i_rv = -1;
-    }
-
-#endif
-  }
-
-  return i_rv;
-}
-
-static void *vpx_mm_realloc(void *memblk, size_t size) {
-  void *p_ret = NULL;
-
-  if (vpx_mm_create_heap_memory() < 0) {
-    _P(printf("[vpx][mm] ERROR vpx_mm_realloc() Couldn't create memory for Heap.\n");)
-  } else {
-    int i_rv = 0;
-    int old_num_aaus;
-    int new_num_aaus;
-
-    old_num_aaus = hmm_true_size(memblk);
-    new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
-
-    if (old_num_aaus == new_num_aaus) {
-      p_ret = memblk;
-    } else {
-      i_rv = hmm_resize(&hmm_d, memblk, new_num_aaus);
-
-      if (i_rv == 0) {
-        p_ret = memblk;
-      } else {
-        /* Error. Try to malloc and then copy data. */
-        void *p_from_malloc;
-
-        new_num_aaus = (size >> SHIFT_HMM_ADDR_ALIGN_UNIT) + 1;
-        p_from_malloc  = hmm_alloc(&hmm_d, new_num_aaus);
-
-        if (p_from_malloc) {
-          vpx_memcpy(p_from_malloc, memblk, size);
-          hmm_free(&hmm_d, memblk);
-
-          p_ret = p_from_malloc;
-        }
-      }
-    }
-  }
-
-  return p_ret;
-}
-#endif /*CONFIG_MEM_MANAGER*/
-
-#if USE_GLOBAL_FUNCTION_POINTERS
-# if CONFIG_MEM_TRACKER
-extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l
-, g_calloc_func g_calloc_l
-, g_realloc_func g_realloc_l
-, g_free_func g_free_l
-, g_memcpy_func g_memcpy_l
-, g_memset_func g_memset_l
-, g_memmove_func g_memmove_l);
-# endif
-#endif /*USE_GLOBAL_FUNCTION_POINTERS*/
-int vpx_mem_set_functions(g_malloc_func g_malloc_l
-, g_calloc_func g_calloc_l
-, g_realloc_func g_realloc_l
-, g_free_func g_free_l
-, g_memcpy_func g_memcpy_l
-, g_memset_func g_memset_l
-, g_memmove_func g_memmove_l) {
-#if USE_GLOBAL_FUNCTION_POINTERS
-
-  /* If use global functions is turned on then the
-  application must set the global functions before
-  it does anything else or vpx_mem will have
-  unpredictable results. */
-  if (!g_func) {
-    g_func = (struct GLOBAL_FUNC_POINTERS *)
-             g_malloc_l(sizeof(struct GLOBAL_FUNC_POINTERS));
-
-    if (!g_func) {
-      return -1;
-    }
-  }
-
-#if CONFIG_MEM_TRACKER
-  {
-    int rv = 0;
-    rv = vpx_memory_tracker_set_functions(g_malloc_l
-, g_calloc_l
-, g_realloc_l
-, g_free_l
-, g_memcpy_l
-, g_memset_l
-, g_memmove_l);
-
-    if (rv < 0) {
-      return rv;
-    }
-  }
-#endif
-
-  g_func->g_malloc  = g_malloc_l;
-  g_func->g_calloc  = g_calloc_l;
-  g_func->g_realloc = g_realloc_l;
-  g_func->g_free    = g_free_l;
-  g_func->g_memcpy  = g_memcpy_l;
-  g_func->g_memset  = g_memset_l;
-  g_func->g_memmove = g_memmove_l;
-
-  return 0;
-#else
-  (void)g_malloc_l;
-  (void)g_calloc_l;
-  (void)g_realloc_l;
-  (void)g_free_l;
-  (void)g_memcpy_l;
-  (void)g_memset_l;
-  (void)g_memmove_l;
-  return -1;
-#endif
-}
-
-int vpx_mem_unset_functions() {
-#if USE_GLOBAL_FUNCTION_POINTERS
-
-  if (g_func) {
-    g_free_func temp_free = g_func->g_free;
-    temp_free(g_func);
-    g_func = NULL;
-  }
-
-#endif
-  return 0;
+  return memmove(dest, src, count);
 }
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index e2391f4..f6876d6 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -17,22 +17,6 @@
 # include <lddk.h>
 #endif
 
-/* vpx_mem version info */
-#define vpx_mem_version "2.2.1.5"
-
-#define VPX_MEM_VERSION_CHIEF 2
-#define VPX_MEM_VERSION_MAJOR 2
-#define VPX_MEM_VERSION_MINOR 1
-#define VPX_MEM_VERSION_PATCH 5
-/* end - vpx_mem version info */
-
-#ifndef VPX_TRACK_MEM_USAGE
-# define VPX_TRACK_MEM_USAGE       0  /* enable memory tracking/integrity checks */
-#endif
-#ifndef VPX_CHECK_MEM_FUNCTIONS
-# define VPX_CHECK_MEM_FUNCTIONS   0  /* enable basic safety checks in _memcpy,
-_memset, and _memmove */
-#endif
 #ifndef REPLACE_BUILTIN_FUNCTIONS
 # define REPLACE_BUILTIN_FUNCTIONS 0  /* replace builtin functions with their
 vpx_ equivalents */
@@ -45,26 +29,6 @@
 extern "C" {
 #endif
 
-  /*
-      vpx_mem_get_version()
-      provided for runtime version checking. Returns an unsigned int of the form
-      CHIEF | MAJOR | MINOR | PATCH, where the chief version number is the high
-      order byte.
-  */
-  unsigned int vpx_mem_get_version(void);
-
-  /*
-      vpx_mem_set_heap_size(size_t size)
-        size - size in bytes for the memory manager to allocate for its heap
-      Sets the memory manager's initial heap size
-      Return:
-        0: on success
-        -1: if memory manager calls have not been included in the vpx_mem lib
-        -2: if the memory manager has been compiled to use static memory
-        -3: if the memory manager has already allocated its heap
-  */
-  int vpx_mem_set_heap_size(size_t size);
-
   void *vpx_memalign(size_t align, size_t size);
   void *vpx_malloc(size_t size);
   void *vpx_calloc(size_t num, size_t size);
@@ -78,29 +42,6 @@
 #endif
   void *vpx_memmove(void *dest, const void *src, size_t count);
 
-  /* special memory functions */
-  void *vpx_mem_alloc(int id, size_t size, size_t align);
-  void vpx_mem_free(int id, void *mem, size_t size);
-
-  /* Wrappers to standard library functions. */
-  typedef void *(* g_malloc_func)(size_t);
-  typedef void *(* g_calloc_func)(size_t, size_t);
-  typedef void *(* g_realloc_func)(void *, size_t);
-  typedef void (* g_free_func)(void *);
-  typedef void *(* g_memcpy_func)(void *, const void *, size_t);
-  typedef void *(* g_memset_func)(void *, int, size_t);
-  typedef void *(* g_memmove_func)(void *, const void *, size_t);
-
-  int vpx_mem_set_functions(g_malloc_func g_malloc_l
-, g_calloc_func g_calloc_l
-, g_realloc_func g_realloc_l
-, g_free_func g_free_l
-, g_memcpy_func g_memcpy_l
-, g_memset_func g_memset_l
-, g_memmove_func g_memmove_l);
-  int vpx_mem_unset_functions(void);
-
-
   /* some defines for backward compatibility */
 #define DMEM_GENERAL 0
 
@@ -119,50 +60,11 @@
 # endif
 #endif
 
-#if CONFIG_MEM_TRACKER
-#include <stdarg.h>
-  /*from vpx_mem/vpx_mem_tracker.c*/
-  extern void vpx_memory_tracker_dump();
-  extern void vpx_memory_tracker_check_integrity(char *file, unsigned int line);
-  extern int vpx_memory_tracker_set_log_type(int type, char *option);
-  extern int vpx_memory_tracker_set_log_func(void *userdata,
-                                             void(*logfunc)(void *userdata,
-                                                            const char *fmt, va_list args));
-# ifndef __VPX_MEM_C__
-#  define vpx_memalign(align, size) xvpx_memalign((align), (size), __FILE__, __LINE__)
-#  define vpx_malloc(size)          xvpx_malloc((size), __FILE__, __LINE__)
-#  define vpx_calloc(num, size)     xvpx_calloc(num, size, __FILE__, __LINE__)
-#  define vpx_realloc(addr, size)   xvpx_realloc(addr, size, __FILE__, __LINE__)
-#  define vpx_free(addr)            xvpx_free(addr, __FILE__, __LINE__)
-#  define vpx_memory_tracker_check_integrity() vpx_memory_tracker_check_integrity(__FILE__, __LINE__)
-#  define vpx_mem_alloc(id,size,align) xvpx_mem_alloc(id, size, align, __FILE__, __LINE__)
-#  define vpx_mem_free(id,mem,size) xvpx_mem_free(id, mem, size, __FILE__, __LINE__)
-# endif
-
-  void *xvpx_memalign(size_t align, size_t size, char *file, int line);
-  void *xvpx_malloc(size_t size, char *file, int line);
-  void *xvpx_calloc(size_t num, size_t size, char *file, int line);
-  void *xvpx_realloc(void *memblk, size_t size, char *file, int line);
-  void xvpx_free(void *memblk, char *file, int line);
-  void *xvpx_mem_alloc(int id, size_t size, size_t align, char *file, int line);
-  void xvpx_mem_free(int id, void *mem, size_t size, char *file, int line);
-
-#else
-# ifndef __VPX_MEM_C__
-#  define vpx_memory_tracker_dump()
-#  define vpx_memory_tracker_check_integrity()
-#  define vpx_memory_tracker_set_log_type(t,o) 0
-#  define vpx_memory_tracker_set_log_func(u,f) 0
-# endif
-#endif
-
-#if !VPX_CHECK_MEM_FUNCTIONS
-# ifndef __VPX_MEM_C__
-#  include <string.h>
-#  define vpx_memcpy  memcpy
-#  define vpx_memset  memset
-#  define vpx_memmove memmove
-# endif
+#ifndef __VPX_MEM_C__
+# include <string.h>
+# define vpx_memcpy  memcpy
+# define vpx_memset  memset
+# define vpx_memmove memmove
 #endif
 
 #ifdef VPX_MEM_PLTFRM
diff --git a/vpx_mem/vpx_mem.mk b/vpx_mem/vpx_mem.mk
index 4663c5a..7f275ea 100644
--- a/vpx_mem/vpx_mem.mk
+++ b/vpx_mem/vpx_mem.mk
@@ -2,21 +2,3 @@
 MEM_SRCS-yes += vpx_mem.c
 MEM_SRCS-yes += vpx_mem.h
 MEM_SRCS-yes += include/vpx_mem_intrnl.h
-
-MEM_SRCS-$(CONFIG_MEM_TRACKER) += vpx_mem_tracker.c
-MEM_SRCS-$(CONFIG_MEM_TRACKER) += include/vpx_mem_tracker.h
-
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_true.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_resize.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_shrink.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_largest.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_dflt_abort.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_base.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/include
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/include/hmm_intrnl.h
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/include/cavl_if.h
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/include/hmm_cnfg.h
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/include/heapmm.h
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/include/cavl_impl.h
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_grow.c
-MEM_SRCS-$(CONFIG_MEM_MANAGER) += memory_manager/hmm_alloc.c
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
deleted file mode 100644
index 613e8a1..0000000
--- a/vpx_mem/vpx_mem_tracker.c
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/*
-  vpx_mem_tracker.c
-
-  jwz 2003-09-30:
-   Stores a list of addreses, their size, and file and line they came from.
-   All exposed lib functions are prefaced by vpx_ and allow the global list
-   to be thread safe.
-   Current supported platforms are:
-    Linux, Win32, win_ce and vx_works
-   Further support can be added by defining the platform specific mutex
-   in the memory_tracker struct as well as calls to create/destroy/lock/unlock
-   the mutex in vpx_memory_tracker_init/Destroy and memory_tracker_lock_mutex/unlock_mutex
-*/
-#include "./vpx_config.h"
-
-#if defined(__uClinux__)
-# include <lddk.h>
-#endif
-
-#if HAVE_PTHREAD_H
-# include <pthread.h>
-#elif defined(WIN32) || defined(_WIN32_WCE)
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-# include <winbase.h>
-#elif defined(VXWORKS)
-# include <sem_lib.h>
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h> // VXWORKS doesn't have a malloc/memory.h file,
-// this should pull in malloc,free,etc.
-#include <stdarg.h>
-
-#include "include/vpx_mem_tracker.h"
-
-#undef vpx_malloc   // undefine any vpx_mem macros that may affect calls to
-#undef vpx_free     // memory functions in this file
-#undef vpx_memcpy
-#undef vpx_memset
-
-
-#ifndef USE_GLOBAL_FUNCTION_POINTERS
-# define USE_GLOBAL_FUNCTION_POINTERS   0  // use function pointers instead of compiled functions.
-#endif
-
-#if USE_GLOBAL_FUNCTION_POINTERS
-static mem_track_malloc_func g_malloc   = malloc;
-static mem_track_calloc_func g_calloc   = calloc;
-static mem_track_realloc_func g_realloc = realloc;
-static mem_track_free_func g_free       = free;
-static mem_track_memcpy_func g_memcpy   = memcpy;
-static mem_track_memset_func g_memset   = memset;
-static mem_track_memmove_func g_memmove = memmove;
-# define MEM_TRACK_MALLOC g_malloc
-# define MEM_TRACK_FREE   g_free
-# define MEM_TRACK_MEMCPY g_memcpy
-# define MEM_TRACK_MEMSET g_memset
-#else
-# define MEM_TRACK_MALLOC vpx_malloc
-# define MEM_TRACK_FREE   vpx_free
-# define MEM_TRACK_MEMCPY vpx_memcpy
-# define MEM_TRACK_MEMSET vpx_memset
-#endif // USE_GLOBAL_FUNCTION_POINTERS
-
-/* prototypes for internal library functions */
-static void memtrack_log(const char *fmt, ...);
-static void memory_tracker_dump();
-static void memory_tracker_check_integrity(char *file, unsigned int line);
-static void memory_tracker_add(size_t addr, unsigned int size,
-                               char *file, unsigned int line,
-                               int padded);
-static int memory_tracker_remove(size_t addr);
-static struct mem_block *memory_tracker_find(size_t addr);
-
-#if defined(NO_MUTEX)
-# define memory_tracker_lock_mutex() (!g_b_mem_tracker_inited)
-# define memory_tracker_unlock_mutex()
-#else
-static int memory_tracker_lock_mutex();
-static int memory_tracker_unlock_mutex();
-#endif
-
-#ifndef VPX_NO_GLOBALS
-struct memory_tracker {
-  struct mem_block *head,
-      * tail;
-  int len,
-      totalsize;
-  unsigned int current_allocated,
-           max_allocated;
-
-#if HAVE_PTHREAD_H
-  pthread_mutex_t mutex;
-#elif defined(WIN32) || defined(_WIN32_WCE)
-  HANDLE mutex;
-#elif defined(VXWORKS)
-  SEM_ID mutex;
-#elif defined(NO_MUTEX)
-#else
-#error "No mutex type defined for this platform!"
-#endif
-
-  int padding_size,
-      pad_value;
-};
-
-static struct memory_tracker memtrack;   // our global memory allocation list
-static int g_b_mem_tracker_inited = 0;     // indicates whether the global list has
-// been initialized (1:yes/0:no)
-static struct {
-  FILE *file;
-  int type;
-  void (*func)(void *userdata, const char *fmt, va_list args);
-  void *userdata;
-} g_logging = {NULL, 0, NULL, NULL};
-#else
-# include "vpx_global_handling.h"
-#define g_b_mem_tracker_inited vpxglobalm(vpxmem,g_b_mem_tracker_inited)
-#define g_logging vpxglobalm(vpxmem,g_logging)
-#define memtrack vpxglobalm(vpxmem,memtrack)
-#endif // #ifndef VPX_NO_GLOBALS
-
-extern void *vpx_malloc(size_t size);
-extern void vpx_free(void *memblk);
-extern void *vpx_memcpy(void *dest, const void *src, size_t length);
-extern void *vpx_memset(void *dest, int val, size_t length);
-
-/*
- *
- * Exposed library functions
- *
-*/
-
-/*
-    vpx_memory_tracker_init(int padding_size, int pad_value)
-      padding_size - the size of the padding before and after each mem addr.
-                     Values > 0 indicate that integrity checks can be performed
-                     by inspecting these areas.
-      pad_value - the initial value within the padding area before and after
-                  each mem addr.
-
-    Initializes global memory tracker structure
-    Allocates the head of the list
-*/
-int vpx_memory_tracker_init(int padding_size, int pad_value) {
-  if (!g_b_mem_tracker_inited) {
-    if ((memtrack.head = (struct mem_block *)
-                         MEM_TRACK_MALLOC(sizeof(struct mem_block)))) {
-      int ret;
-
-      MEM_TRACK_MEMSET(memtrack.head, 0, sizeof(struct mem_block));
-
-      memtrack.tail = memtrack.head;
-
-      memtrack.current_allocated = 0;
-      memtrack.max_allocated     = 0;
-
-      memtrack.padding_size = padding_size;
-      memtrack.pad_value    = pad_value;
-
-#if HAVE_PTHREAD_H
-      ret = pthread_mutex_init(&memtrack.mutex,
-                               NULL);            /*mutex attributes (NULL=default)*/
-#elif defined(WIN32) || defined(_WIN32_WCE)
-      memtrack.mutex = CreateMutex(NULL,   /*security attributes*/
-                                   FALSE,  /*we don't want initial ownership*/
-                                   NULL);  /*mutex name*/
-      ret = !memtrack.mutex;
-#elif defined(VXWORKS)
-      memtrack.mutex = sem_bcreate(SEM_Q_FIFO, /*SEM_Q_FIFO non-priority based mutex*/
-                                   SEM_FULL);  /*SEM_FULL initial state is unlocked*/
-      ret = !memtrack.mutex;
-#elif defined(NO_MUTEX)
-      ret = 0;
-#endif
-
-      if (ret) {
-        memtrack_log("vpx_memory_tracker_init: Error creating mutex!\n");
-
-        MEM_TRACK_FREE(memtrack.head);
-        memtrack.head = NULL;
-      } else {
-        memtrack_log("Memory Tracker init'd, v."vpx_mem_tracker_version" pad_size:%d pad_val:0x%x %d\n"
-, padding_size
-, pad_value
-, pad_value);
-        g_b_mem_tracker_inited = 1;
-      }
-    }
-  }
-
-  return g_b_mem_tracker_inited;
-}
-
-/*
-    vpx_memory_tracker_destroy()
-    If our global struct was initialized zeros out all its members,
-    frees memory and destroys it's mutex
-*/
-void vpx_memory_tracker_destroy() {
-  if (!memory_tracker_lock_mutex()) {
-    struct mem_block *p  = memtrack.head,
-                          * p2 = memtrack.head;
-
-    memory_tracker_dump();
-
-    while (p) {
-      p2 = p;
-      p  = p->next;
-
-      MEM_TRACK_FREE(p2);
-    }
-
-    memtrack.head              = NULL;
-    memtrack.tail              = NULL;
-    memtrack.len               = 0;
-    memtrack.current_allocated = 0;
-    memtrack.max_allocated     = 0;
-
-    if (!g_logging.type && g_logging.file && g_logging.file != stderr) {
-      fclose(g_logging.file);
-      g_logging.file = NULL;
-    }
-
-    memory_tracker_unlock_mutex();
-
-    g_b_mem_tracker_inited = 0;
-  }
-}
-
-/*
-    vpx_memory_tracker_add(size_t addr, unsigned int size,
-                         char * file, unsigned int line)
-      addr - memory address to be added to list
-      size - size of addr
-      file - the file addr was referenced from
-      line - the line in file addr was referenced from
-    Adds memory address addr, it's size, file and line it came from
-    to the global list via the thread safe internal library function
-*/
-void vpx_memory_tracker_add(size_t addr, unsigned int size,
-                            char *file, unsigned int line,
-                            int padded) {
-  memory_tracker_add(addr, size, file, line, padded);
-}
-
-/*
-    vpx_memory_tracker_remove(size_t addr)
-      addr - memory address to be removed from list
-    Removes addr from the global list via the thread safe
-    internal remove function
-    Return:
-      Same as described for memory_tracker_remove
-*/
-int vpx_memory_tracker_remove(size_t addr) {
-  return memory_tracker_remove(addr);
-}
-
-/*
-    vpx_memory_tracker_find(size_t addr)
-      addr - address to be found in list
-    Return:
-        If found, pointer to the memory block that matches addr
-        NULL otherwise
-*/
-struct mem_block *vpx_memory_tracker_find(size_t addr) {
-  struct mem_block *p = NULL;
-
-  if (!memory_tracker_lock_mutex()) {
-    p = memory_tracker_find(addr);
-    memory_tracker_unlock_mutex();
-  }
-
-  return p;
-}
-
-/*
-    vpx_memory_tracker_dump()
-    Locks the memory tracker's mutex and calls the internal
-    library function to dump the current contents of the
-    global memory allocation list
-*/
-void vpx_memory_tracker_dump() {
-  if (!memory_tracker_lock_mutex()) {
-    memory_tracker_dump();
-    memory_tracker_unlock_mutex();
-  }
-}
-
-/*
-    vpx_memory_tracker_check_integrity(char* file, unsigned int line)
-      file - The file name where the check was placed
-      line - The line in file where the check was placed
-    Locks the memory tracker's mutex and calls the internal
-    integrity check function to inspect every address in the global
-    memory allocation list
-*/
-void vpx_memory_tracker_check_integrity(char *file, unsigned int line) {
-  if (!memory_tracker_lock_mutex()) {
-    memory_tracker_check_integrity(file, line);
-    memory_tracker_unlock_mutex();
-  }
-}
-
-/*
-    vpx_memory_tracker_set_log_type
-    Sets the logging type for the memory tracker. Based on the value it will
-    direct its output to the appropriate place.
-    Return:
-      0: on success
-      -1: if the logging type could not be set, because the value was invalid
-          or because a file could not be opened
-*/
-int vpx_memory_tracker_set_log_type(int type, char *option) {
-  int ret = -1;
-
-  switch (type) {
-    case 0:
-      g_logging.type = 0;
-
-      if (!option) {
-        g_logging.file = stderr;
-        ret = 0;
-      } else {
-        if ((g_logging.file = fopen((char *)option, "w")))
-          ret = 0;
-      }
-
-      break;
-#if defined(WIN32) && !defined(_WIN32_WCE)
-    case 1:
-      g_logging.type = type;
-      ret = 0;
-      break;
-#endif
-    default:
-      break;
-  }
-
-  // output the version to the new logging destination
-  if (!ret)
-    memtrack_log("Memory Tracker logging initialized, "
-                 "Memory Tracker v."vpx_mem_tracker_version"\n");
-
-  return ret;
-}
-
-/*
-    vpx_memory_tracker_set_log_func
-    Sets a logging function to be used by the memory tracker.
-    Return:
-      0: on success
-      -1: if the logging type could not be set because logfunc was NULL
-*/
-int vpx_memory_tracker_set_log_func(void *userdata,
-                                    void(*logfunc)(void *userdata,
-                                                   const char *fmt, va_list args)) {
-  int ret = -1;
-
-  if (logfunc) {
-    g_logging.type     = -1;
-    g_logging.userdata = userdata;
-    g_logging.func     = logfunc;
-    ret = 0;
-  }
-
-  // output the version to the new logging destination
-  if (!ret)
-    memtrack_log("Memory Tracker logging initialized, "
-                 "Memory Tracker v."vpx_mem_tracker_version"\n");
-
-  return ret;
-}
-
-/*
- *
- * END - Exposed library functions
- *
-*/
-
-
-/*
- *
- * Internal library functions
- *
-*/
-
-static void memtrack_log(const char *fmt, ...) {
-  va_list list;
-
-  va_start(list, fmt);
-
-  switch (g_logging.type) {
-    case -1:
-
-      if (g_logging.func)
-        g_logging.func(g_logging.userdata, fmt, list);
-
-      break;
-    case 0:
-
-      if (g_logging.file) {
-        vfprintf(g_logging.file, fmt, list);
-        fflush(g_logging.file);
-      }
-
-      break;
-#if defined(WIN32) && !defined(_WIN32_WCE)
-    case 1: {
-      char temp[1024];
-      _vsnprintf(temp, sizeof(temp) / sizeof(char) - 1, fmt, list);
-      OutputDebugString(temp);
-    }
-    break;
-#endif
-    default:
-      break;
-  }
-
-  va_end(list);
-}
-
-/*
-    memory_tracker_dump()
-    Dumps the current contents of the global memory allocation list
-*/
-static void memory_tracker_dump() {
-  int i = 0;
-  struct mem_block *p = (memtrack.head ? memtrack.head->next : NULL);
-
-  memtrack_log("\n_currently Allocated= %d; Max allocated= %d\n",
-               memtrack.current_allocated, memtrack.max_allocated);
-
-  while (p) {
-#if defined(WIN32) && !defined(_WIN32_WCE)
-
-    /*when using outputdebugstring, output filenames so they
-      can be clicked to be opened in visual studio*/
-    if (g_logging.type == 1)
-      memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file:\n"
-                   "  %s(%d):\n", i,
-                   p->addr, i, p->size,
-                   p->file, p->line);
-    else
-#endif
-      memtrack_log("memblocks[%d].addr= 0x%.8x, memblocks[%d].size= %d, file: %s, line: %d\n", i,
-                   p->addr, i, p->size,
-                   p->file, p->line);
-
-    p = p->next;
-    ++i;
-  }
-
-  memtrack_log("\n");
-}
-
-/*
-    memory_tracker_check_integrity(char* file, unsigned int file)
-      file - the file name where the check was placed
-      line - the line in file where the check was placed
-    If a padding_size was supplied to vpx_memory_tracker_init()
-    this function will check ea. addr in the list verifying that
-    addr-padding_size and addr+padding_size is filled with pad_value
-*/
-static void memory_tracker_check_integrity(char *file, unsigned int line) {
-  if (memtrack.padding_size) {
-    int i,
-        index = 0;
-    unsigned char *p_show_me,
-             * p_show_me2;
-    unsigned int tempme = memtrack.pad_value,
-                 dead1,
-                 dead2;
-    unsigned char *x_bounds;
-    struct mem_block *p = memtrack.head->next;
-
-    while (p) {
-      // x_bounds = (unsigned char*)p->addr;
-      // back up VPX_BYTE_ALIGNMENT
-      // x_bounds -= memtrack.padding_size;
-
-      if (p->padded) { // can the bounds be checked?
-        /*yes, move to the address that was actually allocated
-        by the vpx_* calls*/
-        x_bounds = (unsigned char *)(((size_t *)p->addr)[-1]);
-
-        for (i = 0; i < memtrack.padding_size; i += sizeof(unsigned int)) {
-          p_show_me = (x_bounds + i);
-          p_show_me2 = (unsigned char *)(p->addr + p->size + i);
-
-          MEM_TRACK_MEMCPY(&dead1, p_show_me, sizeof(unsigned int));
-          MEM_TRACK_MEMCPY(&dead2, p_show_me2, sizeof(unsigned int));
-
-          if ((dead1 != tempme) || (dead2 != tempme)) {
-            memtrack_log("\n[vpx_mem integrity check failed]:\n"
-                         "    index[%d,%d] {%s:%d} addr=0x%x, size=%d,"
-                         " file: %s, line: %d c0:0x%x c1:0x%x\n",
-                         index, i, file, line, p->addr, p->size, p->file,
-                         p->line, dead1, dead2);
-          }
-        }
-      }
-
-      ++index;
-      p = p->next;
-    }
-  }
-}
-
-/*
-    memory_tracker_add(size_t addr, unsigned int size,
-                     char * file, unsigned int line)
-    Adds an address (addr), it's size, file and line number to our list.
-    Adjusts the total bytes allocated and max bytes allocated if necessary.
-    If memory cannot be allocated the list will be destroyed.
-*/
-void memory_tracker_add(size_t addr, unsigned int size,
-                        char *file, unsigned int line,
-                        int padded) {
-  if (!memory_tracker_lock_mutex()) {
-    struct mem_block *p;
-
-    p = MEM_TRACK_MALLOC(sizeof(struct mem_block));
-
-    if (p) {
-      p->prev       = memtrack.tail;
-      p->prev->next = p;
-      p->addr       = addr;
-      p->size       = size;
-      p->line       = line;
-      p->file       = file;
-      p->padded     = padded;
-      p->next       = NULL;
-
-      memtrack.tail = p;
-
-      memtrack.current_allocated += size;
-
-      if (memtrack.current_allocated > memtrack.max_allocated)
-        memtrack.max_allocated = memtrack.current_allocated;
-
-      // memtrack_log("memory_tracker_add: added addr=0x%.8x\n", addr);
-
-      memory_tracker_unlock_mutex();
-    } else {
-      memtrack_log("memory_tracker_add: error allocating memory!\n");
-      memory_tracker_unlock_mutex();
-      vpx_memory_tracker_destroy();
-    }
-  }
-}
-
-/*
-    memory_tracker_remove(size_t addr)
-    Removes an address and its corresponding size (if they exist)
-    from the memory tracker list and adjusts the current number
-    of bytes allocated.
-    Return:
-      0: on success
-      -1: if the mutex could not be locked
-      -2: if the addr was not found in the list
-*/
-int memory_tracker_remove(size_t addr) {
-  int ret = -1;
-
-  if (!memory_tracker_lock_mutex()) {
-    struct mem_block *p;
-
-    if ((p = memory_tracker_find(addr))) {
-      memtrack.current_allocated -= p->size;
-
-      p->prev->next = p->next;
-
-      if (p->next)
-        p->next->prev = p->prev;
-      else
-        memtrack.tail = p->prev;
-
-      ret = 0;
-      MEM_TRACK_FREE(p);
-    } else {
-      if (addr)
-        memtrack_log("memory_tracker_remove(): addr not found in list,"
-                     " 0x%.8x\n", addr);
-
-      ret = -2;
-    }
-
-    memory_tracker_unlock_mutex();
-  }
-
-  return ret;
-}
-
-/*
-    memory_tracker_find(size_t addr)
-    Finds an address in our addrs list
-    NOTE: the mutex MUST be locked in the other internal
-          functions before calling this one. This avoids
-          the need for repeated locking and unlocking as in Remove
-    Returns: pointer to the mem block if found, NULL otherwise
-*/
-static struct mem_block *memory_tracker_find(size_t addr) {
-  struct mem_block *p = NULL;
-
-  if (memtrack.head) {
-    p = memtrack.head->next;
-
-    while (p && (p->addr != addr))
-      p = p->next;
-  }
-
-  return p;
-}
-
-
-#if !defined(NO_MUTEX)
-/*
-    memory_tracker_lock_mutex()
-    Locks the memory tracker mutex with a platform specific call
-    Returns:
-        0: Success
-       <0: Failure, either the mutex was not initialized
-           or the call to lock the mutex failed
-*/
-static int memory_tracker_lock_mutex() {
-  int ret = -1;
-
-  if (g_b_mem_tracker_inited) {
-
-#if HAVE_PTHREAD_H
-    ret = pthread_mutex_lock(&memtrack.mutex);
-#elif defined(WIN32) || defined(_WIN32_WCE)
-    ret = WaitForSingleObject(memtrack.mutex, INFINITE);
-#elif defined(VXWORKS)
-    ret = sem_take(memtrack.mutex, WAIT_FOREVER);
-#endif
-
-    if (ret) {
-      memtrack_log("memory_tracker_lock_mutex: mutex lock failed\n");
-    }
-  }
-
-  return ret;
-}
-
-/*
-    memory_tracker_unlock_mutex()
-    Unlocks the memory tracker mutex with a platform specific call
-    Returns:
-        0: Success
-       <0: Failure, either the mutex was not initialized
-           or the call to unlock the mutex failed
-*/
-static int memory_tracker_unlock_mutex() {
-  int ret = -1;
-
-  if (g_b_mem_tracker_inited) {
-
-#if HAVE_PTHREAD_H
-    ret = pthread_mutex_unlock(&memtrack.mutex);
-#elif defined(WIN32) || defined(_WIN32_WCE)
-    ret = !ReleaseMutex(memtrack.mutex);
-#elif defined(VXWORKS)
-    ret = sem_give(memtrack.mutex);
-#endif
-
-    if (ret) {
-      memtrack_log("memory_tracker_unlock_mutex: mutex unlock failed\n");
-    }
-  }
-
-  return ret;
-}
-#endif
-
-/*
-    vpx_memory_tracker_set_functions
-
-    Sets the function pointers for the standard library functions.
-
-    Return:
-      0: on success
-      -1: if the use global function pointers is not set.
-*/
-int vpx_memory_tracker_set_functions(mem_track_malloc_func g_malloc_l
-, mem_track_calloc_func g_calloc_l
-, mem_track_realloc_func g_realloc_l
-, mem_track_free_func g_free_l
-, mem_track_memcpy_func g_memcpy_l
-, mem_track_memset_func g_memset_l
-, mem_track_memmove_func g_memmove_l) {
-#if USE_GLOBAL_FUNCTION_POINTERS
-
-  if (g_malloc_l)
-    g_malloc = g_malloc_l;
-
-  if (g_calloc_l)
-    g_calloc = g_calloc_l;
-
-  if (g_realloc_l)
-    g_realloc = g_realloc_l;
-
-  if (g_free_l)
-    g_free = g_free_l;
-
-  if (g_memcpy_l)
-    g_memcpy = g_memcpy_l;
-
-  if (g_memset_l)
-    g_memset = g_memset_l;
-
-  if (g_memmove_l)
-    g_memmove = g_memmove_l;
-
-  return 0;
-#else
-  (void)g_malloc_l;
-  (void)g_calloc_l;
-  (void)g_realloc_l;
-  (void)g_free_l;
-  (void)g_memcpy_l;
-  (void)g_memset_l;
-  (void)g_memmove_l;
-  return -1;
-#endif
-}