Merge "vp9_rdopt: fix integer sanitizer warnings"
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 9125a01..94cb325 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -275,7 +275,8 @@
   }
 
   void CheckSADs() const {
-    uint32_t reference_sad, exp_sad[8];
+    uint32_t reference_sad;
+    DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[8]);
 
     SADs(exp_sad);
     for (int offset = 0; offset < 8; ++offset) {
@@ -299,7 +300,8 @@
   }
 
   void CheckSADs() const {
-    uint32_t reference_sad, exp_sad[4];
+    uint32_t reference_sad;
+    DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]);
 
     SADs(exp_sad);
     for (int block = 0; block < 4; ++block) {
@@ -534,7 +536,8 @@
   FillRandom(GetReference(2), reference_stride_);
   FillRandom(GetReference(3), reference_stride_);
   const int kCountSpeedTestBlock = 500000000 / (params_.width * params_.height);
-  uint32_t reference_sad[4], exp_sad[4];
+  uint32_t reference_sad[4];
+  DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]);
   vpx_usec_timer timer;
 
   memset(reference_sad, 0, sizeof(reference_sad));
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index cce6b6f..d094904 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -77,7 +77,12 @@
         coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),
         qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),
         dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {
+    // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values.
+#if HAVE_NEON
+    max_value_ = (1 << (7 + bit_depth_)) - 1;
+#else
     max_value_ = (1 << bit_depth_) - 1;
+#endif
     zbin_ptr_ =
         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
     round_fp_ptr_ = reinterpret_cast<int16_t *>(
diff --git a/tools/3D-Reconstruction/MotionEST/Exhaust.py b/tools/3D-Reconstruction/MotionEST/Exhaust.py
index 3c03468..83ca157 100644
--- a/tools/3D-Reconstruction/MotionEST/Exhaust.py
+++ b/tools/3D-Reconstruction/MotionEST/Exhaust.py
@@ -30,7 +30,7 @@
     """
 
   def search(self, cur_r, cur_c):
-    min_loss = self.dist(cur_r, cur_c, [0, 0], self.metric)
+    min_loss = self.block_dist(cur_r, cur_c, [0, 0], self.metric)
     cur_x = cur_c * self.blk_sz
     cur_y = cur_r * self.blk_sz
     ref_x = cur_x
@@ -39,14 +39,15 @@
     for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz):
       for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz):
         if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz:
-          loss = self.dist(cur_r, cur_c, [y - cur_y, x - cur_x], self.metric)
+          loss = self.block_dist(cur_r, cur_c, [y - cur_y, x - cur_x],
+                                 self.metric)
           if loss < min_loss:
             min_loss = loss
             ref_x = x
             ref_y = y
     return ref_x, ref_y
 
-  def est(self):
+  def motion_field_estimation(self):
     for i in xrange(self.num_row):
       for j in xrange(self.num_col):
         ref_x, ref_y = self.search(i, j)
@@ -101,7 +102,7 @@
     """
 
   def search(self, cur_r, cur_c):
-    dist_loss = self.dist(cur_r, cur_c, [0, 0], self.metric)
+    dist_loss = self.block_dist(cur_r, cur_c, [0, 0], self.metric)
     nb_loss = self.neighborLoss(cur_r, cur_c, np.array([0, 0]))
     min_loss = dist_loss + self.beta * nb_loss
     cur_x = cur_c * self.blk_sz
@@ -113,8 +114,8 @@
     for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz):
       for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz):
         if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz:
-          dist_loss = self.dist(cur_r, cur_c, [y - cur_y, x - cur_x],
-                                self.metric)
+          dist_loss = self.block_dist(cur_r, cur_c, [y - cur_y, x - cur_x],
+                                      self.metric)
           nb_loss = self.neighborLoss(cur_r, cur_c, [y - cur_y, x - cur_x])
           loss = dist_loss + self.beta * nb_loss
           if loss < min_loss:
@@ -123,7 +124,7 @@
             ref_y = y
     return ref_x, ref_y
 
-  def est(self):
+  def motion_field_estimation(self):
     for i in xrange(self.num_row):
       for j in xrange(self.num_col):
         ref_x, ref_y = self.search(i, j)
diff --git a/tools/3D-Reconstruction/MotionEST/HornSchunck.py b/tools/3D-Reconstruction/MotionEST/HornSchunck.py
index 0bf431c..38fcae1 100644
--- a/tools/3D-Reconstruction/MotionEST/HornSchunck.py
+++ b/tools/3D-Reconstruction/MotionEST/HornSchunck.py
@@ -120,7 +120,7 @@
             avg[i, j] += self.mf[i + r, j + c] / 12.0
     return avg
 
-  def est(self):
+  def motion_field_estimation(self):
     count = 0
     """
         u_{n+1} = ~u_n - Ix(Ix.~u_n+Iy.~v+It)/(IxIx+IyIy+alpha^2)
@@ -136,7 +136,7 @@
       count += 1
     self.mf *= self.blk_sz
 
-  def est_mat(self):
+  def motion_field_estimation_mat(self):
     row_idx = []
     col_idx = []
     data = []
@@ -145,8 +145,7 @@
     b = np.zeros((N, 1))
     for i in xrange(self.num_row):
       for j in xrange(self.num_col):
-        """(IxIx+alpha^2)u+IxIy.v-alpha^2~u IxIy.u+(IyIy+alpha^2)v-alpha^2~v
-        """
+        """(IxIx+alpha^2)u+IxIy.v-alpha^2~u IxIy.u+(IyIy+alpha^2)v-alpha^2~v"""
         u_idx = i * 2 * self.num_col + 2 * j
         v_idx = u_idx + 1
         b[u_idx, 0] = -self.Ix[i, j] * self.It[i, j]
diff --git a/tools/3D-Reconstruction/MotionEST/Util.py b/tools/3D-Reconstruction/MotionEST/Util.py
index f1a0cd4..d52e8a5 100644
--- a/tools/3D-Reconstruction/MotionEST/Util.py
+++ b/tools/3D-Reconstruction/MotionEST/Util.py
@@ -32,6 +32,8 @@
   for i in xrange(num_row):
     for j in xrange(num_col):
       center = (j * blk_sz + 0.5 * blk_sz, i * blk_sz + 0.5 * blk_sz)
+      """mf[i,j][0] is the row shift and mf[i,j][1] is the column shift. In PIL coordinates, head[0] is x (column shift) and head[1] is y (row shift).
+      """
       head = (center[0] + mf[i, j][1], center[1] + mf[i, j][0])
       draw.line([center, head], fill=(255, 0, 0, 255))
   return Image.alpha_composite(img_rgba, mf_layer)
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 8ac0a2c..8cc61bd 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -92,7 +92,7 @@
     }
 
     validate_buffer(bc->buffer + bc->pos, 1, bc->buffer_end, bc->error);
-    bc->buffer[bc->pos++] = (lowvalue >> (24 - offset));
+    bc->buffer[bc->pos++] = (lowvalue >> (24 - offset) & 0xff);
 
     lowvalue <<= offset;
     shift = count;
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 8b62b45..d75a481 100644
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -26,6 +26,22 @@
 #include "vpx_dsp/arm/mem_neon.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 
+static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff,
+                                               const int16x8_t dequant,
+                                               tran_low_t *dqcoeff) {
+  const int32x4_t dqcoeff_0 =
+      vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant));
+  const int32x4_t dqcoeff_1 =
+      vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  vst1q_s32(dqcoeff, dqcoeff_0);
+  vst1q_s32(dqcoeff + 4, dqcoeff_1);
+#else
+  vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}
+
 void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
                           int skip_block, const int16_t *round_ptr,
                           const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
@@ -55,7 +71,8 @@
     const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
     const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
     const int32x4_t v_tmp_lo =
         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
     const int32x4_t v_tmp_hi =
@@ -67,10 +84,9 @@
     const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
     const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
     const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
-    const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+    calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr);
     v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
     store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff);
-    store_s16q_to_tran_low(dqcoeff_ptr, v_dqcoeff);
     v_round = vmovq_n_s16(round_ptr[1]);
     v_quant = vmovq_n_s16(quant_ptr[1]);
     v_dequant = vmovq_n_s16(dequant_ptr[1]);
@@ -80,7 +96,8 @@
     const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
     const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i);
     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+    const int16x8_t v_abs = vabsq_s16(v_coeff);
+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);
     const int32x4_t v_tmp_lo =
         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));
     const int32x4_t v_tmp_hi =
@@ -92,10 +109,9 @@
     const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
     const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
     const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
-    const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+    calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i);
     v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
     store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff);
-    store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff);
   }
 #ifdef __aarch64__
   *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
@@ -146,9 +162,8 @@
   const int16x8_t dequant_mask =
       vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
 
-  int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+  int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
   int32x4_t dqcoeff_0, dqcoeff_1;
-  int16x8_t dqcoeff;
   uint16x8_t eob_max;
   (void)scan;
   (void)count;
@@ -170,13 +185,17 @@
   // Add 1 if negative to round towards zero because the C uses division.
   dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
   dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
-
-  dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
+#if CONFIG_VP9_HIGHBITDEPTH
+  vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1));
+  vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1));
+#else
+  store_s16q_to_tran_low(dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1),
+                                                   vshrn_n_s32(dqcoeff_1, 1)));
+#endif
 
   eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan);
 
   store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
-  store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
 
   iscan += 8;
   coeff_ptr += 8;
@@ -200,9 +219,8 @@
       const int16x8_t dequant_mask =
           vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));
 
-      int16x8_t qcoeff = vaddq_s16(coeff_abs, round);
+      int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);
       int32x4_t dqcoeff_0, dqcoeff_1;
-      int16x8_t dqcoeff;
 
       qcoeff = vqdmulhq_s16(qcoeff, quant);
       qcoeff = veorq_s16(qcoeff, coeff_sign);
@@ -215,14 +233,19 @@
       dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0));
       dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1));
 
-      dqcoeff =
-          vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
+#if CONFIG_VP9_HIGHBITDEPTH
+      vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1));
+      vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1));
+#else
+      store_s16q_to_tran_low(
+          dqcoeff_ptr,
+          vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)));
+#endif
 
       eob_max =
           vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan));
 
       store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
-      store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
 
       iscan += 8;
       coeff_ptr += 8;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 08d6d4d..868cd43 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5937,9 +5937,8 @@
   (void)sadpb;
   nb_full_mv_num = vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row,
                                            mi_col, rf_idx, bsize, nb_full_mvs);
-  vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
-                             &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
-                             mv);
+  vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param,
+                             lambda, 1, nb_full_mvs, nb_full_mv_num, mv);
 #else
   (void)frame_idx;
   (void)mi_row;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 0e44bb4..b6e3090 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -2437,16 +2437,24 @@
   return best_sad;
 }
 
+static int get_exhaustive_threshold(int exhaustive_searches_thresh,
+                                    BLOCK_SIZE bsize) {
+  return exhaustive_searches_thresh >>
+         (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
+}
+
 #if CONFIG_NON_GREEDY_MV
 // Runs sequence of diamond searches in smaller steps for RD.
 /* do_refine: If last step (1-away) of n-step search doesn't pick the center
               point as the best match, we will do a final 1-away diamond
               refining search  */
-int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full,
-                               int step_param, int lambda, int do_refine,
-                               const vp9_variance_fn_ptr_t *fn_ptr,
+int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
+                               BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+                               int lambda, int do_refine,
                                const int_mv *nb_full_mvs, int full_mv_num,
                                MV *best_mv) {
+  const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+  const SPEED_FEATURES *const sf = &cpi->sf;
   int n, num00 = 0;
   int thissme;
   int bestsme;
@@ -2495,9 +2503,16 @@
     }
   }
 
-  full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
-                            nb_full_mvs, full_mv_num);
-  bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+  if (sf->exhaustive_searches_thresh < INT_MAX &&
+      !cpi->rc.is_src_frame_alt_ref) {
+    const int64_t exhaustive_thr =
+        get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+    if (bestsme > exhaustive_thr) {
+      full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
+                                nb_full_mvs, full_mv_num);
+      bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+    }
+  }
   return bestsme;
 }
 #endif  // CONFIG_NON_GREEDY_MV
@@ -2886,9 +2901,10 @@
     if (sf->exhaustive_searches_thresh < INT_MAX &&
         !cpi->rc.is_src_frame_alt_ref) {
       const int64_t exhaustive_thr =
-          sf->exhaustive_searches_thresh >>
-          (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
-      if (var > exhaustive_thr) run_exhaustive_search = 1;
+          get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
+      if (var > exhaustive_thr) {
+        run_exhaustive_search = 1;
+      }
     }
   } else if (method == MESH) {
     run_exhaustive_search = 1;
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 424ca62..6f46041 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -136,9 +136,8 @@
                                     const int_mv *nb_full_mvs, int full_mv_num);
 
 int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
-                               MV *mvp_full, int step_param, int lambda,
-                               int do_refine,
-                               const vp9_variance_fn_ptr_t *fn_ptr,
+                               BLOCK_SIZE bsize, MV *mvp_full, int step_param,
+                               int lambda, int do_refine,
                                const int_mv *nb_full_mvs, int full_mv_num,
                                MV *best_mv);
 
diff --git a/vp9/encoder/vp9_non_greedy_mv.c b/vp9/encoder/vp9_non_greedy_mv.c
index f54e40c..d83aeca 100644
--- a/vp9/encoder/vp9_non_greedy_mv.c
+++ b/vp9/encoder/vp9_non_greedy_mv.c
@@ -180,12 +180,12 @@
 
 int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs,
                                  int mv_num) {
-  // The bahavior of this function is to compute log2 of mv difference,
+  // The behavior of this function is to compute log2 of mv difference,
   // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff)
-  // against available neghbor mvs.
-  // Since the log2 is monotonic increasing, we can compute
+  // against available neighbor mvs.
+  // Since the log2 is monotonically increasing, we can compute
   // min row_diff * row_diff + col_diff * col_diff first
-  // then apply log2 in the end
+  // then apply log2 in the end.
   int i;
   int64_t min_abs_diff = INT64_MAX;
   int cnt = 0;
@@ -201,7 +201,6 @@
   }
   if (cnt) {
     return log2_approximation(1 + min_abs_diff);
-  } else {
-    return 0;
   }
+  return 0;
 }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 0e74de4..57edc72 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2579,9 +2579,9 @@
   mvp_full.row >>= 3;
 
 #if CONFIG_NON_GREEDY_MV
-  bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1,
-                                       &cpi->fn_ptr[bsize], nb_full_mvs,
-                                       nb_full_mv_num, &tmp_mv->as_mv);
+  bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param,
+                                       lambda, 1, nb_full_mvs, nb_full_mv_num,
+                                       &tmp_mv->as_mv);
 #else   // CONFIG_NON_GREEDY_MV
   bestsme = vp9_full_pixel_search(
       cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
@@ -2617,9 +2617,9 @@
       mvp_full.row >>= 3;
 #if CONFIG_NON_GREEDY_MV
       this_me = vp9_full_pixel_diamond_new(
-          cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
-          lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num,
-          &this_mv);
+          cpi, x, bsize, &mvp_full,
+          VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs,
+          nb_full_mv_num, &this_mv);
 #else   // CONFIG_NON_GREEDY_MV
       this_me = vp9_full_pixel_search(
           cpi, x, bsize, &mvp_full,
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 8ba113b..bfe803b2 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -57,8 +57,8 @@
   svc->simulcast_mode = 0;
 
   for (i = 0; i < REF_FRAMES; ++i) {
-    svc->fb_idx_spatial_layer_id[i] = -1;
-    svc->fb_idx_temporal_layer_id[i] = -1;
+    svc->fb_idx_spatial_layer_id[i] = 0xff;
+    svc->fb_idx_temporal_layer_id[i] = 0xff;
     svc->fb_idx_base[i] = 0;
   }
   for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
diff --git a/vpx_dsp/bitwriter.h b/vpx_dsp/bitwriter.h
index f276fee..04084af 100644
--- a/vpx_dsp/bitwriter.h
+++ b/vpx_dsp/bitwriter.h
@@ -85,7 +85,7 @@
       br->buffer[x] += 1;
     }
 
-    br->buffer[br->pos++] = (lowvalue >> (24 - offset));
+    br->buffer[br->pos++] = (lowvalue >> (24 - offset)) & 0xff;
     lowvalue <<= offset;
     shift = count;
     lowvalue &= 0xffffff;