Merge "Use the pred_mv feature for speed 0"
diff --git a/test/vp9_datarate_test.cc b/test/vp9_datarate_test.cc
index 9a11d38..02bead0 100644
--- a/test/vp9_datarate_test.cc
+++ b/test/vp9_datarate_test.cc
@@ -360,8 +360,8 @@
   cfg_.rc_end_usage = VPX_CBR;
   cfg_.g_lag_in_frames = 0;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
   const int bitrates[4] = { 150, 350, 550, 750 };
   const int bitrate_index = GET_PARAM(3);
   cfg_.rc_target_bitrate = bitrates[bitrate_index];
@@ -386,8 +386,8 @@
   cfg_.g_lag_in_frames = 0;
   cfg_.g_error_resilient = 0;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
   const int bitrates[4] = { 150, 350, 550, 750 };
   const int bitrate_index = GET_PARAM(3);
   cfg_.rc_target_bitrate = bitrates[bitrate_index];
@@ -413,8 +413,8 @@
   // Encode using multiple threads.
   cfg_.g_threads = 2;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
   cfg_.rc_target_bitrate = 200;
   ResetModel();
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
@@ -472,8 +472,8 @@
   // interval (128).
   cfg_.kf_max_dist = 9999;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
 
   const int kDropFrameThreshTestStep = 30;
   const int bitrates[2] = { 50, 150 };
@@ -522,8 +522,8 @@
 
   cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
   const int bitrates[4] = { 200, 400, 600, 800 };
   const int bitrate_index = GET_PARAM(3);
   cfg_.rc_target_bitrate = bitrates[bitrate_index];
@@ -569,8 +569,8 @@
 
   cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
   const int bitrates[4] = { 200, 400, 600, 800 };
   const int bitrate_index = GET_PARAM(3);
   cfg_.rc_target_bitrate = bitrates[bitrate_index];
@@ -625,9 +625,9 @@
 
   cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  cfg_.rc_target_bitrate = 200;
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
+  cfg_.rc_target_bitrate = 400;
   ResetModel();
   // 40-20-40 bitrate allocation for 3 temporal layers.
   cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
@@ -650,8 +650,8 @@
         << j;
-    // Expect some frame drops in this test: for this 200 frames test,
+    // Expect some frame drops in this test: for this 400 frames test,
     // expect at least 10% and not more than 60% drops.
-    ASSERT_GE(num_drops_, 20);
-    ASSERT_LE(num_drops_, 130);
+    ASSERT_GE(num_drops_, 40);
+    ASSERT_LE(num_drops_, 240);
   }
 }
 
@@ -683,12 +683,12 @@
   cfg_.rc_end_usage = VPX_CBR;
   cfg_.g_lag_in_frames = 0;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 300);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
 
   cfg_.rc_target_bitrate = 450;
-  cfg_.g_w = 352;
-  cfg_.g_h = 288;
+  cfg_.g_w = 640;
+  cfg_.g_h = 480;
 
   ResetModel();
 
@@ -746,13 +746,13 @@
   cfg_.rc_end_usage = VPX_CBR;
   cfg_.g_lag_in_frames = 0;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
 
   // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
   // there is only one denoiser mode: denoiserYonly(which is 1),
   // but may add more modes in the future.
-  cfg_.rc_target_bitrate = 300;
+  cfg_.rc_target_bitrate = 400;
   ResetModel();
   // Turn on the denoiser.
   denoiser_on_ = 1;
@@ -833,13 +833,13 @@
   cfg_.rc_end_usage = VPX_CBR;
   cfg_.g_lag_in_frames = 0;
 
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
+  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
+                                       0, 400);
 
   // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
   // there is only one denoiser mode: denoiserYonly(which is 1),
   // but may add more modes in the future.
-  cfg_.rc_target_bitrate = 300;
+  cfg_.rc_target_bitrate = 400;
   ResetModel();
   // The denoiser is off by default.
   denoiser_on_ = 0;
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 3db11fc..335a0b2 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4052,13 +4052,12 @@
     vp9_svc_assert_constraints_pattern(cpi);
   }
 
-  if (!cpi->sf.re_encode_overshoot_rt &&
-      cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+  // Check if this high_source_sad (scene/slide change) frame should be
+  // encoded at high/max QP, and if so, set the q and adjust some rate
+  // control parameters.
+  if (cpi->sf.overshoot_detection_rt == 1 &&
       (cpi->rc.high_source_sad ||
        (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
-    // Check if this high_source_sad (scene/slide change) frame should be
-    // encoded at high/max QP, and if so, set the q and adjust some rate
-    // control parameters.
     if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
       vp9_set_quantizer(cm, q);
       vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -4087,10 +4086,11 @@
 
   vp9_encode_frame(cpi);
 
-  // Check if we should drop this frame because of high overshoot.
-  // Only for frames where high temporal-source SAD is detected.
+  // Check if we should re-encode this frame at high Q because of high
+  // overshoot based on the encoded frame size. Only for frames where
+  // high temporal-source SAD is detected.
   // For SVC: all spatial layers are checked for re-encoding.
-  if (cpi->sf.re_encode_overshoot_rt &&
+  if (cpi->sf.overshoot_detection_rt == 2 &&
       (cpi->rc.high_source_sad ||
        (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
     int frame_size = 0;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 1ec6965..64db18f 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2636,6 +2636,8 @@
   if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width &&
       src_height == last_src_height) {
     YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
+    int num_mi_cols = cm->mi_cols;
+    int num_mi_rows = cm->mi_rows;
     int start_frame = 0;
     int frames_to_buffer = 1;
     int frame = 0;
@@ -2646,9 +2648,13 @@
     float thresh = 8.0f;
     uint32_t thresh_key = 140000;
     if (cpi->oxcf.speed <= 5) thresh_key = 240000;
-    if (cpi->oxcf.rc_mode == VPX_VBR) {
-      min_thresh = 65000;
-      thresh = 2.1f;
+    if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000;
+    if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f;
+    if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) {
+      const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2);
+      const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2);
+      num_mi_cols = aligned_width >> MI_SIZE_LOG2;
+      num_mi_rows = aligned_height >> MI_SIZE_LOG2;
     }
     if (cpi->oxcf.lag_in_frames > 0) {
       frames_to_buffer = (cm->current_video_frame == 1)
@@ -2696,8 +2702,8 @@
         uint64_t avg_sad = 0;
         uint64_t tmp_sad = 0;
         int num_samples = 0;
-        int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
-        int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+        int sb_cols = (num_mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+        int sb_rows = (num_mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
         if (cpi->oxcf.lag_in_frames > 0) {
           src_y = frames[frame]->y_buffer;
           src_ystride = frames[frame]->y_stride;
@@ -2733,7 +2739,7 @@
           if (avg_sad >
                   VPXMAX(min_thresh,
                          (unsigned int)(rc->avg_source_sad[0] * thresh)) &&
-              rc->frames_since_key > 1 &&
+              rc->frames_since_key > 1 + cpi->svc.number_spatial_layers &&
               num_zero_temp_sad < 3 * (num_samples >> 2))
             rc->high_source_sad = 1;
           else
@@ -2814,14 +2820,16 @@
   SPEED_FEATURES *const sf = &cpi->sf;
   int thresh_qp = 7 * (rc->worst_quality >> 3);
   int thresh_rate = rc->avg_frame_bandwidth << 3;
-  // Lower rate threshold for video.
+  // Lower thresh_qp for video (more overshoot at lower Q) to be
+  // more conservative for video.
   if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
-    thresh_rate = rc->avg_frame_bandwidth << 2;
+    thresh_qp = rc->worst_quality >> 1;
   // If this decision is not based on an encoded frame size but just on
-  // scene/slide change detection (i.e., re_encode_overshoot_rt = 0), adjust the
-  // qp_thresh and skip the (frame_size > thresh_rate) condition in this case.
-  if (!sf->re_encode_overshoot_rt) thresh_qp = 3 * (rc->worst_quality >> 2);
-  if ((!sf->re_encode_overshoot_rt || frame_size > thresh_rate) &&
+  // scene/slide change detection (i.e., overshoot_detection_rt = 1),
+  // for now skip the (frame_size > thresh_rate) condition in this case.
+  // TODO(marpan): Use a better size/rate condition for this case and
+  // adjust thresholds.
+  if ((sf->overshoot_detection_rt == 1 || frame_size > thresh_rate) &&
       cm->base_qindex < thresh_qp) {
     double rate_correction_factor =
         cpi->rc.rate_correction_factors[INTER_NORMAL];
@@ -2838,7 +2846,7 @@
     // and the encoded frame used alot of Intra modes, then force hybrid_intra
     // encoding for the re-encode on this scene change. hybrid_intra will
     // use rd-based intra mode selection for small blocks.
-    if (sf->re_encode_overshoot_rt && frame_size > (thresh_rate << 1) &&
+    if (sf->overshoot_detection_rt == 2 && frame_size > (thresh_rate << 1) &&
         cpi->svc.spatial_layer_id == 0) {
       MODE_INFO **mi = cm->mi_grid_visible;
       int sum_intra_usage = 0;
@@ -2892,8 +2900,8 @@
         LAYER_CONTEXT *lc = &svc->layer_context[layer];
         RATE_CONTROL *lrc = &lc->rc;
         lrc->avg_frame_qindex[INTER_FRAME] = *q;
-        lrc->buffer_level = rc->optimal_buffer_level;
-        lrc->bits_off_target = rc->optimal_buffer_level;
+        lrc->buffer_level = lrc->optimal_buffer_level;
+        lrc->bits_off_target = lrc->optimal_buffer_level;
         lrc->rc_1_frame = 0;
         lrc->rc_2_frame = 0;
         lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 859e626..107da21 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -405,7 +405,7 @@
   sf->use_compound_nonrd_pickmode = 0;
   sf->nonrd_keyframe = 0;
   sf->svc_use_lowres_part = 0;
-  sf->re_encode_overshoot_rt = 0;
+  sf->overshoot_detection_rt = 0;
   sf->disable_16x16part_nonkey = 0;
   sf->disable_golden_ref = 0;
   sf->enable_tpl_model = 0;
@@ -570,11 +570,9 @@
     // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent
     // increase in encoding time.
     if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
-    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
-        cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
-        (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
-      sf->re_encode_overshoot_rt = 1;
-    }
+    if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
+        cpi->oxcf.rc_mode == VPX_CBR)
+      sf->overshoot_detection_rt = 1;
     if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
         cm->width <= 1280 && cm->height <= 720) {
       sf->use_altref_onepass = 1;
@@ -583,7 +581,6 @@
   }
 
   if (speed >= 6) {
-    sf->re_encode_overshoot_rt = 0;
     if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) {
       sf->use_altref_onepass = 1;
       sf->use_compound_nonrd_pickmode = 1;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 89fc82e..7430f0a 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -542,9 +542,17 @@
   // For SVC: enables use of partition from lower spatial resolution.
   int svc_use_lowres_part;
 
-  // Enable re-encoding on scene change with potential high overshoot,
-  // for real-time encoding flow.
-  int re_encode_overshoot_rt;
+  // Flag to indicate process for handling overshoot on slide/scene change,
+  // for real-time CBR mode.
+  // 0: no reaction to rate control on a detected slide/scene change
+  // (prior to encoding the frame).
+  // 1: set to larger Q based only on the detected slide/scene change
+  // and current/past Q. No second pass encoding, so faster than option 2.
+  // 2: based on (first pass) encoded frame, if large frame size is detected
+  // then set to higher Q for second encode. This involves 2 pass encoding
+  // on slide change, so slower than 1, but more accurate for detecting
+  // overshoot.
+  int overshoot_detection_rt;
 
   // Disable partitioning of 16x16 blocks.
   int disable_16x16part_nonkey;