vp9: Fix for non-rd pickmode for high-bitdepth build.

Use the simple block_yrd (the model_rd path) under certain conditions.
The optimization code is complete, but the high-bitdepth build is still
slower (~6% on 720p) than the low-bitdepth build.

For now, for 8-bit input, use the more complex block_yrd under certain
conditions: always for speed <= 5; otherwise only on key frames or for
bsize >= 32x32.

This gives about a 2-3% gain in quality for speed 7 on the RTC set
(over the high-bitdepth build), with about the same encoder fps as the
low-bitdepth build.

Change-Id: Ibe92a1945d0bd635f880befb4c815727df62d754
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 9f2e93a..00a552d 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -636,17 +636,19 @@
 #if CONFIG_VP9_HIGHBITDEPTH
   // TODO(jingning): Implement the high bit-depth Hadamard transforms and
   // remove this check condition.
-  // TODO(marpan): Disable this for 8 bit once optimizations for the functions
-  // below are merged in.
-  // if (xd->bd != 8) {
-  unsigned int var_y, sse_y;
-  (void)tx_size;
-  model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y,
-                    &sse_y);
-  *sse = INT_MAX;
-  *skippable = 0;
-  return;
-// }
+  // TODO(marpan): Use this path (model_rd) for 8bit under certain conditions
+  // for now, as the vp9_quantize_fp below for highbitdepth build is slow.
+  if (xd->bd != 8 ||
+      (cpi->oxcf.speed > 5 && cpi->common.frame_type != KEY_FRAME &&
+       bsize < BLOCK_32X32)) {
+    unsigned int var_y, sse_y;
+    (void)tx_size;
+    model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist,
+                      &var_y, &sse_y);
+    *sse = INT_MAX;
+    *skippable = 0;
+    return;
+  }
 #endif
 
   (void)cpi;