VaVDA: split kReduced and kSuperReduced modes

H264 GetNumReferenceFrames() was increased by 1 [1] for all platforms to
circumvent a limitation during introduction of the kReduced mode; this
CL:
- Renames VaVDA::BufferAllocationMode::kReduced to kSuperReduced.
- Takes that +1 out of h264_decoder
- Introduces kReduced to represent this H264 case where we +1 the
  amount of media pipeline frames allocated.

Note that this CL doesn't change ToT: there's an extra allocation
in the # codec reference frames _and_ in the amount of pipeline
buffers.

Test results ToT: https://pastebin.com/YG1Tcgya
With this change: https://pastebin.com/JgER07id and
https://paste.googleplex.com/5473272024006656 (2nd round) - similar.

This way, we can have all buffer amount calculations in one place,
specially with a view to introducing kReduced for platforms < SkyLake.

[1] https://cs.chromium.org/chromium/src/media/gpu/h264_decoder.cc?dr=C&sq=package:chromium&g=0&l=1428

TBR=hiroh@chromium.org (bc of
  http://crrev.com/c/1455302#message-3083834c541894c42a630b7fd73b6cd5095ce67e)

Test: tast run DEVICE_IP video.PlaybackPerfH2642160P60FPS on a SkyLake device (cave)
Bug: 912295, 920510
Change-Id: Ice286b1bd4db21f353d86cb9c65467e1322963be
Reviewed-on: https://chromium-review.googlesource.com/c/1455302
Reviewed-by: Miguel Casas <mcasas@chromium.org>
Commit-Queue: Miguel Casas <mcasas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#629774}
diff --git a/media/gpu/h264_decoder.cc b/media/gpu/h264_decoder.cc
index 0fa01797..73dcb62 100644
--- a/media/gpu/h264_decoder.cc
+++ b/media/gpu/h264_decoder.cc
@@ -1425,10 +1425,7 @@
 size_t H264Decoder::GetNumReferenceFrames() const {
   // Use the maximum number of pictures in the Decoded Picture Buffer plus one
   // for the one being currently egressed.
-  // Another +1 is experimentally needed for high-to-high resolution changes.
-  // TODO(mcasas): Figure out why +2 instead of +1, see crbug.com/909926 and
-  // http://crrev.com/c/1363807/9/media/gpu/h264_decoder.cc#1449.
-  return dpb_.max_num_pics() + 2;
+  return dpb_.max_num_pics() + 1;
 }
 
 // static
diff --git a/media/gpu/vaapi/vaapi_video_decode_accelerator.cc b/media/gpu/vaapi/vaapi_video_decode_accelerator.cc
index b58943f..0a62f89 100644
--- a/media/gpu/vaapi/vaapi_video_decode_accelerator.cc
+++ b/media/gpu/vaapi/vaapi_video_decode_accelerator.cc
@@ -536,13 +536,19 @@
 
   // If we are in BufferAllocationMode::kNone, split the requested |num_pics|
   // between VA reference frames and client PictureBuffers proper.
-  if (buffer_allocation_mode_ == BufferAllocationMode::kReduced)
+  if (IsBufferAllocationModeReducedOrSuperReduced())
     requested_num_reference_frames_ = num_reference_frames;
   else
     requested_num_reference_frames_ = 0;
 
   requested_num_pics_ = num_pics - requested_num_reference_frames_;
 
+  // Add an extra allocation for |client_| and |decoder_| in the kReduced case.
+  if (buffer_allocation_mode_ == BufferAllocationMode::kReduced) {
+    requested_num_reference_frames_++;
+    requested_num_pics_++;
+  }
+
   VLOGF(2) << " |requested_num_pics_| = " << requested_num_pics_
            << "; |requested_num_reference_frames_| = "
            << requested_num_reference_frames_;
@@ -558,7 +564,7 @@
 
   base::AutoLock auto_lock(lock_);
   const size_t expected_max_available_va_surfaces =
-      buffer_allocation_mode_ == BufferAllocationMode::kReduced
+      IsBufferAllocationModeReducedOrSuperReduced()
           ? previously_requested_num_reference_frames_
           : pictures_.size();
 
@@ -703,7 +709,7 @@
     DCHECK_EQ(va_surface_ids.size(), buffers.size());
   } else {
     const size_t requested_num_surfaces =
-        buffer_allocation_mode_ == BufferAllocationMode::kReduced
+        IsBufferAllocationModeReducedOrSuperReduced()
             ? requested_num_reference_frames_
             : pictures_.size();
     CHECK_NE(requested_num_surfaces, 0u);
@@ -1027,7 +1033,7 @@
     available_va_surfaces_.pop_front();
 
     TRACE_COUNTER_ID2("media,gpu", "Vaapi VASurfaceIDs", this, "used",
-                      (buffer_allocation_mode_ == BufferAllocationMode::kReduced
+                      (IsBufferAllocationModeReducedOrSuperReduced()
                            ? requested_num_reference_frames_
                            : pictures_.size()) -
                           available_va_surfaces_.size(),
@@ -1066,13 +1072,12 @@
     base::AutoLock auto_lock(lock_);
     available_va_surfaces_.push_back(va_surface_id);
     if (buffer_allocation_mode_ != BufferAllocationMode::kNone) {
-      TRACE_COUNTER_ID2(
-          "media,gpu", "Vaapi VASurfaceIDs", this, "used",
-          (buffer_allocation_mode_ == BufferAllocationMode::kReduced
-               ? requested_num_reference_frames_
-               : pictures_.size()) -
-              available_va_surfaces_.size(),
-          "available", available_va_surfaces_.size());
+      TRACE_COUNTER_ID2("media,gpu", "Vaapi VASurfaceIDs", this, "used",
+                        (IsBufferAllocationModeReducedOrSuperReduced()
+                             ? requested_num_reference_frames_
+                             : pictures_.size()) -
+                            available_va_surfaces_.size(),
+                        "available", available_va_surfaces_.size());
     }
     surfaces_available_.Signal();
   }
@@ -1087,7 +1092,7 @@
 }
 
 VaapiVideoDecodeAccelerator::BufferAllocationMode
-VaapiVideoDecodeAccelerator::DecideBufferAllocationMode() {
+VaapiVideoDecodeAccelerator::DecideBufferAllocationMode() const {
   // TODO(crbug.com/912295): Enable a better BufferAllocationMode for IMPORT
   // |output_mode_| as well.
   if (output_mode_ == VideoDecodeAccelerator::Config::OutputMode::IMPORT)
@@ -1110,12 +1115,25 @@
   // request the |client_| to allocate less than the usual |decoder_|s
   // GetRequiredNumOfPictures().
   // TODO(crbug.com/912295): enable for previous architectures.
-  if (IsSkyLakeOrLater())
-    return BufferAllocationMode::kReduced;
+  if (IsSkyLakeOrLater()) {
+    // Another +1 is experimentally needed for high-to-high resolution changes.
+    // TODO(mcasas): Figure out why and why only H264, see crbug.com/912295 and
+    // http://crrev.com/c/1363807/9/media/gpu/h264_decoder.cc#1449.
+    if (profile_ >= H264PROFILE_MIN && profile_ <= H264PROFILE_MAX)
+      return BufferAllocationMode::kReduced;
+
+    return BufferAllocationMode::kSuperReduced;
+  }
 
   return BufferAllocationMode::kNormal;
 }
 
+bool VaapiVideoDecodeAccelerator::IsBufferAllocationModeReducedOrSuperReduced()
+    const {
+  return buffer_allocation_mode_ == BufferAllocationMode::kSuperReduced ||
+         buffer_allocation_mode_ == BufferAllocationMode::kReduced;
+}
+
 bool VaapiVideoDecodeAccelerator::OnMemoryDump(
     const base::trace_event::MemoryDumpArgs& args,
     base::trace_event::ProcessMemoryDump* pmd) {
@@ -1142,7 +1160,7 @@
   // calculated size is an estimation since we don't know the internal VA
   // strides, texture compression, headers, etc, but is a good lower boundary.
   const size_t requested_num_surfaces =
-      buffer_allocation_mode_ == BufferAllocationMode::kReduced
+      IsBufferAllocationModeReducedOrSuperReduced()
           ? requested_num_reference_frames_
           : pictures_.size();
   dump->AddScalar(MemoryAllocatorDump::kNameSize,
diff --git a/media/gpu/vaapi/vaapi_video_decode_accelerator.h b/media/gpu/vaapi/vaapi_video_decode_accelerator.h
index 22d0c29..adb79cc 100644
--- a/media/gpu/vaapi/vaapi_video_decode_accelerator.h
+++ b/media/gpu/vaapi/vaapi_video_decode_accelerator.h
@@ -194,6 +194,9 @@
     kNone,
     // Using a reduced amount of |client_|s provided PictureBuffers and
     // |decoder_|s GetNumReferenceFrames() internallly.
+    kSuperReduced,
+    // Similar to kSuperReduced, but we have to increase slightly the amount of
+    // PictureBuffers allocated for the |client_|.
     kReduced,
     // Using |client_|s provided PictureBuffers and as many internally
     // allocated.
@@ -202,7 +205,8 @@
 
   // Decides the concrete buffer allocation mode, depending on the hardware
   // platform and other parameters.
-  BufferAllocationMode DecideBufferAllocationMode();
+  BufferAllocationMode DecideBufferAllocationMode() const;
+  bool IsBufferAllocationModeReducedOrSuperReduced() const;
 
   // VAVDA state.
   enum State {
diff --git a/media/gpu/vaapi/vaapi_video_decode_accelerator_unittest.cc b/media/gpu/vaapi/vaapi_video_decode_accelerator_unittest.cc
index 9091772..c32265ef 100644
--- a/media/gpu/vaapi/vaapi_video_decode_accelerator_unittest.cc
+++ b/media/gpu/vaapi/vaapi_video_decode_accelerator_unittest.cc
@@ -171,7 +171,7 @@
     vda_.buffer_allocation_mode_ =
         GetParam().decode_using_client_picture_buffers
             ? VaapiVideoDecodeAccelerator::BufferAllocationMode::kNone
-            : VaapiVideoDecodeAccelerator::BufferAllocationMode::kReduced;
+            : VaapiVideoDecodeAccelerator::BufferAllocationMode::kSuperReduced;
 
     vda_.state_ = VaapiVideoDecodeAccelerator::kIdle;
   }
@@ -238,7 +238,7 @@
 
     const size_t expected_num_picture_buffers_requested =
         vda_.buffer_allocation_mode_ ==
-                VaapiVideoDecodeAccelerator::BufferAllocationMode::kReduced
+                VaapiVideoDecodeAccelerator::BufferAllocationMode::kSuperReduced
             ? num_pictures - kNumReferenceFrames
             : num_pictures;