Proof-of-concept WebrtcVideoEncoderGpu

This is the base implementation of a proof-of-concept
WebrtcVideoEncoder utilizing the Chrome Media VEA framework for video
encode acceleration.

This current implementation by itself does not
work - the main reason for this is that the VEA implementation for
Windows (the primary platform I was testing for) is tightly coupled to
a Chrome GPU process that doesn't exist in Chrome Remote Desktop's
process model. (Indeed this may be the case for other platforms, too -
I've only been working with Windows.) As a consequence, thread checks
and shared memory allocation needed to be refactored/removed. A
subsequent change refactors the Windows VEA and WebrtcVideoEncoderGpu
to bring them to a working, proof-of-concept state.

This change exists to exemplify how WebrtcVideoEncoderGpu _should_ be
implemented, in the case where a separate GPU process exists and
shared memory is used. Going forward, we may choose to incorporate
such a process into the CRD process model; thus, this change is kept
for posterity.

Cq-Include-Trybots: master.tryserver.chromium.android:android_optional_gpu_tests_rel;master.tryserver.chromium.linux:linux_optional_gpu_tests_rel;master.tryserver.chromium.mac:mac_optional_gpu_tests_rel;master.tryserver.chromium.win:win_optional_gpu_tests_rel
Change-Id: I277f3684640a2f290c51cda95c368488c6438f96
Reviewed-on: https://chromium-review.googlesource.com/583828
Commit-Queue: Gus Smith <gusss@google.com>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: John Bauman <jbauman@chromium.org>
Reviewed-by: Joe Downing <joedow@chromium.org>
Reviewed-by: Jamie Walch <jamiewalch@chromium.org>
Cr-Commit-Position: refs/heads/master@{#492172}
diff --git a/media/gpu/BUILD.gn b/media/gpu/BUILD.gn
index f2adbb9..0fcf70a 100644
--- a/media/gpu/BUILD.gn
+++ b/media/gpu/BUILD.gn
@@ -121,6 +121,7 @@
     "//content/renderer:*",
     "//media/gpu/ipc/*",
     "//media/mojo/*",
+    "//remoting/codec:encoder",
     ":*",
   ]
 
diff --git a/remoting/codec/BUILD.gn b/remoting/codec/BUILD.gn
index 1eee958..e463e2e 100644
--- a/remoting/codec/BUILD.gn
+++ b/remoting/codec/BUILD.gn
@@ -36,6 +36,19 @@
     "//ui/gfx:color_space",
   ]
 
+  # Currently, building WebrtcVideoEncoderGpu is only supported on Windows and
+  # Linux, and encoding with WebrtcVideoEncoderGpu is only supported on Windows.
+  if (is_win || is_linux) {
+    sources += [
+      "webrtc_video_encoder_gpu.cc",
+      "webrtc_video_encoder_gpu.h",
+    ]
+    deps += [
+      "//gpu/command_buffer/service",
+      "//media/gpu",
+    ]
+  }
+
   # Opus depends on //media, which is not a supported include for iOS.
   # Also disabled it on Android, to avoid dependency on //media.
   # TODO(sergeyu): Cleanup host-only deps in client targets.
diff --git a/remoting/codec/DEPS b/remoting/codec/DEPS
index 91cf8b1..19e100787 100644
--- a/remoting/codec/DEPS
+++ b/remoting/codec/DEPS
@@ -5,4 +5,7 @@
   "+google/protobuf",
   "+third_party/opus",
   "+third_party/webrtc",
+  "+gpu/command_buffer/service/gpu_preferences.h",
+  "+media/video",
+  "+media/gpu",
 ]
diff --git a/remoting/codec/webrtc_video_encoder_gpu.cc b/remoting/codec/webrtc_video_encoder_gpu.cc
new file mode 100644
index 0000000..7580c70
--- /dev/null
+++ b/remoting/codec/webrtc_video_encoder_gpu.cc
@@ -0,0 +1,219 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "remoting/codec/webrtc_video_encoder_gpu.h"
+
+#include <utility>
+#include "base/bind.h"
+#include "base/bind_helpers.h"
+#include "base/logging.h"
+#include "base/memory/ptr_util.h"
+#include "gpu/command_buffer/service/gpu_preferences.h"
+#include "media/gpu/gpu_video_encode_accelerator_factory.h"
+#include "third_party/libyuv/include/libyuv/convert_from_argb.h"
+#include "third_party/webrtc/modules/desktop_capture/desktop_frame.h"
+#include "third_party/webrtc/modules/desktop_capture/desktop_geometry.h"
+
+namespace {
+// Currently, the frame scheduler only encodes a single frame at a time. Thus,
+// there's no reason to have this set to anything greater than one.
+const int kWebrtcVideoEncoderGpuOutputBufferCount = 1;
+
+void ArgbToI420(const webrtc::DesktopFrame& frame,
+                scoped_refptr<media::VideoFrame> video_frame) {
+  const uint8_t* rgb_data = frame.data();
+  const int rgb_stride = frame.stride();
+  const int y_stride = video_frame->stride(0);
+  DCHECK_EQ(video_frame->stride(1), video_frame->stride(2));
+  const int uv_stride = video_frame->stride(1);
+  uint8_t* y_data = video_frame->data(0);
+  uint8_t* u_data = video_frame->data(1);
+  uint8_t* v_data = video_frame->data(2);
+  libyuv::ARGBToI420(rgb_data, rgb_stride, y_data, y_stride, u_data, uv_stride,
+                     v_data, uv_stride, video_frame->visible_rect().width(),
+                     video_frame->visible_rect().height());
+}
+}  // namespace
+
+namespace remoting {
+
+WebrtcVideoEncoderGpu::WebrtcVideoEncoderGpu(
+    media::VideoCodecProfile codec_profile)
+    : state_(UNINITIALIZED),
+      codec_profile_(codec_profile),
+      weak_factory_(this) {}
+
+WebrtcVideoEncoderGpu::~WebrtcVideoEncoderGpu() {}
+
+// TODO(gusss): Implement either a software fallback or some sort of delay if
+// the hardware encoder crashes.
+// Bug: crbug.com/751870
+void WebrtcVideoEncoderGpu::Encode(std::unique_ptr<webrtc::DesktopFrame> frame,
+                                   const FrameParams& params,
+                                   WebrtcVideoEncoder::EncodeCallback done) {
+  DCHECK(frame);
+  DCHECK(done);
+  DCHECK_GT(params.duration, base::TimeDelta::FromMilliseconds(0));
+
+  if (state_ == INITIALIZATION_ERROR) {
+    DLOG(ERROR) << "Encoder failed to initialize; dropping encode request";
+    std::move(done).Run(nullptr);
+    return;
+  }
+
+  DVLOG(3) << __func__ << " bitrate = " << params.bitrate_kbps << ", "
+           << "duration = " << params.duration << ", "
+           << "key_frame = " << params.key_frame;
+
+  if (state_ == UNINITIALIZED ||
+      input_visible_size_.width() != frame->size().width() ||
+      input_visible_size_.height() != frame->size().height()) {
+    DVLOG(3) << __func__ << " Currently not initialized for frame size "
+             << frame->size().width() << "x" << frame->size().height()
+             << ". Initializing.";
+    input_visible_size_ =
+        gfx::Size(frame->size().width(), frame->size().height());
+
+    pending_encode_ = base::BindOnce(&WebrtcVideoEncoderGpu::Encode,
+                                     weak_factory_.GetWeakPtr(),
+                                     std::move(frame), params, std::move(done));
+
+    BeginInitialization();
+
+    return;
+  }
+
+  // If we get to this point and state_ != INITIALIZED, we may be attempting to
+  // have multiple outstanding encode requests, which is not currently
+  // supported. The current assumption is that the FrameScheduler will wait for
+  // an Encode to finish before attempting another.
+  DCHECK_EQ(state_, INITIALIZED);
+
+  scoped_refptr<media::VideoFrame> video_frame = media::VideoFrame::CreateFrame(
+      media::VideoPixelFormat::PIXEL_FORMAT_I420, input_coded_size_,
+      gfx::Rect(input_visible_size_), input_visible_size_, base::TimeDelta());
+
+  base::TimeDelta new_timestamp = previous_timestamp_ + params.duration;
+  video_frame->set_timestamp(new_timestamp);
+  previous_timestamp_ = new_timestamp;
+
+  ArgbToI420(*frame, video_frame);
+
+  callbacks_[video_frame->timestamp()] = std::move(done);
+
+  video_encode_accelerator_->Encode(video_frame, /*force_keyframe=*/false);
+}
+
+void WebrtcVideoEncoderGpu::RequireBitstreamBuffers(
+    unsigned int input_count,
+    const gfx::Size& input_coded_size,
+    size_t output_buffer_size) {
+  DCHECK(state_ == INITIALIZING);
+
+  DVLOG(3) << __func__ << ", "
+           << "input_count = " << input_count << ", "
+           << "input_coded_size = " << input_coded_size.width() << "x"
+           << input_coded_size.height() << ", "
+           << "output_buffer_size = " << output_buffer_size;
+
+  required_input_frame_count_ = input_count;
+  input_coded_size_ = input_coded_size;
+  output_buffer_size_ = output_buffer_size;
+
+  output_buffers_.clear();
+
+  for (unsigned int i = 0; i < kWebrtcVideoEncoderGpuOutputBufferCount; ++i) {
+    auto shm = base::MakeUnique<base::SharedMemory>();
+    // TODO(gusss): Do we need to handle mapping failure more gracefully?
+    // LOG_ASSERT will simply cause a crash.
+    LOG_ASSERT(shm->CreateAndMapAnonymous(output_buffer_size_));
+    output_buffers_.push_back(std::move(shm));
+  }
+
+  for (size_t i = 0; i < output_buffers_.size(); ++i) {
+    UseOutputBitstreamBufferId(i);
+  }
+
+  state_ = INITIALIZED;
+
+  if (pending_encode_)
+    std::move(pending_encode_).Run();
+}
+
+void WebrtcVideoEncoderGpu::BitstreamBufferReady(int32_t bitstream_buffer_id,
+                                                 size_t payload_size,
+                                                 bool key_frame,
+                                                 base::TimeDelta timestamp) {
+  DVLOG(3) << __func__ << " bitstream_buffer_id = " << bitstream_buffer_id
+           << ", "
+           << "payload_size = " << payload_size << ", "
+           << "key_frame = " << key_frame << ", "
+           << "timestamp ms = " << timestamp.InMilliseconds();
+
+  std::unique_ptr<EncodedFrame> encoded_frame =
+      base::MakeUnique<EncodedFrame>();
+  base::SharedMemory* output_buffer =
+      output_buffers_[bitstream_buffer_id].get();
+  encoded_frame->data.assign(reinterpret_cast<char*>(output_buffer->memory()),
+                             payload_size);
+  encoded_frame->key_frame = key_frame;
+  encoded_frame->size = webrtc::DesktopSize(input_coded_size_.width(),
+                                            input_coded_size_.height());
+
+  UseOutputBitstreamBufferId(bitstream_buffer_id);
+
+  auto callback_it = callbacks_.find(timestamp);
+  DCHECK(callback_it != callbacks_.end())
+      << "Callback not found for timestamp " << timestamp;
+  std::move(std::get<1>(*callback_it)).Run(std::move(encoded_frame));
+  callbacks_.erase(timestamp);
+}
+
+void WebrtcVideoEncoderGpu::NotifyError(
+    media::VideoEncodeAccelerator::Error error) {
+  LOG(ERROR) << __func__ << " error: " << error;
+}
+
+void WebrtcVideoEncoderGpu::BeginInitialization() {
+  DVLOG(3) << __func__;
+
+  media::VideoPixelFormat input_format =
+      media::VideoPixelFormat::PIXEL_FORMAT_I420;
+  // TODO(gusss): implement some logical way to set an initial bitrate.
+  uint32_t initial_bitrate = 8 * 1024 * 8;
+  gpu::GpuPreferences gpu_preferences;
+
+  video_encode_accelerator_ =
+      media::GpuVideoEncodeAcceleratorFactory::CreateVEA(
+          input_format, input_visible_size_, codec_profile_, initial_bitrate,
+          this, gpu_preferences);
+
+  if (!video_encode_accelerator_) {
+    LOG(ERROR) << "Could not create VideoEncodeAccelerator";
+    state_ = INITIALIZATION_ERROR;
+    return;
+  }
+
+  state_ = INITIALIZING;
+}
+
+void WebrtcVideoEncoderGpu::UseOutputBitstreamBufferId(
+    int32_t bitstream_buffer_id) {
+  DVLOG(3) << __func__ << " id=" << bitstream_buffer_id;
+  video_encode_accelerator_->UseOutputBitstreamBuffer(media::BitstreamBuffer(
+      bitstream_buffer_id, output_buffers_[bitstream_buffer_id]->handle(),
+      output_buffer_size_));
+}
+
+// static
+std::unique_ptr<WebrtcVideoEncoderGpu> WebrtcVideoEncoderGpu::CreateForH264() {
+  DVLOG(3) << __func__;
+
+  // TODO(gusss): what profile should be picked here? Currently, baseline was
+  // chosen arbitrarily.
+  return base::WrapUnique(new WebrtcVideoEncoderGpu(
+      media::VideoCodecProfile::H264PROFILE_BASELINE));
+}
+
+}  // namespace remoting
diff --git a/remoting/codec/webrtc_video_encoder_gpu.h b/remoting/codec/webrtc_video_encoder_gpu.h
new file mode 100644
index 0000000..82a7f19
--- /dev/null
+++ b/remoting/codec/webrtc_video_encoder_gpu.h
@@ -0,0 +1,109 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef REMOTING_CODEC_WEBRTC_VIDEO_ENCODER_GPU_H_
+#define REMOTING_CODEC_WEBRTC_VIDEO_ENCODER_GPU_H_
+
+#include "media/video/video_encode_accelerator.h"
+#include "remoting/codec/webrtc_video_encoder.h"
+
+namespace base {
+class SharedMemory;
+}
+
+namespace remoting {
+
+// A WebrtcVideoEncoder implementation utilizing the VideoEncodeAccelerator
+// framework to do hardware-accelerated encoding.
+// A brief explanation of how this class is initialized:
+// 1. An instance of WebrtcVideoEncoderGpu is created using a static method, for
+//      example CreateForH264(). The state at this point is UNINITIALIZED.
+// 2. On the first encode call, the WebrtcVideoEncoderGpu saves the incoming
+//      DesktopFrame's dimensions and thunks the encode. Before returning, it
+//      calls BeginInitialization().
+// 3. In BeginInitialization(), the WebrtcVideoEncoderGpu constructs the
+//      VideoEncodeAccelerator using the saved dimensions from the DesktopFrame.
+//      If the VideoEncodeAccelerator is constructed
+//      successfully, the state is then INITIALIZING. If not, the state is
+//      INITIALIZATION_ERROR.
+// 4. Some time later, the VideoEncodeAccelerator sets itself up and is ready
+//      to encode. At this point, it calls our WebrtcVideoEncoderGpu's
+//      RequireBitstreamBuffers() method. Once bitstream buffers are allocated,
+//      the state is INITIALIZED.
+class WebrtcVideoEncoderGpu : public WebrtcVideoEncoder,
+                              public media::VideoEncodeAccelerator::Client {
+ public:
+  static std::unique_ptr<WebrtcVideoEncoderGpu> CreateForH264();
+
+  ~WebrtcVideoEncoderGpu() override;
+
+  // WebrtcVideoEncoder interface.
+  void Encode(std::unique_ptr<webrtc::DesktopFrame> frame,
+              const FrameParams& params,
+              WebrtcVideoEncoder::EncodeCallback done) override;
+
+  // VideoEncodeAccelerator::Client interface.
+  void RequireBitstreamBuffers(unsigned int input_count,
+                               const gfx::Size& input_coded_size,
+                               size_t output_buffer_size) override;
+  void BitstreamBufferReady(int32_t bitstream_buffer_id,
+                            size_t payload_size,
+                            bool key_frame,
+                            base::TimeDelta timestamp) override;
+  void NotifyError(media::VideoEncodeAccelerator::Error error) override;
+
+ private:
+  enum State { UNINITIALIZED, INITIALIZING, INITIALIZED, INITIALIZATION_ERROR };
+
+  explicit WebrtcVideoEncoderGpu(media::VideoCodecProfile codec_profile);
+
+  void BeginInitialization();
+
+  void UseOutputBitstreamBufferId(int32_t bitstream_buffer_id);
+
+  State state_;
+
+  // Only after the first encode request do we know how large the incoming
+  // frames will be. Thus, we initialize after the first encode request,
+  // postponing the encode until the encoder has been initialized.
+  base::OnceClosure pending_encode_;
+
+  std::unique_ptr<media::VideoEncodeAccelerator> video_encode_accelerator_;
+
+  base::TimeDelta previous_timestamp_;
+
+  media::VideoCodecProfile codec_profile_;
+
+  // Shared memory with which the VEA transfers output to WebrtcVideoEncoderGpu
+  std::vector<std::unique_ptr<base::SharedMemory>> output_buffers_;
+
+  // TODO(gusss): required_input_frame_count_ is currently unused; evaluate
+  // whether or not it's actually needed. This variable represents the number of
+  // frames needed by the VEA before it can start producing output.
+  // This may be important in the future, as the current frame scheduler for CRD
+  // encodes one frame at a time - it will not send the next frame in until the
+  // previous frame has been returned. It may need to be "tricked" into sending
+  // in a number of start frames.
+  // However, initial tests showed that, even if the VEA requested a number of
+  // initial frames, it still encoded and returned the first frame before
+  // getting the second frame. This may be platform-dependent - these tests were
+  // done with the MediaFoundationVideoEncodeAccelerator for Windows.
+  unsigned int required_input_frame_count_;
+
+  gfx::Size input_coded_size_;
+  gfx::Size input_visible_size_;
+
+  size_t output_buffer_size_;
+
+  base::flat_map<base::TimeDelta, WebrtcVideoEncoder::EncodeCallback>
+      callbacks_;
+
+  base::WeakPtrFactory<WebrtcVideoEncoderGpu> weak_factory_;
+
+  DISALLOW_COPY_AND_ASSIGN(WebrtcVideoEncoderGpu);
+};
+
+}  // namespace remoting
+
+#endif  // REMOTING_CODEC_WEBRTC_VIDEO_ENCODER_GPU_H_