Proof-of-concept WebrtcVideoEncoderGpu
This is the base implementation of a proof-of-concept
WebrtcVideoEncoder utilizing the Chrome Media VEA framework for video
encode acceleration.
The current implementation, by itself, does not
work - the main reason for this is that the VEA implementation for
Windows (the primary platform I was testing for) is tightly coupled to
a Chrome GPU process that doesn't exist in Chrome Remote Desktop's
process model. (Indeed this may be the case for other platforms, too -
I've only been working with Windows.) As a consequence, thread checks
and shared memory allocation needed to be refactored/removed. A
subsequent change refactors the Windows VEA and WebrtcVideoEncoderGpu
to bring them to a working, proof-of-concept state.
This change exists to exemplify how WebrtcVideoEncoderGpu _should_ be
implemented, in the case where a separate GPU process exists and
shared memory is used. Going forward, we may choose to incorporate
such a process into the CRD process model; thus, this change is kept
for posterity.
Cq-Include-Trybots: master.tryserver.chromium.android:android_optional_gpu_tests_rel;master.tryserver.chromium.linux:linux_optional_gpu_tests_rel;master.tryserver.chromium.mac:mac_optional_gpu_tests_rel;master.tryserver.chromium.win:win_optional_gpu_tests_rel
Change-Id: I277f3684640a2f290c51cda95c368488c6438f96
Reviewed-on: https://chromium-review.googlesource.com/583828
Commit-Queue: Gus Smith <gusss@google.com>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Reviewed-by: John Bauman <jbauman@chromium.org>
Reviewed-by: Joe Downing <joedow@chromium.org>
Reviewed-by: Jamie Walch <jamiewalch@chromium.org>
Cr-Commit-Position: refs/heads/master@{#492172}
diff --git a/media/gpu/BUILD.gn b/media/gpu/BUILD.gn
index f2adbb9..0fcf70a 100644
--- a/media/gpu/BUILD.gn
+++ b/media/gpu/BUILD.gn
@@ -121,6 +121,7 @@
"//content/renderer:*",
"//media/gpu/ipc/*",
"//media/mojo/*",
+ "//remoting/codec:encoder",
":*",
]
diff --git a/remoting/codec/BUILD.gn b/remoting/codec/BUILD.gn
index 1eee958..e463e2e 100644
--- a/remoting/codec/BUILD.gn
+++ b/remoting/codec/BUILD.gn
@@ -36,6 +36,19 @@
"//ui/gfx:color_space",
]
+ # Currently, building WebrtcVideoEncoderGpu is only supported on Windows and
+ # Linux, and encoding with WebrtcVideoEncoderGpu is only supported on Windows.
+ if (is_win || is_linux) {
+ sources += [
+ "webrtc_video_encoder_gpu.cc",
+ "webrtc_video_encoder_gpu.h",
+ ]
+ deps += [
+ "//gpu/command_buffer/service",
+ "//media/gpu",
+ ]
+ }
+
# Opus depends on //media, which is not a supported include for iOS.
# Also disabled it on Android, to avoid dependency on //media.
# TODO(sergeyu): Cleanup host-only deps in client targets.
diff --git a/remoting/codec/DEPS b/remoting/codec/DEPS
index 91cf8b1..19e100787 100644
--- a/remoting/codec/DEPS
+++ b/remoting/codec/DEPS
@@ -5,4 +5,7 @@
"+google/protobuf",
"+third_party/opus",
"+third_party/webrtc",
+ "+gpu/command_buffer/service/gpu_preferences.h",
+ "+media/video",
+ "+media/gpu",
]
diff --git a/remoting/codec/webrtc_video_encoder_gpu.cc b/remoting/codec/webrtc_video_encoder_gpu.cc
new file mode 100644
index 0000000..7580c70
--- /dev/null
+++ b/remoting/codec/webrtc_video_encoder_gpu.cc
@@ -0,0 +1,219 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "remoting/codec/webrtc_video_encoder_gpu.h"
+
+#include <utility>
+#include "base/bind.h"
+#include "base/bind_helpers.h"
+#include "base/logging.h"
+#include "base/memory/ptr_util.h"
+#include "gpu/command_buffer/service/gpu_preferences.h"
+#include "media/gpu/gpu_video_encode_accelerator_factory.h"
+#include "third_party/libyuv/include/libyuv/convert_from_argb.h"
+#include "third_party/webrtc/modules/desktop_capture/desktop_frame.h"
+#include "third_party/webrtc/modules/desktop_capture/desktop_geometry.h"
+
+namespace {
+// Currently, the frame scheduler only encodes a single frame at a time. Thus,
+// there's no reason to have this set to anything greater than one.
+const int kWebrtcVideoEncoderGpuOutputBufferCount = 1;
+
+void ArgbToI420(const webrtc::DesktopFrame& frame,
+ scoped_refptr<media::VideoFrame> video_frame) {
+ const uint8_t* rgb_data = frame.data();
+ const int rgb_stride = frame.stride();
+ const int y_stride = video_frame->stride(0);
+ DCHECK_EQ(video_frame->stride(1), video_frame->stride(2));
+ const int uv_stride = video_frame->stride(1);
+ uint8_t* y_data = video_frame->data(0);
+ uint8_t* u_data = video_frame->data(1);
+ uint8_t* v_data = video_frame->data(2);
+ libyuv::ARGBToI420(rgb_data, rgb_stride, y_data, y_stride, u_data, uv_stride,
+ v_data, uv_stride, video_frame->visible_rect().width(),
+ video_frame->visible_rect().height());
+}
+} // namespace
+
+namespace remoting {
+
+WebrtcVideoEncoderGpu::WebrtcVideoEncoderGpu(
+ media::VideoCodecProfile codec_profile)
+ : state_(UNINITIALIZED),
+ codec_profile_(codec_profile),
+ weak_factory_(this) {}
+
+WebrtcVideoEncoderGpu::~WebrtcVideoEncoderGpu() {}
+
+// TODO(gusss): Implement either a software fallback or some sort of delay if
+// the hardware encoder crashes.
+// Bug: crbug.com/751870
+void WebrtcVideoEncoderGpu::Encode(std::unique_ptr<webrtc::DesktopFrame> frame,
+ const FrameParams& params,
+ WebrtcVideoEncoder::EncodeCallback done) {
+ DCHECK(frame);
+ DCHECK(done);
+ DCHECK_GT(params.duration, base::TimeDelta::FromMilliseconds(0));
+
+ if (state_ == INITIALIZATION_ERROR) {
+ DLOG(ERROR) << "Encoder failed to initialize; dropping encode request";
+ std::move(done).Run(nullptr);
+ return;
+ }
+
+ DVLOG(3) << __func__ << " bitrate = " << params.bitrate_kbps << ", "
+ << "duration = " << params.duration << ", "
+ << "key_frame = " << params.key_frame;
+
+ if (state_ == UNINITIALIZED ||
+ input_visible_size_.width() != frame->size().width() ||
+ input_visible_size_.height() != frame->size().height()) {
+ DVLOG(3) << __func__ << " Currently not initialized for frame size "
+ << frame->size().width() << "x" << frame->size().height()
+ << ". Initializing.";
+ input_visible_size_ =
+ gfx::Size(frame->size().width(), frame->size().height());
+
+ pending_encode_ = base::BindOnce(&WebrtcVideoEncoderGpu::Encode,
+ weak_factory_.GetWeakPtr(),
+ std::move(frame), params, std::move(done));
+
+ BeginInitialization();
+
+ return;
+ }
+
+ // If we get to this point and state_ != INITIALIZED, we may be attempting to
+ // have multiple outstanding encode requests, which is not currently
+ // supported. The current assumption is that the FrameScheduler will wait for
+ // an Encode to finish before attempting another.
+ DCHECK_EQ(state_, INITIALIZED);
+
+ scoped_refptr<media::VideoFrame> video_frame = media::VideoFrame::CreateFrame(
+ media::VideoPixelFormat::PIXEL_FORMAT_I420, input_coded_size_,
+ gfx::Rect(input_visible_size_), input_visible_size_, base::TimeDelta());
+
+ base::TimeDelta new_timestamp = previous_timestamp_ + params.duration;
+ video_frame->set_timestamp(new_timestamp);
+ previous_timestamp_ = new_timestamp;
+
+ ArgbToI420(*frame, video_frame);
+
+ callbacks_[video_frame->timestamp()] = std::move(done);
+
+ video_encode_accelerator_->Encode(video_frame, /*force_keyframe=*/false);
+}
+
+void WebrtcVideoEncoderGpu::RequireBitstreamBuffers(
+ unsigned int input_count,
+ const gfx::Size& input_coded_size,
+ size_t output_buffer_size) {
+ DCHECK(state_ == INITIALIZING);
+
+ DVLOG(3) << __func__ << ", "
+ << "input_count = " << input_count << ", "
+ << "input_coded_size = " << input_coded_size.width() << "x"
+ << input_coded_size.height() << ", "
+ << "output_buffer_size = " << output_buffer_size;
+
+ required_input_frame_count_ = input_count;
+ input_coded_size_ = input_coded_size;
+ output_buffer_size_ = output_buffer_size;
+
+ output_buffers_.clear();
+
+ for (unsigned int i = 0; i < kWebrtcVideoEncoderGpuOutputBufferCount; ++i) {
+ auto shm = base::MakeUnique<base::SharedMemory>();
+ // TODO(gusss): Do we need to handle mapping failure more gracefully?
+ // LOG_ASSERT will simply cause a crash.
+ LOG_ASSERT(shm->CreateAndMapAnonymous(output_buffer_size_));
+ output_buffers_.push_back(std::move(shm));
+ }
+
+ for (size_t i = 0; i < output_buffers_.size(); ++i) {
+ UseOutputBitstreamBufferId(i);
+ }
+
+ state_ = INITIALIZED;
+
+ if (pending_encode_)
+ std::move(pending_encode_).Run();
+}
+
+void WebrtcVideoEncoderGpu::BitstreamBufferReady(int32_t bitstream_buffer_id,
+ size_t payload_size,
+ bool key_frame,
+ base::TimeDelta timestamp) {
+ DVLOG(3) << __func__ << " bitstream_buffer_id = " << bitstream_buffer_id
+ << ", "
+ << "payload_size = " << payload_size << ", "
+ << "key_frame = " << key_frame << ", "
+ << "timestamp ms = " << timestamp.InMilliseconds();
+
+ std::unique_ptr<EncodedFrame> encoded_frame =
+ base::MakeUnique<EncodedFrame>();
+ base::SharedMemory* output_buffer =
+ output_buffers_[bitstream_buffer_id].get();
+ encoded_frame->data.assign(reinterpret_cast<char*>(output_buffer->memory()),
+ payload_size);
+ encoded_frame->key_frame = key_frame;
+ encoded_frame->size = webrtc::DesktopSize(input_coded_size_.width(),
+ input_coded_size_.height());
+
+ UseOutputBitstreamBufferId(bitstream_buffer_id);
+
+ auto callback_it = callbacks_.find(timestamp);
+ DCHECK(callback_it != callbacks_.end())
+ << "Callback not found for timestamp " << timestamp;
+ std::move(std::get<1>(*callback_it)).Run(std::move(encoded_frame));
+ callbacks_.erase(timestamp);
+}
+
+void WebrtcVideoEncoderGpu::NotifyError(
+ media::VideoEncodeAccelerator::Error error) {
+ LOG(ERROR) << __func__ << " error: " << error;
+}
+
+void WebrtcVideoEncoderGpu::BeginInitialization() {
+ DVLOG(3) << __func__;
+
+ media::VideoPixelFormat input_format =
+ media::VideoPixelFormat::PIXEL_FORMAT_I420;
+ // TODO(gusss): implement some logical way to set an initial bitrate.
+ uint32_t initial_bitrate = 8 * 1024 * 8;
+ gpu::GpuPreferences gpu_preferences;
+
+ video_encode_accelerator_ =
+ media::GpuVideoEncodeAcceleratorFactory::CreateVEA(
+ input_format, input_visible_size_, codec_profile_, initial_bitrate,
+ this, gpu_preferences);
+
+ if (!video_encode_accelerator_) {
+ LOG(ERROR) << "Could not create VideoEncodeAccelerator";
+ state_ = INITIALIZATION_ERROR;
+ return;
+ }
+
+ state_ = INITIALIZING;
+}
+
+void WebrtcVideoEncoderGpu::UseOutputBitstreamBufferId(
+ int32_t bitstream_buffer_id) {
+ DVLOG(3) << __func__ << " id=" << bitstream_buffer_id;
+ video_encode_accelerator_->UseOutputBitstreamBuffer(media::BitstreamBuffer(
+ bitstream_buffer_id, output_buffers_[bitstream_buffer_id]->handle(),
+ output_buffer_size_));
+}
+
+// static
+std::unique_ptr<WebrtcVideoEncoderGpu> WebrtcVideoEncoderGpu::CreateForH264() {
+ DVLOG(3) << __func__;
+
+ // TODO(gusss): what profile should be picked here? Currently, baseline was
+ // chosen arbitrarily.
+ return base::WrapUnique(new WebrtcVideoEncoderGpu(
+ media::VideoCodecProfile::H264PROFILE_BASELINE));
+}
+
+} // namespace remoting
diff --git a/remoting/codec/webrtc_video_encoder_gpu.h b/remoting/codec/webrtc_video_encoder_gpu.h
new file mode 100644
index 0000000..82a7f19
--- /dev/null
+++ b/remoting/codec/webrtc_video_encoder_gpu.h
@@ -0,0 +1,109 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef REMOTING_CODEC_WEBRTC_VIDEO_ENCODER_GPU_H_
+#define REMOTING_CODEC_WEBRTC_VIDEO_ENCODER_GPU_H_
+
+#include "media/video/video_encode_accelerator.h"
+#include "remoting/codec/webrtc_video_encoder.h"
+
+namespace base {
+class SharedMemory;
+}
+
+namespace remoting {
+
+// A WebrtcVideoEncoder implementation utilizing the VideoEncodeAccelerator
+// framework to do hardware-accelerated encoding.
+// A brief explanation of how this class is initialized:
+// 1. An instance of WebrtcVideoEncoderGpu is created using a static method, for
+// example CreateForH264(). The state at this point is UNINITIALIZED.
+// 2. On the first encode call, the WebrtcVideoEncoder saves the incoming
+// DesktopFrame's dimensions and thunks the encode. Before returning, it
+// calls BeginInitialization().
+// 3. In BeginInitialization(), the WebrtcVideoEncoderGpu constructs the
+// VideoEncodeAccelerator using the saved dimensions from the DesktopFrame.
+// If the VideoEncodeAccelerator is constructed
+// successfully, the state is then INITIALIZING. If not, the state is
+// INITIALIZATION_ERROR.
+// 4. Some time later, the VideoEncodeAccelerator sets itself up and is ready
+// to encode. At this point, it calls our WebrtcVideoEncoderGpu's
+// RequireBitstreamBuffers() method. Once bitstream buffers are allocated,
+// the state is INITIALIZED.
+class WebrtcVideoEncoderGpu : public WebrtcVideoEncoder,
+ public media::VideoEncodeAccelerator::Client {
+ public:
+ static std::unique_ptr<WebrtcVideoEncoderGpu> CreateForH264();
+
+ ~WebrtcVideoEncoderGpu() override;
+
+ // WebrtcVideoEncoder interface.
+ void Encode(std::unique_ptr<webrtc::DesktopFrame> frame,
+ const FrameParams& params,
+ WebrtcVideoEncoder::EncodeCallback done) override;
+
+ // VideoEncodeAccelerator::Client interface.
+ void RequireBitstreamBuffers(unsigned int input_count,
+ const gfx::Size& input_coded_size,
+ size_t output_buffer_size) override;
+ void BitstreamBufferReady(int32_t bitstream_buffer_id,
+ size_t payload_size,
+ bool key_frame,
+ base::TimeDelta timestamp) override;
+ void NotifyError(media::VideoEncodeAccelerator::Error error) override;
+
+ private:
+ enum State { UNINITIALIZED, INITIALIZING, INITIALIZED, INITIALIZATION_ERROR };
+
+ explicit WebrtcVideoEncoderGpu(media::VideoCodecProfile codec_profile);
+
+ void BeginInitialization();
+
+ void UseOutputBitstreamBufferId(int32_t bitstream_buffer_id);
+
+ State state_;
+
+ // Only after the first encode request do we know how large the incoming
+ // frames will be. Thus, we initialize after the first encode request,
+ // postponing the encode until the encoder has been initialized.
+ base::OnceClosure pending_encode_;
+
+ std::unique_ptr<media::VideoEncodeAccelerator> video_encode_accelerator_;
+
+ base::TimeDelta previous_timestamp_;
+
+ media::VideoCodecProfile codec_profile_;
+
+  // Shared memory with which the VEA transfers output to WebrtcVideoEncoderGpu.
+ std::vector<std::unique_ptr<base::SharedMemory>> output_buffers_;
+
+ // TODO(gusss): required_input_frame_count_ is currently unused; evaluate
+ // whether or not it's actually needed. This variable represents the number of
+ // frames needed by the VEA before it can start producing output.
+ // This may be important in the future, as the current frame scheduler for CRD
+ // encodes one frame at a time - it will not send the next frame in until the
+ // previous frame has been returned. It may need to be "tricked" into sending
+ // in a number of start frames.
+ // However, initial tests showed that, even if the VEA requested a number of
+ // initial frames, it still encoded and returned the first frame before
+ // getting the second frame. This may be platform-dependent - these tests were
+ // done with the MediaFoundationVideoEncodeAccelerator for Windows.
+ unsigned int required_input_frame_count_;
+
+ gfx::Size input_coded_size_;
+ gfx::Size input_visible_size_;
+
+ size_t output_buffer_size_;
+
+ base::flat_map<base::TimeDelta, WebrtcVideoEncoder::EncodeCallback>
+ callbacks_;
+
+ base::WeakPtrFactory<WebrtcVideoEncoderGpu> weak_factory_;
+
+ DISALLOW_COPY_AND_ASSIGN(WebrtcVideoEncoderGpu);
+};
+
+} // namespace remoting
+
+#endif // REMOTING_CODEC_WEBRTC_VIDEO_ENCODER_GPU_H_