// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/gpu/vaapi/vaapi_mjpeg_decode_accelerator.h"
#include <stddef.h>
#include <va/va.h>
#include <array>
#include <utility>
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/callback_helpers.h"
#include "base/containers/span.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/checked_math.h"
#include "base/numerics/safe_conversions.h"
#include "base/optional.h"
#include "base/single_thread_task_runner.h"
#include "base/threading/thread_task_runner_handle.h"
#include "base/trace_event/trace_event.h"
#include "gpu/ipc/common/gpu_memory_buffer_impl.h"
#include "gpu/ipc/common/gpu_memory_buffer_support.h"
#include "media/base/bitstream_buffer.h"
#include "media/base/unaligned_shared_memory.h"
#include "media/base/video_frame.h"
#include "media/base/video_frame_layout.h"
#include "media/base/video_types.h"
#include "media/gpu/format_utils.h"
#include "media/gpu/linux/platform_video_frame_utils.h"
#include "media/gpu/macros.h"
#include "media/gpu/vaapi/va_surface.h"
#include "media/gpu/vaapi/vaapi_image_decoder.h"
#include "media/gpu/vaapi/vaapi_utils.h"
#include "media/gpu/vaapi/vaapi_wrapper.h"
#include "third_party/libyuv/include/libyuv.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/gpu_memory_buffer.h"
#include "ui/gfx/linux/native_pixmap_dmabuf.h"
#include "ui/gfx/native_pixmap.h"
namespace media {
namespace {
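// Real VA fourccs pack four ASCII characters and are therefore never zero, so
// zero can serve as an "unsupported format" sentinel.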
constexpr uint32_t kInvalidVaFourcc = 0u;
// UMA errors that the VaapiMjpegDecodeAccelerator class reports.
enum VAJDAFailure {
VAAPI_ERROR = 0,
VAJDA_FAILURES_MAX,
};
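// The UMA enumeration macros need a boundary strictly greater than the largest
// reported sample, hence the VAJDA_FAILURES_MAX + 1 below.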
static void ReportToVAJDADecoderFailureUMA(VAJDAFailure failure) {
UMA_HISTOGRAM_ENUMERATION("Media.VAJDA.DecoderFailure", failure,
VAJDA_FAILURES_MAX + 1);
}
static void ReportToVAJDAVppFailureUMA(VAJDAFailure failure) {
UMA_HISTOGRAM_ENUMERATION("Media.VAJDA.VppFailure", failure,
VAJDA_FAILURES_MAX + 1);
}
static void ReportToVAJDAResponseToClientUMA(
chromeos_camera::MjpegDecodeAccelerator::Error response) {
UMA_HISTOGRAM_ENUMERATION(
"Media.VAJDA.ResponseToClient", response,
chromeos_camera::MjpegDecodeAccelerator::Error::MJDA_ERROR_CODE_MAX + 1);
}
static chromeos_camera::MjpegDecodeAccelerator::Error
VaapiJpegDecodeStatusToError(VaapiImageDecodeStatus status) {
switch (status) {
case VaapiImageDecodeStatus::kSuccess:
return chromeos_camera::MjpegDecodeAccelerator::Error::NO_ERRORS;
case VaapiImageDecodeStatus::kParseFailed:
return chromeos_camera::MjpegDecodeAccelerator::Error::PARSE_JPEG_FAILED;
case VaapiImageDecodeStatus::kUnsupportedSubsampling:
return chromeos_camera::MjpegDecodeAccelerator::Error::UNSUPPORTED_JPEG;
default:
return chromeos_camera::MjpegDecodeAccelerator::Error::PLATFORM_FAILURE;
}
}
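// Returns true if |image|->data_size is large enough to hold a frame of
// |image|'s dimensions in its reported fourcc; returns false for fourccs we
// don't handle.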
static bool VerifyDataSize(const VAImage* image) {
const gfx::Size dimensions(base::strict_cast<int>(image->width),
base::strict_cast<int>(image->height));
size_t min_size = 0;
if (image->format.fourcc == VA_FOURCC_I420) {
min_size = VideoFrame::AllocationSize(PIXEL_FORMAT_I420, dimensions);
} else if (image->format.fourcc == VA_FOURCC_NV12) {
min_size = VideoFrame::AllocationSize(PIXEL_FORMAT_NV12, dimensions);
} else if (image->format.fourcc == VA_FOURCC_YUY2 ||
image->format.fourcc == VA_FOURCC('Y', 'U', 'Y', 'V')) {
min_size = VideoFrame::AllocationSize(PIXEL_FORMAT_YUY2, dimensions);
} else {
return false;
}
return base::strict_cast<size_t>(image->data_size) >= min_size;
}
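// Maps a media::VideoPixelFormat to the corresponding libva fourcc, or returns
// kInvalidVaFourcc when there is no direct mapping.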
static uint32_t VideoPixelFormatToVAFourCC(VideoPixelFormat format) {
switch (format) {
case PIXEL_FORMAT_I420:
return VA_FOURCC_I420;
case PIXEL_FORMAT_YV12:
return VA_FOURCC_YV12;
case PIXEL_FORMAT_NV12:
return VA_FOURCC_NV12;
case PIXEL_FORMAT_NV21:
return VA_FOURCC_NV21;
case PIXEL_FORMAT_UYVY:
return VA_FOURCC_UYVY;
case PIXEL_FORMAT_YUY2:
return VA_FOURCC_YUY2;
case PIXEL_FORMAT_ARGB:
return VA_FOURCC_ARGB;
case PIXEL_FORMAT_XRGB:
return VA_FOURCC_XRGB;
case PIXEL_FORMAT_ABGR:
return VA_FOURCC_ABGR;
case PIXEL_FORMAT_XBGR:
return VA_FOURCC_XBGR;
default:
return kInvalidVaFourcc;
}
}
} // namespace
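// Hops to |task_runner_| if called from another thread, records the error in
// UMA, and forwards it to the client.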
void VaapiMjpegDecodeAccelerator::NotifyError(int32_t bitstream_buffer_id,
Error error) {
if (!task_runner_->BelongsToCurrentThread()) {
task_runner_->PostTask(
FROM_HERE, base::BindOnce(&VaapiMjpegDecodeAccelerator::NotifyError,
weak_this_factory_.GetWeakPtr(),
bitstream_buffer_id, error));
return;
}
VLOGF(1) << "Notifying of error " << error;
// |error| shouldn't be NO_ERRORS because successful decodes should be handled
// by VideoFrameReady().
DCHECK_NE(chromeos_camera::MjpegDecodeAccelerator::Error::NO_ERRORS, error);
ReportToVAJDAResponseToClientUMA(error);
DCHECK(client_);
client_->NotifyError(bitstream_buffer_id, error);
}
void VaapiMjpegDecodeAccelerator::VideoFrameReady(int32_t bitstream_buffer_id) {
DCHECK(task_runner_->BelongsToCurrentThread());
ReportToVAJDAResponseToClientUMA(
chromeos_camera::MjpegDecodeAccelerator::Error::NO_ERRORS);
client_->VideoFrameReady(bitstream_buffer_id);
}
VaapiMjpegDecodeAccelerator::VaapiMjpegDecodeAccelerator(
const scoped_refptr<base::SingleThreadTaskRunner>& io_task_runner)
: task_runner_(base::ThreadTaskRunnerHandle::Get()),
io_task_runner_(io_task_runner),
client_(nullptr),
decoder_thread_("VaapiMjpegDecoderThread"),
weak_this_factory_(this) {}
VaapiMjpegDecodeAccelerator::~VaapiMjpegDecodeAccelerator() {
DCHECK(task_runner_->BelongsToCurrentThread());
VLOGF(2) << "Destroying VaapiMjpegDecodeAccelerator";
weak_this_factory_.InvalidateWeakPtrs();
decoder_thread_.Stop();
}
bool VaapiMjpegDecodeAccelerator::Initialize(
chromeos_camera::MjpegDecodeAccelerator::Client* client) {
VLOGF(2);
DCHECK(task_runner_->BelongsToCurrentThread());
client_ = client;
if (!decoder_.Initialize(
base::BindRepeating(&ReportToVAJDADecoderFailureUMA, VAAPI_ERROR))) {
return false;
}
vpp_vaapi_wrapper_ = VaapiWrapper::Create(
VaapiWrapper::kVideoProcess, VAProfileNone,
base::BindRepeating(&ReportToVAJDAVppFailureUMA, VAAPI_ERROR));
if (!vpp_vaapi_wrapper_) {
VLOGF(1) << "Failed initializing VAAPI for VPP";
return false;
}
gpu_memory_buffer_support_ = std::make_unique<gpu::GpuMemoryBufferSupport>();
if (!decoder_thread_.Start()) {
VLOGF(1) << "Failed to start decoding thread.";
return false;
}
decoder_task_runner_ = decoder_thread_.task_runner();
return true;
}
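// Copies or converts the mapped |scoped_image| into |video_frame| on the CPU
// using libyuv, mapping the frame's dmabufs first when necessary.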
bool VaapiMjpegDecodeAccelerator::OutputPictureLibYuvOnTaskRunner(
std::unique_ptr<ScopedVAImage> scoped_image,
int32_t input_buffer_id,
scoped_refptr<VideoFrame> video_frame) {
DCHECK(decoder_task_runner_->BelongsToCurrentThread());
TRACE_EVENT1("jpeg", __func__, "input_buffer_id", input_buffer_id);
DCHECK(scoped_image);
const VAImage* image = scoped_image->image();
// For camera captures, we assume that the visible size is the same as the
// coded size.
DCHECK_EQ(video_frame->visible_rect().size(), video_frame->coded_size());
DCHECK_EQ(0, video_frame->visible_rect().x());
DCHECK_EQ(0, video_frame->visible_rect().y());
DCHECK(decoder_.GetScopedVASurface());
  // The decoded image size is aligned up to the JPEG MCU size, so it may be
  // larger than |video_frame|'s visible size, but must not be smaller.
  const gfx::Size visible_size = video_frame->visible_rect().size();
  if (base::strict_cast<int>(image->width) < visible_size.width() ||
      base::strict_cast<int>(image->height) < visible_size.height()) {
    VLOGF(1) << "Decoded image size is smaller than output frame size";
    return false;
  }
DCHECK(VerifyDataSize(image));
// Extract source pointers and strides.
auto* const mem =
static_cast<const uint8_t*>(scoped_image->va_buffer()->data());
std::array<const uint8_t*, VideoFrame::kMaxPlanes> src_ptrs{};
std::array<int, VideoFrame::kMaxPlanes> src_strides{};
for (uint32_t i = 0; i < image->num_planes; i++) {
src_ptrs[i] = mem + image->offsets[i];
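    // VAImage pitches are uint32_t while libyuv strides are int, so guard the
    // narrowing conversion.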
if (!base::CheckedNumeric<uint32_t>(image->pitches[i])
.AssignIfValid(&src_strides[i])) {
VLOGF(1) << "Can't extract the strides";
return false;
}
}
// Extract destination pointers and strides.
std::array<uint8_t*, VideoFrame::kMaxPlanes> dst_ptrs{};
std::array<int, VideoFrame::kMaxPlanes> dst_strides{};
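  // Unmaps a dmabuf-backed |video_frame| (if one is mapped below) when this
  // function returns, on any return path.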
base::ScopedClosureRunner buffer_unmapper;
if (video_frame->HasDmaBufs()) {
// Dmabuf-backed frame needs to be mapped for SW access.
DCHECK(gpu_memory_buffer_support_);
base::Optional<gfx::BufferFormat> gfx_format =
VideoPixelFormatToGfxBufferFormat(video_frame->format());
if (!gfx_format) {
VLOGF(1) << "Unsupported format: " << video_frame->format();
return false;
}
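    // Import the frame's dmabufs as a GpuMemoryBuffer so its planes can be
    // mapped for CPU access.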
std::unique_ptr<gpu::GpuMemoryBufferImpl> gmb =
gpu_memory_buffer_support_->CreateGpuMemoryBufferImplFromHandle(
CreateGpuMemoryBufferHandle(video_frame.get()),
video_frame->coded_size(), *gfx_format,
gfx::BufferUsage::SCANOUT_CPU_READ_WRITE, base::DoNothing());
if (!gmb) {
VLOGF(1) << "Failed to create GPU memory buffer";
return false;
}
if (!gmb->Map()) {
VLOGF(1) << "Failed to map GPU memory buffer";
return false;
}
for (size_t i = 0; i < video_frame->layout().num_planes(); i++) {
dst_ptrs[i] = static_cast<uint8_t*>(gmb->memory(i));
dst_strides[i] = gmb->stride(i);
}
buffer_unmapper.ReplaceClosure(
base::BindOnce(&gpu::GpuMemoryBufferImpl::Unmap, std::move(gmb)));
} else {
for (size_t i = 0; i < video_frame->layout().num_planes(); i++) {
dst_ptrs[i] = video_frame->visible_data(i);
dst_strides[i] = video_frame->stride(i);
}
}
switch (image->format.fourcc) {
case VA_FOURCC_I420:
DCHECK_EQ(image->num_planes, 3u);
switch (video_frame->format()) {
case PIXEL_FORMAT_I420:
DCHECK_EQ(video_frame->layout().num_planes(), 3u);
if (libyuv::I420Copy(src_ptrs[0], src_strides[0], src_ptrs[1],
src_strides[1], src_ptrs[2], src_strides[2],
dst_ptrs[0], dst_strides[0], dst_ptrs[1],
dst_strides[1], dst_ptrs[2], dst_strides[2],
visible_size.width(), visible_size.height())) {
VLOGF(1) << "I420Copy failed";
return false;
}
break;
case PIXEL_FORMAT_NV12:
DCHECK_EQ(video_frame->layout().num_planes(), 2u);
if (libyuv::I420ToNV12(src_ptrs[0], src_strides[0], src_ptrs[1],
src_strides[1], src_ptrs[2], src_strides[2],
dst_ptrs[0], dst_strides[0], dst_ptrs[1],
dst_strides[1], visible_size.width(),
visible_size.height())) {
VLOGF(1) << "I420ToNV12 failed";
return false;
}
break;
default:
VLOGF(1) << "Can't convert image from I420 to "
<< video_frame->format();
return false;
}
break;
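    // Some drivers report the packed 4:2:2 'YUYV' fourcc instead of
    // VA_FOURCC_YUY2; the memory layout is the same, so treat both identically.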
case VA_FOURCC_YUY2:
case VA_FOURCC('Y', 'U', 'Y', 'V'):
DCHECK_EQ(image->num_planes, 1u);
switch (video_frame->format()) {
case PIXEL_FORMAT_I420:
DCHECK_EQ(video_frame->layout().num_planes(), 3u);
if (libyuv::YUY2ToI420(src_ptrs[0], src_strides[0], dst_ptrs[0],
dst_strides[0], dst_ptrs[1], dst_strides[1],
dst_ptrs[2], dst_strides[2],
visible_size.width(), visible_size.height())) {
VLOGF(1) << "YUY2ToI420 failed";
return false;
}
break;
case PIXEL_FORMAT_NV12:
DCHECK_EQ(video_frame->layout().num_planes(), 2u);
if (libyuv::YUY2ToNV12(src_ptrs[0], src_strides[0], dst_ptrs[0],
dst_strides[0], dst_ptrs[1], dst_strides[1],
visible_size.width(), visible_size.height())) {
VLOGF(1) << "YUY2ToNV12 failed";
return false;
}
break;
default:
VLOGF(1) << "Can't convert image from YUYV to "
<< video_frame->format();
return false;
}
break;
default:
VLOGF(1) << "Can't convert image from "
<< FourccToString(image->format.fourcc) << " to "
<< video_frame->format();
return false;
}
task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&VaapiMjpegDecodeAccelerator::VideoFrameReady,
weak_this_factory_.GetWeakPtr(), input_buffer_id));
return true;
}
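// Imports |video_frame|'s dmabufs as a VA surface and blits the decoded
// |surface| into it using the video post-processing (VPP) pipeline.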
bool VaapiMjpegDecodeAccelerator::OutputPictureVppOnTaskRunner(
const ScopedVASurface* surface,
int32_t input_buffer_id,
scoped_refptr<VideoFrame> video_frame) {
DCHECK(decoder_task_runner_->BelongsToCurrentThread());
DCHECK(surface);
TRACE_EVENT1("jpeg", __func__, "input_buffer_id", input_buffer_id);
// Bind a VA surface to |video_frame|.
scoped_refptr<gfx::NativePixmap> pixmap =
CreateNativePixmapDmaBuf(video_frame.get());
if (!pixmap) {
VLOGF(1) << "Cannot create native pixmap for output buffer";
return false;
}
scoped_refptr<VASurface> output_surface =
vpp_vaapi_wrapper_->CreateVASurfaceForPixmap(pixmap);
if (!output_surface) {
VLOGF(1) << "Cannot create VA surface for output buffer";
return false;
}
// Use VPP to blit the visible region within |surface| into |output_surface|.
// BlitSurface() scales rather than crops when the source and destination sizes
// differ, so we wrap both surfaces with the blit size to make the operation an
// effective crop.
const gfx::Size& blit_size = video_frame->visible_rect().size();
if (surface->size().width() < blit_size.width() ||
surface->size().height() < blit_size.height()) {
VLOGF(1) << "Decoded surface size is smaller than target size";
return false;
}
scoped_refptr<VASurface> src_surface = base::MakeRefCounted<VASurface>(
surface->id(), blit_size, surface->format(),
base::DoNothing() /* release_cb */);
scoped_refptr<VASurface> dst_surface = base::MakeRefCounted<VASurface>(
output_surface->id(), blit_size, output_surface->format(),
base::DoNothing() /* release_cb */);
// We should call vaSyncSurface() when passing a surface between contexts. See:
// https://lists.01.org/pipermail/intel-vaapi-media/2019-June/000131.html
if (!vpp_vaapi_wrapper_->SyncSurface(src_surface->id())) {
VLOGF(1) << "Cannot sync VPP input surface";
return false;
}
if (!vpp_vaapi_wrapper_->BlitSurface(src_surface, dst_surface)) {
VLOGF(1) << "Cannot convert decoded image into output buffer";
return false;
}
// Sync the target surface since the buffer is being returned to the client.
if (!vpp_vaapi_wrapper_->SyncSurface(dst_surface->id())) {
VLOGF(1) << "Cannot sync VPP output surface";
return false;
}
task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&VaapiMjpegDecodeAccelerator::VideoFrameReady,
weak_this_factory_.GetWeakPtr(), input_buffer_id));
return true;
}
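// Runs on |decoder_thread_|: decodes the JPEG in |shm| into a VA surface, then
// outputs through VPP when supported, falling back to libyuv otherwise.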
void VaapiMjpegDecodeAccelerator::DecodeTask(
int32_t bitstream_buffer_id,
std::unique_ptr<UnalignedSharedMemory> shm,
scoped_refptr<VideoFrame> video_frame) {
DVLOGF(4);
DCHECK(decoder_task_runner_->BelongsToCurrentThread());
TRACE_EVENT0("jpeg", "DecodeTask");
// TODO(andrescj): validate that the video frame's visible size is the same as
// the parsed JPEG's visible size when it is returned from Decode(), and
// remove the size checks in OutputPicture*().
VaapiImageDecodeStatus status = decoder_.Decode(
base::make_span(static_cast<const uint8_t*>(shm->memory()), shm->size()));
if (status != VaapiImageDecodeStatus::kSuccess) {
NotifyError(bitstream_buffer_id, VaapiJpegDecodeStatusToError(status));
return;
}
const ScopedVASurface* surface = decoder_.GetScopedVASurface();
DCHECK(surface);
DCHECK(surface->IsValid());
// For DMA-buf backed |video_frame|, we will import it as a VA surface and use
// VPP to convert the decoded |surface| into it, if the formats and sizes are
// supported.
const uint32_t video_frame_va_fourcc =
VideoPixelFormatToVAFourCC(video_frame->format());
if (video_frame_va_fourcc == kInvalidVaFourcc) {
VLOGF(1) << "Unsupported video frame format: " << video_frame->format();
NotifyError(bitstream_buffer_id, PLATFORM_FAILURE);
return;
}
// TODO(kamesan): move HasDmaBufs() to DCHECK when we deprecate
// shared-memory-backed video frame.
if (video_frame->HasDmaBufs() &&
VaapiWrapper::IsVppResolutionAllowed(surface->size()) &&
VaapiWrapper::IsVppSupportedForJpegDecodedSurfaceToFourCC(
surface->format(), video_frame_va_fourcc)) {
if (!OutputPictureVppOnTaskRunner(surface, bitstream_buffer_id,
std::move(video_frame))) {
VLOGF(1) << "Output picture using VPP failed";
NotifyError(bitstream_buffer_id, PLATFORM_FAILURE);
}
return;
}
// Fall back to converting the image with libyuv. This happens when:
// 1. |video_frame| is backed by shared memory.
// 2. VPP doesn't support the format conversion. This is intended for the AMD
//    VAAPI driver, whose VPP only supports converting decoded 4:2:0 JPEGs.
std::unique_ptr<ScopedVAImage> image =
decoder_.GetImage(video_frame_va_fourcc, &status);
if (status != VaapiImageDecodeStatus::kSuccess) {
NotifyError(bitstream_buffer_id, VaapiJpegDecodeStatusToError(status));
return;
}
if (!OutputPictureLibYuvOnTaskRunner(std::move(image), bitstream_buffer_id,
std::move(video_frame))) {
VLOGF(1) << "Output picture using libyuv failed";
NotifyError(bitstream_buffer_id, PLATFORM_FAILURE);
}
}
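// Entry point on the IO thread: validates the input, maps the bitstream
// buffer, and posts DecodeTask() to the decoder thread.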
void VaapiMjpegDecodeAccelerator::Decode(
BitstreamBuffer bitstream_buffer,
scoped_refptr<VideoFrame> video_frame) {
DCHECK(io_task_runner_->BelongsToCurrentThread());
TRACE_EVENT1("jpeg", "Decode", "input_id", bitstream_buffer.id());
DVLOGF(4) << "Mapping new input buffer id: " << bitstream_buffer.id()
<< " size: " << bitstream_buffer.size();
if (bitstream_buffer.id() < 0) {
VLOGF(1) << "Invalid bitstream_buffer, id: " << bitstream_buffer.id();
NotifyError(bitstream_buffer.id(), INVALID_ARGUMENT);
return;
}
if ((video_frame->visible_rect().width() & 1) ||
(video_frame->visible_rect().height() & 1)) {
VLOGF(1) << "Video frame visible size has odd dimension";
NotifyError(bitstream_buffer.id(), PLATFORM_FAILURE);
return;
}
// UnalignedSharedMemory takes ownership of the region extracted from
// |bitstream_buffer|.
auto shm = std::make_unique<UnalignedSharedMemory>(
bitstream_buffer.TakeRegion(), bitstream_buffer.size(),
false /* read_only */);
if (!shm->MapAt(bitstream_buffer.offset(), bitstream_buffer.size())) {
VLOGF(1) << "Failed to map input buffer";
NotifyError(bitstream_buffer.id(), UNREADABLE_INPUT);
return;
}
// It's safe to use base::Unretained(this) because |decoder_task_runner_| runs
// tasks on |decoder_thread_| which is stopped in the destructor of |this|.
decoder_task_runner_->PostTask(
FROM_HERE, base::BindOnce(&VaapiMjpegDecodeAccelerator::DecodeTask,
base::Unretained(this), bitstream_buffer.id(),
std::move(shm), std::move(video_frame)));
}
bool VaapiMjpegDecodeAccelerator::IsSupported() {
return VaapiWrapper::IsDecodeSupported(VAProfileJPEGBaseline);
}
} // namespace media