// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "gpu/ipc/service/image_decode_accelerator_stub.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <new>
#include <optional>
#include <utility>
#include <vector>
#include "base/containers/span.h"
#include "base/feature_list.h"
#include "base/functional/bind.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/memory/raw_ptr.h"
#include "base/notimplemented.h"
#include "base/numerics/checked_math.h"
#include "base/numerics/safe_conversions.h"
#include "base/task/single_thread_task_runner.h"
#include "build/build_config.h"
#include "components/viz/common/resources/shared_image_format_utils.h"
#include "gpu/command_buffer/common/constants.h"
#include "gpu/command_buffer/common/context_result.h"
#include "gpu/command_buffer/common/discardable_handle.h"
#include "gpu/command_buffer/common/scheduling_priority.h"
#include "gpu/command_buffer/common/shared_image_usage.h"
#include "gpu/command_buffer/common/sync_token.h"
#include "gpu/command_buffer/service/context_group.h"
#include "gpu/command_buffer/service/decoder_context.h"
#include "gpu/command_buffer/service/gr_shader_cache.h"
#include "gpu/command_buffer/service/scheduler.h"
#include "gpu/command_buffer/service/service_transfer_cache.h"
#include "gpu/command_buffer/service/shared_context_state.h"
#include "gpu/command_buffer/service/shared_image/shared_image_factory.h"
#include "gpu/command_buffer/service/shared_image/shared_image_representation.h"
#include "gpu/command_buffer/service/task_graph.h"
#include "gpu/config/gpu_finch_features.h"
#include "gpu/ipc/common/surface_handle.h"
#include "gpu/ipc/service/command_buffer_stub.h"
#include "gpu/ipc/service/gpu_channel.h"
#include "gpu/ipc/service/gpu_channel_manager.h"
#include "gpu/ipc/service/shared_image_stub.h"
#include "third_party/abseil-cpp/absl/cleanup/cleanup.h"
#include "third_party/skia/include/core/SkColorSpace.h"
#include "third_party/skia/include/core/SkImage.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkRefCnt.h"
#include "third_party/skia/include/gpu/ganesh/GrBackendSemaphore.h"
#include "third_party/skia/include/gpu/ganesh/GrBackendSurface.h"
#include "third_party/skia/include/gpu/ganesh/GrTypes.h"
#include "ui/gfx/color_space.h"
#include "ui/gfx/gpu_memory_buffer_handle.h"
#if BUILDFLAG(IS_CHROMEOS)
#include "ui/gfx/linux/native_pixmap_dmabuf.h"
#endif
namespace gpu {
class Buffer;
#if BUILDFLAG(IS_CHROMEOS)
namespace {
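// Keeps the SkiaImageRepresentation and its ScopedReadAccess alive while the
// planar SkImages created from them are still in use. CleanUpResource() runs
// once per plane and deletes this context after the last callback fires.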
struct CleanUpContext {
scoped_refptr<base::SingleThreadTaskRunner> main_task_runner_;
raw_ptr<SharedContextState> shared_context_state_ = nullptr;
std::unique_ptr<SkiaImageRepresentation> skia_representation_;
std::unique_ptr<SkiaImageRepresentation::ScopedReadAccess>
skia_scoped_access_;
size_t num_callbacks_pending_;
CleanUpContext(scoped_refptr<base::SingleThreadTaskRunner> main_task_runner,
raw_ptr<SharedContextState> shared_context_state,
std::unique_ptr<SkiaImageRepresentation> skia_representation,
std::unique_ptr<SkiaImageRepresentation::ScopedReadAccess>
skia_scoped_access)
: main_task_runner_(main_task_runner),
shared_context_state_(shared_context_state),
skia_representation_(std::move(skia_representation)),
skia_scoped_access_(std::move(skia_scoped_access)),
num_callbacks_pending_(skia_representation_->NumPlanesExpected()) {}
};
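
// Release callback handed to CreateSkImageForPlane(). Skia invokes it on the
// main thread when it is done with a plane's SkImage.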
void CleanUpResource(SkImages::ReleaseContext context) {
auto* clean_up_context = static_cast<CleanUpContext*>(context);
DCHECK(clean_up_context->main_task_runner_->BelongsToCurrentThread());
// The context should be current as we set it to be current earlier, and this
// call is coming from Skia itself.
DCHECK(
clean_up_context->shared_context_state_->IsCurrent(/*surface=*/nullptr));
clean_up_context->skia_scoped_access_->ApplyBackendSurfaceEndState();
CHECK_GT(clean_up_context->num_callbacks_pending_, 0u);
clean_up_context->num_callbacks_pending_--;
if (clean_up_context->num_callbacks_pending_ == 0u) {
delete clean_up_context;
}
}
} // namespace
#endif
// NOTE: `worker_`, `scheduler_`, and `channel_` must not be dereferenced
// within the constructor as doing so requires that `lock_` be held, which it's
// not here.
ImageDecodeAcceleratorStub::ImageDecodeAcceleratorStub(
ImageDecodeAcceleratorWorker* worker,
GpuChannel* channel,
int32_t route_id)
: worker_(worker),
scheduler_(channel->scheduler()),
command_buffer_id_(
CommandBufferIdFromChannelAndRoute(channel->client_id(), route_id)),
sequence_(
channel->scheduler()->CreateSequence(SchedulingPriority::kLow,
channel->task_runner(),
CommandBufferNamespace::GPU_IO,
command_buffer_id_)),
channel_(channel),
main_task_runner_(channel->task_runner()),
io_task_runner_(channel->io_task_runner()) {
// We need the sequence to be initially disabled so that when we schedule a
// task to release the decode sync token, it doesn't run immediately (we want
// it to run when the decode is done).
channel->scheduler()->DisableSequence(sequence_);
}
void ImageDecodeAcceleratorStub::Shutdown() {
DCHECK(main_task_runner_->BelongsToCurrentThread());
base::AutoLock lock(lock_);
scheduler_->DestroySequence(sequence_);
// Clear out raw_ptr references to objects that may be destroyed on the main
// thread before this object is destroyed on the IO thread.
channel_ = nullptr;
worker_ = nullptr;
scheduler_ = nullptr;
}
ImageDecodeAcceleratorStub::~ImageDecodeAcceleratorStub() {
DCHECK(!channel_);
}
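
// Called on the IO thread. Starts the decode on the worker and schedules a
// task on the (currently disabled) sequence that releases the decode sync
// token once the completed decode has been processed.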
void ImageDecodeAcceleratorStub::ScheduleImageDecode(
mojom::ScheduleImageDecodeParamsPtr params,
uint64_t release_count) {
DCHECK(io_task_runner_->BelongsToCurrentThread());
const SyncToken decode_sync_token(CommandBufferNamespace::GPU_IO,
command_buffer_id_, release_count);
base::AutoLock lock(lock_);
if (!base::FeatureList::IsEnabled(
features::kVaapiJpegImageDecodeAcceleration) &&
!base::FeatureList::IsEnabled(
features::kVaapiWebPImageDecodeAcceleration)) {
ScheduleSyncTokenRelease(decode_sync_token);
return;
}
if (!channel_) {
// The channel is no longer available, so don't do any decoding.
ScheduleSyncTokenRelease(decode_sync_token);
return;
}
mojom::ScheduleImageDecodeParams& decode_params = *params;
// Start the actual decode.
worker_->Decode(
std::move(decode_params.encoded_data), decode_params.output_size,
base::BindOnce(&ImageDecodeAcceleratorStub::OnDecodeCompleted,
base::WrapRefCounted(this), decode_params.output_size));
// Schedule a task to eventually release the decode sync token. Note that this
// task won't run until the sequence is re-enabled when a decode completes.
const SyncToken discardable_handle_sync_token(
CommandBufferNamespace::GPU_IO,
CommandBufferIdFromChannelAndRoute(channel_->client_id(),
decode_params.raster_decoder_route_id),
decode_params.discardable_handle_release_count);
scheduler_->ScheduleTask(Scheduler::Task(
sequence_,
base::BindOnce(&ImageDecodeAcceleratorStub::ProcessCompletedDecode,
base::WrapRefCounted(this), std::move(params)),
/*sync_token_fences=*/{discardable_handle_sync_token},
decode_sync_token));
}
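
// Runs on the main thread once a decode has completed and the sequence has
// been enabled. Takes the oldest completed decode, wraps its planes in a
// SharedImage-backed SkiaImageRepresentation, and inserts a locked entry into
// the transfer cache (Chrome OS only).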
void ImageDecodeAcceleratorStub::ProcessCompletedDecode(
mojom::ScheduleImageDecodeParamsPtr params_ptr) {
DCHECK(main_task_runner_->BelongsToCurrentThread());
base::AutoLock lock(lock_);
if (!channel_) {
// The channel is no longer available, so don't do anything.
return;
}
mojom::ScheduleImageDecodeParams& params = *params_ptr;
DCHECK(!pending_completed_decodes_.empty());
std::unique_ptr<ImageDecodeAcceleratorWorker::DecodeResult> completed_decode =
std::move(pending_completed_decodes_.front());
pending_completed_decodes_.pop();
// Regardless of what happens next, make sure the sequence gets disabled if
// there are no more completed decodes after this. Capturing |this| in the
// absl::Cleanup below is safe because *this outlives the cleanup object.
// The decode sync token gets released automatically by the scheduler on task
// completion.
absl::Cleanup finalizer = [this] {
lock_.AssertAcquired();
FinishCompletedDecode();
};
if (!completed_decode) {
DLOG(ERROR) << "The image could not be decoded";
return;
}
// TODO(crbug.com/40641220): the output_size parameter is going away, so this
// validation is not needed. Checking if the size is too small should happen
// at the level of the decoder (since that's the component that's aware of its
// own capabilities).
if (params.output_size.IsEmpty()) {
DLOG(ERROR) << "Output dimensions are too small";
return;
}
// Gain access to the transfer cache through the GpuChannelManager's
// SharedContextState. We will also use that to get a GrContext that will be
// used for Skia operations.
ContextResult context_result;
scoped_refptr<SharedContextState> shared_context_state =
channel_->gpu_channel_manager()->GetSharedContextState(&context_result);
if (context_result != ContextResult::kSuccess) {
DLOG(ERROR) << "Unable to obtain the SharedContextState";
return;
}
DCHECK(shared_context_state);
if (!shared_context_state->gr_context()) {
DLOG(ERROR) << "Could not get the GrContext";
return;
}
if (!shared_context_state->MakeCurrent(nullptr /* surface */)) {
DLOG(ERROR) << "Could not MakeCurrent the shared context";
return;
}
std::vector<sk_sp<SkImage>> plane_sk_images;
std::optional<base::ScopedClosureRunner> notify_gl_state_changed;
#if BUILDFLAG(IS_CHROMEOS)
const size_t num_planes =
completed_decode->handle.native_pixmap_handle().planes.size();
DCHECK_EQ(completed_decode->si_format.NumberOfPlanes(),
static_cast<int>(num_planes));
// We should notify the SharedContextState that we or Skia may have modified
// the driver's GL state. We put this in a ScopedClosureRunner so that if we
// return early, the SharedContextState ends up in a consistent state.
// TODO(blundell): Determine whether this is still necessary after the
// transition to SharedImage.
notify_gl_state_changed.emplace(base::BindOnce(
[](scoped_refptr<SharedContextState> scs) {
scs->set_need_context_state_reset(true);
},
shared_context_state));
plane_sk_images.resize(num_planes);
// Right now, we only support YUV 4:2:0 for the output of the decoder (either
// as YV12 or NV12).
CHECK(completed_decode->si_format == viz::MultiPlaneFormat::kYV12 ||
completed_decode->si_format == viz::MultiPlaneFormat::kNV12);
const gfx::Size shared_image_size = completed_decode->visible_size;
const gpu::Mailbox mailbox = gpu::Mailbox::Generate();
if (!channel_->shared_image_stub()->CreateSharedImage(
mailbox, std::move(completed_decode->handle),
completed_decode->si_format, shared_image_size, gfx::ColorSpace(),
kTopLeft_GrSurfaceOrigin, kOpaque_SkAlphaType,
SHARED_IMAGE_USAGE_RASTER_READ | SHARED_IMAGE_USAGE_OOP_RASTERIZATION,
"ImageDecodeAccelerator")) {
DLOG(ERROR) << "Could not create SharedImage";
return;
}
// Create the SkiaRepresentation::ScopedReadAccess from the SharedImage.
// There is a need to be careful here as the SkiaRepresentation can outlive
// the channel: the representation is effectively owned by the transfer
// cache, which is owned by SharedContextState, which is destroyed by
// GpuChannelManager *after* GpuChannelManager destroys the channels. Hence,
// we cannot supply the channel's SharedImageStub as a MemoryTracker to
// create a SharedImageRepresentationFactory here (the factory creates a
// MemoryTypeTracker instance backed by that MemoryTracker that needs to
// outlive the representation). Instead, we create the Skia representation
// directly using the SharedContextState's MemoryTypeTracker instance.
std::unique_ptr<SkiaImageRepresentation> skia_representation =
channel_->gpu_channel_manager()->shared_image_manager()->ProduceSkia(
mailbox, shared_context_state->memory_type_tracker(),
shared_context_state);
// Note that per the above reasoning, we have to make sure that the factory
// representation doesn't outlive the channel (since it *was* created via
// the channel). We can destroy it now that the Skia representation has been
// created (or if creation failed, we'll early out shortly, but we still need
// to destroy the SharedImage to avoid leaks).
channel_->shared_image_stub()->factory()->DestroySharedImage(mailbox);
if (!skia_representation) {
DLOG(ERROR) << "Could not create a SkiaImageRepresentation";
return;
}
std::vector<GrBackendSemaphore> begin_semaphores;
std::vector<GrBackendSemaphore> end_semaphores;
auto skia_scoped_access = skia_representation->BeginScopedReadAccess(
&begin_semaphores, &end_semaphores);
if (!skia_scoped_access) {
DLOG(ERROR) << "Could not get scoped access to SkiaImageRepresentation";
return;
}
// As this SharedImage has just been created, there should not be any
// semaphores.
DCHECK(begin_semaphores.empty());
DCHECK(end_semaphores.empty());
// Create the SkImage for each plane, handing over lifetime management of the
// skia image representation and scoped access.
CleanUpContext* resource = new CleanUpContext(
channel_->task_runner(), shared_context_state.get(),
std::move(skia_representation), std::move(skia_scoped_access));
const size_t num_planes_expected =
resource->skia_representation_->NumPlanesExpected();
for (size_t plane = 0u; plane < num_planes_expected; plane++) {
plane_sk_images[plane] =
resource->skia_scoped_access_->CreateSkImageForPlane(
base::checked_cast<int>(plane), shared_context_state.get(),
CleanUpResource, resource);
if (!plane_sk_images[plane]) {
DLOG(ERROR) << "Could not create planar SkImage";
return;
}
}
// Insert the cache entry in the transfer cache. Note that this section
// validates several of the IPC parameters: |params.raster_decoder_route_id|,
// |params.transfer_cache_entry_id|, |params.discardable_handle_shm_id|, and
// |params.discardable_handle_shm_offset|.
CommandBufferStub* command_buffer =
channel_->LookupCommandBuffer(params.raster_decoder_route_id);
if (!command_buffer) {
DLOG(ERROR) << "Could not find the command buffer";
return;
}
scoped_refptr<Buffer> handle_buffer =
command_buffer->GetTransferBuffer(params.discardable_handle_shm_id);
if (!DiscardableHandleBase::ValidateParameters(
handle_buffer.get(), params.discardable_handle_shm_offset)) {
DLOG(ERROR) << "Could not validate the discardable handle parameters";
return;
}
DCHECK(command_buffer->decoder_context());
if (command_buffer->decoder_context()->GetRasterDecoderId() < 0) {
DLOG(ERROR) << "Could not get the raster decoder ID";
return;
}
{
auto* gr_shader_cache = channel_->gpu_channel_manager()->gr_shader_cache();
std::optional<raster::GrShaderCache::ScopedCacheUse> cache_use;
if (gr_shader_cache)
cache_use.emplace(gr_shader_cache,
base::strict_cast<int32_t>(channel_->client_id()));
DCHECK(shared_context_state->transfer_cache());
SkYUVAInfo::PlaneConfig plane_config =
completed_decode->si_format == viz::MultiPlaneFormat::kYV12
? SkYUVAInfo::PlaneConfig::kY_V_U
: SkYUVAInfo::PlaneConfig::kY_UV;
// TODO(andrescj): |params.target_color_space| is not needed because Skia
// knows where it's drawing, so it can handle color space conversion without
// us having to specify the target color space. However, we are currently
// assuming that the color space of the image is sRGB. This means we don't
// support images with embedded color profiles. We could rename
// |params.target_color_space| to |params.image_color_space| and we can send
// the embedded color profile from the renderer using that field.
if (!shared_context_state->transfer_cache()
->CreateLockedHardwareDecodedImageEntry(
command_buffer->decoder_context()->GetRasterDecoderId(),
params.transfer_cache_entry_id,
ServiceDiscardableHandle(std::move(handle_buffer),
params.discardable_handle_shm_offset,
params.discardable_handle_shm_id),
shared_context_state->gr_context(), std::move(plane_sk_images),
plane_config, SkYUVAInfo::Subsampling::k420,
completed_decode->yuv_color_space,
completed_decode->buffer_byte_size, params.needs_mips)) {
DLOG(ERROR) << "Could not create and insert the transfer cache entry";
return;
}
}
DCHECK(notify_gl_state_changed);
notify_gl_state_changed->RunAndReset();
#else
// Right now, we only support Chrome OS because we need to use the
// |native_pixmap_handle| member of a GpuMemoryBufferHandle.
NOTIMPLEMENTED()
<< "Image decode acceleration is unsupported for this platform";
#endif
}
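
// Disables the sequence when no completed decodes remain, so that queued sync
// token release tasks wait for the next decode to finish.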
void ImageDecodeAcceleratorStub::FinishCompletedDecode() {
DCHECK(main_task_runner_->BelongsToCurrentThread());
lock_.AssertAcquired();
if (pending_completed_decodes_.empty())
scheduler_->DisableSequence(sequence_);
}
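
// Invoked by the worker when a decode finishes. Queues the result and enables
// the sequence so that ProcessCompletedDecode() (and the corresponding sync
// token release) can run.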
void ImageDecodeAcceleratorStub::OnDecodeCompleted(
gfx::Size expected_output_size,
std::unique_ptr<ImageDecodeAcceleratorWorker::DecodeResult> result) {
base::AutoLock lock(lock_);
if (!channel_) {
// The channel is no longer available, so don't do anything.
return;
}
// A sanity check on the output of the decoder.
DCHECK(!result || expected_output_size == result->visible_size);
// The decode is ready to be processed: add it to |pending_completed_decodes_|
// so that ProcessCompletedDecode() can pick it up.
pending_completed_decodes_.push(std::move(result));
// We only need to enable the sequence when the number of pending completed
// decodes is 1. If there are more, the sequence should already be enabled.
if (pending_completed_decodes_.size() == 1u)
scheduler_->EnableSequence(sequence_);
}
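
// Schedules a no-op task on the sequence whose only effect is to release
// |release| when it completes.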
void ImageDecodeAcceleratorStub::ScheduleSyncTokenRelease(
const SyncToken& release) {
lock_.AssertAcquired();
scheduler_->ScheduleTask(Scheduler::Task(sequence_,
base::OnceClosure(base::DoNothing()),
/*sync_token_fences=*/{}, release));
}
} // namespace gpu