// Copyright 2012 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "gpu/command_buffer/service/command_buffer_service.h"

#include <stddef.h>
#include <stdint.h>

#include <limits>
#include <memory>

#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/memory/page_size.h"
#include "base/trace_event/memory_dump_manager.h"
#include "base/trace_event/memory_dump_provider.h"
#include "base/trace_event/memory_dump_request_args.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
#include "gpu/command_buffer/common/cmd_buffer_common.h"
#include "gpu/command_buffer/common/command_buffer_shared.h"
#include "gpu/command_buffer/service/memory_tracking.h"
#include "gpu/command_buffer/service/transfer_buffer_manager.h"
#include "gpu/config/gpu_finch_features.h"
#if BUILDFLAG(IS_MAC)
#include <mach/mach_vm.h>
#include <mach/vm_purgable.h>
#include <mach/vm_statistics.h>

#include "base/no_destructor.h"
#include "base/process/process_metrics.h"
#include "base/trace_event/process_memory_dump.h"
#endif

namespace gpu {

#if BUILDFLAG(IS_MAC)
namespace {

class AppleGpuMemoryDumpProvider
: public base::trace_event::MemoryDumpProvider {
public:
AppleGpuMemoryDumpProvider();
bool OnMemoryDump(const base::trace_event::MemoryDumpArgs& args,
base::trace_event::ProcessMemoryDump* pmd) override;
private:
  // Never called directly; instances are created through base::NoDestructor.
~AppleGpuMemoryDumpProvider() override = default;
};
AppleGpuMemoryDumpProvider::AppleGpuMemoryDumpProvider() {
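  // No task runner is supplied, so OnMemoryDump() may be invoked on an
  // arbitrary thread; that is safe here because the provider holds no state.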
base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider(
this, "CommandBuffer", nullptr);
}
bool AppleGpuMemoryDumpProvider::OnMemoryDump(
const base::trace_event::MemoryDumpArgs& args,
base::trace_event::ProcessMemoryDump* pmd) {
// Collect IOSurface total memory usage.
size_t surface_virtual_size = 0;
size_t surface_resident_size = 0;
size_t surface_swapped_out_size = 0;
size_t surface_dirty_size = 0;
size_t surface_nonpurgeable_size = 0;
size_t surface_purgeable_size = 0;
// And IOAccelerator. Per vm_statistics.h in XNU, this is used to
// "differentiate memory needed by GPU drivers and frameworks from generic
// IOKit allocations". See xnu-1456.1.26/osfmk/mach/vm_statistics.h.
size_t accelerator_virtual_size = 0;
size_t accelerator_resident_size = 0;
size_t accelerator_swapped_out_size = 0;
size_t accelerator_dirty_size = 0;
size_t accelerator_nonpurgeable_size = 0;
size_t accelerator_purgeable_size = 0;
task_t task = mach_task_self();
mach_vm_address_t address = 0;
mach_vm_size_t size = 0;
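  // Walk the task's VM map one region at a time: each query returns the
  // region at or after `address`, so advancing `address` by the returned
  // `size` visits every region exactly once.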
while (true) {
address += size;
// GetBasicInfo is faster than querying the extended attributes. Query this
// first to filter out regions that cannot correspond to IOSurfaces.
vm_region_basic_info_64 basic_info;
base::MachVMRegionResult result =
base::GetBasicInfo(task, &size, &address, &basic_info);
if (result == base::MachVMRegionResult::Finished) {
break;
} else if (result == base::MachVMRegionResult::Error) {
return false;
}
    // All IOSurface and IOAccelerator allocations seen locally (M1 laptop)
    // have rw-/rw- permissions. More distinctive characteristics require the
    // extended info, which is more expensive to query.
const vm_prot_t rw = VM_PROT_READ | VM_PROT_WRITE;
if (basic_info.protection != rw || basic_info.max_protection != rw)
continue;
    // This region is a candidate; query the extended info for the user tag
    // and the page status breakdown.
vm_region_extended_info_data_t info;
mach_port_t object_name;
mach_msg_type_number_t count;
count = VM_REGION_EXTENDED_INFO_COUNT;
kern_return_t ret = mach_vm_region(
task, &address, &size, VM_REGION_EXTENDED_INFO,
reinterpret_cast<vm_region_info_t>(&info), &count, &object_name);
// No regions above the requested address.
if (ret == KERN_INVALID_ADDRESS)
break;
if (ret != KERN_SUCCESS)
return false;
if (info.user_tag != VM_MEMORY_IOSURFACE &&
info.user_tag != VM_MEMORY_IOACCELERATOR) {
continue;
}
int purgeable_state = 0;
ret = mach_vm_purgable_control(task, address, VM_PURGABLE_GET_STATE,
&purgeable_state);
purgeable_state = purgeable_state & VM_PURGABLE_STATE_MASK;
switch (info.user_tag) {
case VM_MEMORY_IOSURFACE:
surface_virtual_size += size;
surface_resident_size += info.pages_resident * base::GetPageSize();
surface_swapped_out_size +=
info.pages_swapped_out * base::GetPageSize();
surface_dirty_size += info.pages_dirtied * base::GetPageSize();
if (purgeable_state == VM_PURGABLE_VOLATILE ||
purgeable_state == VM_PURGABLE_EMPTY) {
surface_purgeable_size += size;
} else {
surface_nonpurgeable_size += size;
}
break;
case VM_MEMORY_IOACCELERATOR:
accelerator_virtual_size += size;
accelerator_resident_size += info.pages_resident * base::GetPageSize();
accelerator_swapped_out_size +=
info.pages_swapped_out * base::GetPageSize();
accelerator_dirty_size += info.pages_dirtied * base::GetPageSize();
if (purgeable_state == VM_PURGABLE_VOLATILE ||
purgeable_state == VM_PURGABLE_EMPTY) {
accelerator_purgeable_size += size;
} else {
accelerator_nonpurgeable_size += size;
}
break;
}
}
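  // Publish the totals as allocator dumps so they appear in memory-infra
  // traces alongside the other GPU-process allocators.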
auto* dump = pmd->CreateAllocatorDump("iosurface");
dump->AddScalar("virtual_size", "bytes", surface_virtual_size);
dump->AddScalar("resident_size", "bytes", surface_resident_size);
dump->AddScalar("swapped_out_size", "bytes", surface_swapped_out_size);
dump->AddScalar("dirty_size", "bytes", surface_dirty_size);
dump->AddScalar("size", "bytes", surface_virtual_size);
  // Some IOSurfaces have a non-trivial difference between their mapped size
  // and their "dirty" size, possibly because part of the surface was marked
  // purgeable and then purged (rather than swapped out). Report resident +
  // swapped, as that is the fraction of memory which is (a) backed by real
  // resources (RAM or swap), and (b) counted in the private memory footprint.
  //
  // Note: not using "dirty_size", as it doesn't include the swapped-out part.
dump->AddScalar("resident_swapped", "bytes",
surface_resident_size + surface_swapped_out_size);
dump->AddScalar("nonpurgeable_size", "bytes", surface_nonpurgeable_size);
dump->AddScalar("purgeable_size", "bytes", surface_purgeable_size);
// Ditto for IOAccelerator.
dump = pmd->CreateAllocatorDump("ioaccelerator");
dump->AddScalar("virtual_size", "bytes", accelerator_virtual_size);
dump->AddScalar("resident_size", "bytes", accelerator_resident_size);
dump->AddScalar("swapped_out_size", "bytes", accelerator_swapped_out_size);
dump->AddScalar("dirty_size", "bytes", accelerator_dirty_size);
dump->AddScalar("size", "bytes", accelerator_virtual_size);
dump->AddScalar("resident_swapped", "bytes",
accelerator_resident_size + accelerator_swapped_out_size);
dump->AddScalar("nonpurgeable_size", "bytes", accelerator_nonpurgeable_size);
dump->AddScalar("purgeable_size", "bytes", accelerator_purgeable_size);
return true;
}
}  // namespace
#endif

// Context switching leads to a render pass break in ANGLE/Vulkan. The command
// buffer normally parses at most 20 commands per slice before allowing a
// context switch; the kIncreasedCmdBufferParseSlice experiment raises that
// limit to 100 commands per slice.
int GetCommandBufferSliceSize() {
static int slice_size =
(base::FeatureList::IsEnabled(features::kIncreasedCmdBufferParseSlice)
? CommandBufferService::kParseCommandsSliceLarge
: CommandBufferService::kParseCommandsSliceSmall);
return slice_size;
}
CommandBufferService::CommandBufferService(
CommandBufferServiceClient* client,
scoped_refptr<MemoryTracker> memory_tracker)
: client_(client),
transfer_buffer_manager_(
std::make_unique<TransferBufferManager>(std::move(memory_tracker))) {
DCHECK(client_);
state_.token = 0;
#if BUILDFLAG(IS_MAC)
static base::NoDestructor<AppleGpuMemoryDumpProvider> dump_provider;
#endif
}
CommandBufferService::~CommandBufferService() = default;
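
// Publishes the current service-side state to the shared-memory block that
// the client polls; the generation counter lets the client tell newer
// snapshots from older ones.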
void CommandBufferService::UpdateState() {
++state_.generation;
if (shared_state_)
shared_state_->Write(state_);
}
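
// Flush() parses the commands in [get, put) on the ring buffer. A minimal
// sketch of the service-side calling sequence (identifiers hypothetical):
//
//   CommandBufferService service(client, memory_tracker);
//   service.SetGetBuffer(ring_buffer_id);       // bind the ring buffer
//   service.Flush(put_offset, decoder);         // parse up to put_offset
//   CommandBuffer::State state = service.GetState();  // get_offset advanced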
void CommandBufferService::Flush(int32_t put_offset,
AsyncAPIInterface* handler) {
DCHECK(handler);
if (put_offset < 0 || put_offset >= num_entries_) {
SetParseError(gpu::error::kOutOfBounds);
return;
}
TRACE_EVENT1("gpu", "CommandBufferService:PutChanged", "handler",
std::string(handler->GetLogPrefix()));
put_offset_ = put_offset;
DCHECK(buffer_);
if (state_.error != error::kNoError)
return;
DCHECK(scheduled());
if (paused_) {
paused_ = false;
TRACE_COUNTER_ID1("gpu", "CommandBufferService::Paused", this, paused_);
}
handler->BeginDecoding();
// BeginDecoding can cause context loss due to resuming shared image access.
if (state_.error != error::kNoError) {
handler->EndDecoding();
return;
}
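  // `end` handles ring-buffer wraparound: if put is behind get, parse to the
  // end of the buffer first, then wrap get to 0 and continue toward put.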
int end = put_offset_ < state_.get_offset ? num_entries_ : put_offset_;
while (put_offset_ != state_.get_offset) {
int num_entries = end - state_.get_offset;
int entries_processed = 0;
error::Error error = handler->DoCommands(
GetCommandBufferSliceSize(), UNSAFE_TODO(buffer_ + state_.get_offset),
num_entries, &entries_processed);
state_.get_offset += entries_processed;
DCHECK_LE(state_.get_offset, num_entries_);
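    // Reached the end of the ring buffer; wrap get back to the start.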
if (state_.get_offset == num_entries_) {
end = put_offset_;
state_.get_offset = 0;
}
if (error::IsError(error)) {
SetParseError(error);
break;
}
if (client_->OnCommandBatchProcessed() ==
CommandBufferServiceClient::kPauseExecution) {
paused_ = true;
TRACE_COUNTER_ID1("gpu", "CommandBufferService::Paused", this, paused_);
break;
}
if (!scheduled())
break;
}
handler->EndDecoding();
}
void CommandBufferService::SetGetBuffer(int32_t transfer_buffer_id) {
DCHECK((put_offset_ == state_.get_offset) ||
(state_.error != error::kNoError));
put_offset_ = 0;
state_.get_offset = 0;
++state_.set_get_buffer_count;
  // If the id does not name a valid buffer, handle it gracefully:
  // `transfer_buffer` is nullptr and the ring buffer is unbound below.
auto transfer_buffer = GetTransferBuffer(transfer_buffer_id);
if (transfer_buffer) {
uint32_t size = transfer_buffer->size();
volatile void* memory = transfer_buffer->memory();
    // Check that the memory is correctly aligned and sized for
    // CommandBufferEntry.
    DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(memory) %
                      alignof(CommandBufferEntry));
    DCHECK_EQ(0u, size % sizeof(CommandBufferEntry));
num_entries_ = size / sizeof(CommandBufferEntry);
buffer_ = reinterpret_cast<volatile CommandBufferEntry*>(memory);
} else {
num_entries_ = 0;
buffer_ = nullptr;
}
ring_buffer_ = std::move(transfer_buffer);
UpdateState();
}
void CommandBufferService::SetSharedStateBuffer(
std::unique_ptr<BufferBacking> shared_state_buffer) {
shared_state_buffer_ = std::move(shared_state_buffer);
  DCHECK_GE(shared_state_buffer_->GetSize(), sizeof(*shared_state_));
shared_state_ =
static_cast<CommandBufferSharedState*>(shared_state_buffer_->GetMemory());
UpdateState();
}
CommandBuffer::State CommandBufferService::GetState() {
return state_;
}
void CommandBufferService::SetReleaseCount(uint64_t release_count) {
DLOG_IF(ERROR, release_count < state_.release_count)
<< "Non-monotonic SetReleaseCount";
state_.release_count = release_count;
UpdateState();
}
scoped_refptr<Buffer> CommandBufferService::CreateTransferBuffer(
uint32_t size,
int32_t* id,
uint32_t alignment) {
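  // Hand out a fresh id; on failure, report -1 through `id` so the caller
  // can tell the allocation was rejected.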
*id = GetNextBufferId();
auto result = CreateTransferBufferWithId(size, *id, alignment);
if (!result) {
*id = -1;
}
return result;
}
void CommandBufferService::DestroyTransferBuffer(int32_t id) {
transfer_buffer_manager_->DestroyTransferBuffer(id);
}
scoped_refptr<Buffer> CommandBufferService::GetTransferBuffer(int32_t id) {
return transfer_buffer_manager_->GetTransferBuffer(id);
}
bool CommandBufferService::RegisterTransferBuffer(
int32_t id,
scoped_refptr<Buffer> buffer) {
return transfer_buffer_manager_->RegisterTransferBuffer(id,
std::move(buffer));
}
scoped_refptr<Buffer> CommandBufferService::CreateTransferBufferWithId(
uint32_t size,
int32_t id,
uint32_t alignment) {
scoped_refptr<Buffer> buffer = MakeMemoryBuffer(size, alignment);
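  // Registration can fail, e.g. if the id is already in use; report that to
  // the client as an out-of-bounds parse error.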
if (!RegisterTransferBuffer(id, buffer)) {
SetParseError(gpu::error::kOutOfBounds);
return nullptr;
}
return buffer;
}
void CommandBufferService::SetToken(int32_t token) {
state_.token = token;
UpdateState();
}
void CommandBufferService::SetParseError(error::Error error) {
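  // Only the first error is recorded, so later errors cannot mask the
  // original cause reported to the client.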
if (state_.error == error::kNoError) {
state_.error = error;
client_->OnParseError();
}
}
void CommandBufferService::SetContextLostReason(
error::ContextLostReason reason) {
state_.context_lost_reason = reason;
}
bool CommandBufferService::ShouldYield() {
return client_->OnCommandBatchProcessed() ==
CommandBufferServiceClient::kPauseExecution;
}
void CommandBufferService::SetScheduled(bool scheduled) {
TRACE_EVENT2("gpu", "CommandBufferService:SetScheduled", "this",
static_cast<void*>(this), "scheduled", scheduled);
scheduled_ = scheduled;
}
size_t CommandBufferService::GetSharedMemoryBytesAllocated() const {
return transfer_buffer_manager_->shared_memory_bytes_allocated();
}
}  // namespace gpu