blob: 6f3cc365fb3ab315b60a610af831cff8776cebf4 [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "services/webnn/coreml/tensor_impl_coreml.h"
#import <CoreFoundation/CoreFoundation.h>
#import <CoreML/CoreML.h>
#import <CoreVideo/CVPixelBuffer.h>
#import <IOSurface/IOSurfaceRef.h>
#include "base/apple/bridging.h"
#include "base/compiler_specific.h"
#include "base/functional/callback_helpers.h"
#include "base/memory/scoped_refptr.h"
#include "base/types/expected.h"
#include "gpu/command_buffer/service/shared_image/shared_image_representation.h"
#include "mojo/public/cpp/base/big_buffer.h"
#include "services/webnn/coreml/buffer_content_coreml.h"
#include "services/webnn/coreml/context_impl_coreml.h"
#include "services/webnn/coreml/utils_coreml.h"
#include "services/webnn/public/cpp/operand_descriptor.h"
#include "services/webnn/public/cpp/webnn_trace.h"
#include "services/webnn/public/mojom/webnn_tensor.mojom.h"
#include "services/webnn/queueable_resource_state.h"
#include "services/webnn/resource_task.h"
namespace webnn::coreml {
namespace {
// Maps a WebNN operand data type onto the equivalent `MLMultiArrayDataType`.
// Only the float32, float16 and int32 operand types have an equivalent here;
// passing any other type is a programming error and hits `NOTREACHED()`.
MLMultiArrayDataType ToMLMultiArrayDataType(OperandDataType data_type) {
  // No `default:` label on purpose, so every enumerator is listed explicitly.
  switch (data_type) {
    case OperandDataType::kInt32:
      return MLMultiArrayDataTypeInt32;
    case OperandDataType::kFloat16:
      return MLMultiArrayDataTypeFloat16;
    case OperandDataType::kFloat32:
      return MLMultiArrayDataTypeFloat32;
    case OperandDataType::kInt4:
    case OperandDataType::kUint4:
    case OperandDataType::kInt8:
    case OperandDataType::kUint8:
    case OperandDataType::kUint32:
    case OperandDataType::kInt64:
    case OperandDataType::kUint64:
      // Unsupported data types for MLMultiArrays in CoreML.
      NOTREACHED();
  }
}
// Converts a tensor shape into the `NSArray<NSNumber*>` form used for
// `MLMultiArray` shapes. An empty shape (a scalar tensor) is represented as
// the one-element shape `[1]` so there is storage for the scalar's value.
NSArray<NSNumber*>* ShapeToNSArray(base::span<const uint32_t> shape) {
  if (shape.empty()) {
    // Allocate a one-element array to hold the value of a scalar tensor.
    return [NSMutableArray
        arrayWithObject:[NSNumber numberWithUnsignedLong:1ul]];
  }

  NSMutableArray<NSNumber*>* dimensions =
      [NSMutableArray arrayWithCapacity:shape.size()];
  for (uint32_t extent : shape) {
    [dimensions addObject:[NSNumber numberWithUnsignedLong:extent]];
  }
  return dimensions;
}
// Creates a zero-filled MLMultiArray given a data type and shape. See
// documentation here:
// https://developer.apple.com/documentation/coreml/mlmultiarray/init(shape:datatype:)
//
// `descriptor` supplies the shape (a scalar becomes shape `[1]`, see
// `ShapeToNSArray()`) and the data type, which must be one that
// `ToMLMultiArrayDataType()` supports.
//
// Returns nil, after logging, if CoreML fails to allocate the array.
API_AVAILABLE(macos(12.3))
MLMultiArray* CreateMultiArrayFromDescriptor(OperandDescriptor descriptor) {
  NSArray<NSNumber*>* shape = ShapeToNSArray(descriptor.shape());
  NSError* error = nil;
  MLMultiArray* multi_array = [[MLMultiArray alloc]
      initWithShape:shape
           dataType:ToMLMultiArrayDataType(descriptor.data_type())
              error:&error];
  // Per Cocoa convention, failure is signalled by the nil return value; the
  // `NSError**` out-parameter is only meaningful once the call has failed, so
  // it must not be used as the success/failure test.
  if (!multi_array) {
    LOG(ERROR) << "[WebNN] Failed to allocate tensor: " << error;
    return nil;
  }
  // `MLMultiArray` doesn't initialize its contents.
  __block bool block_executing_synchronously = true;
  [multi_array getMutableBytesWithHandler:^(void* mutable_bytes, NSInteger size,
                                            NSArray<NSNumber*>* strides) {
    // TODO(crbug.com/333392274): Refactor this method to assume the handler may
    // be invoked on some other thread. We should not assume that the block
    // will always run synchronously.
    CHECK(block_executing_synchronously);
    // TODO(crbug.com/333392274): Use the `WriteToMLMultiArray()` function
    // which handles non-contiguous buffers.
    UNSAFE_TODO(memset(mutable_bytes, 0, size));
  }];
  // From here on, any late invocation of the handler above trips its CHECK.
  block_executing_synchronously = false;
  return multi_array;
}
// Creates an MLMultiArray by wrapping an IOSurface wrapped by a CVPixelBuffer.
// This is only supported for float16 tensors. See the documentation here:
// https://developer.apple.com/documentation/coreml/mlmultiarray/init(pixelbuffer:shape:)
//
// The surface is laid out as a 2D image whose width is the tensor's last
// dimension and whose height is the product of the remaining dimensions.
//
// Returns nil, after logging, if the IOSurface or the pixel buffer cannot be
// created; may also return nil if `MLMultiArray` rejects the pixel buffer.
API_AVAILABLE(macos(12.0))
MLMultiArray* CreateMultiArrayBackedByIOSurface(OperandDescriptor descriptor) {
  CHECK_EQ(descriptor.data_type(), OperandDataType::kFloat16);
  // The pixel buffer's width must match the last dimension of the tensor.
  NSArray<NSNumber*>* shape = ShapeToNSArray(descriptor.shape());
  NSNumber* width = shape.lastObject;
  NSNumber* height =
      @(descriptor.NumberOfElements() / static_cast<size_t>(width.intValue));
  NSDictionary* iosurface_properties = @{
    (NSString*)kIOSurfaceWidth : width,
    (NSString*)kIOSurfaceHeight : height,
    // float16 elements are two bytes wide.
    (NSString*)kIOSurfaceBytesPerElement : @(2),
    // This is the only supported data type for importing an MLMultiArray from a
    // CVPixelBuffer.
    (NSString*)kIOSurfacePixelFormat : @(kCVPixelFormatType_OneComponent16Half),
  };

  // `IOSurfaceCreate()` follows the Create Rule: the returned reference is
  // owned by this function and must be released on every path.
  IOSurfaceRef surface =
      IOSurfaceCreate(base::apple::NSToCFPtrCast(iosurface_properties));
  if (!surface) {
    LOG(ERROR) << "[WebNN] Failed to allocate tensor: IOSurfaceCreate failed.";
    return nil;
  }

  CVPixelBufferRef pixel_buffer = nullptr;
  CVReturn pixel_buffer_result = CVPixelBufferCreateWithIOSurface(
      kCFAllocatorDefault, surface,
      /*pixelBufferAttributes=*/nil, &pixel_buffer);
  // On success the pixel buffer holds its own reference to the surface, and on
  // failure nothing else needs it, so our reference can be dropped either way.
  CFRelease(surface);
  if (pixel_buffer_result != kCVReturnSuccess) {
    LOG(ERROR) << "[WebNN] Failed to allocate tensor: " << pixel_buffer_result;
    return nil;
  }

  MLMultiArray* multi_array =
      [[MLMultiArray alloc] initWithPixelBuffer:pixel_buffer shape:shape];
  // `CVPixelBufferCreateWithIOSurface()` handed back a +1 reference. The
  // multi-array keeps whatever reference it needs, so release ours to avoid
  // leaking the pixel buffer (and the IOSurface behind it).
  CFRelease(pixel_buffer);
  return multi_array;
}
} // namespace
// static
// Creates a tensor whose storage is allocated here. float16 tensors are backed
// by an IOSurface; every other supported type uses a plain `MLMultiArray` and
// therefore cannot be shared with WebGPU. Returns an error if the rank is
// unsupported, if WebGPU interop is requested for a non-float16 tensor, or if
// allocation fails.
base::expected<scoped_refptr<WebNNTensorImpl>, mojom::ErrorPtr>
TensorImplCoreml::Create(
    mojo::PendingAssociatedReceiver<mojom::WebNNTensor> receiver,
    base::WeakPtr<WebNNContextImpl> context,
    mojom::TensorInfoPtr tensor_info) {
  const auto& descriptor = tensor_info->descriptor;

  // TODO(crbug.com/329482489): Move this check to the renderer and throw a
  // TypeError.
  if (descriptor.Rank() > 5) {
    LOG(ERROR) << "[WebNN] Tensor rank is too large.";
    return base::unexpected(mojom::Error::New(
        mojom::Error::Code::kNotSupportedError, "Tensor rank is too large."));
  }

  CHECK(base::IsValueInRangeForNumericType<int>(descriptor.PackedByteLength()));

  const bool is_float16 = descriptor.data_type() == OperandDataType::kFloat16;

  // TODO(https://crbug.com/333392274): Support WebGPU interop with more
  // than just float16 tensors.
  if (!is_float16 &&
      tensor_info->usage.Has(MLTensorUsageFlags::kWebGpuInterop)) {
    return base::unexpected(
        mojom::Error::New(mojom::Error::Code::kUnknownError,
                          "Interoperability with WebGPU is only supported "
                          "when using float16 tensors."));
  }

  // TODO(https://crbug.com/333392274): Consider not using IOSurface when
  // WebGPU interop is not requested.
  MLMultiArray* backing_array =
      is_float16 ? CreateMultiArrayBackedByIOSurface(descriptor)
                 : CreateMultiArrayFromDescriptor(descriptor);
  if (!backing_array) {
    return base::unexpected(mojom::Error::New(mojom::Error::Code::kUnknownError,
                                              "Failed to allocate tensor."));
  }

  // Wrap the array in the queueable state used to order reads and writes.
  auto content = std::make_unique<BufferContent>(std::move(backing_array));
  auto queueable_state =
      base::MakeRefCounted<QueueableResourceState<BufferContent>>(
          std::move(content));

  // No shared-image representation is involved for self-allocated tensors.
  return base::MakeRefCounted<TensorImplCoreml>(
      std::move(receiver), std::move(context), std::move(tensor_info),
      std::move(queueable_state), /*representation=*/nullptr,
      /*representation_access=*/nullptr, base::PassKey<TensorImplCoreml>());
}
// static
// Creates a tensor that aliases the IOSurface exposed by `representation`
// instead of allocating its own storage. Scoped access to the representation
// is begun here and handed to the returned tensor.
//
// Only float16 tensors are supported, and the IOSurface must:
//   * use 2 bytes per element,
//   * use the `kCVPixelFormatType_OneComponent16Half` pixel format, and
//   * have a width equal to the tensor's last dimension and a height equal to
//     the product of the remaining dimensions (1 x 1 for a scalar).
//
// Returns an error if access cannot be begun, if any requirement above is not
// met, or if the pixel buffer or `MLMultiArray` cannot be created.
base::expected<scoped_refptr<WebNNTensorImpl>, mojom::ErrorPtr>
TensorImplCoreml::Create(
    mojo::PendingAssociatedReceiver<mojom::WebNNTensor> receiver,
    base::WeakPtr<WebNNContextImpl> context,
    mojom::TensorInfoPtr tensor_info,
    std::unique_ptr<gpu::WebNNTensorRepresentation> representation) {
  auto representation_access = representation->BeginScopedAccess();
  if (!representation_access) {
    return base::unexpected(
        mojom::Error::New(mojom::Error::Code::kUnknownError,
                          "Failed to begin access to tensor."));
  }
  if (tensor_info->descriptor.data_type() != OperandDataType::kFloat16) {
    return base::unexpected(
        mojom::Error::New(mojom::Error::Code::kUnknownError,
                          "Unsupported data type for WebGPU interop."));
  }

  IOSurfaceRef io_surface = representation->GetIOSurface();
  if (IOSurfaceGetBytesPerElement(io_surface) != 2) {
    return base::unexpected(
        mojom::Error::New(mojom::Error::Code::kUnknownError,
                          "Invalid IOSurface: bytes per element is not 2."));
  }
  if (IOSurfaceGetPixelFormat(io_surface) !=
      kCVPixelFormatType_OneComponent16Half) {
    return base::unexpected(mojom::Error::New(
        mojom::Error::Code::kUnknownError,
        "Invalid IOSurface: pixel format is not OneComponent16Half."));
  }

  // The pixel buffer's width must match the last dimension of the tensor, and
  // its height the product of all remaining dimensions. This is the same
  // layout `CreateMultiArrayBackedByIOSurface()` uses when it allocates a
  // surface, and what `-[MLMultiArray initWithPixelBuffer:shape:]` requires.
  size_t width = 1ul;
  const std::vector<uint32_t>& shape = tensor_info->descriptor.shape();
  if (!shape.empty()) {
    width = shape.back();
  }
  size_t height = tensor_info->descriptor.NumberOfElements() / width;
  if (height != IOSurfaceGetHeight(io_surface) ||
      width != IOSurfaceGetWidth(io_surface)) {
    return base::unexpected(mojom::Error::New(
        mojom::Error::Code::kUnknownError,
        "Invalid IOSurface: width and height doesn't match with tensor."));
  }

  CVPixelBufferRef pixel_buffer = nullptr;
  CVReturn pixel_buffer_result = CVPixelBufferCreateWithIOSurface(
      kCFAllocatorDefault, io_surface,
      /*pixelBufferAttributes=*/nil, &pixel_buffer);
  if (pixel_buffer_result != kCVReturnSuccess) {
    LOG(ERROR) << "[WebNN] Failed to create pixel buffer from IOSurface: "
               << pixel_buffer_result;
    return base::unexpected(
        mojom::Error::New(mojom::Error::Code::kUnknownError,
                          "Failed to create pixel buffer from IOSurface."));
  }

  MLMultiArray* multi_array =
      [[MLMultiArray alloc] initWithPixelBuffer:pixel_buffer
                                          shape:ShapeToNSArray(shape)];
  // `CVPixelBufferCreateWithIOSurface()` handed back a +1 reference. The
  // multi-array keeps whatever reference it needs, so release ours on both the
  // success and failure paths to avoid leaking the pixel buffer.
  CFRelease(pixel_buffer);
  if (!multi_array) {
    return base::unexpected(mojom::Error::New(mojom::Error::Code::kUnknownError,
                                              "Failed to allocate tensor."));
  }

  auto buffer_content = std::make_unique<BufferContent>(std::move(multi_array));
  auto buffer_state =
      base::MakeRefCounted<QueueableResourceState<BufferContent>>(
          std::move(buffer_content));
  // The tensor takes ownership of both the representation and the scoped
  // access begun above.
  return base::MakeRefCounted<TensorImplCoreml>(
      std::move(receiver), std::move(context), std::move(tensor_info),
      std::move(buffer_state), std::move(representation),
      std::move(representation_access), base::PassKey<TensorImplCoreml>());
}
// Constructs a tensor from backing state that has already been created.
// `receiver`, `context`, `tensor_info` and `representation` are forwarded to
// the `WebNNTensorImpl` base class; `buffer_state` is retained by this object.
// The trailing `base::PassKey<TensorImplCoreml>` argument is unused; in this
// file it is supplied only by the `Create()` factories.
TensorImplCoreml::TensorImplCoreml(
mojo::PendingAssociatedReceiver<mojom::WebNNTensor> receiver,
base::WeakPtr<WebNNContextImpl> context,
mojom::TensorInfoPtr tensor_info,
scoped_refptr<QueueableResourceState<BufferContent>> buffer_state,
std::unique_ptr<gpu::WebNNTensorRepresentation> representation,
std::unique_ptr<gpu::WebNNTensorRepresentation::ScopedAccess>
representation_access,
base::PassKey<TensorImplCoreml> /*pass_key*/)
: WebNNTensorImpl(std::move(receiver),
std::move(context),
std::move(tensor_info),
std::move(representation)),
buffer_state_(std::move(buffer_state)) {
// Null when the tensor was created without a shared-image representation;
// otherwise the scoped access begun by `Create()`.
representation_access_ = std::move(representation_access);
}
// Performs no explicit cleanup; only asserts (in DCHECK builds) that
// destruction happens on the sequence checked by `sequence_checker_`.
TensorImplCoreml::~TensorImplCoreml() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
// Enqueues a `ResourceTask` that reads the tensor's contents and reports them
// through `callback` as a `mojom::ReadTensorResult` buffer. The task lists
// `buffer_state_` as a shared resource and has no exclusive resources. The
// `ScopedTrace` is moved through each stage so the trace steps are recorded
// as the work progresses.
void TensorImplCoreml::ReadTensorImpl(
mojom::WebNNTensor::ReadTensorCallback callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
ScopedTrace scoped_trace("TensorImplCoreml::ReadTensorImpl");
// Lock the buffer contents as shared/read-only.
std::vector<scoped_refptr<QueueableResourceStateBase>> shared_resources = {
buffer_state_};
scoped_trace.AddStep("Wait for tensor");
auto task = base::MakeRefCounted<ResourceTask>(
std::move(shared_resources),
/*exclusive_resources=*/
std::vector<scoped_refptr<QueueableResourceStateBase>>(),
base::BindOnce(
// Task body. `buffer_state`, the result callback and the trace are bound
// below; `completion_closure` is the one argument left unbound.
[](scoped_refptr<QueueableResourceState<BufferContent>> buffer_state,
ReadTensorCallback read_tensor_result_callback,
ScopedTrace scoped_trace, base::OnceClosure completion_closure) {
scoped_trace.AddStep("Begin read");
// Read from the underlying buffer contents, which are kept alive
// until `completion_closure` is run.
buffer_state->GetSharedLockedResource().Read(base::BindOnce(
// Invoked with the bytes that were read.
[](base::OnceClosure completion_closure,
ReadTensorCallback read_tensor_result_callback,
ScopedTrace scoped_trace,
mojo_base::BigBuffer output_buffer) {
scoped_trace.AddStep("End read");
// Unlock the buffer contents.
std::move(completion_closure).Run();
// Reply only after the lock has been released above.
std::move(read_tensor_result_callback)
.Run(mojom::ReadTensorResult::NewBuffer(
std::move(output_buffer)));
},
std::move(completion_closure),
std::move(read_tensor_result_callback),
std::move(scoped_trace)));
},
buffer_state_, std::move(callback), std::move(scoped_trace)));
task->Enqueue();
}
// Enqueues a `ResourceTask` that writes `src_buffer` into the tensor's
// contents. The task lists `buffer_state_` as an exclusive resource and has no
// shared resources. No result is reported to the caller; the write completes
// by running the task's completion closure.
void TensorImplCoreml::WriteTensorImpl(mojo_base::BigBuffer src_buffer) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
ScopedTrace scoped_trace("TensorImplCoreml::WriteTensorImpl");
// Take an exclusive lock to the buffer contents while writing.
std::vector<scoped_refptr<QueueableResourceStateBase>> exclusive_resources = {
buffer_state_};
scoped_trace.AddStep("Wait for tensor");
auto task = base::MakeRefCounted<ResourceTask>(
/*shared_resources=*/
std::vector<scoped_refptr<QueueableResourceStateBase>>(),
std::move(exclusive_resources),
base::BindOnce(
// Task body. `buffer_state`, the source bytes and the trace are bound
// below; `completion_closure` is the one argument left unbound.
[](scoped_refptr<QueueableResourceState<BufferContent>> buffer_state,
mojo_base::BigBuffer src_buffer, ScopedTrace scoped_trace,
base::OnceClosure completion_closure) {
scoped_trace.AddStep("Begin write");
// Write to the underlying buffer contents, which are kept alive
// until `completion_closure` is run.
buffer_state->GetExclusivelyLockedResource()->Write(
src_buffer,
base::BindOnce(
// Invoked once the write is done; records the final trace step
// and then runs the completion closure.
[](base::OnceClosure completion_closure,
ScopedTrace scoped_trace) {
scoped_trace.AddStep("End write");
std::move(completion_closure).Run();
},
std::move(completion_closure), std::move(scoped_trace)));
},
buffer_state_, std::move(src_buffer), std::move(scoped_trace)));
task->Enqueue();
}
// Returns the queueable state wrapping this tensor's `BufferContent`, i.e. the
// same object `ReadTensorImpl()` and `WriteTensorImpl()` lock. The reference
// returned is to the `buffer_state_` member, so it is only valid while this
// tensor is alive. Must be called on the sequence checked by
// `sequence_checker_`.
const scoped_refptr<QueueableResourceState<BufferContent>>&
TensorImplCoreml::GetBufferState() const {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
return buffer_state_;
}
} // namespace webnn::coreml