// Copyright 2023 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module webnn.mojom;
import "mojo/public/mojom/base/big_buffer.mojom";
import "services/webnn/public/mojom/webnn_error.mojom";
// Represents an `MLOperand`, which describes an input, constant, or output
// operand of an operator.
struct Operand {
// Represents the `MLOperandType` in the WebIDL definition.
enum DataType {
kFloat32,
kFloat16,
kInt32,
kUint32,
kInt64,
kUint64,
kInt8,
kUint8,
};
enum Kind {
kInput,
kConstant,
kOutput,
};
Kind kind;
// The data type of the operand.
DataType data_type;
// The dimensions of the operand.
array<uint32> dimensions;
// The name field is only required for the input and output operands of the
// graph.
string? name;
};
// Computes the indices of the maximum or minimum elements of the input
// tensor along the provided axes.
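// For example, given a kMax operation over
// input = [[1, 3, 2],
//          [4, 0, 5]] with shape (2, 3)
// and axes = [1], the output is [1, 2] with shape (2), since the row maxima
// 3 and 5 sit at indices 1 and 2 respectively.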
struct ArgMinMax {
enum Kind {
kMin,
kMax,
};
// The kind of operation.
Kind kind;
// The input operand referenced by the input id must be distinct from the
// output operand referenced by the output id and the output sizes must be
// the same as the input sizes, except for the reduced axis.
uint64 input_operand_id;
uint64 output_operand_id;
// Axes indicates the dimensions to reduce. The values in the sequence must
// be in the range [0, N-1] where N is the rank of the input tensor.
array<uint32> axes;
// If true, retains the reduced dimensions with size 1.
bool keep_dimensions = false;
// If true, selects the last index found along the axis rather than the
// first.
bool select_last_index = false;
};
// Represents normalizing the tensor values of input features across
// the batch dimension.
//
// This operator performs the following batch normalization, defined as:
// Output = Scale * ((Input - Mean) / sqrt(Variance + Epsilon)) + Bias
//
// If Activation is provided, the output will be further processed:
// Output = Activation(Output)
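// For example, given Input = 3, Mean = 1, Variance = 4, Scale = 2,
// Bias = 0.5 and a negligible Epsilon, the output is
// 2 * ((3 - 1) / sqrt(4)) + 0.5 = 2.5.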
struct BatchNormalization {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of mean operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 mean_operand_id;
// The id of variance operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 variance_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The optional 1-D tensor of the scaling values whose size is equal to the
// size of the input dimension denoted by axis.
uint64? scale_operand_id;
// The optional 1-D tensor of the bias values whose size is equal to the
// size of the input dimension denoted by axis.
uint64? bias_operand_id;
// A scalar which specifies the index of the feature count dimension of the
// input shape to which the mean and variance values correspond.
uint32 axis = 1;
// A float scalar which specifies a small value to prevent computational
// error due to divide-by-zero.
float epsilon = 1e-5;
// The optional activation function that immediately follows the
// batchNormalization.
Activation? activation;
};
// Clamps the input tensor element-wise within a range specified by the minimum
// and maximum values.
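// For example, clamping input = [-2, -0.5, 0.5, 2] with min_value = -1 and
// max_value = 1 yields [-1, -0.5, 0.5, 1].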
struct Clamp {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The minimum value of the range.
float min_value;
// The maximum value of the range.
float max_value;
};
// Represents the concat operation that concatenates the input tensors along
// the given axis.
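// For example, concatenating two inputs with shapes (1, 3) and (2, 3) along
// axis = 0 yields an output with shape (3, 3).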
struct Concat {
// The ids of the input operands are used to get the `Operand` descriptions
// from
// `GraphInfo.id_to_operand_map`.
array<uint64> input_operand_ids;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The axis used to concatenate along.
uint32 axis;
};
// Represents the `MLInputOperandLayout` that specifies the layout format of
// the input tensor. `kChannelsFirst` means `nchw` (batches, channels, height,
// width), `kChannelsLast` means `nhwc` (batches, height, width, channels).
// The layout is used to determine the spatial dimensions of the input
// tensor, so it is safe to represent it as an enum.
enum InputOperandLayout {
kChannelsFirst,
kChannelsLast,
};
// A size has height and width values.
struct Size2d {
uint32 height;
uint32 width;
};
// The additional rows and columns added to the beginning and ending of each
// spatial dimension of input.
struct Padding2d {
// The height and width padding at the beginning of input tensor.
Size2d beginning;
// The height and width padding at the ending of input tensor.
Size2d ending;
};
// Represents a 2-D convolution given the input and filter tensors.
//
// * conv2d (Kind == kDirect): the `nchw` input layout only supports the
// `oihw` (output_channels, input_channels/groups, height, width) filter
// layout; the `nhwc` input layout supports the `ohwi` filter layout for
// regular conv2d and the `ihwo` (1, height, width, options.groups) filter
// layout for depthwise conv2d, where
// `options.groups == inputChannels == outputChannels`.
// * convTranspose2d (Kind == kTransposed) only supports the `iohw`
// (input_channels, output_channels/groups, height, width) filter layout.
// Support for other layouts is being discussed in the working group:
// https://github.com/webmachinelearning/webnn/issues/324.
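// As an illustrative sketch, the output size of the direct (non-transposed)
// convolution relates to these fields, per spatial dimension, as:
// output_size = floor((input_size + padding.beginning + padding.ending -
//     (filter_size - 1) * dilation - 1) / stride) + 1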
struct Conv2d {
enum Kind {
// Represents a 2-D convolution given 4-D input and filter tensors.
kDirect,
// Represents a 2-D transposed convolution given 4-D input and filter
// tensors.
kTransposed,
};
// The kind of 2-D convolution.
Kind kind;
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of filter operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 filter_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The padding for each spatial dimension of input.
Padding2d padding;
// The stride of the sliding window for each spatial dimension of input.
Size2d strides;
// The dilation factor for each spatial dimension of input.
Size2d dilations;
// The number of groups that input channels and output channels are divided
// into.
uint32 groups = 1;
// The layout format of the input.
InputOperandLayout input_layout;
// The id of the optional 1-D tensor with the shape of output channels whose
// values are added to the convolution result.
uint64? bias_operand_id;
// The optional activation function that immediately follows the convolution.
Activation? activation;
};
// Represents an element-wise binary operation, mathematically equivalent to:
// <output_operand> = <lhs_operand> <operation_kind> <rhs_operand>;
// The shapes of left-hand side (lhs) operand and right-hand side (rhs) operand
// must be compatible according to numpy-broadcasting-rule:
// https://www.w3.org/TR/webnn/#biblio-numpy-broadcasting-rule
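// For example, adding an lhs with shape (2, 3) to an rhs with shape (1, 3)
// broadcasts the rhs across the first dimension, yielding an output with
// shape (2, 3).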
struct ElementWiseBinary {
enum Kind {
kAdd,
kSub,
kMul,
kDiv,
kMax,
kMin,
kPow,
kEqual,
kGreater,
kGreaterOrEqual,
kLesser,
kLesserOrEqual,
};
// The kind of binary operation.
Kind kind;
// The id of lhs operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 lhs_operand_id;
// The id of rhs operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 rhs_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Represents an element-wise unary operation, mathematically equivalent to:
// <output_operand> = <operation_kind>(<input_operand>);
struct ElementWiseUnary {
enum Kind {
kAbs,
kCeil,
kCos,
kExp,
kFloor,
kLog,
kNeg,
kSin,
kTan,
kLogicalNot,
kIdentity,
kSqrt,
kErf,
kReciprocal,
kCast,
};
// The kind of unary operation.
Kind kind;
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Expands the shape of input operand to the output shape according to
// numpy-broadcasting-rule:
// https://www.w3.org/TR/webnn/#biblio-numpy-broadcasting-rule
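// For example, an input with shape (1, 3) can be expanded to an output with
// shape (4, 3) by repeating its single row four times.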
struct Expand {
// The input operand referenced by the input id must be distinct from the
// output operand referenced by the output id, and the input shape must be
// broadcastable to the output shape.
uint64 input_operand_id;
uint64 output_operand_id;
};
// Specifies the different ways to pad a tensor. The padding value is only
// specified when the mode is "constant".
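// For example (illustrative), padding the 1-D tensor [1, 2, 3] with two
// values on each side yields:
//   constant (value = 0): [0, 0, 1, 2, 3, 0, 0]
//   edge:                 [1, 1, 1, 2, 3, 3, 3]
//   reflection:           [3, 2, 1, 2, 3, 2, 1]
//   symmetric:            [2, 1, 1, 2, 3, 3, 2]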
struct ConstantPadding {
float value = 0;
};
struct EdgePadding {};
struct ReflectionPadding {};
struct SymmetricPadding {};
union PaddingMode {
ConstantPadding constant;
EdgePadding edge;
ReflectionPadding reflection;
SymmetricPadding symmetric;
};
// This operator performs the following normalization, defined as:
// Output = scale * (input - mean) / sqrt(variance + epsilon) + bias, where
// mean and variance are computed per instance per channel. The specified
// layout determines how to choose the channel.
struct InstanceNormalization {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
// The optional 1-D tensor of the scale values whose size is equal to the
// size of the feature dimension of the input.
uint64? scale_operand_id;
// The optional 1-D tensor of the bias values whose size is equal to the
// size of the feature dimension of the input.
uint64? bias_operand_id;
// A float scalar which specifies a small value to prevent computational
// error due to divide-by-zero.
float epsilon = 1e-5;
// The layout format of the input.
InputOperandLayout layout;
};
// Represents the matmul operation, which computes the matrix product of two
// input tensors.
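// For example, multiplying `a` with shape (2, 3) by `b` with shape (3, 4)
// yields an output with shape (2, 4); for batched inputs with shapes
// (5, 2, 3) and (5, 3, 4), the output shape is (5, 2, 4).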
struct Matmul {
// The id of `a` operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 a_operand_id;
// The id of `b` operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 b_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Represents a pad operation which inflates the input tensor with constant or
// mirrored values on the edges.
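// For example, given input = [[1, 2],
//                             [3, 4]] with shape (2, 2),
// beginning_padding = [1, 0], ending_padding = [0, 1] and constant mode
// with value = 0, the output is [[0, 0, 0],
//                                [1, 2, 0],
//                                [3, 4, 0]] with shape (3, 3).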
struct Pad {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The number of padding values to add at the beginning of each input
// dimension. The array length should equal the rank of the input tensor.
array<uint32> beginning_padding;
// The number of padding values to add at the end of each input dimension.
// The array length should equal the rank of the input tensor.
array<uint32> ending_padding;
PaddingMode mode;
};
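// Represents a reduce operation that computes a scalar value for each slice
// of the input along the given axes.
// For example, a kSum reduction over input = [[1, 2],
//                                             [3, 4]] with axes = [1]
// yields [3, 7] with shape (2), or [[3], [7]] with shape (2, 1) when
// keep_dimensions is true.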
struct Reduce {
enum Kind {
kL1,
kL2,
kLogSum,
kLogSumExp,
kMax,
kMean,
kMin,
kProduct,
kSum,
kSumSquare,
};
// The kind of reduce operation.
Kind kind;
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// Axes indicates the dimensions to reduce. The values in the sequence
// must be in the range [0, N-1] where N is the rank of the input tensor.
// The length of the sequence should be less than or equal to N.
array<uint32> axes;
// If true, retains reduced dimensions with size 1.
bool keep_dimensions = false;
};
// Represents an average, L2, or max pooling operation across the elements
// within a moving window over the input tensor.
// This struct also contains the attributes of the pool2d operator, but the
// `roundingType` and `outputSizes` array in MLPool2dOptions are not included
// because they are used to calculate the output dimensions of pool2d on the
// Blink side.
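// As an illustrative sketch, the output sizes computed on the Blink side
// follow the usual pooling relation, per spatial dimension:
// output_size = (input_size + padding.beginning + padding.ending -
//     (window_size - 1) * dilation - 1) / stride + 1
// rounded down or up according to the `roundingType`.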
struct Pool2d {
enum Kind {
kAveragePool2d,
kL2Pool2d,
kMaxPool2d,
};
Kind kind;
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The dimensions of the sliding window, either supplied by the user or
// defaulting to the height and width of the input operand.
Size2d window_dimensions;
// The padding for each spatial dimension of input.
Padding2d padding;
// The element stride of the sliding window for each spatial dimension of
// input.
Size2d strides;
// The dilation factor for each spatial dimension of input.
Size2d dilations;
// The layout format of the input.
InputOperandLayout layout;
};
struct StartAndSize {
uint32 start;
uint32 size;
};
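// Represents a slice operation that extracts a window of the input tensor.
// For example, given input = [[1, 2, 3],
//                             [4, 5, 6]] with shape (2, 3) and
// starts_and_sizes = [{start: 0, size: 1}, {start: 1, size: 2}], the output
// is [[2, 3]] with shape (1, 2).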
struct Slice {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// An array containing the start index and the number of elements of the
// slice window for each dimension of the input.
array<StartAndSize> starts_and_sizes;
};
// Represents the exponential linear unit (ELU) operation, which follows the
// expression max(0, x) + alpha * (exp(min(0, x)) - 1).
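// For example, with the default alpha = 1.0, an input of 2 maps to 2 and an
// input of -1 maps to exp(-1) - 1, approximately -0.632.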
struct Elu {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// A float scalar multiplier for (exp(min(0, x)) - 1).
float alpha = 1.0;
};
// Represents an operation of gathering elements from the axis dimension of
// the input tensor indexed by the indices tensor.
//
// Specifically, the Gather operation returns a new `MLOperand` by replacing
// the axis dimension of the input with the lookup result of
// `input[...indices...]` as in the equation below:
//
// output[..., dIndex0, ..., dIndexN, ...] = input[..., indices[], ...]
//             ^                                        ^ Look up the value
//             |                                          of `indices` in input.
//             This is dAxis, indicated by the `axis` parameter.
//             This dimension expands to `indices.dimensions`.
//
// Example 1, given an input = [[ 0,  1,  2],
//                              [10, 11, 12]] with shape (2, 3),
// an indices = [1, 0, 1] with shape (3),
// and axis = 0,
// the output should be [[10, 11, 12],
//                       [ 0,  1,  2],
//                       [10, 11, 12]] with shape (3, 3).
//
// Example 2, given an input = [[ 0,  1,  2],
//                              [10, 11, 12]] with shape (2, 3),
// an indices = [[0, 1],
//               [2, 1]] with shape (2, 2),
// and axis = 1,
// the output should be [[[ 0,  1],
//                        [ 2,  1]],
//                       [[10, 11],
//                        [12, 11]]] with shape (2, 2, 2).
//
// Note the values in `indices` are computed at runtime, so they can exceed
// the boundary of the `axis` dimension of input. If unchecked, such indices
// will cause out-of-bounds access.
//
// Therefore, the implementation must "sanitize" `indices` before using it to
// perform memory addressing operations on `input`.
//
// For some backends like DirectML, the native ML APIs have already done the
// "sanitization" so `indices` can be passed directly to the API. For other
// backends, the specific "sanitization" relies on the browser implementation.
//
// Typically, the implementation clamps the values in `indices` to be in range
// of `-N` (inclusive) to `N` (exclusive), where `N = input.dimensions[axis]`,
// and negative index means indexing from the end of the `axis` dimension.
//
// Need to finalize out-of-bounds indices sanitization behavior, tracked by spec
// issue: https://github.com/webmachinelearning/webnn/issues/486
//
// TODO(crbug.com/329134204): Add conformance test cases for gather's
// out-of-bounds access.
struct Gather {
// The input operand (referenced by input_operand_id) and the indices operand
// (referenced by indices_operand_id) must be distinct from the output
// operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 indices_operand_id;
uint64 output_operand_id;
// The axis dimension of the input tensor to gather on.
uint32 axis = 0;
};
// Corresponds to `MLOperand gelu(MLOperand input)` that computes the gelu
// function of the input tensor following the expression
// 0.5 * x * (1 + erf(x / sqrt(2))).
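// For example, gelu(0) = 0, and gelu(1) is approximately 0.841 since
// erf(1 / sqrt(2)) is approximately 0.683.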
struct Gelu {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
};
// Represents the processing direction of the input sequence for the recurrent
// network.
enum RecurrentNetworkDirection {
kForward,
kBackward,
kBoth,
};
// The ordering of the weights for the update (z), reset (r), and new (n)
// gates.
enum GruWeightLayout {
kZrn,
kRzn,
};
// Represents the Gated Recurrent Unit [GRU] recurrent network.
struct Gru {
// The id of the input operand.
uint64 input_operand_id;
// The id of the weight operand.
uint64 weight_operand_id;
// The id of the recurrent weight operand.
uint64 recurrent_weight_operand_id;
// The number of steps in the recurrent network.
uint32 steps;
// The number of features in the hidden state.
uint32 hidden_size;
// The ids of the output operands must be distinct from other operand ids.
array<uint64> output_operand_ids;
// The id of the bias operand.
uint64? bias_operand_id;
// The id of the recurrent bias operand.
uint64? recurrent_bias_operand_id;
// The id of the initial hidden state operand.
uint64? initial_hidden_state_operand_id;
// Indicates whether to apply the reset gate after or before matrix
// multiplication.
bool reset_after;
// Indicates whether to also return the entire sequence of outputs from each
// time step.
bool return_sequence;
// The processing direction of the input sequence.
RecurrentNetworkDirection direction;
// The ordering of the weight and bias vectors for the internal gates of GRU.
GruWeightLayout layout;
// Specifies a pair of activation functions: the first is used for the
// update and reset gates, the second for the new gate.
array<Activation, 2> activations;
};
// Represents a single step of Gated Recurrent Unit [GRU] recurrent network.
struct GruCell {
// The id of the input operand.
uint64 input_operand_id;
// The id of the weight operand.
uint64 weight_operand_id;
// The id of the recurrent weight operand.
uint64 recurrent_weight_operand_id;
// The id of the hidden state operand.
uint64 hidden_state_operand_id;
// The number of features in the hidden state.
uint32 hidden_size;
// The id of the output operand must be distinct from other operand ids.
uint64 output_operand_id;
// The id of the bias operand.
uint64? bias_operand_id;
// The id of the recurrent bias operand.
uint64? recurrent_bias_operand_id;
// Indicates whether to apply the reset gate after or before matrix
// multiplication.
bool reset_after;
// The ordering of the weight and bias vectors for the internal gates of GRU.
GruWeightLayout layout;
// Specifies a pair of activation functions: the first is used for the
// update and reset gates, the second for the new gate.
array<Activation, 2> activations;
};
// Represents general matrix multiplication (gemm) operation in the expression
// `alpha * A * B + beta * C`.
struct Gemm {
// The id of `A` operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 a_operand_id;
// The id of `B` operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 b_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// The id of optional `C` operand is used to get the `Operand` description
// from `GraphInfo.id_to_operand_map`.
uint64? c_operand_id;
// A float scalar multiplier for the product `A * B`.
float alpha = 1.0;
// A float scalar multiplier for the third tensor `C`.
float beta = 1.0;
// If true, transposes the first tensor before matrix multiplication.
bool a_transpose = false;
// If true, transposes the second tensor before matrix multiplication.
bool b_transpose = false;
};
// Represents an operation which performs a hard sigmoid function on every
// element in InputTensor, placing the result into the corresponding
// element of OutputTensor:
// Output = max(0, min(alpha * input + beta, 1))
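// For example, with the default alpha = 0.2 and beta = 0.5, an input of 0
// maps to 0.5 and any input of 2.5 or greater maps to 1.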
struct HardSigmoid {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
// A float scalar multiplier. The default value is 0.2.
float alpha = 0.2;
// A float scalar addition. The default value is 0.5.
float beta = 0.5;
};
// Represents an operation which computes the nonlinear function
// output = input * max(0, min(6, (input + 3))) / 6
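// For example, an input of -3 maps to 0, an input of 3 maps to 3, and an
// input of 1 maps to 1 * 4 / 6, approximately 0.667.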
struct HardSwish {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
};
// Represents an operation of normalizing the input across the feature
// dimensions for each individual sample in a batch, where the feature
// dimensions are the input dimensions indexed by `axes`.
//
// The normalization follows the expression below:
// Scale * ((Input - Mean) / sqrt(Variance + Epsilon)) + Bias
// where the mean and variance values are computed on the fly across the
// feature dimensions.
struct LayerNormalization {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
// The scale operand (referenced by scale_operand_id) includes the scaling
// values.
uint64? scale_operand_id;
// The bias operand (referenced by bias_operand_id) includes the bias values.
uint64? bias_operand_id;
// The indices to the input dimensions to reduce along.
array<uint32> axes;
// A float scalar which specifies a small value to prevent computational
// error due to divide-by-zero.
float epsilon = 1e-5;
};
// Represents a leaky version of the relu operation whose calculation follows
// the expression max(0, x) + alpha * min(0, x).
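// For example, with the default alpha = 0.01, an input of -5 maps to -0.05
// while an input of 5 maps to 5.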
struct LeakyRelu {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// A float scalar multiplier for min(0, x).
float alpha = 0.01;
};
// Represents a linear operation whose calculation follows the
// expression alpha * x + beta.
struct Linear {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
// A float scalar multiplier.
float alpha = 1.0;
// A float scalar addition.
float beta = 0;
};
// The ordering of the weights for the input (i), output (o), forget (f),
// and cell (g) gates.
enum LstmWeightLayout {
kIofg,
kIfgo,
};
// Represents an operation of Long Short-Term Memory (LSTM) recurrent network.
struct Lstm {
// The id of the input operand.
uint64 input_operand_id;
// The id of the weight operand.
uint64 weight_operand_id;
// The id of the recurrent weight operand.
uint64 recurrent_weight_operand_id;
// The ids of the output operands must be distinct from other operand ids.
array<uint64> output_operand_ids;
// The number of steps in the recurrent network.
uint32 steps;
// The number of features in the hidden state.
uint32 hidden_size;
// The id of the bias operand.
uint64? bias_operand_id;
// The id of the recurrent bias operand.
uint64? recurrent_bias_operand_id;
// The id of the peephole weight operand.
uint64? peephole_weight_operand_id;
// The id of the initial hidden state operand.
uint64? initial_hidden_state_operand_id;
// The id of the initial cell state operand.
uint64? initial_cell_state_operand_id;
// Indicates whether to return the entire sequence of outputs from each time
// step.
bool return_sequence;
// The processing direction of the input sequence.
RecurrentNetworkDirection direction;
// The ordering of both weight and bias vectors.
LstmWeightLayout layout;
// The activation functions for the input, output and forget gates.
array<Activation, 3> activations;
};
// Represents a single step of Long Short-Term Memory (LSTM) recurrent network.
struct LstmCell {
// The id of the input operand.
uint64 input_operand_id;
// The id of the weight operand.
uint64 weight_operand_id;
// The id of the recurrent weight operand.
uint64 recurrent_weight_operand_id;
// The id of the hidden state operand.
uint64 hidden_state_operand_id;
// The id of the cell state operand.
uint64 cell_state_operand_id;
// The ids of the output operands must be distinct from other operand ids.
array<uint64, 2> output_operand_ids;
// The number of features in the hidden state.
uint32 hidden_size;
// The id of the bias operand.
uint64? bias_operand_id;
// The id of the recurrent bias operand.
uint64? recurrent_bias_operand_id;
// The id of the peephole weight operand.
uint64? peephole_weight_operand_id;
// The ordering of both weight and bias vectors.
LstmWeightLayout layout;
// The activation functions for the input, output and forget gates.
array<Activation, 3> activations;
};
// Represents a parametric relu operation whose calculation follows the
// expression max(0, x) + slope * min(0, x).
struct Prelu {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of slope operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 slope_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Corresponds to `MLOperand relu(MLOperand x)` that computes the rectified
// linear function of the input tensor.
struct Relu {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Alters the shape of the input operand to that of the output operand. This
// operation does not copy or change the content of the input; it just
// changes the tensor's logical dimensions.
struct Reshape {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Corresponds to `MLOperand sigmoid(MLOperand x)` that computes the sigmoid
// function of the input tensor following the expression 1 / (exp(-x) + 1).
struct Sigmoid {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Corresponds to `MLOperand softmax(MLOperand x)` that computes the softmax
// values of the 2-D input tensor along axis 1.
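// For example, softmax([[1, 2]]) is approximately [[0.269, 0.731]], since
// exp(1) / (exp(1) + exp(2)) is approximately 0.269.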
struct Softmax {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Corresponds to `MLOperand softplus(MLOperand x)` that computes the softplus
// function of the input tensor following the expression ln(1 + exp(x)).
struct Softplus {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
};
// Corresponds to `MLOperand softsign(MLOperand x)` that computes the softsign
// function of the input tensor following the expression x / (1 + |x|).
struct Softsign {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
};
// Represents a split operation that splits an input tensor into multiple
// output tensors. The shape of the outputs and the specified axis determine
// how the split will be performed. Since axis specifies which input dimension
// will be split, the sum of all output dimension sizes along the axis
// dimension must be equal to the input tensor's axis dimension.
// Example:
// input = [1, 2, 3, (4)]
// axis = 3
// output[0] = [1, 2, 3, (1)]
// output[1] = [1, 2, 3, (2)]
// output[2] = [1, 2, 3, (1)]
struct Split {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The ids of output operands used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
array<uint64> output_operand_ids;
// Axis specifies which input tensor dimension will be split.
uint32 axis = 0;
};
// Corresponds to `MLOperand tanh(MLOperand x)` that computes the hyperbolic
// tangent function of the input tensor following the expression
// (exp(2 * x) - 1) / (exp(2 * x) + 1).
struct Tanh {
// The id of input operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 input_operand_id;
// The id of output operand is used to get the `Operand` description from
// `GraphInfo.id_to_operand_map`.
uint64 output_operand_id;
};
// Represents the transpose operation that permutes the dimensions of the
// input tensor following the given permutation.
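// For example, transposing an input with shape (2, 3, 4) using
// permutation = [2, 0, 1] yields an output with shape (4, 2, 3).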
struct Transpose {
// The id of input operand.
uint64 input_operand_id;
// The id of output operand.
uint64 output_operand_id;
// The values used to permute the dimensions of the input tensor.
array<uint32> permutation;
};
// Given a 2-D tensor (matrix), triangular returns a 2-D tensor containing
// either the upper or the lower triangular part of the input tensor. If the
// input tensor has more than 2 dimensions, it is treated as a batch of
// matrices represented by the last two dimensions, and the result has the
// same shape.
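// For example, given input = [[1, 2],
//                             [3, 4]], upper = true and diagonal = 0, the
// output is [[1, 2],
//            [0, 4]].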
struct Triangular {
// The input operand (referenced by input_operand_id) must be distinct from
// the output operand (referenced by output_operand_id).
uint64 input_operand_id;
uint64 output_operand_id;
// The upper field indicates whether the upper or the lower part of the input
// matrix is retained. True indicates that the upper part is retained.
bool upper;
// The diagonal field specifies how many diagonals above or below the main
// diagonal of the input matrix are retained or excluded. A value of 0 means
// no diagonals other than the main diagonal are affected.
int32 diagonal;
};
// Resamples the tensor values from the source to the destination spatial
// dimensions.
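// For example, an input with spatial dimensions (2, 2) and scales = [2, 2]
// resamples to an output with spatial dimensions (4, 4).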
struct Resample2d {
// The id of input operand.
uint64 input_operand_id;
// The id of output operand.
uint64 output_operand_id;
enum InterpolationMode {
kNearestNeighbor,
kLinear,
};
InterpolationMode mode;
// Specifies the scaling factor in each spatial dimension of the input.
// The scale may not be exactly output size / input size. The backend
// needs this information, e.g., for the linear interpolation algorithm.
array<float, 2>? scales;
// Specifies the two consecutive dimensions of the input tensor to which
// the interpolation algorithm applies.
array<uint32, 2> axes;
};
// Selects values from either the true value or the false value tensor
// depending on the corresponding values of the condition tensor: non-zero
// elements of the condition tensor select from the true value tensor, while
// zero-valued elements select from the false value tensor.
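// For example, given condition = [1, 0], true_value = [10, 20] and
// false_value = [30, 40], the output is [10, 40].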
struct Where {
// Constraints:
// * The shapes of all input operands must be bidirectionally broadcastable.
// * The data type of condition operand must be uint8.
// * The data types of true value, false value and output operands must be
// the same.
// * The output operand must be a different operand from condition, true
// value and false value operands.
// The id of condition operand.
uint64 condition_operand_id;
// The id of true value operand.
uint64 true_value_operand_id;
// The id of false value operand.
uint64 false_value_operand_id;
// The id of output operand.
uint64 output_operand_id;
};
// Represents the `MLActivation` which describes an activation function type
// used when creating other operations (Conv2d and BatchNormalization, for
// example).
union Activation {
Clamp clamp;
Elu elu;
Gelu gelu;
HardSigmoid hard_sigmoid;
LeakyRelu leaky_relu;
Linear linear;
Relu relu;
Sigmoid sigmoid;
Softmax softmax;
Softplus softplus;
Softsign softsign;
Tanh tanh;
};
// Holds one of the operators.
union Operation {
// Keep the order the same as the build methods of `MLGraphBuilder`.
ArgMinMax arg_min_max;
BatchNormalization batch_normalization;
Clamp clamp;
Concat concat;
Conv2d conv2d;
ElementWiseBinary element_wise_binary;
Elu elu;
ElementWiseUnary element_wise_unary;
Expand expand;
Gather gather;
Gelu gelu;
Gemm gemm;
Gru gru;
GruCell gru_cell;
HardSigmoid hard_sigmoid;
HardSwish hard_swish;
LayerNormalization layer_normalization;
InstanceNormalization instance_normalization;
LeakyRelu leaky_relu;
Linear linear;
Lstm lstm;
LstmCell lstm_cell;
Matmul matmul;
Pad pad;
Pool2d pool2d;
Prelu prelu;
Reduce reduce;
Relu relu;
Resample2d resample2d;
Reshape reshape;
Sigmoid sigmoid;
Slice slice;
Softmax softmax;
Softplus softplus;
Softsign softsign;
Split split;
Tanh tanh;
Transpose transpose;
Triangular triangular;
Where where;
};
// Describes the information of an entire WebNN graph.
struct GraphInfo {
// A map of all operands used in this `GraphInfo`; the key is the operand id.
map<uint64, Operand> id_to_operand_map;
// The id array from the `GraphInfo.id_to_operand_map` is used to identify the
// input operands of this graph.
array<uint64> input_operands;
// The id array from the `GraphInfo.id_to_operand_map` is used to identify the
// output operands of this graph.
array<uint64> output_operands;
// The operations are sorted in topological order.
array<Operation> operations;
// The constant weight data specified through the MLGraphBuilder.constant()
// method defined in the WebIDL; the key is the constant operand id.
map<uint64, mojo_base.mojom.BigBuffer> constant_id_to_buffer_map;
};
// Represents the return value of `WebNNGraph::Compute()`. It is
// `named_outputs` if the computation was successful and `error` otherwise.
union ComputeResult {
map<string, mojo_base.mojom.BigBuffer> named_outputs;
Error error;
};
// WebNNGraph runs in the GPU process and is called by the renderer process to
// execute the computational graph. Graph execution is performed by calling
// hardware accelerated OS machine learning APIs.
interface WebNNGraph {
// Called by the renderer process to carry out the computational workload of
// the compiled graph. The key of the map is the input/output name that
// identifies a tensor in the graph; the value is shared memory, used to
// reduce memory copies for inference.
Compute(map<string, mojo_base.mojom.BigBuffer> named_inputs)
=> (ComputeResult result);
};