| // Copyright 2023 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| module webnn.mojom; |
| |
| import "mojo/public/mojom/base/big_buffer.mojom"; |
| import "services/webnn/public/mojom/webnn_error.mojom"; |
| |
// Represents the `MLOperand` which describes not only input and constant
// operands, but also the output operand of an operator.
struct Operand {
  // Represents the `MLOperandType` in the WebIDL definition: the element
  // data type of the tensor.
  enum DataType {
    kFloat32,
    kFloat16,
    kInt32,
    kUint32,
    kInt64,
    kUint64,
    kInt8,
    kUint8,
  };

  // Distinguishes how the operand is produced within the graph.
  enum Kind {
    // A graph input whose data is supplied at compute time.
    kInput,
    // A constant whose data is provided at graph build time.
    kConstant,
    // The result of an operator; may also serve as a graph output.
    kOutput,
  };

  Kind kind;
  // The data type of the operand.
  DataType data_type;
  // The dimensions (shape) of the operand.
  array<uint32> dimensions;
  // The name field is only required for input/output operands of the graph.
  string? name;
};
| |
// Computes the indices of the max or min elements of the input tensor's
// elements along the provided axes.
struct ArgMinMax {
  enum Kind {
    kMin,
    kMax,
  };

  // The kind of operation: argMin or argMax.
  Kind kind;
  // The input operand referenced by the input id must be distinct from the
  // output operand referenced by the output id and the output sizes must be
  // the same as the input sizes, except for the reduced axis.
  uint64 input_operand_id;
  uint64 output_operand_id;

  // Axes indicates the dimensions to reduce. The values in the sequence must
  // be in the range [0, N-1] where N is the rank of the input tensor.
  array<uint32> axes;
  // If true, retains reduced dimensions with size 1.
  bool keep_dimensions = false;
  // If true, selects the last index rather than the first found along an
  // axis when the extreme value occurs more than once.
  bool select_last_index = false;
};
| |
// Represents normalizing the tensor values of input features across
// the batch dimension.
//
// This operator performs the following batch normalization, defined as:
// Output = Scale * ((Input - Mean) / sqrt(Variance + Epsilon)) + Bias
//
// If Activation is provided, the output will be further processed:
// Output = Activation(Output)
struct BatchNormalization {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of mean operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 mean_operand_id;
  // The id of variance operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 variance_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // The optional 1-D tensor of the scaling values whose size is equal to the
  // size of the input dimension denoted by axis.
  uint64? scale_operand_id;
  // The optional 1-D tensor of the bias values whose size is equal to the
  // size of the input dimension denoted by axis.
  uint64? bias_operand_id;
  // A scalar which specifies the index to the feature count dimension of the
  // input shape for which the mean and variance values are.
  uint32 axis = 1;
  // A float scalar which specifies a small value to prevent computational
  // error due to divide-by-zero.
  float epsilon = 1e-5;
  // The optional activation function that immediately follows the
  // batchNormalization.
  Activation? activation;
};
| |
// Clamps the input tensor element-wise within a range specified by the
// minimum and maximum values.
struct Clamp {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // The minimum value of the range (inclusive lower bound of the output).
  float min_value;
  // The maximum value of the range (inclusive upper bound of the output).
  float max_value;
};
| |
// Represents the concat operation that concatenates the input tensors along
// the given axis.
struct Concat {
  // The ids of input operands are used to get the `Operand` descriptions from
  // `GraphInfo.id_to_operand_map`.
  array<uint64> input_operand_ids;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // The dimension along which the inputs are concatenated.
  uint32 axis;
};
| |
// Represents the `MLInputOperandLayout` that specifies the layout format of
// the input tensor. `kChannelsFirst` means `nchw` (batches, channels, height,
// width), `kChannelsLast` means `nhwc` (batches, height, width, channels).
// The type is used to get the spatial dimension from input tensor, thus safe
// to represent as enum.
enum InputOperandLayout {
  kChannelsFirst,
  kChannelsLast,
};
| |
// A 2-D size expressed as height and width values.
struct Size2d {
  uint32 height;
  uint32 width;
};
| |
// The additional rows and columns added to the beginning and ending of each
// spatial dimension of the input.
struct Padding2d {
  // The height and width padding at the beginning of the input tensor.
  Size2d beginning;
  // The height and width padding at the ending of the input tensor.
  Size2d ending;
};
| |
// Represents a 2-D convolution given the input and filter tensors.
//
// * conv2d (Kind == kDirect): `nchw` input layout only supports `oihw` (
//   output_channels, input_channels/groups, height, width) filter layout,
//   `nhwc` input layout only supports `ohwi` for regular conv2d and `ihwo` (1,
//   height, width, options.groups) filter layout for depthwise conv2d that is
//   `options.groups = inputChannels = outputChannels`.
// * convTranspose2d (Kind == kTransposed) only supports `iohw`
//   (input_channels, output_channels/groups, height, width) filter layout.
// Support for other layouts is being discussed in the working group:
// https://github.com/webmachinelearning/webnn/issues/324.
struct Conv2d {
  enum Kind {
    // Represents a 2-D convolution given 4-D input and filter tensors.
    kDirect,
    // Represents a 2-D transposed convolution given 4-D input and filter
    // tensors.
    kTransposed,
  };

  // The kind of 2-D convolution: direct or transposed.
  Kind kind;
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of filter operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 filter_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // The padding for each spatial dimension of input.
  Padding2d padding;
  // The stride of the sliding window for each spatial dimension of input.
  Size2d strides;
  // The dilation factor for each spatial dimension of input.
  Size2d dilations;
  // The number of groups that input channels and output channels are divided
  // into.
  uint32 groups = 1;
  // The layout format of the input.
  InputOperandLayout input_layout;
  // The optional additional 1-D tensor with the shape of output channels
  // whose values are added to the convolution result.
  uint64? bias_operand_id;
  // The optional activation function that immediately follows the convolution.
  Activation? activation;
};
| |
// Represents an element-wise binary operation, mathematically equivalent to:
// <output_operand> = <lhs_operand> <operation_kind> <rhs_operand>;
// The shapes of left-hand side (lhs) operand and right-hand side (rhs)
// operand must be compatible according to numpy-broadcasting-rule:
// https://www.w3.org/TR/webnn/#biblio-numpy-broadcasting-rule
struct ElementWiseBinary {
  enum Kind {
    kAdd,
    kSub,
    kMul,
    kDiv,
    kMax,
    kMin,
    kPow,
    kEqual,
    kGreater,
    kGreaterOrEqual,
    kLesser,
    kLesserOrEqual,
  };

  // The kind of binary operation.
  Kind kind;
  // The id of lhs operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 lhs_operand_id;
  // The id of rhs operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 rhs_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Represents an element-wise unary operation, mathematically equivalent to:
// <output_operand> = <operation_kind>(<input_operand>);
struct ElementWiseUnary {
  enum Kind {
    kAbs,
    kCeil,
    kCos,
    kExp,
    kFloor,
    kLog,
    kNeg,
    kSin,
    kTan,
    kLogicalNot,
    kIdentity,
    kSqrt,
    kErf,
    kReciprocal,
    kCast,
  };

  // The kind of unary operation.
  Kind kind;
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Expands the shape of the input operand to the output shape according to
// numpy-broadcasting-rule:
// https://www.w3.org/TR/webnn/#biblio-numpy-broadcasting-rule
struct Expand {
  // The input operand referenced by the input id must be distinct from the
  // output operand referenced by the output id, and the input shape must be
  // broadcastable to the output shape.
  uint64 input_operand_id;
  uint64 output_operand_id;
};
| |
// Specifies the different ways to pad a tensor. The padding value is only
// specified when the mode is "constant".
struct ConstantPadding {
  // The value used to fill the padded elements.
  float value = 0;
};

// Pads with the edge (border) values of the input tensor; carries no options.
struct EdgePadding {};

// Pads with the mirrored values of the input, excluding the edge value;
// carries no options.
struct ReflectionPadding {};

// Pads with the mirrored values of the input, including the edge value;
// carries no options.
struct SymmetricPadding {};

// Tagged union selecting exactly one padding mode for the pad operation.
union PaddingMode {
  ConstantPadding constant;
  EdgePadding edge;
  ReflectionPadding reflection;
  SymmetricPadding symmetric;
};
| |
// This operator performs the following normalization, defined as:
// Output = scale * (input - mean) / sqrt(variance + epsilon) + bias, where
// mean and variance are computed per instance per channel. The specified
// layout determines how to choose the channel.
struct InstanceNormalization {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;

  // The optional 1-D tensor of the scale values whose size is equal to the
  // size of the feature dimension of the input.
  uint64? scale_operand_id;
  // The optional 1-D tensor of the bias values whose size is equal to the
  // size of the feature dimension of the input.
  uint64? bias_operand_id;
  // A float scalar which specifies a small value to prevent computational
  // error due to divide-by-zero.
  float epsilon = 1e-5;
  // The layout format of the input.
  InputOperandLayout layout;
};
| |
// Represents the matmul operation which computes the matrix product of two
// input tensors.
struct Matmul {
  // The id of `a` operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 a_operand_id;
  // The id of `b` operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 b_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Represents a pad operation which inflates the input tensor with constant or
// mirrored values on the edges.
struct Pad {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // The number of padding values to add at the beginning of each input
  // dimension. The array length should be equal to the rank of input tensor.
  array<uint32> beginning_padding;
  // The number of padding values to add at the ending of each input
  // dimension. The array length should be equal to the rank of input tensor.
  array<uint32> ending_padding;

  // How the padded elements are filled; see `PaddingMode`.
  PaddingMode mode;
};
| |
// Reduces the input tensor along the given axes using the specified
// reduction function.
struct Reduce {
  enum Kind {
    kL1,
    kL2,
    kLogSum,
    kLogSumExp,
    kMax,
    kMean,
    kMin,
    kProduct,
    kSum,
    kSumSquare,
  };

  // The kind of reduce operation.
  Kind kind;
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // Axes indicates the dimensions to reduce. The values in the sequence
  // must be in the range [0, N-1] where N is the rank of the input tensor.
  // The length of the sequence should be less than or equal to N.
  array<uint32> axes;
  // If true, retains reduced dimensions with size 1.
  bool keep_dimensions = false;
};
| |
// Represents an average, L2 or max pooling operation across all the elements
// within the moving window over the input tensor.
// This struct also contains the attributes of the pool2d operator, but the
// `roundingType` and `outputSizes` array in MLPool2dOptions are not included
// because they are used to calculate the output dimensions of pool2d on the
// blink side.
struct Pool2d {
  enum Kind {
    kAveragePool2d,
    kL2Pool2d,
    kMaxPool2d,
  };

  // The kind of pooling operation.
  Kind kind;
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // The dimensions of the sliding window which is supplied by the user or the
  // element of input operand height and width.
  Size2d window_dimensions;
  // The padding for each spatial dimension of input.
  Padding2d padding;
  // The element stride of the sliding window for each spatial dimension of
  // input.
  Size2d strides;
  // The dilation factor for each spatial dimension of input.
  Size2d dilations;
  // The layout format of the input.
  InputOperandLayout layout;
};
| |
// A window in a single dimension: the starting element index and the number
// of elements to include.
struct StartAndSize {
  uint32 start;
  uint32 size;
};
| |
// Extracts a sub-tensor (window) from the input tensor.
struct Slice {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // The start index and element count of the window in each dimension of the
  // input; one entry per input dimension.
  array<StartAndSize> starts_and_sizes;
};
| |
// Represents an operation of the exponential linear unit function in the
// expression max(0, x) + alpha * (exp(min(0, x)) - 1).
struct Elu {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // A float scalar multiplier for (exp(min(0, x)) - 1).
  float alpha = 1.0;
};
| |
// Represents an operation of gathering elements from the axis dimension of
// the input tensor indexed by the indices tensor.
//
// Specifically, the Gather operation returns a new `MLOperand` by replacing
// the axis dimension of input with the lookup result of
// `input[...indices...]` as the equation below:
//
// output[..., dIndex0, ..., dIndexN, ...] = input[..., indices[], ...]
//              ^                                       ^ Look up the value
//              |                                         of `indices` in input
//              This is dAxis, indicated by `axis` parameter.
//              This dimension expands to `indices.dimensions`
//
// Example 1, given an input = [[ 0, 1, 2],
//                              [10, 11, 12]] with shape (2, 3),
//             an indices = [1, 0, 1] with shape (3),
//             and axis = 0,
//             the output should be [[10, 11, 12],
//                                   [ 0,  1,  2],
//                                   [10, 11, 12]] with shape (3, 3).
//
// Example 2, given an input = [[ 0, 1, 2],
//                              [10, 11, 12]] with shape (2, 3),
//             an indices = [[0, 1],
//                           [2, 1]] with shape (2, 2),
//             and axis = 1,
//             the output should be [[[ 0,  1],
//                                    [ 2,  1]],
//                                   [[10, 11],
//                                    [12, 11]]] with shape (2, 2, 2).
//
// Note the values in `indices` are computed at runtime, so they can exceed
// the boundary of the `axis` dimension of input. If unchecked, such indices
// will cause out-of-bounds access.
//
// Therefore, the implementation must "sanitize" `indices` before using it to
// perform memory addressing operations on `input`.
//
// For some backends like DirectML, the native ML APIs have already done the
// "sanitization" so `indices` can be passed directly to the API. For other
// backends, the specific "sanitization" relies on the browser implementation.
//
// Typically, the implementation clamps the values in `indices` to be in range
// of `-N` (inclusive) to `N` (exclusive), where `N = input.dimensions[axis]`,
// and a negative index means indexing from the end of the `axis` dimension.
//
// Need to finalize out-of-bounds indices sanitization behavior, tracked by
// spec issue: https://github.com/webmachinelearning/webnn/issues/486
//
// TODO(crbug.com/329134204): Add conformance test cases for gather's
// out-of-bounds access.
struct Gather {
  // The input operand (referenced by input_operand_id) and the indices
  // operand (referenced by indices_operand_id) must be distinct from the
  // output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 indices_operand_id;
  uint64 output_operand_id;

  // The axis dimension of the input tensor to gather on.
  uint32 axis = 0;
};
| |
// Corresponds to `MLOperand gelu(MLOperand input)` that computes the gelu
// function of the input tensor following the expression
// 0.5 * x * (1 + erf(x / sqrt(2))).
struct Gelu {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;
};
| |
// Represents the processing direction of the input sequence for a recurrent
// network.
enum RecurrentNetworkDirection {
  kForward,
  kBackward,
  // Process the sequence in both directions.
  kBoth,
};
| |
// The ordering of the weight vectors for the update (z), reset (r) and
// new (n) gates of GRU.
enum GruWeightLayout {
  // Update-reset-new ordering.
  kZrn,
  // Reset-update-new ordering.
  kRzn,
};
| |
// Represents the Gated Recurrent Unit [GRU] recurrent network.
struct Gru {
  // The id of the input operand.
  uint64 input_operand_id;
  // The id of the weight operand.
  uint64 weight_operand_id;
  // The id of the recurrent weight operand.
  uint64 recurrent_weight_operand_id;
  // The number of time steps in the recurrent network.
  uint32 steps;
  // The number of features in the hidden state.
  uint32 hidden_size;
  // The ids of the output operands must be distinct from other operand ids.
  array<uint64> output_operand_ids;

  // The id of the optional bias operand.
  uint64? bias_operand_id;
  // The id of the optional recurrent bias operand.
  uint64? recurrent_bias_operand_id;
  // The id of the optional initial hidden state operand.
  uint64? initial_hidden_state_operand_id;
  // Indicates whether to apply the reset gate after or before matrix
  // multiplication.
  bool reset_after;
  // Indicates whether to also return the entire sequence with every output
  // from each time step.
  bool return_sequence;
  // The processing direction of the input sequence.
  RecurrentNetworkDirection direction;
  // The ordering of the weight and bias vectors for the internal gates of
  // GRU.
  GruWeightLayout layout;
  // Specifies a pair of activation functions for the update/reset gates and
  // the new gate respectively.
  array<Activation, 2> activations;
};
| |
// Represents a single step of the Gated Recurrent Unit [GRU] recurrent
// network.
struct GruCell {
  // The id of the input operand.
  uint64 input_operand_id;
  // The id of the weight operand.
  uint64 weight_operand_id;
  // The id of the recurrent weight operand.
  uint64 recurrent_weight_operand_id;
  // The id of the hidden state operand.
  uint64 hidden_state_operand_id;
  // The number of features in the hidden state.
  uint32 hidden_size;
  // The id of the output operand must be distinct from other operand ids.
  uint64 output_operand_id;

  // The id of the optional bias operand.
  uint64? bias_operand_id;
  // The id of the optional recurrent bias operand.
  uint64? recurrent_bias_operand_id;
  // Indicates whether to apply the reset gate after or before matrix
  // multiplication.
  bool reset_after;
  // The ordering of the weight and bias vectors for the internal gates of
  // GRU.
  GruWeightLayout layout;
  // Specifies a pair of activation functions for the update/reset gates and
  // the new gate respectively.
  array<Activation, 2> activations;
};
| |
// Represents the general matrix multiplication (gemm) operation in the
// expression `alpha * A * B + beta * C`.
struct Gemm {
  // The id of `A` operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 a_operand_id;
  // The id of `B` operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 b_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // The id of optional `C` operand is used to get the `Operand` description
  // from `GraphInfo.id_to_operand_map`.
  uint64? c_operand_id;
  // A float scalar multiplier for the `A * B`.
  float alpha = 1.0;
  // A float scalar multiplier for the third tensor `C`.
  float beta = 1.0;
  // If true, transposes the first tensor before matrix multiplication.
  bool a_transpose = false;
  // If true, transposes the second tensor before matrix multiplication.
  bool b_transpose = false;
};
| |
// Represents an operation which performs a hard sigmoid function on every
// element in InputTensor, placing the result into the corresponding
// element of OutputTensor:
// Output = max(0, min(alpha * input + beta, 1))
struct HardSigmoid {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;

  // A float scalar multiplier. The default value is 0.2.
  float alpha = 0.2;
  // A float scalar addition. The default value is 0.5.
  float beta = 0.5;
};
| |
// Represents an operation which computes the nonlinear function
// output = input * max(0, min(6, (input + 3))) / 6
struct HardSwish {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;
};
| |
// Represents an operation of normalizing the input across the feature
// dimensions for each individual sample in a batch, where the feature
// dimensions are the input dimensions indexed by `axes`.
//
// The normalization follows the expression below:
// Scale * ((Input - Mean) / sqrt(Variance + Epsilon)) + Bias
// where the mean and variance values are computed on the fly across the
// feature dimensions.
struct LayerNormalization {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;

  // The optional scale operand (referenced by scale_operand_id) includes the
  // scaling values.
  uint64? scale_operand_id;
  // The optional bias operand (referenced by bias_operand_id) includes the
  // bias values.
  uint64? bias_operand_id;
  // The indices to the input dimensions to reduce along.
  array<uint32> axes;
  // A float scalar which specifies a small value to prevent computational
  // error due to divide-by-zero.
  float epsilon = 1e-5;
};
| |
// Represents a leaky version of the relu operation whose calculation follows
// the expression max(0, x) + alpha * min(0, x).
struct LeakyRelu {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // A float scalar multiplier for min(0, x).
  float alpha = 0.01;
};
| |
// Represents a linear operation whose calculation follows the
// expression alpha * x + beta.
struct Linear {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // A float scalar multiplier.
  float alpha = 1.0;
  // A float scalar addition.
  float beta = 0;
};
| |
// The ordering of the weight vectors for the input (i), output (o),
// forget (f) and cell (g) gates of LSTM.
enum LstmWeightLayout {
  // Input-output-forget-cell ordering.
  kIofg,
  // Input-forget-cell-output ordering.
  kIfgo,
};
| |
// Represents an operation of the Long Short-Term Memory (LSTM) recurrent
// network.
struct Lstm {
  // The id of the input operand.
  uint64 input_operand_id;
  // The id of the weight operand.
  uint64 weight_operand_id;
  // The id of the recurrent weight operand.
  uint64 recurrent_weight_operand_id;
  // The ids of the output operands must be distinct from other operand ids.
  array<uint64> output_operand_ids;

  // The number of time steps in the recurrent network.
  uint32 steps;
  // The number of features in the hidden state.
  uint32 hidden_size;

  // The id of the optional bias operand.
  uint64? bias_operand_id;
  // The id of the optional recurrent bias operand.
  uint64? recurrent_bias_operand_id;
  // The id of the optional peephole weight operand.
  uint64? peephole_weight_operand_id;
  // The id of the optional initial hidden state operand.
  uint64? initial_hidden_state_operand_id;
  // The id of the optional initial cell state operand.
  uint64? initial_cell_state_operand_id;

  // Indicates whether to return the entire sequence of outputs from each
  // time step.
  bool return_sequence;
  // The processing direction of the input sequence.
  RecurrentNetworkDirection direction;
  // The ordering of both weight and bias vectors.
  LstmWeightLayout layout;
  // The activation functions for the input, output and forget gates.
  array<Activation, 3> activations;
};
| |
// Represents a single step of the Long Short-Term Memory (LSTM) recurrent
// network.
struct LstmCell {
  // The id of the input operand.
  uint64 input_operand_id;
  // The id of the weight operand.
  uint64 weight_operand_id;
  // The id of the recurrent weight operand.
  uint64 recurrent_weight_operand_id;
  // The id of the hidden state operand.
  uint64 hidden_state_operand_id;
  // The id of the cell state operand.
  uint64 cell_state_operand_id;
  // The ids of the output operands must be distinct from other operand ids.
  array<uint64, 2> output_operand_ids;

  // The number of features in the hidden state.
  uint32 hidden_size;

  // The id of the optional bias operand.
  uint64? bias_operand_id;
  // The id of the optional recurrent bias operand.
  uint64? recurrent_bias_operand_id;
  // The id of the optional peephole weight operand.
  uint64? peephole_weight_operand_id;

  // The ordering of both weight and bias vectors.
  LstmWeightLayout layout;
  // The activation functions for the input, output and forget gates.
  array<Activation, 3> activations;
};
| |
// Represents a parametric relu operation whose calculation follows the
// expression max(0, x) + slope * min(0, x).
struct Prelu {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of slope operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 slope_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Corresponds to `MLOperand relu(MLOperand x)` that computes the rectified
// linear function of the input tensor.
struct Relu {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Alters the shape of the input operand to the output operand. This operation
// does not copy or change the content of the input, it just changes the
// tensor's logical dimensions.
struct Reshape {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Corresponds to `MLOperand sigmoid(MLOperand x)` that computes the sigmoid
// function of the input tensor following the expression 1 / (exp(-x) + 1).
struct Sigmoid {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Corresponds to `MLOperand softmax(MLOperand x)` that computes the softmax
// values of the 2-D input tensor along axis 1.
struct Softmax {
  // The id of input operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of output operand is used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Corresponds to `MLOperand softplus(MLOperand x)` that computes the softplus
// function of the input tensor following the expression ln(1 + exp(x)).
struct Softplus {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;
};
| |
// Corresponds to `MLOperand softsign(MLOperand x)` that computes the softsign
// function of the input tensor following the expression x / (1 + |x|).
struct Softsign {
  // The input operand (referenced by input_operand_id) must be distinct from
  // the output operand (referenced by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;
};
| |
// Represents a split operation that splits an input tensor into multiple
// output tensors. The shapes of the outputs and the specified axis determine
// how the split will be performed. Since axis specifies which input dimension
// will be split, the sum of all output dimension sizes along the axis
// dimension must be equal to the input tensor's axis dimension.
// Example:
//   input = [1, 2, 3, (4)]
//   axis = 3
//   output[0] = [1, 2, 3, (1)]
//   output[1] = [1, 2, 3, (2)]
//   output[2] = [1, 2, 3, (1)]
struct Split {
  // The id of the input operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The ids of the output operands, used to get the `Operand` descriptions
  // from `GraphInfo.id_to_operand_map`.
  array<uint64> output_operand_ids;

  // Axis specifies which input tensor dimension will be split. It must be in
  // the range [0, N-1] where N is the rank of the input tensor.
  uint32 axis = 0;
};
| |
// Corresponds to `MLOperand tanh(MLOperand x)` that computes the hyperbolic
// tangent function of the input tensor following the expression
// (exp(2 * x) - 1) / (exp(2 * x) + 1).
struct Tanh {
  // The id of the input operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of the output operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
};
| |
// Represents the transpose operation that permutes the dimensions of the
// input tensor following the given permutation.
struct Transpose {
  // The id of the input operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of the output operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;
  // The values used to permute the dimensions of the input tensor.
  array<uint32> permutation;
};
| |
// Given a 2-D tensor (matrix), triangular will return a 2-D tensor
// containing either the upper or lower triangular part of the input tensor.
// If the input tensor has more than 2 dimensions, it is treated as a batch
// of matrices represented by the last two dimensions, and the result has the
// same shape as the input.
struct Triangular {
  // The ids used to get the `Operand` descriptions from
  // `GraphInfo.id_to_operand_map`. The input operand (referenced by
  // input_operand_id) must be distinct from the output operand (referenced
  // by output_operand_id).
  uint64 input_operand_id;
  uint64 output_operand_id;

  // Indicates whether the upper or the lower triangular part of the input
  // matrix is retained in the output. True indicates that the upper part is
  // retained.
  bool upper;
  // Specifies how many diagonals above or below the main diagonal of the
  // input matrix are retained or excluded. A value of 0 means no diagonals
  // other than the main diagonal are affected.
  int32 diagonal;
};
| |
// Resamples the tensor values from the source to the destination spatial
// dimensions.
struct Resample2d {
  // The id of the input operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 input_operand_id;
  // The id of the output operand, used to get the `Operand` description from
  // `GraphInfo.id_to_operand_map`.
  uint64 output_operand_id;

  // The interpolation algorithm used to compute the output values.
  enum InterpolationMode {
    kNearestNeighbor,
    kLinear,
  };

  InterpolationMode mode;
  // Specifies the scaling factor in each spatial dimension of the input.
  // The scale may not be exactly output size / input size. The backend
  // needs this information, e.g., for the linear interpolation algorithm.
  array<float, 2>? scales;
  // Specifies the two consecutive dimensions of the input tensor to which
  // the interpolation algorithm applies.
  array<uint32, 2> axes;
};
| |
// Selects the values from either the true value or the false value tensor
// depending on the corresponding Boolean values of the condition tensor.
// Non-zero elements of the condition tensor select from the true value
// tensor, while zero-valued elements select from the false value tensor.
struct Where {
  // Constraints:
  // * The shapes of all input operands must be bidirectionally broadcastable.
  // * The data type of the condition operand must be uint8.
  // * The data types of the true value, false value and output operands must
  //   be the same.
  // * The output operand must be a different operand from the condition,
  //   true value and false value operands.

  // The id of the condition operand.
  uint64 condition_operand_id;
  // The id of the true value operand.
  uint64 true_value_operand_id;
  // The id of the false value operand.
  uint64 false_value_operand_id;
  // The id of the output operand.
  uint64 output_operand_id;
};
| |
// Represents the `MLActivation` which describes an activation function type
// used when creating other operations (Conv2d, BatchNormalization, for
// example). Exactly one member is set; each member carries the parameters of
// the corresponding activation function.
union Activation {
  Clamp clamp;
  Elu elu;
  Gelu gelu;
  HardSigmoid hard_sigmoid;
  LeakyRelu leaky_relu;
  Linear linear;
  Relu relu;
  Sigmoid sigmoid;
  Softmax softmax;
  Softplus softplus;
  Softsign softsign;
  Tanh tanh;
};
| |
// Holds exactly one of the supported operations of a WebNN graph.
union Operation {
  // Keep the members in the same order as the build methods of
  // `MLGraphBuilder`.
  ArgMinMax arg_min_max;
  BatchNormalization batch_normalization;
  Clamp clamp;
  Concat concat;
  Conv2d conv2d;
  ElementWiseBinary element_wise_binary;
  Elu elu;
  ElementWiseUnary element_wise_unary;
  Expand expand;
  Gather gather;
  Gelu gelu;
  Gemm gemm;
  Gru gru;
  GruCell gru_cell;
  HardSigmoid hard_sigmoid;
  HardSwish hard_swish;
  LayerNormalization layer_normalization;
  InstanceNormalization instance_normalization;
  LeakyRelu leaky_relu;
  Linear linear;
  Lstm lstm;
  LstmCell lstm_cell;
  Matmul matmul;
  Pad pad;
  Pool2d pool2d;
  Prelu prelu;
  Reduce reduce;
  Relu relu;
  Resample2d resample2d;
  Reshape reshape;
  Sigmoid sigmoid;
  Slice slice;
  Softmax softmax;
  Softplus softplus;
  Softsign softsign;
  Split split;
  Tanh tanh;
  Transpose transpose;
  Triangular triangular;
  Where where;
};
| |
// Describes an entire WebNN graph: its operands, the ids of its graph-level
// inputs and outputs, its operations, and its constant weight data.
struct GraphInfo {
  // A map of all operands used in this `GraphInfo`; the key is the operand
  // id.
  map<uint64, Operand> id_to_operand_map;
  // The ids from `GraphInfo.id_to_operand_map` that identify the input
  // operands of this graph.
  array<uint64> input_operands;
  // The ids from `GraphInfo.id_to_operand_map` that identify the output
  // operands of this graph.
  array<uint64> output_operands;
  // The operations, sorted in topological order.
  array<Operation> operations;
  // The constant weight data specified through the MLGraphBuilder.constant()
  // method defined in the WebIDL; the key is the constant operand id.
  map<uint64, mojo_base.mojom.BigBuffer> constant_id_to_buffer_map;
};
| |
// Represents the return value of `WebNNGraph::Compute()`. It is
// `named_outputs` if the computation was successful and `error` otherwise.
union ComputeResult {
  map<string, mojo_base.mojom.BigBuffer> named_outputs;
  Error error;
};
| |
// WebNNGraph runs in the GPU process and is called by the renderer process to
// execute the computational graph. Graph execution is performed by calling
// hardware accelerated OS machine learning APIs.
interface WebNNGraph {
  // Called by the renderer process to carry out the computational workload of
  // the compiled graph. The key of the map is the name of the input/output
  // that identifies the tensor in the graph; the value is shared memory,
  // used to reduce memory copies for inference.
  Compute(map<string, mojo_base.mojom.BigBuffer> named_inputs)
      => (ComputeResult result);
};