| // Copyright 2023 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| module on_device_model.mojom; |
| |
| // Partial response received via StreamingResponder.OnResponse(). One chunk |
| // carries the newly available text plus, optionally, scores for the response |
| // accumulated so far. |
| struct ResponseChunk { |
| // Text for this chunk of the response. |
| string text; |
| |
| // Optional TS scores computed against the full response so far, up to and |
| // including `text`. These are probabilities in the range [0, 1]. Null when |
| // no scores accompany this chunk; see InputOptions.ts_interval for how |
| // scoring is requested. |
| // NOTE(review): "TS" is not expanded anywhere in this file -- spell it out |
| // here once the intended meaning is confirmed. |
| array<float>? ts_scores; |
| }; |
| |
| // Information pertaining to a complete response that was streamed by a |
| // StreamingResponder. Passed as the argument to |
| // StreamingResponder.OnComplete(). |
| struct ResponseSummary { |
| // Optional TS scores computed against the full response. These are |
| // probabilities in the range [0, 1]. Null when no scores were produced. |
| array<float>? ts_scores; |
| }; |
| |
| // Streams a response from a call to execute a model. Close this pipe to cancel |
| // the call to |Session.Execute()|. |
| interface StreamingResponder { |
| // This is called each time a new chunk of text is available. `chunk` holds |
| // the new text and, optionally, TS scores for the response so far. |
| OnResponse(ResponseChunk chunk); |
| |
| // This is called once when all text for the query has been returned. No other |
| // methods on this interface will be called after OnComplete(). `summary` |
| // conveys metadata about the response that was streamed. |
| OnComplete(ResponseSummary summary); |
| }; |
| |
| // Notifies the caller when the model is done processing context. Close this |
| // pipe to cancel the call to |Session.AddContext()|. The remote for this |
| // interface is a nullable argument of that call. |
| interface ContextClient { |
| // Called when the context has finished processing with the number of tokens |
| // processed. |
| OnComplete(uint32 tokens_processed); |
| }; |
| |
| // Options to control how the model handles input. Used as the `input` |
| // argument of both Session.AddContext() and Session.Execute(). |
| struct InputOptions { |
| // The text for this input. |
| string text; |
| |
| // The maximum number of tokens that should be processed. If not set, will |
| // process all tokens from this input. |
| uint32? max_tokens; |
| |
| // After text is tokenized, the offset into that vector to start processing. |
| // If not set, will start at the first token. |
| uint32? token_offset; |
| |
| // If this is true, indicates this is a one-off call that wants to ignore the |
| // context for this input. Note that this is less efficient than running on |
| // top of the current context, so only use when necessary. |
| bool ignore_context; |
| |
| // The maximum number of tokens that should be output from a call to |
| // Execute(). If not set, will output tokens until an end token or the maximum |
| // sequence length. |
| uint32? max_output_tokens; |
| |
| // The interval (in number of tokens) between periodic TS scoring updates. If |
| // this is null, no TS scores are evaluated or sent to the client; if 0, they |
| // are evaluated and sent only during response completion; otherwise for a |
| // value of N, an update will be provided after every Nth token, as well as |
| // during response completion. The field is unsigned, so a negative interval |
| // cannot be expressed. |
| // NOTE(review): this comment previously named |
| // StreamingResponder.UpdateTSScores(), which is not declared in this file. |
| // Scores presumably travel in ResponseChunk.ts_scores (periodic updates) and |
| // ResponseSummary.ts_scores (completion) -- confirm against the |
| // implementation. |
| uint32? ts_interval; |
| }; |
| |
| // A session for a model that allows adding context and then executing an input |
| // with that context. |
| interface Session { |
| // Adds context to this session. Any context added here will build off of |
| // previous calls to |AddContext()|. `client` is nullable; when provided, |
| // ContextClient.OnComplete() reports the number of tokens processed, and |
| // closing that pipe cancels this call. |
| AddContext(InputOptions input, pending_remote<ContextClient>? client); |
| |
| // Executes model on the given input. The input will be added on top of the |
| // context provided by |AddContext()|. The response will be streamed to |
| // |response|. To cancel the request, close the |response| pipe. |
| // Text arrives through StreamingResponder.OnResponse() and the stream ends |
| // with a single StreamingResponder.OnComplete(). |
| Execute(InputOptions input, pending_remote<StreamingResponder> response); |
| }; |
| |
| // A loaded model which can be queried. This interface must be controlled by the |
| // browser and consumers must take care to sanitize inputs. |
| interface OnDeviceModel { |
| // Starts a session with this model. If a session starts before the previous |
| // one has completed, the previous session will be canceled. |
| // `session` is the receiving end of the new Session pipe; the caller keeps |
| // the corresponding remote to issue AddContext() and Execute() calls. |
| StartSession(pending_receiver<Session> session); |
| }; |
| |
| // Classifies the device based on how fast it is estimated to be able to run a |
| // model. The first three values report that the benchmark could not produce a |
| // classification; the remaining values are the performance buckets. |
| enum PerformanceClass { |
| // There was an error running the benchmark. The device is likely not able to |
| // run any models. |
| kError, |
| // The GPU was blocked so the benchmark could not run. |
| kGpuBlocked, |
| // The library failed to load so the benchmark could not run. |
| kFailedToLoadLibrary, |
| |
| // The values below classify devices into a range of performance buckets, |
| // listed from kVeryLow through kVeryHigh. |
| kVeryLow, |
| kLow, |
| kMedium, |
| kHigh, |
| kVeryHigh, |
| }; |
| |
| // Outcome of an attempt to load a model. |
| // NOTE(review): nothing in this file produces or consumes this enum; it is |
| // presumably returned by a model-loading interface defined elsewhere -- |
| // confirm and name that interface here. |
| enum LoadModelResult { |
| // Presumably: the model was loaded. |
| kSuccess, |
| // Presumably the same condition as PerformanceClass.kGpuBlocked (the GPU was |
| // blocked) -- TODO confirm. |
| kGpuBlocked, |
| // Presumably the same condition as PerformanceClass.kFailedToLoadLibrary |
| // (the library failed to load) -- TODO confirm. |
| kFailedToLoadLibrary, |
| }; |