blob: abeef0439ff3bc2f513c2bc81a1dd8c1dff2bc85 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
edition = "2023";
package optimization_guide.proto;
import "components/optimization_guide/proto/features/common_quality_data.proto";
option java_outer_classname = "ActionsProto";
option java_package = "org.chromium.components.optimization_guide.features.proto";
option optimize_for = LITE_RUNTIME;
// DO NOT EDIT THIS FILE DIRECTLY!
//
// This file is generated in g3 and then synced to Chrome. Instead, please refer to
// http://go/chrome-mqls-onboarding (Google-internal link), and then changes will
// be synced with Chrome automatically.
// This API should eventually converge with go/unified-browser-control-api.
// Please check with erikchen@ before making modifications.
// The target of an action.
//
// The three fields are independent (they are not members of a oneof), so the
// schema itself does not prevent more than one of them from being set.
// Next ID: 4
message ActionTarget {
// Id of the content node within the frame. Same as
// ContentNode.common_ancestor_dom_node_id.
int32 content_node_id = 1 [features = { field_presence: EXPLICIT }];
// Coordinate of the target.
// NOTE(review): Coordinate is not declared in this file (presumably it comes
// from the imported common_quality_data.proto); its coordinate space and
// units are not stated here -- TODO confirm.
Coordinate coordinate = 2 [features = { field_presence: EXPLICIT }];
// Which document in a frame the action should be taken in.
DocumentIdentifier document_identifier = 3 [features = { field_presence: EXPLICIT }];
}
// Mouse click action.
// Next ID: 5
message ClickAction {
// What to click on.
ActionTarget target = 1 [features = { field_presence: EXPLICIT }];
// Which mouse button to click with; see ClickType.
ClickType click_type = 2 [features = { field_presence: EXPLICIT }];
// How many times to click; see ClickCount.
ClickCount click_count = 3 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab in which the click is performed;
// this is not documented in this file -- TODO confirm.
int32 tab_id = 4 [features = { field_presence: EXPLICIT }];
// The mouse button used for the click.
enum ClickType {
// Unknown click type. This should not be used.
UNKNOWN_CLICK_TYPE = 0;
// Left mouse click.
LEFT = 1;
// Right mouse click.
RIGHT = 2;
}
// Represents the number of times the target was clicked. For now, only
// single and double clicks are supported.
enum ClickCount {
// Unknown click count. This should not be used.
UNKNOWN_CLICK_COUNT = 0;
// Single click.
SINGLE = 1;
// Double click.
DOUBLE = 2;
}
}
// Type text action.
// Next ID: 6
message TypeAction {
// Where the text is typed.
ActionTarget target = 1 [features = { field_presence: EXPLICIT }];
// The text to type.
string text = 2 [features = { field_presence: EXPLICIT }];
// After typing the text, press enter.
bool follow_by_enter = 3 [features = { field_presence: EXPLICIT }];
// How the text should be inserted into the target; see TypeMode.
TypeMode mode = 4 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab in which the text is typed; this
// is not documented in this file -- TODO confirm.
int32 tab_id = 5 [features = { field_presence: EXPLICIT }];
// How the text should be inserted into the target.
enum TypeMode {
// Zero value of the enum.
UNKNOWN_TYPE_MODE = 0;
// NOTE(review): the three values below are undocumented. Going by their
// names they presumably replace the existing content, insert before it, and
// insert after it, respectively -- TODO confirm.
DELETE_EXISTING = 1;
PREPEND = 2;
APPEND = 3;
}
}
// Scroll action.
// Next ID: 5
message ScrollAction {
// NOTE(review): presumably the element or point that the scroll is applied
// to; this is not documented in this file -- TODO confirm.
ActionTarget target = 1 [features = { field_presence: EXPLICIT }];
// The direction to scroll in; see ScrollDirection.
ScrollDirection direction = 2 [features = { field_presence: EXPLICIT }];
// The distance to scroll in pixels.
float distance = 3 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab in which to scroll; this is not
// documented in this file -- TODO confirm.
int32 tab_id = 4 [features = { field_presence: EXPLICIT }];
// The direction of a scroll.
enum ScrollDirection {
// Unknown scroll direction. This should not be used.
UNKNOWN_SCROLL_DIRECTION = 0;
// Scroll left.
LEFT = 1;
// Scroll right.
RIGHT = 2;
// Scroll up.
UP = 3;
// Scroll down.
DOWN = 4;
}
}
// Move mouse action.
// This is considered a low-level building block for more complex actions, e.g.
// scroll_at can be mouse_move + scroll action.
// This action also covers hovering: used in isolation, it is equivalent to
// "hovering at" the target.
// Next ID: 3
message MoveMouseAction {
// Where to move the mouse to.
ActionTarget target = 1 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab in which the mouse is moved;
// this is not documented in this file -- TODO confirm.
int32 tab_id = 2 [features = { field_presence: EXPLICIT }];
}
// Drag and release mouse action.
// Next ID: 4
message DragAndReleaseAction {
// NOTE(review): going by the field names, from_target is presumably where the
// drag starts and to_target where the mouse is released -- TODO confirm.
ActionTarget from_target = 1 [features = { field_presence: EXPLICIT }];
ActionTarget to_target = 2 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab in which the drag is performed;
// this is not documented in this file -- TODO confirm.
int32 tab_id = 3 [features = { field_presence: EXPLICIT }];
}
// Select dropdown action.
// Next ID: 4
message SelectAction {
// NOTE(review): presumably the dropdown to select from; this is not
// documented in this file -- TODO confirm.
ActionTarget target = 1 [features = { field_presence: EXPLICIT }];
// The value of the option to select.
string value = 2 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab containing the dropdown; this is
// not documented in this file -- TODO confirm.
int32 tab_id = 3 [features = { field_presence: EXPLICIT }];
}
// Navigate the page to a URL action.
// Next ID: 3
message NavigateAction {
// The URL to navigate to.
string url = 1 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab to navigate; this is not
// documented in this file -- TODO confirm.
int32 tab_id = 2 [features = { field_presence: EXPLICIT }];
}
// Wait action: waits for the specified number of milliseconds.
// Next ID: 3
message WaitAction {
// How long to wait, in milliseconds.
int32 wait_time_ms = 1 [features = { field_presence: EXPLICIT }];
// If provided, the given tab will be included in the returned observations
// (along with tabs in the actuating set), regardless of whether the tab is in
// the actuating set. This will not cause the tab to be added to the actuating
// set.
int32 observe_tab_id = 2 [features = { field_presence: EXPLICIT }];
}
// Navigate the page Back action.
// Next ID: 2
message HistoryBackAction {
// NOTE(review): presumably the id of the tab whose history is navigated back;
// this is not documented in this file -- TODO confirm.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
}
// Navigate the page Forward action.
// Next ID: 2
message HistoryForwardAction {
// NOTE(review): presumably the id of the tab whose history is navigated
// forward; this is not documented in this file -- TODO confirm.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
}
// Creates a new tab.
// Next ID: 3
message CreateTabAction {
// The id of the window to create the tab in. Passing in an invalid window id
// will cause the action to fail.
int32 window_id = 1 [features = { field_presence: EXPLICIT }];
// When set to true, the new tab will be visible within the window, although
// the window itself may not be visible.
bool foreground = 2 [features = { field_presence: EXPLICIT }];
}
// Closes an existing tab.
// Next ID: 2
message CloseTabAction {
// The id of the tab to close. Passing in an invalid tab id will cause the
// action to fail.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
}
// Activates an existing tab. This does not affect window visibility or
// activation.
// Next ID: 2
message ActivateTabAction {
// The id of the tab to activate. Passing in an invalid tab id will cause the
// action to fail.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
}
// Creates a new window.
// This message currently declares no fields, so the action takes no
// parameters.
// Next ID: 1
message CreateWindowAction {
}
// Closes an existing window.
// Next ID: 2
message CloseWindowAction {
// The id of the window to close. Passing in an invalid window id will cause
// the action to fail.
int32 window_id = 1 [features = { field_presence: EXPLICIT }];
}
// Activates an existing window. This does not move the window, but it brings
// it to the front of the z-order and gives it OS-activation, which means the
// window will receive keyboard events.
// Next ID: 2
message ActivateWindowAction {
// The id of the window to activate. Passing in an invalid window id will
// cause the action to fail.
int32 window_id = 1 [features = { field_presence: EXPLICIT }];
}
// Yields control to the user.
// This message currently declares no fields, so the action takes no
// parameters.
// Next ID: 1
message YieldToUserAction {
}
// Attempts to log in to the website.
// Next ID: 2
message AttemptLoginAction {
// NOTE(review): presumably the id of the tab showing the website to log in
// to; this is not documented in this file -- TODO confirm.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
}
// Specifies a form by a set of fields (from different autofill sections) and
// the kind of data to fill in these sections. See AttemptFormFillingAction.
//
// If a form consists of multiple autofill sections, only fields from the
// section corresponding to the trigger field will be filled.
//
// FormFillingRequest is in development, and future changes are possible once
// the model constraints/performance are understood.
//
// Next ID: 3
message FormFillingRequest {
// Specifies a field that is part of the form. Autofill will be triggered from
// this field, but will fill other fields of the same autofill section with
// the available data.
//
// TODO(crbug.com/452946211): This field is repeated to be able to support a form that
// is spread over multiple autofill sections.
repeated ActionTarget trigger_fields = 1;
// Specifies which data is requested. The requested data type will be
// presented in the UI to the user (e.g. a user chooses an address for the
// "shipping address").
RequestedData requested_data = 2 [features = { field_presence: EXPLICIT }];
// Enumerates the purposes for forms. Although the type of data (address or
// credit card) is encoded in these values, autofill already detects that
// type by itself. What autofill does not detect is the purpose (e.g.
// shipping vs. billing address), which is what is provided here.
enum RequestedData {
// The requested data is not specified.
REQUESTED_DATA_UNKNOWN = 0;
// An address should be filled. This value can be used as a catch-all when
// the more specific address options below do not fit.
ADDRESS = 1;
// A shipping address should be filled.
SHIPPING_ADDRESS = 2;
// A billing address should be filled.
BILLING_ADDRESS = 3;
// A home address should be filled.
HOME_ADDRESS = 4;
// A work address should be filled.
WORK_ADDRESS = 5;
// A credit card should be filled.
CREDIT_CARD = 6;
}
}
// Attempts to fill forms on the website.
//
// AttemptFormFillingAction is in development, and future changes are possible
// once the model constraints/performance are understood.
//
// Next ID: 3
message AttemptFormFillingAction {
// NOTE(review): presumably the id of the tab containing the forms to fill;
// this is not documented in this file -- TODO confirm.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
// The forms to fill, one FormFillingRequest per form.
repeated FormFillingRequest form_filling_requests = 2;
}
// Scrolls the DOM element into viewport.
// Next ID: 5
message ScrollToAction {
// The DOM element to scroll into the viewport.
ActionTarget target = 3 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab containing the element; this is
// not documented in this file -- TODO confirm.
int32 tab_id = 4 [features = { field_presence: EXPLICIT }];
// Field numbers 1 and 2 are reserved and must not be reused by new fields.
reserved 1, 2;
}
// Executes a script tool.
// Next ID: 5
message ScriptToolAction {
// The Document which provides the associated Tool.
DocumentIdentifier document_identifier = 1 [features = { field_presence: EXPLICIT }];
// NOTE(review): presumably the id of the tab containing that document; this
// is not documented in this file -- TODO confirm.
int32 tab_id = 2 [features = { field_presence: EXPLICIT }];
// The name of the tool to execute.
string tool_name = 3 [features = { field_presence: EXPLICIT }];
// A serialized JSON string containing the arguments to pass to the tool.
string input_arguments = 4 [features = { field_presence: EXPLICIT }];
}
// Play media action: plays the media session.
// This message currently declares no fields. It is used as the `play` member
// of MediaControlAction.
// Next ID: 1
message PlayMediaAction {
}
// Pause media action: pauses the media session.
// This message currently declares no fields. It is used as the `pause` member
// of MediaControlAction.
// Next ID: 1
message PauseMediaAction {
}
// Seek media action: seeks the media session to the given seek time in
// microseconds from the beginning of the media. The seek time should be
// non-negative.
// Next ID: 2
message SeekMediaAction {
// Position to seek to, in microseconds from the beginning of the media.
// Should be non-negative, although the int64 type itself permits negative
// values.
int64 seek_time_microseconds = 1 [features = { field_presence: EXPLICIT }];
}
// Controls media session in a tab.
// Next ID: 6
message MediaControlAction {
// The media operation to perform. As members of a oneof, at most one of
// these can be set at a time.
oneof media_control_action {
PlayMediaAction play = 3;
PauseMediaAction pause = 4;
SeekMediaAction seek = 5;
}
// The id of the tab which contains the media session.
int32 tab_id = 2 [features = { field_presence: EXPLICIT }];
// Field number 1 is reserved and must not be reused by new fields.
reserved 1;
}
// All of the actions that can be taken.
// Only one of these is allowed per action.
// Next ID: 23
message Action {
// The concrete action. As members of a oneof, at most one of these can be
// set at a time; setting one clears any previously set member.
oneof action {
ClickAction click = 1;
TypeAction type = 2;
ScrollAction scroll = 3;
MoveMouseAction move_mouse = 4;
DragAndReleaseAction drag_and_release = 5;
SelectAction select = 6;
NavigateAction navigate = 7;
HistoryBackAction back = 8;
HistoryForwardAction forward = 9;
WaitAction wait = 10;
CreateTabAction create_tab = 11;
CloseTabAction close_tab = 12;
ActivateTabAction activate_tab = 13;
CreateWindowAction create_window = 14;
CloseWindowAction close_window = 15;
ActivateWindowAction activate_window = 16;
YieldToUserAction yield_to_user = 17;
AttemptLoginAction attempt_login = 18;
ScrollToAction scroll_to = 19;
ScriptToolAction script_tool = 20;
MediaControlAction media_control = 21;
AttemptFormFillingAction attempt_form_filling = 22;
}
}
// Container for any type of metadata that is associated with a task.
// Next ID: 2
message TaskMetadata {
// Metadata related to action security; see SecurityMetadata.
SecurityMetadata security = 1 [features = { field_presence: EXPLICIT }];
}
// Metadata related to action security.
// Next ID: 2
message SecurityMetadata {
// If the server predicted that new mainframe origins may be navigated to as a
// result of the current task turn, those are given here.
//
// These origins, if any, are added to the set of all writable origins kept
// by the Chrome browser process.
repeated string added_writable_mainframe_origins = 1;
}
// A single set of actions to be taken by Chrome. The response is ActionsResult.
// Next ID: 5
message Actions {
// The actions to be taken by Chrome.
repeated Action actions = 1;
// The task id of the action.
int32 task_id = 2 [features = { field_presence: EXPLICIT }];
// Whether to skip APC and screenshot (i.e., async) observation collections
// when building the ActionsResult. A TabObservation will still be collected
// and its tab id will be populated, but it will be missing the APC and the
// screenshot.
// We may want more sophisticated control over which observations are
// collected in the future.
bool skip_async_observation_collection = 3 [features = { field_presence: EXPLICIT }];
// Metadata that is associated with a task.
TaskMetadata task_metadata = 4 [features = { field_presence: EXPLICIT }];
}
// Information about a chrome window.
// Next ID: 5
message WindowObservation {
// Unique id of the window.
int32 id = 1 [features = { field_presence: EXPLICIT }];
// Id of the activated tab. There may be multiple tabs visible at the same
// time.
int32 activated_tab_id = 2 [features = { field_presence: EXPLICIT }];
// Whether this is the active window. The active window is defined as the
// most recently used chrome browser window.
bool active = 3 [features = { field_presence: EXPLICIT }];
// Ids of all tabs. The order is not meaningful.
repeated int32 tab_ids = 4;
}
// Information about a chrome tab.
// Next ID: 7
message TabObservation {
// Unique id of the tab.
int32 id = 1 [features = { field_presence: EXPLICIT }];
// The page content at the observation time.
// This field is only populated if skip_async_observation_collection is false
// in the Actions message.
AnnotatedPageContent annotated_page_content = 2 [features = { field_presence: EXPLICIT }];
// The screenshot of the page at the time after the action.
// This field is only populated if skip_async_observation_collection is false
// in the Actions message.
bytes screenshot = 3 [features = { field_presence: EXPLICIT }];
// The mime type of the |screenshot|.
// This field is only populated if skip_async_observation_collection is false
// in the Actions message.
string screenshot_mime_type = 4 [features = { field_presence: EXPLICIT }];
// The result of the tab observation.
TabObservationResult result = 5 [features = { field_presence: EXPLICIT }];
// Metadata about the page content.
// The number of meta tags returned is limited to 32.
// This field is only populated if skip_async_observation_collection is false
// in the Actions message.
PageMetadata metadata = 6 [features = { field_presence: EXPLICIT }];
// Result codes of a tab observation. It may be possible
// to retry an observation given specific error codes.
enum TabObservationResult {
// The tab was observed successfully.
// NOTE(review): this is the enum's zero value, so an unset `result` also
// reads as OK; check field presence to tell the two apart.
TAB_OBSERVATION_OK = 0;
// The tab is not available because it went away.
TAB_OBSERVATION_TAB_WENT_AWAY = 1;
// The screenshot of the tab timed out.
TAB_OBSERVATION_SCREENSHOT_TIMEOUT = 2;
// The page crashed.
TAB_OBSERVATION_PAGE_CRASHED = 3;
}
}
// All relevant state in chrome after the actions are taken or an error has
// occurred.
// Next ID: 7
message ActionsResult {
// The result of the actions. See
// https://source.chromium.org/chromium/chromium/src/+/main:chrome/common/actor.mojom
// for definition.
int32 action_result = 1 [features = { field_presence: EXPLICIT }];
// If a specific action failed, the index of the failed action. -1 for no
// failure.
int32 index_of_failed_action = 2 [features = { field_presence: EXPLICIT }];
// Window observations after all actions are finished or an error has
// occurred. The client will determine the relevant windows and tabs.
repeated WindowObservation windows = 3;
// Tab observations after all actions are finished or an error has occurred.
// The client will determine the relevant windows and tabs.
repeated TabObservation tabs = 4;
// The results of executing script tools.
repeated ScriptToolResult script_tool_results = 5;
// Latency of the steps Chrome performed for this request; see
// LatencyInformation.
LatencyInformation latency_information = 6 [features = { field_presence: EXPLICIT }];
// Chrome reported information about the latency of steps while performing the
// actions.
// NOTE(review): only field number 1 is declared below and nothing is
// reserved. Confirm whether numbers 2 and 3 were used by since-removed fields
// (and should then be reserved) or whether this should read "Next ID: 2".
// Next ID: 4
message LatencyInformation {
// All steps that Chrome reported latency for. These steps can overlap due
// to parallel execution.
repeated LatencyStep latency_steps = 1;
// Latency of a single step.
// Next ID: 7
message LatencyStep {
// Which kind of step this entry describes. As members of a oneof, at most
// one of these can be set at a time.
oneof step {
ActionLatency action = 3;
PageStabilizationLatency page_stabilization = 4;
AnnotatedPageContentLatency annotated_page_content = 5;
ScreenshotLatency screenshot = 6;
}
// Both timestamps below are measured from the time that Chrome received the
// Actions request.
// Time from request to step start, in milliseconds.
int32 latency_start_ms = 1 [features = { field_presence: EXPLICIT }];
// Time from request to step stop, in milliseconds.
int32 latency_stop_ms = 2 [features = { field_presence: EXPLICIT }];
// The latency of an action step.
message ActionLatency {
// The index of the action that triggered the latency step.
int32 action_index = 1 [features = { field_presence: EXPLICIT }];
}
// The latency of a page stabilization step.
message PageStabilizationLatency {
// The index of the action that triggered the page stabilization.
int32 action_index = 1 [features = { field_presence: EXPLICIT }];
}
// The latency of the annotated page content step.
message AnnotatedPageContentLatency {
// The id of the tab that was observed.
int32 id = 1 [features = { field_presence: EXPLICIT }];
}
// The latency of the screenshot step.
message ScreenshotLatency {
// The id of the tab that was observed.
int32 id = 1 [features = { field_presence: EXPLICIT }];
}
}
}
}
// A request to start a task.
// Next ID: 2
message BrowserStartTask {
// The id of the tab the task should be started in. If not set, the task
// will be started in a new tab.
// TODO(crbug.com/421441072): Remove this field. This is only needed in the short term
// since we need a mechanism to start tasks in a new tab, and the model has
// not yet been trained to use CreateTabAction.
int32 tab_id = 1 [features = { field_presence: EXPLICIT }];
}
// The response to a request to start a task.
// Next ID: 4
message BrowserStartTaskResult {
// The id of the task that was created.
int32 task_id = 1 [features = { field_presence: EXPLICIT }];
// The id of the tab the action will be taken in.
int32 tab_id = 2 [features = { field_presence: EXPLICIT }];
// The status of the start task request. If not SUCCESS, the task_id and
// tab_id may not be set.
StartTaskStatus status = 3 [features = { field_presence: EXPLICIT }];
// Outcome of a start task request.
enum StartTaskStatus {
// Unknown start task status. This should not be used.
START_TASK_STATUS_UNKNOWN = 0;
// The task was started successfully.
SUCCESS = 1;
// The task was not started because there were too many tasks already
// running.
OVER_TASK_LIMIT = 2;
}
}