// blob: 9f4b7ecbcaa6d2ac070f48538656356e4ab91e71 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module actor.mojom;
import "mojo/public/mojom/base/time.mojom";
import "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom";
import "ui/gfx/geometry/mojom/geometry.mojom";
// This interface is meant to largely mirror the
// BrowserAction::ActionInformation proto.
// Tool-specific target: identifies what a tool invocation acts on. Because this
// is a union, a value holds exactly one of the members below.
union ToolTarget {
// DOMNodeId for the node this invocation should be applied to.
int32 dom_node_id;
// An x,y pair representing a location relative to the origin at the top-left
// corner of the local root frame (i.e. the blink::WebWidget/RenderWidget), in
// device-independent pixels (DIP).
gfx.mojom.Point coordinate_dip;
};
// Attributes of the Node target returned from hit test on last observed APC.
// NOTE(review): "APC" presumably stands for the AI page content described by
// ai_page_content.mojom -- confirm.
struct ObservedToolTarget {
// Attributes of the node that the hit test returned.
blink.mojom.AIPageContentAttributes node_attribute;
};
// Information specific to a click action. This struct carries no location; the
// enclosing ToolInvocation supplies the target alongside the action.
struct ClickAction {
// Corresponds to ClickAction.ClickType
// NOTE(review): presumably the ClickType enum of the mirrored proto -- confirm.
// Values start at 1; there is no zero value in this enum.
enum Type {
kLeft = 1,
kRight = 2,
};
// Corresponds to ClickAction.ClickCount
// NOTE(review): presumably the ClickCount enum of the mirrored proto --
// confirm. Values start at 1; there is no zero value in this enum.
enum Count {
kSingle = 1,
kDouble = 2,
};
// The kind of click to perform (see Type).
Type type;
// How many clicks to perform (see Count).
Count count;
};
// Information specific to a mouse move action.
// Intentionally has no members: the target, which is carried by the `target`
// field of ToolInvocation, is the only needed parameter.
struct MouseMoveAction {
};
// Information specific to a scroll-to action.
// Intentionally has no members: the target, which is carried by the `target`
// field of ToolInvocation, is the only needed parameter.
struct ScrollToAction {
};
// Information specific to the type (keyboard input) action. Note: TypeAction
// may be implemented in multiple steps (e.g. individual key events). In some
// situations, part of the action may complete while another part fails.
// TypeAction currently returns failure if any part of the action fails.
// TODO(crbug.com/409333494): Consider how to handle partial success cases like
// this.
struct TypeAction {
// How the text should be inserted into the target.
// NOTE(review): going by the names, kDeleteExisting presumably replaces the
// target's current content, while kPrepend/kAppend insert before/after it --
// confirm against the implementation.
enum Mode {
kDeleteExisting = 0,
kPrepend = 1,
kAppend = 2,
};
// The insertion mode to use for `text`.
Mode mode;
// The text to type. It should be ASCII only for now.
string text;
// NOTE(review): presumably requests an Enter key press after `text` has been
// typed -- confirm.
bool follow_by_enter;
};
// Scroll action performs a scroll on the page's viewport or on a specified
// target element.
// It currently returns true even for a partial scroll to the given distance
// and direction.
// It will provide more precise information about how much it scrolls.
// NOTE(review): "returns true" presumably means the action reports success,
// and the sentence about "more precise information" reads as a future plan
// rather than current behavior -- confirm both. See also
// ActionResultCode.kScrollOffsetDidNotChange, which covers the case where the
// target's offset does not change at all.
struct ScrollAction {
// Corresponds to ScrollAction.ScrollDirection.
// This must be kept in sync with ScrollAction.ScrollDirection enum in
// components/optimization_guide/proto/features/actions_data.proto.
enum ScrollDirection {
// Scroll left.
kLeft = 1,
// Scroll right.
kRight = 2,
// Scroll up.
kUp = 3,
// Scroll down.
kDown = 4,
};
// The direction in which to scroll.
ScrollDirection direction;
// Scroll distance in DIPs. It should always be positive; the direction of
// travel is expressed by `direction`, not by the sign of this value.
float distance;
};
// Action for selecting an option from a <select> element.
// Related failure codes in ActionResultCode: kSelectInvalidElement,
// kSelectNoSuchOption and kSelectOptionDisabled.
struct SelectAction {
// The value of the option to set as the currently selected option. The action
// fails if the given value doesn't match the value of one of the element's
// <option> children. The comparison is case-sensitive.
string value;
};
// Performs a left mouse button down on the from_target, then moves to and
// releases on the to_target.
// NOTE(review): from_target is not a member of this struct; presumably it is
// the `target` field of the enclosing ToolInvocation -- confirm.
struct DragAndReleaseAction {
// Where the pointer is moved to and the button is released.
ToolTarget to_target;
};
// Executes a script tool associated with the Document.
// On success, the tool's output is reported through
// ActionResult.script_tool_response.
struct ScriptToolAction {
// NOTE(review): presumably the name identifying which script tool to run --
// confirm.
string name;
// Arguments passed to the script tool. The expected format of this string is
// not specified here. TODO: document the format (e.g. whether it is JSON).
string input_arguments;
};
// Union of tool-specific actions. A value holds exactly one of the members
// below, which selects the tool to run and carries its parameters.
// Browser-side tools (e.g. navigation and history, see ActionResultCode) are
// not part of this union.
union ToolAction {
ClickAction click;
DragAndReleaseAction drag_and_release;
MouseMoveAction mouse_move;
ScrollAction scroll;
SelectAction select;
TypeAction type;
ScriptToolAction script_tool;
ScrollToAction scroll_to;
};
// LINT.IfChange(ActionResultCode)
// Result codes for outcomes of an action.
// kOk - Action was successful.
// 10-99 - An error occurred that is not specific to the tool.
// X00-X99 - A tool-specific error; each tool gets a reserved range of values.
//
// These codes will be used in UMA histograms and in
// optimization_guide.proto.BrowserActionResult. New Codes may be added to
// unused values, but existing codes should not be removed, reordered, or
// renumbered.
// Googlers: When making additions, also add the new value here:
// http://shortn/_gLyPxrRm6p
enum ActionResultCode {
// The action was successful.
kOk = 0,
///////////////////////////////////////////////////////////////////////
// Codes 10-99: Errors that are not specific to a tool.
// DEPRECATED. Use explicit error codes instead.
// A generic error that does not have a more specific code. Avoid using this.
// kError = 10,
// Actions are not allowed on the current URL.
kUrlBlocked = 11,
// The task requires opening a new tab, and we were unable to do so.
kNewTabCreationFailed = 12,
// The tab we were trying to act on no longer exists.
kTabWentAway = 13,
// The task for the action no longer exists.
kTaskWentAway = 14,
// The tab we were trying to act on performed a cross-origin navigation and is
// no longer able to be acted upon.
kCrossOriginNavigation = 15,
// An unknown tool was requested.
kToolUnknown = 16,
// The target frame in the renderer no longer exists or is shutting down.
kFrameWentAway = 17,
// The target DOMNodeId does not exist in the document.
kInvalidDomNodeId = 18,
// The targeted element cannot be modified because it is disabled.
kElementDisabled = 19,
// The target element is off screen or otherwise clipped by ancestor elements.
kElementOffscreen = 20,
// When using a coordinate target, a supplied coordinate is outside the
// viewport.
kCoordinatesOutOfBounds = 21,
// Arguments supplied to the tool are invalid.
kArgumentsInvalid = 22,
// The task for the action was paused.
kTaskPaused = 23,
// The tool executor in the renderer was destroyed before the tool finished
// executing.
kExecutorDestroyed = 24,
// The target window no longer exists.
kWindowWentAway = 25,
// The current frame target under the supplied coordinate does not match the
// frame under that coordinate at the time of observation.
kFrameLocationChangedSinceObservation = 26,
// A tool caused a navigation, but the navigation was blocked.
kTriggeredNavigationBlocked = 27,
// The requested tool actions array was empty.
kEmptyActionSequence = 28,
// The tool executor was busy with another tool, so could not invoke another.
kExecutorBusy = 29,
// The target element from hit test observed page content cannot be found in
// the live DOM.
kObservedTargetElementDestroyed = 30,
// The target element from hit test observed page content is different from
// hit test on the live DOM.
kObservedTargetElementChanged = 31,
// The target node's interaction point is obscured by other elements in hit
// testing.
kTargetNodeInteractionPointObscured = 32,
// The tool took too long to execute.
kToolTimeout = 33,
// The task triggered a file picker.
kFilePickerTriggered = 34,
// File picker was confirmed.
kFilePickerConfirmed = 35,
// File picker was cancelled.
kFilePickerCancelled = 36,
// The execution engine already had an existing action sequence when a new
// sequence was provided.
kExecutionEngineExistingAction = 37,
// Navigation to an external protocol was blocked.
kExternalProtocolNavigationBlocked = 38,
// The renderer process crashed.
kRendererCrashed = 39,
// Only used as a placeholder for planned but unimplemented features. Do not
// use as a generic error code.
kNotImplemented = 40,
// The PageTool invocation was canceled.
kInvokeCanceled = 41,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 100-199: Errors for navigation. (Not part of the ToolAction union
// as it's a browser-side tool.)
// The requested URL was not valid.
kNavigateInvalidUrl = 100,
// The navigation failed before it started.
kNavigateFailedToStart = 101,
// The navigation committed to an error page.
kNavigateCommittedErrorPage = 102,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 200-299: Errors for ClickAction.
// The click event was suppressed.
kClickSuppressed = 200,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 300-399: Errors for DragAndRelease.
// The `from` coordinate is outside of the viewport bounds.
kDragAndReleaseFromOffscreen = 300,
// The `to` coordinate is outside of the viewport bounds.
kDragAndReleaseToOffscreen = 301,
// The initial mouse move event was suppressed.
kDragAndReleaseFromMoveSuppressed = 302,
// The mouse down event was suppressed.
kDragAndReleaseDownSuppressed = 303,
// The mouse move event to the target was suppressed.
kDragAndReleaseToMoveSuppressed = 304,
// The mouse up event was suppressed.
kDragAndReleaseUpSuppressed = 305,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 400-499: Errors for MouseMoveAction.
// The mouse move event was suppressed.
kMouseMoveEventSuppressed = 400,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 500-599: Errors for ScrollAction.
// The tool is targeting the viewport but the document doesn't have a
// scrolling element for the viewport. (Note: this is a rare edge case in
// quirks mode. This does NOT mean that the viewport isn't scrollable.)
kScrollNoScrollingElement = 500,
// The targeted element either has no overflow or the overflow is not user
// scrollable.
kScrollTargetNotUserScrollable = 501,
// The offset of the target element didn't change after scrolling.
kScrollOffsetDidNotChange = 502,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 600-699: Errors for SelectAction.
// Target element was not a <select>.
kSelectInvalidElement = 600,
// Specified value to select does not exist as an <option> in the <select>.
kSelectNoSuchOption = 601,
// Specified value to select does exist but is disabled.
kSelectOptionDisabled = 602,
// Invoking the tool resulted in an unexpected value.
// DEPRECATED: Unused
// kSelectUnexpectedValue = 603,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 700-799: Errors for TypeAction.
// The target provided exists but is not an Element.
kTypeTargetNotElement = 700,
// The target element is not focusable.
kTypeTargetNotFocusable = 701,
// The type tool does not support the requested characters.
kTypeUnsupportedCharacters = 702,
// Failed to map a character in the input string to a key.
kTypeFailedMappingCharToKey = 703,
// The key down event for one of the chars was suppressed by Blink.
kTypeKeyDownSuppressed = 704,
// The text to type is not a valid UTF-8 string.
kTypeInvalidTextEncoding = 705,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 800-899: Errors for history tool. (Not part of the ToolAction union
// as it's a browser-side tool.)
// Calling GoBack/GoForward resulted in no navigations being created.
kHistoryNoNavigationsCreated = 800,
// Navigations were created but asynchronously cancelled before being started.
kHistoryCancelledBeforeStart = 801,
// Back traversal was requested when at beginning of session history.
kHistoryNoBackEntries = 802,
// Forward traversal was requested when at end of session history.
kHistoryNoForwardEntries = 803,
// History navigation was aborted before commit.
kHistoryFailedBeforeCommit = 804,
// History navigation committed to an error page.
kHistoryErrorPage = 805,
// The navigation entry at the current offset changed between validation and
// time of use.
kHistoryNavigationEntryChanged = 806,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 900-999: Errors for attempt login.
// The login attempt failed because no credentials were available.
kLoginNoCredentialsAvailable = 900,
// The login attempt failed because the current page is not a login page.
// The caller needs to first navigate to a login page.
kLoginNotLoginPage = 901,
// The login attempt failed because the credentials were entered, but another
// action is needed to complete the login.
kLoginCredentialsEnteredButAnotherActionNeeded = 902,
// Neither the username nor the password field could be filled.
kLoginNoFillableFields = 903,
// The use of the password manager is not allowed on the page (e.g. blocked
// by enterprise policy).
kLoginFillingNotAllowed = 904,
// The page changed before the credential selected by the user was filled.
kLoginPageChangedDuringSelection = 905,
// The task is in a background tab and filling requires device reauth. The
// user needs to focus that tab first.
kLoginDeviceReauthRequired = 906,
// The screen-lock-based authentication attempt failed (likely the user
// cancelled it).
kLoginDeviceReauthFailed = 907,
// The login attempt failed because the feature is disabled.
kLoginFeatureDisabled = 908,
// Multiple login requests were made at the same time.
kLoginTooManyRequests = 909,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 1000-1099: Errors for media control.
// The media control action failed because there is no media on the page.
kMediaControlNoMedia = 1000,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 1100-1199: Errors for form filling.
// Autofill is not available.
kFormFillingAutofillUnavailable = 1100,
// No autofill suggestions are available for the fields.
kFormFillingNoSuggestionsAvailable = 1101,
// The user could not choose a suggestion because of an error showing the
// suggestion selection dialog.
kFormFillingDialogError = 1102,
// The trigger field for form filling was not found. It could be that the page
// has changed since the action was requested.
kFormFillingFieldNotFound = 1103,
// An error occurred during autofill.
kFormFillingUnknownAutofillError = 1104,
// No tab observation was found when the TOCTOU (time-of-check to
// time-of-use) check was performed.
kFormFillingNoLastTabObservation = 1105,
// The suggestion ID selected by the user was invalid.
kFormFillingInvalidSuggestionId = 1106,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 1200-1299: Errors for script tool.
// The document did not return a response for the script tool.
kScriptToolNoResponse = 1200,
// Please see the comment above about adding new values.
///////////////////////////////////////////////////////////////////////
// Codes 1300-1399: Errors for Chrome UI.
// The Chrome Actor UI could not be shown.
kActorUiError = 1300,
// Please see the comment above about adding new values.
};
// LINT.ThenChange(//tools/metrics/histograms/metadata/actor/enums.xml:ActionResultCode)
// Identifies a task. Used both for the task owning a tool invocation (see
// ToolInvocation) and for the task a journal entry belongs to (see
// JournalEntry).
struct TaskId {
// The numeric task identifier.
int32 id;
};
// All information required to invoke a tool in the renderer.
struct ToolInvocation {
// The task ID owning this invocation.
TaskId task_id;
// The action to carry out in this invocation.
ToolAction action;
// Target of this tool invocation: either a DOM node or a coordinate (see
// ToolTarget).
ToolTarget target;
// Target from hit test on the last observed page content.
// This is optional and is used for checking if the action target has changed
// since the last observation.
ObservedToolTarget? observed_target;
};
// The outcome of a tool invocation.
struct ActionResult {
// A code with the outcome of the tool invocation.
ActionResultCode code;
// Whether any action was taken on the page. This is used to know if we need
// to wait for the page to settle.
bool requires_page_stabilization;
// If the code is not kOk, an English language message describing the error.
string message;
// If the action is a script tool, provides the result of that execution. This
// is only populated if the execution was successful.
string? script_tool_response;
// Time at which execution of the action was complete and page
// stabilization was started. This field may not be available if the action's
// execution did not fully complete.
mojo_base.mojom.TimeTicks? execution_end_time;
};
// The type of the journal entry.
// NOTE(review): kBegin and kEnd presumably bracket an event that spans time,
// while kInstant marks a single point in time -- confirm.
enum JournalEntryType {
kBegin,
kEnd,
kInstant,
};
// Details of a journal entry, expressed as a single key/value pair of strings.
// A JournalEntry carries an array of these.
struct JournalDetails {
// The name of this detail.
string key;
// The value recorded for `key`.
string value;
};
// Represents a journal entry. This should only be used for logging and
// debugging. It should not be used to make logic decisions, since a
// compromised renderer could lie about events (such as mismatched
// or missing begin and end events).
struct JournalEntry {
// What type of entry this is.
JournalEntryType type;
// The task ID associated with this entry.
TaskId task_id;
// The time the event occurred at.
mojo_base.mojom.Time timestamp;
// The event name.
string event;
// Track UUID.
uint64 track_uuid;
// Specific details of the event logged, as key/value pairs.
array<JournalDetails> details;
};
// Used to listen for new journal entries. Lives in the browser process and
// is used to receive entries from renderer processes.
interface JournalClient {
// Delivers a batch of journal entries. New log messages are sent in batches
// to limit the frequency of calls. This message has no reply.
AddEntriesToJournal(array<JournalEntry> entries);
};
// Used to monitor page stability in the renderer. Implemented by
// `actor::PageStabilityMonitor` and lives in the renderer process. The browser
// queries this interface to learn when the current page associated with
// `actor::PageStabilityMonitor` is stable.
//
// We CHECK that only one `NotifyWhenStable()` call is in flight at any given
// time.
interface PageStabilityMonitor {
// Asks to be notified when the page is stable; the empty reply carries no
// data and serves only as the notification.
//
// `observation_delay` is the amount of time to wait when observing tool
// execution before starting to wait for page stability.
NotifyWhenStable(mojo_base.mojom.TimeDelta observation_delay) => ();
};