blob: 09013540f5f361131659e7f7f1b6a764550a3b9a [file] [log] [blame]
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/content/renderer/form_autofill_util.h"
#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
#include "base/check_deref.h"
#include "base/check_op.h"
#include "base/command_line.h"
#include "base/containers/contains.h"
#include "base/containers/flat_map.h"
#include "base/containers/flat_set.h"
#include "base/containers/span.h"
#include "base/debug/crash_logging.h"
#include "base/feature_list.h"
#include "base/i18n/case_conversion.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/no_destructor.h"
#include "base/not_fatal_until.h"
#include "base/notreached.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "components/autofill/content/renderer/synchronous_form_cache.h"
#include "components/autofill/content/renderer/timing.h"
#include "components/autofill/core/common/autocomplete_parsing_util.h"
#include "components/autofill/core/common/autofill_constants.h"
#include "components/autofill/core/common/autofill_features.h"
#include "components/autofill/core/common/autofill_util.h"
#include "components/autofill/core/common/field_data_manager.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/form_field_data.h"
#include "components/autofill/core/common/metrics_enums.h"
#include "components/autofill/core/common/mojom/autofill_types.mojom-shared.h"
#include "components/autofill/core/common/unique_ids.h"
#include "components/password_manager/core/common/password_manager_features.h"
#include "content/public/renderer/render_frame.h"
#include "third_party/blink/public/platform/url_conversion.h"
#include "third_party/blink/public/platform/web_string.h"
#include "third_party/blink/public/web/web_autofill_state.h"
#include "third_party/blink/public/web/web_document.h"
#include "third_party/blink/public/web/web_element.h"
#include "third_party/blink/public/web/web_element_collection.h"
#include "third_party/blink/public/web/web_form_control_element.h"
#include "third_party/blink/public/web/web_form_element.h"
#include "third_party/blink/public/web/web_frame.h"
#include "third_party/blink/public/web/web_input_element.h"
#include "third_party/blink/public/web/web_label_element.h"
#include "third_party/blink/public/web/web_local_frame.h"
#include "third_party/blink/public/web/web_node.h"
#include "third_party/blink/public/web/web_option_element.h"
#include "third_party/blink/public/web/web_remote_frame.h"
#include "third_party/blink/public/web/web_select_element.h"
#include "third_party/re2/src/re2/re2.h"
using blink::WebAutofillState;
using blink::WebDocument;
using blink::WebElement;
using blink::WebElementCollection;
using blink::WebFormControlElement;
using blink::WebFormElement;
using blink::WebFrame;
using blink::WebInputElement;
using blink::WebLabelElement;
using blink::WebLocalFrame;
using blink::WebNode;
using blink::WebOptionElement;
using blink::WebSelectElement;
using blink::WebString;
namespace autofill::form_util {
// Move-only bundle of the shadow-host attributes that apply to a form control.
struct ShadowFieldData {
  // For a form control located inside shadow DOM, these vectors hold the id
  // and name attributes of its enclosing shadow host elements. Nested shadow
  // DOM can contribute more than one entry to each vector.
  std::vector<std::u16string> shadow_host_id_attributes;
  std::vector<std::u16string> shadow_host_name_attributes;

  ShadowFieldData() = default;
  ~ShadowFieldData() = default;

  // Copying is disabled; instances can only be moved.
  ShadowFieldData(const ShadowFieldData&) = delete;
  ShadowFieldData& operator=(const ShadowFieldData&) = delete;
  ShadowFieldData(ShadowFieldData&&) = default;
  ShadowFieldData& operator=(ShadowFieldData&&) = default;
};
namespace {
// Shorthand for the enum that records which heuristic produced a field's
// inferred label (see InferredLabel below).
using LabelSource = FormFieldData::LabelSource;
// Maximal length of a button's title. AddButtonTitleToList() truncates every
// title to this many UTF-16 code units.
constexpr int kMaxLengthForSingleButtonTitle = 30;
// Maximal length of all button titles.
constexpr int kMaxLengthForAllButtonTitles = 200;
// Number of shadow roots to traverse upwards when looking for relevant forms
// and labels of an input element inside a shadow root.
constexpr size_t kMaxShadowLevelsUp = 2;
// Text features to detect form submission buttons. Features are selected based
// on analysis of real forms and their buttons.
// AttributeHasButtonFeature() lower-cases the attribute value and then does a
// substring search, so every entry here must be lower-case.
// TODO(crbug.com/41429204): Consider to add more features (e.g. non-English
// features).
const char* const kButtonFeatures[] = {"button", "btn", "submit",
"boton" /* "button" in Spanish */};
// Number of form neighbor nodes to traverse in search of four digit
// combinations on the webpage.
constexpr int kFormNeighborNodesToTraverse = 50;
// Maximum number of consecutive numbers to allow in the four digit combination
// matches.
constexpr int kMaxConsecutiveInFourDigitCombinationMatches = 2;
// Maximum number of four digit combination matches to find in the DOM.
constexpr size_t kMaxFourDigitCombinationMatches = 5;
// Constants to be passed to GetWebString<kConstant>() and to the
// HasTagName<>(), HasAttribute<>() and GetAttribute<>() helpers below.
// These are namespace-scope objects because those templates take a
// `const std::string_view&` as a non-type template parameter.
// Most entries are HTML tag or attribute names; exceptions are noted inline.
constexpr std::string_view kAnchor = "a";
constexpr std::string_view kAutocomplete = "autocomplete";
constexpr std::string_view kAriaDescribedBy = "aria-describedby";
constexpr std::string_view kAriaLabel = "aria-label";
constexpr std::string_view kAriaLabelledBy = "aria-labelledby";
constexpr std::string_view kBold = "b";
constexpr std::string_view kBreak = "br";
constexpr std::string_view kButton = "button";
constexpr std::string_view kClass = "class";
constexpr std::string_view kColspan = "colspan";
constexpr std::string_view kDefinitionDescriptionTag = "dd";
constexpr std::string_view kDefinitionTermTag = "dt";
constexpr std::string_view kDiv = "div";
constexpr std::string_view kFieldset = "fieldset";
constexpr std::string_view kFont = "font";
constexpr std::string_view kFor = "for";
constexpr std::string_view kForm = "form";
// A selector string (not a tag name); it is passed to QuerySelector().
constexpr std::string_view kFormControlSelector = "input, select, textarea";
constexpr std::string_view kId = "id";
constexpr std::string_view kIframe = "iframe";
constexpr std::string_view kImage = "img";
constexpr std::string_view kInput = "input";
constexpr std::string_view kLabel = "label";
constexpr std::string_view kListItem = "li";
constexpr std::string_view kMeta = "meta";
constexpr std::string_view kName = "name";
constexpr std::string_view kNoScript = "noscript";
constexpr std::string_view kOption = "option";
constexpr std::string_view kParagraph = "p";
constexpr std::string_view kPattern = "pattern";
constexpr std::string_view kPlaceholder = "placeholder";
constexpr std::string_view kRole = "role";
constexpr std::string_view kScript = "script";
constexpr std::string_view kSpan = "span";
// Only defined on Android builds.
#if BUILDFLAG(IS_ANDROID)
constexpr std::string_view kSrc = "src";
#endif
constexpr std::string_view kStrong = "strong";
constexpr std::string_view kStyle = "style";
constexpr std::string_view kSubmit = "submit";
constexpr std::string_view kTable = "table";
constexpr std::string_view kTableCell = "td";
constexpr std::string_view kTableHeader = "th";
constexpr std::string_view kTableRow = "tr";
constexpr std::string_view kTitle = "title";
constexpr std::string_view kType = "type";
constexpr std::string_view kValue = "value";
// Returns a cached WebString for the compile-time constant `string`.
// Every distinct template argument owns one function-local static, held in a
// base::NoDestructor, so the UTF-8 conversion runs only on the first call for
// that constant.
template <const std::string_view& string>
const WebString& GetWebString() {
  static const base::NoDestructor<WebString> kCachedString(
      WebString::FromUTF8(string));
  return *kCachedString;
}
template <const std::string_view& tag_name>
bool HasTagName(const WebElement& element) {
return element.HasHTMLTagName(GetWebString<tag_name>());
}
// Overload for arbitrary nodes: returns true only if `node` is an element node
// whose HTML tag name is `tag_name`.
template <const std::string_view& tag_name>
bool HasTagName(const WebNode& node) {
  if (!node.IsElementNode()) {
    return false;
  }
  return HasTagName<tag_name>(node.To<WebElement>());
}
template <const std::string_view& attribute>
bool HasAttribute(const WebElement& element) {
return element.HasAttribute(GetWebString<attribute>());
}
// Returns the value of the attribute named `attribute` on `element`, as
// reported by WebElement::GetAttribute().
template <const std::string_view& attribute>
WebString GetAttribute(const WebElement& element) {
  const WebString& attribute_name = GetWebString<attribute>();
  return element.GetAttribute(attribute_name);
}
// Picks an identifier for `form`: its |name| attribute when that is non-empty,
// and its |id| attribute otherwise (which may itself be empty).
std::u16string GetFormIdentifier(const WebFormElement& form) {
  std::u16string name = form.GetName().Utf16();
  if (!name.empty()) {
    return name;
  }
  return form.GetIdAttribute().Utf16();
}
// Returns the node adjacent to `current_node` in document order.
// `forward` selects the direction:
//  - forward: first child, else next sibling, else the next sibling of the
//    closest ancestor that has one; a null node if none exists.
//  - backward: the deepest last descendant of the previous sibling, or the
//    parent node if there is no previous sibling.
WebNode NextWebNode(const WebNode& current_node, bool forward) {
  if (!forward) {
    WebNode candidate = current_node.PreviousSibling();
    if (!candidate) {
      return current_node.ParentNode();
    }
    // Descend along last children to reach the node that directly precedes
    // `current_node`.
    for (WebNode last = candidate.LastChild(); last;
         last = candidate.LastChild()) {
      candidate = last;
    }
    return candidate;
  }

  if (WebNode child = current_node.FirstChild()) {
    return child;
  }
  if (WebNode sibling = current_node.NextSibling()) {
    return sibling;
  }
  // No child and no sibling: climb until some ancestor has a next sibling.
  for (WebNode ancestor = current_node.ParentNode(); ancestor;
       ancestor = ancestor.ParentNode()) {
    if (WebNode ancestor_sibling = ancestor.NextSibling()) {
      return ancestor_sibling;
    }
  }
  // Ran out of ancestors; there is nothing after `current_node`.
  return WebNode();
}
// True for
// - all text-type <input> elements (which does not include <textarea>) and
// - elements that have ever been <input type=password>.
bool IsTextInput(const WebFormControlElement& element) {
std::optional<FormControlType> type = GetAutofillFormControlType(element);
if (!type) {
return false;
}
switch (*type) {
case FormControlType::kContentEditable:
case FormControlType::kInputCheckbox:
case FormControlType::kInputMonth:
case FormControlType::kInputDate:
case FormControlType::kInputRadio:
case FormControlType::kSelectOne:
case FormControlType::kTextArea:
return false;
case FormControlType::kInputEmail:
case FormControlType::kInputNumber:
case FormControlType::kInputPassword:
case FormControlType::kInputSearch:
case FormControlType::kInputTelephone:
case FormControlType::kInputText:
case FormControlType::kInputUrl:
return true;
}
NOTREACHED();
}
bool IsSelectElement(const WebFormControlElement& element) {
return GetAutofillFormControlType(element) == FormControlType::kSelectOne;
}
// Returns true if `element` is non-null and is currently a checkbox or a radio
// button.
bool IsCheckableElement(const WebFormControlElement& element) {
  using enum blink::mojom::FormControlType;
  if (!element) {
    return false;
  }
  // `FormControlType()` is used on purpose rather than
  // `FormControlTypeForAutofill()`: the existing callers do not care whether
  // the field has ever been a password field before.
  const auto current_type = element.FormControlType();  // nocheck
  return current_type == kInputCheckbox || current_type == kInputRadio;
}
bool IsCheckableElement(const WebElement& element) {
return IsCheckableElement(element.DynamicTo<WebInputElement>());
}
// Returns true if |node| is an element and it is a container type that
// InferLabelForElement() can traverse.
bool IsTraversableContainerElement(const WebNode& node) {
if (!node.IsElementNode()) {
return false;
}
const WebElement element = node.To<WebElement>();
return HasTagName<kDefinitionDescriptionTag>(element) ||
HasTagName<kDiv>(element) || HasTagName<kFieldset>(element) ||
HasTagName<kListItem>(element) || HasTagName<kTableCell>(element) ||
HasTagName<kTable>(element);
}
// This function checks whether the children of `element` are of the type
// <script>, <meta>, <title> or <style>.
bool IsWebElementEmpty(const WebElement& root) {
if (!root) {
return true;
}
for (WebNode child = root.FirstChild(); child; child = child.NextSibling()) {
if (child.IsTextNode() &&
!base::ContainsOnlyChars(child.NodeValue().Utf8(),
base::kWhitespaceASCII)) {
return false;
}
if (!child.IsElementNode()) {
continue;
}
WebElement element = child.To<WebElement>();
if (!element.HasHTMLTagName(GetWebString<kScript>()) &&
!element.HasHTMLTagName(GetWebString<kMeta>()) &&
!element.HasHTMLTagName(GetWebString<kTitle>()) &&
!element.HasHTMLTagName(GetWebString<kStyle>())) {
return false;
}
}
return true;
}
// Returns the colspan of a <td> / <th> `element`. The result is 1 when the
// attribute is absent, parses to 0, or parses to the maximal size_t value
// (treated as overflow).
size_t CalculateTableCellColumnSpan(const WebElement& element) {
  DCHECK(HasTagName<kTableCell>(element) || HasTagName<kTableHeader>(element));
  if (!HasAttribute<kColspan>(element)) {
    return 1;
  }
  size_t parsed_span = 1;
  const std::u16string colspan_text = GetAttribute<kColspan>(element).Utf16();
  // The return value is ignored on purpose so that imperfect conversions are
  // still accepted.
  base::StringToSizeT(colspan_text, &parsed_span);
  const bool overflowed =
      parsed_span == std::numeric_limits<size_t>::max();
  if (overflowed || parsed_span < 1) {
    return 1;
  }
  return parsed_span;
}
// Appends |suffix| to |prefix| so that any intermediary whitespace is collapsed
// to a single space. If |force_whitespace| is true, then the resulting string
// is guaranteed to have a space between |prefix| and |suffix|. Otherwise, the
// result includes a space only if |prefix| has trailing whitespace or |suffix|
// has leading whitespace.
// Leading whitespace of |prefix| and trailing whitespace of |suffix| are left
// untouched.
// A few examples:
// * CombineAndCollapseWhitespace("foo", "bar", false) -> "foobar"
// * CombineAndCollapseWhitespace("foo", "bar", true) -> "foo bar"
// * CombineAndCollapseWhitespace("foo ", "bar", false) -> "foo bar"
// * CombineAndCollapseWhitespace("foo", " bar", false) -> "foo bar"
// * CombineAndCollapseWhitespace("foo", " bar", true) -> "foo bar"
// * CombineAndCollapseWhitespace("foo ", " bar", false) -> "foo bar"
// * CombineAndCollapseWhitespace(" foo", "bar ", false) -> " foobar "
// * CombineAndCollapseWhitespace(" foo", "bar ", true) -> " foo bar "
//
// The result is returned as a non-const value so that callers assigning it to
// an existing string move from it instead of copying.
std::u16string CombineAndCollapseWhitespace(const std::u16string& prefix,
                                            const std::u16string& suffix,
                                            bool force_whitespace) {
  // Strip the whitespace adjacent to the join point on the prefix side.
  std::u16string prefix_trimmed;
  base::TrimPositions prefix_trailing_whitespace =
      base::TrimWhitespace(prefix, base::TRIM_TRAILING, &prefix_trimmed);
  // Strip the whitespace adjacent to the join point on the suffix side.
  std::u16string suffix_trimmed;
  base::TrimPositions suffix_leading_whitespace =
      base::TrimWhitespace(suffix, base::TRIM_LEADING, &suffix_trimmed);
  // A non-zero TrimPositions means something was trimmed on that side.
  if (prefix_trailing_whitespace || suffix_leading_whitespace ||
      force_whitespace) {
    return prefix_trimmed + u" " + suffix_trimmed;
  }
  return prefix_trimmed + suffix_trimmed;
}
// This is a helper function for the FindChildText() function (see below).
// Concatenates the text found at |node|, in its descendants and in its
// following siblings, joining the pieces with CombineAndCollapseWhitespace().
// Search depth is limited with the |depth| parameter. |depth| is decremented
// on every recursive step, both into the first child and on to the next
// sibling, so it bounds the length of the child/sibling chain and not only
// the nesting depth.
// |divs_to_skip| is a list of <div> tags to ignore if encountered.
// Returns an empty string when |depth| is exhausted or |node| is null.
std::u16string FindChildTextInner(const WebNode& node,
int depth,
const std::set<WebNode>& divs_to_skip) {
if (depth <= 0 || !node) {
return std::u16string();
}
// Skip over comments. The comment node still consumes one unit of |depth|.
if (node.IsCommentNode())
return FindChildTextInner(node.NextSibling(), depth - 1, divs_to_skip);
// Any other node kind that is neither element nor text ends the walk here.
if (!node.IsElementNode() && !node.IsTextNode())
return std::u16string();
// Ignore elements known not to contain inferable labels.
bool skip_node = false;
if (node.IsElementNode()) {
const WebElement element = node.To<WebElement>();
// <option> elements, <div>s listed in |divs_to_skip| and autofillable form
// controls stop the walk entirely.
// NOTE(review): this early return also drops the text of all following
// siblings, because the sibling recursion below is never reached.
if (HasTagName<kOption>(element) ||
(HasTagName<kDiv>(element) && base::Contains(divs_to_skip, node)) ||
IsAutofillableElement(element.DynamicTo<WebFormControlElement>())) {
return std::u16string();
}
// <script>, <noscript> and <style> contribute neither their own text nor
// their children's, but their following siblings are still visited.
skip_node = HasTagName<kScript>(element) ||
HasTagName<kNoScript>(element) || HasTagName<kStyle>(element);
}
std::u16string node_text;
if (!skip_node) {
// Extract the text exactly at this node.
node_text = node.NodeValue().Utf16();
// Recursively compute the children's text.
// Preserve inter-element whitespace separation.
std::u16string child_text =
FindChildTextInner(node.FirstChild(), depth - 1, divs_to_skip);
// Force a separating space only for a text node whose own value is empty.
bool add_space = node.IsTextNode() && node_text.empty();
node_text = CombineAndCollapseWhitespace(node_text, child_text, add_space);
}
// Recursively compute the siblings' text.
// Again, preserve inter-element whitespace separation.
std::u16string sibling_text =
FindChildTextInner(node.NextSibling(), depth - 1, divs_to_skip);
// Re-evaluated here because |node_text| may now include the children's text.
bool add_space = node.IsTextNode() && node_text.empty();
node_text = CombineAndCollapseWhitespace(node_text, sibling_text, add_space);
return node_text;
}
// Same as FindChildText() below, but with a list of div nodes to skip.
std::u16string FindChildTextWithIgnoreList(
const WebNode& node,
const std::set<WebNode>& divs_to_skip) {
if (node.IsTextNode()) {
return node.NodeValue().Utf16();
}
WebNode child = node.FirstChild();
const int kChildSearchDepth = 10;
std::u16string node_text =
FindChildTextInner(child, kChildSearchDepth, divs_to_skip);
base::TrimWhitespace(node_text, base::TRIM_ALL, &node_text);
return node_text;
}
struct InferredLabel {
// Returns an `InferredLabel` if `label` contains at least one character that
// is neither whitespace nor "+*:-–()".
static std::optional<InferredLabel> BuildIfValid(
std::u16string label,
FormFieldData::LabelSource source);
std::u16string label;
FormFieldData::LabelSource source = FormFieldData::LabelSource::kUnknown;
private:
InferredLabel(std::u16string label, FormFieldData::LabelSource source);
};
// Shared function for InferLabelFromPrevious() and InferLabelFromNext().
std::optional<InferredLabel> InferLabelFromSibling(
const WebFormControlElement& element,
bool forward) {
std::u16string inferred_label;
WebNode sibling = element;
while (true) {
sibling = forward ? sibling.NextSibling() : sibling.PreviousSibling();
if (!sibling) {
break;
}
// Skip over comments.
if (sibling.IsCommentNode())
continue;
// Otherwise, only consider normal HTML elements and their contents.
if (!sibling.IsElementNode() && !sibling.IsTextNode())
break;
// A label might be split across multiple "lightweight" nodes.
// Coalesce any text contained in multiple consecutive
// (a) plain text nodes or
// (b) inline HTML elements that are essentially equivalent to text nodes.
if (sibling.IsTextNode() || HasTagName<kBold>(sibling) ||
HasTagName<kStrong>(sibling) || HasTagName<kSpan>(sibling) ||
HasTagName<kFont>(sibling)) {
std::u16string value = FindChildText(sibling);
// A text node's value will be empty if it is for a line break.
bool add_space = sibling.IsTextNode() && value.empty();
if (forward) {
inferred_label =
CombineAndCollapseWhitespace(inferred_label, value, add_space);
} else {
inferred_label =
CombineAndCollapseWhitespace(value, inferred_label, add_space);
}
continue;
}
// If we have identified a partial label and have reached a non-lightweight
// element, consider the label to be complete.
if (auto r = InferredLabel::BuildIfValid(inferred_label,
LabelSource::kCombined)) {
return r;
}
// <img> and <br> tags often appear between the input element and its
// label text, so skip over them.
if (HasTagName<kImage>(sibling) || HasTagName<kBreak>(sibling)) {
continue;
}
// We only expect <p> and <label> tags to contain the full label text.
bool has_label_tag = HasTagName<kLabel>(sibling);
if (HasTagName<kParagraph>(sibling) || has_label_tag) {
return InferredLabel::BuildIfValid(
FindChildText(sibling),
has_label_tag ? LabelSource::kLabelTag : LabelSource::kPTag);
}
break;
}
return InferredLabel::BuildIfValid(inferred_label, LabelSource::kCombined);
}
// Appends a button's |title| with its |button_type| to |list|.
// Whitespace in the title is collapsed first; a title that is empty afterwards
// is dropped, and longer titles are cut to kMaxLengthForSingleButtonTitle
// UTF-16 code units.
void AddButtonTitleToList(std::u16string title,
                          mojom::ButtonTitleType button_type,
                          ButtonTitleList* list) {
  std::u16string collapsed = base::CollapseWhitespace(std::move(title), false);
  if (collapsed.empty()) {
    return;
  }
  const size_t max_length =
      static_cast<size_t>(kMaxLengthForSingleButtonTitle);
  if (collapsed.size() > max_length) {
    collapsed.resize(max_length);
  }
  list->emplace_back(std::move(collapsed), button_type);
}
// Returns true iff |attribute| contains one of |kButtonFeatures|.
bool AttributeHasButtonFeature(const WebString& attribute) {
if (attribute.IsNull())
return false;
std::string value = attribute.Utf8();
std::ranges::transform(value, value.begin(), ::tolower);
for (const char* const button_feature : kButtonFeatures) {
if (value.find(button_feature, 0) != std::string::npos)
return true;
}
return false;
}
// Returns true if the id, name or class attribute of |element| contains one
// of |kButtonFeatures|. The attributes are checked in that order and the check
// stops at the first match.
bool ElementAttributesHasButtonFeature(const WebElement& element) {
  if (AttributeHasButtonFeature(GetAttribute<kId>(element))) {
    return true;
  }
  if (AttributeHasButtonFeature(GetAttribute<kName>(element))) {
    return true;
  }
  return AttributeHasButtonFeature(GetAttribute<kClass>(element));
}
// Finds elements from |elements| that contains |kButtonFeatures| and appends it
// to the |list|. If |extract_value_attribute|, the "value" attribute is
// extracted as a button title. Otherwise, |WebElement::TextContent| (aka
// innerText in Javascript) is extracted as a title.
void FindElementsWithButtonFeatures(const WebElementCollection& elements,
mojom::ButtonTitleType button_type,
bool extract_value_attribute,
ButtonTitleList* list) {
for (WebElement item = elements.FirstItem(); item;
item = elements.NextItem()) {
if (!ElementAttributesHasButtonFeature(item))
continue;
std::u16string title =
extract_value_attribute
? (HasAttribute<kValue>(item) ? GetAttribute<kValue>(item).Utf16()
: std::u16string())
: item.TextContent().Utf16();
if (extract_value_attribute && title.empty())
title = item.TextContent().Utf16();
AddButtonTitleToList(std::move(title), button_type, list);
}
}
// Returns a list of elements whose id matches one of the ids found in
// `id_list`.
std::vector<WebElement> GetWebElementsFromIdList(const WebDocument& document,
const WebString& id_list) {
std::vector<WebElement> web_elements;
std::u16string id_list_utf16 = id_list.Utf16();
for (std::u16string_view id : base::SplitStringPiece(
id_list_utf16, base::kWhitespaceUTF16, base::KEEP_WHITESPACE,
base::SPLIT_WANT_NONEMPTY)) {
web_elements.push_back(document.GetElementById(WebString(id)));
}
return web_elements;
}
// Returns the coalesced child of the elements who's ids are found in
// |id_list|.
//
// For example, given this document...
//
// <div id="billing">Billing</div>
// <div>
// <div id="name">Name</div>
// <input id="field1" type="text" aria-labelledby="billing name"/>
// </div>
// <div>
// <div id="address">Address</div>
// <input id="field2" type="text" aria-labelledby="billing address"/>
// </div>
//
// The coalesced text by the id_list found in the aria-labelledby attribute
// of the field1 input element would be "Billing Name" and for field2 it would
// be "Billing Address".
std::u16string CoalesceTextByIdList(const WebDocument& document,
const WebString& id_list) {
const std::u16string kSpace = u" ";
std::u16string text;
for (const auto& node : GetWebElementsFromIdList(document, id_list)) {
if (node) {
std::u16string child_text = FindChildText(node);
if (!child_text.empty()) {
if (!text.empty()) {
text.append(kSpace);
}
text.append(child_text);
}
}
}
base::TrimWhitespace(text, base::TRIM_ALL, &text);
return text;
}
// Returns the ARIA label text of the elements denoted by the aria-labelledby
// attribute of |element| or the value of the aria-label attribute of
// |element|, with priority given to the aria-labelledby attribute.
std::u16string GetAriaLabel(const WebDocument& document,
const WebElement& element) {
if (HasAttribute<kAriaLabelledBy>(element)) {
WebString aria_label_attribute = GetAttribute<kAriaLabelledBy>(element);
std::u16string text = CoalesceTextByIdList(document, aria_label_attribute);
if (!text.empty()) {
return text;
}
}
if (HasAttribute<kAriaLabel>(element)) {
return GetAttribute<kAriaLabel>(element).Utf16();
}
return std::u16string();
}
// Returns the ARIA label text of the elements denoted by the aria-describedby
// attribute of |element|.
std::u16string GetAriaDescription(const WebDocument& document,
const WebElement& element) {
return CoalesceTextByIdList(document,
GetAttribute<kAriaDescribedBy>(element));
}
// Helper for |InferLabelForElement()| that tries to infer a label from the
// siblings preceding |element|,
// e.g. Some Text <input ...>
// or Some <span>Text</span> <input ...>
// or <p>Some Text</p><input ...>
// or <label>Some Text</label> <input ...>
// or Some Text <img><input ...>
// or <b>Some Text</b><br/> <input ...>.
std::optional<InferredLabel> InferLabelFromPrevious(
    const WebFormControlElement& element) {
  constexpr bool kWalkForward = false;
  return InferLabelFromSibling(element, kWalkForward);
}
// Counterpart of InferLabelFromPrevious() that inspects the siblings following
// |element| instead.
// Useful for cases like: <span><input type="checkbox">Label For Checkbox</span>
std::optional<InferredLabel> InferLabelFromNext(
    const WebFormControlElement& element) {
  constexpr bool kWalkForward = true;
  return InferLabelFromSibling(element, kWalkForward);
}
// Helper for |InferLabelForElement()| that tries to infer a label from the
// placeholder attribute, e.g. <input placeholder="foo">. Returns std::nullopt
// if the attribute is absent or its text is not a valid label.
std::optional<InferredLabel> InferLabelFromPlaceholder(
    const WebFormControlElement& element) {
  if (!HasAttribute<kPlaceholder>(element)) {
    return std::nullopt;
  }
  std::u16string placeholder_text = GetAttribute<kPlaceholder>(element).Utf16();
  return InferredLabel::BuildIfValid(std::move(placeholder_text),
                                     LabelSource::kPlaceHolder);
}
// Helper for |InferLabelForElement()| that tries to infer a label from the
// ARIA label of |element| as computed by GetAriaLabel().
std::optional<InferredLabel> InferLabelFromAriaLabel(
    const WebFormControlElement& element) {
  std::u16string aria_text = GetAriaLabel(element.GetDocument(), element);
  return InferredLabel::BuildIfValid(std::move(aria_text),
                                     LabelSource::kAriaLabel);
}
// Detects a label declared after the `element`, which is visually positioned
// above the element (usually using CSS). Such labels often act as
// placeholders. E.g.
// <div>
// <input>
// <span>Placeholder</span>
// </div>
// We want to consider placeholders which are either positioned over the input
// element or placed on the top left (or top right in RTL languages) of the
// input element (they need to overlap a bit). We want to disregard elements
// that are primarily below the input element (even if they overlap) because
// that place is often used to indicate incorrect inputs.
std::optional<InferredLabel> InferLabelFromOverlayingSuccessor(
const WebFormControlElement& element) {
WebNode next = element.NextSibling();
while (next && !next.IsElementNode()) {
next = next.NextSibling();
}
if (next) {
gfx::Rect element_bounds = element.BoundsInWidget();
gfx::Rect next_bounds = next.To<WebElement>().BoundsInWidget();
// Reduce size by 1 pixel in all dimensions to resolve intersection due to
// rounding errors.
next_bounds.Inset(1);
// We don't rely on element_bounds.Contains(next_bounds) because some
// websites render the label partially above the input element.
// We check the following conditions: 1) horizontally we want the `next`
// element to be contained by `element`
// to consider `next` a label:
// |<----- element ----->|
// |<----- next ------>|
// 2) vertically we often see three cases:
// (a)
// -----
// ^ (b)
// -------- | -----
// ^ next ^
// | | |
// | v | (c) (not a placeholder)
// element ----- next -----
// | | ^
// | | |
// v v next
// -------- ----- |
// v
// -----
// a) a label is presented on the top left corner of an input element,
// possibly even exceeding it a bit.
// b) a label is presented inside the input element.
// c) an error message is presented at the bottom of an input element.
if (!next_bounds.IsEmpty() &&
// `next` needs to overlap `element` to be even considered.
element_bounds.Intersects(next_bounds) &&
// `next` must be horizontally contained.
next_bounds.x() >= element_bounds.x() &&
next_bounds.right() <= element_bounds.right() &&
// bottom of `next` does not exceed the bounds of `element` because that
// may represent an error label (case c above). The top of `next` may,
// however exceed the `element` (case a above), so that condition is not
// tested.
!(next_bounds.bottom() > element_bounds.bottom())) {
return InferredLabel::BuildIfValid(FindChildText(next),
LabelSource::kOverlayingLabel);
}
}
return std::nullopt;
}
// Helper for |InferLabelForElement()| that infers a label, from
// the value attribute when it is present and user has not typed in (if
// element's value attribute is same as the element's value).
std::optional<InferredLabel> InferLabelFromValueAttribute(
const WebFormControlElement& element) {
if (HasAttribute<kValue>(element) &&
GetAttribute<kValue>(element) == element.Value()) {
return InferredLabel::BuildIfValid(GetAttribute<kValue>(element).Utf16(),
LabelSource::kValue);
}
return std::nullopt;
}
// Helper for `InferLabelForElement()` that infers a label, if possible, from
// surrounding table structure,
// e.g. <tr><td>Some Text</td><td><input ...></td></tr>
// or <tr><th>Some Text</th><td><input ...></td></tr>
// or <tr><td><b>Some Text</b></td><td><b><input ...></b></td></tr>
// or <tr><th><b>Some Text</b></th><td><b><input ...></b></td></tr>
// `cell` represents the <td> tag containing the input element.
std::optional<InferredLabel> InferLabelFromTableColumn(const WebNode& cell) {
DCHECK(HasTagName<kTableCell>(cell));
// Check all previous siblings, skipping non-element nodes, until we find a
// non-empty text block.
std::optional<InferredLabel> r;
WebNode previous = cell.PreviousSibling();
while (!r && previous) {
if (HasTagName<kTableCell>(previous) ||
HasTagName<kTableHeader>(previous)) {
r = InferredLabel::BuildIfValid(FindChildText(previous),
LabelSource::kTdTag);
}
previous = previous.PreviousSibling();
}
return r;
}
// Helper for `InferLabelForElement()` that infers a label, if possible, from
// surrounding table structure.
//
// If there are multiple cells and the row with the input matches up with the
// previous row, then look for a specific cell within the previous row.
// e.g. <tr><td>Input 1 label</td><td>Input 2 label</td></tr>
// <tr><td><input name="input 1"></td><td><input name="input2"></td></tr>
//
// Otherwise, just look in the entire previous row.
// e.g. <tr><td>Some Text</td></tr><tr><td><input ...></td></tr>
// `cell` represents the <td> tag containing the input element.
//
// Cell positions are measured in columns, i.e. each cell contributes its
// colspan. Returns std::nullopt if `cell` has no enclosing <tr> or if no
// preceding row yields a valid label.
std::optional<InferredLabel> InferLabelFromTableRow(const WebNode& cell) {
DCHECK(HasTagName<kTableCell>(cell));
// Count the cell holding the input element.
size_t cell_count = CalculateTableCellColumnSpan(cell.To<WebElement>());
// [cell_position, cell_position_end] is the inclusive column range occupied
// by `cell`; both are relative to column 0 until the left cells are added.
size_t cell_position = 0;
size_t cell_position_end = cell_count - 1;
// Count cells to the left to figure out |element|'s cell's position.
// NOTE(review): only <td> siblings are counted here and in the loop below,
// whereas the previous row counts both <td> and <th> — confirm that this
// asymmetry is intended.
for (WebNode cell_it = cell.PreviousSibling(); cell_it;
cell_it = cell_it.PreviousSibling()) {
if (HasTagName<kTableCell>(cell_it)) {
cell_position += CalculateTableCellColumnSpan(cell_it.To<WebElement>());
}
}
// Count cells to the right.
for (WebNode cell_it = cell.NextSibling(); cell_it;
cell_it = cell_it.NextSibling()) {
if (HasTagName<kTableCell>(cell_it)) {
cell_count += CalculateTableCellColumnSpan(cell_it.To<WebElement>());
}
}
// Combine left + right.
// After this, `cell_count` is the total column count of the current row.
cell_count += cell_position;
cell_position_end += cell_position;
// Find the current row.
WebNode parent = cell.ParentNode();
while (parent && !HasTagName<kTableRow>(parent)) {
parent = parent.ParentNode();
}
if (!parent) {
return std::nullopt;
}
// Now find the previous row.
WebNode row_it = parent.PreviousSibling();
while (row_it && !HasTagName<kTableRow>(row_it)) {
row_it = row_it.PreviousSibling();
}
// If there exists a previous row, check its cells and size. If they align
// with the current row, infer the label from the cell above.
if (row_it) {
WebNode matching_cell;
// Running column offset within the previous row; equals the row's total
// column count once the loop has finished.
size_t prev_row_count = 0;
WebNode prev_row_it = row_it.FirstChild();
while (prev_row_it) {
if (prev_row_it.IsElementNode()) {
WebElement prev_row_element = prev_row_it.To<WebElement>();
if (prev_row_element.HasHTMLTagName(GetWebString<kTableCell>()) ||
prev_row_element.HasHTMLTagName(GetWebString<kTableHeader>())) {
size_t span = CalculateTableCellColumnSpan(prev_row_element);
size_t prev_row_count_end = prev_row_count + span - 1;
// A cell matches only if it covers exactly the same column range as
// `cell`.
if (prev_row_count == cell_position &&
prev_row_count_end == cell_position_end) {
matching_cell = prev_row_it;
}
prev_row_count += span;
}
}
prev_row_it = prev_row_it.NextSibling();
}
// The match is only trusted if both rows have the same number of columns.
if ((cell_count == prev_row_count) && matching_cell) {
if (auto r = InferredLabel::BuildIfValid(FindChildText(matching_cell),
LabelSource::kTdTag)) {
return r;
}
}
}
// If there is no previous row, or if the previous row and current row do not
// align, check all previous siblings, skipping non-element nodes, until we
// find a non-empty text block.
// This fallback also runs when the aligned cell above did not produce a valid
// label, and it uses the text of an entire <tr>.
WebNode previous = parent.PreviousSibling();
std::optional<InferredLabel> r;
while (!r && previous) {
if (HasTagName<kTableRow>(previous)) {
r = InferredLabel::BuildIfValid(FindChildText(previous),
LabelSource::kTdTag);
}
previous = previous.PreviousSibling();
}
return r;
}
// Helper for `InferLabelForElement()` that infers a label, if possible, from
// a surrounding div table.
// e.g. <div>Some Text<span><input ...></span></div>
// e.g. <div>Some Text</div><div><input ...></div>
//
// Contrary to the other InferLabelFrom* functions, this function walks up
// the DOM tree from the original input, instead of down from the surrounding
// tag. While doing so, if a <label> or text node sibling is found along the
// way, a label is inferred from it directly. For example, <div>First
// name<div><input></div>Last name<div><input></div></div> infers "First name"
// and "Last name" for the two inputs, respectively, by picking up the text
// nodes on the way to the surrounding div. Without doing so, the label of both
// inputs becomes "First nameLast name".
//
// Returns the first valid label found with source `LabelSource::kDivTable`,
// or `std::nullopt` if the walk ends (or is aborted) without finding one.
std::optional<InferredLabel> InferLabelFromDivTable(
const WebFormControlElement& element) {
WebNode node = element.ParentNode();
// True if `node` was reached by stepping to a parent; false if it was reached
// by stepping to a previous sibling.
bool looking_for_parent = true;
// Sibling <div>s that were rejected below because they contain a form
// control. They are handed to `FindChildTextWithIgnoreList()` when the text
// of an enclosing <div> is extracted.
std::set<WebNode> divs_to_skip;
// Search the sibling and parent <div>s until we find a candidate label.
std::optional<InferredLabel> r;
while (!r && node) {
if (HasTagName<kDiv>(node)) {
// A parent <div> is searched with the ignore list; a sibling <div> is
// searched in full.
r = InferredLabel::BuildIfValid(
looking_for_parent ? FindChildTextWithIgnoreList(node, divs_to_skip)
: FindChildText(node),
LabelSource::kDivTable);
// Avoid sibling DIVs that contain autofillable fields.
if (!looking_for_parent && r) {
WebElement result_element =
node.QuerySelector(GetWebString<kFormControlSelector>());
if (result_element) {
// Discard the candidate and remember the <div> so that it can be ignored
// later on.
r = std::nullopt;
divs_to_skip.insert(node);
}
}
// After visiting a <div>, continue with its previous siblings (unless
// there are none, see below).
looking_for_parent = false;
} else if (!looking_for_parent) {
// Infer a label from text nodes and unassigned <label> siblings.
if (node.IsTextNode() ||
(HasTagName<kLabel>(node) &&
!node.To<WebLabelElement>().CorrespondingControl())) {
r = InferredLabel::BuildIfValid(FindChildText(node),
LabelSource::kDivTable);
}
} else if (IsTraversableContainerElement(node)) {
// If the element is in a non-div container, its label most likely is too.
break;
}
if (!node.PreviousSibling()) {
// If there are no more siblings, continue walking up the tree.
looking_for_parent = true;
}
// Step to the parent or to the previous sibling, depending on the mode.
node = looking_for_parent ? node.ParentNode() : node.PreviousSibling();
}
return r;
}
// Helper for `InferLabelForElement()` that infers a label, if possible, from
// a surrounding definition list,
// e.g. <dl><dt>Some Text</dt><dd><input ...></dd></dl>
// e.g. <dl><dt><b>Some Text</b></dt><dd><b><input ...></b></dd></dl>
std::optional<InferredLabel> InferLabelFromDefinitionList(const WebNode& dd) {
DCHECK(HasTagName<kDefinitionDescriptionTag>(dd));
// Skip by any intervening text nodes.
WebNode previous = dd.PreviousSibling();
while (previous && previous.IsTextNode()) {
previous = previous.PreviousSibling();
}
if (!previous || !HasTagName<kDefinitionTermTag>(previous)) {
return std::nullopt;
}
return InferredLabel::BuildIfValid(FindChildText(previous),
LabelSource::kDdTag);
}
// Helper for `InferLabelForElement()` that infers a label, if possible, from
// the first surrounding <label>, <div>, <td>, <dd> or <li> tag (if any).
// See `FindChildText()`, `InferLabelFromDivTable()`,
// `InferLabelFromTableColumn()`, `InferLabelFromTableRow()` and
// `InferLabelFromDefinitionList()` for examples how a label is extracted from
// the different tags.
std::optional<InferredLabel> InferLabelFromAncestors(
const WebFormControlElement& element) {
std::set<std::string> seen_tag_names;
WebNode parent = element;
while ((parent = parent.ParentNode())) {
if (!parent.IsElementNode())
continue;
std::string tag_name = parent.To<WebElement>().TagName().Utf8();
if (base::Contains(seen_tag_names, tag_name))
continue;
seen_tag_names.insert(tag_name);
std::optional<InferredLabel> r;
if (tag_name == "LABEL") {
r = InferredLabel::BuildIfValid(FindChildText(parent),
LabelSource::kLabelTag);
} else if (tag_name == "DIV") {
r = InferLabelFromDivTable(element);
} else if (tag_name == "TD") {
r = InferLabelFromTableColumn(parent);
if (!r) {
r = InferLabelFromTableRow(parent);
}
} else if (tag_name == "DD") {
r = InferLabelFromDefinitionList(parent);
} else if (tag_name == "LI") {
r = InferredLabel::BuildIfValid(FindChildText(parent),
LabelSource::kLiTag);
} else if (tag_name == "FIELDSET") {
break;
}
if (r) {
return r;
}
}
return std::nullopt;
}
// The first <option> of <select> elements sometimes represents a default value
// like <option>Select country</option> (with no value attribute). In this case,
// the text of this <option> is a useful label.
// `InferLabelFromDefaultSelectText()` attempts to decide if this is the case
// by checking that there are at least two <option>s and that the first one is
// the only one lacking a non-empty value attribute. The heuristic is gated by
// `features::kAutofillInferLabelFromDefaultSelectText`.
std::optional<InferredLabel> InferLabelFromDefaultSelectText(
    const WebFormControlElement& element) {
  if (!base::FeatureList::IsEnabled(
          features::kAutofillInferLabelFromDefaultSelectText)) {
    return std::nullopt;
  }
  CHECK(IsSelectElement(element));

  // The list items can contain other elements like <optgroup>; keep only the
  // <option>s.
  std::vector<WebElement> options =
      element.To<WebSelectElement>().GetListItems();
  std::erase_if(options, [](const WebElement& item) {
    return !item.DynamicTo<WebOptionElement>();
  });

  auto has_non_empty_value_attribute = [](const WebElement& option) {
    // If an <option>'s value is unspecified, it defaults to its text content.
    // For this reason the `HasAttribute<>()` check is necessary.
    if (!HasAttribute<kValue>(option)) {
      return false;
    }
    std::u16string value = GetAttribute<kValue>(option).Utf16();
    base::TrimWhitespace(value, base::TRIM_ALL, &value);
    return !value.empty();
  };

  if (options.size() < 2 || has_non_empty_value_attribute(options[0])) {
    return std::nullopt;
  }
  if (!std::all_of(options.begin() + 1, options.end(),
                   has_non_empty_value_attribute)) {
    return std::nullopt;
  }
  return InferredLabel::BuildIfValid(FindChildText(options[0]),
                                     LabelSource::kDefaultSelectText);
}
// Infers the label for `element` from its surrounding context in the DOM,
// e.g. the contents of the preceding <p> tag or text element. The heuristics
// are tried in a fixed order and the first one that yields a label wins.
// Returns `std::nullopt` if no heuristic finds a label for `element`.
std::optional<InferredLabel> InferLabelForElement(
    const WebFormControlElement& element) {
  // Checkable elements are tried against the following content first.
  if (IsCheckableElement(element)) {
    if (auto label = InferLabelFromNext(element)) {
      return label;
    }
  }

  if (auto label = InferLabelFromPrevious(element)) {
    return label;
  }

  // The placeholder is only consulted here while the
  // `kAutofillBetterLocalHeuristicPlaceholderSupport` feature is disabled.
  if (!base::FeatureList::IsEnabled(
          features::kAutofillBetterLocalHeuristicPlaceholderSupport)) {
    if (auto label = InferLabelFromPlaceholder(element)) {
      return label;
    }
  }

  if (auto label = InferLabelFromOverlayingSuccessor(element)) {
    return label;
  }

  // If we didn't find a placeholder, check for aria-label text.
  if (auto label = InferLabelFromAriaLabel(element)) {
    return label;
  }

  // If we didn't find a label, check the `element`'s ancestors.
  if (auto label = InferLabelFromAncestors(element)) {
    return label;
  }

  if (IsSelectElement(element)) {
    if (auto label = InferLabelFromDefaultSelectText(element)) {
      return label;
    }
  }

  // Last resort: the value attribute used as the placeholder. Its result,
  // label or not, is the overall result.
  return InferLabelFromValueAttribute(element);
}
// Infers labels for all `fields` that do not have one yet, using the control
// element at the same index in `control_elements`. Afterwards, every field's
// label (inferred or pre-existing) is truncated to `kMaxStringLength`. The
// time spent is recorded in "Autofill.TimingPrecise.InferLabelForElement".
void InferLabelForElements(
    base::span<const WebFormControlElement> control_elements,
    std::vector<FormFieldData>& fields) {
  SCOPED_UMA_HISTOGRAM_TIMER_MICROS(
      "Autofill.TimingPrecise.InferLabelForElement");
  CHECK_EQ(control_elements.size(), fields.size());
  for (size_t i = 0; i < control_elements.size(); ++i) {
    FormFieldData& field = fields[i];
    if (field.label().empty()) {
      std::optional<InferredLabel> inferred =
          InferLabelForElement(control_elements[i]);
      if (inferred) {
        field.set_label(std::move(inferred->label));
        field.set_label_source(inferred->source);
      }
    }
    field.set_label(std::move(field.label()).substr(0, kMaxStringLength));
  }
}
// Removes duplicate titles from `result` and limits the total length of the
// remaining titles to `kMaxLengthForAllButtonTitles`: the title that crosses
// the limit is truncated and all titles after it are dropped. The order of the
// list is preserved because earlier elements are more reliable features than
// later ones.
void RemoveDuplicatesAndLimitTotalLength(ButtonTitleList* result) {
  std::set<ButtonTitleInfo> seen;
  ButtonTitleList kept;
  int total_length = 0;
  // Iterate by value: the title may be truncated and is moved into `kept`.
  for (auto title : *result) {
    // `insert()` fails for titles that were already encountered.
    if (!seen.insert(title).second) {
      continue;
    }
    total_length += title.first.length();
    if (total_length > kMaxLengthForAllButtonTitles) {
      // Cut off exactly the part that exceeds the limit.
      int new_length =
          title.first.length() - (total_length - kMaxLengthForAllButtonTitles);
      title.first = std::move(title.first).substr(0, new_length);
    }
    kept.push_back(std::move(title));
    if (total_length >= kMaxLengthForAllButtonTitles) {
      break;
    }
  }
  *result = std::move(kept);
}
// Returns the button titles of `web_form` that have the highest priority,
// based on the credibility of their HTML tags and attributes. Only the titles
// of the best non-empty priority class are returned, deduplicated and limited
// in total length.
ButtonTitleList InferButtonTitlesForForm(const WebFormElement& web_form) {
  // Different button types have different credibility of being the main button.
  // Highest - <input type='submit'>, <button type='submit'>, <button>.
  // Moderate - <input type='button'> <button type='button'>.
  // Least - <a>, <div>. <span> with attributes having button features.
  ButtonTitleList highest_priority_buttons;
  ButtonTitleList moderate_priority_buttons;

  // <input> elements: the title is the control's value.
  WebElementCollection inputs =
      web_form.GetElementsByHTMLTagName(GetWebString<kInput>());
  for (WebElement input = inputs.FirstItem(); input;
       input = inputs.NextItem()) {
    DCHECK(input.IsFormControlElement());
    WebFormControlElement control = input.To<WebFormControlElement>();
    const blink::mojom::FormControlType type =
        control.FormControlTypeForAutofill();
    const bool is_submit =
        type == blink::mojom::FormControlType::kInputSubmit ||
        type == blink::mojom::FormControlType::kButtonSubmit;
    const bool is_button =
        type == blink::mojom::FormControlType::kInputButton ||
        type == blink::mojom::FormControlType::kButtonButton;
    if (is_submit) {
      AddButtonTitleToList(control.Value().Utf16(),
                           mojom::ButtonTitleType::INPUT_ELEMENT_SUBMIT_TYPE,
                           &highest_priority_buttons);
    } else if (is_button) {
      AddButtonTitleToList(control.Value().Utf16(),
                           mojom::ButtonTitleType::INPUT_ELEMENT_BUTTON_TYPE,
                           &moderate_priority_buttons);
    }
  }

  // <button> elements: the title is the text content. A missing type attribute
  // counts as type='submit'.
  WebElementCollection buttons =
      web_form.GetElementsByHTMLTagName(GetWebString<kButton>());
  for (WebElement button = buttons.FirstItem(); button;
       button = buttons.NextItem()) {
    const WebString& type_attribute = GetAttribute<kType>(button);
    const bool is_submit =
        type_attribute.IsNull() || type_attribute == GetWebString<kSubmit>();
    if (!is_submit && type_attribute != GetWebString<kButton>()) {
      // Neither type='submit' nor type='button'. Skip this button.
      continue;
    }
    AddButtonTitleToList(
        button.TextContent().Utf16(),
        is_submit ? mojom::ButtonTitleType::BUTTON_ELEMENT_SUBMIT_TYPE
                  : mojom::ButtonTitleType::BUTTON_ELEMENT_BUTTON_TYPE,
        is_submit ? &highest_priority_buttons : &moderate_priority_buttons);
  }

  if (!highest_priority_buttons.empty()) {
    RemoveDuplicatesAndLimitTotalLength(&highest_priority_buttons);
    return highest_priority_buttons;
  }
  if (!moderate_priority_buttons.empty()) {
    RemoveDuplicatesAndLimitTotalLength(&moderate_priority_buttons);
    return moderate_priority_buttons;
  }

  // Fall back to <a>, <div> and <span> elements with button features.
  ButtonTitleList least_priority_buttons;
  FindElementsWithButtonFeatures(
      web_form.GetElementsByHTMLTagName(GetWebString<kAnchor>()),
      mojom::ButtonTitleType::HYPERLINK,
      /*extract_value_attribute=*/true, &least_priority_buttons);
  FindElementsWithButtonFeatures(
      web_form.GetElementsByHTMLTagName(GetWebString<kDiv>()),
      mojom::ButtonTitleType::DIV,
      /*extract_value_attribute=*/false, &least_priority_buttons);
  FindElementsWithButtonFeatures(
      web_form.GetElementsByHTMLTagName(GetWebString<kSpan>()),
      mojom::ButtonTitleType::SPAN,
      /*extract_value_attribute=*/false, &least_priority_buttons);
  RemoveDuplicatesAndLimitTotalLength(&least_priority_buttons);
  return least_priority_buttons;
}
// Returns true if `element` must not be filled with `field`. Whenever the
// element is skipped, the reason is recorded in the
// "Autofill.RendererFillSkipReason" histogram; nothing is recorded when the
// function returns false.
bool ShouldSkipFillField(const FormFieldData::FillData& field,
const WebFormControlElement& element) {
// The values are emitted to a histogram, hence the explicit numbering.
// NOTE(review): value 1 looks like a retired bucket that must not be reused
// -- confirm against the histogram's enum definition.
enum class SkipReason {
kUnfillable = 0,
// kNoValueToFill = 1,
kPreviouslyAutofilled = 2,
kUserEditedText = 3,
kUserEditedSelect = 4,
kMaxValue = kUserEditedSelect
};
constexpr char kSkipReasonHistogram[] = "Autofill.RendererFillSkipReason";
// Skip all checkable or non-modifiable elements. Non-focusable elements are
// skipped as well, except for select fields because some synthetic select
// elements use a hidden select element.
if (!element.IsConnected() || !IsAutofillableElement(element) ||
!element.IsEnabled() || element.IsReadOnly() ||
IsCheckableElement(element) ||
(!element.IsFocusable() && !IsSelectElement(element))) {
base::UmaHistogramEnumeration(kSkipReasonHistogram,
SkipReason::kUnfillable);
return true;
}
// A focused element or an explicit override request is always filled; the
// checks below are bypassed in that case.
if (element.Focused() || field.force_override) {
return false;
}
// Skip filling previously autofilled fields unless autofill is instructed to
// override it.
if (element.IsAutofilled()) {
base::UmaHistogramEnumeration(kSkipReasonHistogram,
SkipReason::kPreviouslyAutofilled);
return true;
}
// A text field is skipped if it has a non-empty value that is entered by
// the user and is NOT the value of the input field's "value" or "placeholder"
// attribute. (The "value" attribute in <input value="foo"> indicates the
// value of the input element at loading time, not its runtime value after the
// user entered something into the field.)
//
// Some sites fill the fields with a formatting string like (___)-___-____.
// To tell the difference between the values entered by the user and the site,
// we'll sanitize the value. If the sanitized value is empty, it means that
// the site has filled the field, in this case, the field is not skipped.
// Nevertheless the below condition does not hold for sites that set the
// |kValue| attribute to the user-input value.
//
// Returns true if `element` has `attribute` and its value equals `value`,
// compared case-insensitively.
auto HasAttributeWithValue = [&element](const auto& attribute,
const auto& value) {
return element.HasAttribute(attribute) &&
base::i18n::ToLower(element.GetAttribute(attribute).Utf16()) ==
base::i18n::ToLower(value);
};
const std::u16string current_element_value = element.Value().Utf16();
if ((element.DynamicTo<WebInputElement>() || IsTextAreaElement(element)) &&
element.UserHasEditedTheField() &&
!SanitizedFieldIsEmpty(current_element_value) &&
!HasAttributeWithValue(GetWebString<kValue>(), current_element_value) &&
!HasAttributeWithValue(GetWebString<kPlaceholder>(),
current_element_value)) {
base::UmaHistogramEnumeration(kSkipReasonHistogram,
SkipReason::kUserEditedText);
return true;
}
// Check if we should autofill/preview/clear a select element or leave it.
// A select is left alone if the user edited it and its sanitized value is
// non-empty.
if (IsSelectElement(element) && element.UserHasEditedTheField() &&
!SanitizedFieldIsEmpty(current_element_value)) {
base::UmaHistogramEnumeration(kSkipReasonHistogram,
SkipReason::kUserEditedSelect);
return true;
}
return false;
}
// Sets `form_control`'s value to `data.value` and sets its autofill state to
// "autofilled" (causing the background to be blue) or "not filled", depending
// on `data.is_autofilled`. Checkboxes, radio buttons and controls without an
// Autofill form control type are left untouched.
//
// For text inputs, the value -- cut to the control's maximum length -- is also
// recorded in `field_data_manager`. If `is_initiating_node` is true and the
// control is not a <select>, the caret is moved behind the filled value and a
// pending IME composition is cleared.
void FillFormField(const FormFieldData::FillData& data,
                   bool is_initiating_node,
                   WebFormControlElement& form_control,
                   FieldDataManager& field_data_manager) {
  const std::optional<FormControlType> control_type =
      GetAutofillFormControlType(form_control);
  if (!control_type.has_value()) {
    return;
  }
  // No `default:` so that every FormControlType is handled explicitly.
  switch (*control_type) {
    case FormControlType::kInputCheckbox:
    case FormControlType::kInputRadio:
      return;
    case FormControlType::kContentEditable:
      NOTREACHED();
    case FormControlType::kInputDate:
    case FormControlType::kInputEmail:
    case FormControlType::kInputMonth:
    case FormControlType::kInputNumber:
    case FormControlType::kInputPassword:
    case FormControlType::kInputSearch:
    case FormControlType::kInputTelephone:
    case FormControlType::kInputText:
    case FormControlType::kInputUrl:
    case FormControlType::kSelectOne:
    case FormControlType::kTextArea:
      break;
  }

  if (IsTextInput(form_control)) {
    field_data_manager.UpdateFieldDataMap(
        GetFieldRendererId(form_control),
        data.value.substr(0, form_control.MaxLength()),
        FieldPropertiesFlags::kAutofilledOnUserTrigger);
  }
  form_control.SetAutofillValue(
      WebString::FromUTF16(data.value),
      data.is_autofilled ? WebAutofillState::kAutofilled
                         : WebAutofillState::kNotFilled);

  // Changing the field's value might trigger JavaScript, which is capable
  // of destroying the frame. Apart from that, only the initiating non-select
  // control gets its caret adjusted.
  if (!form_control.GetDocument().GetFrame() || !is_initiating_node ||
      IsSelectElement(form_control)) {
    return;
  }

  const auto caret =
      base::checked_cast<unsigned>(form_control.Value().length());
  form_control.SetSelectionRange(caret, caret);

  // The selectionchange event is capable of destroying the frame, too. If the
  // frame survived, clear the current IME composition (the underline), if
  // there is one.
  if (auto* frame = form_control.GetDocument().GetFrame()) {
    frame->UnmarkText();
  }
}
// Sets `form_control`'s "suggested" (non JS visible) value to `data.value` and
// sets its autofill state to "previewed" (causing the background to be blue)
// or "not filled", depending on `data.is_autofilled`. Checkboxes, radio
// buttons and controls without an Autofill form control type are left
// untouched. `field_data_manager` is currently not used by this function.
void PreviewFormField(const FormFieldData::FillData& data,
                      WebFormControlElement& form_control,
                      FieldDataManager& field_data_manager) {
  const std::optional<FormControlType> control_type =
      GetAutofillFormControlType(form_control);
  if (!control_type.has_value()) {
    return;
  }
  // No `default:` so that every FormControlType is handled explicitly.
  switch (*control_type) {
    case FormControlType::kInputCheckbox:
    case FormControlType::kInputRadio:
      return;
    case FormControlType::kContentEditable:
      NOTREACHED();
    case FormControlType::kInputDate:
    case FormControlType::kInputEmail:
    case FormControlType::kInputMonth:
    case FormControlType::kInputNumber:
    case FormControlType::kInputPassword:
    case FormControlType::kInputSearch:
    case FormControlType::kInputTelephone:
    case FormControlType::kInputText:
    case FormControlType::kInputUrl:
    case FormControlType::kSelectOne:
    case FormControlType::kTextArea:
      break;
  }

  form_control.SetSuggestedValue(WebString::FromUTF16(data.value));
  form_control.SetAutofillState(data.is_autofilled
                                    ? WebAutofillState::kPreviewed
                                    : WebAutofillState::kNotFilled);
}
// A less-than comparator for FormFieldData's pointer by their FieldRendererId.
// It also supports direct comparison of a FieldRendererId with a FormFieldData
// pointer.
struct CompareByRendererId {
using is_transparent = void;
bool operator()(const std::pair<FormFieldData*, ShadowFieldData>& f,
const std::pair<FormFieldData*, ShadowFieldData>& g) const {
DCHECK(f.first && g.first);
return f.first->renderer_id() < g.first->renderer_id();
}
bool operator()(const FieldRendererId f,
const std::pair<FormFieldData*, ShadowFieldData>& g) const {
DCHECK(g.first);
return f < g.first->renderer_id();
}
bool operator()(const std::pair<FormFieldData*, ShadowFieldData>& f,
FieldRendererId g) const {
DCHECK(f.first);
return f.first->renderer_id() < g;
}
};
// Searches `fields` for a unique field with name `field_name`. If there is
// none or more than one field with that name, the fields' shadow hosts' name
// and id attributes are tested instead, and the first match is returned.
// Returns nullptr if `field_name` is empty or no match was found.
//
// On a match, `label_source` is set to the kind of match (`kForName`,
// `kForShadowHostName` or `kForShadowHostId`); otherwise it is left untouched.
FormFieldData* SearchForFormControlByName(
    const std::u16string& field_name,
    base::span<const std::pair<FormFieldData*, ShadowFieldData>> fields,
    LabelSource& label_source) {
  if (field_name.empty()) {
    return nullptr;
  }

  const auto name_of = [](const auto& entry) { return entry.first->name(); };
  const auto end = fields.end();

  // Preferred: exactly one field carries the name.
  const auto by_name = std::ranges::find(fields, field_name, name_of);
  const bool name_is_unique =
      by_name != end &&
      std::ranges::find(by_name + 1, end, field_name, name_of) == end;
  if (name_is_unique) {
    label_source = LabelSource::kForName;
    return by_name->first;
  }

  // Fallback: the first field whose shadow host has a matching name or id.
  const auto shadow_host_matches = [&](const auto& entry) {
    return base::Contains(entry.second.shadow_host_name_attributes,
                          field_name) ||
           base::Contains(entry.second.shadow_host_id_attributes, field_name);
  };
  const auto by_host = std::ranges::find_if(fields, shadow_host_matches);
  if (by_host == end) {
    return nullptr;
  }
  label_source =
      base::Contains(by_host->second.shadow_host_name_attributes, field_name)
          ? LabelSource::kForShadowHostName
          : LabelSource::kForShadowHostId;
  return by_host->first;
}
// Considers all <label> descendents of `root`, looks at their corresponding
// control and matches them to the fields in `fields`. The corresponding
// control is either a descendent of the label or an input specified by id in
// the label's for-attribute.
// In case no corresponding control exists, but a for-attribute is specified,
// we look for fields with matching name as a fallback. Moreover, the ids and
// names of shadow root ancestors of the fields are considered as a fallback.
void MatchLabelsAndFields(const WebDocument& root,
base::span<FormFieldData> fields,
std::vector<ShadowFieldData> shadow_fields) {
CHECK_EQ(fields.size(), shadow_fields.size());
if (fields.empty()) {
// Performance optimization: If there are no fields, the below is a no-op.
return;
}
base::flat_set<std::pair<FormFieldData*, ShadowFieldData>,
CompareByRendererId>
field_set = [&] {
std::vector<std::pair<FormFieldData*, ShadowFieldData>> items;
for (size_t i = 0; i < fields.size(); i++) {
items.emplace_back(&fields[i], std::move(shadow_fields[i]));
}
return items;
}();
WebElementCollection labels =
root.GetElementsByHTMLTagName(GetWebString<kLabel>());
DCHECK(labels);
for (WebElement item = labels.FirstItem(); item; item = labels.NextItem()) {
WebLabelElement label = item.To<WebLabelElement>();
WebElement control = label.CorrespondingControl();
FormFieldData* field_data = nullptr;
LabelSource label_source = LabelSource::kForId;
if (!control) {
// Sometimes site authors will incorrectly specify the corresponding
// field element's name rather than its id, so we compensate here.
field_data = SearchForFormControlByName(GetAttribute<kFor>(label).Utf16(),
field_set, label_source);
} else if (control.IsFormControlElement()) {
WebFormControlElement form_control = control.To<WebFormControlElement>();
if (form_control.FormControlTypeForAutofill() ==
blink::mojom::FormControlType::kInputHidden) {
continue;
}
// Typical case: look up `field_data` in `field_set`.
auto iter = field_set.find(GetFieldRendererId(form_control));
if (iter == field_set.end())
continue;
field_data = iter->first;
}
// Skip `label` if we could not find an associated form control.
if (!field_data)
continue;
std::u16string label_text = FindChildText(label);
if (label_text.empty()) {
if (HasAttribute<kFor>(label)) {
continue;
}
DCHECK(control && control.IsFormControlElement());
// An associated form control was found, but the `label` does not have a
// for-attribute, so the form control must be a descendant of the `label`.
// Since `FindChildText()` stops at autofillable elements, the
// `label_text` can be empty if the "text" is declared behind the <input>.
// For example:
// <label>
// <input>
// text
// </label>
// Thus, consider text behind the <input> as a fallback.
// Since associated labels are counted as `kFor`, the source is ignored.
if (auto inferred_label =
InferLabelFromNext(control.To<WebFormControlElement>())) {
label_text = inferred_label->label;
}
if (label_text.empty()) {
continue;
}
}
// Concatenate labels because some sites might have multiple label
// candidates.
if (!field_data->label().empty()) {
field_data->set_label(field_data->label() + u" ");
}
field_data->set_label(field_data->label() + std::move(label_text));
field_data->set_label_source(label_source);
}
}
bool IsAdIframe(const WebElement& element) {
DCHECK(element.HasHTMLTagName(GetWebString<kIframe>()));
WebFrame* iframe = WebFrame::FromFrameOwnerElement(element);
return iframe && iframe->IsAdFrame();
}
// A heuristic visibility detection. See crbug.com/1335257 for an overview of
// relevant aspects.
//
// Note that WebElement::BoundsInWidget(), WebElement::GetClientSize(),
// and WebElement::GetScrollSize() include the padding but do not include the
// border and margin. BoundsInWidget() additionally scales the
// dimensions according to the zoom factor.
//
// It seems that invisible fields on websites typically have dimensions between
// 0 and 10 pixels, before the zoom factor. Therefore choosing `kMinPixelSize`
// is easier without including the zoom factor. For that reason, this function
// prefers GetClientSize() over BoundsInWidget().
//
// This function does not check the position in the viewport because fields in
// iframes commonly are visible despite the body having height zero. Therefore,
// `e.GetDocument().Body().BoundsInWidget().Intersects(
// e.BoundsInWidget())` yields false negatives.
//
// TODO(crbug.com/40846971): Can input fields or iframes actually overflow?
bool IsWebElementVisible(const WebElement& element) {
  // Null and non-focusable elements are never considered visible.
  if (!element || !element.IsFocusable()) {
    return false;
  }
  // Checkable elements are exempt from the size requirement.
  if (IsCheckableElement(element)) {
    return true;
  }
  // Otherwise, the client size or the scroll size must measure at least
  // `kMinPixelSize` in both dimensions.
  auto is_large_enough = [](auto size) {
    constexpr int kMinPixelSize = 10;
    return size.width() >= kMinPixelSize && size.height() >= kMinPixelSize;
  };
  if (is_large_enough(element.GetClientSize())) {
    return true;
  }
  return is_large_enough(element.GetScrollSize());
}
// Returns the closest <form> ancestor of `n` (or `n` itself if it is a <form>),
// or a null WebFormElement if there is none.
//
// Generally, WebFormElements must not be nested [1]. When parsing HTML, Blink
// ignores nested form tags; the inner forms therefore never make it into the
// DOM. However, nested forms can be created and added to the DOM dynamically,
// in which case Blink associates each field with its closest ancestor.
//
// For some elements, Autofill determines the associated form without Blink's
// help (currently, these are only iframe elements). For consistency with
// Blink's behaviour, we associate them with their closest form element
// ancestor.
//
// [1] https://html.spec.whatwg.org/multipage/forms.html#the-form-element
WebFormElement GetClosestAncestorFormElement(WebNode n) {
  // Walk from `n` (inclusive) towards the root and stop at the first <form>.
  for (; n; n = n.ParentNode()) {
    if (HasTagName<kForm>(n)) {
      return n.To<WebFormElement>();
    }
  }
  // No <form> on the path to the root.
  return WebFormElement();
}
// Returns true if a DOM traversal (pre-order, depth-first) visits `x` before
// `y`. Returns false if `x` and `y` are the same node.
// As a performance improvement, `ancestor_hint` can be set to a suspected
// ancestor of `x` and `y`. Otherwise, `ancestor_hint` can be arbitrary
// (including null), as long as it is in the same document as `x` and `y`.
//
// This function is a simplified/specialized version of Blink's private
// Node::compareDocumentPosition().
bool IsDOMPredecessor(const WebNode& x,
const WebNode& y,
const WebNode& ancestor_hint) {
DCHECK(x.GetDocument() == y.GetDocument());
DCHECK(!ancestor_hint || x.GetDocument() == ancestor_hint.GetDocument());
// Extends the `path` up to `end` (exclusive) or the document root.
// Paths are backwards: the last element is the top-most node.
auto BuildPath = [](std::vector<WebNode> path, const WebNode& end) {
DCHECK(!path.empty());
path.reserve(path.size() + 16);
WebNode parent;
while ((parent = path.back().ParentNode()) && parent != end) {
path.push_back(parent);
}
return path;
};
// Returns true iff `lhs` is strictly to the left of `rhs`, provided both
// nodes are siblings.
// This scans from `rhs` (inclusive) to the right: if `lhs` shows up there, it
// is not strictly to the left of `rhs`.
auto IsLeftSiblingOf = [](const WebNode& lhs, const WebNode& rhs) {
DCHECK(lhs.ParentNode() == rhs.ParentNode());
for (WebNode n = rhs; n; n = n.NextSibling()) {
if (n == lhs) {
return false;
}
}
return true;
};
// Both paths are successors of either `ancestor_hint` or the document root.
// If their parents aren't the same, we extend the paths to the document root.
std::vector<WebNode> x_path = BuildPath({x}, ancestor_hint);
std::vector<WebNode> y_path = BuildPath({y}, ancestor_hint);
if (x_path.back().ParentNode() != y_path.back().ParentNode()) {
// The hint was not a common ancestor: only one of the paths stopped at it.
x_path = BuildPath(std::move(x_path), WebNode());
y_path = BuildPath(std::move(y_path), WebNode());
}
// Iterate in reverse, i.e., from the top-most nodes down towards `x` and `y`.
auto x_it = x_path.rbegin();
auto y_it = y_path.rbegin();
// Find the first different nodes in the paths. If such nodes exist, they are
// siblings and their sibling order determines |x| and |y|'s relationship.
while (x_it != x_path.rend() && y_it != y_path.rend()) {
if (*x_it != *y_it) {
return IsLeftSiblingOf(*x_it, *y_it);
}
++x_it;
++y_it;
}
// If the paths don't differ in a node, the shorter path indicates a
// predecessor: it belongs to an ancestor of the other node, and a pre-order
// traversal visits ancestors before their descendants. Equal paths (same
// node) yield false.
return x_it == x_path.rend() && y_it != y_path.rend();
}
// Indicates if an iframe |element| is considered actually visible to the user.
//
// This function is not intended to implement a perfect visibility check. It
// rather aims to strike balance between cheap tests and filtering invisible
// frames, which can then be skipped during parsing.
//
// The current visibility check requires focusability and a sufficiently large
// bounding box. Thus, particularly elements with "visibility: hidden",
// "display: none", and "width: 0; height: 0" are considered invisible.
//
// Future potential improvements include:
// * Detect potential visibility of elements with "overflow: visible".
// (See WebElement::GetScrollSize().)
// * Detect invisibility of elements with
// - "position: absolute; {left,top,right,bottom}: -100px"
// - "opacity: 0.0"
// - "clip: rect(0,0,0,0)"
//
// TODO(crbug.com/40846971): This check is very similar to IsWebElementVisible()
// (see the documentation there for the subtle differences: zoom factor and
// scroll size). We can probably merge them but should do a Finch experiment
// about it.
bool IsVisibleIframe(const WebElement& element) {
DCHECK(element.HasHTMLTagName(GetWebString<kIframe>()));
// It is common for not-humanly-visible elements to have very small yet
// positive bounds. The threshold of 10 pixels is chosen rather arbitrarily.
constexpr int kMinPixelSize = 10;
gfx::Rect bounds = element.BoundsInWidget();
return element.IsFocusable() && bounds.width() > kMinPixelSize &&
bounds.height() > kMinPixelSize;
}
// A necessary condition for an iframe to be added to FormData::child_frames.
//
// We also extract invisible iframes for the following reason. An iframe may be
// invisible at page load (for example, when it contains parts of a credit card
// form and the user hasn't chosen a payment method yet). Autofill is not
// notified when the iframe becomes visible. That is, Autofill may have not
// re-extracted the main frame's form by the time the iframe has become visible
// and the user has focused a field in that iframe. This outdated form is
// missing the link in FormData::child_frames between the parent form and the
// iframe's form, which prevents Autofill from filling across frames.
//
// The current implementation extracts visible ad frames. Assuming IsAdIframe()
// has no false positives, we could omit the IsVisibleIframe() disjunct. We
// could even take this further and disable Autofill in ad frames.
//
// For further details, see crbug.com/1117028#c8 and crbug.com/1245631.
bool IsRelevantChildFrame(const WebElement& element) {
  DCHECK(element.HasHTMLTagName(GetWebString<kIframe>()));
  // Non-ad frames are always relevant, even if they are invisible.
  if (!IsAdIframe(element)) {
    return true;
  }
  // Ad frames are never relevant if only non-ad frames shall be extracted.
  if (base::FeatureList::IsEnabled(
          features::kAutofillExtractOnlyNonAdFrames)) {
    return false;
  }
  // Otherwise, an ad frame is relevant only if it is visible.
  return IsVisibleIframe(element);
}
// Returns the <iframe> elements that are associated with `form_element`.
// An iframe is associated with `form_element` iff
// - if `form_element` is non-null:
// `form_element` is the iframe's closest <form> ancestor
// - if `form_element` is null:
// the iframe has no <form> ancestor.
std::vector<WebElement> GetIframeElements(const WebDocument& document,
const WebFormElement& form_element) {
std::vector<WebElement> relevant_iframes;
WebElementCollection iframes =
document.GetElementsByHTMLTagName(GetWebString<kIframe>());
for (WebElement iframe = iframes.FirstItem(); iframe;
iframe = iframes.NextItem()) {
if (GetClosestAncestorFormElement(iframe) == form_element &&
IsRelevantChildFrame(iframe)) {
relevant_iframes.push_back(iframe);
}
}
return relevant_iframes;
}
// Returns whether a script-modified username or credit card number `value` is
// suitable to store in Password Manager/Autofill given `typed_value`, i.e.
// what the user typed. The comparison with `typed_value` is case-insensitive.
bool IsScriptModifiedValueAcceptable(
    const std::u16string& value,
    const std::u16string& typed_value,
    const FieldDataManager& field_data_manager) {
  // The minimal size of a field value that will be substring-matched.
  constexpr size_t kMinMatchSize = 3u;

  const auto value_lower = base::i18n::ToLower(value);
  const auto typed_lower = base::i18n::ToLower(typed_value);

  // If the page-generated value is just a completion of the typed value, or
  // contains a sufficiently long typed value anywhere, that's likely
  // acceptable.
  if (value_lower.starts_with(typed_lower) ||
      (typed_lower.size() >= kMinMatchSize &&
       value_lower.find(typed_lower) != std::u16string::npos)) {
    return true;
  }

  // If the page-generated value comes from user typed or autofilled values in
  // other fields, that's also likely OK.
  return field_data_manager.FindMatchedValue(value);
}
// Returns the maximum length of a value that Autofill may fill into `element`.
// Two cases are special:
// - Fields without free text input (e.g., <select> and <input type=month>)
//   yield 0.
// - Fields with text values (e.g., <input type=text> or <textarea>) that have
//   no maxlength attribute yield FormFieldData::kDefaultMaxLength, the maximum
//   32 bit number. Choosing 32 rather than 64 bits is intentional: arithmetic
//   on FormFieldData::max_length then does not need to worry about integer
//   overflows everywhere.
uint64_t GetMaxLength(const WebFormControlElement& element) {
  const bool supports_free_text =
      IsTextInput(element) ||
      element.FormControlTypeForAutofill() ==
          blink::mojom::FormControlType::kTextArea;
  if (!supports_free_text) {
    return 0;
  }
  auto max_length = element.MaxLength();
  // Every value MaxLength() can return must fit below the default.
  static_assert(uint64_t{std::numeric_limits<decltype(max_length)>::max()} <=
                FormFieldData::kDefaultMaxLength);
  // A negative value means that no maxlength is set.
  if (max_length < 0) {
    return FormFieldData::kDefaultMaxLength;
  }
  return max_length;
}
// Returns the SelectOptions for the given <select> element.
//
// For example,
// <select>
// <option value=Foo>Bar</option>
// <option value=Foo>Foo</option>
// </select>
// returns {{.value = "Foo", .text = "Bar"}, {.value = "Foo", .text = "Foo"}}.
// For more details, see the documentation of `SelectOption`.
std::vector<SelectOption> GetSelectOptions(
const WebSelectElement& select_element) {
std::vector<WebElement> option_elements = select_element.GetListItems();
// Constrain the maximum list length to prevent a malicious site from DOS'ing
// the browser, without entirely breaking autocomplete for some extreme
// legitimate sites: http://crbug.com/49332 and http://crbug.com/363094
if (option_elements.size() > kMaxListSize) {
return {};
}
auto to_string = [](WebString s) {
return s.Utf16().substr(0, kMaxStringLength);
};
std::vector<SelectOption> options;
options.reserve(option_elements.size());
for (const auto& maybe_option_element : option_elements) {
if (auto option_element =
maybe_option_element.DynamicTo<WebOptionElement>()) {
std::u16string text = to_string(option_element.GetText());
if (text.empty()) {
text = GetAriaLabel(option_element.GetDocument(), option_element)
.substr(0, kMaxStringLength);
}
options.push_back({.value = to_string(option_element.Value()),
.text = std::move(text)});
}
}
return options;
}
// Returns the SelectOptions for the <datalist> associated with the given
// <input> element. The browser may display these options to the user as
// Autofill suggestions.
//
// For example,
//   <input list=l>
//   <datalist id=l>
//     <option value=Foo>Bar</option>
//     <option value=Foo>Foo</option>
//   </datalist>
// returns {{.value = "Foo", .text = "Bar"}, {.value = "Foo", .text = ""}}.
// It is intentional that the `value` takes precedence over the `text` because
// datalist values are user-visible.
//
// At most kMaxListSize options are returned; further options are dropped.
// Every string is truncated to kMaxStringLength.
std::vector<SelectOption> GetDataListOptions(const WebInputElement& element) {
  auto to_string = [](WebString s) {
    return s.Utf16().substr(0, kMaxStringLength);
  };
  std::vector<WebOptionElement> option_elements =
      element.FilteredDataListOptions();
  std::vector<SelectOption> options;
  options.reserve(std::min(option_elements.size(), kMaxListSize));
  for (const WebOptionElement& option_element : option_elements) {
    // Stop once the cap is reached. Checking with `>=` (rather than `>`)
    // keeps the result within kMaxListSize and within the reserved capacity.
    if (options.size() >= kMaxListSize) {
      break;
    }
    // The label is only reported as `text` if it differs from the value.
    options.push_back(
        {.value = to_string(option_element.Value()),
         .text = to_string(option_element.Value() != option_element.Label()
                               ? option_element.Label()
                               : WebString())});
  }
  return options;
}
// Returns true iff some strict shadow-tree-including ancestor of `node` is a
// `<form>`. `node` itself is not considered.
bool HasFormAncestor(WebNode node) {
  for (WebNode ancestor = node.ParentOrShadowHostNode(); ancestor;
       ancestor = ancestor.ParentOrShadowHostNode()) {
    if (HasTagName<kForm>(ancestor)) {
      return true;
    }
  }
  return false;
}
// Returns the connected form control elements that are
// - owned by `form_element` if `form_element` is non-null;
// - owned by no form otherwise.
std::vector<WebFormControlElement> GetOwnedFormControls(
    const WebDocument& document,
    const WebFormElement& form_element) {
  std::vector<WebFormControlElement> controls;
  if (!form_element) {
    controls = document.UnassociatedFormControls();  // nocheck
    // A form control element may be unassociated inside its Shadow DOM, but
    // owned (in the Autofill sense) by a <form> containing the shadow host.
    auto is_owned_through_shadow_host = [](const WebFormControlElement& e) {
      return e.OwnerShadowHost() && HasFormAncestor(e);
    };
    std::erase_if(controls, is_owned_through_shadow_host);
  } else {
    controls = form_element.GetFormControlElements();  // nocheck
  }
  // Disconnected controls are never reported.
  std::erase_if(controls, [](const WebFormControlElement& control) {
    return !control.IsConnected();
  });
  return controls;
}
// Fills out `field` from the autofillable, connected form control `element`.
//
// - `form_element` may be null. Its renderer ID is stored as the field's host
//   form ID. The shadow host traversal below only happens if it is non-null
//   and stops at the shadow host that owns `form_element`.
// - `field_data_manager` may be null. If it is non-null and has an entry for
//   `element`, the field's properties mask is copied from it (see properties
//   in FieldPropertiesFlags). If it is non-null, MaybeUpdateUserInput() is
//   also called for the field.
// - `extract_options`: See the enum ExtractOption above for details.
// - `field` must be non-null.
// - `shadow_data` may be null. If it is non-null, the non-empty id and name
//   attributes of the traversed shadow hosts are appended to it.
void WebFormControlElementToFormField(
    const WebFormElement& form_element,
    const WebFormControlElement& element,
    const FieldDataManager* field_data_manager,
    DenseSet<ExtractOption> extract_options,
    FormFieldData* field,
    ShadowFieldData* shadow_data) {
  DCHECK(field);
  DCHECK(element);
  DCHECK(element.GetDocument().GetFrame());
  DCHECK(element.IsConnected());
  DCHECK(IsAutofillableElement(element));
  const FieldRendererId renderer_id = GetFieldRendererId(element);
  // Save both id and name attributes, if present. If there is only one of them,
  // it will be saved to |name|. See HTMLFormControlElement::nameForAutofill.
  field->set_name(element.NameForAutofill().Utf16());
  field->set_id_attribute(element.GetIdAttribute().Utf16());
  field->set_name_attribute(GetAttribute<kName>(element).Utf16());
  field->set_renderer_id(renderer_id);
  field->set_host_form_id(GetFormRendererId(form_element));
  field->set_form_control_ax_id(element.GetAxId());
  field->set_form_control_type(*GetAutofillFormControlType(element));
  field->set_max_length(GetMaxLength(element));
  field->set_autocomplete_attribute(GetAutocompleteAttribute(element));
  field->set_parsed_autocomplete(
      ParseAutocompleteAttribute(field->autocomplete_attribute()));
  if (base::EqualsCaseInsensitiveASCII(GetAttribute<kRole>(element).Utf16(),
                                       "presentation")) {
    field->set_role(FormFieldData::RoleAttribute::kPresentation);
  }
  if (HasAttribute<kPattern>(element)) {
    field->set_pattern(GetAttribute<kPattern>(element).Utf16());
  }
  field->set_placeholder(GetAttribute<kPlaceholder>(element).Utf16());
  if (HasAttribute<kClass>(element)) {
    field->set_css_classes(GetAttribute<kClass>(element).Utf16());
  }
  if (field_data_manager && field_data_manager->HasFieldData(renderer_id)) {
    field->set_properties_mask(
        field_data_manager->GetFieldPropertiesMask(renderer_id));
  }
  field->set_aria_label(GetAriaLabel(element.GetDocument(), element));
  field->set_aria_description(
      GetAriaDescription(element.GetDocument(), element));
  // Runtime feature state, hence no `k` prefix.
  const bool detect_field_visibility =
      base::FeatureList::IsEnabled(features::kAutofillDetectFieldVisibility);

  // Traverse up through shadow hosts to see if we can gather missing
  // attributes.
  // TODO(crbug.com/40204601): Make sure this works for all shadow DOM cases,
  // including cases in which the owning form is multiple (shadow DOM) levels
  // apart from the form control element. Also check whether we cannot simplify
  // some of the shadow DOM traversals here.
  size_t levels_up = kMaxShadowLevelsUp;
  for (WebElement host = element.OwnerShadowHost();
       host && levels_up > 0 && form_element &&
       form_element.OwnerShadowHost() != host;
       host = host.OwnerShadowHost(), --levels_up) {
    // The host's id and name are read once and reused below, both for
    // `shadow_data` and as fallbacks for the field's own attributes.
    const std::u16string shadow_host_id = host.GetIdAttribute().Utf16();
    if (shadow_data && !shadow_host_id.empty()) {
      shadow_data->shadow_host_id_attributes.push_back(shadow_host_id);
    }
    const std::u16string shadow_host_name = GetAttribute<kName>(host).Utf16();
    if (shadow_data && !shadow_host_name.empty()) {
      shadow_data->shadow_host_name_attributes.push_back(shadow_host_name);
    }
    // Each attribute that is still empty is taken from the closest shadow host
    // that provides it.
    if (field->id_attribute().empty()) {
      field->set_id_attribute(shadow_host_id);
    }
    if (field->name_attribute().empty()) {
      field->set_name_attribute(shadow_host_name);
    }
    if (field->name().empty()) {
      field->set_name(field->name_attribute().empty()
                          ? field->id_attribute()
                          : field->name_attribute());
    }
    if (field->autocomplete_attribute().empty()) {
      field->set_autocomplete_attribute(GetAutocompleteAttribute(host));
      field->set_parsed_autocomplete(
          ParseAutocompleteAttribute(field->autocomplete_attribute()));
    }
    if (field->css_classes().empty() && HasAttribute<kClass>(host)) {
      field->set_css_classes(GetAttribute<kClass>(host).Utf16());
    }
    if (field->aria_label().empty()) {
      field->set_aria_label(GetAriaLabel(host.GetDocument(), host));
    }
    if (field->aria_description().empty()) {
      field->set_aria_description(GetAriaDescription(host.GetDocument(), host));
    }
  }

  // The browser doesn't need to differentiate between preview and autofill.
  field->set_is_autofilled(element.IsAutofilled());
  field->set_is_user_edited(element.UserHasEditedTheField());
  field->set_is_focusable(element.IsFocusable());
  // Without visibility detection, focusability is used as the visibility
  // signal.
  field->set_is_visible(detect_field_visibility ? IsWebElementVisible(element)
                                                : field->is_focusable());
  // Fields tagged as one-time codes are never autocompleted.
  field->set_should_autocomplete(
      element.AutoComplete() &&
      !(field->parsed_autocomplete().has_value() &&
        field->parsed_autocomplete().value().field_type ==
            HtmlFieldType::kOneTimeCode));
  field->set_text_direction(GetTextDirectionForElement(element));
  field->set_is_enabled(element.IsEnabled());
  field->set_is_readonly(element.IsReadOnly());

  if (auto input_element = element.DynamicTo<WebInputElement>()) {
    SetCheckStatus(field, IsCheckableElement(input_element),
                   input_element.IsChecked());
    if (extract_options.contains(ExtractOption::kDatalist) ||
        base::FeatureList::IsEnabled(
            features::kAutofillOptimizeFormExtraction)) {
      // TODO(crbug.com/316143236): Remove this metric once debugging is
      // complete.
      base::UmaHistogramEnumeration(
          "Autofill.DataList.Events",
          AutofillDataListEvents::kDataListOptionsParsed);
      field->set_datalist_options(GetDataListOptions(input_element));
    }
  } else if (IsTextAreaElement(element)) {
    // Nothing more to do in this case.
  } else {
    // Set option strings on the field if available.
    DCHECK(IsSelectElement(element));
    field->set_options(GetSelectOptions(element.To<WebSelectElement>()));
  }

  if (extract_options.contains(ExtractOption::kBounds) ||
      base::FeatureList::IsEnabled(features::kAutofillOptimizeFormExtraction)) {
    if (auto* local_frame = element.GetDocument().GetFrame()) {
      if (auto* render_frame =
              content::RenderFrame::FromWebFrame(local_frame)) {
        field->set_bounds(gfx::RectF(
            render_frame->ConvertViewportToWindow(element.BoundsInWidget())));
      }
    }
  }

  field->set_value(element.Value().Utf16().substr(0, kMaxStringLength));
  field->set_selected_text(
      element.SelectedText().Utf16().substr(0, kMaxSelectedTextLength));
  field->set_allows_writing_suggestions(element.WritingSuggestions());
  if (field_data_manager) {
    MaybeUpdateUserInput(*field, renderer_id, *field_data_manager);
  }
}
#if BUILDFLAG(IS_ANDROID)
// Heuristically decides whether `element` looks like a captcha: it must be
// visible and one of its src, title, id or name attributes must contain
// "captcha". The heuristics cannot be perfect and are therefore subject to
// change, e.g. by adding a list of domains of captcha providers to be compared
// with the 'src' attribute.
bool IsLikelyCaptchaIframe(const WebElement& element) {
  if (!IsWebElementVisible(element)) {
    return false;
  }
  static constexpr std::string_view kCaptcha = "captcha";
  auto mentions_captcha = [](const auto& attribute) {
    return attribute.Find(kCaptcha) != std::string::npos;
  };
  if (mentions_captcha(GetAttribute<kSrc>(element))) {
    return true;
  }
  if (mentions_captcha(GetAttribute<kTitle>(element))) {
    return true;
  }
  if (mentions_captcha(GetAttribute<kId>(element))) {
    return true;
  }
  return mentions_captcha(GetAttribute<kName>(element));
}
#endif
// Extracts the FormData for `form_element` or, if `form_element` is null, for
// the autofillable form controls and iframes of `document` that are owned by
// no form. The result carries the fields (with labels), the child frames, the
// button titles and, on Android, whether the form likely contains a captcha.
//
// Returns std::nullopt if
// - `form_element` is non-null but not connected,
// - there are more than kMaxExtractableFields fields, or
// - there are neither fields nor child frames.
// If there are more than kMaxExtractableChildFrames child frames, all child
// frames are dropped but the form is still extracted.
//
// If kAutofillOptimizeFormExtraction is enabled, these limits are checked
// before the individual fields are extracted; otherwise they are checked
// afterwards.
std::optional<FormData> ExtractFormDataWithFieldsAndFrames(
const WebDocument& document,
const WebFormElement& form_element,
const FieldDataManager& field_data_manager,
ButtonTitlesCache* button_titles_cache,
DenseSet<ExtractOption> extract_options) {
if (form_element && !form_element.IsConnected()) {
return std::nullopt;
}
std::vector<WebFormControlElement> control_elements =
GetOwnedAutofillableFormControls(document, form_element);
// Early exit: avoids extracting the individual fields of oversized forms.
if (base::FeatureList::IsEnabled(features::kAutofillOptimizeFormExtraction) &&
control_elements.size() > kMaxExtractableFields) {
return std::nullopt;
}
std::vector<WebElement> iframe_elements =
GetIframeElements(document, form_element);
if (base::FeatureList::IsEnabled(features::kAutofillOptimizeFormExtraction)) {
// Drops the iframe elements that have no local or remote frame, so that
// every remaining element yields a frame token further below.
std::erase_if(iframe_elements, [](const WebElement& iframe_element) {
WebFrame* iframe = WebFrame::FromFrameOwnerElement(iframe_element);
return !iframe ||
(!iframe->IsWebLocalFrame() && !iframe->IsWebRemoteFrame());
});
if (iframe_elements.size() > kMaxExtractableChildFrames) {
iframe_elements.clear();
}
if (control_elements.empty() && iframe_elements.empty()) {
return std::nullopt;
}
}
// Extracts fields from `control_elements` into `fields` and sets
// `child_frames[i].predecessor` to the field index of the last field that
// precedes the `i`th child frame.
//
// After each iteration, `iframe_elements[next_iframe]` is the first iframe
// that comes after `control_elements[i]`.
//
// After the loop,
// - `fields` is completely populated;
// - `child_frames` has the correct size and `child_frames[i].predecessor` is
// set to the correct value, but `child_frames[i].token` is not initialized
// yet.
std::vector<FormFieldData> fields;
std::vector<ShadowFieldData> shadow_fields;
std::vector<FrameTokenWithPredecessor> child_frames;
fields.reserve(control_elements.size());
shadow_fields.reserve(control_elements.size());
child_frames.resize(iframe_elements.size());
size_t next_iframe = 0;
for (const WebFormControlElement& control_element : control_elements) {
DCHECK(control_element.IsConnected());
DCHECK(IsAutofillableElement(control_element));
fields.emplace_back();
shadow_fields.emplace_back();
WebFormControlElementToFormField(form_element, control_element,
&field_data_manager, extract_options,
&fields.back(), &shadow_fields.back());
// Skips the iframes that `control_element` is not a DOM predecessor of, so
// that `next_iframe` ends up at the first iframe after `control_element`
// (assuming IsDOMPredecessor(a, b, ...) means that `a` comes before `b` in
// DOM order -- TODO confirm against its definition).
while (next_iframe < iframe_elements.size() &&
!IsDOMPredecessor(control_element, iframe_elements[next_iframe],
form_element)) {
++next_iframe;
}
// `control_element`, and thus `fields[i]` where `i` is the index of
// `control_element`, is so far the last field seen before the
// `next_iframe`th frame and all frames after it, i.e., the `k`th frames for
// `k >= next_iframe`. If a later field also precedes the `k`th frame,
// `child_frames[k].predecessor` will be overwritten in a later iteration.
for (size_t k = next_iframe; k < iframe_elements.size(); ++k) {
child_frames[k].predecessor = fields.size() - 1;
}
if (fields.size() > kMaxExtractableFields) {
return std::nullopt;
}
}
// Extracts field labels from the <label for="..."> tags.
// NOTE(review): presumably MatchLabelsAndFields() iterates through all
// <label>s and looks up the fields they refer to; no lookup structure is built
// in this function.
// Iterating through the fields and looking at their `WebElement::Labels()`
// unfortunately doesn't scale, as each call corresponds to a DOM traversal.
MatchLabelsAndFields(document, fields, std::move(shadow_fields));
// Infers field labels from other tags or <labels> without for="...".
InferLabelForElements(control_elements, fields);
// Extracts the frame tokens of |iframe_elements|.
DCHECK_EQ(child_frames.size(), iframe_elements.size());
for (size_t i = 0; i < iframe_elements.size(); ++i) {
WebFrame* iframe = WebFrame::FromFrameOwnerElement(iframe_elements[i]);
if (iframe && iframe->IsWebLocalFrame()) {
child_frames[i].token = LocalFrameToken(
iframe->ToWebLocalFrame()->GetLocalFrameToken().value());
} else if (iframe && iframe->IsWebRemoteFrame()) {
child_frames[i].token = RemoteFrameToken(
iframe->ToWebRemoteFrame()->GetRemoteFrameToken().value());
} else if (base::FeatureList::IsEnabled(
features::kAutofillOptimizeFormExtraction)) {
// With the feature enabled, iframe elements without a local or remote frame
// were already erased from `iframe_elements` above.
NOTREACHED();
}
}
if (!base::FeatureList::IsEnabled(
features::kAutofillOptimizeFormExtraction)) {
// Without the feature, the limits are enforced only now, after extraction.
// Child frames whose token is still empty are dropped first.
std::erase_if(child_frames, [](const auto& child_frame) {
return std::visit([](const auto& token) { return token.is_empty(); },
child_frame.token);
});
if (child_frames.size() > kMaxExtractableChildFrames) {
child_frames.clear();
}
const bool success = (!fields.empty() || !child_frames.empty()) &&
fields.size() <= kMaxExtractableFields;
if (!success) {
return std::nullopt;
}
}
base::UmaHistogramCounts1000(!form_element
? "Autofill.ExtractFormUnowned.FieldCount2"
: "Autofill.ExtractFormOwned.FieldCount2",
fields.size());
FormData form;
if (!form_element) {
// For unowned fields, only the action-empty flag is set; the renderer ID and
// main frame origin keep their default values, as the DCHECKs verify.
DCHECK(form.renderer_id().is_null());
DCHECK(form.main_frame_origin().opaque());
form.set_is_action_empty(true);
} else {
form.set_name(GetFormIdentifier(form_element));
form.set_id_attribute(form_element.GetIdAttribute().Utf16());
form.set_name_attribute(GetAttribute<kName>(form_element).Utf16());
form.set_renderer_id(GetFormRendererId(form_element));
form.set_action(GetCanonicalActionForForm(form_element));
// Falls back to the raw action if the canonical action is not a valid URL.
if (!form.action().is_valid()) {
form.set_action(blink::WebStringToGURL(form_element.Action()));
}
form.set_is_action_empty(form_element.Action().IsNull() ||
form_element.Action().IsEmpty());
}
form.set_fields(std::move(fields));
form.set_child_frames(std::move(child_frames));
form.set_button_titles(GetButtonTitles(form_element, button_titles_cache));
// `likely_contains_captcha` is only needed for Android for the autosubmission
// after filling credentials from TTF bottom sheet.
#if BUILDFLAG(IS_ANDROID)
form.set_likely_contains_captcha(
std::ranges::any_of(iframe_elements, IsLikelyCaptchaIframe));
#endif
return form;
}
} // namespace
InferredLabel::InferredLabel(std::u16string label, LabelSource source)
: label(std::move(label)), source(source) {}
// static
// Returns an InferredLabel holding the whitespace-trimmed `label`, or
// std::nullopt if `label` consists solely of whitespace and filler characters.
std::optional<InferredLabel> InferredLabel::BuildIfValid(std::u16string label,
                                                         LabelSource source) {
  // List of characters a label can't be entirely made of (this list can grow).
  static constexpr std::u16string_view kFillerChars =
      u"+*:-\u2013()";  // U+2013 is the En Dash "–".
  const std::u16string_view whitespace(base::kWhitespaceUTF16);
  auto is_filler_or_whitespace = [&whitespace](char16_t c) {
    return base::Contains(kFillerChars, c) || base::Contains(whitespace, c);
  };
  if (std::ranges::all_of(label, is_filler_or_whitespace)) {
    return std::nullopt;
  }
  base::TrimWhitespace(label, base::TRIM_ALL, &label);
  return InferredLabel{std::move(label), source};
}
// Returns the autocomplete attribute of `element` as UTF-8, or a fixed
// placeholder string if the attribute is longer than kMaxStringLength.
std::string GetAutocompleteAttribute(const WebElement& element) {
  std::string value = GetAttribute<kAutocomplete>(element).Utf8();
  const bool too_long = value.size() > kMaxStringLength;
  if (!too_long) {
    return value;
  }
  // Discard overly long attribute values to avoid DOS-ing the browser
  // process. However, send over a default string to indicate that the
  // attribute was present.
  return "x-max-data-length-exceeded";
}
// Timed wrapper around ExtractFormDataWithFieldsAndFrames(): the extraction
// runs under a ScopedCallTimer labeled "ExtractFormData".
std::optional<FormData> ExtractFormData(
    const WebDocument& document,
    const WebFormElement& form_element,
    const FieldDataManager& field_data_manager,
    const CallTimerState& timer_state,
    ButtonTitlesCache* button_titles_cache,
    DenseSet<ExtractOption> extract_options) {
  ScopedCallTimer timer("ExtractFormData", timer_state);
  std::optional<FormData> extracted_form = ExtractFormDataWithFieldsAndFrames(
      document, form_element, field_data_manager, button_titles_cache,
      extract_options);
  return extracted_form;
}
// Returns the action of `form`, completed against the form's document and
// passed through StripAuthAndParams().
GURL GetCanonicalActionForForm(const WebFormElement& form) {
  const WebString raw_action = form.Action();
  // A missing 'action' attribute implies the current URL, which is what
  // completing the empty string yields.
  const WebString effective_action =
      raw_action.IsNull() ? WebString("") : raw_action;
  return StripAuthAndParams(
      GURL(form.GetDocument().CompleteURL(effective_action)));
}
bool IsTextAreaElement(const WebFormControlElement& element) {
return GetAutofillFormControlType(element) == FormControlType::kTextArea;
}
// Returns true iff `element` is a text area or a text input.
bool IsTextAreaElementOrTextInput(const WebFormControlElement& element) {
  if (IsTextAreaElement(element)) {
    return true;
  }
  return IsTextInput(element);
}
// Returns true iff `element` has an Autofill form control type, i.e., iff
// GetAutofillFormControlType() yields a value for it.
bool IsAutofillableElement(const WebFormControlElement& element) {
  return GetAutofillFormControlType(element) != std::nullopt;
}
// Maps a Blink form control type to the corresponding Autofill form control
// type, or to std::nullopt if Autofill does not handle that type.
std::optional<FormControlType> ToAutofillFormControlType(
    blink::mojom::FormControlType type) {
  using BlinkType = blink::mojom::FormControlType;
  // Note that adding a new field type here automatically makes
  // IsAutofillableElement() return true.
  switch (type) {
    // Types that Autofill does not handle.
    case BlinkType::kButtonButton:
    case BlinkType::kButtonSubmit:
    case BlinkType::kButtonReset:
    case BlinkType::kButtonPopover:
    case BlinkType::kFieldset:
    case BlinkType::kInputButton:
    case BlinkType::kInputColor:
    case BlinkType::kInputDatetimeLocal:
    case BlinkType::kInputFile:
    case BlinkType::kInputHidden:
    case BlinkType::kInputImage:
    case BlinkType::kInputRange:
    case BlinkType::kInputReset:
    case BlinkType::kInputSubmit:
    case BlinkType::kInputTime:
    case BlinkType::kInputWeek:
    case BlinkType::kOutput:
    case BlinkType::kSelectMultiple:
      return std::nullopt;

    // Date inputs are only handled if the feature is enabled.
    case BlinkType::kInputDate:
      return base::FeatureList::IsEnabled(features::kAutofillExtractInputDate)
                 ? std::make_optional(FormControlType::kInputDate)
                 : std::nullopt;

    // Types with a direct Autofill counterpart.
    case BlinkType::kInputCheckbox:
      return FormControlType::kInputCheckbox;
    case BlinkType::kInputEmail:
      return FormControlType::kInputEmail;
    case BlinkType::kInputMonth:
      return FormControlType::kInputMonth;
    case BlinkType::kInputNumber:
      return FormControlType::kInputNumber;
    case BlinkType::kInputPassword:
      return FormControlType::kInputPassword;
    case BlinkType::kInputRadio:
      return FormControlType::kInputRadio;
    case BlinkType::kInputSearch:
      return FormControlType::kInputSearch;
    case BlinkType::kInputTelephone:
      return FormControlType::kInputTelephone;
    case BlinkType::kInputText:
      return FormControlType::kInputText;
    case BlinkType::kInputUrl:
      return FormControlType::kInputUrl;
    case BlinkType::kSelectOne:
      return FormControlType::kSelectOne;
    case BlinkType::kTextArea:
      return FormControlType::kTextArea;
  }
  // Reached only for values outside of the enumerators listed above.
  return std::nullopt;
}
std::optional<FormControlType> GetAutofillFormControlType(
const WebFormControlElement& element) {
return element
? ToAutofillFormControlType(element.FormControlTypeForAutofill())
: std::nullopt;
}
bool IsWebauthnTaggedElement(const WebFormControlElement& element) {
const std::optional<AutocompleteParsingResult> parsing_result =
ParseAutocompleteAttribute(GetAutocompleteAttribute(element));
return parsing_result.has_value() && parsing_result->webauthn;
}
// Returns true iff `element` is enabled and not read-only.
bool IsElementEditable(const WebInputElement& element) {
  if (!element.IsEnabled()) {
    return false;
  }
  return !element.IsReadOnly();
}
// Returns the FormRendererId derived from the DOM node ID of `e`, or a
// default-constructed FormRendererId if `e` is null.
FormRendererId GetFormRendererId(const WebElement& e) {
  // This function is intended only for WebFormElements and for contenteditables
  // that aren't WebFormControlElement. However, an element that used to be
  // contenteditable may dynamically change to a non-contenteditable. Therefore,
  // instead of checking that `e` is a WebFormElement or contenteditable, we
  // just check that `e` is not a WebFormControlElement to protect against
  // confusions between Get{Form,Field}RendererId().
  CHECK(!e.DynamicTo<WebFormControlElement>());
  return e ? FormRendererId(e.GetDomNodeId()) : FormRendererId();
}
// Returns the FieldRendererId derived from the DOM node ID of `e`.
FieldRendererId GetFieldRendererId(const WebElement& e) {
  // This function is intended only for WebFormControlElements and for
  // contenteditables that aren't WebFormElement. However, an element that used
  // to be contenteditable may dynamically change to a non-contenteditable.
  // Therefore, instead of checking that `e` is a WebFormControlElement or
  // contenteditable, we just check that `e` is not a WebFormElement to protect
  // against confusions between Get{Form,Field}RendererId().
  CHECK(!e.DynamicTo<WebFormElement>());
  const auto dom_node_id = e.GetDomNodeId();
  return FieldRendererId(dom_node_id);
}
// Returns the text direction to report for `element`: an explicit
// 'text-align: left|right' wins, otherwise the element's 'direction' is used.
// See https://crbug.com/482339
base::i18n::TextDirection GetTextDirectionForElement(
    const WebFormControlElement& element) {
  using Alignment = WebFormControlElement::Alignment;
  switch (element.AlignmentForFormData()) {
    case Alignment::kNotSet:
      // No explicit alignment: fall back to the 'direction'.
      return element.DirectionForFormData();
    case Alignment::kLeft:
      return base::i18n::LEFT_TO_RIGHT;
    case Alignment::kRight:
      return base::i18n::RIGHT_TO_LEFT;
  }
}
std::vector<WebFormControlElement> GetOwnedAutofillableFormControls(
const WebDocument& document,
const WebFormElement& form_element) {
std::vector<WebFormControlElement> elements =
GetOwnedFormControls(document, form_element);
std::erase_if(elements, std::not_fn(&IsAutofillableElement));
return elements;
}
// Returns the FormData of the form that owns `element` (obtained from
// `form_cache`, which may extract it), paired with a reference to the
// FormFieldData among that form's fields whose renderer ID is `element`'s.
//
// If the owning form cannot be extracted, the returned FormData contains just
// the single field for `element`.
//
// Returns std::nullopt if `element` is not connected or not autofillable. It
// also returns std::nullopt -- after recording crash keys and hitting a
// NOTREACHED() -- if `element`'s field is not among the fields of the form.
std::optional<std::pair<FormData, raw_ref<const FormFieldData>>>
FindFormAndFieldForFormControlElement(
    const WebFormControlElement& element,
    const FieldDataManager& field_data_manager,
    const CallTimerState& timer_state,
    form_util::ButtonTitlesCache* button_titles_cache,
    DenseSet<ExtractOption> extract_options,
    const SynchronousFormCache& form_cache) {
  DCHECK(element);
  if (!element.IsConnected() || !IsAutofillableElement(element)) {
    return std::nullopt;
  }
  WebDocument document = element.GetDocument();
  WebFormElement owning_form = element.GetOwningFormForAutofill();
  std::optional<FormData> form = form_cache.GetOrExtractForm(
      document, owning_form, field_data_manager, timer_state,
      button_titles_cache, extract_options);
  const bool extract_form_data_succeeded = form.has_value();
  if (!form) {
    // If we couldn't extract the form, ignore the fields other than `element`.
    // This gives Autocomplete and other handlers the chance to handle it.
    FormFieldData field;
    WebFormControlElementToFormField(owning_form, element, nullptr,
                                     extract_options, &field,
                                     /*shadow_data=*/nullptr);
    form.emplace();
    form->set_fields({std::move(field)});
  }
  if (auto it = std::ranges::find(form->fields(), GetFieldRendererId(element),
                                  &FormFieldData::renderer_id);
      it != form->fields().end()) {
    return std::make_optional(std::make_pair(std::move(*form), raw_ref(*it)));
  }

  // This is not reachable if the following holds:
  // ```
  // base::Contains(GetOwnedFormControls(element.GetOwningFormForAutofill()),
  //                element)
  // ```
  // This does not hold if `element` is an unowned element in a
  // shadow DOM and kAutofillIncludeShadowDomInUnassociatedListedElements is
  // disabled. Then `element.GetOwningFormForAutofill()` returns the unowned
  // form, but `GetOwnedFormControls()` does not include the field.
  // See crbug.com/347059988 for more details.
  //
  // Everything below only gathers diagnostics for the crash report.
  GURL url;
  if (document) {
    url = document.Url();
  }
  auto get_id = [](const WebElement& e) {
    return e ? e.GetIdAttribute().Utf8() : "";
  };
  // Whether `form` has no <form> among its shadow-including ancestors.
  auto is_top_level = [](const WebFormElement form) {
    WebNode n = form;
    while (n && (n = n.ParentOrShadowHostNode())) {
      if (n.DynamicTo<WebFormElement>()) {
        return false;
      }
    }
    return true;
  };
  // Whether there is a <form> on the path from `elem` up to (excluding) `form`.
  auto has_nested_form = [](const WebFormElement form,
                            WebFormControlElement elem) {
    for (WebNode n = elem; n && n != form; n = n.ParentOrShadowHostNode()) {
      if (n.DynamicTo<WebFormElement>()) {
        return true;
      }
    }
    return false;
  };
  // Number of controls owned by `form`, or -1 without a document.
  auto get_form_size = [&document](const WebFormElement& form) {
    return document
               ? static_cast<int>(GetOwnedFormControls(document, form).size())
               : -1;
  };
  WebFormElement assoc_form_element = element.Form();  // nocheck
  // clang-format off
  SCOPED_CRASH_KEY_STRING64("Autofill", "url", url.spec());
  SCOPED_CRASH_KEY_BOOL("Autofill", "ExtractFormData_succeeded", extract_form_data_succeeded);
  SCOPED_CRASH_KEY_NUMBER("Autofill", "extracted_form_size", form->fields().size());
  SCOPED_CRASH_KEY_STRING64("Autofill", "elem_tag_name", element.TagName().Utf8());
  SCOPED_CRASH_KEY_STRING64("Autofill", "elem_id", get_id(element));
  SCOPED_CRASH_KEY_STRING64("Autofill", "elem_form_attr", element.GetAttribute("form").Utf8());
  SCOPED_CRASH_KEY_NUMBER("Autofill", "elem_form_control_type", base::to_underlying(element.FormControlType()));  // nocheck
  SCOPED_CRASH_KEY_BOOL("Autofill", "elem_autofillable", IsAutofillableElement(element));
  SCOPED_CRASH_KEY_BOOL("Autofill", "elem_document", !!document);
  SCOPED_CRASH_KEY_BOOL("Autofill", "elem_connected", element.IsConnected());
  SCOPED_CRASH_KEY_BOOL("Autofill", "elem_in_shadow_dom", !!element.OwnerShadowHost());
#define SCOPED_CRASH_KEYS_FOR_FORM(prefix, f) \
  SCOPED_CRASH_KEY_BOOL("Autofill", #prefix "_form_non_null", !!f); \
  SCOPED_CRASH_KEY_BOOL("Autofill", #prefix "_form_connected", f && f.IsConnected()); \
  SCOPED_CRASH_KEY_BOOL("Autofill", #prefix "_form_in_shadow_dom", f && !!f.OwnerShadowHost()); \
  SCOPED_CRASH_KEY_BOOL("Autofill", #prefix "_form_in_same_dom", f && element.OwnerShadowHost() == f.OwnerShadowHost()); \
  SCOPED_CRASH_KEY_BOOL("Autofill", #prefix "_form_is_top_level", is_top_level(f)); \
  SCOPED_CRASH_KEY_BOOL("Autofill", #prefix "_form_has_nested_form", has_nested_form(f, element)); \
  SCOPED_CRASH_KEY_NUMBER("Autofill", #prefix "_form_size", get_form_size(f)); \
  SCOPED_CRASH_KEY_STRING64("Autofill", #prefix "_form_id", get_id(f));
  SCOPED_CRASH_KEYS_FOR_FORM(assoc, assoc_form_element);
  SCOPED_CRASH_KEYS_FOR_FORM(owng, owning_form);
  // Undefine the helper macro under the name it was defined with so that it
  // does not leak into the rest of the translation unit.
#undef SCOPED_CRASH_KEYS_FOR_FORM
  // clang-format on
  NOTREACHED(base::NotFatalUntil::M139);
  return std::nullopt;
}
std::optional<FormData> FindFormForContentEditable(
const WebElement& content_editable) {
if (content_editable.DynamicTo<WebFormElement>() ||
content_editable.DynamicTo<WebFormControlElement>() ||
!content_editable.IsContentEditable() ||
content_editable != content_editable.RootEditableElement() ||
!content_editable.IsConnected()) {
return std::nullopt;
}
std::vector<FormFieldData> fields(1);
FormFieldData& field = fields.back();
WebDocument document = content_editable.GetDocument();
field.set_id_attribute(content_editable.GetIdAttribute().Utf16());
field.set_name_attribute(GetAttribute<kName>(content_editable).Utf16());
field.set_name(!field.id_attribute().empty() ? field.id_attribute()
: field.name_attribute());
field.set_renderer_id(GetFieldRendererId(content_editable));
field.set_host_form_id(GetFormRendererId(content_editable));
field.set_form_control_type(FormControlType::kContentEditable);
field.set_autocomplete_attribute(GetAutocompleteAttribute(content_editable));
field.set_parsed_autocomplete(
ParseAutocompleteAttribute(field.autocomplete_attribute()));
if (auto* local_frame = document.GetFrame()) {
if (auto* render_frame = content::RenderFrame::FromWebFrame(local_frame)) {
field.set_bounds(gfx::RectF(render_frame->ConvertViewportToWindow(
content_editable.BoundsInWidget())));
}
}
if (base::EqualsCaseInsensitiveASCII(
GetAttribute<kRole>(content_editable).Utf16(), "presentation")) {
field.set_role(FormFieldData::RoleAttribute::kPresentation);
}
if (HasAttribute<kClass>(content_editable)) {
field.set_css_classes(GetAttribute<kClass>(content_editable).Utf16());
}
field.set_aria_label(GetAriaLabel(document, content_editable));
field.set_aria_description(GetAriaDescription(document, content_editable));
// TextContentAbridged() includes hidden elements and does not add linebreaks.
// If this is not sufficient in the future, consider calling
// HTMLElement::innerText(), which returns the text "as rendered" (i.e., it
// inserts whitespace at the right places and it ignores "display:none"
// subtrees), but is significantly more expensive because it triggers a
// layout.
field.set_value(
content_editable.TextContentAbridged(kMaxStringLength).Utf16());
DCHECK_LE(field.value().length(), kMaxStringLength);
field.set_selected_text(content_editable.SelectedText().Utf16().substr(
0, kMaxSelectedTextLength));
field.set_allows_writing_suggestions(content_editable.WritingSuggestions());
FormData form;
form.set_renderer_id(GetFormRendererId(content_editable));
form.set_id_attribute(content_editable.GetIdAttribute().Utf16());
form.set_name_attribute(GetAttribute<kName>(content_editable).Utf16());
form.set_name(!form.id_attribute().empty() ? form.id_attribute()
: form.name_attribute());
form.set_is_action_empty(true);
form.set_fields(std::move(fields));
return form;
}
// Fills or previews (depending on `action_persistence`) the form control
// elements identified by the renderer IDs in `fields`.
//
// Entries of `fields` whose renderer ID does not resolve to a form control
// element are ignored. If `action_type` is kFill, entries for which
// ShouldSkipFillField() returns true are ignored as well.
//
// Returns, for every element that was filled or previewed, its renderer ID
// paired with the autofill state the element had *before* this call. The
// focused element (if any) comes first, followed by the unfocused ones in the
// order of `fields`.
//
// Note: `document` is not referenced in this function body.
std::vector<std::pair<FieldRendererId, WebAutofillState>> ApplyFieldsAction(
const WebDocument& document,
base::span<const FormFieldData::FillData> fields,
mojom::FormActionType action_type,
mojom::ActionPersistence action_persistence,
FieldDataManager& field_data_manager) {
// Collects, for each element that is filled or previewed below, its renderer
// ID together with the autofill state it had before the action. This is the
// return value of the function.
std::vector<std::pair<FieldRendererId, WebAutofillState>> filled_fields;
filled_fields.reserve(fields.size());
// Pairs an entry of `fields` with the DOM element it resolves to. Either both
// members are set or both are null; the bool conversion reports which.
struct Field {
explicit operator bool() const {
DCHECK_EQ(!data, !element);
return data;
}
raw_ptr<const FormFieldData::FillData> data = nullptr;
WebFormControlElement element;
};
// We first collect the focused (if one exists) and the unfocused autofillable
// fields, and then autofill them in the following order:
//
// 1. Autofill the focused field.
// 2. Send a blur event for the initially focused field.
// 3. For each unfocused field, focus -> autofill -> blur.
// 4. Send a focus event for the initially focused field.
//
// We currently do not emit other events like keydown/keyup or paste and
// beforeinput/textInput/input.
Field focused_field;
std::vector<Field> unfocused_fields;
unfocused_fields.reserve(fields.size());
// Step 0: Find the focused and the unfocused fields to fill.
for (const FormFieldData::FillData& field : fields) {
WebFormControlElement element =
GetFormControlByRendererId(field.renderer_id);
if (!element) {
// The renderer ID does not resolve to a usable form control element.
continue;
}
if ((action_type == mojom::FormActionType::kFill &&
ShouldSkipFillField(field, element))) {
continue;
}
if (element.Focused()) {
focused_field = {&field, element};
} else {
unfocused_fields.emplace_back(&field, element);
}
}
// Step 1: Autofill the initiating element.
if (focused_field) {
// Record the element's autofill state from before the action, then fill or
// preview it depending on `action_persistence`.
// NOTE(review): Preview is presumably a no-op when it would not change the
// field's value (so that an autofilled field keeps its kAutofilled state and
// highlighting); that logic is not in this function -- confirm in
// PreviewFormField().
filled_fields.emplace_back(GetFieldRendererId(focused_field.element),
focused_field.element.GetAutofillState());
if (action_persistence == mojom::ActionPersistence::kFill) {
FillFormField(*focused_field.data, /*is_initiating_node=*/true,
focused_field.element, field_data_manager);
} else {
PreviewFormField(*focused_field.data, focused_field.element,
field_data_manager);
}
}
// If there is no other field to be autofilled, sending the blur event and
// then the focus event for the initiating element does not make sense.
if (unfocused_fields.empty()) {
return filled_fields;
}
// Step 2: A blur event is emitted for the focused element if it is the
// initiating element before all other elements are autofilled.
if (action_persistence == mojom::ActionPersistence::kFill && focused_field) {
focused_field.element.DispatchBlurEvent();
}
// Step 3: Autofill the non-initiating elements.
// WebFormControlElement::SetAutofillValue fires the focus and blur
// events.
for (Field& field : unfocused_fields) {
// As in step 1, the autofill state is recorded before it is changed.
filled_fields.emplace_back(GetFieldRendererId(field.element),
field.element.GetAutofillState());
if (action_persistence == mojom::ActionPersistence::kFill) {
FillFormField(*field.data, /*is_initiating_node=*/false, field.element,
field_data_manager);
} else {
PreviewFormField(*field.data, field.element, field_data_manager);
}
}
// Step 4: A focus event is emitted for the initiating element after
// autofilling is completed. It is not intended to work for preview.
if (action_persistence == mojom::ActionPersistence::kFill && focused_field) {
focused_field.element.DispatchFocusEvent();
}
return filled_fields;
}
// Undoes a preview: for every (element, prior state) pair in
// `previewed_elements`, resets the element's suggested value to an empty
// WebString and restores the recorded autofill state.
void ClearPreviewedElements(
    base::span<std::pair<WebFormControlElement, WebAutofillState>>
        previewed_elements) {
  for (std::pair<WebFormControlElement, WebAutofillState>& previewed :
       previewed_elements) {
    WebFormControlElement& element = previewed.first;
    // AutofillAgent never adds null elements to `previewed_elements_`.
    DCHECK(element);
    element.SetSuggestedValue(WebString());
    element.SetAutofillState(previewed.second);
  }
}
bool IsOwnedByFrame(const WebNode& node, content::RenderFrame* frame) {
if (!node || !frame) {
return false;
}
const WebDocument& doc = node.GetDocument();
WebLocalFrame* node_frame = doc ? doc.GetFrame() : nullptr;
WebLocalFrame* expected_frame = frame->GetWebFrame();
return expected_frame && node_frame &&
expected_frame->GetLocalFrameToken() ==
node_frame->GetLocalFrameToken();
}
bool MaybeWasOwnedByFrame(const WebNode& node, content::RenderFrame* frame) {
if (!node || !frame) {
return true;
}
const WebDocument& doc = node.GetDocument();
WebLocalFrame* node_frame = doc ? doc.GetFrame() : nullptr;
WebLocalFrame* expected_frame = frame->GetWebFrame();
return !expected_frame || !node_frame ||
expected_frame->GetLocalFrameToken() ==
node_frame->GetLocalFrameToken();
}
bool IsWebpageEmpty(const WebLocalFrame* frame) {
WebDocument document = frame->GetDocument();
return IsWebElementEmpty(document.Head()) &&
IsWebElementEmpty(document.Body());
}
std::u16string FindChildText(const WebNode& node) {
return FindChildTextWithIgnoreList(node, std::set<WebNode>());
}
// Returns the button titles inferred for `web_form`.
//
// A null `web_form` yields an empty list. If `button_titles_cache` is null, the
// titles are inferred on every call; otherwise they are inferred at most once
// per form renderer ID and served from the cache afterwards.
ButtonTitleList GetButtonTitles(const WebFormElement& web_form,
                                ButtonTitlesCache* button_titles_cache) {
  // It makes no sense to collect button titles for a synthetic form built
  // from unowned fields, as it's time-consuming and leads to scraping
  // many irrelevant elements.
  if (!web_form) {
    return {};
  }
  // No cache was provided: infer the titles directly without memoizing them.
  if (!button_titles_cache) {
    return InferButtonTitlesForForm(web_form);
  }
  // Reserve a (possibly already existing) cache slot for this form and only
  // compute the titles if the slot was newly created.
  auto [cache_entry, newly_inserted] = button_titles_cache->emplace(
      GetFormRendererId(web_form), ButtonTitleList());
  if (newly_inserted) {
    cache_entry->second = InferButtonTitlesForForm(web_form);
  }
  return cache_entry->second;
}
// Resolves `form_renderer_id` to a form element. Returns a null WebFormElement
// if the ID is null, if the DOM node with that ID is not a form element, or if
// the form is not connected or its document has no frame.
WebFormElement GetFormByRendererId(FormRendererId form_renderer_id) {
  if (!form_renderer_id) {
    return WebFormElement();
  }
  WebFormElement candidate = WebNode::FromDomNodeId(form_renderer_id.value())
                                 .DynamicTo<WebFormElement>();
  if (!candidate || !candidate.IsConnected() ||
      !candidate.GetDocument().GetFrame()) {
    return WebFormElement();
  }
  return candidate;
}
// Resolves `queried_form_control` to a form control element. Returns a null
// WebFormControlElement if the ID is null, if the DOM node with that ID is not
// a form control element, or if the element is not connected or its document
// has no frame.
WebFormControlElement GetFormControlByRendererId(
    FieldRendererId queried_form_control) {
  if (!queried_form_control) {
    return WebFormControlElement();
  }
  WebFormControlElement candidate =
      WebNode::FromDomNodeId(queried_form_control.value())
          .DynamicTo<WebFormControlElement>();
  if (!candidate || !candidate.IsConnected() ||
      !candidate.GetDocument().GetFrame()) {
    return WebFormControlElement();
  }
  return candidate;
}
// Resolves `field_renderer_id` to an element and returns it if it is
// contenteditable. Returns a null WebElement if the DOM node with that ID is
// not an element or is not contenteditable.
WebElement GetContentEditableByRendererId(FieldRendererId field_renderer_id) {
  WebNode node = WebNode::FromDomNodeId(*field_renderer_id);
  WebElement candidate = node.DynamicTo<WebElement>();
  if (!candidate || !candidate.IsContentEditable()) {
    return WebElement();
  }
  return candidate;
}
void TraverseDomForFourDigitCombinations(
const WebDocument& document,
base::OnceCallback<void(const std::vector<std::string>&)>
potential_matches) {
re2::RE2 kFourDigitRegex("(?:\\D|^)(\\d{4})(?:\\D|$)");
base::flat_set<std::string> matches;
// Iterate through each form control element in the DOM and extract the
// elements nearby in search of four digit combinations.
std::vector<WebFormControlElement> form_control_elements;
for (const WebFormElement& form : document.GetTopLevelForms()) {
std::ranges::move(GetOwnedFormControls(document, form),
std::back_inserter(form_control_elements));
}
std::ranges::move(GetOwnedFormControls(document, WebFormElement()),
std::back_inserter(form_control_elements));
auto extract_four_digit_combinations = [&](WebNode node) {
if (!node.IsTextNode()) {
return;
}
std::string node_text = node.NodeValue().Utf8();
std::string_view input(node_text);
std::string match;
while (matches.size() < kMaxFourDigitCombinationMatches &&
re2::RE2::FindAndConsume(&input, kFourDigitRegex, &match)) {
matches.insert(match);
}
};
// Returns whether the traversal reached a form control element.
auto iterate_and_extract_four_digit_combinations = [&](WebNode node,
bool forward) {
for (int i = 0; i < kFormNeighborNodesToTraverse; ++i) {
if (!node) {
break;
}
extract_four_digit_combinations(node);
node = NextWebNode(node, forward);
if (auto form_control_element = node.DynamicTo<WebFormControlElement>()) {
// Reached next form control element.
return true;
}
}
return false;
};
bool reached_form_control_before = false;
for (const WebFormControlElement& element : form_control_elements) {
// If a forward search ended at a form control, we don't need a backward
// search for that form control.
if (!reached_form_control_before) {
iterate_and_extract_four_digit_combinations(element,
/*forward=*/false);
}
reached_form_control_before =
iterate_and_extract_four_digit_combinations(element, /*forward=*/true);
if (matches.size() >= kMaxFourDigitCombinationMatches) {
break;
}
}
// Check for consecutive numbers as a potential indicator that we've parsed
// a year <select> element of a credit card form. This indicates that a CVC
// field is not a standalone CVC element.
if (matches.size() > 2) {
auto iter = matches.begin();
int consecutive_numbers = 0;
int previous_combination = 0;
base::StringToInt(*iter, &previous_combination);
iter++;
for (; iter != matches.end(); ++iter) {
int current_combination = 0;
base::StringToInt(*iter, &current_combination);
if (current_combination == previous_combination + 1) {
consecutive_numbers++;
} else {
consecutive_numbers = 0;
}
if (consecutive_numbers > kMaxConsecutiveInFourDigitCombinationMatches) {
// Clear all matches as we presume this is not standalone cvc if
// there is a year input field.
matches.clear();
break;
}
previous_combination = current_combination;
}
}
std::move(potential_matches).Run(std::move(matches).extract());
}
// Searches `document` for the text node that holds the final checkout amount.
//
// A text node matching `price_regex` qualifies if, within at most
// `number_of_ancestor_levels_to_search` ancestor levels, it has an ancestor
// that (a) contains no other price node and (b) contains a text node matching
// `label_regex`. Returns the node value of the first qualifying price node, or
// an empty string if there is none.
std::string ExtractFinalCheckoutAmountFromDom(
    const blink::WebDocument& document,
    std::string_view price_regex,
    std::string_view label_regex,
    size_t number_of_ancestor_levels_to_search) {
  std::vector<WebNode> price_nodes =
      document.FindAllTextNodesMatchingRegex(WebString::FromUTF8(price_regex));
  if (price_nodes.empty()) {
    return "";
  }
  std::vector<WebNode> label_nodes =
      document.FindAllTextNodesMatchingRegex(WebString::FromUTF8(label_regex));
  if (label_nodes.empty()) {
    return "";
  }

  // Every node that has a label node somewhere in its subtree. Allows a cheap
  // "does this ancestor contain a label?" check later on.
  std::set<WebNode> label_ancestors;
  for (WebNode& label : label_nodes) {
    for (WebNode ancestor = label.ParentNode(); ancestor;
         ancestor = ancestor.ParentNode()) {
      // Once an ancestor is already known, so are all of its ancestors.
      if (!label_ancestors.insert(ancestor).second) {
        break;
      }
    }
  }

  // For every ancestor of a price node, the number of price nodes below it.
  // Allows a cheap "does this ancestor contain more than one price?" check.
  std::map<WebNode, size_t> price_count_by_ancestor;
  // For every price node, the ancestor that is examined next. The ancestor is
  // moved one level up whenever the examination at the current level neither
  // succeeds nor rules the price node out.
  std::vector<std::pair<WebNode, WebNode>> search_frontier;
  search_frontier.reserve(price_nodes.size());
  for (WebNode& price : price_nodes) {
    WebNode first_ancestor = price.ParentNode();
    // The search for this price node starts at its direct parent.
    search_frontier.emplace_back(price, first_ancestor);
    for (WebNode ancestor = first_ancestor; ancestor;
         ancestor = ancestor.ParentNode()) {
      ++price_count_by_ancestor[ancestor];
    }
  }

  // Examine the ancestors level by level, advancing all price nodes in
  // lockstep.
  for (size_t level = 0; level < number_of_ancestor_levels_to_search;
       ++level) {
    for (std::pair<WebNode, WebNode>& entry : search_frontier) {
      WebNode& ancestor = entry.second;
      if (!ancestor) {
        // This price node is done: it either ran out of ancestors (it is close
        // to the DOM root) or was ruled out below. Since the ancestor is never
        // updated again, the node is skipped on all further levels, too.
        continue;
      }
      if (price_count_by_ancestor[ancestor] > 1) {
        // More than one price node lives under this ancestor, so this price
        // node is ruled out for good.
        ancestor.Reset();
        continue;
      }
      if (label_ancestors.contains(ancestor)) {
        // The ancestor contains a label node and exactly one price node, so
        // that price node is the final checkout amount.
        return entry.first.NodeValue().Utf8();
      }
      ancestor = ancestor.ParentNode();
    }
  }

  // Notify the caller that no final checkout amount was found.
  return "";
}
// Sets `field`'s user input from the value stored in `field_data_manager` for
// `element_id`, if applicable.
//
// This only happens if the stored properties mask says that the field was
// typed into by the user or autofilled. In that case, the stored user input
// (truncated to kMaxStringLength) is written to `field` if the field is a
// password field or if IsScriptModifiedValueAcceptable() rejects the field's
// current value.
void MaybeUpdateUserInput(FormFieldData& field,
                          FieldRendererId element_id,
                          const FieldDataManager& field_data_manager) {
  // Fields unknown to `field_data_manager` get an empty properties mask.
  FieldPropertiesMask properties_mask = FieldPropertiesMask();
  if (field_data_manager.HasFieldData(element_id)) {
    properties_mask = field_data_manager.GetFieldPropertiesMask(element_id);
  }
  if (!(properties_mask & (FieldPropertiesFlags::kUserTyped |
                           FieldPropertiesFlags::kAutofilled))) {
    return;
  }

  // The user input is preserved for all passwords. It is also preserved for
  // other fields, as long as the field's current value is not acceptable.
  std::u16string user_input = field_data_manager.GetUserInput(element_id);
  const bool preserve_user_input =
      field.form_control_type() == FormControlType::kInputPassword ||
      !IsScriptModifiedValueAcceptable(field.value(), user_input,
                                       field_data_manager);
  if (preserve_user_input) {
    field.set_user_input(std::move(user_input).substr(0, kMaxStringLength));
  }
}
// Test-only entry point: forwards `document` and `element` to GetAriaLabel()
// and returns its result unchanged.
std::u16string GetAriaLabelForTesting( // IN-TEST
const WebDocument& document,
const WebElement& element) {
return GetAriaLabel(document, element);
}
// Test-only entry point: forwards `document` and `element` to
// GetAriaDescription() and returns its result unchanged.
std::u16string GetAriaDescriptionForTesting( // IN-TEST
const WebDocument& document,
const WebElement& element) {
return GetAriaDescription(document, element);
}
// Test-only entry point: forwards `control_elements` and the mutable `fields`
// to InferLabelForElements().
void InferLabelForElementsForTesting( // IN-TEST
base::span<const blink::WebFormControlElement> control_elements,
std::vector<FormFieldData>& fields) {
InferLabelForElements(control_elements, fields);
}
// Test-only entry point: forwards `document` and `form_element` to
// GetOwnedFormControls() and returns its result unchanged.
std::vector<blink::WebFormControlElement>
GetOwnedFormControlsForTesting( // IN-TEST
const blink::WebDocument& document,
const blink::WebFormElement& form_element) {
return GetOwnedFormControls(document, form_element);
}
// Test-only entry point: forwards `current_node` and `forward` to
// NextWebNode() and returns its result unchanged.
WebNode NextWebNodeForTesting( // IN-TEST
const WebNode& current_node,
bool forward) {
return NextWebNode(current_node, forward);
}
// Test-only entry point: forwards `node` and `divs_to_skip` to
// FindChildTextWithIgnoreList() and returns its result unchanged.
std::u16string FindChildTextWithIgnoreListForTesting( // IN-TEST
const WebNode& node,
const std::set<WebNode>& divs_to_skip) {
return FindChildTextWithIgnoreList(node, divs_to_skip);
}
// Test-only entry point: returns the result of IsWebElementVisible() for
// `element`.
bool IsWebElementVisibleForTesting(const WebElement& element) { // IN-TEST
return IsWebElementVisible(element);
}
// Test-only entry point: returns the result of IsVisibleIframe() for
// `iframe_element`.
bool IsVisibleIframeForTesting( // IN-TEST
const WebElement& iframe_element) {
return IsVisibleIframe(iframe_element);
}
// Test-only entry point: returns the result of GetClosestAncestorFormElement()
// for `n`.
WebFormElement GetClosestAncestorFormElementForTesting(WebNode n) { // IN-TEST
return GetClosestAncestorFormElement(n);
}
// Test-only entry point: forwards `x`, `y` and `ancestor_hint` to
// IsDOMPredecessor() and returns its result unchanged.
bool IsDOMPredecessorForTesting(const WebNode& x, // IN-TEST
const WebNode& y,
const WebNode& ancestor_hint) {
return IsDOMPredecessor(x, y, ancestor_hint);
}
// Test-only entry point: returns the result of GetMaxLength() for `element`.
uint64_t GetMaxLengthForTesting( // IN-TEST
const WebFormControlElement& element) {
return GetMaxLength(element);
}
// Test-only entry point: forwards all arguments to
// WebFormControlElementToFormField(), always passing a null `shadow_data`.
// `field` is handed through as the callee's output argument.
void WebFormControlElementToFormFieldForTesting( // IN-TEST
const WebFormElement& form_element,
const WebFormControlElement& element,
const FieldDataManager* field_data_manager,
DenseSet<ExtractOption> extract_options,
FormFieldData* field) {
WebFormControlElementToFormField(form_element, element, field_data_manager,
extract_options, field,
/*shadow_data=*/nullptr);
}
// Test-only entry point: returns the result of GetDataListOptions() for
// `element`.
std::vector<SelectOption> GetDataListOptionsForTesting( // IN-TEST
const WebInputElement& element) {
return GetDataListOptions(element);
}
} // namespace autofill::form_util