// blob: fc645837814d070b20e3b8f0c19c29cdcd164f34 [file] [log] [blame]
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_AUTOFILL_CONTENT_RENDERER_FORM_AUTOFILL_UTIL_H_
#define COMPONENTS_AUTOFILL_CONTENT_RENDERER_FORM_AUTOFILL_UTIL_H_
#include <stddef.h>
#include <set>
#include <string>
#include <vector>
#include "base/containers/flat_map.h"
#include "base/i18n/rtl.h"
#include "components/autofill/core/common/autofill_constants.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/form_field_data.h"
#include "components/autofill/core/common/mojom/autofill_types.mojom-shared.h"
#include "components/autofill/core/common/unique_ids.h"
#include "third_party/blink/public/platform/web_vector.h"
#include "third_party/blink/public/web/web_element_collection.h"
#include "ui/gfx/geometry/rect_f.h"
class GURL;
namespace blink {
enum class WebAutofillState;
class WebDocument;
class WebElement;
class WebFormControlElement;
class WebFormElement;
class WebInputElement;
class WebLocalFrame;
class WebNode;
} // namespace blink
namespace content {
class RenderFrame;
} // namespace content
namespace autofill {
struct FormData;
struct FormFieldData;
class FieldDataManager;
namespace form_util {
// Cache for the results of the button titles heuristics: maps a form
// element's renderer id to the button titles inferred for that form element.
// GetButtonTitles() accepts a pointer to such a cache to spare recomputation
// when it is called multiple times for the same form.
using ButtonTitlesCache = base::flat_map<FormRendererId, ButtonTitleList>;
// A bit field mask selecting which data is extracted from a
// WebFormControlElement. Each non-zero enumerator occupies its own bit, so the
// flags can be combined with bitwise OR (e.g. EXTRACT_VALUE | EXTRACT_OPTIONS).
// Copied to components/autofill/ios/browser/resources/autofill_controller.js.
// NOTE(review): presumably the two copies must be kept in sync -- confirm
// before changing any value here.
enum ExtractMask {
// No flag set.
EXTRACT_NONE = 0,
EXTRACT_VALUE = 1 << 0, // Extract value from WebFormControlElement.
EXTRACT_OPTION_TEXT = 1 << 1, // Extract option text from
// WebFormSelectElement. Only valid when
// |EXTRACT_VALUE| is set.
// This is used for form submission where
// the human-readable value is captured.
EXTRACT_OPTIONS = 1 << 2, // Extract options from
// WebFormControlElement.
EXTRACT_BOUNDS = 1 << 3, // Extract bounds from WebFormControlElement;
// could trigger layout if needed.
EXTRACT_DATALIST = 1 << 4, // Extract datalist from WebFormControlElement;
// the total number of options is at most
// kMaxListSize and the length of each option
// is limited to kMaxDataLength.
};
// Autofill supports assigning <label for=x> tags to inputs if x is the input's
// id/name, or the id/name of a shadow host element containing the input.
// This enum is used to track how often each case occurs in practice.
// NOTE(review): the values are presumably reported to the histogram named by
// kAssignedLabelSourceHistogram, in which case existing values should not be
// renumbered -- confirm.
enum class AssignedLabelSource {
// x is the id of the input.
kId = 0,
// x is the name of the input.
kName = 1,
// x is the id of a shadow host element containing the input.
kShadowHostId = 2,
// x is the name of a shadow host element containing the input.
kShadowHostName = 3,
// Alias of the highest enumerator.
kMaxValue = kShadowHostName,
};
// Name of a temporary histogram. Judging by its name, it records
// AssignedLabelSource samples (NOTE(review): confirm at the emission site).
// The histogram is emitted inline, because browser files like
// AutofillMetrics cannot be included here.
// TODO(crbug.com/1339277): Remove.
inline constexpr char kAssignedLabelSourceHistogram[] =
"Autofill.LabelInference.AssignedLabelSource";
// Indicates if the iframe |iframe_element| is considered actually visible to
// the user.
//
// This function is not intended to implement a perfect visibility check. It
// rather aims to strike a balance between cheap tests and filtering invisible
// frames, which can then be skipped during parsing.
//
// The current visibility check requires focusability and a sufficiently large
// bounding box. Thus, particularly elements with "visibility: hidden",
// "display: none", and "width: 0; height: 0" are considered invisible.
//
// Future potential improvements include:
// * Detect potential visibility of elements with "overflow: visible".
// (See WebElement::GetScrollSize().)
// * Detect invisibility of elements with
// - "position: absolute; {left,top,right,bottom}: -100px"
// - "opacity: 0.0"
// - "clip: rect(0,0,0,0)"
//
// Exposed for testing purposes.
bool IsVisibleIframe(const blink::WebElement& iframe_element);
// Returns the closest <form> ancestor of |node|, or a null WebFormElement
// (i.e. one for which IsNull() holds).
//
// Generally, WebFormElements must not be nested [1]. When parsing HTML, Blink
// ignores nested form tags; the inner forms therefore never make it into the
// DOM. However, nested forms can be created and added to the DOM dynamically,
// in which case Blink associates each field with its closest ancestor.
//
// For some elements, Autofill determines the associated form without Blink's
// help (currently, these are only iframe elements). For consistency with
// Blink's behaviour, we associate them with their closest form element
// ancestor.
//
// [1] https://html.spec.whatwg.org/multipage/forms.html#the-form-element
blink::WebFormElement GetClosestAncestorFormElement(blink::WebNode node);
// Returns true if a DOM traversal (pre-order, depth-first) visits `x` before
// `y`.
//
// As a performance improvement, `ancestor_hint` can be set to a suspected
// common ancestor of `x` and `y`. Otherwise, `ancestor_hint` can be arbitrary;
// it is only a hint.
//
// This function is a simplified/specialised version of Blink's private
// Node::compareDocumentPosition().
//
// Exposed for testing purposes.
bool IsDOMPredecessor(const blink::WebNode& x,
const blink::WebNode& y,
const blink::WebNode& ancestor_hint);
// Gets up to kMaxListSize data list values (with their corresponding labels)
// for the given |element|; the length of each value and label is limited to
// kMaxDataLength. The results are written to the out-parameters |values| and
// |labels|.
// NOTE(review): presumably values[i] and labels[i] belong together, and both
// pointers must be non-null -- confirm against the implementation.
void GetDataListSuggestions(const blink::WebInputElement& element,
std::vector<std::u16string>* values,
std::vector<std::u16string>* labels);
// Extracts FormData from |form_element| into the out-parameter |data| and
// returns whether the operation was successful.
// NOTE(review): |field_data_manager| presumably supplies the per-field
// properties copied into the extracted fields (cf.
// WebFormElementToFormData()) -- confirm.
bool ExtractFormData(const blink::WebFormElement& form_element,
const FieldDataManager& field_data_manager,
FormData* data);
// Returns true if at least one of the control elements identified by the
// renderer ids in |control_elements| is visible in |document|.
bool IsSomeControlElementVisible(
const blink::WebDocument& document,
const std::set<FieldRendererId>& control_elements);
// Helper functions to assist in getting the canonical form of the action and
// origin. The action will properly take into account <BASE>, and both will
// strip unnecessary data (e.g. query params and HTTP credentials).
// GetCanonicalActionForForm() covers the action of |form|;
// GetDocumentUrlWithoutAuth() covers the URL of |document|.
GURL GetCanonicalActionForForm(const blink::WebFormElement& form);
GURL GetDocumentUrlWithoutAuth(const blink::WebDocument& document);
// Element type predicates.
//
// Returns true if |element| is a month input element.
bool IsMonthInput(const blink::WebInputElement& element);
// Returns true if |element| is a text input element.
bool IsTextInput(const blink::WebInputElement& element);
// Returns true if |element| is either a select or a selectmenu element (cf.
// IsSelectElement() and IsSelectMenuElement()).
bool IsSelectOrSelectMenuElement(const blink::WebFormControlElement& element);
// Returns true if |element| is a select element.
bool IsSelectElement(const blink::WebFormControlElement& element);
// Returns true if |element| is a selectmenu element.
bool IsSelectMenuElement(const blink::WebFormControlElement& element);
// Returns true if |element| is a textarea element.
bool IsTextAreaElement(const blink::WebFormControlElement& element);
// Returns true if |element| is a textarea element or a text input element.
bool IsTextAreaElementOrTextInput(const blink::WebFormControlElement& element);
// Returns true if |element| is a checkbox or a radio button element.
bool IsCheckableElement(const blink::WebFormControlElement& element);
// Returns true if |element| is one of the input element types that can be
// autofilled: {Text, Radiobutton, Checkbox}.
bool IsAutofillableInputElement(const blink::WebInputElement& element);
// Returns true if |element| is one of the element types that can be
// autofilled: {Text, Radiobutton, Checkbox, Select, TextArea}.
bool IsAutofillableElement(const blink::WebFormControlElement& element);
// Returns true if |element| can be edited (enabled and not read-only).
bool IsElementEditable(const blink::WebInputElement& element);
// Returns true if |element| can take focus. If the layout is blocked, then the
// function checks if the element takes up space in the layout, i.e., this
// element or a descendant has a non-empty bounding client rect. If |element|
// is a selectmenu, checks whether a child of the selectmenu can take focus.
bool IsWebElementFocusableForAutofill(const blink::WebElement& element);
// Returns whether |element| is considered visible, based on a heuristic
// visibility detection. See crbug.com/1335257 for an overview of relevant
// aspects.
//
// Note that WebElement::BoundsInWidget(), WebElement::GetClientSize(),
// and WebElement::GetScrollSize() include the padding but do not include the
// border and margin. BoundsInWidget() additionally scales the
// dimensions according to the zoom factor.
//
// It seems that invisible fields on websites typically have dimensions between
// 0 and 10 pixels, before the zoom factor. Therefore choosing `kMinPixelSize`
// is easier without including the zoom factor. For that reason, this function
// prefers GetClientSize() over BoundsInWidget().
//
// This function does not check the position in the viewport because fields in
// iframes commonly are visible despite the body having height zero. Therefore,
// `e.GetDocument().Body().BoundsInWidget().Intersects(
// e.BoundsInWidget())` yields false negatives.
//
// Exposed for testing purposes.
//
// TODO(crbug.com/1335257): Can input fields or iframes actually overflow?
bool IsWebElementVisible(const blink::WebElement& element);
// Returns the `name` attribute of |form| if it is non-empty; otherwise the
// `id` attribute of |form|.
std::u16string GetFormIdentifier(const blink::WebFormElement& form);
// Returns the FormRendererId of |form|. If |form| is null (i.e.
// WebFormElement::IsNull() holds), returns a null form renderer id, which is
// the renderer id of the unowned form.
FormRendererId GetFormRendererId(const blink::WebFormElement& form);
// Returns the FieldRendererId of |field|.
// NOTE(review): unlike for GetFormRendererId(), the behaviour for a null
// |field| is not documented here -- confirm against the implementation.
FieldRendererId GetFieldRendererId(const blink::WebFormControlElement& field);
// Returns the text direction (a base::i18n::TextDirection) for |element|.
// NOTE(review): this comment used to say "text alignment"; presumably the
// direction is derived from the element's alignment/direction -- confirm
// against the implementation.
base::i18n::TextDirection GetTextDirectionForElement(
const blink::WebFormControlElement& element);
// Returns all the auto-fillable form control elements in |control_elements|.
// NOTE(review): "auto-fillable" presumably means IsAutofillableElement()
// holds -- confirm against the implementation.
std::vector<blink::WebFormControlElement> ExtractAutofillableElementsFromSet(
const blink::WebVector<blink::WebFormControlElement>& control_elements);
// Returns all the auto-fillable form control elements in |form_element|.
// NOTE(review): presumably applies the same filter as
// ExtractAutofillableElementsFromSet() to the form's control elements --
// confirm against the implementation.
std::vector<blink::WebFormControlElement> ExtractAutofillableElementsInForm(
const blink::WebFormElement& form_element);
// Forward declaration only; the definition is not part of this header.
struct ShadowFieldData;
// Fills out the FormFieldData object |field| from the given
// WebFormControlElement |element|.
// |extract_mask|: See the enum ExtractMask above for details. Field properties
// will be copied from |field_data_manager|, if the argument is not null and
// has an entry for |element| (see properties in FieldPropertiesFlags).
// |shadow_data| is optional and defaults to nullptr.
// NOTE(review): the role of |form_element| and |shadow_data| is not documented
// here -- confirm against the implementation.
void WebFormControlElementToFormField(
const blink::WebFormElement& form_element,
const blink::WebFormControlElement& element,
const FieldDataManager* field_data_manager,
ExtractMask extract_mask,
FormFieldData* field,
ShadowFieldData* shadow_data = nullptr);
// Fills |form| with the FormData object corresponding to the |form_element|.
// If |field| is non-null, also fills |field| with the FormFieldData object
// corresponding to the |form_control_element|. |extract_mask| controls what
// data is extracted. Returns true if |form| is filled out, and false
// otherwise; in particular, returns false if there are no fields or too many
// fields in the |form|. Field properties will be copied from
// |field_data_manager|, if the argument is not null and has an entry for the
// respective element (see properties in FieldPropertiesFlags).
bool WebFormElementToFormData(
const blink::WebFormElement& form_element,
const blink::WebFormControlElement& form_control_element,
const FieldDataManager* field_data_manager,
ExtractMask extract_mask,
FormData* form,
FormFieldData* field);
// Returns the form that owns the `form_control`, or a null WebFormElement if
// no form owns the `form_control`.
//
// The form that owns `form_control` is
// - the form with which `form_control` is associated, if such a form exists,
// - otherwise, the closest shadow-including ancestor WebFormElement.
blink::WebFormElement GetOwningForm(
const blink::WebFormControlElement& form_control);
// Gets all form control elements from |document| that are not part of a form.
std::vector<blink::WebFormControlElement> GetUnownedFormFieldElements(
const blink::WebDocument& document);
// A shorthand for filtering the results of GetUnownedFormFieldElements() for
// |document| with ExtractAutofillableElementsFromSet().
std::vector<blink::WebFormControlElement>
GetUnownedAutofillableFormFieldElements(const blink::WebDocument& document);
// Returns the <iframe> elements of |document| that are not in the scope of any
// <form>.
std::vector<blink::WebElement> GetUnownedIframeElements(
const blink::WebDocument& document);
// NOTE(review): judging by the name and signature, this is the counterpart of
// WebFormElementToFormData() for elements that are not owned by any form: it
// presumably fills |form| (and optionally |field|) from |control_elements| and
// |iframe_elements| -- confirm against the implementation.
//
// Returns false iff the extraction fails because the number of fields exceeds
// |kMaxParseableFields|, or |field| and |element| are not nullptr but
// |element| is not among |control_elements|.
bool UnownedFormElementsToFormData(
const std::vector<blink::WebFormControlElement>& control_elements,
const std::vector<blink::WebElement>& iframe_elements,
const blink::WebFormControlElement* element,
const blink::WebDocument& document,
const FieldDataManager* field_data_manager,
ExtractMask extract_mask,
FormData* form,
FormFieldData* field);
// Finds the form that contains |element| and returns it in |form|. If |field|
// is non-null, fills it with the FormFieldData representation for |element|.
// |additional_extract_mask| controls what to extract besides the default mask,
// which is EXTRACT_VALUE | EXTRACT_OPTIONS. Returns false if the form is not
// found or cannot be serialized.
bool FindFormAndFieldForFormControlElement(
const blink::WebFormControlElement& element,
const FieldDataManager* field_data_manager,
ExtractMask additional_extract_mask,
FormData* form,
FormFieldData* field);
// Same as the overload above that takes an |additional_extract_mask|, but
// with the default ExtractMask only (documented there as
// EXTRACT_VALUE | EXTRACT_OPTIONS).
bool FindFormAndFieldForFormControlElement(
const blink::WebFormControlElement& element,
const FieldDataManager* field_data_manager,
FormData* form,
FormFieldData* field);
// Fills or previews the form represented by |form|. |element| is the input
// element that initiated the auto-fill process. Returns the filled fields.
// NOTE(review): |action| presumably selects between filling and previewing --
// confirm against mojom::RendererFormDataAction.
std::vector<blink::WebFormControlElement> FillOrPreviewForm(
const FormData& form,
const blink::WebFormControlElement& element,
mojom::RendererFormDataAction action);
// Clears the suggested values in |control_elements|. The autofill state of
// |initiating_element| is set to |old_autofill_state|; the state of all other
// fields is set to kNotFilled.
void ClearPreviewedElements(
std::vector<blink::WebFormControlElement>& control_elements,
const blink::WebFormControlElement& initiating_element,
blink::WebAutofillState old_autofill_state);
// Indicates if |node| is owned by |frame| in the sense of the DOM standard's
// "node document" concept:
// https://dom.spec.whatwg.org/#concept-node-document. Note that being owned by
// a frame does not require being attached to its DOM.
bool IsOwnedByFrame(const blink::WebNode& node, content::RenderFrame* frame);
// Checks if the webpage in |frame| is empty.
// A webpage of the following kind is considered empty:
// <html>
// <head>
// </head>
// <body>
// </body>
// </html>
// Meta, script and title tags don't influence the emptiness of a webpage.
bool IsWebpageEmpty(const blink::WebLocalFrame* frame);
// Checks whether the children of |element| are of the type <script>, <meta>,
// or <title>.
// NOTE(review): presumably returns true iff every child is of one of these
// types (which includes the case of no children) -- confirm against the
// implementation.
bool IsWebElementEmpty(const blink::WebElement& element);
// Previews |suggestion| in |input_element| and highlights the suffix of
// |suggestion| that is not included in the |input_element| text.
// |input_element| must not be null. |user_input| should be the text typed by
// the user into |input_element|. Note that |user_input| cannot be easily
// derived from |input_element| by calling value(), because of
// http://crbug.com/507714.
void PreviewSuggestion(const std::u16string& suggestion,
const std::u16string& user_input,
blink::WebFormControlElement* input_element);
// Returns the aggregated values of the descendants of |node| that are
// non-empty text nodes. This is a faster alternative to |innerText()| for
// performance critical operations. It does a full depth-first search so can be
// used when the structure is not directly known. However, unlike with
// |innerText()|, the search depth and breadth are limited to a fixed threshold.
// Whitespace is trimmed from text accumulated at descendant nodes.
std::u16string FindChildText(const blink::WebNode& node);
// Returns the button titles for |web_form|. |button_titles_cache| can be used
// to spare recomputation if called multiple times for the same form.
// NOTE(review): the cache is a ButtonTitlesCache, i.e. keyed by
// FormRendererId; whether a null |button_titles_cache| is permitted is not
// documented here -- confirm against the implementation.
ButtonTitleList GetButtonTitles(const blink::WebFormElement& web_form,
ButtonTitlesCache* button_titles_cache);
// Exposed for testing purposes.
// NOTE(review): judging by the name and signature, this is a variant of
// FindChildText() that skips the nodes in |divs_to_skip| -- confirm against
// the implementation.
std::u16string FindChildTextWithIgnoreListForTesting(
const blink::WebNode& node,
const std::set<blink::WebNode>& divs_to_skip);
// Test-only entry point (per the ForTesting suffix) for the label inference of
// |element|.
// NOTE(review): presumably writes the inferred label to |label| and its origin
// to |label_source|, and returns whether a label was inferred -- confirm
// against the implementation.
bool InferLabelForElementForTesting(const blink::WebFormControlElement& element,
std::u16string& label,
FormFieldData::LabelSource& label_source);
// Returns the form element in |doc| with the unique renderer id
// |form_renderer_id|. Returns the null element if there is no form with the
// |form_renderer_id|.
blink::WebFormElement FindFormByUniqueRendererId(
const blink::WebDocument& doc,
FormRendererId form_renderer_id);
// Returns the form control element in |doc| with the unique renderer id
// |queried_form_control|. It searches the form |form_to_be_searched| if
// specified, otherwise the whole document. Returns the null element if there
// is no element with the |queried_form_control| renderer id.
// NOTE(review): absl::optional is used here although this header does not
// include the Abseil optional header directly; presumably it is pulled in
// transitively -- consider including it explicitly.
blink::WebFormControlElement FindFormControlElementByUniqueRendererId(
const blink::WebDocument& doc,
FieldRendererId queried_form_control,
absl::optional<FormRendererId> form_to_be_searched = absl::nullopt);
// Note: The vector-based API of the following two functions is a tax for
// limiting the frequency and duration of retrieving a lot of DOM elements.
// Alternative solutions have been discussed on https://crrev.com/c/1108201.
//
// Returns the form control elements in |doc| identified by the given unique
// renderer IDs. The result has the same number of elements as
// |queried_form_controls| and the i-th element of the result corresponds to
// the i-th element of |queried_form_controls|. A call of this function might
// be expensive, because it retrieves all DOM elements.
std::vector<blink::WebFormControlElement>
FindFormControlElementsByUniqueRendererId(
const blink::WebDocument& doc,
const std::vector<FieldRendererId>& queried_form_controls);
// Returns form control elements by unique renderer id from the form with
// |form_renderer_id|. The result has the same number of elements as
// |queried_form_controls| and the i-th element of the result corresponds to
// the i-th element of |queried_form_controls|. This function is faster than
// the document-wide overload declared above, because it only retrieves form
// control elements from a single form.
std::vector<blink::WebFormControlElement>
FindFormControlElementsByUniqueRendererId(
const blink::WebDocument& doc,
FormRendererId form_renderer_id,
const std::vector<FieldRendererId>& queried_form_controls);
// Returns the ARIA label of |element|: either the text of the elements in
// |document| denoted by the aria-labelledby attribute of |element|, or the
// value of the aria-label attribute of |element|, with priority given to the
// aria-labelledby attribute.
std::u16string GetAriaLabel(const blink::WebDocument& document,
const blink::WebElement& element);
// Returns the ARIA description of |element|, i.e. the text of the elements in
// |document| denoted by the aria-describedby attribute of |element|.
std::u16string GetAriaDescription(const blink::WebDocument& document,
const blink::WebElement& element);
} // namespace form_util
} // namespace autofill
#endif // COMPONENTS_AUTOFILL_CONTENT_RENDERER_FORM_AUTOFILL_UTIL_H_