blob: bcbf8e28ac3631b19a4873ac5bb026b4bea5fc71 [file] [log] [blame]
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_AUTOFILL_CONTENT_BROWSER_FORM_FOREST_H_
#define COMPONENTS_AUTOFILL_CONTENT_BROWSER_FORM_FOREST_H_
#include "base/containers/flat_map.h"
#include "base/containers/flat_set.h"
#include "base/containers/span.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/raw_ref.h"
#include "components/autofill/core/browser/autofill_driver.h"
#include "components/autofill/core/browser/field_types.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/unique_ids.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "third_party/abseil-cpp/absl/types/variant.h"
namespace autofill::internal {
// FormForest converts renderer forms into a browser form and vice versa.
//
// A *frame-transcending* form is a form whose logical fields live in different
// frames. The *renderer forms* are the FormData objects as we receive them from
// the renderers of these frames. The *browser form* of a frame-transcending
// form is its root FormData, with all fields of its descendant FormDatas moved
// into the root.
// See ContentAutofillRouter for further details on the terminology and
// motivation.
//
// Consider the following main frame with two frame-transcending forms:
//
// +--------------- Frame ---------------+
// | |
// +--- Form-A --+ +--- Form-C --+
// | | | | |
// Field-1 Frame Field-4 Frame Frame
// | | |
// +--- Form-B --+ Form-D Form-E
// | | | |
// Field-2 Field-3 Field-5 Frame
// |
// Form-F
// |
// Field-6
//
// The renderer forms are form A, B, C, D, E, F.
//
// The browser form of forms A and B has the fields fields 1, 2, 3, 4.
// Converting this browser form back to renderer forms yields Form-A and Form-B.
//
// Analogously, the browser form of forms C, D, E, and F has the fields 5 and 6.
// Converting this browser form back to renderer forms yields forms C, D, E, F.
//
// The three key functions of FormForest are:
// - UpdateTreeOfRendererForm()
// - GetBrowserForm()
// - GetRendererFormsOfBrowserForm()
//
// UpdateTreeOfRendererForm() incrementally builds up a graph of frames, forms,
// and fields.
//
// This graph is a forest in two (entirely independent) ways:
//
// Firstly, there may be multiple root frames. One reason is the website author
// can disconnect an entire frame subtree from the rest of the frame tree in the
// future using the fencedframes tag and/or disallowdocumentaccess attribute.
// Another reason is that the frame hierarchy emerges gradually and therefore
// some links may be unknown. For example, Form-A might point to a nonexistent
// frame of Form-B because, after Form-A was last parsed, a cross-origin
// navigation happened in Form-B's frame.
//
// Secondly, removing the root frames obtains a forest, where each tree
// corresponds to a frame-transcending form. We call the roots of this forest
// *root forms*. In the example, forms A and C are root forms. This is relevant
// because filling operations happen on the granularity of root forms.
//
// As an invariant, UpdateTreeOfRendererForm() keeps each frame-transcending
// form in a flattened state: fields are stored as children of their root
// forms. The fields are ordered according to pre-order depth-first (DOM order)
// traversal of the original tree. In our example:
//
// +--------------- Frame ---------------+
// | |
// +-------+---- Form-A ---+-------+ +-------+- Form-C -+-------+
// | | | | | | | | |
// Field-1 Field-2 Field-3 Field-4 Frame Field-5 Field-6 Frame Frame
// | | |
// Form-B Form-D Form-E
// |
// Frame
// |
// Form-F
//
// There is no meaningful order between the fields and frames in these flattened
// forms.
//
// GetBrowserForm(renderer_form) simply retrieves the form node of
// |renderer_form| and returns the root form, along with its field children. For
// example, if |renderer_form| is form B, it returns form A with fields 1–4.
//
// GetRendererFormsOfBrowserForm(browser_form) returns the individual renderer
// forms that constitute |browser_form|, with their fields reinstated. For
// example, if |browser_form| has fields 1–4, it returns form A with fields 1
// and 4, and form B with fields 2 and 3.
//
// The node types in the forest always alternate as follows:
// - The root nodes are frames.
// - The children of frames are forms.
// - The children of forms are frames or fields.
// - Fields are leaves. Forms and frames may be leaves.
//
// Frames, forms, and fields are represented as FrameData, FormData, and
// FormFieldData objects respectively. The graph is stored as follows:
// - All FrameData nodes are stored directly in FormForest::frame_datas_.
// - The FrameData -> FormData edges are stored in the FrameData::child_forms
// vector of FormData objects. That is, each FrameData directly holds its
// children.
// - The FormData -> FrameData edges are stored in the FormData::child_frames
// vector of FrameTokens. To retrieve the actual FrameData child, the token
// is resolved to a LocalFrameToken if necessary (see Resolve()), and then
// looked up in FormForest::frame_datas_.
// - The FormData -> FormFieldData edges are stored in the FormData::fields
// vector of FormFieldData objects. As per the aforementioned invariant,
// fields are only stored in root forms. Each field's original parent can be
// identified by FormFieldData::host_frame and FormFieldData::host_form_id.
//
// Reasonable usage of FormForest follows this protocol:
// 1. Call UpdateTreeOfRendererForm(renderer_form) whenever a renderer form is
// seen to make FormForest aware of the (new or potentially changed) form.
// 2. Call GetBrowserForm(renderer_form.global_id()) directly afterwards (as
// long as the renderer form is known to the FormForest).
// 3. Call GetRendererFormsOfBrowserForm(browser_form) only if |browser_form|
// was previously returned by GetBrowserForm(), perhaps with
// different FormFieldData::value, FormFieldData::is_autofilled.
//
// For FormForest to be memory safe,
// 1. UpdateTreeOfRendererForm() and GetRendererFormsOfBrowserForm() must only
// be called for forms which have the following attributes set:
// - FormData::host_frame
// - FormData::unique_renderer_id
// - FormFieldData::host_frame
// - FormFieldData::unique_renderer_id
// - FormFieldData::host_form_id
// 2. GetBrowserForm() must only be called for known renderer forms. A renderer
// form is *known* after a corresponding UpdateTreeOfRendererForm() call
// until it is erased by EraseForms() or EraseFormsOfFrame().
//
// FormForest works with LocalFrameToken and resolves the RemoteFrameTokens in
// FormData::child_frames to LocalFrameTokens.
//
// From the perspective of a frame F, a frame G is either local or remote:
// - If G is local, G is hosted by the same render process as F.
// - If G is remote, G may be hosted by another render process.
//
// Suppose F is the parent frame of G. If G is local to F, then F refers to G in
// its FormData::child_frames by G's LocalFrameToken. Otherwise, if G is remote
// to F, then F uses a RemoteFrameToken as a placeholder to refer to G in
// FormData::child_frames.
//
// While LocalFrameTokens are unique identifiers at any point in time, they may
// change when a navigation happens in the frame:
// - If G is local to F and a navigation causes G's render process to be
// swapped so that G becomes remote, G gets a new LocalFrameToken and F will
// refer to G by a fresh RemoteFrameToken.
// - If G is remote to F and a navigation causes G's render process to be
// swapped, then F may continue to refer to G by the same RemoteFrameToken
// as before even if G's LocalFrameToken has changed.
// The first example is the reason why UpdateTreeOfRendererForm() sometimes
// triggers form re-extraction in a parent frame. The second example is the
// reason why we do not cache LocalFrameTokens.
class FormForest {
public:
// A FrameData is a frame node in the form tree. Its children are FormData
// objects.
struct FrameData {
// Less-than relation on FrameData objects based on their frame token, to be
// used by FrameData sets.
struct CompareByFrameToken {
using is_transparent = void;
bool operator()(const std::unique_ptr<FrameData>& f,
const std::unique_ptr<FrameData>& g) const {
return f && g ? f->frame_token < g->frame_token : f.get() < g.get();
}
bool operator()(const std::unique_ptr<FrameData>& f,
const LocalFrameToken& g) const {
return f ? f->frame_token < g : true;
}
bool operator()(const LocalFrameToken& f,
const std::unique_ptr<FrameData>& g) const {
return g ? f < g->frame_token : false;
}
};
explicit FrameData(LocalFrameToken frame_token);
FrameData(const FrameData&) = delete;
FrameData& operator=(const FrameData&) = delete;
~FrameData();
// Unique identifier of the frame. This is never null.
const LocalFrameToken frame_token;
// List of forms directly contained in the frame, in the same order as the
// corresponding UpdateTreeOfRendererForm(child_form) calls. Modification of
// this vector should be kept to a minimum to ensure memory safety. Only
// UpdateTreeOfRendererForm() should modify it.
std::vector<FormData> child_forms;
// Unique identifier of the form in the parent frame that contains this
// frame. When a parent form can Resolve() a child's FrameToken, it sets
// itself as the parent of the child frame, even if no form in this frame
// has been seen yet.
absl::optional<FormGlobalId> parent_form = absl::nullopt;
// Pointer to the frame's AutofillDriver. This may be null because an
// empty FrameData is created when a parent form can Resolve() a child's
// LocalFrameToken and no form from that child frame has been seen yet.
// However, if |child_forms| is non-empty, then driver is non-null.
raw_ptr<AutofillDriver> driver = nullptr;
};
FormForest();
FormForest(const FormForest&) = delete;
FormForest& operator=(const FormForest&&) = delete;
~FormForest();
// Adds or updates |renderer_form| and |driver| to/in the relevant tree, where
// |driver| must be the AutofillDriver of `renderer_form.host_frame`.
// Afterwards, `renderer_form.global_id()` is a known renderer form.
void UpdateTreeOfRendererForm(FormData renderer_form,
AutofillDriver* driver) {
UpdateTreeOfRendererForm(&renderer_form, driver);
}
// Returns the browser form of a known |renderer_form|.
const FormData& GetBrowserForm(FormGlobalId renderer_form) const;
struct RendererForms {
RendererForms();
RendererForms(RendererForms&&);
RendererForms& operator=(RendererForms&&);
~RendererForms();
std::vector<FormData> renderer_forms;
std::vector<FieldGlobalId> safe_fields;
};
// Returns the renderer forms of |browser_form| and the fields that are safe
// to be filled according to the security policy for cross-frame previewing
// and filling. The security policy depends on |triggered_origin| and
// |field_type_map|.
//
// The function reinstates each field from |browser_form| in the renderer form
// it originates from. These reinstated fields hold the (possibly autofilled)
// values from |browser_form|, provided that they are considered safe to fill
// according to the security policy defined below. The FormFieldData::value of
// unsafe fields is reset to the empty string.
//
// The |triggered_origin| should be the origin of the field from which
// Autofill was queried.
// The |field_type_map| should contain the field types of the fields in
// |browser_form|.
//
// A field is *safe to fill* iff at least one of the conditions (1–3) and
// additionally condition (4) hold:
//
// (1) The field's origin is the |triggered_origin|.
// (2) The field's origin is the main origin, the field's type in
// |field_type_map| is not sensitive (see IsSensitiveFieldType()), and the
// policy-controlled feature shared-autofill is enabled in the field's
// frame.
// (3) The |triggered_origin| is the main origin and the policy-controlled
// feature shared-autofill is enabled in the field's frame.
// (4) The field is in the same frame tree as the field on which Autofill was
// triggered.
//
// The *origin of a field* is the origin of the frame that contains the
// corresponding form-control element.
//
// The *main origin* is `browser_form.main_frame_origin`.
//
// The "allow" attribute of the <iframe> element controls whether the
// *policy-controlled feature shared-autofill* is enabled in a document
// (see https://www.w3.org/TR/permissions-policy-1/).
RendererForms GetRendererFormsOfBrowserForm(
const FormData& browser_form,
const url::Origin& triggered_origin,
const base::flat_map<FieldGlobalId, ServerFieldType>& field_type_map)
const;
// Deletes all forms and fields that originate from the |renderer_forms| and
// unsets the FrameData::parent_form pointers of all child forms.
//
// Afterwards, the |renderer_forms| are unknown.
//
// Returns the forms that lost fields due to the removal, which are known
// renderer forms.
base::flat_set<FormGlobalId> EraseForms(
base::span<const FormGlobalId> renderer_forms);
// Deletes all forms and fields that originate from |frame| and unsets the
// FrameData::parent_form pointers of all child forms.
//
// Afterwards, all renderer forms in |frame| are unknown.
void EraseFormsOfFrame(LocalFrameToken frame, bool keep_frame);
// Returns the set of FrameData nodes of the forest.
const base::flat_set<std::unique_ptr<FrameData>,
FrameData::CompareByFrameToken>&
frame_datas() const {
return frame_datas_;
}
private:
friend class FormForestTestApi;
struct FrameAndForm {
raw_ref<FrameData> frame;
raw_ref<FormData> form;
};
// Returns the FrameData known for |frame|, or creates a new one and returns
// it, in which case all members but FrameData::host_frame are uninitialized.
FrameData* GetOrCreateFrameData(LocalFrameToken frame);
// Returns the FrameData known for |frame|, or null.
// May be used in const qualified methods if the return value is not mutated.
FrameData* GetFrameData(LocalFrameToken frame);
// Returns the FormData known for |form|, or null.
// The returned value will point to |frame_datas_|, meaning that all fields
// have been moved to their respective root forms.
// The |frame_data| must be null or equal to `GetFrameData(form.host_frame)`.
// Beware of invalidating the returned form pointer by changing its host
// frame's FrameData::host_forms.
// May be used in const qualified methods if the return value is not mutated.
FormData* GetFormData(FormGlobalId form, FrameData* frame_data = nullptr);
// Returns the non-null root frame and form of the tree that contains |form|.
// Beware of invalidating the returned form pointer by changing its host
// frame's FrameData::host_forms.
// May be used in const qualified methods if the return value is not mutated.
FrameAndForm GetRoot(FormGlobalId form);
// Helper for EraseFormsOfFrame() and EraseForms() that removes all fields
// that originate from |frame_or_form| and unsets FrameData::parent_form
// pointer of |frame_or_form|'s children.
//
// Afterwards, all renderer forms in |frame_or_form| (if it is a frame) or the
// renderer form |frame_or_form| (if it is a form) are unknown.
//
// Adds every known renderer form from which a field is removed is to
// |forms_with_removed_fields|.
//
// We intentionally iterate over all frames and forms to search for fields
// from |frame_or_form|. Alternatively, we could limit this to the root form
// of |frame_or_form|. However, this would rely on |frame_or_form| being
// erased before its ancestors, since otherwise |frame_or_form| is
// disconnected from its root already.
void EraseReferencesTo(
absl::variant<LocalFrameToken, FormGlobalId> frame_or_form,
base::flat_set<FormGlobalId>* forms_with_removed_fields);
// Adds |renderer_form| and |driver| to the relevant tree, where |driver| must
// be the AutofillDriver of the |renderer_form|'s FormData::host_frame.
//
// Afterwards, `renderer_form->global_id()` is a known renderer form.
//
// Leaves `*renderer_form` in a valid but unspecified state (like after a
// move). In particular, `*renderer_form` and its members can be reassigned.
void UpdateTreeOfRendererForm(FormData* renderer_form,
AutofillDriver* driver);
// The FrameData nodes of the forest.
// Note that since the elements are (smart) pointers, they are not invalidated
// when the set is resized (unlike pointers or references to the elements).
base::flat_set<std::unique_ptr<FrameData>, FrameData::CompareByFrameToken>
frame_datas_;
};
} // namespace autofill::internal
#endif // COMPONENTS_AUTOFILL_CONTENT_BROWSER_FORM_FOREST_H_