| // Copyright 2021 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/autofill/content/browser/form_forest.h" |
| |
| #include "base/containers/stack.h" |
| #include "base/debug/dump_without_crashing.h" |
| #include "base/numerics/safe_conversions.h" |
| #include "base/ranges/algorithm.h" |
| #include "base/stl_util.h" |
| #include "content/public/browser/render_process_host.h" |
| #include "third_party/abseil-cpp/absl/types/variant.h" |
| #include "third_party/blink/public/common/permissions_policy/permissions_policy_features.h" |
| #include "third_party/blink/public/common/tokens/tokens.h" |
| #include "third_party/blink/public/mojom/permissions_policy/permissions_policy_feature.mojom-shared.h" |
| |
| // AFCHECK(condition[, error_handler]) creates a crash dump and executes |
| // |error_handler| if |condition| is false. |
| // TODO(https://crbug.com/1187842): Replace AFCHECK() with DCHECK(). |
| #if DCHECK_IS_ON() |
| #define AFCHECK(condition, ...) DCHECK(condition) |
| #else |
| #define AFCHECK(condition, ...) \ |
| { \ |
| if (!(condition)) { \ |
| base::debug::DumpWithoutCrashing(); \ |
| __VA_ARGS__; \ |
| } \ |
| } |
| #endif |
| |
| namespace autofill { |
| namespace internal { |
| |
| namespace { |
| |
| // Indicates if |rfh| is a fenced frame (https://crbug.com/1111084). |
| // |
| // We do not want to fill across the boundary of a fenced frame. Hence, a fenced |
| // frame's FrameData must be disconnected (in terms of FormData::child_frames |
| // and FrameData::parent_form) from its parent form. This is already guaranteed |
| // because FormData::child_frames does not contain fenced frames. However, |
| // UpdateTreeOfRendererForm() would still invoke TriggerReparse() to detect the |
| // parent form. IsFencedFrameRoot() should be implemented to suppress this. |
| // |
| // We also do not want to fill across iframes with the disallowdocumentaccess |
| // attribute (https://crbug.com/961448). Since disallowdocumentaccess is |
| // currently not going to ship and supporting it requires significant additional |
| // work in UpdateTreeOfRendererForm() to remove FormData::child_frame and unset |
| // FrameData::parent_form for frames that disallow document access, there is no |
| // immediate need to support it. See https://crrev.com/c/3055422 for a draft |
| // implementation. |
| bool IsFencedFrameRoot(content::RenderFrameHost* rfh) { |
| return rfh->IsFencedFrameRoot(); |
| } |
| |
| } // namespace |
| |
| FormForest::FrameData::FrameData(LocalFrameToken frame_token) |
| : frame_token(frame_token) {} |
| FormForest::FrameData::~FrameData() = default; |
| |
| FormForest::FormForest() = default; |
| FormForest::~FormForest() = default; |
| |
| absl::optional<LocalFrameToken> FormForest::Resolve(const FrameData& reference, |
| FrameToken query) { |
| if (absl::holds_alternative<LocalFrameToken>(query)) |
| return absl::get<LocalFrameToken>(query); |
| DCHECK(absl::holds_alternative<RemoteFrameToken>(query)); |
| AFCHECK(reference.driver, return absl::nullopt); |
| content::RenderFrameHost* rfh = reference.driver->render_frame_host(); |
| AFCHECK(rfh, return absl::nullopt); |
| content::RenderProcessHost* rph = rfh->GetProcess(); |
| AFCHECK(rph, return absl::nullopt); |
| blink::RemoteFrameToken blink_remote_token( |
| absl::get<RemoteFrameToken>(query).value()); |
| content::RenderFrameHost* remote_rfh = |
| content::RenderFrameHost::FromPlaceholderToken(rph->GetID(), |
| blink_remote_token); |
| if (!remote_rfh) |
| return absl::nullopt; |
| return LocalFrameToken(remote_rfh->GetFrameToken().value()); |
| } |
| |
| FormForest::FrameData* FormForest::GetOrCreateFrame(LocalFrameToken frame) { |
| auto it = frame_datas_.find(frame); |
| if (it == frame_datas_.end()) |
| it = frame_datas_.insert(it, std::make_unique<FrameData>(frame)); |
| AFCHECK(it != frame_datas_.end()); |
| AFCHECK(it->get()); |
| return it->get(); |
| } |
| |
| FormForest::FrameData* FormForest::GetFrameData(LocalFrameToken frame) { |
| auto it = frame_datas_.find(frame); |
| return it != frame_datas_.end() ? it->get() : nullptr; |
| } |
| |
| FormData* FormForest::GetFormData(const FormGlobalId& form, |
| FrameData* frame_data) { |
| DCHECK(!frame_data || frame_data == GetFrameData(form.frame_token)); |
| if (!frame_data) |
| frame_data = GetFrameData(form.frame_token); |
| if (!frame_data) |
| return nullptr; |
| auto it = base::ranges::find(frame_data->child_forms, form.renderer_id, |
| &FormData::unique_renderer_id); |
| return it != frame_data->child_forms.end() ? &*it : nullptr; |
| } |
| |
| FormForest::FrameAndForm FormForest::GetRoot(FormGlobalId form) { |
| FrameData* frame = nullptr; |
| for (;;) { |
| frame = GetFrameData(form.frame_token); |
| AFCHECK(frame, return {nullptr, nullptr}); |
| if (!frame->parent_form) { |
| auto it = base::ranges::find(frame->child_forms, form.renderer_id, |
| &FormData::unique_renderer_id); |
| AFCHECK(it != frame->child_forms.end(), return {nullptr, nullptr}); |
| return {frame, &*it}; |
| } |
| form = *frame->parent_form; |
| } |
| } |
| |
| void FormForest::EraseFrame(LocalFrameToken frame) { |
| if (!frame_datas_.erase(frame)) |
| return; |
| // Removes all fields and unsets |frame|'s children's FrameData::parent_form |
| // pointer. We intentionally iterate over all frames and forms to search for |
| // fields from |frame|. Alternatively, we could limit this to the root form of |
| // |frame|. However, this would rely on |frame| being erased before its |
| // ancestors, for otherwise |frame| is disconnected from its root already. |
| for (std::unique_ptr<FrameData>& some_frame : frame_datas_) { |
| AFCHECK(some_frame, continue); |
| for (FormData& form : some_frame->child_forms) { |
| base::EraseIf(form.fields, [&frame](const FormFieldData& field) { |
| return field.host_frame == frame; |
| }); |
| } |
| if (some_frame->parent_form && |
| some_frame->parent_form->frame_token == frame) { |
| some_frame->parent_form = absl::nullopt; |
| } |
| } |
| } |
| |
| // Maintains the following invariants: |
| // 1. The graph is a disjoint union of trees, and the trees faithfully represent |
| // the frame-transcending forms in the DOMs. |
| // 2. The root forms of each tree contain (only) the nodes of all forms in |
| // the subtree in DOM order, and all non-root forms have no fields. |
| // |
| // We keep the FormData::child_frame and FrameData::parent_form relations in |
| // symmetry to ease reasoning about the tree. Since children are predetermined |
| // by FormData:child_frame, we do so by always updating the childrens' |
| // FrameData::parent_form. |
| // |
| // If |form| is not part of a frame-transcending form, the function has minimal |
| // overhead. That is, if |form|'s FormData::child_frames is empty and |form|'s |
| // frame has no parent frame, the function reduces to a few moves and index |
| // lookups. |
| // |
| // If the FrameData::parent_form of |form|'s frame is not set although a parent |
| // frame exists, the function triggers a reparse in the parent frame. This will |
| // trigger an UpdateTreeOfRendererForm() for the true parent form (amongst |
| // others), which will then also set the child frame's FrameData::parent_form. |
| void FormForest::UpdateTreeOfRendererForm(FormData* form, |
| ContentAutofillDriver* driver) { |
| AFCHECK(form, return ); |
| AFCHECK(driver, return ); |
| AFCHECK(form->host_frame, return ); |
| |
| FrameData* frame = GetOrCreateFrame(form->host_frame); |
| AFCHECK(frame, return ); |
| AFCHECK(!frame->driver || frame->driver == driver, return ); |
| frame->driver = driver; |
| |
| content::RenderFrameHost* rfh = driver->render_frame_host(); |
| AFCHECK(rfh, return ); |
| AFCHECK(form->host_frame == LocalFrameToken(rfh->GetFrameToken().value()), |
| return ); |
| |
| // Moves |form| into its |frame|'s FrameData::child_forms, with a special |
| // treatment of the fields: |form|'s fields are replaced with |old_form|'s |
| // fields if |form| had previously been known as |old_form|. Retaining the old |
| // fields is important because |old_form| may have been a root and therefore |
| // contained fields from other forms. The relevant old and new fields will be |
| // moved to |form|'s root later. |
| std::vector<FormFieldData> form_fields = std::move(form->fields); |
| bool child_frames_changed; |
| if (FormData* old_form = GetFormData(form->global_id(), frame)) { |
| form->fields = std::move(old_form->fields); |
| child_frames_changed = old_form->child_frames != form->child_frames; |
| *old_form = std::move(*form); |
| form = old_form; |
| } else { |
| DCHECK(!base::Contains(frame->child_forms, form->unique_renderer_id, |
| &FormData::unique_renderer_id)); |
| form->fields = {}; |
| child_frames_changed = false; |
| frame->child_forms.push_back(std::move(*form)); |
| form = &frame->child_forms.back(); |
| } |
| DCHECK(form && form == GetFormData(form->global_id())); |
| |
| // Do *NOT* modify any FrameData::child_forms after this line! |
| // Doing so may resize FrameData::child_forms, while we keep raw pointers to |
| // individual items. |
| |
| // Traverses |form|'s tree starting at its root to set FrameData::parent_form |
| // and update the root form's fields, while keeping them in DOM order: |
| // - adds the fields from |form_fields| to `root->fields`; |
| // - removes fields from `root->fields` that originated from |old_form|'s |
| // renderer form but are not in |form_fields|; |
| // - removes fields that originated from frames that have been removed from |
| // |form| as well as fields from their descendant frames; |
| // - moves fields from former root forms that are now children of |form| |
| // from that form to `root->fields`. |
| // |
| // We perform all these tasks in a single pre-order depth-first traversal of |
| // |form|'s tree. That is, every field of a form is visited before/after |
| // recursing into the subframes of the form that come before/after that field. |
| // Only if |form|'s tree is trivial (consists of a single form), we can take |
| // an abbreviation and just move the fields. |
| if (!frame->parent_form && form->child_frames.empty() && |
| !child_frames_changed) { |
| form->fields = std::move(form_fields); |
| } else { |
| FrameAndForm root = GetRoot(form->global_id()); |
| AFCHECK(root, return ); |
| // Moves the first |max_number_of_fields_to_be_moved| fields that originated |
| // from the renderer form |source_form| from |source| to |target|. |
| // Default-initializes each source field after its move to prevent it from |
| // being moved in a future call. |
| // Calling MoveFields() repeatedly to move one field at a time runs in |
| // quadratic time due to the comparisons of the fields' renderer_form_id(). |
| // The cost could be reduced to linear time by reversing the source vectors |
| // and moving fields from the back of |source|. However, the cost of |
| // reversing likely exceeds the cost of the renderer_form_id() comparisons. |
| auto MoveFields = [](size_t max_number_of_fields_to_be_moved, |
| FormGlobalId source_form, |
| std::vector<FormFieldData>& source, |
| std::vector<FormFieldData>& target) { |
| DCHECK_NE(&source, &target); |
| size_t number_of_fields_moved = 0; |
| for (FormFieldData& f : source) { |
| if (f.renderer_form_id() == source_form) { |
| if (++number_of_fields_moved > max_number_of_fields_to_be_moved) |
| break; |
| target.push_back(std::move(f)); |
| f = {}; // Clobber |f| in |source| so future MoveFields() skip it. |
| DCHECK(!f.renderer_form_id()); |
| } |
| } |
| }; |
| |
| // The |frontier| contains the Nodes to be expanded in LIFO order. Each Node |
| // represents the range of fields and the next frame to be visited. |
| // |
| // A Node consists of |
| // - the Node::form whose fields and child frames are to be visited; |
| // - the Node::frame that hosts Node::form; |
| // - the Node::next_frame to be traversed after visiting the fields that |
| // precede this frame in the form. |
| // |
| // For example, consider a Node |n| whose form has 4 child frames and at |
| // least 5 fields: |
| // <form> |
| // <iframe> |
| // <input id="0"> |
| // <input id="1"> |
| // <input id="2"> |
| // <iframe> |
| // <iframe> |
| // <input id="3"> |
| // <input id="4"> |
| // <iframe> |
| // <input> |
| // ... |
| // </form> |
| // The relative order of fields and frames is encoded in |
| // `n.form->child_frames[i].predecessor`: the value represents which field |
| // from `n.form`, if any, precedes the frame in DOM order. In this example: |
| // - `n.form->child_frames[0].predecessor == -1`; |
| // - `n.form->child_frames[1].predecessor == 2`; |
| // - `n.form->child_frames[2].predecessor == 2`; |
| // - `n.form->child_frames[3].predecessor == 4`. |
| // Then the represented field range depends of the next frame: |
| // - `n.next_frame == 0` represents the empty range; |
| // - `n.next_frame == 1` represents the fields with indices 0, 1, 2; |
| // - `n.next_frame == 2` represents the empty range; |
| // - `n.next_frame == 3` represents the fields with indices 3, 4; |
| // - `n.next_frame == 4` represents the fields with indices greater than 4. |
| // |
| // Note that the fields of `n.form` are typically not stored in |
| // `n.form->fields`: |
| // - If `n.form` refers to the passed |form| (`n.form == form`), then these |
| // fields have been moved to |form_fields|. |
| // - Otherwise, they are stored in `n.form`'s root form. |
| // |
| // Since the relative order of the fields is the same as in the renderer |
| // form, the index |i| of a field from `n.form` translates to the |i|th |
| // field whose `renderer_form_id()` is `n.form->global_id()` in the |
| // respective vector that contains the fields from `n.form`. |
| // |
| // When the traversal expands a Node |n| from the |frontier|, we |
| // - pull these fields from |roots_on_path| (defined below) or |form_fields| |
| // and append them to |root_fields|, the future fields of the root; |
| // - push on the |frontier| |
| // - one Node for each child form in that frame, and |
| // - a Node with incremented `n.next_frame` for the successor frame and |
| // its preceding fields |
| // in an order that ensures DOM-order traversal. |
| // The latter step is omitted if Node::next_frame is out of bounds, |
| // indicating indicating that all fields and frames have been visited |
| // already. |
| struct Node { |
| FrameData* frame; // Not null. |
| FormData* form; // Not null. |
| size_t next_frame; // In the range [0, `form->child_frames.size()`]. |
| }; |
| base::stack<Node> frontier; |
| frontier.push({root.frame, root.form, 0}); |
| |
| // Fields to be moved to |root_fields| may not just come from |form_fields| |
| // or `root.form->fields`, but also from forms that used to be roots but |
| // have become children of |form|. These former roots contain the fields |
| // from their subtrees. To access these fields, we store the fields from the |
| // root as well as from former root nodes (unless they have no fields) in |
| // |roots_on_path|. |
| base::stack<FormData*> roots_on_path; |
| |
| // New fields of the root form. To be populated in the tree traversal. |
| std::vector<FormFieldData> root_fields; |
| root_fields.reserve(root.form->fields.size() + form_fields.size()); |
| |
| size_t emergency_counter = 0; |
| while (!frontier.empty()) { |
| if (++emergency_counter > 1000) { |
| AFCHECK(false); |
| break; |
| } |
| |
| Node n = frontier.top(); |
| frontier.pop(); |
| AFCHECK(n.frame, continue); |
| AFCHECK(n.form, continue); |
| |
| // Pushes the current form on |roots_on_path| only if this is the first |
| // time we encounter the form in the traversal (Node::next_frame == 0). |
| if (n.next_frame == 0 && (n.form == root.form || !n.form->fields.empty())) |
| roots_on_path.push(n.form); |
| AFCHECK(!roots_on_path.empty(), continue); |
| |
| std::vector<FormFieldData>& source = |
| n.form->global_id() == form->global_id() |
| ? form_fields |
| : roots_on_path.top()->fields; |
| // Moves the next fields from `n.form` to |root_fields|. |
| // |
| // If `n.next_frame` is in bounds, pushes on the |frontier| |
| // - one Node for each child form in that frame, and |
| // - a Node with incremented `n.next_frame` for the successor frame and |
| // its preceding fields. |
| // The order of these pushes is inversed so that they're expanded in DOM |
| // order. |
| // |
| // If `n.next_frame` is out of bounds, all paths through |n| have been |
| // fully traversed, so if applicable, we pop `n.form` from |
| // |roots_on_path|. |
| if (n.next_frame < n.form->child_frames.size()) { |
| // [begin, end) is the range of fields from `n.form` before |
| // `n.next_frame`. |
| size_t begin = base::checked_cast<size_t>( |
| n.next_frame > 0 |
| ? n.form->child_frames[n.next_frame - 1].predecessor + 1 |
| : 0); |
| size_t end = base::checked_cast<size_t>( |
| n.form->child_frames[n.next_frame].predecessor + 1); |
| AFCHECK(begin <= end, continue); |
| MoveFields(end - begin, n.form->global_id(), source, root_fields); |
| frontier.push( |
| {.frame = n.frame, .form = n.form, .next_frame = n.next_frame + 1}); |
| absl::optional<LocalFrameToken> local_child = |
| Resolve(*n.frame, n.form->child_frames[n.next_frame].token); |
| FrameData* child_frame; |
| // If the FrameData does not exist yet, creating it now and setting the |
| // FrameData::parent_form avoids a reparse if and when a form is seen in |
| // this child frame. |
| if (local_child && (child_frame = GetOrCreateFrame(*local_child))) { |
| child_frame->parent_form = n.form->global_id(); |
| for (size_t i = child_frame->child_forms.size(); i > 0; --i) { |
| frontier.push({.frame = child_frame, |
| .form = &child_frame->child_forms[i - 1], |
| .next_frame = 0}); |
| } |
| } |
| } else { |
| MoveFields(std::numeric_limits<size_t>::max(), n.form->global_id(), |
| source, root_fields); |
| if (n.form == roots_on_path.top()) { |
| roots_on_path.top()->fields.clear(); |
| roots_on_path.pop(); |
| } |
| } |
| } |
| root.form->fields = std::move(root_fields); |
| } |
| |
| // Triggers a reparse in a parent frame if `frame->parent_form` is unset. |
| // |
| // If |frame| has a parent frame and is not a fenced frame, there are two |
| // scenarios where `frame->parent_form` is unset: |
| // - The parent frame hasn't been processed by UpdateTreeOfRendererForm() yet. |
| // - The parent form did not include the correct token of |frame| in its |
| // FormData::child_frames (for example, because loading a cross-origin page |
| // into the <iframe> has changed |frame|'s FrameToken). |
| // |
| // In this case, we trigger a reparse in the parent frame. As a result, |
| // UpdateTreeOfRendererForm() will be called for the parent form, whose |
| // FormData::child_frames now include |frame|. |
| content::RenderFrameHost* parent_rfh = rfh->GetParent(); |
| if (!frame->parent_form && parent_rfh && !IsFencedFrameRoot(rfh)) { |
| LocalFrameToken parent_frame_token( |
| LocalFrameToken(parent_rfh->GetFrameToken().value())); |
| FrameData* parent_frame = GetFrameData(parent_frame_token); |
| if (parent_frame && parent_frame->driver) { |
| AFCHECK(parent_frame->driver->render_frame_host() == parent_rfh); |
| parent_frame->driver->TriggerReparse(); |
| } |
| } |
| } |
| |
| const FormData& FormForest::GetBrowserFormOfRendererForm( |
| const FormData& renderer_form) const { |
| AFCHECK(renderer_form.host_frame, return renderer_form); |
| |
| // For calling non-const-qualified getters. |
| FormForest& mutable_this = *const_cast<FormForest*>(this); |
| const FormData* form = mutable_this.GetRoot(renderer_form.global_id()).form; |
| AFCHECK(form, return renderer_form); |
| return *form; |
| } |
| |
| std::vector<FormData> FormForest::GetRendererFormsOfBrowserForm( |
| const FormData& browser_form, |
| const url::Origin& triggered_origin, |
| const base::flat_map<FieldGlobalId, ServerFieldType>& field_type_map) |
| const { |
| AFCHECK(browser_form.host_frame, return {browser_form}); |
| |
| // For calling non-const-qualified getters. |
| FormForest& mutable_this = *const_cast<FormForest*>(this); |
| |
| // Reinstates the fields of |browser_form| in copies of their renderer forms. |
| // See the function's documentation in the header for details on the security |
| // policy |IsSafeToFill|. |
| std::vector<FormData> renderer_forms; |
| for (const FormFieldData& browser_field : browser_form.fields) { |
| FormGlobalId form_id = browser_field.renderer_form_id(); |
| |
| // Finds or creates the renderer form from which |browser_field| originated. |
| // The form with |form_id| may have been removed from the tree, for example, |
| // in between of a refill. |
| auto renderer_form = |
| base::ranges::find(renderer_forms.rbegin(), renderer_forms.rend(), |
| form_id, &FormData::global_id); |
| if (renderer_form == renderer_forms.rend()) { |
| const FormData* original_form = mutable_this.GetFormData(form_id); |
| if (!original_form) // The form with |form_id| may have been removed. |
| continue; |
| renderer_forms.push_back(*original_form); |
| renderer_form = renderer_forms.rbegin(); |
| renderer_form->fields.clear(); // In case |original_form| is a root form. |
| } |
| DCHECK(renderer_form != renderer_forms.rend()); |
| |
| auto IsSafeToFill = [&mutable_this, &browser_form, &renderer_form, |
| &triggered_origin, |
| &field_type_map](const FormFieldData& field) { |
| // Non-sensitive values may be filled into fields that belong to the main |
| // frame's origin. This is independent of the origin of the field that |
| // triggered the autofill, |triggered_origin|. |
| auto IsSensitiveFieldType = [](ServerFieldType field_type) { |
| switch (field_type) { |
| case CREDIT_CARD_TYPE: |
| case CREDIT_CARD_NAME_FULL: |
| case CREDIT_CARD_NAME_FIRST: |
| case CREDIT_CARD_NAME_LAST: |
| return false; |
| default: |
| return true; |
| } |
| }; |
| // Fields in frames whose permissions policy allows shared-autofill may |
| // be filled if the |triggered_origin| is the main origin. |
| auto has_shared_autofill_permission = [&mutable_this]( |
| LocalFrameToken frame_token) { |
| FrameData* frame = mutable_this.GetFrameData(frame_token); |
| return frame && frame->driver && frame->driver->render_frame_host() && |
| frame->driver->render_frame_host()->IsFeatureEnabled( |
| blink::mojom::PermissionsPolicyFeature::kSharedAutofill); |
| }; |
| |
| const url::Origin& main_origin = browser_form.main_frame_origin; |
| auto it = field_type_map.find(field.global_id()); |
| ServerFieldType field_type = |
| it != field_type_map.end() ? it->second : UNKNOWN_TYPE; |
| return field.origin == triggered_origin || |
| (field.origin == main_origin && |
| !IsSensitiveFieldType(field_type)) || |
| (triggered_origin == main_origin && |
| has_shared_autofill_permission(renderer_form->host_frame)); |
| }; |
| |
| renderer_form->fields.push_back(browser_field); |
| if (!IsSafeToFill(renderer_form->fields.back())) |
| renderer_form->fields.back().value.clear(); |
| } |
| |
| return renderer_forms; |
| } |
| |
| } // namespace internal |
| } // namespace autofill |