[Autofill] Deep compare forms and fields when extracting forms
In this change, we introduce a new deep comparison function for FormData
and FormFieldData structs used when updating the FormCache.
The old ExtractNewForms() is responsible for extracting new and changed
forms in the DOM. It is called on page load or when a new field is added
(or called by TriggerReparse()). When called, it also re-extracts forms
that have other changes than having a new field.
This CL changes the behaviour of the kAutofillUseNewFormExtraction
experiment to have the same behaviour as the non-experimental
ExtractNewForms().
To this end, this CL introduces a DeepEqual() function for FormData and
FormFieldData. These functions compare *all* attributes. To improve
performance, they do a shallow comparison first. Preliminary testing
has shown that the shallow and deep comparisons differ in 4% of
comparisons; a metric is added in a followup CL.
To better reflect this behaviour of re-extracting not just new forms or
forms with new fields but all changed forms, this CL renames the
function to UpdateFormCache()
Bug: 1215333
Change-Id: I7af67bb485bdb28cd7b13bb6873c3d7e1462f749
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3138805
Reviewed-by: Christoph Schwering <schwering@google.com>
Reviewed-by: Matthias Körber <koerber@google.com>
Commit-Queue: Eva Herencsárová <evih@google.com>
Cr-Commit-Position: refs/heads/main@{#922624}
diff --git a/components/autofill/content/renderer/form_cache.cc b/components/autofill/content/renderer/form_cache.cc
index 631bac1e..3b24f71 100644
--- a/components/autofill/content/renderer/form_cache.cc
+++ b/components/autofill/content/renderer/form_cache.cc
@@ -120,27 +120,13 @@
FormCache::FormCache(WebLocalFrame* frame) : frame_(frame) {}
FormCache::~FormCache() = default;
-struct FormCache::CachedFormData {
- explicit CachedFormData(const FormData& form)
- : child_frames(form.child_frames) {
- for (const auto& field : form.fields)
- field_renderer_ids.push_back(field.unique_renderer_id);
- }
- CachedFormData(CachedFormData&& cached_form) = default;
- CachedFormData& operator=(CachedFormData&& cached_form) = default;
- ~CachedFormData() = default;
-
- std::vector<FieldRendererId> field_renderer_ids;
- std::vector<FrameTokenWithPredecessor> child_frames;
-};
-
void FormCache::MaybeUpdateParsedFormsPeak() {
- peak_size_of_parsed_forms_ =
- std::max(peak_size_of_parsed_forms_,
- std::max(parsed_forms_rendererid_.size(), parsed_forms_.size()));
+ peak_size_of_parsed_forms_ = std::max(
+ peak_size_of_parsed_forms_,
+ std::max(parsed_forms_by_renderer_id_.size(), parsed_forms_.size()));
}
-std::vector<FormData> FormCache::ModifiedExtractNewForms(
+std::vector<FormData> FormCache::UpdateFormCache(
const FieldDataManager* field_data_manager) {
std::vector<FormData> forms;
WebDocument document = frame_->GetDocument();
@@ -156,11 +142,11 @@
// Log an error message for deprecated attributes, but only the first time
// the form is parsed.
- bool log_deprecation_messages = parsed_forms_rendererid_.empty();
+ bool log_deprecation_messages = parsed_forms_by_renderer_id_.empty();
- std::map<FormRendererId, CachedFormData> old_parsed_forms =
- std::move(parsed_forms_rendererid_);
- parsed_forms_rendererid_.clear();
+ std::map<FormRendererId, FormData> old_parsed_forms =
+ std::move(parsed_forms_by_renderer_id_);
+ parsed_forms_by_renderer_id_.clear();
size_t frame_depth = form_util::GetFrameDepth(frame_);
size_t num_fields_seen = 0;
@@ -179,8 +165,8 @@
num_frames_seen += form.child_frames.size();
if (num_fields_seen > kMaxParseableFields) {
- // Restore |parsed_forms_rendererid_|.
- parsed_forms_rendererid_.insert(
+ // Restore |parsed_forms_by_renderer_id_|.
+ parsed_forms_by_renderer_id_.insert(
std::make_move_iterator(old_parsed_forms.begin()),
std::make_move_iterator(old_parsed_forms.end()));
PruneInitialValueCaches(observed_unique_renderer_ids);
@@ -196,20 +182,16 @@
// Store only "interesting" forms.
if (IsFormInteresting(form, num_editable_elements)) {
- DCHECK(parsed_forms_rendererid_.find(form.unique_renderer_id) ==
- parsed_forms_rendererid_.end());
- parsed_forms_rendererid_.insert(
- {form.unique_renderer_id, CachedFormData(form)});
- // If it is a new form or an input field of the form changed,
- // re-extract the form.
+ DCHECK(parsed_forms_by_renderer_id_.find(form.unique_renderer_id) ==
+ parsed_forms_by_renderer_id_.end());
+ // Re-extract the form if it is a new form or the form has changed.
auto it = old_parsed_forms.find(form.unique_renderer_id);
if (it == old_parsed_forms.end() ||
- form.child_frames != it->second.child_frames ||
- !base::ranges::equal(form.fields, it->second.field_renderer_ids, {},
- &FormFieldData::unique_renderer_id)) {
+ !FormData::DeepEqual(std::move(it->second), form)) {
SaveInitialValues(control_elements);
- forms.push_back(std::move(form));
+ forms.push_back(form);
}
+ parsed_forms_by_renderer_id_[form.unique_renderer_id] = std::move(form);
}
MaybeUpdateParsedFormsPeak();
return true;
@@ -264,7 +246,7 @@
std::vector<FormData> FormCache::ExtractNewForms(
const FieldDataManager* field_data_manager) {
if (base::FeatureList::IsEnabled(features::kAutofillUseNewFormExtraction)) {
- return ModifiedExtractNewForms(field_data_manager);
+ return UpdateFormCache(field_data_manager);
}
std::vector<FormData> forms;
WebDocument document = frame_->GetDocument();
@@ -392,8 +374,8 @@
}
void FormCache::Reset() {
- // Record the size of |parsed_forms_| every time it reaches its peak size. The
- // peak size is reached right before the cache is cleared.
+ // Record the size of the cached parsed forms every time it reaches its peak
+ // size. The peak size is reached right before the cache is cleared.
UMA_HISTOGRAM_COUNTS_1000("Autofill.FormCacheSize",
peak_size_of_parsed_forms_);
@@ -401,7 +383,7 @@
parsed_forms_.clear();
// TODO(crbug/1215333): Remove after the `AutofillUseNewFormExtraction`
// feature is deleted.
- parsed_forms_rendererid_.clear();
+ parsed_forms_by_renderer_id_.clear();
initial_select_values_.clear();
initial_checked_state_.clear();
fields_eligible_for_manual_filling_.clear();
diff --git a/components/autofill/content/renderer/form_cache.h b/components/autofill/content/renderer/form_cache.h
index 31efdc6..846893d 100644
--- a/components/autofill/content/renderer/form_cache.h
+++ b/components/autofill/content/renderer/form_cache.h
@@ -44,8 +44,7 @@
// summed over all forms, in addition to the per-form limits in
// form_util::FormOrFieldsetsToFormData():
// - if the number of fields over all forms exceeds |kMaxParseableFields|,
- // only a subset of forms is returned which does not exceed the limit is
- // returned;
+ // only a subset of forms is returned which does not exceed the limit;
// - if the number of frames over all forms exceeds MaxParseableFrames(), all
// forms are returned but only a subset of them have non-empty
// FormData::child_frames.
@@ -55,9 +54,25 @@
const FieldDataManager* field_data_manager);
// Modified version of ExtractNewForms(). It is used only if
- // `AutofillUseNewFormExtraction` feature is enabled. Remove after the feature
- // is deleted.
- std::vector<FormData> ModifiedExtractNewForms(
+ // `AutofillUseNewFormExtraction` feature is enabled.
+ // TODO(crbug/1215333): Remove ExtractNewForms() after the feature is deleted.
+ //
+ // Extracts and returns the new or modified forms in the |frame_|.
+ //
+ // To reduce the computational cost, we limit the number of fields and frames
+ // summed over all forms, in addition to the per-form limits in
+ // form_util::FormOrFieldsetsToFormData():
+ // - if the number of fields over all forms exceeds |kMaxParseableFields|,
+ // only a subset of forms is returned which does not exceed the limit;
+ // - if the number of frames over all forms exceeds MaxParseableFrames(), all
+ // forms are returned but only a subset of them have non-empty
+ // FormData::child_frames.
+ // In either case, the subset is chosen so that the returned list of forms
+ // does not exceed the limits of fields and frames.
+ //
+ // Updates |parsed_forms_by_renderer_id_| to contain the forms that are
+ // currently in the DOM.
+ std::vector<FormData> UpdateFormCache(
const FieldDataManager* field_data_manager);
// Resets the forms.
@@ -83,10 +98,6 @@
private:
friend class FormCacheTestApi;
- // Holds a subset of FormData members. ModifiedExtractNewForms() re-extracts
- // a form only if these values have changed.
- struct CachedFormData;
-
// Scans |control_elements| and returns the number of editable elements.
// Also logs warning messages for deprecated attribute if
// |log_deprecation_messages| is set.
@@ -120,14 +131,14 @@
std::set<FormData, FormData::IdentityComparator> parsed_forms_;
// Same as |parsed_forms_|, but moved to a different type. It is used only if
- // `AutofillUseNewFormExtraction` feature is enabled. Remove after the feature
- // is deleted.
- std::map<FormRendererId, CachedFormData> parsed_forms_rendererid_;
+ // `AutofillUseNewFormExtraction` feature is enabled.
+ // TODO(crbug/1215333): Remove |parsed_forms_| after the feature is deleted.
+ std::map<FormRendererId, FormData> parsed_forms_by_renderer_id_;
// Stores the peak size of the cached forms for `Autofill.FormCacheSize`
// metric. The cached forms are stored in |parsed_forms_| or
- // |parsed_forms_rendererid_| depending on the `AutofillUseNewFormExtraction`
- // feature.
+ // |parsed_forms_by_renderer_id_| depending on the
+ // `AutofillUseNewFormExtraction` feature.
// TODO(crbug/1215333): Remove after the experiment is completed.
size_t peak_size_of_parsed_forms_ = 0;
diff --git a/components/autofill/content/renderer/form_cache_browsertest.cc b/components/autofill/content/renderer/form_cache_browsertest.cc
index d307b29a..235658b 100644
--- a/components/autofill/content/renderer/form_cache_browsertest.cc
+++ b/components/autofill/content/renderer/form_cache_browsertest.cc
@@ -118,6 +118,26 @@
EXPECT_TRUE(unowned_form->child_frames.empty());
}
+// Test if the form gets re-extracted after a label change.
+TEST_P(ParameterizedFormCacheBrowserTest, ExtractFormAfterDynamicFieldChange) {
+ LoadHTML(R"(
+ <form id="f"><input></form>
+ <form id="g"> <label id="label">Name</label><input></form>
+ )");
+
+ FormCache form_cache(GetMainFrame());
+ std::vector<FormData> forms =
+ form_cache.ExtractNewForms(/*field_data_manager=*/nullptr);
+ EXPECT_EQ(2u, forms.size());
+
+ ExecuteJavaScriptForTests(R"(
+ document.getElementById("label").innerHTML = "Last Name";
+ )");
+
+ forms = form_cache.ExtractNewForms(/*field_data_manager=*/nullptr);
+ EXPECT_EQ(1u, forms.size());
+}
+
class FormCacheIframeBrowserTest : public ParameterizedFormCacheBrowserTest {
public:
FormCacheIframeBrowserTest() {
@@ -562,7 +582,8 @@
// Check if the modified form with the same rendererId was not added again.
if (base::FeatureList::IsEnabled(features::kAutofillUseNewFormExtraction)) {
- EXPECT_EQ(1u, FormCacheTestApi(&form_cache).parsed_forms_rendererid_size());
+ EXPECT_EQ(1u,
+ FormCacheTestApi(&form_cache).parsed_forms_by_renderer_id_size());
} else {
EXPECT_EQ(1u, FormCacheTestApi(&form_cache).parsed_forms_size());
}
@@ -600,7 +621,8 @@
// Check if the modified form with the same rendererId was not added again.
// (We expect that all the unowned fields have the same rendererId.)
if (base::FeatureList::IsEnabled(features::kAutofillUseNewFormExtraction)) {
- EXPECT_EQ(1u, FormCacheTestApi(&form_cache).parsed_forms_rendererid_size());
+ EXPECT_EQ(1u,
+ FormCacheTestApi(&form_cache).parsed_forms_by_renderer_id_size());
} else {
EXPECT_EQ(1u, FormCacheTestApi(&form_cache).parsed_forms_size());
}
@@ -618,7 +640,8 @@
form_cache.ExtractNewForms(nullptr);
EXPECT_EQ(1u, GetMainFrame()->GetDocument().Forms().size());
- EXPECT_EQ(0u, FormCacheTestApi(&form_cache).parsed_forms_rendererid_size());
+ EXPECT_EQ(0u,
+ FormCacheTestApi(&form_cache).parsed_forms_by_renderer_id_size());
}
// Test that the FormCache never contains more than |kMaxParseableFields|
@@ -642,7 +665,7 @@
EXPECT_EQ(kMaxParseableFields + 1,
GetMainFrame()->GetDocument().Forms().size());
EXPECT_EQ(kMaxParseableFields,
- FormCacheTestApi(&form_cache).parsed_forms_rendererid_size());
+ FormCacheTestApi(&form_cache).parsed_forms_by_renderer_id_size());
}
// Test that FormCache::ExtractNewForms() limits the number of total fields by
diff --git a/components/autofill/content/renderer/form_cache_test_api.h b/components/autofill/content/renderer/form_cache_test_api.h
index e4845db..1bff306 100644
--- a/components/autofill/content/renderer/form_cache_test_api.h
+++ b/components/autofill/content/renderer/form_cache_test_api.h
@@ -40,8 +40,8 @@
// TODO(crbug/1215333): Remove once the `AutofillUseNewFormExtraction` feature
// is launched.
- size_t parsed_forms_rendererid_size() {
- return form_cache_->parsed_forms_rendererid_.size();
+ size_t parsed_forms_by_renderer_id_size() {
+ return form_cache_->parsed_forms_by_renderer_id_.size();
}
private:
diff --git a/components/autofill/core/common/form_data.cc b/components/autofill/core/common/form_data.cc
index 453c07b..91b747f 100644
--- a/components/autofill/core/common/form_data.cc
+++ b/components/autofill/core/common/form_data.cc
@@ -174,6 +174,27 @@
a.fields, b.fields, FormFieldData::IdentityComparator());
}
+// static
+bool FormData::DeepEqual(const FormData& a, const FormData& b) {
+ // We compare all unique identifiers first, including the field renderer IDs,
+ // because we expect most inequalities to be due to them.
+ if (a.unique_renderer_id != b.unique_renderer_id ||
+ a.child_frames != b.child_frames ||
+ !base::ranges::equal(a.fields, b.fields, {},
+ &FormFieldData::unique_renderer_id,
+ &FormFieldData::unique_renderer_id)) {
+ return false;
+ }
+
+ if (a.name != b.name || a.id_attribute != b.id_attribute ||
+ a.name_attribute != b.name_attribute || a.url != b.url ||
+ a.action != b.action || a.is_form_tag != b.is_form_tag ||
+ !base::ranges::equal(a.fields, b.fields, &FormFieldData::DeepEqual)) {
+ return false;
+ }
+ return true;
+}
+
bool FormHasNonEmptyPasswordField(const FormData& form) {
for (const auto& field : form.fields) {
if (field.IsPasswordInputElement()) {
diff --git a/components/autofill/core/common/form_data.h b/components/autofill/core/common/form_data.h
index 0be6350..9cdec05 100644
--- a/components/autofill/core/common/form_data.h
+++ b/components/autofill/core/common/form_data.h
@@ -60,6 +60,9 @@
bool operator()(const FormData& a, const FormData& b) const;
};
+ // Returns true if all members of forms |a| and |b| are identical.
+ static bool DeepEqual(const FormData& a, const FormData& b);
+
FormData();
FormData(const FormData&);
FormData& operator=(const FormData&);
@@ -71,17 +74,22 @@
// Must not be leaked to renderer process. See FieldGlobalId for details.
FormGlobalId global_id() const { return {host_frame, unique_renderer_id}; }
- // Returns true if two forms are the same, not counting the values of the
- // form elements.
+ // TODO(crbug/1211834): This function is deprecated. Use FormData::DeepEqual()
+ // instead.
+ // Returns true if two forms are the same, not counting the values of the form
+ // elements.
bool SameFormAs(const FormData& other) const;
+ // TODO(crbug/1211834): This function is deprecated.
// Same as SameFormAs() except calling FormFieldData.SimilarFieldAs() to
// compare fields.
bool SimilarFormAs(const FormData& other) const;
+ // TODO(crbug/1211834): This function is deprecated.
// If |form| is the same as this from the POV of dynamic refills.
bool DynamicallySameFormAs(const FormData& form) const;
+ // TODO(crbug/1211834): This function is deprecated.
// Allow FormData to be a key in STL containers.
bool operator<(const FormData& form) const;
diff --git a/components/autofill/core/common/form_field_data.cc b/components/autofill/core/common/form_field_data.cc
index 07fb7a4b..2b82fd7 100644
--- a/components/autofill/core/common/form_field_data.cc
+++ b/components/autofill/core/common/form_field_data.cc
@@ -296,6 +296,12 @@
return properties_mask & kAutofilled;
}
+// static
+bool FormFieldData::DeepEqual(const FormFieldData& a, const FormFieldData& b) {
+ return a.unique_renderer_id == b.unique_renderer_id &&
+ IdentityTuple(a) == IdentityTuple(b);
+}
+
void SerializeFormFieldData(const FormFieldData& field_data,
base::Pickle* pickle) {
pickle->WriteInt(kFormFieldDataPickleVersion);
diff --git a/components/autofill/core/common/form_field_data.h b/components/autofill/core/common/form_field_data.h
index a4faa3b9..49837e5a 100644
--- a/components/autofill/core/common/form_field_data.h
+++ b/components/autofill/core/common/form_field_data.h
@@ -69,12 +69,16 @@
using RoleAttribute = mojom::FormFieldData_RoleAttribute;
using LabelSource = mojom::FormFieldData_LabelSource;
+ // TODO(crbug/1211834): This comparator is deprecated.
// Less-than relation for STL containers. Compares only members needed to
// uniquely identify a field.
struct IdentityComparator {
bool operator()(const FormFieldData& a, const FormFieldData& b) const;
};
+ // Returns true if all members of fields |a| and |b| are identical.
+ static bool DeepEqual(const FormFieldData& a, const FormFieldData& b);
+
FormFieldData();
FormFieldData(const FormFieldData&);
FormFieldData& operator=(const FormFieldData&);
@@ -92,16 +96,20 @@
// for details on the distinction between renderer and browser forms.
FormGlobalId renderer_form_id() const { return {host_frame, host_form_id}; }
+ // TODO(crbug/1211834): This function is deprecated. Use
+ // FormFieldData::DeepEqual() instead.
// Returns true if both fields are identical, ignoring value- and
// parsing related members.
// See also SimilarFieldAs(), DynamicallySameFieldAs().
bool SameFieldAs(const FormFieldData& field) const;
+ // TODO(crbug/1211834): This function is deprecated.
// Returns true if both fields are identical, ignoring members that
// are typically changed dynamically.
// Strictly weaker than SameFieldAs().
bool SimilarFieldAs(const FormFieldData& field) const;
+ // TODO(crbug/1211834): This function is deprecated.
// Returns true if both forms are equivalent from the POV of dynamic refills.
// Strictly weaker than SameFieldAs(): replaces equality of |is_focusable| and
// |role| with equality of IsVisible().