blob: d41617d668a9c9a8b58edc15357e12ea550e8c63 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/optional.h"
#include "base/strings/string_piece.h"
#include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/autofill_metrics.h"
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/field_types.h"
#include "components/autofill/core/browser/form_types.h"
#include "components/autofill/core/browser/proto/server.pb.h"
#include "url/gurl.h"
#include "url/origin.h"
namespace base {
class TimeTicks;
namespace ukm {
class UkmRecorder;
namespace autofill {
// Password attributes (whether a password has special symbols, numeric, etc.)
// NOTE(review): the enumerator list and the closing brace of this enum are not
// present in this copy of the file -- confirm against the original header.
enum class PasswordAttribute {
// Forward declarations; these types are only declared, not defined, here.
struct FormData;
struct FormDataPredictions;
// FormStructure stores a single HTML form together with the values entered
// in the fields along with additional information needed by Autofill.
class FormStructure {
// Constructs a FormStructure from the given |form|.
explicit FormStructure(const FormData& form);
virtual ~FormStructure();
// Runs several heuristics against the form fields to determine their possible
// types. If |ukm_recorder| and |source_id| are specified, logs UKM for
// the form structure corresponding to the source mapped from the |source_id|.
void DetermineHeuristicTypes(ukm::UkmRecorder* ukm_recorder,
ukm::SourceId source_id);
// Encodes the proto |upload| request from this FormStructure; |upload| is the
// output parameter that receives the encoded request.
// In some cases, a |login_form_signature| is included as part of the upload.
// This field is empty when sending upload requests for non-login forms.
// NOTE(review): |available_field_types|, |form_was_autofilled|,
// |observed_submission| and the meaning of the returned bool are not
// described here -- document them from the implementation.
bool EncodeUploadRequest(const ServerFieldTypeSet& available_field_types,
bool form_was_autofilled,
const std::string& login_form_signature,
bool observed_submission,
autofill::AutofillUploadContents* upload) const;
// Encodes the proto |query| request for the set of |forms| that are valid
// (see implementation for details on which forms are not included in the
// query). The form signatures used in the Query request are output in
// |encoded_signatures|. All valid fields are encoded in |query|.
static bool EncodeQueryRequest(const std::vector<FormStructure*>& forms,
std::vector<std::string>* encoded_signatures,
autofill::AutofillQueryContents* query);
// Parses the field types from the server query response. |forms| must be the
// same as the one passed to EncodeQueryRequest when constructing the query.
// NOTE(review): the parameter list is cut off after |forms| in this copy of
// the file -- confirm the remaining parameter(s) against the original header.
static void ParseQueryResponse(std::string response,
const std::vector<FormStructure*>& forms,
// Returns predictions using the details from the given |form_structures| and
// their fields' predicted types.
static std::vector<FormDataPredictions> GetFieldTypePredictions(
const std::vector<FormStructure*>& form_structures);
// Returns whether sending autofill field metadata to the server is enabled.
static bool IsAutofillFieldMetadataEnabled();
// Returns the form signature as a string.
std::string FormSignatureAsStr() const;
// Runs a quick heuristic to rule out forms that are obviously not
// auto-fillable, like google/yahoo/msn search, etc.
bool IsAutofillable() const;
// Returns whether |this| form represents a complete Credit Card form, which
// consists of having at least a credit card number field and an expiration
// field.
bool IsCompleteCreditCardForm() const;
// Resets |autofill_count_| and counts the number of auto-fillable fields.
// This is used when we receive server data for form fields. At that time,
// we may have more known fields than just the number of fields we matched
// heuristically.
void UpdateAutofillCount();
// Returns true if this form matches the structural requirements for Autofill.
bool ShouldBeParsed() const;
// Returns true if heuristic autofill type detection should be attempted for
// this form.
bool ShouldRunHeuristics() const;
// Returns true if we should query the crowd-sourcing server to determine this
// form's field types. If the form includes author-specified types, this will
// return false unless there are password fields in the form. If there are no
// password fields, the assumption is that the author has expressed their
// intent and crowdsourced data should not be used to override this. Password
// fields are different because there is no way to specify password generation
// directly.
bool ShouldBeQueried() const;
// Returns true if we should upload votes for this form to the crowd-sourcing
// server.
bool ShouldBeUploaded() const;
// Sets the field types to be those set for |cached_form|.
// NOTE(review): |apply_is_autofilled| presumably controls whether the cached
// autofilled state is also applied, and |only_server_and_autofill_state|
// presumably restricts what is copied to server types and autofill state.
// Both readings are inferred from the parameter names only -- confirm against
// the implementation.
void RetrieveFromCache(const FormStructure& cached_form,
const bool apply_is_autofilled,
const bool only_server_and_autofill_state);
// Logs quality metrics for |this|, which should be a user-submitted form.
// This method should only be called after the possible field types have been
// set for each field. |interaction_time| should be a timestamp corresponding
// to the user's first interaction with the form. |submission_time| should be
// a timestamp corresponding to the form's submission. |observed_submission|
// indicates whether this method is called as a result of observing a
// submission event (otherwise, it may be that an upload was triggered after
// a form was unfocused or a navigation occurred).
// NOTE(review): |load_time| is presumably a timestamp corresponding to when
// the form was loaded, and |did_show_suggestions| presumably tells whether
// Autofill suggestions were shown to the user -- neither is described in the
// original comment; confirm against the implementation.
// TODO(sebsg): We log more than quality metrics. Maybe rename or split
// function?
void LogQualityMetrics(
const base::TimeTicks& load_time,
const base::TimeTicks& interaction_time,
const base::TimeTicks& submission_time,
AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger,
bool did_show_suggestions,
bool observed_submission) const;
// Logs the quality of the heuristics and server predictions for this form
// structure, if autocomplete attributes are present on the fields (they are
// used as golden truths).
// NOTE(review): the end of this declaration (after the parameter list) is
// missing in this copy of the file -- confirm against the original header.
void LogQualityMetricsBasedOnAutocomplete(
AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger)
// Classifies each field in |fields_| based upon its |autocomplete| attribute,
// if the attribute is available. The association is stored into the field's
// |heuristic_type|.
// Fills |has_author_specified_types_| with |true| if the attribute is
// available and neither empty nor set to the special values "on" or "off" for
// at least one field.
// Fills |has_author_specified_sections_| with |true| if the attribute
// specifies a section for at least one field.
void ParseFieldTypesFromAutocompleteAttributes();
// Returns the values that can be filled into the form structure for the
// given type. For example, there's no way to fill in a value of "The Moon"
// into ADDRESS_HOME_STATE if the form only has a
// <select autocomplete="region"> with no "The Moon" option. Returns an
// empty set if the form doesn't reference the given type or if all inputs
// are accepted (e.g., <input type="text" autocomplete="region">).
// All returned values are standardized to upper case.
std::set<base::string16> PossibleValues(ServerFieldType type);
// Gets the form's current value for |type|. For example, it may return
// the contents of a text input or the currently selected <option>.
base::string16 GetUniqueValue(HtmlFieldType type) const;
// Rationalizes the phone number fields in the given |section|: only the
// fields that are considered to compose the first complete phone number are
// filled.
void RationalizePhoneNumbersInSection(std::string section);
// Accessors for the field at position |index| (const and mutable overloads).
const AutofillField* field(size_t index) const;
AutofillField* field(size_t index);
// Returns the number of fields in the form (presumably the size of |fields_|
// -- confirm against the implementation).
size_t field_count() const;
// Returns the number of fields that are part of the form signature and that
// are included in queries to the Autofill server.
size_t active_field_count() const;
// Returns the number of fields that are able to be autofilled.
size_t autofill_count() const { return autofill_count_; }
// Used for iterating over the fields. Both functions hand out const iterators
// into |fields_|, so the fields cannot be added or removed through them.
std::vector<std::unique_ptr<AutofillField>>::const_iterator begin() const {
return fields_.begin();
std::vector<std::unique_ptr<AutofillField>>::const_iterator end() const {
return fields_.end();
// Read-only accessors for the form's name, URLs and main-frame origin.
const base::string16& form_name() const { return form_name_; }
const GURL& source_url() const { return source_url_; }
const GURL& target_url() const { return target_url_; }
const url::Origin& main_frame_origin() const { return main_frame_origin_; }
// Read-only accessors for the flags describing what the site author specified
// on the form's fields.
bool has_author_specified_types() const {
return has_author_specified_types_;
bool has_author_specified_sections() const {
return has_author_specified_sections_;
bool has_author_specified_upi_vpa_hint() const {
return has_author_specified_upi_vpa_hint_;
// Setter and getter for |upload_required_|.
void set_upload_required(UploadRequired required) {
upload_required_ = required;
UploadRequired upload_required() const { return upload_required_; }
// Setter and getter for |form_parsed_timestamp_|.
void set_form_parsed_timestamp(const base::TimeTicks form_parsed_timestamp) {
form_parsed_timestamp_ = form_parsed_timestamp;
base::TimeTicks form_parsed_timestamp() const {
return form_parsed_timestamp_;
// Accessors for the password/sign-in related flags.
bool all_fields_are_passwords() const { return all_fields_are_passwords_; }
bool is_signin_upload() const { return is_signin_upload_; }
void set_is_signin_upload(bool is_signin_upload) {
is_signin_upload_ = is_signin_upload;
// Returns the stored |form_signature_|.
FormSignature form_signature() const { return form_signature_; }
// Returns a FormData containing the data this form structure knows about.
FormData ToFormData() const;
// Returns the possible form types.
std::set<FormType> GetFormTypes() const;
// Getter and setter for |passwords_were_revealed_|.
bool passwords_were_revealed() const { return passwords_were_revealed_; }
void set_passwords_were_revealed(bool passwords_were_revealed) {
passwords_were_revealed_ = passwords_were_revealed;
// Stores |vote| as the password attributes vote for this form.
void set_password_attributes_vote(
const std::pair<PasswordAttribute, bool>& vote) {
password_attributes_vote_ = vote;
// Test-only accessor returning the stored |password_attributes_vote_|.
#if defined(UNIT_TEST)
base::Optional<std::pair<PasswordAttribute, bool>>
get_password_attributes_vote_for_testing() const {
return password_attributes_vote_;
// Stores |noisified_password_length| in |password_length_vote_|.
// NOTE(review): the streamed message below describes a precondition (the
// length vote is only meaningful when |password_attributes_vote_| has a
// value), but the check that the message is streamed into is not present in
// this copy of the file -- confirm against the original header.
void set_password_length_vote(const size_t noisified_password_length) {
<< "|password_length_vote_| doesn't make sense if "
"|password_attributes_vote_| has no value.";
password_length_vote_ = noisified_password_length;
// Test-only accessor returning |password_length_vote_|; it carries the same
// precondition message as the setter.
#if defined(UNIT_TEST)
size_t get_password_length_vote_for_testing() const {
<< "|password_length_vote_| doesn't make sense if "
"|password_attributes_vote_| has no value.";
return password_length_vote_;
// Equality and inequality comparison of this form structure against a
// FormData. NOTE(review): which properties take part in the comparison is
// defined in the implementation, not visible here.
bool operator==(const FormData& form) const;
bool operator!=(const FormData& form) const;
// Returns an identifier that is used by the refill logic. Takes the first
// non-empty of these or returns an empty string:
// - Form name
// - Name for Autofill of first field
base::string16 GetIdentifierForRefill() const;
// Test classes and test cases that are granted access to the non-public
// members of this class.
friend class AutofillMergeTest;
friend class FormStructureTest;
FRIEND_TEST_ALL_PREFIXES(AutofillDownloadTest, QueryAndUploadTest);
FRIEND_TEST_ALL_PREFIXES(FormStructureTest, FindLongestCommonPrefix);
// Helper that groups field indexes into sections (a vector of vectors of
// indexes) and keeps a cursor, |current_section_ptr|, for walking over the
// sections one at a time.
class SectionedFieldsIndexes {
// Returns the last index stored in the last section, or (size_t)-1 when no
// section has been added yet.
size_t LastFieldIndex() const {
if (sectioned_indexes.empty())
return (size_t)-1; // Shouldn't happen.
return sectioned_indexes.back().back();
// Adds |index|. When |is_new_section| is true, or when no section exists yet,
// a new section containing only |index| is started.
// NOTE(review): the remainder of this function (the path for adding to an
// existing section) is not present in this copy of the file -- confirm
// against the original header.
void AddFieldIndex(const size_t index, bool is_new_section) {
if (is_new_section || Empty()) {
sectioned_indexes.push_back(std::vector<size_t>(1, index));
// Advances the cursor to the following section; no bounds check is done
// here, use IsFinished() to detect the end.
void WalkForwardToTheNextSection() { current_section_ptr++; }
// Returns true once the cursor has moved past the last section.
bool IsFinished() const {
return current_section_ptr >= sectioned_indexes.size();
// Returns the first index of the current section.
size_t CurrentIndex() const { return CurrentSection()[0]; }
// Returns a copy of the section the cursor is on. When the cursor is out of
// range, returns a one-element vector holding (size_t)-1 instead.
std::vector<size_t> CurrentSection() const {
if (current_section_ptr < sectioned_indexes.size())
return sectioned_indexes[current_section_ptr];
return std::vector<size_t>(1, (size_t)-1); // To handle edge cases.
// Moves the cursor back to the first section.
void Reset() { current_section_ptr = 0; }
// Returns true if no section has been added.
bool Empty() const { return sectioned_indexes.empty(); }
// A vector of sections. Each section is a vector of some of the indexes
// that belong to the same section. The sections and indexes are sorted by
// their order of appearance on the form.
std::vector<std::vector<size_t>> sectioned_indexes;
// Position in |sectioned_indexes| of the section currently being visited
// (an index, not a pointer, despite the name).
size_t current_section_ptr = 0;
// NOTE(review): several declarations in this group end right after a trailing
// comma in this copy of the file, i.e. their final parameter(s) are missing.
// Confirm the full signatures against the original header.
//
// A function to fine-tune the credit card related predictions. For example:
// a lone credit card field in an otherwise non-credit-card related form is
// unlikely to be correct, so the function will override that prediction.
void RationalizeCreditCardFieldPredictions();
// The rationalization is based on the visible fields, but should be applied
// to the hidden select fields. This is because hidden 'select' fields are
// also autofilled to take care of the synthetic fields.
void ApplyRationalizationsToHiddenSelects(
size_t field_index,
ServerFieldType new_type,
// Returns true if we can replace server predictions with the heuristics one.
bool HeuristicsPredictionsAreApplicable(size_t upper_index,
size_t lower_index,
ServerFieldType first_type,
ServerFieldType second_type);
// Applies upper type to upper field, and lower type to lower field, and
// applies the rationalization also to hidden select fields if necessary.
void ApplyRationalizationsToFields(
size_t upper_index,
size_t lower_index,
ServerFieldType upper_type,
ServerFieldType lower_type,
// Returns true if the fields_[index] server type should be rationalized to
// a country type. NOTE(review): the end of the original sentence is missing
// from this copy; the wording here is inferred from the function name.
bool FieldShouldBeRationalizedToCountry(size_t index);
// Set fields_[|field_index|] to |new_type| and log this change.
void ApplyRationalizationsToFieldAndLog(
size_t field_index,
ServerFieldType new_type,
AutofillMetrics::FormInteractionsUkmLogger* form_interactions_ukm_logger);
// Two or three fields predicted as the whole address should be address lines
// 1, 2 and 3 instead.
void RationalizeAddressLineFields(
SectionedFieldsIndexes& sections_of_address_indexes,
// Rationalize state and country interdependently.
void RationalizeAddressStateCountry(
SectionedFieldsIndexes& sections_of_state_indexes,
SectionedFieldsIndexes& sections_of_country_indexes,
// Tunes the fields with identical predictions.
void RationalizeRepeatedFields(AutofillMetrics::FormInteractionsUkmLogger*);
// A helper function to review the predictions and do appropriate adjustments
// when it considers necessary.
void RationalizeFieldTypePredictions();
// Encodes information about this form and its fields into |query_form|.
void EncodeFormForQuery(
autofill::AutofillQueryContents::Form* query_form) const;
// Encodes information about this form and its fields into |upload|.
void EncodeFormForUpload(autofill::AutofillUploadContents* upload) const;
// Returns true if the form has no fields, or too many.
bool IsMalformed() const;
// Classifies each field in |fields_| into a logical section.
// Sections are identified by the heuristic that a logical section should not
// include multiple fields of the same autofill type (with some exceptions, as
// described in the implementation). Credit card fields also have a single
// section of their own, separate from address fields.
// If |has_author_specified_sections| is true, only the second pass --
// distinguishing credit card sections from non-credit card ones -- is made.
void IdentifySections(bool has_author_specified_sections);
// Returns true if field should be skipped when talking to Autofill server.
bool ShouldSkipField(const FormFieldData& field) const;
// Further processes the extracted |fields_|.
void ProcessExtractedFields();
// Returns the longest common prefix found within |strings|. Strings below a
// threshold length are excluded when performing this check; this is needed
// because an exceptional field may be missing a prefix which is otherwise
// consistently applied--for instance, a framework may only apply a prefix
// to those fields which are bound when POSTing.
static base::string16 FindLongestCommonPrefix(
const std::vector<base::string16>& strings);
// The name of the form.
base::string16 form_name_;
// The source URL.
GURL source_url_;
// The target URL.
GURL target_url_;
// The origin of the main frame of this form.
url::Origin main_frame_origin_;
// The number of fields able to be auto-filled.
size_t autofill_count_;
// A vector of all the input fields in the form.
std::vector<std::unique_ptr<AutofillField>> fields_;
// The number of fields that are part of the form signature and that are
// included in queries to the Autofill server.
size_t active_field_count_;
// Whether the server expects us to always upload, never upload, or default
// to the stored upload rates.
UploadRequired upload_required_;
// Whether the form includes any field types explicitly specified by the site
// author, via the autocomplete attribute.
bool has_author_specified_types_;
// Whether the form includes any sections explicitly specified by the site
// author, via the autocomplete attribute.
bool has_author_specified_sections_;
// Whether the form includes a field that explicitly sets its autocomplete
// type to "upi-vpa".
bool has_author_specified_upi_vpa_hint_;
// Whether the form was parsed for autocomplete attribute, thus assigning
// the real values of |has_author_specified_types_| and
// |has_author_specified_sections_|.
bool was_parsed_for_autocomplete_attributes_;
// True if the form contains at least one password field.
bool has_password_field_;
// True if the form is a <form>.
bool is_form_tag_;
// True if the form is made of unowned fields (i.e., not within a <form> tag)
// in what appears to be a checkout flow. This attribute is only calculated
// and used if features::kAutofillRestrictUnownedFieldsToFormlessCheckout is
// enabled, to prevent heuristics from running on formless non-checkout.
bool is_formless_checkout_;
// True if all form fields are password fields.
bool all_fields_are_passwords_;
// True if the form is submitted and has 2 fields: one text and one password
// field.
bool is_signin_upload_;
// The unique signature for this form, composed of the target url domain,
// the form name, and the form field names in a 64-bit hash.
FormSignature form_signature_;
// The time at which this form was parsed on the page.
base::TimeTicks form_parsed_timestamp_;
// Maps a section to whether phone number rationalization has already been
// performed for it.
std::map<std::string, bool> phone_rationalized_;
// True iff the form is a password form and the user has seen the password
// value before accepting the prompt to save. Used for crowdsourcing.
bool passwords_were_revealed_;
// The vote about password attributes (e.g. whether the password has a numeric
// character).
base::Optional<std::pair<PasswordAttribute, bool>> password_attributes_vote_;
// Noisified password length for crowdsourcing. If |password_attributes_vote_|
// has no value, |password_length_vote_| should be ignored.
size_t password_length_vote_;
} // namespace autofill