blob: eb0cbe389dc9bcc353da4c5d246645a3c77413ef [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/form_parsing/form_field.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include "base/feature_list.h"
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/autofill_regexes.h"
#include "components/autofill/core/browser/autofill_type.h"
#include "components/autofill/core/browser/form_parsing/address_field.h"
#include "components/autofill/core/browser/form_parsing/autofill_scanner.h"
#include "components/autofill/core/browser/form_parsing/credit_card_field.h"
#include "components/autofill/core/browser/form_parsing/email_field.h"
#include "components/autofill/core/browser/form_parsing/merchant_promo_code_field.h"
#include "components/autofill/core/browser/form_parsing/name_field.h"
#include "components/autofill/core/browser/form_parsing/phone_field.h"
#include "components/autofill/core/browser/form_parsing/price_field.h"
#include "components/autofill/core/browser/form_parsing/search_field.h"
#include "components/autofill/core/browser/form_parsing/travel_field.h"
#include "components/autofill/core/browser/form_structure.h"
#include "components/autofill/core/browser/logging/log_manager.h"
#include "components/autofill/core/common/autofill_constants.h"
#include "components/autofill/core/common/autofill_features.h"
#include "components/autofill/core/common/autofill_internals/log_message.h"
#include "components/autofill/core/common/autofill_internals/logging_scope.h"
#include "components/autofill/core/common/autofill_util.h"
namespace autofill {
// There's an implicit precedence determined by the values assigned here. Email
// is currently the most important followed by Phone, Travel, Address,
// Credit Card, Price, Name, Merchant promo code, and Search.
const float FormField::kBaseEmailParserScore = 1.4f;
const float FormField::kBasePhoneParserScore = 1.3f;
const float FormField::kBaseTravelParserScore = 1.2f;
const float FormField::kBaseAddressParserScore = 1.1f;
const float FormField::kBaseCreditCardParserScore = 1.0f;
const float FormField::kBasePriceParserScore = 0.95f;
const float FormField::kBaseNameParserScore = 0.9f;
const float FormField::kBaseMerchantPromoCodeParserScore = 0.85f;
const float FormField::kBaseSearchParserScore = 0.8f;
// static
FieldCandidatesMap FormField::ParseFormFields(
const std::vector<std::unique_ptr<AutofillField>>& fields,
const LanguageCode& page_language,
bool is_form_tag,
LogManager* log_manager) {
std::vector<AutofillField*> processed_fields = RemoveCheckableFields(fields);
FieldCandidatesMap field_candidates;
// Email pass.
ParseFormFieldsPass(EmailField::Parse, processed_fields, &field_candidates,
page_language, log_manager);
const size_t email_count = field_candidates.size();
// Merchant promo code pass.
ParseFormFieldsPass(MerchantPromoCodeField::Parse, processed_fields,
&field_candidates, page_language, log_manager);
const size_t promo_code_count = field_candidates.size() - email_count;
// Phone pass.
ParseFormFieldsPass(PhoneField::Parse, processed_fields, &field_candidates,
page_language, log_manager);
// Travel pass.
ParseFormFieldsPass(TravelField::Parse, processed_fields, &field_candidates,
page_language, log_manager);
// Address pass.
ParseFormFieldsPass(autofill::AddressField::Parse, processed_fields,
&field_candidates, page_language, log_manager);
// Credit card pass.
ParseFormFieldsPass(CreditCardField::Parse, processed_fields,
&field_candidates, page_language, log_manager);
// Price pass.
ParseFormFieldsPass(PriceField::Parse, processed_fields, &field_candidates,
page_language, log_manager);
// Name pass.
ParseFormFieldsPass(NameField::Parse, processed_fields, &field_candidates,
page_language, log_manager);
// Search pass.
ParseFormFieldsPass(SearchField::Parse, processed_fields, &field_candidates,
page_language, log_manager);
size_t fillable_fields = 0;
if (base::FeatureList::IsEnabled(features::kAutofillFixFillableFieldTypes)) {
for (const auto& candidate : field_candidates) {
if (IsFillableFieldType(candidate.second.BestHeuristicType()))
++fillable_fields;
}
} else {
fillable_fields = field_candidates.size();
}
// Do not autofill a form if there aren't enough fields. Otherwise, it is
// very easy to have false positives. See http://crbug.com/447332
// For <form> tags, make an exception for email fields, which are commonly
// the only recognized field on account registration sites. Also make an
// exception for promo code fields, which are often a single field in its own
// form.
if (fillable_fields < kMinRequiredFieldsForHeuristics) {
if ((is_form_tag && email_count > 0) || promo_code_count > 0) {
base::EraseIf(field_candidates, [&](const auto& candidate) {
return !(candidate.second.BestHeuristicType() == EMAIL_ADDRESS ||
candidate.second.BestHeuristicType() == MERCHANT_PROMO_CODE);
});
} else {
if (log_manager) {
LogBuffer table_rows;
for (const auto& field : fields) {
table_rows << Tr{} << "Field:" << *field;
}
for (const auto& candidate : field_candidates) {
LogBuffer name;
name << "Type candidate for frame and renderer ID: "
<< candidate.first;
LogBuffer description;
ServerFieldType field_type = candidate.second.BestHeuristicType();
description << "BestHeuristicType: "
<< AutofillType::ServerFieldTypeToString(field_type)
<< ", is fillable: " << IsFillableFieldType(field_type);
table_rows << Tr{} << std::move(name) << std::move(description);
}
log_manager->Log()
<< LoggingScope::kParsing
<< LogMessage::kLocalHeuristicDidNotFindEnoughFillableFields
<< Tag{"table"} << Attrib{"class", "form"} << std::move(table_rows)
<< CTag{"table"};
}
field_candidates.clear();
}
}
return field_candidates;
}
FieldCandidatesMap FormField::ParseFormFieldsForPromoCodes(
const std::vector<std::unique_ptr<AutofillField>>& fields,
const LanguageCode& page_language,
bool is_form_tag,
LogManager* log_manager) {
std::vector<AutofillField*> processed_fields = RemoveCheckableFields(fields);
FieldCandidatesMap field_candidates;
// Merchant promo code pass.
ParseFormFieldsPass(MerchantPromoCodeField::Parse, processed_fields,
&field_candidates, page_language, log_manager);
return field_candidates;
}
// static
bool FormField::ParseFieldWithLegacyPattern(AutofillScanner* scanner,
base::StringPiece16 pattern,
AutofillField** match,
const RegExLogging& logging) {
return ParseFieldSpecificsWithLegacyPattern(scanner, pattern, MATCH_DEFAULT,
match, logging);
}
bool FormField::ParseField(AutofillScanner* scanner,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging) {
return ParseFieldSpecifics(scanner, patterns, match, logging);
}
bool FormField::ParseField(AutofillScanner* scanner,
base::StringPiece16 pattern,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging) {
return base::FeatureList::IsEnabled(features::kAutofillParsingPatternProvider)
? ParseField(scanner, patterns, match, logging)
: ParseFieldWithLegacyPattern(scanner, pattern, match, logging);
}
bool FormField::ParseFieldSpecificsWithLegacyPattern(
AutofillScanner* scanner,
base::StringPiece16 pattern,
int match_field_attributes,
int match_field_input_types,
AutofillField** match,
const RegExLogging& logging) {
if (scanner->IsEnd())
return false;
const AutofillField* field = scanner->Cursor();
if (!MatchesFormControlType(field->form_control_type,
match_field_input_types))
return false;
return MatchAndAdvance(scanner, pattern, match_field_attributes,
match_field_input_types, match, logging);
}
bool FormField::ParseFieldSpecifics(
AutofillScanner* scanner,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging) {
if (scanner->IsEnd())
return false;
const AutofillField* field = scanner->Cursor();
for (const auto& pattern : patterns) {
if (!MatchesFormControlType(field->form_control_type,
pattern.match_field_input_types)) {
continue;
}
// For each of the two match field attributes, MATCH_NAME and MATCH_LABEL,
// that are active for the current pattern, test if it matches the negative
// pattern. If yes, remove it from the attributes that are considered for
// positive matching.
uint8_t match_field_attributes = pattern.match_field_attributes;
if (!pattern.negative_pattern.empty()) {
for (auto attribute : {MATCH_LABEL, MATCH_NAME}) {
if ((match_field_attributes & attribute) &&
FormField::Match(field, pattern.negative_pattern, attribute,
pattern.match_field_input_types, logging)) {
match_field_attributes &= ~attribute;
}
}
}
if (match_field_attributes == 0)
continue;
// Apply the positive matching against all remaining match field attributes.
if (!pattern.positive_pattern.empty() &&
MatchAndAdvance(scanner, pattern.positive_pattern,
match_field_attributes, pattern.match_field_input_types,
match, logging)) {
return true;
}
}
return false;
}
// static
bool FormField::ParseFieldSpecificsWithLegacyPattern(
AutofillScanner* scanner,
base::StringPiece16 pattern,
int match_type,
AutofillField** match,
const RegExLogging& logging) {
int match_field_attributes = match_type & 0b11;
int match_field_types = match_type & ~0b11;
return ParseFieldSpecificsWithLegacyPattern(
scanner, pattern, match_field_attributes, match_field_types, match,
logging);
}
bool FormField::ParseFieldSpecifics(
AutofillScanner* scanner,
base::StringPiece16 pattern,
int match_type,
const std::vector<MatchingPattern>& patterns,
AutofillField** match,
const RegExLogging& logging,
MatchFieldBitmasks match_field_bitmasks) {
if (base::FeatureList::IsEnabled(features::kAutofillParsingPatternProvider)) {
// TODO(crbug/1142936): This hack is to allow
// AddressField::ParseNameAndLabelSeparately().
if (match_field_bitmasks.restrict_attributes != ~0 ||
match_field_bitmasks.augment_types != 0) {
std::vector<MatchingPattern> modified_patterns = patterns;
for (MatchingPattern& mp : modified_patterns) {
mp.match_field_attributes &= match_field_bitmasks.restrict_attributes;
mp.match_field_input_types |= match_field_bitmasks.augment_types;
}
return ParseFieldSpecifics(scanner, modified_patterns, match, logging);
}
return ParseFieldSpecifics(scanner, patterns, match, logging);
} else {
return ParseFieldSpecificsWithLegacyPattern(scanner, pattern, match_type,
match, logging);
}
}
// static
bool FormField::ParseEmptyLabel(AutofillScanner* scanner,
AutofillField** match) {
return ParseFieldSpecificsWithLegacyPattern(
scanner, u"^$", MATCH_LABEL | MATCH_ALL_INPUTS, match);
}
// static
void FormField::AddClassification(const AutofillField* field,
ServerFieldType type,
float score,
FieldCandidatesMap* field_candidates) {
// Several fields are optional.
if (field == nullptr)
return;
FieldCandidates& candidates = (*field_candidates)[field->global_id()];
candidates.AddFieldCandidate(type, score);
}
// static
std::vector<AutofillField*> FormField::RemoveCheckableFields(
const std::vector<std::unique_ptr<AutofillField>>& fields) {
// Set up a working copy of the fields to be processed.
std::vector<AutofillField*> processed_fields;
for (const auto& field : fields) {
// Ignore checkable fields as they interfere with parsers assuming context.
// Eg., while parsing address, "Is PO box" checkbox after ADDRESS_LINE1
// interferes with correctly understanding ADDRESS_LINE2.
// Ignore fields marked as presentational, unless for 'select' fields (for
// synthetic fields.)
if (IsCheckable(field->check_status) ||
(field->role == FormFieldData::RoleAttribute::kPresentation &&
field->form_control_type != "select-one")) {
continue;
}
processed_fields.push_back(field.get());
}
return processed_fields;
}
bool FormField::MatchAndAdvance(AutofillScanner* scanner,
base::StringPiece16 pattern,
int match_field_attributes,
int match_field_input_types,
AutofillField** match,
const RegExLogging& logging) {
AutofillField* field = scanner->Cursor();
if (FormField::Match(field, pattern, match_field_attributes,
match_field_input_types, logging)) {
if (match)
*match = field;
scanner->Advance();
return true;
}
return false;
}
// static
bool FormField::MatchAndAdvance(AutofillScanner* scanner,
base::StringPiece16 pattern,
int match_type,
AutofillField** match,
const RegExLogging& logging) {
int match_field_attributes = match_type & 0b11;
int match_field_types = match_type & ~0b11;
return MatchAndAdvance(scanner, pattern, match_field_attributes,
match_field_types, match, logging);
}
bool FormField::Match(const AutofillField* field,
base::StringPiece16 pattern,
int match_field_attributes,
int match_field_input_types,
const RegExLogging& logging) {
bool found_match = false;
base::StringPiece match_type_string;
base::StringPiece16 value;
std::u16string match;
// TODO(crbug/1165780): Remove once shared labels are launched.
const std::u16string& label =
base::FeatureList::IsEnabled(
features::kAutofillEnableSupportForParsingWithSharedLabels)
? field->parseable_label()
: field->label;
const std::u16string& name = field->parseable_name();
if ((match_field_attributes & MATCH_LABEL) &&
MatchesPattern(label, pattern, &match)) {
found_match = true;
match_type_string = "Match in label";
value = label;
} else if ((match_field_attributes & MATCH_NAME) &&
MatchesPattern(name, pattern, &match)) {
found_match = true;
match_type_string = "Match in name";
value = name;
}
if (found_match && logging.log_manager) {
LogBuffer table_rows;
table_rows << Tr{} << "Match type:" << match_type_string;
table_rows << Tr{} << "RegEx:" << logging.regex_name;
table_rows << Tr{} << "Value: " << HighlightValue(value, match);
// The matched substring is reported once more as the highlighting is not
// particularly copy&paste friendly.
table_rows << Tr{} << "Matched substring: " << match;
logging.log_manager->Log()
<< LoggingScope::kParsing << LogMessage::kLocalHeuristicRegExMatched
<< Tag{"table"} << std::move(table_rows) << CTag{"table"};
}
return found_match;
}
// static
bool FormField::Match(const AutofillField* field,
base::StringPiece16 pattern,
int match_type,
const RegExLogging& logging) {
int match_field_attributes = match_type & 0b11;
int match_field_types = match_type & ~0b11;
return Match(field, pattern, match_field_attributes, match_field_types,
logging);
}
// static
void FormField::ParseFormFieldsPass(ParseFunction parse,
const std::vector<AutofillField*>& fields,
FieldCandidatesMap* field_candidates,
const LanguageCode& page_language,
LogManager* log_manager) {
AutofillScanner scanner(fields);
while (!scanner.IsEnd()) {
std::unique_ptr<FormField> form_field =
parse(&scanner, page_language, log_manager);
if (form_field == nullptr) {
scanner.Advance();
} else {
// Add entries into |field_candidates| for each field type found in
// |fields|.
form_field->AddClassifications(field_candidates);
}
}
}
bool FormField::MatchesFormControlType(const std::string& type,
int match_type) {
if ((match_type & MATCH_TEXT) && type == "text")
return true;
if ((match_type & MATCH_EMAIL) && type == "email")
return true;
if ((match_type & MATCH_TELEPHONE) && type == "tel")
return true;
if ((match_type & MATCH_SELECT) && type == "select-one")
return true;
if ((match_type & MATCH_TEXT_AREA) && type == "textarea")
return true;
if ((match_type & MATCH_PASSWORD) && type == "password")
return true;
if ((match_type & MATCH_NUMBER) && type == "number")
return true;
if ((match_type & MATCH_SEARCH) && type == "search")
return true;
return false;
}
} // namespace autofill