// blob: 343504b5d1b2f52ca2f387c619940a9b37aa1e99 [file] [log] [blame]
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/phone_field.h"
#include <string.h>
#include <memory>
#include <utility>
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/string16.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/autofill/core/browser/autofill_field.h"
#include "components/autofill/core/browser/autofill_scanner.h"
#include "components/autofill/core/common/autofill_regex_constants.h"
namespace autofill {
namespace {
// This string includes all area code separators, including NoText.
std::string GetAreaRegex() {
std::string area_code = kAreaCodeRe;
area_code.append("|"); // Regexp separator.
area_code.append(kAreaCodeNotextRe);
return area_code;
}
} // namespace
PhoneField::~PhoneField() {}
// Phone field grammars - first matched grammar will be parsed. Grammars are
// separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
// parsed separately unless they are necessary parts of the match.
// The following notation is used to describe the patterns:
// <cc> - country code field.
// <ac> - area code field.
// <phone> - phone or prefix.
// <suffix> - suffix.
// <ext> - extension.
// :N means field is limited to N characters, otherwise it is unlimited.
// (pattern <field>)? means pattern is optional and matched separately.
const PhoneField::Parser PhoneField::kPhoneFieldGrammars[] = {
// Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
// (Ext: <ext>)?)?
{REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0},
{REGEX_AREA, FIELD_AREA_CODE, 0},
{REGEX_PHONE, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
{REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3},
{REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
{REGEX_PHONE, FIELD_SUFFIX, 4},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
{REGEX_PHONE, FIELD_AREA_CODE, 3},
{REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
{REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
{REGEX_PHONE, FIELD_AREA_CODE, 3},
{REGEX_PHONE, FIELD_PHONE, 3},
{REGEX_PHONE, FIELD_SUFFIX, 4},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
{REGEX_AREA, FIELD_AREA_CODE, 0},
{REGEX_PHONE, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
{REGEX_PHONE, FIELD_AREA_CODE, 0},
{REGEX_PHONE, FIELD_PHONE, 3},
{REGEX_PHONE, FIELD_SUFFIX, 4},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
{REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
{REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
{REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
{REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
{REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
{REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
{REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)?
{REGEX_AREA, FIELD_AREA_CODE, 3},
{REGEX_PREFIX, FIELD_PHONE, 3},
{REGEX_SUFFIX, FIELD_SUFFIX, 4},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
{REGEX_PHONE, FIELD_AREA_CODE, 0},
{REGEX_PREFIX, FIELD_PHONE, 0},
{REGEX_SUFFIX, FIELD_SUFFIX, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
{REGEX_PHONE, FIELD_AREA_CODE, 0},
{REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
{REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
{REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
{REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <ac> - <phone> (Ext: <ext>)?
{REGEX_AREA, FIELD_AREA_CODE, 0},
{REGEX_PHONE, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
{REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
{REGEX_PHONE, FIELD_PHONE, 14},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Ext: <ext>
{REGEX_EXTENSION, FIELD_EXTENSION, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
// Phone: <phone> (Ext: <ext>)?
{REGEX_PHONE, FIELD_PHONE, 0},
{REGEX_SEPARATOR, FIELD_NONE, 0},
};
// static
std::unique_ptr<FormField> PhoneField::Parse(AutofillScanner* scanner) {
if (scanner->IsEnd())
return nullptr;
size_t start_cursor = scanner->SaveCursor();
// The form owns the following variables, so they should not be deleted.
AutofillField* parsed_fields[FIELD_MAX];
for (size_t i = 0; i < base::size(kPhoneFieldGrammars); ++i) {
memset(parsed_fields, 0, sizeof(parsed_fields));
size_t saved_cursor = scanner->SaveCursor();
// Attempt to parse according to the next grammar.
for (; i < base::size(kPhoneFieldGrammars) &&
kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR;
++i) {
if (!ParsePhoneField(scanner, GetRegExp(kPhoneFieldGrammars[i].regex),
&parsed_fields[kPhoneFieldGrammars[i].phone_part]))
break;
if (kPhoneFieldGrammars[i].max_size &&
(!parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length ||
kPhoneFieldGrammars[i].max_size <
parsed_fields[kPhoneFieldGrammars[i].phone_part]->max_length)) {
break;
}
}
if (i >= base::size(kPhoneFieldGrammars)) {
scanner->RewindTo(saved_cursor);
return nullptr; // Parsing failed.
}
if (kPhoneFieldGrammars[i].regex == REGEX_SEPARATOR)
break; // Parsing succeeded.
// Proceed to the next grammar.
do {
++i;
} while (i < base::size(kPhoneFieldGrammars) &&
kPhoneFieldGrammars[i].regex != REGEX_SEPARATOR);
scanner->RewindTo(saved_cursor);
if (i + 1 == base::size(kPhoneFieldGrammars)) {
return nullptr; // Tried through all the possibilities - did not match.
}
}
if (!parsed_fields[FIELD_PHONE]) {
scanner->RewindTo(start_cursor);
return nullptr;
}
std::unique_ptr<PhoneField> phone_field(new PhoneField);
for (int i = 0; i < FIELD_MAX; ++i)
phone_field->parsed_phone_fields_[i] = parsed_fields[i];
// Look for optional fields.
// Look for a third text box.
if (!phone_field->parsed_phone_fields_[FIELD_SUFFIX]) {
if (!ParsePhoneField(scanner, kPhoneSuffixRe,
&phone_field->parsed_phone_fields_[FIELD_SUFFIX])) {
ParsePhoneField(scanner, kPhoneSuffixSeparatorRe,
&phone_field->parsed_phone_fields_[FIELD_SUFFIX]);
}
}
// Now look for an extension.
// The extension is not actually used, so this just eats the field so other
// parsers do not mistaken it for something else.
ParsePhoneField(scanner, kPhoneExtensionRe,
&phone_field->parsed_phone_fields_[FIELD_EXTENSION]);
return std::move(phone_field);
}
void PhoneField::AddClassifications(
FieldCandidatesMap* field_candidates) const {
DCHECK(parsed_phone_fields_[FIELD_PHONE]); // Phone was correctly parsed.
if ((parsed_phone_fields_[FIELD_COUNTRY_CODE]) ||
(parsed_phone_fields_[FIELD_AREA_CODE]) ||
(parsed_phone_fields_[FIELD_SUFFIX])) {
if (parsed_phone_fields_[FIELD_COUNTRY_CODE]) {
AddClassification(parsed_phone_fields_[FIELD_COUNTRY_CODE],
PHONE_HOME_COUNTRY_CODE, kBasePhoneParserScore,
field_candidates);
}
ServerFieldType field_number_type = PHONE_HOME_NUMBER;
if (parsed_phone_fields_[FIELD_AREA_CODE]) {
AddClassification(parsed_phone_fields_[FIELD_AREA_CODE],
PHONE_HOME_CITY_CODE, kBasePhoneParserScore,
field_candidates);
} else if (parsed_phone_fields_[FIELD_COUNTRY_CODE]) {
// Only if we can find country code without city code, it means the phone
// number include city code.
field_number_type = PHONE_HOME_CITY_AND_NUMBER;
}
// We tag the prefix as PHONE_HOME_NUMBER, then when filling the form
// we fill only the prefix depending on the size of the input field.
AddClassification(parsed_phone_fields_[FIELD_PHONE], field_number_type,
kBasePhoneParserScore, field_candidates);
// We tag the suffix as PHONE_HOME_NUMBER, then when filling the form
// we fill only the suffix depending on the size of the input field.
if (parsed_phone_fields_[FIELD_SUFFIX]) {
AddClassification(parsed_phone_fields_[FIELD_SUFFIX], PHONE_HOME_NUMBER,
kBasePhoneParserScore, field_candidates);
}
} else {
AddClassification(parsed_phone_fields_[FIELD_PHONE],
PHONE_HOME_WHOLE_NUMBER, kBasePhoneParserScore,
field_candidates);
}
if (parsed_phone_fields_[FIELD_EXTENSION]) {
AddClassification(parsed_phone_fields_[FIELD_EXTENSION],
PHONE_HOME_EXTENSION, kBasePhoneParserScore,
field_candidates);
}
}
PhoneField::PhoneField() {
memset(parsed_phone_fields_, 0, sizeof(parsed_phone_fields_));
}
// static
std::string PhoneField::GetRegExp(RegexType regex_id) {
switch (regex_id) {
case REGEX_COUNTRY:
return kCountryCodeRe;
case REGEX_AREA:
return GetAreaRegex();
case REGEX_AREA_NOTEXT:
return kAreaCodeNotextRe;
case REGEX_PHONE:
return kPhoneRe;
case REGEX_PREFIX_SEPARATOR:
return kPhonePrefixSeparatorRe;
case REGEX_PREFIX:
return kPhonePrefixRe;
case REGEX_SUFFIX_SEPARATOR:
return kPhoneSuffixSeparatorRe;
case REGEX_SUFFIX:
return kPhoneSuffixRe;
case REGEX_EXTENSION:
return kPhoneExtensionRe;
default:
NOTREACHED();
break;
}
return std::string();
}
// static
bool PhoneField::ParsePhoneField(AutofillScanner* scanner,
const std::string& regex,
AutofillField** field) {
return ParseFieldSpecifics(scanner, base::UTF8ToUTF16(regex),
MATCH_DEFAULT | MATCH_TELEPHONE | MATCH_NUMBER,
field);
}
} // namespace autofill