blob: 44126646790965044911a7bb1479a79410e1850e [file] [log] [blame]
// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/common/signatures.h"
#include <string_view>
#include "base/containers/span.h"
#include "base/hash/sha1.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "build/build_config.h"
#include "components/autofill/core/common/autofill_util.h"
#include "components/autofill/core/common/form_data.h"
#include "components/autofill/core/common/form_field_data.h"
#include "url/gurl.h"
namespace autofill {
namespace {
// Returns a copy of `input` from which every maximal run of 5 or more
// consecutive ASCII digits has been removed. Digit runs shorter than that, and
// all non-digit characters, are kept in their original order.
//
// The scan relies only on std::string / std::string_view members, so it does
// not depend on <algorithm> or on any assertion macro being available.
std::string StripDigitsIfRequired(std::string_view input) {
  // A digit run must be at least this long to be dropped from the output.
  constexpr size_t kMinDigitRunToStrip = 5;
  // Locale-independent ASCII digit test.
  constexpr auto is_ascii_digit = [](char c) { return c >= '0' && c <= '9'; };

  std::string result;
  // The output can never be longer than the input.
  result.reserve(input.size());

  size_t pos = 0;
  while (pos < input.size()) {
    // Non-digit characters are always copied through unchanged.
    if (!is_ascii_digit(input[pos])) {
      result.push_back(input[pos]);
      ++pos;
      continue;
    }

    // `input[pos]` starts a digit run; advance `run_end` to one past its last
    // digit (or to the end of `input`).
    size_t run_end = pos + 1;
    while (run_end < input.size() && is_ascii_digit(input[run_end])) {
      ++run_end;
    }

    // Short runs are preserved; long runs are skipped entirely.
    const size_t run_length = run_end - pos;
    if (run_length < kMinDigitRunToStrip) {
      result.append(input.substr(pos, run_length));
    }
    pos = run_end;
  }
  return result;
}
// Builds the string "<scheme>://<host>" followed by "&<control type>" for each
// field of `form_data` whose control type takes part in the signature.
// Checkbox, date and radio inputs contribute nothing.
std::string CalculateAlternativeFormSignatureBase(const FormData& form_data) {
  std::string_view origin_scheme = form_data.action().scheme();
  std::string_view origin_host = form_data.action().host();
  // If target host or scheme is empty, fall back to the scheme and host of
  // the source url. This is done to match the Toolbar's behavior.
  const bool action_lacks_origin = origin_scheme.empty() || origin_host.empty();
  if (action_lacks_origin) {
    origin_scheme = form_data.url().scheme();
    origin_host = form_data.url().host();
  }

  std::string signature_base =
      base::StrCat({origin_scheme, "://", origin_host});
  for (const FormFieldData& field : form_data.fields()) {
    // No `default` label: every FormControlType value is listed explicitly.
    switch (field.form_control_type()) {
      case mojom::FormControlType::kContentEditable:
      case mojom::FormControlType::kInputEmail:
      case mojom::FormControlType::kInputMonth:
      case mojom::FormControlType::kInputNumber:
      case mojom::FormControlType::kInputPassword:
      case mojom::FormControlType::kInputSearch:
      case mojom::FormControlType::kInputTelephone:
      case mojom::FormControlType::kInputText:
      case mojom::FormControlType::kInputUrl:
      case mojom::FormControlType::kSelectOne:
      case mojom::FormControlType::kTextArea:
        // We use the string representation of the FormControlType because
        // changing the signature algorithm is non-trivial. If and when the
        // sectioning algorithm changes, we could use the raw FormControlType
        // enum instead.
        base::StrAppend(
            &signature_base,
            {"&", FormControlTypeToString(field.form_control_type())});
        break;
      case mojom::FormControlType::kInputCheckbox:
      case mojom::FormControlType::kInputDate:
      case mojom::FormControlType::kInputRadio:
        // These control types are not part of the signature.
        break;
    }
  }
  return signature_base;
}
// Packs the `N` bytes of `bytes` into a uint64_t, with the first byte ending
// up in the most significant occupied position (big-endian order). At most 8
// bytes can be packed; this is enforced at compile time.
template <size_t N>
uint64_t PackBytes(base::span<const uint8_t, N> bytes) {
  static_assert(N <= 8u,
                "Error: Can't pack more than 8 bytes into a uint64_t.");
  uint64_t packed = 0;
  for (size_t index = 0; index < bytes.size(); ++index) {
    // Make room for the next byte, then place it in the low 8 bits.
    packed <<= 8;
    packed |= bytes[index];
  }
  return packed;
}
} // namespace
// Returns the form name that should feed into the form signature. A name that
// was set by Chrome rather than by the page must not influence the signature,
// so on iOS a name carrying Chrome's synthetic prefix is replaced by an empty
// string. On all other platforms `form_name` is returned unchanged.
std::string GetDOMFormName(const std::string& form_name) {
#if BUILDFLAG(IS_IOS)
  // In case of an empty form name, the synthetic name is created. Ignore it.
  if (StartsWith(form_name, "gChrome~form~", base::CompareCase::SENSITIVE)) {
    return std::string();
  }
#endif
  return form_name;
}
// Computes the form signature of `form_data`: the 64-bit hash of
// "<scheme>://<host>&<form name>" followed by "&<field name>" for each field
// whose control type takes part in the signature. The form name and the field
// names are passed through StripDigitsIfRequired() before being used.
// Checkbox, date and radio inputs contribute nothing.
FormSignature CalculateFormSignature(const FormData& form_data) {
  const GURL& action_url = form_data.action();
  const GURL& document_url = form_data.url();

  std::string_view origin_scheme = action_url.scheme();
  std::string_view origin_host = action_url.host();
  // If target host or scheme is empty, fall back to the scheme and host of
  // the source url. This is done to match the Toolbar's behavior.
  const bool action_lacks_origin = origin_scheme.empty() || origin_host.empty();
  if (action_lacks_origin) {
    origin_scheme = document_url.scheme();
    origin_host = document_url.host();
  }

  const std::string stripped_form_name = StripDigitsIfRequired(
      GetDOMFormName(base::UTF16ToUTF8(form_data.name())));

  // The hashed string starts with the origin and the form name; the
  // participating field names are appended in field order.
  std::string signature_input = base::StrCat(
      {origin_scheme, "://", origin_host, "&", stripped_form_name});
  for (const FormFieldData& field : form_data.fields()) {
    // No `default` label: every FormControlType value is listed explicitly.
    switch (field.form_control_type()) {
      case mojom::FormControlType::kContentEditable:
      case mojom::FormControlType::kInputEmail:
      case mojom::FormControlType::kInputMonth:
      case mojom::FormControlType::kInputNumber:
      case mojom::FormControlType::kInputPassword:
      case mojom::FormControlType::kInputSearch:
      case mojom::FormControlType::kInputTelephone:
      case mojom::FormControlType::kInputText:
      case mojom::FormControlType::kInputUrl:
      case mojom::FormControlType::kSelectOne:
      case mojom::FormControlType::kTextArea:
        base::StrAppend(
            &signature_input,
            {"&", StripDigitsIfRequired(base::UTF16ToUTF8(field.name()))});
        break;
      case mojom::FormControlType::kInputCheckbox:
      case mojom::FormControlType::kInputDate:
      case mojom::FormControlType::kInputRadio:
        // These control types are not part of the signature.
        break;
    }
  }
  return FormSignature(StrToHash64Bit(signature_input));
}
// Computes the structural form signature of `form_data`: the 64-bit hash of
// the string returned by CalculateAlternativeFormSignatureBase(), with nothing
// else appended.
FormSignature CalculateStructuralFormSignature(const FormData& form_data) {
  const std::string signature_base =
      CalculateAlternativeFormSignatureBase(form_data);
  return FormSignature(StrToHash64Bit(signature_base));
}
// Computes the alternative form signature of `form_data`: the 64-bit hash of
// the string returned by CalculateAlternativeFormSignatureBase(), extended for
// forms with at most two fields by one component of the form's URL.
FormSignature CalculateAlternativeFormSignature(const FormData& form_data) {
  std::string signature_input =
      CalculateAlternativeFormSignatureBase(form_data);

  // Small forms get one more non-empty element (path, reference, or query, in
  // that order of preference) in order to prevent signature collisions.
  const bool is_small_form = form_data.fields().size() <= 2;
  if (is_small_form) {
    // The path piece includes the leading slash "/", so a non-empty path must
    // be longer than one character.
    const bool has_real_path = form_data.url().path().length() > 1;
    if (has_real_path) {
      base::StrAppend(&signature_input, {form_data.url().path()});
    } else if (form_data.url().has_ref()) {
      base::StrAppend(&signature_input, {"#", form_data.url().ref()});
    } else if (form_data.url().has_query()) {
      base::StrAppend(&signature_input, {"?", form_data.url().query()});
    }
  }

  return FormSignature(StrToHash64Bit(signature_input));
}
// Computes a field signature from a field's name and control type: the 32-bit
// hash of "<UTF-8 field name>&<control type string>".
FieldSignature CalculateFieldSignatureByNameAndType(
    std::u16string_view field_name,
    FormControlType field_type) {
  const std::string signature_input = base::StrCat(
      {base::UTF16ToUTF8(field_name), "&", FormControlTypeToString(field_type)});
  return FieldSignature(StrToHash32Bit(signature_input));
}
// Computes the field signature of `field_data` from its name and its form
// control type; see CalculateFieldSignatureByNameAndType().
FieldSignature CalculateFieldSignatureForField(
    const FormFieldData& field_data) {
  const auto& name = field_data.name();
  const auto control_type = field_data.form_control_type();
  return CalculateFieldSignatureByNameAndType(name, control_type);
}
uint64_t StrToHash64Bit(std::string_view str) {
auto bytes = base::as_byte_span(str);
const base::SHA1Digest digest = base::SHA1Hash(bytes);
return PackBytes(base::span(digest).first<8>());
}
uint32_t StrToHash32Bit(std::string_view str) {
auto bytes = base::as_byte_span(str);
const base::SHA1Digest digest = base::SHA1Hash(bytes);
return PackBytes(base::span(digest).first<4>());
}
int32_t StrToHash3Bit(std::string_view str) {
const base::SHA1Digest digest = base::SHA1Hash(base::as_byte_span(str));
// Keep only the first 3 bits of the SHA1 hash.
return static_cast<int32_t>((digest[0] >> 5) & 0x07);
}
int32_t StrToHash3Bit(std::u16string_view str) {
return StrToHash3Bit(base::UTF16ToUTF8(str));
}
// Reduces `form_signature` to a value in [0, 1020] by taking its unsigned
// 64-bit value modulo 1021.
int64_t HashFormSignature(FormSignature form_signature) {
  constexpr uint64_t kModulus = 1021;
  const uint64_t raw_value = static_cast<uint64_t>(form_signature.value());
  return raw_value % kModulus;
}
// Reduces `field_signature` to a value in [0, 1020] by widening its value to
// an unsigned 64-bit integer and taking it modulo 1021.
int64_t HashFieldSignature(FieldSignature field_signature) {
  constexpr uint64_t kModulus = 1021;
  const uint64_t raw_value = static_cast<uint64_t>(field_signature.value());
  return raw_value % kModulus;
}
} // namespace autofill