// Source blob: 376849e083fa8e932577d90055e6e54d3997896c
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/randomized_encoder.h"
#include <algorithm>
#include <limits>
#include "base/feature_list.h"
#include "base/format_macros.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/sparse_histogram.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/unguessable_token.h"
#include "components/autofill/core/common/autofill_features.h"
#include "components/autofill/core/common/autofill_prefs.h"
#include "components/autofill/core/common/autofill_switches.h"
#include "components/autofill/core/common/signatures.h"
#include "components/prefs/pref_service.h"
#include "crypto/hkdf.h"
namespace autofill {
namespace {
// Table of the supported encodings. GetEncodingInfo() binary-searches this
// table by encoding type and DCHECKs that it is sorted by that field, so rows
// must stay ordered by their encoding type.
// NOTE(review): each row appears to be {encoding_type, chunk_length_in_bytes,
// bit_offset, bit_stride}; confirm the field order against the declaration of
// RandomizedEncoder::EncodingInfo in the header.
const RandomizedEncoder::EncodingInfo kEncodingInfo[] = {
// One bit per byte. These all require 8 bytes to encode and have 8-bit
// strides, starting from a different initial bit offset.
{AutofillRandomizedValue_EncodingType_BIT_0, 8, 0, 8},
{AutofillRandomizedValue_EncodingType_BIT_1, 8, 1, 8},
{AutofillRandomizedValue_EncodingType_BIT_2, 8, 2, 8},
{AutofillRandomizedValue_EncodingType_BIT_3, 8, 3, 8},
{AutofillRandomizedValue_EncodingType_BIT_4, 8, 4, 8},
{AutofillRandomizedValue_EncodingType_BIT_5, 8, 5, 8},
{AutofillRandomizedValue_EncodingType_BIT_6, 8, 6, 8},
{AutofillRandomizedValue_EncodingType_BIT_7, 8, 7, 8},
// Four bits per byte. These require 32 bytes to encode and have 2-bit
// strides.
{AutofillRandomizedValue_EncodingType_EVEN_BITS, 32, 0, 2},
{AutofillRandomizedValue_EncodingType_ODD_BITS, 32, 1, 2},
// All bits per byte. This requires 64 bytes to encode and has a 1-bit
// stride.
{AutofillRandomizedValue_EncodingType_ALL_BITS, 64, 0, 1},
};
// Size related constants.
// Number of bits in one byte; used to convert between bit and byte offsets.
constexpr size_t kBitsPerByte = 8;
// Length, in bytes, of one chunk of padded input. Encode() pads its input to a
// whole number of chunks of this size.
constexpr size_t kEncodedChunkLengthInBytes = 64;
// Upper bound on the number of chunks GetChunkCount() returns for form URLs.
constexpr size_t kMaxChunks = 8;
// Looks up the kEncodingInfo row describing |encoding_type|. Returns a pointer
// to that row, or nullptr when the table has no entry for the requested type.
// The lookup is a binary search, so the table must be sorted by encoding type
// (verified in debug builds).
const RandomizedEncoder::EncodingInfo* GetEncodingInfo(
    AutofillRandomizedValue_EncodingType encoding_type) {
  using Info = RandomizedEncoder::EncodingInfo;
  const Info* const first = std::begin(kEncodingInfo);
  const Info* const last = std::end(kEncodingInfo);

  DCHECK(std::is_sorted(first, last, [](const Info& a, const Info& b) {
    return a.encoding_type < b.encoding_type;
  }));

  // First row whose type is not less than the requested one.
  const Info* const candidate = std::lower_bound(
      first, last, encoding_type,
      [](const Info& entry, AutofillRandomizedValue_EncodingType wanted) {
        return entry.encoding_type < wanted;
      });

  // lower_bound() only gives a position; make sure it is an exact match.
  if (candidate == last || candidate->encoding_type != encoding_type) {
    return nullptr;
  }
  return candidate;
}
// Returns the |i|-th bit of |s| as 0 or 1. Bits are numbered starting from the
// least significant bit of the first character of |s|, so bit 8 is the lowest
// bit of the second character. The caller must guarantee that |i| is a valid
// bit offset into |s|.
inline uint8_t GetBit(base::StringPiece s, size_t i) {
  const size_t byte_index = i / kBitsPerByte;
  const size_t shift = i % kBitsPerByte;
  DCHECK_LT(byte_index, s.length());
  // Go through uint8_t so the shift operates on the raw byte value regardless
  // of whether char is signed.
  const uint8_t byte = static_cast<uint8_t>(s[byte_index]);
  return static_cast<uint8_t>((byte >> shift) & 1u);
}
// Writes |bit_value| (which must be 0 or 1) into the |i|-th bit of |*s|. Bits
// are numbered starting from the least significant bit of the first character
// of |*s|. The caller must guarantee that |i| is a valid bit offset into |*s|.
inline void SetBit(size_t i, uint8_t bit_value, std::string* s) {
  DCHECK(bit_value == 0u || bit_value == 1u);
  DCHECK(s);
  const size_t byte_index = i / kBitsPerByte;
  const size_t shift = i % kBitsPerByte;
  DCHECK_LT(byte_index, s->length());

  char& target = (*s)[byte_index];
  // Drop whatever was stored in the target bit...
  target &= ~(1 << shift);
  // ...then store the requested value there.
  target |= (bit_value << shift);
}
// Derives |encoding_length_in_bytes| bytes of pseudo-random data from |secret|
// using HKDF-SHA256. The HKDF "info" input is a string built from |purpose|,
// |form_signature|, |field_signature| and |data_type|, so the output is
// deterministic for a given combination of all of these inputs.
std::string GetPseudoRandomBits(base::StringPiece secret,
                                base::StringPiece purpose,
                                FormSignature form_signature,
                                FieldSignature field_signature,
                                base::StringPiece data_type,
                                int encoding_length_in_bytes) {
  // |purpose| and |data_type| are expected to be short semantic identifiers
  // ("noise", "coins", "css_class", "html-name", "html_id", etc.), so their
  // lengths must fit in an int; checked_cast enforces this.
  const int purpose_size = base::checked_cast<int>(purpose.length());
  const int data_type_size = base::checked_cast<int>(data_type.length());
  const uint64_t field_signature_value =
      static_cast<uint64_t>(field_signature.value());

  // Describe the encoding being performed. Each string is written with an
  // explicit length prefix and printed with a bounded "%.*s".
  const std::string hkdf_info = base::StringPrintf(
      "%d:%.*s;%08" PRIx64 ";%08" PRIx64 ";%d:%.*s", purpose_size,
      purpose_size, purpose.data(), form_signature.value(),
      field_signature_value, data_type_size, data_type_size,
      data_type.data());
  DVLOG(1) << "Generating pseudo-random bits from " << hkdf_info;

  // Expand the secret into the requested number of bytes (no salt).
  return crypto::HkdfSha256(secret, {}, hkdf_info, encoding_length_in_bytes);
}
// Chooses the encoding type for a client from its persistent |seed|. The
// choice depends only on the first byte of |seed|, so the same seed always
// yields the same encoding type. |seed| must not be empty.
AutofillRandomizedValue_EncodingType GetEncodingType(const std::string& seed) {
  DCHECK(!seed.empty());
  // Only EVEN_BITS and ODD_BITS are ever selected; the parity of the first
  // seed byte decides between them.
  const bool pick_odd_bits = (static_cast<uint8_t>(seed.front()) & 1u) != 0;
  const AutofillRandomizedValue_EncodingType selected =
      pick_odd_bits ? AutofillRandomizedValue_EncodingType_ODD_BITS
                    : AutofillRandomizedValue_EncodingType_EVEN_BITS;
  DCHECK_NE(selected,
            AutofillRandomizedValue_EncodingType_UNSPECIFIED_ENCODING_TYPE);
  return selected;
}
// Returns the seed stored in the kAutofillUploadEncodingSeed pref. If the pref
// is empty, a new seed is generated from an unguessable token, written back to
// the pref, and returned.
std::string GetEncodingSeed(PrefService* pref_service) {
  std::string seed =
      pref_service->GetString(prefs::kAutofillUploadEncodingSeed);
  if (!seed.empty()) {
    return seed;
  }

  // No seed stored yet: create one and store it so later calls reuse it.
  seed = base::UnguessableToken::Create().ToString();
  pref_service->SetString(prefs::kAutofillUploadEncodingSeed, seed);
  return seed;
}
} // namespace
// String identifiers for form-level metadata. FORM_URL is compared against
// |data_type| in GetChunkCount(); the others are presumably passed as
// |data_type| in the same way -- confirm against callers.
const char RandomizedEncoder::FORM_ID[] = "form-id";
const char RandomizedEncoder::FORM_NAME[] = "form-name";
const char RandomizedEncoder::FORM_ACTION[] = "form-action";
const char RandomizedEncoder::FORM_URL[] = "form-url";
const char RandomizedEncoder::FORM_CSS_CLASS[] = "form-css-class";
const char RandomizedEncoder::FORM_BUTTON_TITLES[] = "button-titles";
// String identifiers for field-level metadata.
const char RandomizedEncoder::FIELD_ID[] = "field-id";
const char RandomizedEncoder::FIELD_NAME[] = "field-name";
const char RandomizedEncoder::FIELD_CONTROL_TYPE[] = "field-control-type";
const char RandomizedEncoder::FIELD_LABEL[] = "field-label";
const char RandomizedEncoder::FIELD_ARIA_LABEL[] = "field-aria-label";
const char RandomizedEncoder::FIELD_ARIA_DESCRIPTION[] =
"field-aria-description";
const char RandomizedEncoder::FIELD_CSS_CLASS[] = "field-css-classes";
const char RandomizedEncoder::FIELD_PLACEHOLDER[] = "field-placeholder";
const char RandomizedEncoder::FIELD_INITIAL_VALUE_HASH[] =
"field-initial-hash-value";
const char RandomizedEncoder::FIELD_AUTOCOMPLETE[] = "field-autocomplete";
// Name of the boolean pref that Create() reads to decide whether anonymous URL
// collection is enabled.
// This is a copy of the constant in components/unified_consent/pref_names.cc.
// The constant from components/unified_consent cannot be used directly because
// of a circular dependency.
// TODO(https://crbug.com/831123): Resolve the circular dependency and remove
// this hardcoded constant.
const char RandomizedEncoder::kUrlKeyedAnonymizedDataCollectionEnabled[] =
"url_keyed_anonymized_data_collection.enabled";
// static
// Builds a RandomizedEncoder configured from |pref_service|.
//
// Returns nullptr when |pref_service| is null. Otherwise the encoder uses the
// seed persisted in prefs (created on first use), the encoding type derived
// from that seed, and the current value of the
// kUrlKeyedAnonymizedDataCollectionEnabled pref.
std::unique_ptr<RandomizedEncoder> RandomizedEncoder::Create(
    PrefService* pref_service) {
  // Without a pref service there is nowhere to read or persist the seed.
  if (!pref_service) {
    return nullptr;
  }

  // Note that for a given client, the seed and encoding type are constant via
  // prefs/config. |seed| is deliberately non-const so that the std::move()
  // below actually moves instead of silently copying.
  std::string seed = GetEncodingSeed(pref_service);
  const AutofillRandomizedValue_EncodingType encoding_type =
      GetEncodingType(seed);
  const bool anonymous_url_collection_is_enabled = pref_service->GetBoolean(
      RandomizedEncoder::kUrlKeyedAnonymizedDataCollectionEnabled);

  return std::make_unique<RandomizedEncoder>(
      std::move(seed), encoding_type, anonymous_url_collection_is_enabled);
}
// Constructs an encoder that derives its pseudo-random data from |seed| and
// encodes using the kEncodingInfo entry for |encoding_type|. |encoding_type|
// is expected to have an entry in that table (checked in debug builds).
RandomizedEncoder::RandomizedEncoder(
    std::string seed,
    AutofillRandomizedValue_EncodingType encoding_type,
    bool anonymous_url_collection_is_enabled)
    : seed_(std::move(seed)),
      encoding_info_(GetEncodingInfo(encoding_type)),
      anonymous_url_collection_is_enabled_(
          anonymous_url_collection_is_enabled) {
  // GetEncodingInfo() returns nullptr for types missing from the table.
  DCHECK(encoding_info_);
}
// Produces the randomized encoding of |data_value| for the given form/field
// signature and |data_type|.
//
// The input is conceptually padded to GetChunkCount() chunks of
// kEncodedChunkLengthInBytes bytes. Two pseudo-random strings of that padded
// length are derived: "coins" and "noise". Each encoded bit is the true data
// bit where the matching coin bit is 1 and the noise bit where it is 0; bit
// positions past the end of |data_value| are pure noise.
//
// Returns an empty string (after NOTREACHED) if the encoder has no encoding
// info.
std::string RandomizedEncoder::Encode(FormSignature form_signature,
                                      FieldSignature field_signature,
                                      base::StringPiece data_type,
                                      base::StringPiece data_value) const {
  if (!encoding_info_) {
    NOTREACHED();
    return std::string();
  }

  const size_t chunk_count = GetChunkCount(data_value, data_type);
  const size_t padded_bytes = chunk_count * kEncodedChunkLengthInBytes;
  const size_t padded_bits = padded_bytes * kBitsPerByte;

  const std::string coins =
      GetCoins(form_signature, field_signature, data_type, padded_bytes);
  std::string noise =
      GetNoise(form_signature, field_signature, data_type, padded_bytes);
  DCHECK_EQ(coins.length() % kEncodedChunkLengthInBytes, 0u);
  DCHECK_EQ(noise.length() % kEncodedChunkLengthInBytes, 0u);
  DCHECK_EQ(coins.length(), padded_bytes);
  DCHECK_EQ(noise.length(), padded_bytes);

  // When every bit is encoded, the noise buffer doubles as the output: the
  // coins select, a whole byte at a time, which bits are overwritten with the
  // data value. Bytes past the end of the value remain noise.
  if (encoding_info_->encoding_type ==
      AutofillRandomizedValue_EncodingType_ALL_BITS) {
    const size_t value_bytes = std::min(data_value.length(), padded_bytes);
    for (size_t i = 0; i < value_bytes; ++i) {
      // Data bits where the coin is 1, original noise bits where it is 0.
      noise[i] = (data_value[i] & coins[i]) | (noise[i] & ~coins[i]);
    }
    return noise;
  }

  // Otherwise, pack the selected subset of bits into a smaller output buffer:
  // every |bit_stride|-th source bit, starting at |bit_offset|, is written to
  // consecutive output bits.
  const size_t output_bytes =
      encoding_info_->chunk_length_in_bytes * chunk_count;
  std::string output(output_bytes, '\0');
  const size_t value_bits = data_value.length() * kBitsPerByte;

  size_t dst = 0;
  for (size_t src = encoding_info_->bit_offset; src < padded_bits;
       src += encoding_info_->bit_stride, ++dst) {
    // The data bit is used only inside the value and only when the coin bit
    // is set; everything else is noise.
    const bool use_data_bit = src < value_bits && GetBit(coins, src) != 0;
    const uint8_t encoded_bit =
        use_data_bit ? GetBit(data_value, src) : GetBit(noise, src);
    SetBit(dst, encoded_bit, &output);
  }
  // Every output bit must have been written exactly once.
  DCHECK_EQ(dst, output_bytes * kBitsPerByte);
  return output;
}
// Test-only helper: converts the UTF-16 |data_value| to UTF-8 and forwards
// everything to Encode().
std::string RandomizedEncoder::EncodeForTesting(
    FormSignature form_signature,
    FieldSignature field_signature,
    base::StringPiece data_type,
    base::StringPiece16 data_value) const {
  const std::string utf8_value = base::UTF16ToUTF8(data_value);
  return Encode(form_signature, field_signature, data_type, utf8_value);
}
// Returns the pseudo-random string used as the coin-toss bit-field: a bit set
// here means the corresponding encoded bit carries the true data bit. The
// string is |encoding_length_in_bytes| long and is derived from this encoder's
// seed with the "coins" purpose label.
std::string RandomizedEncoder::GetCoins(FormSignature form_signature,
                                        FieldSignature field_signature,
                                        base::StringPiece data_type,
                                        int encoding_length_in_bytes) const {
  static const char kPurpose[] = "coins";
  return GetPseudoRandomBits(seed_, kPurpose, form_signature, field_signature,
                             data_type, encoding_length_in_bytes);
}
// Returns the pseudo-random string used as the noise bit-field: its bits
// replace data bits wherever the coin toss is 0. The string is
// |encoding_length_in_bytes| long and is derived from this encoder's seed with
// the "noise" purpose label.
std::string RandomizedEncoder::GetNoise(FormSignature form_signature,
                                        FieldSignature field_signature,
                                        base::StringPiece data_type,
                                        int encoding_length_in_bytes) const {
  static const char kPurpose[] = "noise";
  return GetPseudoRandomBits(seed_, kPurpose, form_signature, field_signature,
                             data_type, encoding_length_in_bytes);
}
// Returns how many kEncodedChunkLengthInBytes-sized chunks are used to encode
// |data_value| of type |data_type|.
//
// Form URLs use ceil(length / kEncodedChunkLengthInBytes) chunks, capped at
// kMaxChunks; note that an empty URL therefore yields 0 chunks. Every other
// data type always uses exactly one chunk.
int RandomizedEncoder::GetChunkCount(base::StringPiece data_value,
                                     base::StringPiece data_type) const {
  const bool is_form_url = (data_type == RandomizedEncoder::FORM_URL);
  if (!is_form_url) {
    return 1;
  }

  // ceil(length / kEncodedChunkLengthInBytes), computed as quotient plus a
  // remainder term so the arithmetic cannot wrap for very large lengths.
  const size_t length = data_value.length();
  const size_t chunks = length / kEncodedChunkLengthInBytes +
                        (length % kEncodedChunkLengthInBytes != 0 ? 1 : 0);

  // Clamp while still in size_t: narrowing to int before clamping could
  // truncate a huge count to a small or negative value. After the clamp the
  // value is at most kMaxChunks, so the cast is lossless.
  return static_cast<int>(std::min(chunks, kMaxChunks));
}
} // namespace autofill