blob: 0d96e778604fd7fbfe07a6d1b9f9a37ff4dbcbe3 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/browsing_topics/browsing_topics_state.h"
#include "base/base64.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/json/json_file_value_serializer.h"
#include "base/json/json_writer.h"
#include "base/json/values_util.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/string_number_conversions.h"
#include "base/task/task_traits.h"
#include "base/task/thread_pool.h"
#include "components/browsing_topics/common/common_types.h"
#include "components/browsing_topics/util.h"
#include "third_party/blink/public/common/features.h"
namespace browsing_topics {
namespace {
// How often the file is saved at most.
// Handed to `writer_` in the `BrowsingTopicsState` constructor.
const base::TimeDelta kSaveDelay = base::Milliseconds(2500);
// Top-level keys of the serialized state dictionary. They are written by
// `BrowsingTopicsState::ToDictValue()` and read back by
// `BrowsingTopicsState::ParseValue()`, so changing a string here changes the
// on-disk format.
// Key for the list of per-epoch dictionaries.
const char kEpochsNameKey[] = "epochs";
// Key for the serialized `next_scheduled_calculation_time_`.
const char kNextScheduledCalculationTimeNameKey[] =
"next_scheduled_calculation_time";
// Key for the hex-encoded `hmac_key_`.
const char kHexEncodedHmacKeyNameKey[] = "hex_encoded_hmac_key";
// `config_version` is a deprecated key. Do not reuse.
// Reads the state file at `file_path` and deserializes it as JSON.
//
// The returned `LoadResult` records whether the file was present. When it was,
// the result also carries whatever the deserializer produced, which is null if
// the file could not be read or parsed. Deserialization error details are
// deliberately not collected.
std::unique_ptr<BrowsingTopicsState::LoadResult> LoadFileOnBackendTaskRunner(
    const base::FilePath& file_path) {
  using LoadResult = BrowsingTopicsState::LoadResult;

  if (base::PathExists(file_path)) {
    JSONFileValueDeserializer file_deserializer(file_path);
    std::unique_ptr<base::Value> parsed_value = file_deserializer.Deserialize(
        /*error_code=*/nullptr,
        /*error_message=*/nullptr);
    return std::make_unique<LoadResult>(/*file_exists=*/true,
                                        std::move(parsed_value));
  }

  // Nothing on disk yet: report the absence and no value.
  return std::make_unique<LoadResult>(/*file_exists=*/false, nullptr);
}
// Decides whether an epoch stored under config version `preexisting` can be
// kept when the running configuration is `current`.
//
// `preexisting` must be non-negative (0 marks an epoch from a failed topics
// calculation) and `current` must lie in [1, ConfigVersion::kMaxValue].
bool AreConfigVersionsCompatible(int preexisting, int current) {
  CHECK_GE(preexisting, 0);
  CHECK_GE(current, 1);
  CHECK_LE(current, ConfigVersion::kMaxValue);

  // A stored version newer than anything this build knows about can show up
  // in the rare case where Chrome rolls back to an earlier version.
  if (preexisting > ConfigVersion::kMaxValue) {
    return false;
  }

  // An epoch from a failed calculation (version 0) is compatible with any
  // version, and identical versions are trivially compatible.
  if (preexisting == 0 || preexisting == current) {
    return true;
  }

  // Versions 1 and 2 are forward and backward compatible with each other.
  const bool initial_to_prioritized =
      preexisting == ConfigVersion::kInitial &&
      current == ConfigVersion::kUsePrioritizedTopicsList;
  const bool prioritized_to_initial =
      preexisting == ConfigVersion::kUsePrioritizedTopicsList &&
      current == ConfigVersion::kInitial;
  return initial_to_prioritized || prioritized_to_initial;
}
} // namespace
// Bundles the outcome of reading the state file: whether the file existed, and
// the deserialized value. In this file `value` is passed as null when the file
// is missing, and `DidLoadFile()` treats a null `value` for an existing file
// as a read or deserialization failure.
BrowsingTopicsState::LoadResult::LoadResult(bool file_exists,
std::unique_ptr<base::Value> value)
: file_exists(file_exists), value(std::move(value)) {}
BrowsingTopicsState::LoadResult::~LoadResult() = default;
// Sets up persistence for the file "BrowsingTopicsState" under `profile_path`
// and kicks off an asynchronous load of it.
//
// `loaded_callback` is forwarded to `DidLoadFile()`, which runs it once the
// load result has been processed.
BrowsingTopicsState::BrowsingTopicsState(const base::FilePath& profile_path,
base::OnceClosure loaded_callback)
// File I/O happens on a dedicated sequenced task runner that may block, runs
// at best-effort priority, and blocks shutdown.
: backend_task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
{base::MayBlock(), base::TaskPriority::BEST_EFFORT,
base::TaskShutdownBehavior::BLOCK_SHUTDOWN})),
// The writer shares `backend_task_runner_` with the load task posted below.
writer_(profile_path.Append(FILE_PATH_LITERAL("BrowsingTopicsState")),
backend_task_runner_,
kSaveDelay,
/*histogram_suffix=*/"BrowsingTopicsState") {
// Read the file on the backend runner and hand the result to `DidLoadFile()`.
// The reply is bound to a weak pointer to this object.
backend_task_runner_->PostTaskAndReplyWithResult(
FROM_HERE, base::BindOnce(&LoadFileOnBackendTaskRunner, writer_.path()),
base::BindOnce(&BrowsingTopicsState::DidLoadFile,
weak_ptr_factory_.GetWeakPtr(),
std::move(loaded_callback)));
}
// Flushes any write that is still pending so that the latest state is handed
// to the writer before this object goes away.
BrowsingTopicsState::~BrowsingTopicsState() {
  if (!writer_.HasPendingWrite()) {
    return;
  }
  writer_.DoScheduledWrite();
}
// Drops every stored epoch. A save is scheduled only when there was something
// to drop. Must not be called before loading has finished.
void BrowsingTopicsState::ClearAllTopics() {
  DCHECK(loaded_);

  if (epochs_.empty()) {
    // Nothing stored, so the persisted state is already up to date.
    return;
  }

  epochs_.clear();
  ScheduleSave();
}
void BrowsingTopicsState::ClearOneEpoch(size_t epoch_index) {
DCHECK(loaded_);
epochs_[epoch_index].ClearTopics();
ScheduleSave();
}
// Asks every stored epoch to clear `topic`, then schedules a save. The save is
// scheduled unconditionally, whether or not any epoch actually changed.
void BrowsingTopicsState::ClearTopic(Topic topic) {
  for (auto epoch_it = epochs_.begin(); epoch_it != epochs_.end();
       ++epoch_it) {
    epoch_it->ClearTopic(topic);
  }

  ScheduleSave();
}
void BrowsingTopicsState::ClearContextDomain(
const HashedDomain& hashed_context_domain) {
for (EpochTopics& epoch : epochs_) {
epoch.ClearContextDomain(hashed_context_domain);
}
ScheduleSave();
}
// Appends `epoch_topics` as the newest epoch, arms its expiration callback,
// and schedules a save.
//
// At most `kBrowsingTopicsNumberOfEpochsToExpose` + 1 epochs are retained. If
// appending exceeds that limit, the oldest epoch is removed and returned;
// otherwise the result is empty. Must not be called before loading has
// finished.
std::optional<EpochTopics> BrowsingTopicsState::AddEpoch(
    EpochTopics epoch_topics) {
  DCHECK(loaded_);

  epochs_.push_back(std::move(epoch_topics));

  // When the new epoch expires, `OnEpochExpired()` is invoked with its
  // calculation time, through a weak pointer to this object.
  EpochTopics& newest_epoch = epochs_.back();
  newest_epoch.ScheduleExpiration(
      base::BindOnce(&BrowsingTopicsState::OnEpochExpired,
                     weak_ptr_factory_.GetWeakPtr(),
                     newest_epoch.calculation_time()));

  // Remove the epoch data that is no longer useful.
  const size_t max_retained_epochs =
      static_cast<size_t>(
          blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get()) +
      1;
  std::optional<EpochTopics> evicted_epoch;
  if (epochs_.size() > max_retained_epochs) {
    evicted_epoch = std::move(epochs_.front());
    epochs_.pop_front();
  }

  ScheduleSave();
  return evicted_epoch;
}
void BrowsingTopicsState::ScheduleEpochsExpiration() {
base::Time expired_calculation_time =
base::Time::Now() -
blink::features::kBrowsingTopicsEpochRetentionDuration.Get();
// Remove expired epochs synchronously.
base::EraseIf(epochs_, [&expired_calculation_time](const EpochTopics& epoch) {
return epoch.calculation_time() <= expired_calculation_time;
});
for (EpochTopics& epoch : epochs_) {
epoch.ScheduleExpiration(base::BindOnce(
&BrowsingTopicsState::OnEpochExpired, weak_ptr_factory_.GetWeakPtr(),
epoch.calculation_time()));
}
ScheduleSave();
}
// Expiration callback: removes the epoch(s) calculated at `calculation_time`
// and schedules a save.
void BrowsingTopicsState::OnEpochExpired(base::Time calculation_time) {
  // Calculation times are typically unique, but erase every match so that
  // potential duplicates are handled as well.
  const auto was_calculated_at_expired_time =
      [calculation_time](const EpochTopics& candidate) {
        return candidate.calculation_time() == calculation_time;
      };
  base::EraseIf(epochs_, was_calculated_at_expired_time);

  ScheduleSave();
}
// Records that the next topics calculation is due `delay` from now and
// schedules a save. `delay` must not be negative, and loading must have
// finished.
void BrowsingTopicsState::UpdateNextScheduledCalculationTime(
    base::TimeDelta delay) {
  DCHECK(loaded_);
  DCHECK(!delay.is_negative());

  const base::Time current_time = base::Time::Now();
  next_scheduled_calculation_time_ = current_time + delay;

  ScheduleSave();
}
// Returns the epochs that are currently eligible to be exposed to
// `top_domain`, ordered from oldest to newest.
//
// The returned pointers refer to elements of `epochs_`. The result is empty
// when there are no epochs, or when the only stored epoch has not yet passed
// its per-site introduction delay. At most
// `kBrowsingTopicsNumberOfEpochsToExpose` epochs are considered, and each one
// is additionally filtered by a per-site phase-out time.
std::vector<const EpochTopics*> BrowsingTopicsState::EpochsForSite(
const std::string& top_domain) const {
DCHECK(loaded_);
if (epochs_.empty()) {
return {};
}
const size_t kNumberOfEpochsToExpose = static_cast<size_t>(
blink::features::kBrowsingTopicsNumberOfEpochsToExpose.Get());
DCHECK_GT(kNumberOfEpochsToExpose, 0u);
// Read the clock once so every comparison below uses the same instant.
base::Time now = base::Time::Now();
// Derive a per-user per-site per-epoch time delta in the range of
// [0, `kBrowsingTopicsMaxEpochIntroductionDelay`). The latest epoch will only
// be used after `site_epoch_sticky_introduction_delay` has elapsed since the
// last calculation finish time (i.e. `next_scheduled_calculation_time_` -
// `kBrowsingTopicsTimePeriodPerEpoch`). This way, each site will see a
// different epoch switch time.
base::TimeDelta site_epoch_sticky_introduction_delay =
CalculateSiteStickyIntroductionDelay(top_domain);
// Index of the newest epoch this site may see right now.
size_t end_epoch_index = 0;
if (now <= next_scheduled_calculation_time_ -
blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get() +
site_epoch_sticky_introduction_delay) {
// The latest epoch is not yet introduced for this site; fall back to the
// one before it, if there is one.
if (epochs_.size() < 2) {
return {};
}
end_epoch_index = epochs_.size() - 2;
} else {
end_epoch_index = epochs_.size() - 1;
}
// Take a window of up to `kNumberOfEpochsToExpose` epochs ending at
// `end_epoch_index`, clamped at index 0 to avoid unsigned underflow.
size_t start_epoch_index = (end_epoch_index + 1 >= kNumberOfEpochsToExpose)
? end_epoch_index + 1 - kNumberOfEpochsToExpose
: 0;
std::vector<const EpochTopics*> result;
for (size_t i = start_epoch_index; i <= end_epoch_index; ++i) {
const EpochTopics& epoch = epochs_[i];
// An epoch is kept only if it was calculated after this per-site,
// per-epoch cutoff: now, plus the phase-out offset, minus the retention
// duration.
base::Time earliest_valid_epoch_time =
now + CalculateSiteStickyPhaseOutTimeOffset(top_domain, epoch) -
blink::features::kBrowsingTopicsEpochRetentionDuration.Get();
if (epoch.calculation_time() > earliest_valid_epoch_time) {
result.emplace_back(&epoch);
}
}
return result;
}
// Test-only accessor: reports whether `writer_` currently has a pending write.
bool BrowsingTopicsState::HasScheduledSaveForTesting() const {
return writer_.HasPendingWrite();
}
// Returns how long after the latest calculation the newest epoch should stay
// hidden from `top_domain`.
//
// The delay is a whole number of seconds in
// [0, `kBrowsingTopicsMaxEpochIntroductionDelay`), derived from a hash over
// `hmac_key_`, the latest epoch's calculation time, and `top_domain`. It is
// zero when the latest epoch came from a manually triggered calculation.
// Requires `epochs_` to be non-empty.
base::TimeDelta BrowsingTopicsState::CalculateSiteStickyIntroductionDelay(
const std::string& top_domain) const {
CHECK(!epochs_.empty());
uint64_t epoch_introduction_time_decision_hash =
HashTopDomainForEpochIntroductionTimeDecision(
hmac_key_, epochs_.back().calculation_time(), top_domain);
// The random-over period cannot exceed an epoch. This limitation is due to:
// 1. We only keep data for the last `kNumberOfEpochsToExpose` + 1 epochs. A
// longer random-over period would require us to store more historical
// epochs to meet the `kNumberOfEpochsToExpose` configuration.
// 2. For past, non-latest epochs, we don't store the exact delimitation times
// (i.e. calculation finish times). Using the calculation start time as an
// approximation is not 100% accurate.
DCHECK_LE(blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get(),
blink::features::kBrowsingTopicsTimePeriodPerEpoch.Get());
// The maximum delay is used as a modulo divisor below, so it must be at least
// one whole second.
DCHECK_GT(blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get()
.InSeconds(),
0);
// If the latest epoch was manually triggered, make the latest epoch
// immediately available for testing purposes.
if (epochs_.back().from_manually_triggered_calculation()) {
return base::Seconds(0);
}
// Reduce the hash to a number of seconds below the configured maximum.
return base::Seconds(
epoch_introduction_time_decision_hash %
blink::features::kBrowsingTopicsMaxEpochIntroductionDelay.Get()
.InSeconds());
}
// Returns the per-site phase-out time offset for `epoch`.
//
// The offset is a whole number of seconds in
// [0, `kBrowsingTopicsMaxEpochPhaseOutTimeOffset`), derived from a hash over
// `hmac_key_`, the epoch's calculation time, and `top_domain`.
// `EpochsForSite()` adds it when computing the cutoff below which an epoch is
// no longer returned for this site.
base::TimeDelta BrowsingTopicsState::CalculateSiteStickyPhaseOutTimeOffset(
    const std::string& top_domain,
    const EpochTopics& epoch) const {
  uint64_t epoch_phase_out_time_decision_hash =
      HashTopDomainForEpochPhaseOutTimeDecision(
          hmac_key_, epoch.calculation_time(), top_domain);

  // The maximum offset is used as a modulo divisor below, so it must be at
  // least one whole second. This mirrors the check on the introduction delay
  // in `CalculateSiteStickyIntroductionDelay()`.
  DCHECK_GT(blink::features::kBrowsingTopicsMaxEpochPhaseOutTimeOffset.Get()
                .InSeconds(),
            0);

  return base::Seconds(
      epoch_phase_out_time_decision_hash %
      blink::features::kBrowsingTopicsMaxEpochPhaseOutTimeOffset.Get()
          .InSeconds());
}
// Returns a callback that serializes a snapshot of the current state to
// pretty-printed JSON.
//
// The snapshot is taken here, when this method is called. The callback only
// turns it into a string, yielding `std::nullopt` if JSON writing fails. Must
// not be called before loading has finished.
base::ImportantFileWriter::BackgroundDataProducerCallback
BrowsingTopicsState::GetSerializedDataProducerForBackgroundSequence() {
  DCHECK(loaded_);

  base::Value state_snapshot(ToDictValue());
  return base::BindOnce(
      [](base::Value snapshot) -> std::optional<std::string> {
        // This runs on the background sequence.
        std::string json;
        const bool serialized = base::JSONWriter::WriteWithOptions(
            snapshot, base::JSONWriter::OPTIONS_PRETTY_PRINT, &json);
        if (serialized) {
          return json;
        }
        return std::nullopt;
      },
      std::move(state_snapshot));
}
// Serializes the in-memory state into a dictionary with three entries: the
// list of per-epoch dictionaries, the next scheduled calculation time, and the
// hex-encoded HMAC key. `ParseValue()` reads the same keys back. Must not be
// called before loading has finished.
base::Value::Dict BrowsingTopicsState::ToDictValue() const {
  DCHECK(loaded_);

  // Epochs are written in the order they are stored.
  base::Value::List epochs_list;
  for (const EpochTopics& epoch : epochs_) {
    epochs_list.Append(epoch.ToDictValue());
  }

  base::Value::Dict result_dict;
  result_dict.Set(kEpochsNameKey, std::move(epochs_list));
  result_dict.Set(kNextScheduledCalculationTimeNameKey,
                  base::TimeToValue(next_scheduled_calculation_time_));
  // Encode the key once, directly into the dictionary.
  result_dict.Set(kHexEncodedHmacKeyNameKey, base::HexEncode(hmac_key_));
  return result_dict;
}
// Asks `writer_` to schedule a write of the current state, with this object
// registered as the background data serializer. Must not be called before
// loading has finished.
void BrowsingTopicsState::ScheduleSave() {
DCHECK(loaded_);
writer_.ScheduleWriteWithBackgroundDataSerializer(this);
}
// Consumes the result of the asynchronous file load, marks the state as
// loaded, optionally schedules a save, and finally runs `loaded_callback`.
//
// Three outcomes are distinguished:
//  - no file on disk: first run, so a fresh `hmac_key_` is generated and saved;
//  - file present but unreadable or not valid JSON: counted as a failure, and
//    the file is overwritten with the current (empty) state;
//  - file present and valid JSON: the individual fields are parsed by
//    `ParseValue()`, which decides success and whether to save.
void BrowsingTopicsState::DidLoadFile(base::OnceClosure loaded_callback,
                                      std::unique_ptr<LoadResult> load_result) {
  DCHECK(load_result);
  DCHECK(!loaded_);

  // Defaults describe the "file present but unusable" outcome.
  bool load_succeeded = false;
  bool needs_save = true;

  if (load_result->file_exists && load_result->value) {
    // JSON deserialization succeeded in general. Parse the value to individual
    // fields.
    const ParseResult parsed = ParseValue(*load_result->value);
    load_succeeded = parsed.success;
    needs_save = parsed.should_save_state_to_file;
  } else if (!load_result->file_exists) {
    // If this is the first time loading, generate a `hmac_key_`, and save it.
    // This ensures we only generate the key once per profile, as data derived
    // from the key may be subsequently stored elsewhere.
    hmac_key_ = GenerateRandomHmacKey();
    load_succeeded = true;
  }
  // Otherwise a file read error was encountered, or the JSON deserialization
  // failed in general: keep the defaults so that the file gets emptied.

  base::UmaHistogramBoolean(
      "BrowsingTopics.BrowsingTopicsState.LoadFinishStatus", load_succeeded);

  loaded_ = true;

  if (needs_save) {
    ScheduleSave();
  }

  std::move(loaded_callback).Run();
}
// Populates `hmac_key_`, `epochs_` and `next_scheduled_calculation_time_` from
// the deserialized state file contents in `value`.
//
// Returns whether parsing succeeded and whether the file should be rewritten.
// Every malformed-input case (wrong top-level type, missing or invalid HMAC
// key, missing epochs list, non-dictionary epoch entry, missing or malformed
// next scheduled calculation time) yields `success = false` with a rewrite
// requested. An incompatible stored config version is not an error: the epochs
// are dropped and the result is `success = true` with a rewrite requested.
// Must only be called before loading has finished.
BrowsingTopicsState::ParseResult BrowsingTopicsState::ParseValue(
    const base::Value& value) {
  DCHECK(!loaded_);

  const base::Value::Dict* dict_value = value.GetIfDict();
  if (!dict_value) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  const std::string* hex_encoded_hmac_key =
      dict_value->FindString(kHexEncodedHmacKeyNameKey);
  if (!hex_encoded_hmac_key) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  if (!base::HexStringToSpan(*hex_encoded_hmac_key, hmac_key_)) {
    // `HexStringToSpan` may partially fill the `hmac_key_` up until the
    // failure. Reset it to empty.
    hmac_key_.fill(0);
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  const base::Value::List* epochs_value = dict_value->FindList(kEpochsNameKey);
  if (!epochs_value) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  for (const base::Value& epoch_value : *epochs_value) {
    const base::Value::Dict* epoch_dict_value = epoch_value.GetIfDict();
    if (!epoch_dict_value) {
      return ParseResult{.success = false, .should_save_state_to_file = true};
    }

    epochs_.push_back(EpochTopics::FromDictValue(*epoch_dict_value));
  }

  for (const EpochTopics& epoch : epochs_) {
    // If any preexisting epoch's version is incompatible with the current
    // version, start with a fresh `epochs_`.
    if (!AreConfigVersionsCompatible(epoch.config_version(),
                                     CurrentConfigVersion())) {
      // Returning right after `clear()` keeps the loop from touching the
      // invalidated range.
      epochs_.clear();
      return ParseResult{.success = true, .should_save_state_to_file = true};
    }
  }

  const base::Value* next_scheduled_calculation_time_value =
      dict_value->Find(kNextScheduledCalculationTimeNameKey);
  if (!next_scheduled_calculation_time_value) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }

  // The entry may be present but malformed (e.g. a corrupted or hand-edited
  // file). Treat that like a missing entry instead of unconditionally
  // unwrapping an empty optional.
  const std::optional<base::Time> parsed_next_scheduled_calculation_time =
      base::ValueToTime(next_scheduled_calculation_time_value);
  if (!parsed_next_scheduled_calculation_time.has_value()) {
    return ParseResult{.success = false, .should_save_state_to_file = true};
  }
  next_scheduled_calculation_time_ = *parsed_next_scheduled_calculation_time;

  return ParseResult{.success = true, .should_save_state_to_file = false};
}
} // namespace browsing_topics