// blob: 2be68ab1d2a408b7252f58fcff57b7d14b5bfded [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/subresource_filter/content/browser/ruleset_service.h"
#include <utility>
#include "base/bind.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/sequenced_task_runner.h"
#include "base/strings/string_number_conversions.h"
#include "base/task/post_task.h"
#include "base/task_runner_util.h"
#include "base/threading/scoped_blocking_call.h"
#include "base/trace_event/trace_event.h"
#include "base/trace_event/traced_value.h"
#include "components/prefs/pref_registry_simple.h"
#include "components/prefs/pref_service.h"
#include "components/subresource_filter/content/browser/ruleset_publisher.h"
#include "components/subresource_filter/content/browser/ruleset_publisher_impl.h"
#include "components/subresource_filter/content/common/subresource_filter_messages.h"
#include "components/subresource_filter/core/browser/copying_file_stream.h"
#include "components/subresource_filter/core/browser/subresource_filter_constants.h"
#include "components/subresource_filter/core/common/common_features.h"
#include "components/subresource_filter/core/common/indexed_ruleset.h"
#include "components/subresource_filter/core/common/time_measurements.h"
#include "components/subresource_filter/core/common/unindexed_ruleset.h"
#include "components/url_pattern_index/proto/rules.pb.h"
#include "content/public/browser/browser_task_traits.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/render_process_host.h"
#include "third_party/protobuf/src/google/protobuf/io/zero_copy_stream_impl_lite.h"
namespace subresource_filter {
namespace {
// Logs |result| to the "SubresourceFilter.WriteRuleset.Result" enumeration
// histogram, passing IndexAndWriteRulesetResult::MAX as the boundary value.
void RecordIndexAndWriteRulesetResult(
    RulesetService::IndexAndWriteRulesetResult result) {
  using Result = RulesetService::IndexAndWriteRulesetResult;
  const int sample = static_cast<int>(result);
  const int boundary = static_cast<int>(Result::MAX);
  UMA_HISTOGRAM_ENUMERATION("SubresourceFilter.WriteRuleset.Result", sample,
                            boundary);
}
// Implements operations on a `sentinel file`, which is used as a safeguard to
// prevent crash-looping if ruleset indexing crashes right after start-up.
//
// The sentinel file is placed in the ruleset version directory just before
// indexing commences, and removed afterwards. Therefore, if a sentinel file is
// found on next start-up, it is an indication that the previous indexing
// operation may have crashed, and indexing will not be attempted again.
//
// After the first failed indexing attempt, the sentinel file will not be
// removed unless |RulesetIndexer::kIndexedFormatVersion| is incremented. It is
// expected that by that time, either the indexing logic or the ruleset contents
// will be fixed. The consumed disk space is negligible as no ruleset data will
// be written to disk when indexing fails.
//
// Admittedly, this approach errs on the side of caution, and false positives
// can happen. For example, the sentinel file may fail to be removed in case of
// an unclean shutdown, or an unrelated crash as well. This should not be a
// problem, however, as the sentinel file only affects one version of the
// ruleset, and it is expected that version updates will be frequent enough.
class SentinelFile {
 public:
  // Resolves the sentinel file location inside |version_directory|.
  explicit SentinelFile(const base::FilePath& version_directory)
      : path_(IndexedRulesetLocator::GetSentinelFilePath(version_directory)) {}

  // Not copyable or assignable.
  SentinelFile(const SentinelFile&) = delete;
  SentinelFile& operator=(const SentinelFile&) = delete;

  // Returns whether a file currently exists at the sentinel path.
  bool IsPresent() { return base::PathExists(path_); }

  // Writes a zero-length file at the sentinel path. Succeeds iff WriteFile
  // reports that exactly zero bytes were written.
  bool Create() {
    const int bytes_written = base::WriteFile(path_, nullptr, 0);
    return bytes_written == 0;
  }

  // Deletes the sentinel file (non-recursively) and forwards DeleteFile's
  // result.
  bool Remove() { return base::DeleteFile(path_, false /* recursive */); }

 private:
  base::FilePath path_;
};
} // namespace
// IndexedRulesetLocator ------------------------------------------------------
// Builds the path <base_dir>/<format_version>/<content_version> for |version|.
// static
base::FilePath IndexedRulesetLocator::GetSubdirectoryPathForVersion(
    const base::FilePath& base_dir,
    const IndexedRulesetVersion& version) {
  const base::FilePath format_dir =
      base_dir.AppendASCII(base::IntToString(version.format_version));
  return format_dir.AppendASCII(version.content_version);
}
// Returns the location of the ruleset data file inside |version_directory|.
// static
base::FilePath IndexedRulesetLocator::GetRulesetDataFilePath(
    const base::FilePath& version_directory) {
  base::FilePath data_file_path =
      version_directory.Append(kRulesetDataFileName);
  return data_file_path;
}
// Returns the location of the license file inside |version_directory|.
// static
base::FilePath IndexedRulesetLocator::GetLicenseFilePath(
    const base::FilePath& version_directory) {
  base::FilePath license_file_path = version_directory.Append(kLicenseFileName);
  return license_file_path;
}
// Returns the location of the sentinel file inside |version_directory|.
// static
base::FilePath IndexedRulesetLocator::GetSentinelFilePath(
    const base::FilePath& version_directory) {
  base::FilePath sentinel_file_path =
      version_directory.Append(kSentinelFileName);
  return sentinel_file_path;
}
// Cleans up |indexed_ruleset_base_dir| in two passes:
//  1. Recursively deletes every sub-directory other than the one named after
//     the current indexed format version.
//  2. Inside the current-format directory, recursively deletes every version
//     sub-directory except the one for |most_recent_version| (when that
//     version is valid) and those that contain a sentinel file.
// static
void IndexedRulesetLocator::DeleteObsoleteRulesets(
    const base::FilePath& indexed_ruleset_base_dir,
    const IndexedRulesetVersion& most_recent_version) {
  const std::string current_format_name =
      base::IntToString(IndexedRulesetVersion::CurrentFormatVersion());
  const base::FilePath current_format_dir =
      indexed_ruleset_base_dir.AppendASCII(current_format_name);

  // Pass 1: drop directories that belong to obsolete formats.
  base::FileEnumerator format_enumerator(indexed_ruleset_base_dir,
                                         false /* recursive */,
                                         base::FileEnumerator::DIRECTORIES);
  for (base::FilePath candidate = format_enumerator.Next(); !candidate.empty();
       candidate = format_enumerator.Next()) {
    if (candidate == current_format_dir)
      continue;
    base::DeleteFile(candidate, true /* recursive */);
  }

  // Stays empty when there is no valid most recent version, in which case it
  // cannot match any enumerated (non-empty) directory below.
  base::FilePath most_recent_version_dir;
  if (most_recent_version.IsValid()) {
    most_recent_version_dir =
        IndexedRulesetLocator::GetSubdirectoryPathForVersion(
            indexed_ruleset_base_dir, most_recent_version);
  }

  // Pass 2: drop obsolete content versions of the current format, but keep
  // anything guarded by a sentinel file as well as the most recent version.
  base::FileEnumerator version_enumerator(current_format_dir,
                                          false /* recursive */,
                                          base::FileEnumerator::DIRECTORIES);
  for (base::FilePath candidate = version_enumerator.Next();
       !candidate.empty(); candidate = version_enumerator.Next()) {
    const bool must_keep = SentinelFile(candidate).IsPresent() ||
                           candidate == most_recent_version_dir;
    if (!must_keep)
      base::DeleteFile(candidate, true /* recursive */);
  }
}
// RulesetService -------------------------------------------------------------
// Function pointer through which IndexAndWriteRuleset() invokes the indexing
// step; defaults to RulesetService::IndexRuleset.
// NOTE(review): presumably exists so that tests can substitute the indexing
// step -- confirm against the header.
// static
decltype(&RulesetService::IndexRuleset) RulesetService::g_index_ruleset_func =
&RulesetService::IndexRuleset;
// Function pointer through which WriteRuleset() moves the scratch directory
// into its final location; defaults to base::ReplaceFile.
// NOTE(review): presumably exists so that tests can simulate a failing
// replace -- confirm against the header.
// static
decltype(&base::ReplaceFile) RulesetService::g_replace_file_func =
&base::ReplaceFile;
// Constructs the service. Uses |publisher| when supplied, otherwise creates a
// RulesetPublisherImpl on top of |blocking_task_runner|. If the version stored
// in |local_state| is valid and of the current format, the corresponding
// ruleset is opened and published right away; otherwise the stored version is
// reset. FinishInitialization() is posted to the publisher's best-effort task
// runner.
RulesetService::RulesetService(
    PrefService* local_state,
    scoped_refptr<base::SequencedTaskRunner> background_task_runner,
    const base::FilePath& indexed_ruleset_base_dir,
    scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
    std::unique_ptr<RulesetPublisher> publisher)
    : local_state_(local_state),
      background_task_runner_(std::move(background_task_runner)),
      is_initialized_(false),
      indexed_ruleset_base_dir_(indexed_ruleset_base_dir) {
  DCHECK_NE(local_state_->GetInitializationStatus(),
            PrefService::INITIALIZATION_STATUS_WAITING);

  if (publisher) {
    publisher_ = std::move(publisher);
  } else {
    publisher_ =
        std::make_unique<RulesetPublisherImpl>(this, blocking_task_runner);
  }

  IndexedRulesetVersion prefs_version;
  prefs_version.ReadFromPrefs(local_state_);
  TRACE_EVENT1(TRACE_DISABLED_BY_DEFAULT("loading"),
               "RulesetService::RulesetService", "prefs_version",
               prefs_version.ToTracedValue());

  const bool has_usable_stored_ruleset =
      prefs_version.IsValid() && prefs_version.IsCurrentFormatVersion();
  if (has_usable_stored_ruleset) {
    OpenAndPublishRuleset(prefs_version);
  } else {
    // Overwrite whatever is stored with a default-constructed version.
    IndexedRulesetVersion().SaveToPrefs(local_state_);
  }

  DCHECK(publisher_->BestEffortTaskRunner()->BelongsToCurrentThread());
  publisher_->BestEffortTaskRunner()->PostTask(
      FROM_HERE,
      base::BindOnce(&RulesetService::FinishInitialization, AsWeakPtr()));
}
RulesetService::~RulesetService() = default;
void RulesetService::IndexAndStoreAndPublishRulesetIfNeeded(
const UnindexedRulesetInfo& unindexed_ruleset_info) {
if (unindexed_ruleset_info.content_version.empty())
return;
// Trying to store a ruleset with the same version for a second time would
// not only be futile, but would fail on Windows due to "File System
// Tunneling" as long as the previously stored copy of the rules is still
// in use.
IndexedRulesetVersion most_recently_indexed_version;
most_recently_indexed_version.ReadFromPrefs(local_state_);
if (most_recently_indexed_version.IsCurrentFormatVersion() &&
most_recently_indexed_version.content_version ==
unindexed_ruleset_info.content_version) {
return;
}
// Before initialization, retain information about the most recently supplied
// unindexed ruleset, to be processed during initialization.
if (!is_initialized_) {
queued_unindexed_ruleset_info_ = unindexed_ruleset_info;
return;
}
IndexAndStoreRuleset(
unindexed_ruleset_info,
base::BindOnce(&RulesetService::OpenAndPublishRuleset, AsWeakPtr()));
}
// Returns the indexed ruleset version as currently read from |local_state_|.
IndexedRulesetVersion RulesetService::GetMostRecentlyIndexedVersion() const {
  IndexedRulesetVersion stored_version;
  stored_version.ReadFromPrefs(local_state_);
  return stored_version;
}
// Indexes the unindexed ruleset described by |unindexed_ruleset_info| and
// writes the result into the matching version directory under
// |indexed_ruleset_base_dir|. Every outcome is reported through
// RecordIndexAndWriteRulesetResult(). Returns the indexed version (including
// its checksum) on success, or a default-constructed IndexedRulesetVersion on
// any failure.
// static
IndexedRulesetVersion RulesetService::IndexAndWriteRuleset(
    const base::FilePath& indexed_ruleset_base_dir,
    const UnindexedRulesetInfo& unindexed_ruleset_info) {
  base::ScopedBlockingCall scoped_blocking_call(base::BlockingType::MAY_BLOCK);

  // Shared failure path: record |reason| and hand back an invalid version.
  const auto abort_with = [](IndexAndWriteRulesetResult reason) {
    RecordIndexAndWriteRulesetResult(reason);
    return IndexedRulesetVersion();
  };

  base::File unindexed_ruleset_file(
      unindexed_ruleset_info.ruleset_path,
      base::File::FLAG_OPEN | base::File::FLAG_READ);
  if (!unindexed_ruleset_file.IsValid()) {
    return abort_with(
        IndexAndWriteRulesetResult::FAILED_OPENING_UNINDEXED_RULESET);
  }

  IndexedRulesetVersion indexed_version(
      unindexed_ruleset_info.content_version,
      IndexedRulesetVersion::CurrentFormatVersion());
  const base::FilePath version_dir =
      IndexedRulesetLocator::GetSubdirectoryPathForVersion(
          indexed_ruleset_base_dir, indexed_version);
  if (!base::CreateDirectory(version_dir))
    return abort_with(IndexAndWriteRulesetResult::FAILED_CREATING_VERSION_DIR);

  // A sentinel left behind by an earlier attempt means indexing this version
  // must not be retried.
  SentinelFile sentinel_file(version_dir);
  if (sentinel_file.IsPresent()) {
    return abort_with(
        IndexAndWriteRulesetResult::ABORTED_BECAUSE_SENTINEL_FILE_PRESENT);
  }
  if (!sentinel_file.Create()) {
    return abort_with(
        IndexAndWriteRulesetResult::FAILED_CREATING_SENTINEL_FILE);
  }

  // --- Begin of guarded section.
  //
  // Crashes or errors occurring here will leave behind a sentinel file that
  // will prevent this version of the ruleset from ever being indexed again.
  RulesetIndexer indexer;
  const bool indexing_succeeded =
      (*g_index_ruleset_func)(std::move(unindexed_ruleset_file), &indexer);
  if (!indexing_succeeded) {
    return abort_with(
        IndexAndWriteRulesetResult::FAILED_PARSING_UNINDEXED_RULESET);
  }
  // --- End of guarded section.

  indexed_version.checksum = indexer.GetChecksum();
  if (!sentinel_file.Remove()) {
    return abort_with(
        IndexAndWriteRulesetResult::FAILED_DELETING_SENTINEL_FILE);
  }

  const IndexAndWriteRulesetResult write_result =
      WriteRuleset(version_dir, unindexed_ruleset_info.license_path,
                   indexer.data(), indexer.size());
  RecordIndexAndWriteRulesetResult(write_result);
  if (write_result != IndexAndWriteRulesetResult::SUCCESS)
    return IndexedRulesetVersion();

  DCHECK(indexed_version.IsValid());
  return indexed_version;
}
// Reads |unindexed_ruleset_file| chunk by chunk and feeds every URL rule to
// |indexer|, then finalizes the indexer. Rules rejected by
// RulesetIndexer::AddUrlRule() are only counted and reported to UMA. Returns
// false if the file length cannot be determined, or if the number of bytes
// consumed by the reader differs from the file length.
// static
bool RulesetService::IndexRuleset(base::File unindexed_ruleset_file,
                                  RulesetIndexer* indexer) {
  SCOPED_UMA_HISTOGRAM_TIMER("SubresourceFilter.IndexRuleset.WallDuration");
  SCOPED_UMA_HISTOGRAM_THREAD_TIMER(
      "SubresourceFilter.IndexRuleset.CPUDuration");

  const int64_t expected_num_bytes = unindexed_ruleset_file.GetLength();
  if (expected_num_bytes < 0)
    return false;

  CopyingFileInputStream file_stream(std::move(unindexed_ruleset_file));
  google::protobuf::io::CopyingInputStreamAdaptor buffered_stream(
      &file_stream, 4096 /* buffer_size */);
  UnindexedRulesetReader reader(&buffered_stream);

  size_t num_unsupported_rules = 0;
  url_pattern_index::proto::FilteringRules chunk;
  while (reader.ReadNextChunk(&chunk)) {
    for (const auto& url_rule : chunk.url_rules()) {
      const bool was_added = indexer->AddUrlRule(url_rule);
      if (!was_added)
        ++num_unsupported_rules;
    }
  }
  indexer->Finish();

  UMA_HISTOGRAM_COUNTS_10000(
      "SubresourceFilter.IndexRuleset.NumUnsupportedRules",
      num_unsupported_rules);

  // Succeed only if the reader consumed exactly as many bytes as the file
  // length reported up front.
  const bool consumed_whole_file =
      reader.num_bytes_read() == expected_num_bytes;
  return consumed_whole_file;
}
// Writes the indexed ruleset to |indexed_ruleset_version_dir|.
//
// The data (|indexed_ruleset_data|, |indexed_ruleset_size| bytes) and, if
// |license_source_path| exists, a copy of the license file are first written
// into a scratch directory created next to |indexed_ruleset_version_dir|. Any
// pre-existing |indexed_ruleset_version_dir| is then deleted and the scratch
// directory is moved into its place through |g_replace_file_func|.
//
// Returns SUCCESS, or the IndexAndWriteRulesetResult value identifying the
// step that failed. A failure of the final replace step is additionally
// reported to the "SubresourceFilter.WriteRuleset.ReplaceFileError" histogram.
// static
RulesetService::IndexAndWriteRulesetResult RulesetService::WriteRuleset(
    const base::FilePath& indexed_ruleset_version_dir,
    const base::FilePath& license_source_path,
    const uint8_t* indexed_ruleset_data,
    size_t indexed_ruleset_size) {
  base::ScopedTempDir scratch_dir;
  if (!scratch_dir.CreateUniqueTempDirUnderPath(
          indexed_ruleset_version_dir.DirName())) {
    return IndexAndWriteRulesetResult::FAILED_CREATING_SCRATCH_DIR;
  }

  static_assert(sizeof(uint8_t) == sizeof(char), "Expected char = byte.");
  const int data_size_in_chars = base::checked_cast<int>(indexed_ruleset_size);
  if (base::WriteFile(
          IndexedRulesetLocator::GetRulesetDataFilePath(scratch_dir.GetPath()),
          reinterpret_cast<const char*>(indexed_ruleset_data),
          data_size_in_chars) != data_size_in_chars) {
    return IndexAndWriteRulesetResult::FAILED_WRITING_RULESET_DATA;
  }

  // The license is optional: a missing source file is not an error, only a
  // failed copy of an existing one is.
  if (base::PathExists(license_source_path) &&
      !base::CopyFile(
          license_source_path,
          IndexedRulesetLocator::GetLicenseFilePath(scratch_dir.GetPath()))) {
    return IndexAndWriteRulesetResult::FAILED_WRITING_LICENSE;
  }

  // Creating a temporary directory also makes sure the path (except for the
  // final segment) gets created. ReplaceFile would not create the path.
  DCHECK(base::PathExists(indexed_ruleset_version_dir.DirName()));

  // Need to manually delete the previously stored ruleset with the same
  // version, if any, as ReplaceFile would not overwrite a non-empty directory.
  // Due to the same-version check in IndexAndStoreAndPublishRulesetIfNeeded, we
  // would not normally find a pre-existing copy at this point unless the
  // previous write was interrupted.
  if (!base::DeleteFile(indexed_ruleset_version_dir, true))
    return IndexAndWriteRulesetResult::FAILED_DELETE_PREEXISTING;

  // NOTE(review): Take() presumably releases the directory from |scratch_dir|,
  // which is why the failure path below deletes it explicitly -- confirm.
  base::FilePath scratch_dir_with_new_indexed_ruleset = scratch_dir.Take();

  // Start from a well-defined value: |g_replace_file_func| is replaceable, and
  // a replacement that fails without writing |error| must not cause an
  // uninitialized read when the histogram sample is computed below.
  base::File::Error error = base::File::FILE_ERROR_FAILED;
  if (!(*g_replace_file_func)(scratch_dir_with_new_indexed_ruleset,
                              indexed_ruleset_version_dir, &error)) {
    // Best-effort clean-up of the scratch directory that could not be moved.
    base::DeleteFile(scratch_dir_with_new_indexed_ruleset, true);
    // While enumerators of base::File::Error all have negative values, the
    // histogram records the absolute values.
    UMA_HISTOGRAM_ENUMERATION("SubresourceFilter.WriteRuleset.ReplaceFileError",
                              -error, -base::File::FILE_ERROR_MAX);
    return IndexAndWriteRulesetResult::FAILED_REPLACE_FILE;
  }

  return IndexAndWriteRulesetResult::SUCCESS;
}
void RulesetService::FinishInitialization() {
is_initialized_ = true;
IndexedRulesetVersion most_recently_indexed_version;
most_recently_indexed_version.ReadFromPrefs(local_state_);
background_task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&IndexedRulesetLocator::DeleteObsoleteRulesets,
indexed_ruleset_base_dir_, most_recently_indexed_version));
if (!queued_unindexed_ruleset_info_.content_version.empty()) {
IndexAndStoreRuleset(
queued_unindexed_ruleset_info_,
base::BindOnce(&RulesetService::OpenAndPublishRuleset, AsWeakPtr()));
queued_unindexed_ruleset_info_ = UnindexedRulesetInfo();
}
}
// Posts IndexAndWriteRuleset() for |unindexed_ruleset_info| to the background
// task runner and routes its result to OnWrittenRuleset(), together with
// |success_callback|.
void RulesetService::IndexAndStoreRuleset(
    const UnindexedRulesetInfo& unindexed_ruleset_info,
    WriteRulesetCallback success_callback) {
  DCHECK(!unindexed_ruleset_info.content_version.empty());

  auto index_and_write_task =
      base::BindOnce(&RulesetService::IndexAndWriteRuleset,
                     indexed_ruleset_base_dir_, unindexed_ruleset_info);
  auto on_written_reply =
      base::BindOnce(&RulesetService::OnWrittenRuleset, AsWeakPtr(),
                     std::move(success_callback));

  base::PostTaskAndReplyWithResult(background_task_runner_.get(), FROM_HERE,
                                   std::move(index_and_write_task),
                                   std::move(on_written_reply));
}
// Handles the result of IndexAndWriteRuleset(). A valid |version| is saved to
// prefs and passed on to |result_callback|; an invalid one is ignored and the
// callback is never run.
void RulesetService::OnWrittenRuleset(WriteRulesetCallback result_callback,
                                      const IndexedRulesetVersion& version) {
  DCHECK(!result_callback.is_null());
  if (version.IsValid()) {
    version.SaveToPrefs(local_state_);
    std::move(result_callback).Run(version);
  }
}
void RulesetService::OpenAndPublishRuleset(
const IndexedRulesetVersion& version) {
const base::FilePath file_path =
IndexedRulesetLocator::GetRulesetDataFilePath(
IndexedRulesetLocator::GetSubdirectoryPathForVersion(
indexed_ruleset_base_dir_, version));
publisher_->TryOpenAndSetRulesetFile(
file_path, version.checksum,
base::BindOnce(&RulesetService::OnRulesetSet, AsWeakPtr()));
}
// Receives the ruleset file opened by the publisher. A valid file is published
// as the new ruleset version; an invalid one resets the version stored in
// prefs.
void RulesetService::OnRulesetSet(base::File file) {
  if (file.IsValid()) {
    publisher_->PublishNewRulesetVersion(std::move(file));
    return;
  }

  // The file has just been successfully written, so a failure here is unlikely
  // unless |indexed_ruleset_base_dir_| has been tampered with or there are disk
  // errors. Still, restore the invariant that a valid version in preferences
  // always points to an existing version on disk by invalidating the prefs.
  IndexedRulesetVersion().SaveToPrefs(local_state_);
}
} // namespace subresource_filter