blob: 7f431a86841f6eb396ef27b7e11d2973c2dc077e [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ukm/ukm_recorder_impl.h"
#include <memory>
#include <string>
#include <utility>
#include "base/metrics/field_trial.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/metrics_hashes.h"
#include "base/strings/string_split.h"
#include "components/ukm/ukm_source.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "third_party/metrics_proto/ukm/entry.pb.h"
#include "third_party/metrics_proto/ukm/report.pb.h"
#include "third_party/metrics_proto/ukm/source.pb.h"
#include "url/gurl.h"
namespace ukm {
namespace {
// Returns the value of the "WhitelistEntries" field trial param of
// |kUkmFeature|. The value is a comma-separated list of Entry names (as
// strings).
std::string GetWhitelistEntries() {
  std::string whitelist =
      base::GetFieldTrialParamValueByFeature(kUkmFeature, "WhitelistEntries");
  return whitelist;
}
// Returns true when the type encoded in |source_id| is NAVIGATION_ID; every
// other source id type is treated as not whitelisted.
bool IsWhitelistedSourceId(SourceId source_id) {
  const auto id_type = GetSourceIdType(source_id);
  return id_type == SourceIdType::NAVIGATION_ID;
}
// Returns the cap on the number of Sources held in memory; once the cap is
// reached, newly added Sources are discarded. The cap is read from the
// "MaxSources" field trial param of |kUkmFeature|, with 500 supplied as the
// default value.
// NOTE(review): a negative param value would wrap to a very large size_t in
// the cast below - confirm that configuration cannot happen.
size_t GetMaxSources() {
  constexpr size_t kFallbackLimit = 500;
  const auto configured_limit = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, "MaxSources", kFallbackLimit);
  return static_cast<size_t>(configured_limit);
}
// Returns the cap on the number of unreferenced Sources that are kept around
// after the Sources added to a log have been purged. The cap is read from the
// "MaxKeptSources" field trial param of |kUkmFeature|, with 100 supplied as
// the default value.
// NOTE(review): a negative param value would wrap to a very large size_t in
// the cast below - confirm that configuration cannot happen.
size_t GetMaxKeptSources() {
  constexpr size_t kFallbackLimit = 100;
  const auto configured_limit = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, "MaxKeptSources", kFallbackLimit);
  return static_cast<size_t>(configured_limit);
}
// Returns the cap on the number of Entries held in memory; once the cap is
// reached, newly added Entries are discarded. The cap is read from the
// "MaxEntries" field trial param of |kUkmFeature|, with 5000 supplied as the
// default value.
// NOTE(review): a negative param value would wrap to a very large size_t in
// the cast below - confirm that configuration cannot happen.
size_t GetMaxEntries() {
  constexpr size_t kFallbackLimit = 5000;
  const auto configured_limit = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, "MaxEntries", kFallbackLimit);
  return static_cast<size_t>(configured_limit);
}
// Tells whether the scheme of |url| is one that UKM logs: http(s), ftp,
// about, or chrome. URLs with any other scheme are not logged.
// Note: chrome-extension:// URLs are currently left out because logging them
// would require UKM to take extension-sync consent into account, which is
// not yet done.
bool HasSupportedScheme(const GURL& url) {
  if (url.SchemeIsHTTPOrHTTPS())
    return true;
  if (url.SchemeIs(url::kFtpScheme))
    return true;
  if (url.SchemeIs(url::kAboutScheme))
    return true;
  // kChromeUIScheme lives in content, which this code can't depend on because
  // it is also used by iOS, so the "chrome" scheme is spelled out here.
  return url.SchemeIs("chrome");
}
// Whether the initial_url field of the UKM Source proto should be recorded.
// Controlled by the "RecordInitialUrl" field trial param of |kUkmFeature|,
// with false supplied as the default value.
bool ShouldRecordInitialUrl() {
  constexpr bool kRecordByDefault = false;
  return base::GetFieldTrialParamByFeatureAsBool(
      kUkmFeature, "RecordInitialUrl", kRecordByDefault);
}
// Reasons why a Source or an Entry was dropped instead of being stored. The
// integer value of each enumerator is what RecordDroppedSource() and
// RecordDroppedEntry() log to the "UKM.Sources.Dropped" and
// "UKM.Entries.Dropped" histograms, so the explicit values matter.
enum class DroppedDataReason {
NOT_DROPPED = 0,
// Recording was disabled when the data was submitted.
RECORDING_DISABLED = 1,
// The in-memory limit (GetMaxSources() / GetMaxEntries()) was reached.
MAX_HIT = 2,
// The source id or the entry's event hash was not whitelisted.
NOT_WHITELISTED = 3,
// The URL's scheme was rejected by HasSupportedScheme().
UNSUPPORTED_URL_SCHEME = 4,
// Not a reason itself; passed as the boundary argument to
// UMA_HISTOGRAM_ENUMERATION.
NUM_DROPPED_DATA_REASONS
};
// Logs |reason| as a sample of the "UKM.Sources.Dropped" enumeration
// histogram.
void RecordDroppedSource(DroppedDataReason reason) {
  constexpr int kReasonCount =
      static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS);
  const int reason_value = static_cast<int>(reason);
  UMA_HISTOGRAM_ENUMERATION("UKM.Sources.Dropped", reason_value, kReasonCount);
}
// Logs |reason| as a sample of the "UKM.Entries.Dropped" enumeration
// histogram.
void RecordDroppedEntry(DroppedDataReason reason) {
  constexpr int kReasonCount =
      static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS);
  const int reason_value = static_cast<int>(reason);
  UMA_HISTOGRAM_ENUMERATION("UKM.Entries.Dropped", reason_value, kReasonCount);
}
// Copies the mojom entry |in| into the proto entry |out|: the source id, the
// event hash, and one proto metric (hash + value) per metric in |in|.
// |out| is expected to have neither its source id nor its event hash set yet.
void StoreEntryProto(const mojom::UkmEntry& in, Entry* out) {
  DCHECK(!out->has_source_id());
  DCHECK(!out->has_event_hash());

  out->set_source_id(in.source_id);
  out->set_event_hash(in.event_hash);

  for (const auto& mojo_metric : in.metrics) {
    Entry::Metric& metric_proto = *out->add_metrics();
    metric_proto.set_metric_hash(mojo_metric->metric_hash);
    metric_proto.set_value(mojo_metric->value);
  }
}
// Returns a copy of |url| with the username and password removed. For
// chrome:// and about: URLs the query is removed as well.
GURL SanitizeURL(const GURL& url) {
  // The params of chrome:// and about: URLs are never used for navigation,
  // only to prepopulate data on the page, so they are not worth keeping.
  const bool drop_query =
      url.SchemeIs(url::kAboutScheme) || url.SchemeIs("chrome");

  GURL::Replacements stripped;
  if (drop_query)
    stripped.ClearQuery();
  stripped.ClearUsername();
  stripped.ClearPassword();
  return url.ReplaceComponents(stripped);
}
} // namespace
// A new recorder starts with recording disabled; EnableRecording() turns it
// on.
UkmRecorderImpl::UkmRecorderImpl() : recording_enabled_(false) {}
// Nothing beyond the implicit member destruction is done on teardown.
UkmRecorderImpl::~UkmRecorderImpl() = default;
// Turns recording on. While recording is on, UpdateSourceURL() and AddEntry()
// no longer drop data with the RECORDING_DISABLED reason.
void UkmRecorderImpl::EnableRecording() {
  recording_enabled_ = true;
  DVLOG(1) << "UkmRecorderImpl::EnableRecording";
}
// Turns recording off. While recording is off, UpdateSourceURL() and
// AddEntry() drop everything with the RECORDING_DISABLED reason.
void UkmRecorderImpl::DisableRecording() {
  recording_enabled_ = false;
  DVLOG(1) << "UkmRecorderImpl::DisableRecording";
}
// Throws away every Source and Entry currently held in memory.
void UkmRecorderImpl::Purge() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  entries_.clear();
  sources_.clear();
}
// Serializes the buffered Entries and Sources into |report| and resets the
// in-memory buffers.
//
// All Entries are written. A Source is written when its id is whitelisted or
// when at least one buffered Entry refers to it. The remaining ("unsent")
// Sources are carried over to the next report, capped at GetMaxKeptSources()
// with the most recently created ones preferred. Several "UKM.*" count
// histograms are emitted along the way.
void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  // Serialize every entry and remember which source ids have entries.
  std::set<SourceId> ids_seen;
  for (const auto& entry : entries_) {
    Entry* proto_entry = report->add_entries();
    StoreEntryProto(*entry, proto_entry);
    ids_seen.insert(entry->source_id);
  }

  // Look the field trial param up once instead of once per serialized source.
  const bool record_initial_url = ShouldRecordInitialUrl();

  std::vector<std::unique_ptr<UkmSource>> unsent_sources;
  for (auto& kv : sources_) {
    // If the source id is not whitelisted, don't send it unless it has
    // associated entries. Note: If ShouldRestrictToWhitelistedSourceIds() is
    // true, this logic will not be hit as the source would have already been
    // filtered in UpdateSourceURL().
    if (!IsWhitelistedSourceId(kv.first) &&
        !base::ContainsKey(ids_seen, kv.first)) {
      // Ownership moves out of |sources_|; the now-null map slot is discarded
      // by the clear() below.
      unsent_sources.push_back(std::move(kv.second));
      continue;
    }
    Source* proto_source = report->add_sources();
    kv.second->PopulateProto(proto_source);
    if (!record_initial_url)
      proto_source->clear_initial_url();
  }

  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount",
                            sources_.size() - unsent_sources.size());
  UMA_HISTOGRAM_COUNTS_1000("UKM.Entries.SerializedCount", entries_.size());
  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount",
                            unsent_sources.size());

  sources_.clear();
  entries_.clear();

  // Keep at most |max_kept_sources|, prioritizing most-recent entries (by
  // creation time).
  const size_t max_kept_sources = GetMaxKeptSources();
  if (unsent_sources.size() > max_kept_sources) {
    // Partition so that the |max_kept_sources| newest sources come first;
    // their relative order does not matter, so a full sort is unnecessary.
    std::nth_element(unsent_sources.begin(),
                     unsent_sources.begin() + max_kept_sources,
                     unsent_sources.end(),
                     [](const std::unique_ptr<ukm::UkmSource>& lhs,
                        const std::unique_ptr<ukm::UkmSource>& rhs) {
                       return lhs->creation_time() > rhs->creation_time();
                     });
    unsent_sources.resize(max_kept_sources);
  }

  // Put the surviving unsent sources back so a later report can pick them up.
  for (auto& source : unsent_sources) {
    sources_.emplace(source->id(), std::move(source));
  }
  UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount", sources_.size());
}
// Whether Sources whose id is not whitelisted should be rejected outright.
// Controlled by the "RestrictToWhitelistedSourceIds" field trial param of
// |kUkmFeature|, with false supplied as the default value.
bool UkmRecorderImpl::ShouldRestrictToWhitelistedSourceIds() const {
  constexpr bool kRestrictByDefault = false;
  return base::GetFieldTrialParamByFeatureAsBool(
      kUkmFeature, "RestrictToWhitelistedSourceIds", kRestrictByDefault);
}
// Records |unsanitized_url| (after sanitizing it) as the URL of the Source
// identified by |source_id|, creating the Source if it does not exist yet.
//
// The update is dropped, and the reason logged via RecordDroppedSource(),
// when recording is disabled, when source ids are restricted to whitelisted
// ones and |source_id| is not whitelisted, when the URL scheme is not
// supported, or when a new Source would exceed GetMaxSources().
void UkmRecorderImpl::UpdateSourceURL(SourceId source_id,
                                      const GURL& unsanitized_url) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  if (!recording_enabled_) {
    RecordDroppedSource(DroppedDataReason::RECORDING_DISABLED);
    return;
  }

  if (ShouldRestrictToWhitelistedSourceIds() &&
      !IsWhitelistedSourceId(source_id)) {
    RecordDroppedSource(DroppedDataReason::NOT_WHITELISTED);
    return;
  }

  GURL url = SanitizeURL(unsanitized_url);
  if (!HasSupportedScheme(url)) {
    RecordDroppedSource(DroppedDataReason::UNSUPPORTED_URL_SCHEME);
    return;
  }

  // Update the pre-existing source if there is any. This happens when the
  // initial URL is different from the committed URL for the same source, e.g.,
  // when there is redirection. A single find() is used so the key is looked
  // up only once.
  const auto existing = sources_.find(source_id);
  if (existing != sources_.end()) {
    existing->second->UpdateUrl(url);
    return;
  }

  // The cap only applies to new Sources; updates above are always accepted.
  if (sources_.size() >= GetMaxSources()) {
    RecordDroppedSource(DroppedDataReason::MAX_HIT);
    return;
  }
  sources_.emplace(source_id, base::MakeUnique<UkmSource>(source_id, url));
}
// Buffers |entry| for the next report.
//
// The entry is dropped, and the reason logged via RecordDroppedEntry(), when
// recording is disabled, when GetMaxEntries() entries are already buffered,
// or when an entry whitelist is in effect and the entry's event hash is not
// on it. The checks run in that order.
void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  if (!recording_enabled_) {
    RecordDroppedEntry(DroppedDataReason::RECORDING_DISABLED);
    return;
  }

  const size_t entry_limit = GetMaxEntries();
  if (entries_.size() >= entry_limit) {
    RecordDroppedEntry(DroppedDataReason::MAX_HIT);
    return;
  }

  // An empty whitelist means "no filtering", not "reject everything".
  const bool whitelist_in_effect = !whitelisted_entry_hashes_.empty();
  if (whitelist_in_effect &&
      !base::ContainsKey(whitelisted_entry_hashes_, entry->event_hash)) {
    RecordDroppedEntry(DroppedDataReason::NOT_WHITELISTED);
    return;
  }

  entries_.push_back(std::move(entry));
}
// Reads the comma-separated Entry whitelist from the field trial param,
// hashes each non-empty, whitespace-trimmed name with base::HashMetricName()
// and adds the hashes to |whitelisted_entry_hashes_|.
void UkmRecorderImpl::StoreWhitelistedEntries() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  const std::string whitelist = GetWhitelistEntries();
  const auto entry_names = base::SplitString(
      whitelist, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
  for (const std::string& entry_name : entry_names) {
    const auto name_hash = base::HashMetricName(entry_name);
    whitelisted_entry_hashes_.insert(name_hash);
  }
}
} // namespace ukm