blob: 94b712f6bcfdcf294bde18a5af09eac17caf4a35 [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/ukm/ukm_recorder_impl.h"

#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

#include "base/feature_list.h"
#include "base/metrics/crc32.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/field_trial_params.h"
#include "base/metrics/histogram_macros.h"
#include "base/metrics/metrics_hashes.h"
#include "base/rand_util.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "components/ukm/scheme_constants.h"
#include "components/variations/variations_associated_data.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_decode.h"
#include "services/metrics/public/cpp/ukm_source.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#include "services/metrics/public/mojom/ukm_interface.mojom.h"
#include "third_party/metrics_proto/ukm/entry.pb.h"
#include "third_party/metrics_proto/ukm/report.pb.h"
#include "third_party/metrics_proto/ukm/source.pb.h"
#include "url/gurl.h"
namespace ukm {
namespace {
// Feature that gates UKM entry sampling. It is disabled by default; when it is
// enabled, IsSamplingEnabled() may return true and
// LoadExperimentSamplingInfo() reads the sampling rates from its field-trial
// parameters.
const base::Feature kUkmSamplingRateFeature{"UkmSamplingRate",
base::FEATURE_DISABLED_BY_DEFAULT};
// Returns the raw "WhitelistEntries" field-trial parameter of kUkmFeature.
// The value is a comma-separated list of Entry names.
std::string GetWhitelistEntries() {
  constexpr char kParamName[] = "WhitelistEntries";
  return base::GetFieldTrialParamValueByFeature(kUkmFeature, kParamName);
}
// Returns true when the type encoded in |source_id| is one of the types that
// are whitelisted by type alone: navigation, app, history or WebAPK.
bool IsWhitelistedSourceId(SourceId source_id) {
  switch (GetSourceIdType(source_id)) {
    case SourceIdType::NAVIGATION_ID:
    case SourceIdType::APP_ID:
    case SourceIdType::HISTORY_ID:
    case SourceIdType::WEBAPK_ID:
      return true;
    default:
      return false;
  }
}
// Returns the cap on Sources held in memory; once it is reached, newly added
// Sources are discarded. The value comes from the "MaxSources" field-trial
// parameter of kUkmFeature and falls back to 500.
size_t GetMaxSources() {
  constexpr size_t kFallbackLimit = 500;
  const int configured_limit = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, "MaxSources", kFallbackLimit);
  return static_cast<size_t>(configured_limit);
}
// Returns how many Sources may survive the end of a reporting cycle and stay
// accessible during the next reporting interval. The value comes from the
// "MaxKeptSources" field-trial parameter of kUkmFeature and falls back to 100.
size_t GetMaxKeptSources() {
  constexpr size_t kFallbackLimit = 100;
  const int configured_limit = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, "MaxKeptSources", kFallbackLimit);
  return static_cast<size_t>(configured_limit);
}
// Returns the cap on Entries held in memory; once it is reached, newly added
// Entries are discarded. The value comes from the "MaxEntries" field-trial
// parameter of kUkmFeature and falls back to 5000.
size_t GetMaxEntries() {
  constexpr size_t kFallbackLimit = 5000;
  const int configured_limit = base::GetFieldTrialParamByFeatureAsInt(
      kUkmFeature, "MaxEntries", kFallbackLimit);
  return static_cast<size_t>(configured_limit);
}
// Tells whether |url| uses a scheme that UKM is willing to log: http(s), ftp,
// about, the chrome UI scheme, the extension scheme or the app scheme. URLs
// with any other scheme are not logged.
bool HasSupportedScheme(const GURL& url) {
  if (url.SchemeIsHTTPOrHTTPS() || url.SchemeIs(url::kFtpScheme))
    return true;
  if (url.SchemeIs(url::kAboutScheme) || url.SchemeIs(kChromeUIScheme))
    return true;
  return url.SchemeIs(kExtensionScheme) || url.SchemeIs(kAppScheme);
}
// Reasons why a Source or an Entry was not kept. The numeric values are logged
// as samples to the "UKM.Sources.Dropped" and "UKM.Entries.Dropped"
// histograms.
// NOTE(review): since the values are histogram samples they presumably must
// not be renumbered or reused -- confirm against the histogram definitions.
enum class DroppedDataReason {
// Nothing was dropped. No call site visible in this file records this value.
NOT_DROPPED = 0,
// Recording was disabled when the data arrived.
RECORDING_DISABLED = 1,
// The in-memory limit (GetMaxSources() / GetMaxEntries()) was already reached.
MAX_HIT = 2,
// The source id type or the entry's event hash is not whitelisted.
NOT_WHITELISTED = 3,
// The URL's scheme did not pass HasSupportedScheme().
UNSUPPORTED_URL_SCHEME = 4,
// The entry was rejected by IsSampledIn().
SAMPLED_OUT = 5,
// An extension/app URL arrived while extension recording was disabled.
EXTENSION_URLS_DISABLED = 6,
// The webstore-extension callback was unset or returned false for the host.
EXTENSION_NOT_SYNCED = 7,
// At report time the source's URL was not in the whitelisted URL set.
NOT_MATCHED = 8,
// The sanitized URL was empty.
EMPTY_URL = 9,
// Exclusive upper bound passed to the enumeration histograms; keep last.
NUM_DROPPED_DATA_REASONS
};
// Logs |reason| as a sample of the "UKM.Sources.Dropped" enumeration
// histogram.
void RecordDroppedSource(DroppedDataReason reason) {
  constexpr int kBoundary =
      static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS);
  const int sample = static_cast<int>(reason);
  UMA_HISTOGRAM_ENUMERATION("UKM.Sources.Dropped", sample, kBoundary);
}
// Logs |reason| as a sample of the "UKM.Entries.Dropped" enumeration
// histogram.
void RecordDroppedEntry(DroppedDataReason reason) {
  constexpr int kBoundary =
      static_cast<int>(DroppedDataReason::NUM_DROPPED_DATA_REASONS);
  const int sample = static_cast<int>(reason);
  UMA_HISTOGRAM_ENUMERATION("UKM.Entries.Dropped", sample, kBoundary);
}
// Copies the mojo entry |in| into the proto |out|: the source id, the event
// hash, and one Entry::Metric per (metric hash, value) pair. |out| must not
// already carry a source id or an event hash.
void StoreEntryProto(const mojom::UkmEntry& in, Entry* out) {
  DCHECK(!out->has_source_id());
  DCHECK(!out->has_event_hash());

  out->set_source_id(in.source_id);
  out->set_event_hash(in.event_hash);

  for (auto it = in.metrics.begin(); it != in.metrics.end(); ++it) {
    Entry::Metric* const metric_proto = out->add_metrics();
    metric_proto->set_metric_hash(it->first);
    metric_proto->set_value(it->second);
  }
}
// Returns a copy of |url| with the parts UKM must not log removed:
//  - username and password are always cleared;
//  - about: and chrome: URLs also lose their query;
//  - extension URLs also lose their path, query and ref.
GURL SanitizeURL(const GURL& url) {
  GURL::Replacements replacements;
  replacements.ClearUsername();
  replacements.ClearPassword();

  // chrome:// and about: URL params are never used for navigation, only to
  // prepopulate data on the page, so they are not included.
  const bool drops_query_only =
      url.SchemeIs(url::kAboutScheme) || url.SchemeIs("chrome");
  if (drops_query_only)
    replacements.ClearQuery();

  const bool is_extension_url = url.SchemeIs(kExtensionScheme);
  if (is_extension_url) {
    replacements.ClearPath();
    replacements.ClearQuery();
    replacements.ClearRef();
  }

  return url.ReplaceComponents(replacements);
}
void AppendWhitelistedUrls(
const std::map<SourceId, std::unique_ptr<UkmSource>>& sources,
std::unordered_set<std::string>* urls) {
for (const auto& kv : sources) {
if (IsWhitelistedSourceId(kv.first)) {
urls->insert(kv.second->url().spec());
// Some non-navigation sources only record origin as a URL.
// Add the origin from the navigation source to match those too.
urls->insert(kv.second->url().GetOrigin().spec());
}
}
}
// Returns true if |entry| cannot be fully decoded with |decode_map|: either
// its event hash has no entry in the map, or at least one of its metric hashes
// is missing from that event's metric map.
bool HasUnknownMetrics(const builders::DecodeMap& decode_map,
                       const mojom::UkmEntry& entry) {
  const auto event_it = decode_map.find(entry.event_hash);
  if (event_it == decode_map.end())
    return true;

  const auto& known_metrics = event_it->second.metric_map;
  for (const auto& hash_and_value : entry.metrics) {
    if (known_metrics.find(hash_and_value.first) == known_metrics.end())
      return true;
  }
  return false;
}
} // namespace
// Constructs a recorder with recording disabled. The sampling seed is a random
// 64-bit value truncated to 32 bits; IsSampledIn() uses it as the starting
// value of its CRC32 computation.
UkmRecorderImpl::UkmRecorderImpl()
: recording_enabled_(false),
sampling_seed_(static_cast<uint32_t>(base::RandUint64())) {}
// Defaulted out of line.
UkmRecorderImpl::~UkmRecorderImpl() = default;
// static
// Creates a field trial named after kUkmSamplingRateFeature with a single
// 100% group carrying a "_default_sampling" parameter, and registers it on
// |feature_list| as an override that enables the feature.
void UkmRecorderImpl::CreateFallbackSamplingTrial(
    bool is_stable_channel,
    base::FeatureList* feature_list) {
  static const char kSampledGroup_Stable[] = "Sampled_NoSeed_Stable";
  static const char kSampledGroup_Other[] = "Sampled_NoSeed_Other";

  // Nothing is sampled out except for "stable" which omits almost everything
  // in this configuration. This is done so that clients that fail to receive
  // a configuration from the server do not bias aggregated results because
  // of a relatively large number of records from them.
  const char* const group_name =
      is_stable_channel ? kSampledGroup_Stable : kSampledGroup_Other;
  // Sampling is 1-in-N; this is N.
  const int sampling_denominator = is_stable_channel ? 1000000 : 1;

  scoped_refptr<base::FieldTrial> trial(
      base::FieldTrialList::FactoryGetFieldTrial(
          kUkmSamplingRateFeature.name, 100, group_name,
          base::FieldTrial::ONE_TIME_RANDOMIZED, nullptr));

  // Everybody (100%) should have a sampling configuration.
  std::map<std::string, std::string> params;
  params["_default_sampling"] = base::NumberToString(sampling_denominator);
  variations::AssociateVariationParams(trial->trial_name(), group_name,
                                       params);
  trial->AppendGroup(group_name, 100);

  // Setup the feature.
  feature_list->RegisterFieldTrialOverride(
      kUkmSamplingRateFeature.name, base::FeatureList::OVERRIDE_ENABLE_FEATURE,
      trial.get());
}
// Out-of-line defaulted special members of the nested EventAggregate type.
UkmRecorderImpl::EventAggregate::EventAggregate() = default;
UkmRecorderImpl::EventAggregate::~EventAggregate() = default;
// Out-of-line defaulted special members of the nested Recordings type. The
// move assignment operator is what Recordings::Reset() uses to replace the
// current contents with a default-constructed instance.
UkmRecorderImpl::Recordings::Recordings() = default;
UkmRecorderImpl::Recordings& UkmRecorderImpl::Recordings::operator=(
Recordings&&) = default;
UkmRecorderImpl::Recordings::~Recordings() = default;
void UkmRecorderImpl::Recordings::Reset() {
*this = Recordings();
}
void UkmRecorderImpl::Recordings::SourceCounts::Reset() {
*this = SourceCounts();
}
// Turns recording on. |extensions| decides whether extension URLs may be
// recorded as well.
void UkmRecorderImpl::EnableRecording(bool extensions) {
  DVLOG(1) << "UkmRecorderImpl::EnableRecording, extensions=" << extensions;
  extensions_enabled_ = extensions;
  recording_enabled_ = true;
}
// Turns recording (including extension recording) off. If recording was on
// when this is called, the current recording is no longer flagged as
// continuous.
void UkmRecorderImpl::DisableRecording() {
  DVLOG(1) << "UkmRecorderImpl::DisableRecording";
  const bool was_recording = recording_enabled_;
  recording_enabled_ = false;
  extensions_enabled_ = false;
  if (was_recording)
    recording_is_continuous_ = false;
}
// Test hook: clears the sampling flag so that IsSamplingEnabled() returns
// false regardless of the feature state.
void UkmRecorderImpl::DisableSamplingForTesting() {
  sampling_enabled_ = false;
}
// Sampling applies only when it has not been switched off on this recorder
// and kUkmSamplingRateFeature is enabled.
bool UkmRecorderImpl::IsSamplingEnabled() const {
  if (!sampling_enabled_)
    return false;
  return base::FeatureList::IsEnabled(kUkmSamplingRateFeature);
}
// Discards everything recorded so far and marks the recording as
// non-continuous.
void UkmRecorderImpl::Purge() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  recording_is_continuous_ = false;
  recordings_.Reset();
}
void UkmRecorderImpl::PurgeExtensionRecordings() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Discard all sources that have an extension URL as well as all the entries
// related to any of these sources.
std::unordered_set<SourceId> extension_source_ids;
for (const auto& kv : recordings_.sources) {
if (kv.second->url().SchemeIs(kExtensionScheme)) {
extension_source_ids.insert(kv.first);
}
}
for (const auto source_id : extension_source_ids) {
recordings_.sources.erase(source_id);
}
std::vector<mojom::UkmEntryPtr>& events = recordings_.entries;
events.erase(
std::remove_if(events.begin(), events.end(),
[&](const auto& event) {
return extension_source_ids.count(event->source_id);
}),
events.end());
recording_is_continuous_ = false;
}
// Queues |source_id| in the obsolete-id set; StoreRecordingsInReport() erases
// the queued sources. kInvalidSourceId is ignored.
void UkmRecorderImpl::MarkSourceForDeletion(SourceId source_id) {
  if (source_id != kInvalidSourceId)
    recordings_.obsolete_source_ids.insert(source_id);
}
// Stores the callback that ShouldRecordUrl() runs with an extension URL's
// host to decide whether that URL may be recorded.
void UkmRecorderImpl::SetIsWebstoreExtensionCallback(
    const IsWebstoreExtensionCallback& callback) {
  is_webstore_extension_callback_ = callback;
}
// TODO(rkaplow): This should be refactored.
//
// Serializes the current recordings into |report| and resets the per-cycle
// state. In order, this:
//  1. Copies every recorded entry into |report| and collects the source ids
//     those entries refer to.
//  2. Builds the set of whitelisted URLs (URLs carried over from the previous
//     cycle plus the URL and origin of every type-whitelisted source), writes
//     each source that passes the checks below into |report|, and marks
//     sources that must not outlive this report for deletion.
//  3. Writes the per-event aggregates.
//  4. Emits UMA histograms describing what was serialized.
//  5. Erases the obsolete sources, fills |report|'s SourceCounts, clears the
//     entries and aggregates, prunes the kept sources down to
//     GetMaxKeptSources(), and seeds the carry-over URL whitelist for the next
//     cycle.
// |report| also gets the current continuity flag, after which the flag is set
// back to true.
void UkmRecorderImpl::StoreRecordingsInReport(Report* report) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
// Set of source ids seen by entries in recordings_.
std::set<SourceId> source_ids_seen;
for (const auto& entry : recordings_.entries) {
Entry* proto_entry = report->add_entries();
StoreEntryProto(*entry, proto_entry);
source_ids_seen.insert(entry->source_id);
}
// Number of sources excluded from this report because no entries referred to
// them.
// NOTE(review): this is a difference of two sizes rather than a count of
// sources missing from |source_ids_seen|. If entries can refer to ids that
// have no recorded Source, the two differ and the unsigned subtraction could
// wrap -- confirm whether that can happen.
const int num_sources_unsent =
recordings_.sources.size() - source_ids_seen.size();
// Construct set of whitelisted URLs by merging those carried over from the
// previous report cycle and those from sources recorded in this cycle.
std::unordered_set<std::string> url_whitelist;
recordings_.carryover_urls_whitelist.swap(url_whitelist);
AppendWhitelistedUrls(recordings_.sources, &url_whitelist);
// Number of sources discarded due to not matching a navigation URL.
int num_sources_unmatched = 0;
// Number of sources written to |report|, keyed by source id type.
std::unordered_map<SourceIdType, int> serialized_source_type_counts;
for (const auto& kv : recordings_.sources) {
// Don't keep sources of these types after current report because their
// entries are logged only at source creation time.
if (GetSourceIdType(kv.first) == base::UkmSourceId::Type::APP_ID ||
GetSourceIdType(kv.first) == base::UkmSourceId::Type::HISTORY_ID ||
GetSourceIdType(kv.first) == base::UkmSourceId::Type::WEBAPK_ID) {
MarkSourceForDeletion(kv.first);
}
// If the source id is not whitelisted, don't send it unless it has
// associated entries and the URL matches that of a whitelisted source.
// Note: If ShouldRestrictToWhitelistedSourceIds() is true, this logic will
// not be hit as the source would have already been filtered in
// UpdateSourceURL().
if (!IsWhitelistedSourceId(kv.first)) {
// UkmSource should not keep initial_url for non-navigation source IDs.
DCHECK_EQ(1u, kv.second->urls().size());
if (!url_whitelist.count(kv.second->url().spec())) {
RecordDroppedSource(DroppedDataReason::NOT_MATCHED);
MarkSourceForDeletion(kv.first);
num_sources_unmatched++;
continue;
}
// Omit entryless sources from the report.
if (!base::Contains(source_ids_seen, kv.first)) {
continue;
} else {
// Source of base::UkmSourceId::Type::UKM type will not be kept after
// entries are logged.
MarkSourceForDeletion(kv.first);
}
}
Source* proto_source = report->add_sources();
kv.second->PopulateProto(proto_source);
serialized_source_type_counts[GetSourceIdType(kv.first)]++;
}
// Write one Aggregate per event hash. A per-metric count is written only when
// it differs from the corresponding event-level count.
for (const auto& event_and_aggregate : recordings_.event_aggregations) {
const EventAggregate& event_aggregate = event_and_aggregate.second;
Aggregate* proto_aggregate = report->add_aggregates();
proto_aggregate->set_source_id(0); // Across all sources.
proto_aggregate->set_event_hash(event_and_aggregate.first);
proto_aggregate->set_total_count(event_aggregate.total_count);
proto_aggregate->set_dropped_due_to_limits(
event_aggregate.dropped_due_to_limits);
proto_aggregate->set_dropped_due_to_sampling(
event_aggregate.dropped_due_to_sampling);
proto_aggregate->set_dropped_due_to_whitelist(
event_aggregate.dropped_due_to_whitelist);
for (const auto& metric_and_aggregate : event_aggregate.metrics) {
const MetricAggregate& aggregate = metric_and_aggregate.second;
Aggregate::Metric* proto_metric = proto_aggregate->add_metrics();
proto_metric->set_metric_hash(metric_and_aggregate.first);
proto_metric->set_value_sum(aggregate.value_sum);
proto_metric->set_value_square_sum(aggregate.value_square_sum);
if (aggregate.total_count != event_aggregate.total_count) {
proto_metric->set_total_count(aggregate.total_count);
}
if (aggregate.dropped_due_to_limits !=
event_aggregate.dropped_due_to_limits) {
proto_metric->set_dropped_due_to_limits(
aggregate.dropped_due_to_limits);
}
if (aggregate.dropped_due_to_sampling !=
event_aggregate.dropped_due_to_sampling) {
proto_metric->set_dropped_due_to_sampling(
aggregate.dropped_due_to_sampling);
}
if (aggregate.dropped_due_to_whitelist !=
event_aggregate.dropped_due_to_whitelist) {
proto_metric->set_dropped_due_to_whitelist(
aggregate.dropped_due_to_whitelist);
}
}
}
// Total number of sources written to |report|, summed over all types.
int num_serialized_sources = 0;
for (const auto& entry : serialized_source_type_counts) {
num_serialized_sources += entry.second;
}
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2",
num_serialized_sources);
UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCount2",
recordings_.entries.size());
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnsentSourcesCount",
num_sources_unsent);
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.UnmatchedSourcesCount",
num_sources_unmatched);
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.SerializedCount2.Ukm",
serialized_source_type_counts[SourceIdType::UKM]);
UMA_HISTOGRAM_COUNTS_1000(
"UKM.Sources.SerializedCount2.Navigation",
serialized_source_type_counts[SourceIdType::NAVIGATION_ID]);
UMA_HISTOGRAM_COUNTS_1000(
"UKM.Sources.SerializedCount2.App",
serialized_source_type_counts[SourceIdType::APP_ID]);
// We record a UMA metric specifically for the number of serialized events
// with the FCP metric. This is for data quality verification.
const uint64_t pageload_hash =
base::HashMetricName(ukm::builders::PageLoad::kEntryName);
const uint64_t fcp_hash = base::HashMetricName(
ukm::builders::PageLoad::
kPaintTiming_NavigationToFirstContentfulPaintName);
int num_recorded_fcp = 0;
for (const auto& entry : recordings_.entries) {
if (entry->event_hash == pageload_hash) {
if (entry->metrics.find(fcp_hash) != entry->metrics.end()) {
num_recorded_fcp++;
}
}
}
UMA_HISTOGRAM_COUNTS_100000("UKM.Entries.SerializedCountFCP",
num_recorded_fcp);
// For each matching id in obsolete_source_ids, remove the Source from
// recordings_.sources. The remaining sources form the deferred sources for
// the next report.
for (const SourceId& source_id : recordings_.obsolete_source_ids) {
recordings_.sources.erase(source_id);
}
recordings_.obsolete_source_ids.clear();
// Populate SourceCounts field on the report then clear the recordings.
Report::SourceCounts* source_counts_proto = report->mutable_source_counts();
source_counts_proto->set_observed(recordings_.source_counts.observed);
source_counts_proto->set_navigation_sources(
recordings_.source_counts.navigation_sources);
source_counts_proto->set_unmatched_sources(num_sources_unmatched);
source_counts_proto->set_carryover_sources(
recordings_.source_counts.carryover_sources);
recordings_.source_counts.Reset();
recordings_.entries.clear();
recordings_.event_aggregations.clear();
// Report the continuity flag as it stood for this cycle, then set it back to
// true for the next one.
report->set_is_continuous(recording_is_continuous_);
recording_is_continuous_ = true;
// Defer at most GetMaxKeptSources() sources to the next report,
// prioritizing most recently created ones.
int pruned_sources_age = PruneOldSources(GetMaxKeptSources());
// Record how old the newest truncated source is.
source_counts_proto->set_pruned_sources_age_seconds(pruned_sources_age);
// Set deferred sources count after pruning.
source_counts_proto->set_deferred_sources(recordings_.sources.size());
// Same value as the deferred source count, for setting the carryover count in
// the next reporting cycle.
recordings_.source_counts.carryover_sources = recordings_.sources.size();
// We already matched these deferred sources against the URL whitelist.
// Re-whitelist them for the next report.
for (const auto& kv : recordings_.sources) {
recordings_.carryover_urls_whitelist.insert(kv.second->url().spec());
}
UMA_HISTOGRAM_COUNTS_1000("UKM.Sources.KeptSourcesCount",
recordings_.sources.size());
// Record number of sources after pruning that were carried over due to not
// having any events in this reporting cycle.
int num_sources_entryless = 0;
for (const auto& kv : recordings_.sources) {
if (!base::Contains(source_ids_seen, kv.first)) {
num_sources_entryless++;
}
}
source_counts_proto->set_entryless_sources(num_sources_entryless);
}
// Reads the "RestrictToWhitelistedSourceIds" field-trial parameter of
// kUkmFeature; the restriction is off unless the parameter turns it on.
bool UkmRecorderImpl::ShouldRestrictToWhitelistedSourceIds() const {
  constexpr bool kRestrictByDefault = false;
  return base::GetFieldTrialParamByFeatureAsBool(
      kUkmFeature, "RestrictToWhitelistedSourceIds", kRestrictByDefault);
}
// Entries are always restricted to the whitelisted event hashes in this
// implementation.
bool UkmRecorderImpl::ShouldRestrictToWhitelistedEntries() const {
  constexpr bool kAlwaysRestrict = true;
  return kAlwaysRestrict;
}
int UkmRecorderImpl::PruneOldSources(size_t max_kept_sources) {
if (recordings_.sources.size() <= max_kept_sources)
return 0;
std::vector<std::pair<base::TimeTicks, SourceId>> timestamp_source_id_pairs;
for (const auto& kv : recordings_.sources) {
timestamp_source_id_pairs.push_back(
std::make_pair(kv.second->creation_time(), kv.first));
}
// Partially sort so that the last |max_kept_sources| elements are the
// newest.
std::nth_element(timestamp_source_id_pairs.begin(),
timestamp_source_id_pairs.end() - max_kept_sources,
timestamp_source_id_pairs.end());
for (auto kv = timestamp_source_id_pairs.begin();
kv != timestamp_source_id_pairs.end() - max_kept_sources; ++kv) {
recordings_.sources.erase(kv->second);
}
base::TimeDelta pruned_sources_age =
base::TimeTicks::Now() -
(timestamp_source_id_pairs.end() - (max_kept_sources + 1))->first;
return pruned_sources_age.InSeconds();
}
// Records a new source for |source_id| with the sanitized form of
// |unsanitized_url|, provided the id has no source yet and the URL passes
// ShouldRecordUrl(). An id that already has a source is left untouched.
void UkmRecorderImpl::UpdateSourceURL(SourceId source_id,
                                      const GURL& unsanitized_url) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  const bool already_recorded = base::Contains(recordings_.sources, source_id);
  if (already_recorded)
    return;

  const GURL sanitized_url = SanitizeURL(unsanitized_url);
  if (ShouldRecordUrl(source_id, sanitized_url)) {
    RecordSource(std::make_unique<UkmSource>(source_id, sanitized_url));
  }
}
// Forwards to UpdateSourceURL() when extension recording is enabled;
// otherwise logs the source as dropped with EXTENSION_URLS_DISABLED.
void UkmRecorderImpl::UpdateAppURL(SourceId source_id, const GURL& url) {
  if (extensions_enabled_) {
    UpdateSourceURL(source_id, url);
    return;
  }
  RecordDroppedSource(DroppedDataReason::EXTENSION_URLS_DISABLED);
}
// Records a navigation source for |source_id|. Every URL in
// |unsanitized_navigation_data| is sanitized and checked with
// ShouldRecordUrl(); the source is created from the URLs that pass, in their
// original order. If none pass, no source is recorded.
//
// |source_id| must be a NAVIGATION_ID that has no recorded source yet.
void UkmRecorderImpl::RecordNavigation(
    SourceId source_id,
    const UkmSource::NavigationData& unsanitized_navigation_data) {
  DCHECK(GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID);
  DCHECK(!base::Contains(recordings_.sources, source_id));
  // TODO(csharrison): Consider changing this behavior so the Source isn't even
  // recorded at all if the final URL in |unsanitized_navigation_data| should
  // not be recorded.
  std::vector<GURL> urls;
  urls.reserve(unsanitized_navigation_data.urls.size());
  for (const GURL& url : unsanitized_navigation_data.urls) {
    // Deliberately non-const: std::move() on a const object falls back to a
    // copy, and this value is handed over to |urls| below.
    GURL sanitized_url = SanitizeURL(url);
    if (ShouldRecordUrl(source_id, sanitized_url))
      urls.push_back(std::move(sanitized_url));
  }
  // None of the URLs passed the ShouldRecordUrl check, so do not create a new
  // Source for them.
  if (urls.empty())
    return;
  UkmSource::NavigationData sanitized_navigation_data =
      unsanitized_navigation_data.CopyWithSanitizedUrls(urls);
  RecordSource(
      std::make_unique<UkmSource>(source_id, sanitized_navigation_data));
}
// Decides whether a source with |source_id| and |sanitized_url| may be
// recorded. The checks run in a fixed order and the first one that fails logs
// its DroppedDataReason and makes the function return false:
// recording enabled, source limit, source-id whitelist, empty URL, supported
// scheme, and finally the extension-specific checks.
bool UkmRecorderImpl::ShouldRecordUrl(SourceId source_id,
                                      const GURL& sanitized_url) const {
  // Logs |reason| and yields the "do not record" result.
  const auto drop = [](DroppedDataReason reason) {
    RecordDroppedSource(reason);
    return false;
  };

  if (!recording_enabled_)
    return drop(DroppedDataReason::RECORDING_DISABLED);

  if (recordings_.sources.size() >= GetMaxSources())
    return drop(DroppedDataReason::MAX_HIT);

  if (ShouldRestrictToWhitelistedSourceIds() &&
      !IsWhitelistedSourceId(source_id)) {
    return drop(DroppedDataReason::NOT_WHITELISTED);
  }

  if (sanitized_url.is_empty())
    return drop(DroppedDataReason::EMPTY_URL);

  if (!HasSupportedScheme(sanitized_url)) {
    RecordDroppedSource(DroppedDataReason::UNSUPPORTED_URL_SCHEME);
    DVLOG(2) << "Dropped Unsupported UKM URL:" << source_id << ":"
             << sanitized_url.spec();
    return false;
  }

  // Only extension URLs have further requirements.
  if (!sanitized_url.SchemeIs(kExtensionScheme))
    return true;

  // Extension URLs need to be specifically enabled and the extension synced.
  DCHECK_EQ(sanitized_url.GetWithEmptyPath(), sanitized_url);
  if (!extensions_enabled_)
    return drop(DroppedDataReason::EXTENSION_URLS_DISABLED);

  if (!is_webstore_extension_callback_ ||
      !is_webstore_extension_callback_.Run(sanitized_url.host_piece())) {
    return drop(DroppedDataReason::EXTENSION_NOT_SYNCED);
  }
  return true;
}
void UkmRecorderImpl::RecordSource(std::unique_ptr<UkmSource> source) {
SourceId source_id = source->id();
if (GetSourceIdType(source_id) == SourceIdType::NAVIGATION_ID)
recordings_.source_counts.navigation_sources++;
recordings_.source_counts.observed++;
recordings_.sources.emplace(source_id, std::move(source));
}
// Accepts one entry. While recording is enabled the entry always contributes
// to the per-event aggregates; it is then stored unless it is rejected by, in
// order, the entry whitelist, sampling, or the entry limit. Each rejection
// logs its DroppedDataReason and bumps the matching "dropped" counters on the
// event aggregate and on every metric of the entry.
void UkmRecorderImpl::AddEntry(mojom::UkmEntryPtr entry) {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
  DCHECK(!HasUnknownMetrics(decode_map_, *entry));

  if (!recording_enabled_) {
    RecordDroppedEntry(DroppedDataReason::RECORDING_DISABLED);
    return;
  }

  // Fold the entry into the aggregates before any filtering happens.
  EventAggregate& event_totals =
      recordings_.event_aggregations[entry->event_hash];
  ++event_totals.total_count;
  for (const auto& hash_and_value : entry->metrics) {
    MetricAggregate& metric_totals =
        event_totals.metrics[hash_and_value.first];
    const double sample = hash_and_value.second;
    ++metric_totals.total_count;
    metric_totals.value_sum += sample;
    metric_totals.value_square_sum += sample * sample;
  }

  const bool passes_whitelist =
      !ShouldRestrictToWhitelistedEntries() ||
      base::Contains(whitelisted_entry_hashes_, entry->event_hash);
  if (!passes_whitelist) {
    RecordDroppedEntry(DroppedDataReason::NOT_WHITELISTED);
    ++event_totals.dropped_due_to_whitelist;
    for (const auto& hash_and_value : entry->metrics)
      ++event_totals.metrics[hash_and_value.first].dropped_due_to_whitelist;
    return;
  }

  if (IsSamplingEnabled()) {
    // A negative default rate means the experiment config is not loaded yet.
    if (default_sampling_rate_ < 0)
      LoadExperimentSamplingInfo();

    // A per-event rate, when configured, wins over the default rate.
    const auto rate_it = event_sampling_rates_.find(entry->event_hash);
    const int sampling_rate = (rate_it == event_sampling_rates_.end())
                                  ? default_sampling_rate_
                                  : rate_it->second;
    if (!IsSampledIn(entry->source_id, entry->event_hash, sampling_rate)) {
      RecordDroppedEntry(DroppedDataReason::SAMPLED_OUT);
      ++event_totals.dropped_due_to_sampling;
      for (const auto& hash_and_value : entry->metrics)
        ++event_totals.metrics[hash_and_value.first].dropped_due_to_sampling;
      return;
    }
  }

  if (recordings_.entries.size() >= GetMaxEntries()) {
    RecordDroppedEntry(DroppedDataReason::MAX_HIT);
    ++event_totals.dropped_due_to_limits;
    for (const auto& hash_and_value : entry->metrics)
      ++event_totals.metrics[hash_and_value.first].dropped_due_to_limits;
    return;
  }

  recordings_.entries.push_back(std::move(entry));
}
// Loads the sampling rates from the field-trial parameters of
// kUkmSamplingRateFeature. Must only be called while no rate is loaded (the
// default rate is negative). Afterwards the default rate is non-negative: 1
// unless a valid "_default_sampling" parameter overrides it. Every other
// parameter whose key does not start with '_' is treated as an event name
// with its own rate. Values that are not non-negative integers are ignored.
void UkmRecorderImpl::LoadExperimentSamplingInfo() {
  // This should be called only if a sampling rate hasn't been loaded.
  DCHECK_LT(default_sampling_rate_, 0);

  // Default rate must be >= 0 to indicate that load is complete.
  default_sampling_rate_ = 1;

  // Without the feature there are no parameters to load.
  if (!base::FeatureList::IsEnabled(kUkmSamplingRateFeature))
    return;

  std::map<std::string, std::string> params;
  if (!base::GetFieldTrialParamsByFeature(kUkmSamplingRateFeature, &params))
    return;

  for (const auto& name_and_rate : params) {
    const std::string& name = name_and_rate.first;
    if (name.empty())
      continue;

    // Keys starting with an underscore are global configuration; the only one
    // understood here is "_default_sampling".
    const bool is_global_setting = name[0] == '_';
    if (is_global_setting && name != "_default_sampling")
      continue;

    // Only non-negative sampling rates are accepted.
    int rate = 0;
    if (!base::StringToInt(name_and_rate.second, &rate) || rate < 0)
      continue;

    if (is_global_setting) {
      default_sampling_rate_ = rate;
    } else {
      // Anything else is an event name.
      event_sampling_rates_[base::HashMetricName(name)] = rate;
    }
  }
}
// Decides whether the (|source_id|, |event_id|) pair is kept under a 1-in-N
// sampling scheme where N is |sampling_rate|. A rate of 0 or less keeps
// nothing (negative means "unset" and is treated like "never") and a rate of
// 1 keeps everything. For larger rates the answer is derived from the
// recorder's fixed seed and the two ids, so equal inputs always give the same
// answer on the same recorder.
bool UkmRecorderImpl::IsSampledIn(int64_t source_id,
                                  uint64_t event_id,
                                  int sampling_rate) {
  // Rates up to 1 need no hashing: only exactly 1 samples in.
  if (sampling_rate <= 1)
    return sampling_rate == 1;

  // Mix the seed with the source id and then the event id. CRC32 is fast and
  // statistically random enough for these purposes.
  uint32_t mixed = base::Crc32(sampling_seed_, &source_id, sizeof(source_id));
  mixed = base::Crc32(mixed, &event_id, sizeof(event_id));
  return (mixed % sampling_rate) == 0;
}
// Rebuilds the decode map and adds the hash of every name listed in the
// "WhitelistEntries" parameter to the set of whitelisted entry hashes. Names
// are comma separated; surrounding whitespace is trimmed and empty pieces are
// skipped.
void UkmRecorderImpl::StoreWhitelistedEntries() {
  DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);

  const auto entry_names =
      base::SplitString(GetWhitelistEntries(), ",", base::TRIM_WHITESPACE,
                        base::SPLIT_WANT_NONEMPTY);
  for (const auto& name : entry_names) {
    whitelisted_entry_hashes_.insert(base::HashMetricName(name));
  }

  decode_map_ = builders::CreateDecodeMap();
}
} // namespace ukm