// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/variations/limited_layer_entropy_cost_tracker.h"
#include <math.h>
#include <cstdint>
#include <limits>
#include "base/check_op.h"
#include "base/debug/dump_without_crashing.h"
#include "base/memory/ptr_util.h"
#include "base/metrics/histogram_functions.h"
#include "base/numerics/checked_math.h"
#include "base/numerics/safe_conversions.h"
#include "components/variations/variations_layers.h"
#include "components/variations/variations_seed_processor.h"
namespace variations {
namespace {
// Converts a probability value (represented by numerator/denominator) to an
// entropy value. Callers should ensure that both arguments are strictly
// positive and that `numerator` <= `denominator`. This always returns a
// non-negative number.
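// For example, ConvertToBitsOfEntropy(1, 8) returns 3.0, since an outcome
// with probability 1/8 reveals -log2(1/8) = 3 bits of information.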
double ConvertToBitsOfEntropy(uint64_t numerator, uint64_t denominator) {
CHECK_GT(numerator, 0u);
CHECK_LE(numerator, denominator);
return -log2(base::strict_cast<double>(numerator) /
base::strict_cast<double>(denominator));
}
// Returns the number of bits of entropy used by a single study.
double GetEntropyUsedByStudy(const Study& study) {
if (study.consistency() == Study::SESSION) {
// Session-consistent studies do not consume entropy. They are randomized
// for each Chrome browser process's lifetime; they use neither the low
// entropy source nor the limited entropy randomization source.
return 0.0;
}
// Use uint32_t to match the type of `probability_weight` field in the
// experiment proto.
uint32_t min_weight = std::numeric_limits<uint32_t>::max();
uint64_t total_weight = 0;
// The entropy limit applies specifically to the experiments that specify a
// Google web experiment ID (or Google web trigger experiment ID).
bool has_google_web_experiment = false;
for (const auto& experiment : study.experiment()) {
// This will CHECK if `total_weight` (a uint64_t) overflows, which is nearly
// impossible since each `experiment.probability_weight()` is a uint32_t.
// This is not expected to come up for valid variations seeds in production.
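// For example, even 100,000 experiments at the maximum uint32_t weight sum
// to about 4.3e14, far below the uint64_t maximum of about 1.8e19.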
total_weight = base::CheckAdd(total_weight, experiment.probability_weight())
.ValueOrDie();
// Skip experiments with zero probability. They will not cause entropy
// usage since they will never be assigned. Also, checking for non-zero
// probability ensures that `has_google_web_experiment`
// implies that `total_weight` > 0.
if (experiment.probability_weight() > 0u &&
VariationsSeedProcessor::HasGoogleWebExperimentId(experiment)) {
has_google_web_experiment = true;
min_weight = std::min(min_weight, experiment.probability_weight());
}
}
if (!has_google_web_experiment) {
return 0.0;
}
// By now, `has_google_web_experiment` being true implies 0 < `min_weight` <=
// `total_weight`, which is required by ConvertToBitsOfEntropy().
//
// Mathematically, this returns -log2(`min_weight` / `total_weight`).
// If the probability of a client being assigned to a specific group in the
// study is p, the entropy revealed by this assignment is -log2(p):
// https://en.wikipedia.org/wiki/Entropy_(information_theory). Hence, the
// entropy is maximal for clients assigned to the smallest group in the study.
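// For example, a study whose groups have weights 10, 40 and 50, all with
// Google web experiment IDs, is charged -log2(10/100) ~= 3.32 bits.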
return ConvertToBitsOfEntropy(min_weight, total_weight);
}
// Computes the entropy used by a single member of the limited layer, given
// the total number of slots in the layer.
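// For example, a member covering 25 of a layer's 100 slots uses
// -log2(25/100) = 2 bits.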
double GetLayerMemberEntropy(const Layer::LayerMember& member,
uint64_t num_slots) {
uint32_t num_slots_in_member = 0;
for (const Layer::LayerMember::SlotRange& range : member.slots()) {
// Adding one since the range is inclusive.
num_slots_in_member += range.end() - range.start() + 1;
}
return ConvertToBitsOfEntropy(num_slots_in_member, num_slots);
}
} // namespace
LimitedLayerEntropyCostTracker::LimitedLayerEntropyCostTracker(
const Layer& layer,
double entropy_limit_in_bits)
: entropy_limit_in_bits_(entropy_limit_in_bits),
limited_layer_id_(layer.id()) {
// The caller should have already validated the layer. However, as the layer
// data comes from an external source, we verify it here again for safety,
// instead of using a CHECK. Note that we verify each condition individually
// in order to dump a unique stack trace for each failure condition.
if (limited_layer_id_ == 0u) {
Invalidate();
return;
}
if (entropy_limit_in_bits_ <= 0.0) {
Invalidate();
return;
}
const auto num_slots = layer.num_slots();
if (num_slots <= 0u) {
Invalidate();
return;
}
const auto& layer_members = layer.members();
if (layer_members.empty()) {
Invalidate();
return;
}
if (layer.entropy_mode() != Layer::LIMITED) {
Invalidate();
return;
}
if (!VariationsLayers::AreSlotBoundsValid(layer)) {
Invalidate();
return;
}
// Compute the entropy used by each layer member, keyed by its member ID.
entropy_used_by_member_id_.reserve(layer_members.size());
for (const auto& member : layer_members) {
if (member.id() == 0u) {
Invalidate();
return;
}
// All layer members are included in the entropy calculation, including
// empty ones (members not referenced by any study). A client assigned to an
// empty layer member would have the visible assignment state of "no study
// assigned", which itself reveals information and should be accounted for
// in the entropy calculation.
const bool inserted =
entropy_used_by_member_id_
.emplace(member.id(), GetLayerMemberEntropy(member, num_slots))
.second;
if (!inserted) {
// Duplicate layer member ID.
Invalidate();
return;
}
}
}
LimitedLayerEntropyCostTracker::~LimitedLayerEntropyCostTracker() = default;
bool LimitedLayerEntropyCostTracker::AddEntropyUsedByStudy(const Study& study) {
if (!IsValid()) {
return false;
}
// The caller should have already validated the study's layer references.
// However, as the study data comes from an external source, we verify it
// here again for safety, instead of using a CHECK. Note that we verify each
// condition individually in order to dump a unique stack trace for each
// failure condition.
if (!study.has_layer()) {
Invalidate();
return false;
}
const auto& layer_ref = study.layer();
if (layer_ref.layer_id() != limited_layer_id_) {
Invalidate();
return false;
}
const auto& layer_member_ids =
layer_ref.layer_member_ids().empty()
? VariationsLayers::FallbackLayerMemberIds(layer_ref)
: layer_ref.layer_member_ids();
if (layer_member_ids.empty()) {
Invalidate();
return false;
}
// Returns false if the entropy used by a layer member is already above the
// entropy limit, meaning no more studies can be assigned to the limited layer.
if (entropy_limit_exceeded_) {
return false;
}
// Returns true if the study does not consume entropy at all (e.g. a study
// with no Google web experiment ID or Google web trigger experiment ID).
double study_entropy = GetEntropyUsedByStudy(study);
if (study_entropy <= 0) {
return true;
}
// Update the entropy in the members referenced by the study. It is assumed
// that layer member references have already been validated by the caller.
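// Entropy usage is additive: e.g., two studies that each use 1 bit and
// reference the same layer member together consume 2 bits of that member's
// budget.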
for (const uint32_t member_id : layer_member_ids) {
if (member_id == 0u) {
Invalidate();
return false;
}
const auto it = entropy_used_by_member_id_.find(member_id);
if (it == entropy_used_by_member_id_.end()) {
Invalidate();
return false;
}
auto& entropy_used = it->second;
entropy_used += study_entropy;
includes_study_entropy_ = true;
// TODO(siakabaro): The entropy used by a layer member could be over the
// entropy limit if the layer member covers a very small percentage of the
// population. In such a case, we need to pool the empty layer members
// together and check that their combined entropy is not over the limit.
if (entropy_used > entropy_limit_in_bits_) {
entropy_limit_exceeded_ = true;
}
}
// Returns false if the entropy limit has been exceeded.
return !entropy_limit_exceeded_;
}
double LimitedLayerEntropyCostTracker::GetMaxEntropyUsedForTesting() const {
if (!includes_study_entropy_) {
return 0.0;
}
double max_entropy_used = 0.0;
for (const auto& [member_id, entropy_used] : entropy_used_by_member_id_) {
max_entropy_used = std::max(max_entropy_used, entropy_used);
}
return max_entropy_used;
}
void LimitedLayerEntropyCostTracker::Invalidate() {
// The caller should have already validated the layer and study info before
// any and all calls to the tracker. However, as the layer and study data
// comes from an external source, there are additional safety checks made
// throughout the tracker. We use these instead of CHECKs or DCHECKs, and
// verify each condition individually in order to dump a unique stack trace
// for each failure condition.
is_valid_ = false;
base::debug::DumpWithoutCrashing();
}
} // namespace variations