blob: 6b538f2f08e0df8d55ca4e961b6cf494f0967cfa [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "content/browser/preloading/preloading_data_impl.h"
#include <limits>
#include <string_view>
#include <utility>
#include <vector>
#include "base/hash/hash.h"
#include "base/memory/weak_ptr.h"
#include "base/metrics/histogram_functions.h"
#include "base/rand_util.h"
#include "base/strings/strcat.h"
#include "content/browser/preloading/prefetch/prefetch_container.h"
#include "content/browser/preloading/prefetch/prefetch_service.h"
#include "content/browser/preloading/preloading.h"
#include "content/browser/preloading/preloading_attempt_impl.h"
#include "content/browser/preloading/preloading_config.h"
#include "content/browser/preloading/preloading_prediction.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/web_contents.h"
#if DCHECK_IS_ON()
#include "base/no_destructor.h"
#endif // DCHECK_IS_ON()
namespace content {
namespace {
// Called by `AddPreloadingAttempt` and `AddPreloadingPrediction`. Fails
// the callers if the predictor is redefined. This method can be racy due to the
// static variable.
static void CheckPreloadingPredictorValidity(PreloadingPredictor predictor) {
#if DCHECK_IS_ON()
// Use `std::string` because we can't guarantee std::string_view has a static
// lifetime.
static base::NoDestructor<std::vector<std::pair<int64_t, std::string>>>
seen_predictors;
std::pair<int64_t, std::string> new_predictor(predictor.ukm_value(),
predictor.name());
bool found = false;
for (const auto& seen : *seen_predictors) {
if ((seen.first == new_predictor.first) ^
(seen.second == new_predictor.second)) {
// We cannot have two `PreloadingPredictor`s that only differ in either
// the ukm int value or the string description - each new
// `PreloadingPredictor` needs to be unique in both.
DCHECK(false) << new_predictor.second << "/" << new_predictor.first
<< " vs " << seen.second << "/" << seen.first;
} else if (seen == new_predictor) {
found = true;
break;
}
}
if (!found) {
seen_predictors->push_back(new_predictor);
}
#endif // DCHECK_IS_ON()
}
size_t GetMaxPredictions(bool max_predictions_is_ten_for_testing) {
// The limit is somewhat arbitrary. It should be large enough that most pages
// won't reach it, but small enough to keep memory usage reasonable for those
// that do.
constexpr size_t kMaxPredictions = 10000;
return max_predictions_is_ten_for_testing ? 10 : kMaxPredictions;
}
std::optional<double> GetSamplingLikelihood(
bool max_predictions_is_ten_for_testing,
size_t total_seen_predictions) {
const size_t max_predictions =
GetMaxPredictions(max_predictions_is_ten_for_testing);
return (total_seen_predictions <= max_predictions)
? std::nullopt
: std::optional<double>{static_cast<double>(max_predictions) /
total_seen_predictions};
}
// We may produce a large number of predictions over the lifetime of a long
// lived page. After the number of predictions grows sufficiently large, we'll
// start randomly sampling and replacing existing predictions in order to limit
// memory usage.
// See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple:_Algorithm_R
// We don't report anything from the predictions that aren't sampled in here
// when the page navigates/unloads. When we record UKMs, we include the
// sampling factor which indicates how much downsampling happened here.
template <typename PredictionType>
void PredictionReservoirSample(std::vector<PredictionType>& predictions,
size_t& items_seen,
bool max_predictions_is_ten_for_testing,
PredictionType new_prediction) {
CHECK_LE(predictions.size(), items_seen);
const size_t max_predictions =
GetMaxPredictions(max_predictions_is_ten_for_testing);
if (items_seen < max_predictions) {
predictions.push_back(std::move(new_prediction));
++items_seen;
return;
}
size_t replace_idx = static_cast<size_t>(base::RandGenerator(items_seen + 1));
if (replace_idx < predictions.size()) {
predictions[replace_idx] = std::move(new_prediction);
}
++items_seen;
}
} // namespace
// static
PreloadingURLMatchCallback PreloadingData::GetSameURLMatcher(
const GURL& destination_url) {
// To save memory, only bind a hash of the URL for comparison.
return base::BindRepeating(
[](size_t predicted_url_hash, const GURL& navigated_url) {
return predicted_url_hash == base::FastHash(navigated_url.spec());
},
base::FastHash(destination_url.spec()));
}
// static
PreloadingURLMatchCallback PreloadingDataImpl::GetPrefetchServiceMatcher(
PrefetchService& prefetch_service,
const PrefetchContainer::Key& predicted) {
return base::BindRepeating(
[](base::WeakPtr<PrefetchService> prefetch_service,
const PrefetchContainer::Key& predicted, const GURL& navigated_url) {
if (!prefetch_service) {
return predicted.url() == navigated_url;
}
if (predicted.url() == navigated_url) {
return true;
}
base::WeakPtr<PrefetchContainer> prefetch_container =
prefetch_service->MatchUrl(predicted.WithNewUrl(navigated_url));
return prefetch_container && prefetch_container->key() == predicted;
},
prefetch_service.GetWeakPtr(), predicted);
}
// static
PreloadingData* PreloadingData::GetOrCreateForWebContents(
WebContents* web_contents) {
return PreloadingDataImpl::GetOrCreateForWebContents(web_contents);
}
// static
PreloadingData* PreloadingData::GetForWebContents(WebContents* web_contents) {
return PreloadingDataImpl::FromWebContents(web_contents);
}
// static
PreloadingDataImpl* PreloadingDataImpl::GetOrCreateForWebContents(
WebContents* web_contents) {
return WebContentsUserData<PreloadingDataImpl>::GetOrCreateForWebContents(
web_contents);
}
PreloadingAttempt* PreloadingDataImpl::AddPreloadingAttempt(
PreloadingPredictor predictor,
PreloadingType preloading_type,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id) {
// The same `predictor` created and enacted the candidate associated with this
// attempt.
return AddPreloadingAttempt(predictor, predictor, preloading_type,
std::move(url_match_predicate),
triggering_primary_page_source_id);
}
PreloadingAttemptImpl* PreloadingDataImpl::AddPreloadingAttempt(
const PreloadingPredictor& creating_predictor,
const PreloadingPredictor& enacting_predictor,
PreloadingType preloading_type,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id) {
auto attempt = std::make_unique<PreloadingAttemptImpl>(
creating_predictor, enacting_predictor, preloading_type,
triggering_primary_page_source_id, std::move(url_match_predicate),
sampling_seed_);
preloading_attempts_.push_back(std::move(attempt));
return preloading_attempts_.back().get();
}
void PreloadingDataImpl::AddPreloadingPrediction(
PreloadingPredictor predictor,
int confidence,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id) {
AddPreloadingPrediction(predictor, PreloadingConfidence{confidence},
std::move(url_match_predicate),
triggering_primary_page_source_id);
}
void PreloadingDataImpl::AddPreloadingPrediction(
const PreloadingPredictor& predictor,
PreloadingConfidence confidence,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id) {
// We want to log the metrics for user visible primary pages to measure the
// impact of PreloadingPredictions on the page user is viewing.
// TODO(crbug.com/40227283): Extend this for non-primary page and inner
// WebContents preloading predictions.
PredictionReservoirSample(
preloading_predictions_, total_seen_preloading_predictions_,
max_predictions_is_ten_for_testing_,
PreloadingPrediction{predictor, confidence,
triggering_primary_page_source_id,
std::move(url_match_predicate)});
}
void PreloadingDataImpl::AddExperimentalPreloadingPrediction(
std::string_view name,
PreloadingURLMatchCallback url_match_predicate,
float score,
float min_score,
float max_score,
size_t buckets) {
PredictionReservoirSample(
experimental_predictions_, total_seen_experimental_predictions_,
max_predictions_is_ten_for_testing_,
ExperimentalPreloadingPrediction{name, std::move(url_match_predicate),
score, min_score, max_score, buckets});
}
void PreloadingDataImpl::SetIsNavigationInDomainCallback(
PreloadingPredictor predictor,
PredictorDomainCallback is_navigation_in_domain_callback) {
is_navigation_in_predictor_domain_callbacks_.insert(
{predictor, std::move(is_navigation_in_domain_callback)});
}
void PreloadingDataImpl::CopyPredictorDomains(
const PreloadingDataImpl& other,
const std::vector<PreloadingPredictor>& predictors) {
for (const auto& predictor : predictors) {
if (const auto it =
other.is_navigation_in_predictor_domain_callbacks_.find(predictor);
it != other.is_navigation_in_predictor_domain_callbacks_.end()) {
SetIsNavigationInDomainCallback(predictor, it->second);
}
}
}
PreloadingDataImpl::PreloadingDataImpl(WebContents* web_contents)
: WebContentsUserData<PreloadingDataImpl>(*web_contents),
WebContentsObserver(web_contents),
sampling_seed_(static_cast<uint32_t>(base::RandUint64())) {}
PreloadingDataImpl::~PreloadingDataImpl() = default;
void PreloadingDataImpl::DidFinishNavigation(
NavigationHandle* navigation_handle) {
// Record UKMs for primary page navigations only. The reason we don't use
// WebContentsObserver::PrimaryPageChanged is because we want to get the
// navigation UkmSourceId which is different from
// RenderFrameHost::GetPageUkmSourceId for prerender activation.
// TODO(crbug.com/40215894): Switch to PrimaryPageChanged once we align
// RenderFrameHost::GetPageUkmSourceId with
// PageLoadTracker::GetPageUKMSourceId.
if (!navigation_handle->IsInPrimaryMainFrame() ||
navigation_handle->IsSameDocument() ||
!navigation_handle->HasCommitted()) {
return;
}
ukm::SourceId navigated_page_source_id =
navigation_handle->GetNextPageUkmSourceId();
// Log the metrics also on navigation when the user ends up navigating. Please
// note that we currently log the metrics on the primary page to analyze
// preloading impact on user-visible primary pages.
RecordMetricsForPreloadingAttempts(navigated_page_source_id);
RecordUKMForPreloadingPredictions(navigated_page_source_id);
// Delete the user data after logging.
web_contents()->RemoveUserData(UserDataKey());
}
void PreloadingDataImpl::DidStartNavigation(
NavigationHandle* navigation_handle) {
// Only observe for the navigation in the primary frame tree to log the
// metrics after which this class will be deleted.
if (!navigation_handle->IsInPrimaryMainFrame()) {
return;
}
// Ignore same-document navigations as preloading is not served for these
// cases.
if (navigation_handle->IsSameDocument()) {
return;
}
// Match the preloading based on the URL the frame is navigating to rather
// than the committed URL as they could be different because of redirects. We
// set accurate triggering and prediction bits in DidStartNavigation before
// PrimaryPageChanged is invoked where the metrics are logged to capture if
// the prediction/triggering was accurate. This doesn't imply that the user
// navigated to the predicted URL.
ResetRecallStats();
SetIsAccurateTriggeringAndPrediction(navigation_handle->GetURL());
RecordRecallStatsToUMA(navigation_handle);
}
void PreloadingDataImpl::WebContentsDestroyed() {
// Log the metrics also on WebContentsDestroyed event to avoid losing the data
// in case the user doesn't end up navigating. When the WebContents is
// destroyed before navigation, we pass ukm::kInvalidSourceId and empty URL to
// avoid the UKM associated to wrong page.
RecordMetricsForPreloadingAttempts(ukm::kInvalidSourceId);
RecordUKMForPreloadingPredictions(ukm::kInvalidSourceId);
for (const auto& experimental_prediction : experimental_predictions_) {
experimental_prediction.RecordToUMA();
}
experimental_predictions_.clear();
total_seen_experimental_predictions_ = 0;
const std::optional<double> sampling_likelihood = GetSamplingLikelihood(
max_predictions_is_ten_for_testing_, total_seen_ml_predictions_);
for (auto& ml_prediction : ml_predictions_) {
ml_prediction.Record(sampling_likelihood);
}
ml_predictions_.clear();
total_seen_ml_predictions_ = 0;
// Delete the user data after logging.
web_contents()->RemoveUserData(UserDataKey());
}
void PreloadingDataImpl::RecordPreloadingAttemptPrecisionToUMA(
const PreloadingAttemptImpl& attempt) {
bool is_true_positive = attempt.IsAccurateTriggering();
for (const auto& predictor : attempt.GetPredictors()) {
const auto uma_attempt_precision = base::StrCat(
{"Preloading.", PreloadingTypeToString(attempt.preloading_type()),
".Attempt.", predictor.name(), ".Precision"});
base::UmaHistogramEnumeration(
uma_attempt_precision, is_true_positive
? PredictorConfusionMatrix::kTruePositive
: PredictorConfusionMatrix::kFalsePositive);
}
}
// static
bool PreloadingDataImpl::IsLinkClickNavigation(
NavigationHandle* navigation_handle) {
auto page_transition = navigation_handle->GetPageTransition();
return ui::PageTransitionCoreTypeIs(
page_transition, ui::PageTransition::PAGE_TRANSITION_LINK) &&
(page_transition & ui::PAGE_TRANSITION_CLIENT_REDIRECT) == 0 &&
ui::PageTransitionIsNewNavigation(page_transition);
}
void PreloadingDataImpl::RecordPredictionPrecisionToUMA(
const PreloadingPrediction& prediction) {
bool is_true_positive = prediction.IsAccuratePrediction();
const auto uma_predictor_precision =
base::StrCat({"Preloading.Predictor.", prediction.predictor_type().name(),
".Precision"});
base::UmaHistogramEnumeration(uma_predictor_precision,
is_true_positive
? PredictorConfusionMatrix::kTruePositive
: PredictorConfusionMatrix::kFalsePositive);
}
void PreloadingDataImpl::UpdatePreloadingAttemptRecallStats(
const PreloadingAttemptImpl& attempt) {
bool is_true_positive = attempt.IsAccurateTriggering();
if (is_true_positive) {
for (const auto& predictor : attempt.GetPredictors()) {
preloading_attempt_recall_stats_.insert(
{predictor, attempt.preloading_type()});
}
}
}
void PreloadingDataImpl::UpdatePredictionRecallStats(
const PreloadingPrediction& prediction) {
bool is_true_positive = prediction.IsAccuratePrediction();
if (is_true_positive) {
predictions_recall_stats_.insert(prediction.predictor_type());
}
}
void PreloadingDataImpl::ResetRecallStats() {
predictions_recall_stats_.clear();
preloading_attempt_recall_stats_.clear();
}
void PreloadingDataImpl::RecordRecallStatsToUMA(
NavigationHandle* navigation_handle) {
constexpr PreloadingType kPreloadingTypes[] = {PreloadingType::kPreconnect,
PreloadingType::kPrefetch,
PreloadingType::kPrerender};
for (const auto& [predictor_type, is_navigation_in_domain_callback] :
is_navigation_in_predictor_domain_callbacks_) {
if (!is_navigation_in_domain_callback.Run(navigation_handle)) {
continue;
}
const auto uma_predictor_recall = base::StrCat(
{"Preloading.Predictor.", predictor_type.name(), ".Recall"});
base::UmaHistogramEnumeration(
uma_predictor_recall, predictions_recall_stats_.contains(predictor_type)
? PredictorConfusionMatrix::kTruePositive
: PredictorConfusionMatrix::kFalseNegative);
for (const auto& preloading_type : kPreloadingTypes) {
const auto uma_attemp_recall =
base::StrCat({"Preloading.", PreloadingTypeToString(preloading_type),
".Attempt.", predictor_type.name(), ".Recall"});
base::UmaHistogramEnumeration(
uma_attemp_recall, preloading_attempt_recall_stats_.contains(
{predictor_type, preloading_type})
? PredictorConfusionMatrix::kTruePositive
: PredictorConfusionMatrix::kFalseNegative);
}
}
// Clear registered predictor domain callbacks and get ready for the next
// navigation.
is_navigation_in_predictor_domain_callbacks_.clear();
}
void PreloadingDataImpl::SetIsAccurateTriggeringAndPrediction(
const GURL& navigated_url) {
for (auto& experimental_prediction : experimental_predictions_) {
experimental_prediction.SetIsAccuratePrediction(navigated_url);
experimental_prediction.RecordToUMA();
}
experimental_predictions_.clear();
total_seen_experimental_predictions_ = 0;
const std::optional<double> sampling_likelihood = GetSamplingLikelihood(
max_predictions_is_ten_for_testing_, total_seen_ml_predictions_);
for (auto& ml_prediction : ml_predictions_) {
ml_prediction.SetIsAccuratePrediction(navigated_url);
ml_prediction.Record(sampling_likelihood);
}
ml_predictions_.clear();
total_seen_ml_predictions_ = 0;
for (auto& attempt : preloading_attempts_) {
attempt->SetIsAccurateTriggering(navigated_url);
RecordPreloadingAttemptPrecisionToUMA(*attempt);
UpdatePreloadingAttemptRecallStats(*attempt);
}
for (auto& prediction : preloading_predictions_) {
prediction.SetIsAccuratePrediction(navigated_url);
RecordPredictionPrecisionToUMA(prediction);
UpdatePredictionRecallStats(prediction);
}
}
void PreloadingDataImpl::SetHasSpeculationRulesPrerender() {
has_speculation_rules_prerender_ = true;
}
bool PreloadingDataImpl::HasSpeculationRulesPrerender() {
return has_speculation_rules_prerender_;
}
void PreloadingDataImpl::OnPreloadingHeuristicsModelInput(
const GURL& url,
ModelPredictionTrainingData::OutcomeCallback on_record_outcome) {
PredictionReservoirSample(
ml_predictions_, total_seen_ml_predictions_,
max_predictions_is_ten_for_testing_,
ModelPredictionTrainingData{std::move(on_record_outcome),
GetSameURLMatcher(url)});
}
void PreloadingDataImpl::RecordMetricsForPreloadingAttempts(
ukm::SourceId navigated_page_source_id) {
for (auto& attempt : preloading_attempts_) {
// Check the validity at the time of UKMs reporting, as the UKMs are
// reported from the same thread (whichever thread calls
// `PreloadingDataImpl::WebContentsDestroyed` or
// `PreloadingDataImpl::DidFinishNavigation`).
for (const auto& predictor : attempt->GetPredictors()) {
CheckPreloadingPredictorValidity(predictor);
}
attempt->RecordPreloadingAttemptMetrics(navigated_page_source_id);
}
// Clear all records once we record the UKMs.
preloading_attempts_.clear();
}
void PreloadingDataImpl::RecordUKMForPreloadingPredictions(
ukm::SourceId navigated_page_source_id) {
const std::optional<double> sampling_likelihood = GetSamplingLikelihood(
max_predictions_is_ten_for_testing_, total_seen_preloading_predictions_);
for (auto& prediction : preloading_predictions_) {
// Check the validity at the time of UKMs reporting, as the UKMs are
// reported from the same thread (whichever thread calls
// `PreloadingDataImpl::WebContentsDestroyed` or
// `PreloadingDataImpl::DidFinishNavigation`).
CheckPreloadingPredictorValidity(prediction.predictor_type());
prediction.RecordPreloadingPredictionUKMs(navigated_page_source_id,
sampling_likelihood);
}
// Clear all records once we record the UKMs.
preloading_predictions_.clear();
total_seen_preloading_predictions_ = 0;
}
size_t PreloadingDataImpl::GetPredictionsSizeForTesting() const {
return preloading_predictions_.size();
}
void PreloadingDataImpl::SetMaxPredictionsToTenForTesting() {
max_predictions_is_ten_for_testing_ = true;
}
WEB_CONTENTS_USER_DATA_KEY_IMPL(PreloadingDataImpl);
} // namespace content