// blob: bb90639a69cb5f9a9819f9d49d5e628c344a5ef4 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CONTENT_BROWSER_PRELOADING_PRELOADING_DATA_IMPL_H_
#define CONTENT_BROWSER_PRELOADING_PRELOADING_DATA_IMPL_H_
#include <memory>
#include <string_view>
#include <tuple>
#include <vector>
#include "content/browser/preloading/prefetch/prefetch_container.h"
#include "content/browser/preloading/preloading_confidence.h"
#include "content/browser/preloading/preloading_prediction.h"
#include "content/public/browser/preloading_data.h"
#include "content/public/browser/web_contents_observer.h"
#include "content/public/browser/web_contents_user_data.h"
#include "url/gurl.h"
namespace content {
class PreloadingAttemptImpl;
// Confusion-matrix outcomes for predictors, used by UMA records.
//
// Entries must not be renumbered, and numeric values must never be reused.
// When adding a new entry, also update "PredictorConfusionMatrix" in
// `tools/metrics/histograms/enums.xml`.
enum class PredictorConfusionMatrix {
  kTruePositive = 0,   // True positive.
  kFalsePositive = 1,  // False positive.
  kTrueNegative = 2,   // True negative.
  kFalseNegative = 3,  // False negative.

  // Required by UMA histogram macro; must alias the last real entry.
  kMaxValue = kFalseNegative,
};
// WebContents-scoped implementation of `PreloadingData`: it is attached to a
// WebContents as `WebContentsUserData` and receives that WebContents'
// navigation callbacks as a `WebContentsObserver`.
//
// The scope of current preloading logging is limited to navigations within the
// same WebContents. If the predicted URL is opened in a new tab, we lose the
// data corresponding to the navigation in the different WebContents.
// TODO(crbug.com/40227626): Expand PreloadingData scope to consider multiple
// WebContents navigations.
class CONTENT_EXPORT PreloadingDataImpl
: public PreloadingData,
public WebContentsUserData<PreloadingDataImpl>,
public WebContentsObserver {
public:
~PreloadingDataImpl() override;
// NOTE(review): presumably returns the instance attached to `web_contents`,
// creating it when absent -- confirm in the .cc.
static PreloadingDataImpl* GetOrCreateForWebContents(
WebContents* web_contents);
// No-Vary-Search is a `/content/browser` feature, and so is this matcher
// getter. The matcher first checks if the navigated URL is the same as the
// prediction; if not, the matcher checks if the navigated URL matches
// any No-Vary-Search query using `PrefetchService` if the No-Vary-Search
// feature is enabled.
static PreloadingURLMatchCallback GetPrefetchServiceMatcher(
PrefetchService& prefetch_service,
const PrefetchContainer::Key& predicted);
// Disallow copy and assign.
PreloadingDataImpl(const PreloadingDataImpl& other) = delete;
PreloadingDataImpl& operator=(const PreloadingDataImpl& other) = delete;
// PreloadingData implementation (the methods marked `override` below):
PreloadingAttempt* AddPreloadingAttempt(
PreloadingPredictor predictor,
PreloadingType preloading_type,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id) override;
void AddPreloadingPrediction(
PreloadingPredictor predictor,
int confidence,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id) override;
void SetIsNavigationInDomainCallback(
PreloadingPredictor predictor,
PredictorDomainCallback is_navigation_in_domain_callback) override;
// Not an override. NOTE(review): presumably sets
// `has_speculation_rules_prerender_` -- confirm in the .cc.
void SetHasSpeculationRulesPrerender();
bool HasSpeculationRulesPrerender() override;
void OnPreloadingHeuristicsModelInput(
const GURL& url,
ModelPredictionTrainingData::OutcomeCallback on_record_outcome) override;
// Overload of `AddPreloadingPrediction` that takes a `PreloadingConfidence`
// instead of a raw `int`. It is not marked `override`, i.e. it is specific to
// this class rather than part of the base interface.
void AddPreloadingPrediction(const PreloadingPredictor& predictor,
PreloadingConfidence confidence,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id);
// A version of `AddPreloadingAttempt` which takes two PreloadingPredictors in
// the case where one predictor creates a preloading candidate which is
// enacted by another predictor (e.g. a non-immediate speculation rule creates
// a candidate which is enacted by a pointer down heuristic).
PreloadingAttemptImpl* AddPreloadingAttempt(
const PreloadingPredictor& creating_predictor,
const PreloadingPredictor& enacting_predictor,
PreloadingType preloading_type,
PreloadingURLMatchCallback url_match_predicate,
ukm::SourceId triggering_primary_page_source_id);
// NOTE(review): undocumented in this header. Presumably copies the
// per-predictor domain callbacks for `predictors` from `other` into this
// object -- confirm in the .cc.
void CopyPredictorDomains(const PreloadingDataImpl& other,
const std::vector<PreloadingPredictor>& predictors);
// The output of many predictors is a score (usually probability or logit),
// where a higher score indicates a higher confidence in the prediction. To
// use this score for binary classification, we compare it to a threshold. If
// the score is above the threshold, we classify the instance as positive;
// otherwise, we classify it as negative. Threshold choice affects classifier
// precision and recall. There is a trade-off between precision and recall. If
// we set the threshold too low, we will have high recall but low precision.
// If we set the threshold too high, we will have high precision but low
// recall. To choose the best threshold, we can use ROC curves,
// precision-recall curves, or precision and recall curves.
// `ExperimentalPreloadingPrediction` helps us collect the UMA data required
// to achieve this. This method creates a new
// ExperimentalPreloadingPrediction. Same as above, `url_match_predicate` is
// passed by the caller to verify that the navigated URL is a match. The
// `score` is the probability/logit score, `min_score` and `max_score` are the
// minimum/maximum values that the `score` can have, and `buckets` is the
// number of buckets that will be used for UMA aggregation and should be less
// than 101.
void AddExperimentalPreloadingPrediction(
std::string_view name,
PreloadingURLMatchCallback url_match_predicate,
float score,
float min_score,
float max_score,
size_t buckets);
// WebContentsObserver overrides:
void DidStartNavigation(NavigationHandle* navigation_handle) override;
void DidFinishNavigation(NavigationHandle* navigation_handle) override;
void WebContentsDestroyed() override;
// A commonly used `PredictorDomainCallback`.
static bool IsLinkClickNavigation(NavigationHandle* navigation_handle);
// Test-only hooks. NOTE(review): `SetMaxPredictionsToTenForTesting`
// presumably sets `max_predictions_is_ten_for_testing_` -- confirm in the .cc.
size_t GetPredictionsSizeForTesting() const;
void SetMaxPredictionsToTenForTesting();
private:
// The constructor is private; `WebContentsUserData<PreloadingDataImpl>` is
// befriended below so that it can access it.
explicit PreloadingDataImpl(WebContents* web_contents);
friend class WebContentsUserData<PreloadingDataImpl>;
WEB_CONTENTS_USER_DATA_KEY_DECL();
// Helpers for recording preloading metrics (UKM / UMA). Only the
// declarations are visible here; see the .cc for their exact behavior.
void RecordMetricsForPreloadingAttempts(
ukm::SourceId navigated_page_source_id);
void RecordUKMForPreloadingPredictions(
ukm::SourceId navigated_page_source_id);
void SetIsAccurateTriggeringAndPrediction(const GURL& navigated_url);
void RecordPreloadingAttemptPrecisionToUMA(
const PreloadingAttemptImpl& attempt);
void RecordPredictionPrecisionToUMA(const PreloadingPrediction& prediction);
void UpdatePreloadingAttemptRecallStats(const PreloadingAttemptImpl& attempt);
void UpdatePredictionRecallStats(const PreloadingPrediction& prediction);
void ResetRecallStats();
void RecordRecallStatsToUMA(NavigationHandle* navigation_handle);
// Maps each predictor to its `PredictorDomainCallback` (see
// `SetIsNavigationInDomainCallback`).
base::flat_map<PreloadingPredictor, PredictorDomainCallback>
is_navigation_in_predictor_domain_callbacks_;
// Stores recall statistics for preloading predictions/attempts to later
// record them to UMA.
base::flat_set<PreloadingPredictor> predictions_recall_stats_;
base::flat_set<std::pair<PreloadingPredictor, PreloadingType>>
preloading_attempt_recall_stats_;
// Stores all the experimental preloading predictions that are happening for
// the next navigation until the navigation takes place or the WebContents is
// destroyed.
// NOTE(review): the `total_seen_*` counters in this class presumably count
// every item observed, including ones not retained once sampling starts --
// confirm in the .cc.
std::vector<ExperimentalPreloadingPrediction> experimental_predictions_;
size_t total_seen_experimental_predictions_ = 0;
// Stored `ModelPredictionTrainingData` entries and the matching seen-counter.
std::vector<ModelPredictionTrainingData> ml_predictions_;
size_t total_seen_ml_predictions_ = 0;
// Stores all the preloading attempts that are happening for the next
// navigation until the navigation takes place.
std::vector<std::unique_ptr<PreloadingAttemptImpl>> preloading_attempts_;
// Stores all the preloading predictions that are happening for the next
// navigation until the navigation takes place.
std::vector<PreloadingPrediction> preloading_predictions_;
size_t total_seen_preloading_predictions_ = 0;
// This flag will be true if there's been at least 1 attempt to do a
// speculation-rules based prerender.
bool has_speculation_rules_prerender_ = false;
// The random seed used to determine if a preloading attempt should be sampled
// in UKM logs. We use a different random seed for each session and then hash
// that seed with the UKM source ID so that all attempts for a given source ID
// are sampled in or out together.
// NOTE(review): unlike the other members, this one has no in-class
// initializer; confirm the constructor always sets it.
uint32_t sampling_seed_;
// In production, a large number of predictions are allowed before we start
// sampling. For tests, we may set the limit to something small.
bool max_predictions_is_ten_for_testing_ = false;
};
} // namespace content
#endif // CONTENT_BROWSER_PRELOADING_PRELOADING_DATA_IMPL_H_