blob: 291da2802f344764352315cd8fe89605b26acdd6 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/memory/weak_ptr.h"
#include "base/scoped_observer.h"
#include "base/task/cancelable_task_tracker.h"
#include "base/time/time.h"
#include "chrome/browser/predictors/resource_prefetch_common.h"
#include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
#include "chrome/browser/predictors/resource_prefetcher.h"
#include "components/history/core/browser/history_db_task.h"
#include "components/history/core/browser/history_service_observer.h"
#include "components/history/core/browser/history_types.h"
#include "components/keyed_service/core/keyed_service.h"
#include "content/public/common/resource_type.h"
#include "url/gurl.h"
class PredictorsHandler;
class Profile;
namespace net {
class URLRequest;
namespace predictors {
namespace internal {
constexpr char kResourcePrefetchPredictorPrecisionHistogram[] =
constexpr char kResourcePrefetchPredictorRecallHistogram[] =
constexpr char kResourcePrefetchPredictorCountHistogram[] =
constexpr char kResourcePrefetchPredictorPrefetchingDurationHistogram[] =
constexpr char kResourcePrefetchPredictorPrefetchMissesCountCached[] =
constexpr char kResourcePrefetchPredictorPrefetchMissesCountNotCached[] =
constexpr char kResourcePrefetchPredictorPrefetchHitsCountCached[] =
constexpr char kResourcePrefetchPredictorPrefetchHitsCountNotCached[] =
constexpr char kResourcePrefetchPredictorPrefetchHitsSize[] =
constexpr char kResourcePrefetchPredictorPrefetchMissesSize[] =
constexpr char kResourcePrefetchPredictorRedirectStatusHistogram[] =
} // namespace internal
class TestObserver;
class ResourcePrefetcherManager;
// Contains logic for learning what can be prefetched and for kicking off
// speculative prefetching.
// - The class is a profile keyed service owned by the profile.
// - All the non-static methods of this class need to be called on the UI
// thread.
// The overall flow of the resource prefetching algorithm is as follows:
// * ResourcePrefetchPredictorObserver - Listens for URL requests, responses and
// redirects (client-side redirects are not supported) on the IO thread (via
// ResourceDispatcherHostDelegate) and posts tasks to the
// ResourcePrefetchPredictor on the UI thread. This is owned by the
// ProfileIOData for the profile.
// * ResourcePrefetchPredictorTables - Persists ResourcePrefetchPredictor data
// to a sql database. Runs entirely on the DB thread. Owned by the
// PredictorDatabase.
// * ResourcePrefetchPredictor - Learns about resource requirements per URL in
// the UI thread through the ResourcePrefetchPredictorObserver and persists
// it to disk in the DB thread through the ResourcePrefetchPredictorTables. It
// initiates resource prefetching using the ResourcePrefetcherManager. Owned
// by profile.
// * ResourcePrefetcherManager - Manages the ResourcePrefetchers that do the
// prefetching on the IO thread. The manager is owned by the
// ResourcePrefetchPredictor and interfaces between the predictor on the UI
// thread and the prefetchers on the IO thread.
// * ResourcePrefetcher - Lives entirely on the IO thread, owned by the
// ResourcePrefetcherManager, and issues net::URLRequest to fetch resources.
// TODO(zhenw): Currently only main frame requests/redirects/responses are
// recorded. Consider recording sub-frame responses independently or together
// with main frame.
class ResourcePrefetchPredictor
: public KeyedService,
public history::HistoryServiceObserver,
public base::SupportsWeakPtr<ResourcePrefetchPredictor> {
// Stores the data that we need to get from the URLRequest. Summaries are the
// argument type of the Record*() and On*() methods of
// ResourcePrefetchPredictor and are collected per navigation in
// PageRequestSummary::subresource_requests.
struct URLRequestSummary {
// Copy constructor.
URLRequestSummary(const URLRequestSummary& other);
// Navigation this request is associated with. SummarizeResponse() does NOT
// initialize this field.
NavigationID navigation_id;
// NOTE(review): the three fields below presumably describe the request
// itself (URL, declared resource type, network priority) — confirm against
// the code that populates them.
GURL resource_url;
content::ResourceType resource_type;
net::RequestPriority priority;
// Only for responses.
// NOTE(review): the extent of this remark is not marked in the declaration;
// it covers at least |mime_type| — confirm for the fields that follow.
std::string mime_type;
bool was_cached;
GURL redirect_url; // Empty unless request was redirected to a valid url.
// NOTE(review): presumably cache-validation properties of the response —
// confirm against SummarizeResponse().
bool has_validators;
bool always_revalidate;
// Initializes a |URLRequestSummary| from a |URLRequest| response.
// |request| is the source of the data and |summary| is the output object.
// Returns true for success. Note: NavigationID is NOT initialized
// by this function.
static bool SummarizeResponse(const net::URLRequest& request,
URLRequestSummary* summary);
// Stores the data learned from a single navigation. Instances are passed to
// OnVisitCountLookup() and to TestObserver::OnNavigationLearned().
struct PageRequestSummary {
// Explicit to prevent implicit GURL -> PageRequestSummary conversions.
explicit PageRequestSummary(const GURL& main_frame_url);
// Copy constructor.
PageRequestSummary(const PageRequestSummary& other);
// Main frame URL of the navigation.
GURL main_frame_url;
// NOTE(review): presumably the URL the navigation started with, before any
// redirects — confirm against the code that sets it.
GURL initial_url;
// Stores all subresource requests within a single navigation, from initial
// main frame request to navigation completion.
std::vector<URLRequestSummary> subresource_requests;
// Stores the result of a prediction. |subresource_urls| is the main result;
// the other fields are used for diagnostics and histogram reporting.
// This is the output type of GetPrefetchData().
struct Prediction {
// Copy constructor.
Prediction(const Prediction& other);
// NOTE(review): presumably true when the prediction is based on host-level
// rather than URL-level data — confirm.
bool is_host;
// NOTE(review): presumably true when a learned redirect was followed to find
// the data — confirm.
bool is_redirected;
// Key under which the prefetch data was looked up.
// NOTE(review): presumably a URL spec or a host, depending on |is_host| —
// confirm.
std::string main_frame_key;
// The predicted subresources, i.e. the actual result of the prediction.
std::vector<GURL> subresource_urls;
// Used for reporting redirect prediction success/failure in histograms.
// NOTE: This enumeration is used in histograms, so please do not add entries
// in the middle.
enum class RedirectStatus {
// Creates a predictor using |config| for |profile|.
// NOTE(review): |config| and |profile| presumably end up in |config_| and
// |profile_| — confirm in the implementation file.
ResourcePrefetchPredictor(const ResourcePrefetchPredictorConfig& config,
Profile* profile);
~ResourcePrefetchPredictor() override;
// Starts initialization by posting a task to the DB thread to read the
// predictor database. CreateCaches() is the callback for that read.
void StartInitialization();
// Thread safe.
// These static predicates gate the corresponding RecordURL*() methods:
// RecordURLRequest(), RecordURLResponse() and RecordURLRedirect() should only
// be called when the matching Should* function returned true.
static bool ShouldRecordRequest(net::URLRequest* request,
content::ResourceType resource_type);
static bool ShouldRecordResponse(net::URLRequest* response);
static bool ShouldRecordRedirect(net::URLRequest* response);
// Determines the resource type from the declared one, falling back to MIME
// type detection when it is not explicit.
// |resource_type| is the declared type; |mime_type| is consulted only as the
// fallback described above.
static content::ResourceType GetResourceType(
content::ResourceType resource_type,
const std::string& mime_type);
// Determines the ResourceType from the mime type, defaulting to the
// |fallback| if the ResourceType could not be determined.
static content::ResourceType GetResourceTypeFromMimeType(
const std::string& mime_type,
content::ResourceType fallback);
// 'ResourcePrefetchPredictorObserver' calls the below functions to inform the
// predictor of main frame and resource requests. Should only be called if the
// corresponding Should* functions return true.
// Like all non-static methods of this class, these need to be called on the
// UI thread.
void RecordURLRequest(const URLRequestSummary& request);
void RecordURLResponse(const URLRequestSummary& response);
void RecordURLRedirect(const URLRequestSummary& response);
// Called when the main frame of a page completes loading.
// |navigation_id| identifies the navigation that finished loading.
void RecordMainFrameLoadComplete(const NavigationID& navigation_id);
// Starts prefetching if it is enabled for |origin| and prefetching data
// exists for the |main_frame_url| either at the URL or at the host level.
void StartPrefetching(const GURL& main_frame_url, PrefetchOrigin origin);
// Stops prefetching that may be in progress corresponding to
// |main_frame_url|.
void StopPrefetching(const GURL& main_frame_url);
// Called when ResourcePrefetcher is finished, i.e. there is nothing pending
// in flight. Takes ownership of |stats|.
void OnPrefetchingFinished(
const GURL& main_frame_url,
std::unique_ptr<ResourcePrefetcher::PrefetcherStats> stats);
// Returns true if prefetching data exists for the |main_frame_url|.
// NOTE(review): virtual, presumably so that it can be overridden in tests —
// confirm.
virtual bool IsUrlPrefetchable(const GURL& main_frame_url);
// Sets the |observer| to be notified when the resource prefetch predictor
// data changes. A previously registered observer will be discarded. Call
// this with a nullptr parameter to de-register the observer.
void SetObserverForTesting(TestObserver* observer);
// Friend declarations: ::PredictorsHandler (declared in the global namespace)
// and the test fixtures / individual test cases listed below are granted
// access to the non-public members of this class.
friend class ::PredictorsHandler;
friend class ResourcePrefetchPredictorTest;
friend class ResourcePrefetchPredictorBrowserTest;
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, DeleteUrls);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlInDB);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, NavigationUrlNotInDB);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, RedirectUrlNotInDB);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, RedirectUrlInDB);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRequest);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, OnMainFrameRedirect);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, GetCorrectPLT);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, HandledResourceTypes);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, GetRedirectEndpoint);
FRIEND_TEST_ALL_PREFIXES(ResourcePrefetchPredictorTest, GetPrefetchData);
enum InitializationState {
// Short aliases for the map types declared by ResourcePrefetchPredictorTables;
// they are used for the in-memory caches and helper signatures of this class.
typedef ResourcePrefetchPredictorTables::PrefetchDataMap PrefetchDataMap;
typedef ResourcePrefetchPredictorTables::RedirectDataMap RedirectDataMap;
typedef ResourcePrefetchPredictorTables::ManifestDataMap ManifestDataMap;
typedef std::map<NavigationID, std::unique_ptr<PageRequestSummary>>
// Returns true if the main page request is supported for prediction.
static bool IsHandledMainPage(net::URLRequest* request);
// Returns true if the subresource request is supported for prediction.
static bool IsHandledSubresource(net::URLRequest* request,
content::ResourceType resource_type);
// Returns true if the subresource has a supported type.
static bool IsHandledResourceType(content::ResourceType resource_type,
const std::string& mime_type);
// Returns true if the request (which should have a response in it) is
// "no-store".
static bool IsNoStore(const net::URLRequest* request);
// Returns true iff |redirect_data_map| contains a confident redirect endpoint
// for |entry_point|; in that case the endpoint is assigned to
// |redirect_endpoint|.
static bool GetRedirectEndpoint(const std::string& entry_point,
const RedirectDataMap& redirect_data_map,
std::string* redirect_endpoint);
// KeyedService methods override.
void Shutdown() override;
// Functions called on different network events pertaining to the loading of
// main frame resource or sub resources.
// NOTE(review): presumably these are dispatched to from the public
// RecordURL*() entry points — confirm in the implementation file.
void OnMainFrameRequest(const URLRequestSummary& request);
void OnMainFrameResponse(const URLRequestSummary& response);
void OnMainFrameRedirect(const URLRequestSummary& response);
void OnSubresourceResponse(const URLRequestSummary& response);
// Called when onload completes for a navigation. We treat this point as the
// "completion" of the navigation. The resources requested by the page up to
// this point are the only ones considered for prefetching.
void OnNavigationComplete(const NavigationID& nav_id_without_timing_info);
// Returns true iff there is PrefetchData that can be used for a
// |main_frame_url| and fills |prediction| with resources that need to be
// prefetched. |prediction| may be nullptr if only the return value is
// needed.
bool GetPrefetchData(const GURL& main_frame_url,
Prediction* prediction) const;
// Returns true iff the |data_map| contains PrefetchData that can be used
// for a |main_frame_key| and fills |urls| with resources that need to be
// prefetched. |urls| may be nullptr if only the return value is needed.
bool PopulatePrefetcherRequest(const std::string& main_frame_key,
const PrefetchDataMap& data_map,
std::vector<GURL>* urls) const;
// Callback for the task that reads the predictor database (see
// StartInitialization()). Takes ownership of all arguments.
void CreateCaches(std::unique_ptr<PrefetchDataMap> url_data_map,
std::unique_ptr<PrefetchDataMap> host_data_map,
std::unique_ptr<RedirectDataMap> url_redirect_data_map,
std::unique_ptr<RedirectDataMap> host_redirect_data_map,
std::unique_ptr<ManifestDataMap> manifest_data_map);
// Called during initialization when history is read and the predictor
// database has been read.
void OnHistoryAndCacheLoaded();
// Cleans up inflight_navigations_, inflight_prefetches_, and
// prefetcher_stats_.
// NOTE(review): the role of |navigation_id| in deciding what counts as
// abandoned is not visible from the declaration — confirm in the
// implementation file.
void CleanupAbandonedNavigations(const NavigationID& navigation_id);
// Deletes all URLs from the predictor database, the caches and removes all
// inflight navigations.
void DeleteAllUrls();
// Deletes data for the input |urls| and their corresponding hosts from the
// predictor database and caches.
void DeleteUrls(const history::URLRows& urls);
// Callback for GetUrlVisitCountTask. Receives the looked-up
// |url_visit_count| together with the |summary| of the navigation.
void OnVisitCountLookup(size_t url_visit_count,
const PageRequestSummary& summary);
// Removes the oldest entry in the input |data_map|, also deleting it from the
// predictor database.
void RemoveOldestEntryInPrefetchDataMap(PrefetchKeyType key_type,
PrefetchDataMap* data_map);
// Counterpart of the function above that operates on a RedirectDataMap.
// NOTE(review): presumably the same semantics (oldest entry removed from the
// map and from the database) — confirm in the implementation file.
void RemoveOldestEntryInRedirectDataMap(PrefetchKeyType key_type,
RedirectDataMap* data_map);
// Merges resources in |new_resources| into the |data_map| and correspondingly
// updates the predictor database. Also calls LearnRedirect if relevant.
// |data_map| and |redirect_map| are modified in place.
void LearnNavigation(const std::string& key,
PrefetchKeyType key_type,
const std::vector<URLRequestSummary>& new_resources,
size_t max_data_map_size,
PrefetchDataMap* data_map,
const std::string& key_before_redirects,
RedirectDataMap* redirect_map);
// Updates information about the final redirect destination for |key| in
// |redirect_map| and correspondingly updates the predictor database.
void LearnRedirect(const std::string& key,
PrefetchKeyType key_type,
const std::string& final_redirect,
size_t max_redirect_map_size,
RedirectDataMap* redirect_map);
// Returns true iff |resource| has sufficient confidence level and required
// number of hits.
bool IsResourcePrefetchable(const ResourceData& resource) const;
// Reports the database readiness metric, defined as the percentage of
// navigated hosts found in the DB for the last X entries in history.
void ReportDatabaseReadiness(const history::TopHostsList& top_hosts) const;
// history::HistoryServiceObserver:
// NOTE(review): OnURLsDeleted() presumably forwards to DeleteAllUrls() when
// |all_history| is set and to DeleteUrls(|deleted_rows|) otherwise — confirm
// in the implementation file.
void OnURLsDeleted(history::HistoryService* history_service,
bool all_history,
bool expired,
const history::URLRows& deleted_rows,
const std::set<GURL>& favicon_urls) override;
void OnHistoryServiceLoaded(
history::HistoryService* history_service) override;
// Used to connect to HistoryService or register for service loaded
// notification.
void ConnectToHistoryService();
// Used for testing to inject mock tables. Replaces |tables_| with |tables|.
void set_mock_tables(scoped_refptr<ResourcePrefetchPredictorTables> tables) {
tables_ = tables;
// Raw pointer to the profile; the pointer itself is const. This service is
// owned by the profile (see the class comment), not the other way round.
Profile* const profile_;
// NOTE(review): presumably the observer installed through
// SetObserverForTesting(); nullptr when none is registered — confirm.
TestObserver* observer_;
// Immutable configuration of the predictor.
ResourcePrefetchPredictorConfig const config_;
// Current stage of the initialization sequence.
InitializationState initialization_state_;
// Ref-counted handle to the predictor database tables. Can be replaced with
// a mock through set_mock_tables().
scoped_refptr<ResourcePrefetchPredictorTables> tables_;
// Ref-counted handle to the manager of the ResourcePrefetchers.
scoped_refptr<ResourcePrefetcherManager> prefetch_manager_;
// NOTE(review): presumably tracks outstanding history lookups (e.g. the
// visit count lookup) so that they can be cancelled — confirm.
base::CancelableTaskTracker history_lookup_consumer_;
// Copy of the data in the predictor tables.
std::unique_ptr<PrefetchDataMap> url_table_cache_;
std::unique_ptr<PrefetchDataMap> host_table_cache_;
std::unique_ptr<RedirectDataMap> url_redirect_table_cache_;
std::unique_ptr<RedirectDataMap> host_redirect_table_cache_;
std::unique_ptr<ManifestDataMap> manifest_table_cache_;
// NOTE(review): presumably maps a main frame URL to the time its prefetch
// was started — confirm.
std::map<GURL, base::TimeTicks> inflight_prefetches_;
// Navigations that are currently being tracked.
NavigationMap inflight_navigations_;
std::map<GURL, std::unique_ptr<ResourcePrefetcher::PrefetcherStats>>
ScopedObserver<history::HistoryService, history::HistoryServiceObserver>
// An interface used to notify that data in the ResourcePrefetchPredictor
// has changed. All methods are invoked on the UI thread.
class TestObserver {
// De-registers itself from |predictor_| on destruction.
virtual ~TestObserver();
// Notification hooks. Every hook has an empty default body, so subclasses
// only need to override the events they are interested in.
virtual void OnPredictorInitialized() {}
virtual void OnNavigationLearned(
size_t url_visit_count,
const ResourcePrefetchPredictor::PageRequestSummary& summary) {}
virtual void OnPrefetchingStarted(const GURL& main_frame_url) {}
virtual void OnPrefetchingStopped(const GURL& main_frame_url) {}
virtual void OnPrefetchingFinished(const GURL& main_frame_url) {}
// |predictor| must be non-null and has to outlive the TestObserver.
// Also, the predictor must not already have a TestObserver set.
explicit TestObserver(ResourcePrefetchPredictor* predictor);
// The observed predictor; raw pointer, see the lifetime requirement above.
ResourcePrefetchPredictor* predictor_;
} // namespace predictors