| // Copyright 2014 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chrome/browser/predictors/resource_prefetch_predictor.h" |
| |
| #include <map> |
| #include <set> |
| #include <utility> |
| |
| #include "base/macros.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/rand_util.h" |
| #include "base/time/time.h" |
| #include "base/trace_event/trace_event.h" |
| #include "chrome/browser/history/history_service_factory.h" |
| #include "chrome/browser/predictors/loading_data_collector.h" |
| #include "chrome/browser/predictors/predictor_database.h" |
| #include "chrome/browser/predictors/predictor_database_factory.h" |
| #include "chrome/browser/profiles/profile.h" |
| #include "components/history/core/browser/history_database.h" |
| #include "components/history/core/browser/history_service.h" |
| #include "components/history/core/browser/url_utils.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/resource_request_info.h" |
| #include "content/public/browser/web_contents.h" |
| |
| using content::BrowserThread; |
| |
| namespace predictors { |
| |
| namespace { |
| |
// Number of top hosts requested from the history service when the database
// readiness metric is reported.
constexpr size_t kNumSampleHosts = 50;

// The readiness metric is only recorded when the sampled hosts add up to more
// than this many visits.
constexpr size_t kReportReadinessThreshold = 50;

// An origin whose confidence is strictly above this value is requested with
// one socket.
constexpr float kMinOriginConfidenceToTriggerPreconnect = 0.75f;

// An origin whose confidence is below this value is not predicted at all;
// origins between the two thresholds are requested with zero sockets.
constexpr float kMinOriginConfidenceToTriggerPreresolve = 0.2f;
| |
// Events of interest that are not tied to any navigation. The values are
// recorded in the "ResourcePrefetchPredictor.ReportingEvent" histogram, so the
// explicit numbering is part of the recorded data.
enum ReportingEvent {
  // The whole history was cleared and all predictor data was dropped.
  REPORTING_EVENT_ALL_HISTORY_CLEARED = 0,
  // Only some URLs were removed from history.
  REPORTING_EVENT_PARTIAL_HISTORY_CLEARED = 1,
  // Upper boundary handed to the histogram macro; not an event by itself.
  REPORTING_EVENT_COUNT = 2
};
| |
| float ComputeRedirectConfidence(const predictors::RedirectStat& redirect) { |
| return (redirect.number_of_hits() + 0.0) / |
| (redirect.number_of_hits() + redirect.number_of_misses()); |
| } |
| |
| void InitializeOriginStatFromOriginRequestSummary( |
| OriginStat* origin, |
| const OriginRequestSummary& summary) { |
| origin->set_origin(summary.origin.spec()); |
| origin->set_number_of_hits(1); |
| origin->set_average_position(summary.first_occurrence + 1); |
| origin->set_always_access_network(summary.always_access_network); |
| origin->set_accessed_network(summary.accessed_network); |
| } |
| |
| // Used to fetch the visit count for a URL from the History database. |
| class GetUrlVisitCountTask : public history::HistoryDBTask { |
| public: |
| ~GetUrlVisitCountTask() override; |
| typedef base::OnceCallback<void(size_t, // URL visit count. |
| const PageRequestSummary&)> |
| VisitInfoCallback; |
| |
| GetUrlVisitCountTask(std::unique_ptr<PageRequestSummary> summary, |
| VisitInfoCallback callback); |
| |
| bool RunOnDBThread(history::HistoryBackend* backend, |
| history::HistoryDatabase* db) override; |
| |
| void DoneRunOnMainThread() override; |
| |
| private: |
| int visit_count_; |
| std::unique_ptr<PageRequestSummary> summary_; |
| VisitInfoCallback callback_; |
| |
| DISALLOW_COPY_AND_ASSIGN(GetUrlVisitCountTask); |
| }; |
| |
| GetUrlVisitCountTask::GetUrlVisitCountTask( |
| std::unique_ptr<PageRequestSummary> summary, |
| VisitInfoCallback callback) |
| : visit_count_(0), |
| summary_(std::move(summary)), |
| callback_(std::move(callback)) { |
| DCHECK(summary_.get()); |
| } |
| |
| bool GetUrlVisitCountTask::RunOnDBThread(history::HistoryBackend* backend, |
| history::HistoryDatabase* db) { |
| history::URLRow url_row; |
| if (db->GetRowForURL(summary_->main_frame_url, &url_row)) |
| visit_count_ = url_row.visit_count(); |
| return true; |
| } |
| |
| void GetUrlVisitCountTask::DoneRunOnMainThread() { |
| std::move(callback_).Run(visit_count_, *summary_); |
| } |
| |
| GetUrlVisitCountTask::~GetUrlVisitCountTask() {} |
| |
| void InitializeOnDBSequence( |
| ResourcePrefetchPredictor::PrefetchDataMap* url_resource_data, |
| ResourcePrefetchPredictor::PrefetchDataMap* host_resource_data, |
| ResourcePrefetchPredictor::RedirectDataMap* url_redirect_data, |
| ResourcePrefetchPredictor::RedirectDataMap* host_redirect_data, |
| ResourcePrefetchPredictor::OriginDataMap* origin_data) { |
| url_resource_data->InitializeOnDBSequence(); |
| host_resource_data->InitializeOnDBSequence(); |
| url_redirect_data->InitializeOnDBSequence(); |
| host_redirect_data->InitializeOnDBSequence(); |
| origin_data->InitializeOnDBSequence(); |
| } |
| |
| } // namespace |
| |
| PreconnectRequest::PreconnectRequest(const GURL& origin, int num_sockets) |
| : origin(origin), num_sockets(num_sockets) { |
| DCHECK_GE(num_sockets, 0); |
| } |
| |
| PreconnectPrediction::PreconnectPrediction() = default; |
| PreconnectPrediction::PreconnectPrediction( |
| const PreconnectPrediction& prediction) = default; |
| PreconnectPrediction::~PreconnectPrediction() = default; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
// ResourcePrefetchPredictor redirect helpers.
| |
| bool ResourcePrefetchPredictor::GetRedirectEndpoint( |
| const std::string& entry_point, |
| const RedirectDataMap& redirect_data, |
| std::string* redirect_endpoint) const { |
| DCHECK(redirect_endpoint); |
| |
| RedirectData data; |
| bool exists = redirect_data.TryGetData(entry_point, &data); |
| if (!exists) { |
| // Fallback to fetching URLs based on the incoming URL/host. By default |
| // the predictor is confident that there is no redirect. |
| *redirect_endpoint = entry_point; |
| return true; |
| } |
| |
| DCHECK_GT(data.redirect_endpoints_size(), 0); |
| if (data.redirect_endpoints_size() > 1) { |
| // The predictor observed multiple redirect destinations recently. Redirect |
| // endpoint is ambiguous. The predictor predicts a redirect only if it |
| // believes that the redirect is "permanent", i.e. subsequent navigations |
| // will lead to the same destination. |
| return false; |
| } |
| |
| // The threshold is higher than the threshold for resources because the |
| // redirect misprediction causes the waste of whole prefetch. |
| const float kMinRedirectConfidenceToTriggerPrefetch = 0.9f; |
| const int kMinRedirectHitsToTriggerPrefetch = 2; |
| |
| // The predictor doesn't apply a minimum-number-of-hits threshold to |
| // the no-redirect case because the no-redirect is a default assumption. |
| const RedirectStat& redirect = data.redirect_endpoints(0); |
| if (ComputeRedirectConfidence(redirect) < |
| kMinRedirectConfidenceToTriggerPrefetch || |
| (redirect.number_of_hits() < kMinRedirectHitsToTriggerPrefetch && |
| redirect.url() != entry_point)) { |
| return false; |
| } |
| |
| *redirect_endpoint = redirect.url(); |
| return true; |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // ResourcePrefetchPredictor nested types. |
| |
| ResourcePrefetchPredictor::Prediction::Prediction() = default; |
| |
| ResourcePrefetchPredictor::Prediction::Prediction( |
| const ResourcePrefetchPredictor::Prediction& other) = default; |
| |
| ResourcePrefetchPredictor::Prediction::~Prediction() = default; |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // ResourcePrefetchPredictor. |
| |
| ResourcePrefetchPredictor::ResourcePrefetchPredictor( |
| const LoadingPredictorConfig& config, |
| Profile* profile) |
| : profile_(profile), |
| observer_(nullptr), |
| config_(config), |
| initialization_state_(NOT_INITIALIZED), |
| tables_(PredictorDatabaseFactory::GetForProfile(profile) |
| ->resource_prefetch_tables()), |
| history_service_observer_(this), |
| weak_factory_(this) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| // Some form of learning has to be enabled. |
| DCHECK(config_.IsLearningEnabled()); |
| } |
| |
| ResourcePrefetchPredictor::~ResourcePrefetchPredictor() {} |
| |
| void ResourcePrefetchPredictor::StartInitialization() { |
| TRACE_EVENT0("browser", "ResourcePrefetchPredictor::StartInitialization"); |
| |
| if (initialization_state_ != NOT_INITIALIZED) |
| return; |
| initialization_state_ = INITIALIZING; |
| |
| // Create local caches using the database as loaded. |
| auto url_resource_data = std::make_unique<PrefetchDataMap>( |
| tables_, tables_->url_resource_table(), config_.max_urls_to_track); |
| auto host_resource_data = std::make_unique<PrefetchDataMap>( |
| tables_, tables_->host_resource_table(), config_.max_hosts_to_track); |
| auto url_redirect_data = std::make_unique<RedirectDataMap>( |
| tables_, tables_->url_redirect_table(), config_.max_urls_to_track); |
| auto host_redirect_data = std::make_unique<RedirectDataMap>( |
| tables_, tables_->host_redirect_table(), config_.max_hosts_to_track); |
| auto origin_data = std::make_unique<OriginDataMap>( |
| tables_, tables_->origin_table(), config_.max_hosts_to_track); |
| |
| // Get raw pointers to pass to the first task. Ownership of the unique_ptrs |
| // will be passed to the reply task. |
| auto task = base::BindOnce(InitializeOnDBSequence, url_resource_data.get(), |
| host_resource_data.get(), url_redirect_data.get(), |
| host_redirect_data.get(), origin_data.get()); |
| auto reply = base::BindOnce( |
| &ResourcePrefetchPredictor::CreateCaches, weak_factory_.GetWeakPtr(), |
| std::move(url_resource_data), std::move(host_resource_data), |
| std::move(url_redirect_data), std::move(host_redirect_data), |
| std::move(origin_data)); |
| |
| tables_->GetTaskRunner()->PostTaskAndReply(FROM_HERE, std::move(task), |
| std::move(reply)); |
| } |
| |
| bool ResourcePrefetchPredictor::IsUrlPrefetchable( |
| const GURL& main_frame_url) const { |
| return GetPrefetchData(main_frame_url, nullptr); |
| } |
| |
| bool ResourcePrefetchPredictor::IsUrlPreconnectable( |
| const GURL& main_frame_url) const { |
| return PredictPreconnectOrigins(main_frame_url, nullptr); |
| } |
| |
| bool ResourcePrefetchPredictor::IsResourcePrefetchable( |
| const ResourceData& resource) const { |
| float confidence = static_cast<float>(resource.number_of_hits()) / |
| (resource.number_of_hits() + resource.number_of_misses()); |
| return confidence >= config_.min_resource_confidence_to_trigger_prefetch && |
| resource.number_of_hits() >= |
| config_.min_resource_hits_to_trigger_prefetch; |
| } |
| |
| void ResourcePrefetchPredictor::SetObserverForTesting(TestObserver* observer) { |
| observer_ = observer; |
| } |
| |
| void ResourcePrefetchPredictor::Shutdown() { |
| history_service_observer_.RemoveAll(); |
| } |
| |
| void ResourcePrefetchPredictor::RecordPageRequestSummary( |
| std::unique_ptr<PageRequestSummary> summary) { |
| // Make sure initialization is done or start initialization if necessary. |
| if (initialization_state_ == NOT_INITIALIZED) { |
| StartInitialization(); |
| return; |
| } else if (initialization_state_ == INITIALIZING) { |
| return; |
| } else if (initialization_state_ != INITIALIZED) { |
| NOTREACHED() << "Unexpected initialization_state_: " |
| << initialization_state_; |
| return; |
| } |
| |
| history::HistoryService* history_service = nullptr; |
| if (config_.is_url_learning_enabled) { |
| // Kick off history lookup to determine if we should record the URL. |
| history_service = HistoryServiceFactory::GetForProfile( |
| profile_, ServiceAccessType::EXPLICIT_ACCESS); |
| DCHECK(history_service); |
| history_service->ScheduleDBTask( |
| std::make_unique<GetUrlVisitCountTask>( |
| std::move(summary), |
| base::BindOnce(&ResourcePrefetchPredictor::OnVisitCountLookup, |
| weak_factory_.GetWeakPtr())), |
| &history_lookup_consumer_); |
| } else { |
| // We won't record the URL data anyway so avoid the hop to the history |
| // sequence and back. |
| OnVisitCountLookup(0, *summary); |
| } |
| |
| // Report readiness metric with 20% probability and only if the host learning |
| // is enabled. |
| if (config_.is_host_learning_enabled && base::RandInt(1, 5) == 5) { |
| if (!history_service) { |
| history_service = HistoryServiceFactory::GetForProfile( |
| profile_, ServiceAccessType::EXPLICIT_ACCESS); |
| } |
| DCHECK(history_service); |
| history_service->TopHosts( |
| kNumSampleHosts, |
| base::Bind(&ResourcePrefetchPredictor::ReportDatabaseReadiness, |
| weak_factory_.GetWeakPtr())); |
| } |
| } |
| |
| bool ResourcePrefetchPredictor::GetPrefetchData( |
| const GURL& main_frame_url, |
| ResourcePrefetchPredictor::Prediction* prediction) const { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (initialization_state_ != INITIALIZED) |
| return false; |
| |
| std::vector<GURL>* urls = |
| prediction ? &prediction->subresource_urls : nullptr; |
| DCHECK(!urls || urls->empty()); |
| |
| // Fetch resources using URL-keyed data first. |
| std::string redirect_endpoint; |
| const std::string& main_frame_url_spec = main_frame_url.spec(); |
| if (config_.is_url_learning_enabled && |
| GetRedirectEndpoint(main_frame_url_spec, *url_redirect_data_, |
| &redirect_endpoint) && |
| PopulatePrefetcherRequest(redirect_endpoint, *url_resource_data_, urls)) { |
| if (prediction) { |
| prediction->is_host = false; |
| prediction->main_frame_key = redirect_endpoint; |
| prediction->is_redirected = (redirect_endpoint != main_frame_url_spec); |
| } |
| return true; |
| } |
| |
| // Use host data if the URL-based prediction isn't available. |
| std::string main_frame_url_host = main_frame_url.host(); |
| if (config_.is_host_learning_enabled && |
| GetRedirectEndpoint(main_frame_url_host, *host_redirect_data_, |
| &redirect_endpoint) && |
| PopulatePrefetcherRequest(redirect_endpoint, *host_resource_data_, |
| urls)) { |
| if (prediction) { |
| prediction->is_host = true; |
| prediction->main_frame_key = redirect_endpoint; |
| prediction->is_redirected = (redirect_endpoint != main_frame_url_host); |
| } |
| return true; |
| } |
| |
| return false; |
| } |
| |
| bool ResourcePrefetchPredictor::PredictPreconnectOrigins( |
| const GURL& url, |
| PreconnectPrediction* prediction) const { |
| DCHECK(!prediction || prediction->requests.empty()); |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (initialization_state_ != INITIALIZED) |
| return false; |
| |
| std::string host = url.host(); |
| std::string redirect_endpoint; |
| if (!GetRedirectEndpoint(host, *host_redirect_data_, &redirect_endpoint)) |
| return false; |
| |
| OriginData data; |
| if (!origin_data_->TryGetData(redirect_endpoint, &data)) |
| return false; |
| |
| if (prediction) { |
| prediction->host = redirect_endpoint; |
| prediction->is_redirected = (host != redirect_endpoint); |
| } |
| |
| bool has_any_prediction = false; |
| for (const OriginStat& origin : data.origins()) { |
| float confidence = static_cast<float>(origin.number_of_hits()) / |
| (origin.number_of_hits() + origin.number_of_misses()); |
| if (confidence < kMinOriginConfidenceToTriggerPreresolve) |
| continue; |
| |
| has_any_prediction = true; |
| if (prediction) { |
| if (confidence > kMinOriginConfidenceToTriggerPreconnect) |
| prediction->requests.emplace_back(GURL(origin.origin()), 1); |
| else |
| prediction->requests.emplace_back(GURL(origin.origin()), 0); |
| } |
| } |
| |
| return has_any_prediction; |
| } |
| |
| bool ResourcePrefetchPredictor::PopulatePrefetcherRequest( |
| const std::string& main_frame_key, |
| const PrefetchDataMap& resource_data, |
| std::vector<GURL>* urls) const { |
| PrefetchData data; |
| bool exists = resource_data.TryGetData(main_frame_key, &data); |
| if (!exists) |
| return false; |
| |
| bool has_prefetchable_resource = false; |
| for (const ResourceData& resource : data.resources()) { |
| if (IsResourcePrefetchable(resource)) { |
| has_prefetchable_resource = true; |
| if (urls) |
| urls->push_back(GURL(resource.resource_url())); |
| } |
| } |
| |
| return has_prefetchable_resource; |
| } |
| |
| void ResourcePrefetchPredictor::CreateCaches( |
| std::unique_ptr<PrefetchDataMap> url_resource_data, |
| std::unique_ptr<PrefetchDataMap> host_resource_data, |
| std::unique_ptr<RedirectDataMap> url_redirect_data, |
| std::unique_ptr<RedirectDataMap> host_redirect_data, |
| std::unique_ptr<OriginDataMap> origin_data) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK_EQ(INITIALIZING, initialization_state_); |
| |
| DCHECK(url_resource_data); |
| DCHECK(host_resource_data); |
| DCHECK(url_redirect_data); |
| DCHECK(host_redirect_data); |
| DCHECK(origin_data); |
| |
| url_resource_data_ = std::move(url_resource_data); |
| host_resource_data_ = std::move(host_resource_data); |
| url_redirect_data_ = std::move(url_redirect_data); |
| host_redirect_data_ = std::move(host_redirect_data); |
| origin_data_ = std::move(origin_data); |
| |
| ConnectToHistoryService(); |
| } |
| |
| void ResourcePrefetchPredictor::OnHistoryAndCacheLoaded() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK_EQ(INITIALIZING, initialization_state_); |
| |
| initialization_state_ = INITIALIZED; |
| if (observer_) |
| observer_->OnPredictorInitialized(); |
| } |
| |
| void ResourcePrefetchPredictor::DeleteAllUrls() { |
| url_resource_data_->DeleteAllData(); |
| host_resource_data_->DeleteAllData(); |
| url_redirect_data_->DeleteAllData(); |
| host_redirect_data_->DeleteAllData(); |
| origin_data_->DeleteAllData(); |
| } |
| |
| void ResourcePrefetchPredictor::DeleteUrls(const history::URLRows& urls) { |
| std::vector<std::string> urls_to_delete; |
| std::vector<std::string> hosts_to_delete; |
| |
| // Transform GURLs to keys for given database. |
| for (const auto& it : urls) { |
| urls_to_delete.emplace_back(it.url().spec()); |
| hosts_to_delete.emplace_back(it.url().host()); |
| } |
| |
| url_resource_data_->DeleteData(urls_to_delete); |
| host_resource_data_->DeleteData(hosts_to_delete); |
| url_redirect_data_->DeleteData(urls_to_delete); |
| host_redirect_data_->DeleteData(hosts_to_delete); |
| origin_data_->DeleteData(hosts_to_delete); |
| } |
| |
| void ResourcePrefetchPredictor::OnVisitCountLookup( |
| size_t url_visit_count, |
| const PageRequestSummary& summary) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HistoryVisitCountForUrl", |
| url_visit_count); |
| |
| if (config_.is_url_learning_enabled) { |
| // URL level data - merge only if we already saved the data, or it |
| // meets the cutoff requirement. |
| const std::string& url_spec = summary.main_frame_url.spec(); |
| bool already_tracking = url_resource_data_->TryGetData(url_spec, nullptr); |
| bool should_track_url = |
| already_tracking || (url_visit_count >= config_.min_url_visit_count); |
| |
| if (should_track_url) { |
| LearnNavigation(url_spec, summary.subresource_requests, |
| url_resource_data_.get()); |
| LearnRedirect(summary.initial_url.spec(), url_spec, |
| url_redirect_data_.get()); |
| } |
| } |
| |
| const std::string host = summary.main_frame_url.host(); |
| LearnRedirect(summary.initial_url.host(), host, host_redirect_data_.get()); |
| |
| if (config_.is_host_learning_enabled) { |
| // Host level data - no cutoff, always learn the navigation if enabled. |
| LearnNavigation(host, summary.subresource_requests, |
| host_resource_data_.get()); |
| } |
| |
| if (config_.is_origin_learning_enabled) |
| LearnOrigins(host, summary.main_frame_url.GetOrigin(), summary.origins); |
| |
| if (observer_) |
| observer_->OnNavigationLearned(url_visit_count, summary); |
| } |
| |
| void ResourcePrefetchPredictor::LearnNavigation( |
| const std::string& key, |
| const std::vector<URLRequestSummary>& new_resources, |
| PrefetchDataMap* resource_data) { |
| TRACE_EVENT1("browser", "ResourcePrefetchPredictor::LearnNavigation", "key", |
| key); |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| |
| // If the primary key is too long reject it. |
| if (key.length() > ResourcePrefetchPredictorTables::kMaxStringLength) |
| return; |
| |
| PrefetchData data; |
| bool exists = resource_data->TryGetData(key, &data); |
| if (!exists) { |
| data.set_primary_key(key); |
| data.set_last_visit_time(base::Time::Now().ToInternalValue()); |
| size_t new_resources_size = new_resources.size(); |
| std::set<GURL> resources_seen; |
| for (size_t i = 0; i < new_resources_size; ++i) { |
| const URLRequestSummary& summary = new_resources[i]; |
| if (resources_seen.find(summary.resource_url) != resources_seen.end()) |
| continue; |
| |
| ResourceData* resource_to_add = data.add_resources(); |
| resource_to_add->set_resource_url(summary.resource_url.spec()); |
| resource_to_add->set_resource_type( |
| static_cast<ResourceData::ResourceType>(summary.resource_type)); |
| resource_to_add->set_number_of_hits(1); |
| resource_to_add->set_average_position(i + 1); |
| resource_to_add->set_priority( |
| static_cast<ResourceData::Priority>(summary.priority)); |
| resource_to_add->set_before_first_contentful_paint( |
| summary.before_first_contentful_paint); |
| resource_to_add->set_has_validators(summary.has_validators); |
| resource_to_add->set_always_revalidate(summary.always_revalidate); |
| |
| resources_seen.insert(summary.resource_url); |
| } |
| } else { |
| data.set_last_visit_time(base::Time::Now().ToInternalValue()); |
| |
| // Build indices over the data. |
| std::map<GURL, int> new_index, old_index; |
| int new_resources_size = static_cast<int>(new_resources.size()); |
| for (int i = 0; i < new_resources_size; ++i) { |
| const URLRequestSummary& summary = new_resources[i]; |
| // Take the first occurence of every url. |
| if (new_index.find(summary.resource_url) == new_index.end()) |
| new_index[summary.resource_url] = i; |
| } |
| int old_resources_size = static_cast<int>(data.resources_size()); |
| for (int i = 0; i < old_resources_size; ++i) { |
| bool is_new = |
| old_index |
| .insert(std::make_pair(GURL(data.resources(i).resource_url()), i)) |
| .second; |
| DCHECK(is_new); |
| } |
| |
| // Go through the old urls and update their hit/miss counts. |
| for (int i = 0; i < old_resources_size; ++i) { |
| ResourceData* old_resource = data.mutable_resources(i); |
| GURL resource_url(old_resource->resource_url()); |
| if (new_index.find(resource_url) == new_index.end()) { |
| old_resource->set_number_of_misses(old_resource->number_of_misses() + |
| 1); |
| old_resource->set_consecutive_misses( |
| old_resource->consecutive_misses() + 1); |
| } else { |
| const URLRequestSummary& new_summary = |
| new_resources[new_index[resource_url]]; |
| |
| // Update the resource type since it could have changed. |
| if (new_summary.resource_type != content::RESOURCE_TYPE_LAST_TYPE) { |
| old_resource->set_resource_type( |
| static_cast<ResourceData::ResourceType>( |
| new_summary.resource_type)); |
| } |
| |
| old_resource->set_priority( |
| static_cast<ResourceData::Priority>(new_summary.priority)); |
| old_resource->set_before_first_contentful_paint( |
| new_summary.before_first_contentful_paint); |
| |
| int position = new_index[resource_url] + 1; |
| int total = |
| old_resource->number_of_hits() + old_resource->number_of_misses(); |
| old_resource->set_average_position( |
| ((old_resource->average_position() * total) + position) / |
| (total + 1)); |
| old_resource->set_number_of_hits(old_resource->number_of_hits() + 1); |
| old_resource->set_consecutive_misses(0); |
| } |
| } |
| |
| // Add the new ones that we have not seen before. |
| for (int i = 0; i < new_resources_size; ++i) { |
| const URLRequestSummary& summary = new_resources[i]; |
| if (old_index.find(summary.resource_url) != old_index.end()) |
| continue; |
| |
| // Only need to add new stuff. |
| ResourceData* resource_to_add = data.add_resources(); |
| resource_to_add->set_resource_url(summary.resource_url.spec()); |
| resource_to_add->set_resource_type( |
| static_cast<ResourceData::ResourceType>(summary.resource_type)); |
| resource_to_add->set_number_of_hits(1); |
| resource_to_add->set_average_position(i + 1); |
| resource_to_add->set_priority( |
| static_cast<ResourceData::Priority>(summary.priority)); |
| resource_to_add->set_before_first_contentful_paint( |
| summary.before_first_contentful_paint); |
| resource_to_add->set_has_validators(new_resources[i].has_validators); |
| resource_to_add->set_always_revalidate( |
| new_resources[i].always_revalidate); |
| |
| // To ensure we dont add the same url twice. |
| old_index[summary.resource_url] = 0; |
| } |
| } |
| |
| // Trim and sort the resources after the update. |
| ResourcePrefetchPredictorTables::TrimResources( |
| &data, config_.max_consecutive_misses); |
| ResourcePrefetchPredictorTables::SortResources(&data); |
| if (data.resources_size() > |
| static_cast<int>(config_.max_resources_per_entry)) { |
| data.mutable_resources()->DeleteSubrange( |
| config_.max_resources_per_entry, |
| data.resources_size() - config_.max_resources_per_entry); |
| } |
| |
| if (data.resources_size() == 0) |
| resource_data->DeleteData({key}); |
| else |
| resource_data->UpdateData(key, data); |
| } |
| |
| void ResourcePrefetchPredictor::LearnRedirect(const std::string& key, |
| const std::string& final_redirect, |
| RedirectDataMap* redirect_data) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| // If the primary key is too long reject it. |
| if (key.length() > ResourcePrefetchPredictorTables::kMaxStringLength) |
| return; |
| |
| RedirectData data; |
| bool exists = redirect_data->TryGetData(key, &data); |
| if (!exists) { |
| data.set_primary_key(key); |
| data.set_last_visit_time(base::Time::Now().ToInternalValue()); |
| RedirectStat* redirect_to_add = data.add_redirect_endpoints(); |
| redirect_to_add->set_url(final_redirect); |
| redirect_to_add->set_number_of_hits(1); |
| } else { |
| data.set_last_visit_time(base::Time::Now().ToInternalValue()); |
| |
| bool need_to_add = true; |
| for (RedirectStat& redirect : *(data.mutable_redirect_endpoints())) { |
| if (redirect.url() == final_redirect) { |
| need_to_add = false; |
| redirect.set_number_of_hits(redirect.number_of_hits() + 1); |
| redirect.set_consecutive_misses(0); |
| } else { |
| redirect.set_number_of_misses(redirect.number_of_misses() + 1); |
| redirect.set_consecutive_misses(redirect.consecutive_misses() + 1); |
| } |
| } |
| |
| if (need_to_add) { |
| RedirectStat* redirect_to_add = data.add_redirect_endpoints(); |
| redirect_to_add->set_url(final_redirect); |
| redirect_to_add->set_number_of_hits(1); |
| } |
| } |
| |
| // Trim the redirects after the update. |
| ResourcePrefetchPredictorTables::TrimRedirects( |
| &data, config_.max_redirect_consecutive_misses); |
| |
| if (data.redirect_endpoints_size() == 0) |
| redirect_data->DeleteData({key}); |
| else |
| redirect_data->UpdateData(key, data); |
| } |
| |
| void ResourcePrefetchPredictor::LearnOrigins( |
| const std::string& host, |
| const GURL& main_frame_origin, |
| const std::map<GURL, OriginRequestSummary>& summaries) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (host.size() > ResourcePrefetchPredictorTables::kMaxStringLength) |
| return; |
| |
| OriginData data; |
| bool exists = origin_data_->TryGetData(host, &data); |
| if (!exists) { |
| data.set_host(host); |
| data.set_last_visit_time(base::Time::Now().ToInternalValue()); |
| size_t origins_size = summaries.size(); |
| auto ordered_origins = |
| std::vector<const OriginRequestSummary*>(origins_size); |
| for (const auto& kv : summaries) { |
| size_t index = kv.second.first_occurrence; |
| DCHECK_LT(index, origins_size); |
| ordered_origins[index] = &kv.second; |
| } |
| |
| for (const OriginRequestSummary* summary : ordered_origins) { |
| auto* origin_to_add = data.add_origins(); |
| InitializeOriginStatFromOriginRequestSummary(origin_to_add, *summary); |
| } |
| } else { |
| data.set_last_visit_time(base::Time::Now().ToInternalValue()); |
| |
| std::map<GURL, int> old_index; |
| int old_size = static_cast<int>(data.origins_size()); |
| for (int i = 0; i < old_size; ++i) { |
| bool is_new = |
| old_index.insert({GURL(data.origins(i).origin()), i}).second; |
| DCHECK(is_new); |
| } |
| |
| // Update the old origins. |
| for (int i = 0; i < old_size; ++i) { |
| auto* old_origin = data.mutable_origins(i); |
| GURL origin(old_origin->origin()); |
| auto it = summaries.find(origin); |
| if (it == summaries.end()) { |
| // miss |
| old_origin->set_number_of_misses(old_origin->number_of_misses() + 1); |
| old_origin->set_consecutive_misses(old_origin->consecutive_misses() + |
| 1); |
| } else { |
| // hit: update. |
| const auto& new_origin = it->second; |
| old_origin->set_always_access_network(new_origin.always_access_network); |
| old_origin->set_accessed_network(new_origin.accessed_network); |
| |
| int position = new_origin.first_occurrence + 1; |
| int total = |
| old_origin->number_of_hits() + old_origin->number_of_misses(); |
| old_origin->set_average_position( |
| ((old_origin->average_position() * total) + position) / |
| (total + 1)); |
| old_origin->set_number_of_hits(old_origin->number_of_hits() + 1); |
| old_origin->set_consecutive_misses(0); |
| } |
| } |
| |
| // Add new origins. |
| for (const auto& kv : summaries) { |
| if (old_index.find(kv.first) != old_index.end()) |
| continue; |
| |
| auto* origin_to_add = data.add_origins(); |
| InitializeOriginStatFromOriginRequestSummary(origin_to_add, kv.second); |
| } |
| } |
| |
| // Trim and Sort. |
| ResourcePrefetchPredictorTables::TrimOrigins(&data, |
| config_.max_consecutive_misses); |
| ResourcePrefetchPredictorTables::SortOrigins(&data, main_frame_origin.spec()); |
| if (data.origins_size() > static_cast<int>(config_.max_origins_per_entry)) { |
| data.mutable_origins()->DeleteSubrange( |
| config_.max_origins_per_entry, |
| data.origins_size() - config_.max_origins_per_entry); |
| } |
| |
| // Update the database. |
| if (data.origins_size() == 0) |
| origin_data_->DeleteData({host}); |
| else |
| origin_data_->UpdateData(host, data); |
| } |
| |
| void ResourcePrefetchPredictor::ReportDatabaseReadiness( |
| const history::TopHostsList& top_hosts) const { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| if (top_hosts.size() == 0) |
| return; |
| |
| size_t count_in_cache = 0; |
| size_t total_visits = 0; |
| for (const std::pair<std::string, int>& top_host : top_hosts) { |
| const std::string& host = top_host.first; |
| total_visits += top_host.second; |
| |
| // Hostnames in TopHostsLists are stripped of their 'www.' prefix. We |
| // assume that www.foo.com entry from |host_resource_data_| is also suitable |
| // for foo.com. |
| if (PopulatePrefetcherRequest(host, *host_resource_data_, nullptr) || |
| (!base::StartsWith(host, "www.", base::CompareCase::SENSITIVE) && |
| PopulatePrefetcherRequest("www." + host, *host_resource_data_, |
| nullptr))) { |
| ++count_in_cache; |
| } |
| } |
| |
| // Filter users that don't have the rich browsing history. |
| if (total_visits > kReportReadinessThreshold) { |
| UMA_HISTOGRAM_PERCENTAGE("ResourcePrefetchPredictor.DatabaseReadiness", |
| 100 * count_in_cache / top_hosts.size()); |
| } |
| } |
| |
| void ResourcePrefetchPredictor::OnURLsDeleted( |
| history::HistoryService* history_service, |
| bool all_history, |
| bool expired, |
| const history::URLRows& deleted_rows, |
| const std::set<GURL>& favicon_urls) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK(initialization_state_ == INITIALIZED); |
| |
| if (all_history) { |
| DeleteAllUrls(); |
| UMA_HISTOGRAM_ENUMERATION("ResourcePrefetchPredictor.ReportingEvent", |
| REPORTING_EVENT_ALL_HISTORY_CLEARED, |
| REPORTING_EVENT_COUNT); |
| } else { |
| DeleteUrls(deleted_rows); |
| UMA_HISTOGRAM_ENUMERATION("ResourcePrefetchPredictor.ReportingEvent", |
| REPORTING_EVENT_PARTIAL_HISTORY_CLEARED, |
| REPORTING_EVENT_COUNT); |
| } |
| } |
| |
| void ResourcePrefetchPredictor::OnHistoryServiceLoaded( |
| history::HistoryService* history_service) { |
| if (initialization_state_ == INITIALIZING) { |
| OnHistoryAndCacheLoaded(); |
| } |
| } |
| |
| void ResourcePrefetchPredictor::ConnectToHistoryService() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK_EQ(INITIALIZING, initialization_state_); |
| |
| // Register for HistoryServiceLoading if it is not ready. |
| history::HistoryService* history_service = |
| HistoryServiceFactory::GetForProfile(profile_, |
| ServiceAccessType::EXPLICIT_ACCESS); |
| if (!history_service) |
| return; |
| DCHECK(!history_service_observer_.IsObserving(history_service)); |
| history_service_observer_.Add(history_service); |
| if (history_service->BackendLoaded()) { |
| // HistoryService is already loaded. Continue with Initialization. |
| OnHistoryAndCacheLoaded(); |
| } |
| } |
| |
| //////////////////////////////////////////////////////////////////////////////// |
| // TestObserver. |
| |
| TestObserver::~TestObserver() { |
| predictor_->SetObserverForTesting(nullptr); |
| } |
| |
| TestObserver::TestObserver(ResourcePrefetchPredictor* predictor) |
| : predictor_(predictor) { |
| predictor_->SetObserverForTesting(this); |
| } |
| |
| } // namespace predictors |