| // Copyright 2013 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "components/precache/core/precache_fetcher.h" |
| |
| #include <algorithm> |
| #include <limits> |
| #include <set> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/base64.h" |
| #include "base/bind.h" |
| #include "base/bind_helpers.h" |
| #include "base/callback.h" |
| #include "base/command_line.h" |
| #include "base/compiler_specific.h" |
| #include "base/containers/hash_tables.h" |
| #include "base/location.h" |
| #include "base/logging.h" |
| #include "base/memory/ptr_util.h" |
| #include "base/memory/ref_counted.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/sha1.h" |
| #include "base/strings/string_piece.h" |
| #include "base/task_runner_util.h" |
| #include "components/data_use_measurement/core/data_use_user_data.h" |
| #include "components/precache/core/precache_database.h" |
| #include "components/precache/core/precache_switches.h" |
| #include "components/precache/core/proto/precache.pb.h" |
| #include "components/precache/core/proto/quota.pb.h" |
| #include "components/precache/core/proto/unfinished_work.pb.h" |
| #include "net/base/completion_callback.h" |
| #include "net/base/escape.h" |
| #include "net/base/io_buffer.h" |
| #include "net/base/load_flags.h" |
| #include "net/base/net_errors.h" |
| #include "net/base/url_util.h" |
| #include "net/http/http_response_headers.h" |
| #include "net/url_request/url_fetcher_response_writer.h" |
| #include "net/url_request/url_request_context_getter.h" |
| #include "net/url_request/url_request_status.h" |
| |
| namespace precache { |
| |
// The following flags are for privacy reasons. For example, if a user clears
// their cookies, but a tracking beacon is prefetched and the beacon specifies
// its source URL in a URL param, the beacon site would be able to rebuild a
// profile of the user. All three flags should occur together, or not at all,
// per
// https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion.
// Every fetch issued by this file ORs these into its load flags.
const int kNoTracking =
    net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
    net::LOAD_DO_NOT_SEND_AUTH_DATA;

// The maximum number of URLFetcher requests that can be in flight in parallel.
// Note that OnManifestFetchComplete and OnResourceFetchComplete perform
// remove_if operations which are O(kMaxParallelFetches). Those should be
// optimized before increasing this value significantly.
const int kMaxParallelFetches = 10;
| |
| namespace { |
| |
// The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to
// a number we expect to be in the 99th percentile for the histogram, give or
// take. Samples above this fall into the histogram's overflow bucket.
const int kMaxResponseBytes = 500 * 1024 * 1024;
| |
| GURL GetDefaultConfigURL() { |
| const base::CommandLine& command_line = |
| *base::CommandLine::ForCurrentProcess(); |
| if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) { |
| return GURL( |
| command_line.GetSwitchValueASCII(switches::kPrecacheConfigSettingsURL)); |
| } |
| |
| #if defined(PRECACHE_CONFIG_SETTINGS_URL) |
| return GURL(PRECACHE_CONFIG_SETTINGS_URL); |
| #else |
| // The precache config settings URL could not be determined, so return an |
| // empty, invalid GURL. |
| return GURL(); |
| #endif |
| } |
| |
| std::string GetDefaultManifestURLPrefix() { |
| const base::CommandLine& command_line = |
| *base::CommandLine::ForCurrentProcess(); |
| if (command_line.HasSwitch(switches::kPrecacheManifestURLPrefix)) { |
| return command_line.GetSwitchValueASCII( |
| switches::kPrecacheManifestURLPrefix); |
| } |
| |
| #if defined(PRECACHE_MANIFEST_URL_PREFIX) |
| return PRECACHE_MANIFEST_URL_PREFIX; |
| #else |
| // The precache manifest URL prefix could not be determined, so return an |
| // empty string. |
| return std::string(); |
| #endif |
| } |
| |
| // Attempts to parse a protobuf message from the response string of a |
| // URLFetcher. If parsing is successful, the message parameter will contain the |
| // parsed protobuf and this function will return true. Otherwise, returns false. |
| bool ParseProtoFromFetchResponse(const net::URLFetcher& source, |
| ::google::protobuf::MessageLite* message) { |
| std::string response_string; |
| |
| if (!source.GetStatus().is_success()) { |
| DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec(); |
| return false; |
| } |
| if (!source.GetResponseAsString(&response_string)) { |
| DLOG(WARNING) << "No response string present: " |
| << source.GetOriginalURL().spec(); |
| return false; |
| } |
| if (!message->ParseFromString(response_string)) { |
| DLOG(WARNING) << "Unable to parse proto served from " |
| << source.GetOriginalURL().spec(); |
| return false; |
| } |
| return true; |
| } |
| |
| // Returns the resource selection bitset from the |manifest| for the given |
| // |experiment_id|. By default all resource will be selected if the experiment |
| // group is not found. |
| uint64_t GetResourceBitset(const PrecacheManifest& manifest, |
| uint32_t experiment_id) { |
| if (manifest.has_experiments()) { |
| const auto& resource_bitset_map = |
| manifest.experiments().resources_by_experiment_group(); |
| const auto& resource_bitset_it = resource_bitset_map.find(experiment_id); |
| if (resource_bitset_it != resource_bitset_map.end()) |
| return resource_bitset_it->second.bitset(); |
| } |
| return ~0ULL; |
| } |
| |
// URLFetcherResponseWriter that ignores the response body, in order to avoid
// the unnecessary memory usage. Use it rather than the default if you don't
// care about parsing the response body. We use it below as a means to populate
// the cache with requested resource URLs.
class URLFetcherNullWriter : public net::URLFetcherResponseWriter {
 public:
  // Nothing to set up; always succeeds synchronously.
  int Initialize(const net::CompletionCallback& callback) override {
    return net::OK;
  }

  // Reports the bytes as consumed without storing them anywhere.
  int Write(net::IOBuffer* buffer,
            int num_bytes,
            const net::CompletionCallback& callback) override {
    return num_bytes;
  }

  // Nothing to flush or close; always succeeds synchronously.
  int Finish(int net_error, const net::CompletionCallback& callback) override {
    return net::OK;
  }
};
| |
| // Returns the base64 encoded resource URL hashes. The resource URLs are hashed |
| // individually, and 8 bytes of each hash is appended together, which is then |
| // encoded to base64. |
| std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) { |
| // Each resource hash uses 8 bytes, instead of the 20 bytes of sha1 hash, as a |
| // tradeoff between sending more bytes and reducing hash collisions. |
| const size_t kHashBytesSize = 8; |
| std::string hashes; |
| hashes.reserve(urls.size() * kHashBytesSize); |
| |
| for (const auto& url : urls) { |
| const std::string& url_spec = url.spec(); |
| unsigned char sha1_hash[base::kSHA1Length]; |
| base::SHA1HashBytes( |
| reinterpret_cast<const unsigned char*>(url_spec.c_str()), |
| url_spec.size(), sha1_hash); |
| hashes.append(reinterpret_cast<const char*>(sha1_hash), kHashBytesSize); |
| } |
| base::Base64Encode(hashes, &hashes); |
| return hashes; |
| } |
| |
| // Retrieves the manifest info on the DB thread. Manifest info for each of the |
| // hosts in |hosts_to_fetch|, is added to |hosts_info|. |
| std::deque<ManifestHostInfo> RetrieveManifestInfo( |
| const base::WeakPtr<PrecacheDatabase>& precache_database, |
| std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) { |
| std::deque<ManifestHostInfo> hosts_info; |
| if (!precache_database) |
| return hosts_info; |
| |
| for (const auto& host : hosts_to_fetch) { |
| auto referrer_host_info = precache_database->GetReferrerHost(host.first); |
| if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { |
| std::vector<GURL> used_urls, unused_urls; |
| precache_database->GetURLListForReferrerHost(referrer_host_info.id, |
| &used_urls, &unused_urls); |
| hosts_info.push_back( |
| ManifestHostInfo(referrer_host_info.manifest_id, host.first, |
| host.second, GetResourceURLBase64Hash(used_urls), |
| GetResourceURLBase64Hash(unused_urls))); |
| } else { |
| hosts_info.push_back( |
| ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first, |
| host.second, std::string(), std::string())); |
| } |
| } |
| return hosts_info; |
| } |
| |
| PrecacheQuota RetrieveQuotaInfo( |
| const base::WeakPtr<PrecacheDatabase>& precache_database) { |
| PrecacheQuota quota; |
| if (precache_database) { |
| quota = precache_database->GetQuota(); |
| } |
| return quota; |
| } |
| |
| // Returns true if the |quota| time has expired. |
| bool IsQuotaTimeExpired(const PrecacheQuota& quota, |
| const base::Time& time_now) { |
| // Quota expires one day after the start time. |
| base::Time start_time = base::Time::FromInternalValue(quota.start_time()); |
| return start_time > time_now || |
| start_time + base::TimeDelta::FromDays(1) < time_now; |
| } |
| |
| double ResourceWeight(const PrecacheResource& resource, int64_t host_visits) { |
| return resource.weight_ratio() * host_visits; |
| } |
| |
| } // namespace |
| |
// Starts fetching |url| immediately upon construction. |referrer| is the top
// hostname this fetch is attributed to (stored for the completion callback,
// not used as an HTTP header here). |callback| is run once, when the fetch
// finishes or is aborted for downloading more than |max_bytes| from the
// network. Resource requests begin with a cache-only probe; config/manifest
// requests go straight to the network.
PrecacheFetcher::Fetcher::Fetcher(
    net::URLRequestContextGetter* request_context,
    const GURL& url,
    const std::string& referrer,
    const base::Callback<void(const Fetcher&)>& callback,
    bool is_resource_request,
    size_t max_bytes)
    : request_context_(request_context),
      url_(url),
      referrer_(referrer),
      callback_(callback),
      is_resource_request_(is_resource_request),
      max_bytes_(max_bytes),
      response_bytes_(0),
      network_response_bytes_(0),
      was_cached_(false) {
  DCHECK(url.is_valid());
  if (is_resource_request_)
    LoadFromCache();
  else
    LoadFromNetwork();
}
| |
| PrecacheFetcher::Fetcher::~Fetcher() {} |
| |
// Issues a cache-only request for |url_| (LOAD_ONLY_FROM_CACHE, no
// validation). The response body is discarded via URLFetcherNullWriter; the
// point is only to learn whether the entry is cached. OnURLFetchComplete
// falls through to LoadFromNetwork on a cache miss or when the cached entry
// has validators.
void PrecacheFetcher::Fetcher::LoadFromCache() {
  fetch_stage_ = FetchStage::CACHE;
  cache_url_fetcher_ =
      net::URLFetcher::Create(url_, net::URLFetcher::GET, this);
  // Tag the request so its bytes are attributed to precache in data-use
  // measurement.
  data_use_measurement::DataUseUserData::AttachToFetcher(
      cache_url_fetcher_.get(),
      data_use_measurement::DataUseUserData::PRECACHE);
  cache_url_fetcher_->SetRequestContext(request_context_);
  cache_url_fetcher_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE |
                                   net::LOAD_SKIP_CACHE_VALIDATION |
                                   kNoTracking);
  // Drop the response body; only the cache-hit/miss outcome matters.
  std::unique_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
  cache_url_fetcher_->SaveResponseWithWriter(std::move(null_writer));
  cache_url_fetcher_->Start();
}
| |
// Issues a network request for |url_|. Resource requests revalidate any
// cached copy and discard the body (the request exists only to populate the
// HTTP cache); config/manifest requests use default caching and keep the body
// for parsing by the caller.
void PrecacheFetcher::Fetcher::LoadFromNetwork() {
  fetch_stage_ = FetchStage::NETWORK;
  network_url_fetcher_ =
      net::URLFetcher::Create(url_, net::URLFetcher::GET, this);
  // Tag the request so its bytes are attributed to precache in data-use
  // measurement.
  data_use_measurement::DataUseUserData::AttachToFetcher(
      network_url_fetcher_.get(),
      data_use_measurement::DataUseUserData::PRECACHE);
  network_url_fetcher_->SetRequestContext(request_context_);
  if (is_resource_request_) {
    // LOAD_VALIDATE_CACHE allows us to refresh Date headers for resources
    // already in the cache. The Date headers are updated from 304s as well as
    // 200s.
    network_url_fetcher_->SetLoadFlags(net::LOAD_VALIDATE_CACHE | kNoTracking);
    // We don't need a copy of the response body for resource requests. The
    // request is issued only to populate the browser cache.
    std::unique_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
    network_url_fetcher_->SaveResponseWithWriter(std::move(null_writer));
  } else {
    // Config and manifest requests do not need to be revalidated. It's okay if
    // they expire from the cache minutes after we request them.
    network_url_fetcher_->SetLoadFlags(kNoTracking);
  }
  network_url_fetcher_->Start();
}
| |
// Download-progress hook. Aborts an in-flight network fetch as soon as its
// network byte count exceeds |max_bytes_|, recording the wasted bytes and
// reporting completion to the owner. Cache-stage fetches are never aborted
// here.
void PrecacheFetcher::Fetcher::OnURLFetchDownloadProgress(
    const net::URLFetcher* source,
    int64_t current,
    int64_t total,
    int64_t current_network_bytes) {
  // Abort if the network bytes have gone over the per-resource download cap.
  if (fetch_stage_ == FetchStage::NETWORK &&
      // |current_network_bytes| is guaranteed to be non-negative, so this cast
      // is safe.
      static_cast<size_t>(current_network_bytes) > max_bytes_) {
    // Call the completion callback, to attempt the next download, or to trigger
    // cleanup in precache_delegate_->OnDone().
    response_bytes_ = current;
    network_response_bytes_ = current_network_bytes;
    was_cached_ = source->WasCached();

    UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.NetworkWasted",
                                network_response_bytes_, 1,
                                1024 * 1024 /* 1 MB */, 100);
    // Cancel the download. Leaving |network_url_fetcher_| null also signals
    // the abort to callers, which check network_url_fetcher() for nullptr.
    network_url_fetcher_.reset();
    callback_.Run(*this);
  }
}
| |
// Completion hook for both the cache and network stages. A cache-stage result
// may chain into a network fetch; otherwise byte counts are captured and the
// owner's callback is run.
void PrecacheFetcher::Fetcher::OnURLFetchComplete(
    const net::URLFetcher* source) {
  CHECK(source);
  if (fetch_stage_ == FetchStage::CACHE &&
      (source->GetStatus().error() == net::ERR_CACHE_MISS ||
       (source->GetResponseHeaders() &&
        source->GetResponseHeaders()->HasValidators()))) {
    // If the resource was not found in the cache, request it from the
    // network.
    //
    // If the resource was found in the cache, but contains validators,
    // request a refresh. The presence of validators increases the chance that
    // we get a 304 response rather than a full one, thus allowing us to
    // refresh the cache with minimal network load.
    LoadFromNetwork();
    return;
  }

  // If any of:
  // - The request was for a config or manifest.
  // - The resource was a cache hit without validators.
  // - The response came from the network.
  // Then Fetcher is done with this URL and can return control to the caller.
  response_bytes_ = source->GetReceivedResponseContentLength();
  network_response_bytes_ = source->GetTotalReceivedBytes();
  was_cached_ = source->WasCached();
  callback_.Run(*this);
}
| |
// static
// Records end-of-cycle UMA: time to complete, percent of resource URLs
// fetched, and total/network byte counts accumulated in |unfinished_work|.
// NOTE(review): |remaining_manifest_urls_to_fetch| is accepted but not read
// anywhere in this body -- confirm whether a manifest-completion histogram
// was intended.
void PrecacheFetcher::RecordCompletionStatistics(
    const PrecacheUnfinishedWork& unfinished_work,
    size_t remaining_manifest_urls_to_fetch,
    size_t remaining_resource_urls_to_fetch) {
  // These may be unset in tests.
  if (!unfinished_work.has_start_time())
    return;
  base::TimeDelta time_to_fetch =
      base::Time::Now() -
      base::Time::FromInternalValue(unfinished_work.start_time());
  UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch,
                             base::TimeDelta::FromSeconds(1),
                             base::TimeDelta::FromHours(4), 50);

  // Percent of the resource URLs queued this cycle that were fetched. A cycle
  // with zero resources lands in the 101 overflow bucket rather than
  // dividing by zero.
  int num_total_resources = unfinished_work.num_resource_urls();
  int percent_completed =
      num_total_resources == 0
          ? 101  // Overflow bucket.
          : (100 * (static_cast<double>(num_total_resources -
                                        remaining_resource_urls_to_fetch) /
                    num_total_resources));

  UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
                           percent_completed);
  UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
                              unfinished_work.total_bytes(), 1,
                              kMaxResponseBytes, 100);
  UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
                              unfinished_work.network_bytes(), 1,
                              kMaxResponseBytes, 100);
}
| |
// static
// Test-only passthrough exposing the file-local GetResourceURLBase64Hash.
std::string PrecacheFetcher::GetResourceURLBase64HashForTesting(
    const std::vector<GURL>& urls) {
  return GetResourceURLBase64Hash(urls);
}
| |
// Constructs a fetcher that resumes from |unfinished_work| (which may carry a
// previously fetched config and a partially-fetched resource list). Empty
// |config_url| / |manifest_url_prefix| fall back to the defaults resolved at
// fetch time. Database work is posted to |db_task_runner|; |precache_delegate|
// is notified via OnDone when the cycle ends.
PrecacheFetcher::PrecacheFetcher(
    net::URLRequestContextGetter* request_context,
    const GURL& config_url,
    const std::string& manifest_url_prefix,
    std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
    uint32_t experiment_id,
    const base::WeakPtr<PrecacheDatabase>& precache_database,
    const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner,
    PrecacheFetcher::PrecacheDelegate* precache_delegate)
    : request_context_(request_context),
      config_url_(config_url),
      manifest_url_prefix_(manifest_url_prefix),
      precache_database_(precache_database),
      db_task_runner_(std::move(db_task_runner)),
      precache_delegate_(precache_delegate),
      pool_(kMaxParallelFetches),
      experiment_id_(experiment_id) {
  DCHECK(request_context_.get());  // Request context must be non-NULL.
  DCHECK(precache_delegate_);  // Precache delegate must be non-NULL.

  DCHECK_NE(GURL(), GetDefaultConfigURL())
      << "Could not determine the precache config settings URL.";
  DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
      << "Could not determine the default precache manifest URL prefix.";
  DCHECK(unfinished_work);

  // Copy resources to member variable as a convenience.
  // TODO(rajendrant): Consider accessing these directly from the proto, by
  // keeping track of the current resource index.
  for (const auto& resource : unfinished_work->resource()) {
    if (resource.has_url() && resource.has_top_host_name()) {
      // Weight doesn't matter, as the resources have already been sorted by
      // this point.
      resources_to_fetch_.emplace_back(GURL(resource.url()),
                                       resource.top_host_name(), 0);
    }
  }
  unfinished_work_ = std::move(unfinished_work);
}
| |
| PrecacheFetcher::~PrecacheFetcher() { |
| } |
| |
// Stops all in-flight fetches and serializes the remaining work (top hosts
// and resources, both queued and in-flight) back into the unfinished-work
// proto so a later PrecacheFetcher can resume. Returns null if precaching was
// already canceled.
std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
  // This could get called multiple times, and it should be handled gracefully.
  if (!unfinished_work_)
    return nullptr;

  unfinished_work_->clear_resource();
  if (unfinished_work_->has_config_settings()) {
    // If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and
    // top hosts should be left as is in |unfinished_work_|.
    unfinished_work_->clear_top_host();
    // In-flight hosts first, then queued ones, preserving fetch order.
    for (const auto& top_host : top_hosts_fetching_)
      unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
    for (const auto& top_host : top_hosts_to_fetch_)
      unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
  }
  // Persist both in-flight and queued resources; an in-flight fetch that has
  // not completed will simply be retried next cycle.
  for (const auto& resource : resources_fetching_) {
    auto new_resource = unfinished_work_->add_resource();
    new_resource->set_url(resource.url.spec());
    new_resource->set_top_host_name(resource.referrer);
  }
  for (const auto& resource : resources_to_fetch_) {
    auto new_resource = unfinished_work_->add_resource();
    new_resource->set_url(resource.url.spec());
    new_resource->set_top_host_name(resource.referrer);
  }
  top_hosts_fetching_.clear();
  top_hosts_to_fetch_.clear();
  resources_fetching_.clear();
  resources_to_fetch_.clear();
  // Deleting the pooled Fetchers cancels their underlying URLFetchers.
  pool_.DeleteAll();
  return std::move(unfinished_work_);
}
| |
// Entry point for a precache cycle. If a config was already fetched in a
// previous (interrupted) cycle, skips straight to manifest determination;
// otherwise fetches the config proto first.
void PrecacheFetcher::Start() {
  if (unfinished_work_->has_config_settings()) {
    DCHECK(unfinished_work_->has_start_time());
    DetermineManifests();
    return;
  }

  GURL config_url =
      config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;

  DCHECK(config_url.is_valid()) << "Config URL not valid: "
                                << config_url.possibly_invalid_spec();

  // Fetch the precache configuration settings from the server.
  DCHECK(pool_.IsEmpty()) << "All parallel requests should be available";
  // Config fetches are not byte-capped: pass the max size_t-representable
  // int32 as the limit.
  pool_.Add(base::MakeUnique<Fetcher>(
      request_context_.get(), config_url, std::string(),
      base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()),
      false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
}
| |
// Moves resources from the front of |resources_to_fetch_| into the fetch pool
// until the pool is full or the queue is empty. Each fetch is byte-capped by
// the tightest of: remaining daily quota, the per-resource cap, and the
// remaining total-bytes budget for this cycle.
void PrecacheFetcher::StartNextResourceFetch() {
  DCHECK(unfinished_work_->has_config_settings());
  while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
    ResourceInfo& resource = resources_to_fetch_.front();
    const size_t max_bytes = std::min(
        quota_.remaining(),
        std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
                 unfinished_work_->config_settings().max_bytes_total() -
                     unfinished_work_->total_bytes()));
    pool_.Add(base::MakeUnique<Fetcher>(
        request_context_.get(), resource.url, resource.referrer,
        base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
        true /* is_resource_request */, max_bytes));

    // Track the resource as in-flight so it can be re-queued on cancel and
    // removed in OnResourceFetchComplete.
    resources_fetching_.push_back(std::move(resource));
    resources_to_fetch_.pop_front();
  }
}
| |
// Moves top hosts from the front of |top_hosts_to_fetch_| into the fetch pool
// until the pool is full or the queue is empty.
void PrecacheFetcher::StartNextManifestFetches() {
  // We fetch as many manifests at a time as possible, as we need all resource
  // URLs in memory in order to rank them.
  while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) {
    ManifestHostInfo& top_host = top_hosts_to_fetch_.front();
    // |top_host.visits| is bound into the callback so the resources parsed
    // from this manifest can be weighted by host popularity. Manifest fetches
    // are not byte-capped.
    pool_.Add(base::MakeUnique<Fetcher>(
        request_context_.get(), top_host.manifest_url, top_host.hostname,
        base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(),
                   top_host.visits),
        false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
    top_hosts_fetching_.push_back(std::move(top_host));
    top_hosts_to_fetch_.pop_front();
  }
}
| |
// Records completion UMA and notifies the delegate that this precache cycle
// is over. The delegate's OnDone may delete this PrecacheFetcher, so callers
// must not touch state afterwards.
void PrecacheFetcher::NotifyDone(
    size_t remaining_manifest_urls_to_fetch,
    size_t remaining_resource_urls_to_fetch) {
  RecordCompletionStatistics(*unfinished_work_,
                             remaining_manifest_urls_to_fetch,
                             remaining_resource_urls_to_fetch);
  precache_delegate_->OnDone();
}
| |
// Main scheduling step: aborts the cycle if the byte budget or daily quota is
// exhausted, otherwise tops up the fetch pool with resource and manifest
// fetches, and ends the cycle when nothing remains.
void PrecacheFetcher::StartNextFetch() {
  DCHECK(unfinished_work_->has_config_settings());

  // If over the precache total size cap or daily quota, then stop prefetching.
  if ((unfinished_work_->total_bytes() >
       unfinished_work_->config_settings().max_bytes_total()) ||
      quota_.remaining() == 0) {
    pool_.DeleteAll();
    // Report how much work was left undone at the time of the abort.
    NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(),
               resources_to_fetch_.size() + resources_fetching_.size());
    return;
  }

  StartNextResourceFetch();
  StartNextManifestFetches();
  if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() &&
      pool_.IsEmpty()) {
    // There are no more URLs to fetch, so end the precache cycle.
    NotifyDone(0, 0);
    // OnDone may have deleted this PrecacheFetcher, so don't do anything after
    // it is called.
  }
}
| |
// Called when the config fetch finishes. On success (or on parse failure,
// which leaves the default-valued config in place) proceeds to manifest
// determination. A null network fetcher indicates the download was aborted in
// OnURLFetchDownloadProgress for exceeding its byte cap.
void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
  UpdateStats(source.response_bytes(), source.network_response_bytes());
  if (source.network_url_fetcher() == nullptr) {
    // NOTE(review): on this aborted-config path the cycle neither continues
    // nor calls NotifyDone(), so the delegate is never notified -- confirm
    // that stalling silently here is intended.
    pool_.DeleteAll();  // Cancel any other ongoing request.
  } else {
    // Attempt to parse the config proto. On failure, continue on with the
    // default configuration.
    ParseProtoFromFetchResponse(
        *source.network_url_fetcher(),
        unfinished_work_->mutable_config_settings());
    pool_.Delete(source);
    DetermineManifests();
  }
}
| |
// Builds the deduplicated list of hosts whose manifests should be fetched
// (top hosts up to the configured count, plus config-forced sites on a fresh
// cycle), then asynchronously retrieves each host's manifest/usage info from
// the database, continuing in OnManifestInfoRetrieved.
void PrecacheFetcher::DetermineManifests() {
  DCHECK(unfinished_work_->has_config_settings());

  std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch;
  // Keep track of manifest URLs that are being fetched, in order to elide
  // duplicates.
  std::set<base::StringPiece> seen_top_hosts;
  int64_t rank = 0;

  for (const auto& host : unfinished_work_->top_host()) {
    ++rank;
    if (rank > unfinished_work_->config_settings().top_sites_count())
      break;
    if (seen_top_hosts.insert(host.hostname()).second)
      top_hosts_to_fetch.emplace_back(host.hostname(), host.visits());
  }

  // Attempt to fetch manifests for starting hosts up to the maximum top sites
  // count. If a manifest does not exist for a particular starting host, then
  // the fetch will fail, and that starting host will be ignored. Starting
  // hosts are not added if this is a continuation from a previous precache
  // session.
  if (resources_to_fetch_.empty()) {
    for (const std::string& host :
         unfinished_work_->config_settings().forced_site()) {
      // We add a forced site with visits == 0, which means its resources will
      // be downloaded last. TODO(twifkak): Consider removing support for
      // forced_site.
      if (seen_top_hosts.insert(host).second)
        top_hosts_to_fetch.emplace_back(host, 0);
    }
  }
  // We retrieve manifest usage and quota info from the local database before
  // fetching the manifests.
  PostTaskAndReplyWithResult(
      db_task_runner_.get(), FROM_HERE,
      base::Bind(&RetrieveManifestInfo, precache_database_,
                 std::move(top_hosts_to_fetch)),
      base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr()));
}
| |
// Receives the per-host manifest info from the database, constructs each
// host's manifest URL (hostname double-escaped into the path, plus manifest
// id and used/unused resource-hash query params when usage history exists),
// then kicks off quota retrieval. Ends the cycle early if no valid manifest
// URL prefix is available.
void PrecacheFetcher::OnManifestInfoRetrieved(
    std::deque<ManifestHostInfo> manifests_info) {
  const std::string prefix = manifest_url_prefix_.empty()
                                 ? GetDefaultManifestURLPrefix()
                                 : manifest_url_prefix_;
  if (!GURL(prefix).is_valid()) {
    // Don't attempt to fetch any manifests if the manifest URL prefix
    // is invalid.
    top_hosts_to_fetch_.clear();
    unfinished_work_->set_num_manifest_urls(manifests_info.size());
    NotifyDone(manifests_info.size(), resources_to_rank_.size());
    return;
  }

  top_hosts_to_fetch_ = std::move(manifests_info);
  for (auto& manifest : top_hosts_to_fetch_) {
    // The hostname is escaped twice: once for embedding in the manifest file
    // name and once more because that name is itself part of a URL.
    manifest.manifest_url =
        GURL(prefix +
             net::EscapeQueryParamValue(
                 net::EscapeQueryParamValue(manifest.hostname, false), false));
    if (manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) {
      // Known host: tell the server which manifest we already have and which
      // of its resources were used/unused, so it can tailor the response.
      manifest.manifest_url = net::AppendOrReplaceQueryParameter(
          manifest.manifest_url, "manifest",
          std::to_string(manifest.manifest_id));
      manifest.manifest_url = net::AppendOrReplaceQueryParameter(
          manifest.manifest_url, "used_resources", manifest.used_url_hash);
      manifest.manifest_url = net::AppendOrReplaceQueryParameter(
          manifest.manifest_url, "unused_resources", manifest.unused_url_hash);
      DCHECK(manifest.manifest_url.is_valid());
    }
  }
  unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_.size());

  PostTaskAndReplyWithResult(
      db_task_runner_.get(), FROM_HERE,
      base::Bind(&RetrieveQuotaInfo, precache_database_),
      base::Bind(&PrecacheFetcher::OnQuotaInfoRetrieved, AsWeakPtr()));
}
| |
// Receives the persisted quota. If the stored quota window has expired,
// starts a fresh one-day window with the configured daily byte budget and
// persists it, then begins fetching.
void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) {
  quota_ = quota;
  base::Time time_now = base::Time::Now();
  if (IsQuotaTimeExpired(quota_, time_now)) {
    // This is a new day. Update daily quota, that starts today and expires by
    // end of today.
    quota_.set_start_time(time_now.LocalMidnight().ToInternalValue());
    quota_.set_remaining(
        unfinished_work_->config_settings().daily_quota_total());
    db_task_runner_->PostTask(
        FROM_HERE,
        base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_));
  }
  StartNextFetch();
}
| |
| ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, |
| const std::string& hostname, |
| int64_t visits, |
| const std::string& used_url_hash, |
| const std::string& unused_url_hash) |
| : manifest_id(manifest_id), |
| hostname(hostname), |
| visits(visits), |
| used_url_hash(used_url_hash), |
| unused_url_hash(unused_url_hash) {} |
| |
| ManifestHostInfo::~ManifestHostInfo() {} |
| |
| ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; |
| |
| ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; |
| |
| ResourceInfo::ResourceInfo(const GURL& url, |
| const std::string& referrer, |
| double weight) |
| : url(url), referrer(referrer), weight(weight) {} |
| |
| ResourceInfo::~ResourceInfo() {} |
| |
| ResourceInfo::ResourceInfo(ResourceInfo&&) = default; |
| |
| ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default; |
| |
// Called when one host's manifest fetch finishes. Parses the manifest, queues
// its experiment-selected resources (weighted by |host_visits|) for ranking,
// records the manifest id in the database, and schedules more work. When the
// last manifest completes, the ranked resources are promoted to the fetch
// queue.
void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits,
                                              const Fetcher& source) {
  DCHECK(unfinished_work_->has_config_settings());
  UpdateStats(source.response_bytes(), source.network_response_bytes());
  // A null network fetcher means the download was aborted for exceeding its
  // byte cap; cancel everything else in flight.
  if (source.network_url_fetcher() == nullptr) {
    pool_.DeleteAll();  // Cancel any other ongoing request.
  } else {
    PrecacheManifest manifest;

    if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) {
      // Consider at most the configured number of top resources.
      const int32_t len =
          std::min(manifest.resource_size(),
                   unfinished_work_->config_settings().top_resources_count());
      // Bit i of the bitset selects manifest.resource(i) for this experiment
      // group.
      const uint64_t resource_bitset =
          GetResourceBitset(manifest, experiment_id_);
      for (int i = 0; i < len; ++i) {
        if (((0x1ULL << i) & resource_bitset) &&
            manifest.resource(i).has_url()) {
          GURL url(manifest.resource(i).url());
          if (url.is_valid()) {
            // Drop resources below the configured minimum weight.
            double weight = ResourceWeight(manifest.resource(i), host_visits);
            if (weight >= unfinished_work_->config_settings().min_weight())
              resources_to_rank_.emplace_back(url, source.referrer(), weight);
          }
        }
      }
      // Remember which manifest version this host now has.
      db_task_runner_->PostTask(
          FROM_HERE, base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost,
                                precache_database_, source.referrer(),
                                manifest.id().id(), base::Time::Now()));
    }
  }

  // O(kMaxParallelFetches) scan; see the comment on kMaxParallelFetches.
  top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) {
    return top_host.manifest_url == source.url();
  });

  pool_.Delete(source);

  // All manifests are in; rank and queue the collected resources.
  if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty())
    QueueResourcesForFetch();

  StartNextFetch();
}
| |
// Promotes the resources collected from all manifests into the fetch queue,
// optionally sorting by weight (global ranking) and truncating to the
// configured total resource count.
void PrecacheFetcher::QueueResourcesForFetch() {
  // Done fetching manifests. Now move resources_to_rank_ into
  // resources_to_fetch_, so that StartNextFetch will begin fetching resources.
  resources_to_fetch_ = std::move(resources_to_rank_);

  if (unfinished_work_->config_settings().global_ranking()) {
    // Sort resources_to_fetch_ by descending weight. stable_sort preserves
    // the original (per-host) order among equal weights.
    std::stable_sort(resources_to_fetch_.begin(), resources_to_fetch_.end(),
                     [](const ResourceInfo& first, const ResourceInfo& second) {
                       return first.weight > second.weight;
                     });
  }

  // Truncate to size |total_resources_count|.
  const size_t num_resources = std::min(
      resources_to_fetch_.size(),
      static_cast<size_t>(
          unfinished_work_->config_settings().total_resources_count()));
  resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources,
                            resources_to_fetch_.end());

  // Save denominator for PercentCompleted UMA.
  unfinished_work_->set_num_resource_urls(resources_to_fetch_.size());
}
| |
// Called when one resource fetch finishes (or is aborted for size). Records
// the prefetch in the database, retires the in-flight entry, and schedules
// more work.
void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
  UpdateStats(source.response_bytes(), source.network_response_bytes());

  db_task_runner_->PostTask(
      FROM_HERE,
      base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_,
                 source.url(), source.referrer(), base::Time::Now(),
                 source.was_cached(), source.response_bytes()));

  // O(kMaxParallelFetches) scan; see the comment on kMaxParallelFetches.
  resources_fetching_.remove_if([&source](const ResourceInfo& resource) {
    return resource.url == source.url();
  });

  pool_.Delete(source);

  // The resource has already been put in the cache during the fetch process, so
  // nothing more needs to be done for the resource.
  StartNextFetch();
}
| |
| void PrecacheFetcher::UpdateStats(int64_t response_bytes, |
| int64_t network_response_bytes) { |
| DCHECK_LE(0, response_bytes); |
| DCHECK_LE(0, network_response_bytes); |
| |
| unfinished_work_->set_total_bytes( |
| unfinished_work_->total_bytes() + response_bytes); |
| unfinished_work_->set_network_bytes( |
| unfinished_work_->network_bytes() + network_response_bytes); |
| |
| if (!IsQuotaTimeExpired(quota_, base::Time::Now())) { |
| uint64_t used_bytes = static_cast<uint64_t>(network_response_bytes); |
| int64_t remaining = |
| static_cast<int64_t>(quota_.remaining()) - network_response_bytes; |
| if (remaining < 0) |
| remaining = 0; |
| quota_.set_remaining( |
| used_bytes > quota_.remaining() ? 0U : quota_.remaining() - used_bytes); |
| db_task_runner_->PostTask( |
| FROM_HERE, |
| base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); |
| } |
| } |
| |
| } // namespace precache |