// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/precache/core/precache_fetcher.h"
#include <algorithm>
#include <limits>
#include <set>
#include <utility>
#include <vector>
#include "base/base64.h"
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/callback.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/containers/hash_tables.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram_macros.h"
#include "base/sha1.h"
#include "base/strings/string_piece.h"
#include "base/task_runner_util.h"
#include "components/data_use_measurement/core/data_use_user_data.h"
#include "components/precache/core/precache_database.h"
#include "components/precache/core/precache_switches.h"
#include "components/precache/core/proto/precache.pb.h"
#include "components/precache/core/proto/quota.pb.h"
#include "components/precache/core/proto/unfinished_work.pb.h"
#include "net/base/completion_callback.h"
#include "net/base/escape.h"
#include "net/base/io_buffer.h"
#include "net/base/load_flags.h"
#include "net/base/net_errors.h"
#include "net/base/url_util.h"
#include "net/http/http_response_headers.h"
#include "net/url_request/url_fetcher_response_writer.h"
#include "net/url_request/url_request_context_getter.h"
#include "net/url_request/url_request_status.h"
namespace precache {
// The following flags are for privacy reasons. For example, if a user clears
// their cookies, but a tracking beacon is prefetched and the beacon specifies
// its source URL in a URL param, the beacon site would be able to rebuild a
// profile of the user. All three flags should occur together, or not at all,
// per
// https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion.
const int kNoTracking =
net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
net::LOAD_DO_NOT_SEND_AUTH_DATA;
// The maximum number of URLFetcher requests that can be in flight in parallel.
// Note that OnManifestFetchComplete and OnResourceFetchComplete perform
// remove_if operations which are O(kMaxParallelFetches). Those should be
// optimized before increasing this value significantly.
const int kMaxParallelFetches = 10;
namespace {
// The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to
// a number we expect to be in the 99th percentile for the histogram, give or
// take.
const int kMaxResponseBytes = 500 * 1024 * 1024;
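// Returns the URL of the precache config settings: the command-line override
// if present, otherwise the compile-time default, or an empty GURL if neither
// is available.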
GURL GetDefaultConfigURL() {
const base::CommandLine& command_line =
*base::CommandLine::ForCurrentProcess();
if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) {
return GURL(
command_line.GetSwitchValueASCII(switches::kPrecacheConfigSettingsURL));
}
#if defined(PRECACHE_CONFIG_SETTINGS_URL)
return GURL(PRECACHE_CONFIG_SETTINGS_URL);
#else
// The precache config settings URL could not be determined, so return an
// empty, invalid GURL.
return GURL();
#endif
}
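// Returns the prefix for precache manifest URLs: the command-line override if
// present, otherwise the compile-time default, or an empty string if neither
// is available.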
std::string GetDefaultManifestURLPrefix() {
const base::CommandLine& command_line =
*base::CommandLine::ForCurrentProcess();
if (command_line.HasSwitch(switches::kPrecacheManifestURLPrefix)) {
return command_line.GetSwitchValueASCII(
switches::kPrecacheManifestURLPrefix);
}
#if defined(PRECACHE_MANIFEST_URL_PREFIX)
return PRECACHE_MANIFEST_URL_PREFIX;
#else
// The precache manifest URL prefix could not be determined, so return an
// empty string.
return std::string();
#endif
}
// Attempts to parse a protobuf message from the response string of a
// URLFetcher. If parsing succeeds, |message| will contain the parsed protobuf
// and this function returns true. Otherwise, returns false.
bool ParseProtoFromFetchResponse(const net::URLFetcher& source,
::google::protobuf::MessageLite* message) {
std::string response_string;
if (!source.GetStatus().is_success()) {
DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec();
return false;
}
if (!source.GetResponseAsString(&response_string)) {
DLOG(WARNING) << "No response string present: "
<< source.GetOriginalURL().spec();
return false;
}
if (!message->ParseFromString(response_string)) {
DLOG(WARNING) << "Unable to parse proto served from "
<< source.GetOriginalURL().spec();
return false;
}
return true;
}
// Returns the resource selection bitset from the |manifest| for the given
// |experiment_id|. By default, all resources are selected if the experiment
// group is not found.
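// For example, a bitset of 0b0101 selects resources 0 and 2 of the manifest.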
uint64_t GetResourceBitset(const PrecacheManifest& manifest,
uint32_t experiment_id) {
if (manifest.has_experiments()) {
const auto& resource_bitset_map =
manifest.experiments().resources_by_experiment_group();
const auto& resource_bitset_it = resource_bitset_map.find(experiment_id);
if (resource_bitset_it != resource_bitset_map.end())
return resource_bitset_it->second.bitset();
}
return ~0ULL;
}
// URLFetcherResponseWriter that ignores the response body, in order to avoid
// unnecessary memory usage. Use it rather than the default if you don't care
// about parsing the response body. We use it below as a means to populate the
// cache with requested resource URLs.
class URLFetcherNullWriter : public net::URLFetcherResponseWriter {
public:
int Initialize(const net::CompletionCallback& callback) override {
return net::OK;
}
int Write(net::IOBuffer* buffer,
int num_bytes,
const net::CompletionCallback& callback) override {
return num_bytes;
}
int Finish(int net_error, const net::CompletionCallback& callback) override {
return net::OK;
}
};
// Returns the base64-encoded resource URL hashes. Each resource URL is hashed
// individually, the first 8 bytes of each hash are concatenated, and the
// result is base64-encoded.
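// For example, for two URLs the result is the base64 encoding of 16 bytes:
// the first 8 bytes of SHA1(url1) followed by the first 8 bytes of SHA1(url2).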
std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) {
// Each resource hash uses 8 bytes, instead of the full 20-byte SHA1 hash, as
// a tradeoff between payload size and the risk of hash collisions.
const size_t kHashBytesSize = 8;
std::string hashes;
hashes.reserve(urls.size() * kHashBytesSize);
for (const auto& url : urls) {
const std::string& url_spec = url.spec();
unsigned char sha1_hash[base::kSHA1Length];
base::SHA1HashBytes(
reinterpret_cast<const unsigned char*>(url_spec.c_str()),
url_spec.size(), sha1_hash);
hashes.append(reinterpret_cast<const char*>(sha1_hash), kHashBytesSize);
}
base::Base64Encode(hashes, &hashes);
return hashes;
}
// Retrieves the manifest info on the DB thread. Returns the manifest info for
// each of the hosts in |hosts_to_fetch|.
std::deque<ManifestHostInfo> RetrieveManifestInfo(
const base::WeakPtr<PrecacheDatabase>& precache_database,
std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) {
std::deque<ManifestHostInfo> hosts_info;
if (!precache_database)
return hosts_info;
for (const auto& host : hosts_to_fetch) {
auto referrer_host_info = precache_database->GetReferrerHost(host.first);
if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) {
std::vector<GURL> used_urls, unused_urls;
precache_database->GetURLListForReferrerHost(referrer_host_info.id,
&used_urls, &unused_urls);
hosts_info.push_back(
ManifestHostInfo(referrer_host_info.manifest_id, host.first,
host.second, GetResourceURLBase64Hash(used_urls),
GetResourceURLBase64Hash(unused_urls)));
} else {
hosts_info.push_back(
ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first,
host.second, std::string(), std::string()));
}
}
return hosts_info;
}
PrecacheQuota RetrieveQuotaInfo(
const base::WeakPtr<PrecacheDatabase>& precache_database) {
PrecacheQuota quota;
if (precache_database) {
quota = precache_database->GetQuota();
}
return quota;
}
// Returns true if the |quota| time has expired.
bool IsQuotaTimeExpired(const PrecacheQuota& quota,
const base::Time& time_now) {
// Quota expires one day after the start time.
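// A start time in the future (e.g. after a clock change) is also treated as
// expired, so that the quota gets reset.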
base::Time start_time = base::Time::FromInternalValue(quota.start_time());
return start_time > time_now ||
start_time + base::TimeDelta::FromDays(1) < time_now;
}
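// Returns the ranking weight of |resource|: its weight ratio within the
// manifest scaled by the number of visits to the referring host.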
double ResourceWeight(const PrecacheResource& resource, int64_t host_visits) {
return resource.weight_ratio() * host_visits;
}
} // namespace
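// Fetches |url|. Resource requests are first attempted from the cache; config
// and manifest requests go straight to the network.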
PrecacheFetcher::Fetcher::Fetcher(
net::URLRequestContextGetter* request_context,
const GURL& url,
const std::string& referrer,
const base::Callback<void(const Fetcher&)>& callback,
bool is_resource_request,
size_t max_bytes)
: request_context_(request_context),
url_(url),
referrer_(referrer),
callback_(callback),
is_resource_request_(is_resource_request),
max_bytes_(max_bytes),
response_bytes_(0),
network_response_bytes_(0),
was_cached_(false) {
DCHECK(url.is_valid());
if (is_resource_request_)
LoadFromCache();
else
LoadFromNetwork();
}
PrecacheFetcher::Fetcher::~Fetcher() {}
void PrecacheFetcher::Fetcher::LoadFromCache() {
fetch_stage_ = FetchStage::CACHE;
cache_url_fetcher_ =
net::URLFetcher::Create(url_, net::URLFetcher::GET, this);
data_use_measurement::DataUseUserData::AttachToFetcher(
cache_url_fetcher_.get(),
data_use_measurement::DataUseUserData::PRECACHE);
cache_url_fetcher_->SetRequestContext(request_context_);
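// LOAD_ONLY_FROM_CACHE with LOAD_SKIP_CACHE_VALIDATION serves the request
// from the cache without touching the network. A cache miss fails with
// ERR_CACHE_MISS, which OnURLFetchComplete handles by falling back to
// LoadFromNetwork().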
cache_url_fetcher_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE |
net::LOAD_SKIP_CACHE_VALIDATION |
kNoTracking);
std::unique_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
cache_url_fetcher_->SaveResponseWithWriter(std::move(null_writer));
cache_url_fetcher_->Start();
}
void PrecacheFetcher::Fetcher::LoadFromNetwork() {
fetch_stage_ = FetchStage::NETWORK;
network_url_fetcher_ =
net::URLFetcher::Create(url_, net::URLFetcher::GET, this);
data_use_measurement::DataUseUserData::AttachToFetcher(
network_url_fetcher_.get(),
data_use_measurement::DataUseUserData::PRECACHE);
network_url_fetcher_->SetRequestContext(request_context_);
if (is_resource_request_) {
// LOAD_VALIDATE_CACHE allows us to refresh Date headers for resources
// already in the cache. The Date headers are updated from 304s as well as
// 200s.
network_url_fetcher_->SetLoadFlags(net::LOAD_VALIDATE_CACHE | kNoTracking);
// We don't need a copy of the response body for resource requests. The
// request is issued only to populate the browser cache.
std::unique_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
network_url_fetcher_->SaveResponseWithWriter(std::move(null_writer));
} else {
// Config and manifest requests do not need to be revalidated. It's okay if
// they expire from the cache minutes after we request them.
network_url_fetcher_->SetLoadFlags(kNoTracking);
}
network_url_fetcher_->Start();
}
void PrecacheFetcher::Fetcher::OnURLFetchDownloadProgress(
const net::URLFetcher* source,
int64_t current,
int64_t total,
int64_t current_network_bytes) {
// Abort the fetch if the network bytes received so far exceed the
// per-resource download cap.
if (fetch_stage_ == FetchStage::NETWORK &&
// |current_network_bytes| is guaranteed to be non-negative, so this cast
// is safe.
static_cast<size_t>(current_network_bytes) > max_bytes_) {
// Run the completion callback to attempt the next download, or to trigger
// cleanup in precache_delegate_->OnDone().
response_bytes_ = current;
network_response_bytes_ = current_network_bytes;
was_cached_ = source->WasCached();
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.NetworkWasted",
network_response_bytes_, 1,
1024 * 1024 /* 1 MB */, 100);
// Cancel the download.
network_url_fetcher_.reset();
callback_.Run(*this);
}
}
void PrecacheFetcher::Fetcher::OnURLFetchComplete(
const net::URLFetcher* source) {
CHECK(source);
if (fetch_stage_ == FetchStage::CACHE &&
(source->GetStatus().error() == net::ERR_CACHE_MISS ||
(source->GetResponseHeaders() &&
source->GetResponseHeaders()->HasValidators()))) {
// If the resource was not found in the cache, request it from the
// network.
//
// If the resource was found in the cache, but contains validators,
// request a refresh. The presence of validators increases the chance that
// we get a 304 response rather than a full one, thus allowing us to
// refresh the cache with minimal network load.
LoadFromNetwork();
return;
}
// If any of:
// - The request was for a config or manifest.
// - The resource was a cache hit without validators.
// - The response came from the network.
// Then Fetcher is done with this URL and can return control to the caller.
response_bytes_ = source->GetReceivedResponseContentLength();
network_response_bytes_ = source->GetTotalReceivedBytes();
was_cached_ = source->WasCached();
callback_.Run(*this);
}
// static
void PrecacheFetcher::RecordCompletionStatistics(
const PrecacheUnfinishedWork& unfinished_work,
size_t remaining_manifest_urls_to_fetch,
size_t remaining_resource_urls_to_fetch) {
// These may be unset in tests.
if (!unfinished_work.has_start_time())
return;
base::TimeDelta time_to_fetch =
base::Time::Now() -
base::Time::FromInternalValue(unfinished_work.start_time());
UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch,
base::TimeDelta::FromSeconds(1),
base::TimeDelta::FromHours(4), 50);
int num_total_resources = unfinished_work.num_resource_urls();
int percent_completed =
num_total_resources == 0
? 101 // Overflow bucket.
: (100 * (static_cast<double>(num_total_resources -
remaining_resource_urls_to_fetch) /
num_total_resources));
UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
percent_completed);
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
unfinished_work.total_bytes(), 1,
kMaxResponseBytes, 100);
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
unfinished_work.network_bytes(), 1,
kMaxResponseBytes, 100);
}
// static
std::string PrecacheFetcher::GetResourceURLBase64HashForTesting(
const std::vector<GURL>& urls) {
return GetResourceURLBase64Hash(urls);
}
PrecacheFetcher::PrecacheFetcher(
net::URLRequestContextGetter* request_context,
const GURL& config_url,
const std::string& manifest_url_prefix,
std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
uint32_t experiment_id,
const base::WeakPtr<PrecacheDatabase>& precache_database,
const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner,
PrecacheFetcher::PrecacheDelegate* precache_delegate)
: request_context_(request_context),
config_url_(config_url),
manifest_url_prefix_(manifest_url_prefix),
precache_database_(precache_database),
db_task_runner_(std::move(db_task_runner)),
precache_delegate_(precache_delegate),
pool_(kMaxParallelFetches),
experiment_id_(experiment_id) {
DCHECK(request_context_.get()); // Request context must be non-NULL.
DCHECK(precache_delegate_); // Precache delegate must be non-NULL.
DCHECK_NE(GURL(), GetDefaultConfigURL())
<< "Could not determine the precache config settings URL.";
DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
<< "Could not determine the default precache manifest URL prefix.";
DCHECK(unfinished_work);
// Copy resources to member variable as a convenience.
// TODO(rajendrant): Consider accessing these directly from the proto, by
// keeping track of the current resource index.
for (const auto& resource : unfinished_work->resource()) {
if (resource.has_url() && resource.has_top_host_name()) {
// Weight doesn't matter, as the resources have already been sorted by
// this point.
resources_to_fetch_.emplace_back(GURL(resource.url()),
resource.top_host_name(), 0);
}
}
unfinished_work_ = std::move(unfinished_work);
}
PrecacheFetcher::~PrecacheFetcher() {
}
std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
// This could get called multiple times, and it should be handled gracefully.
if (!unfinished_work_)
return nullptr;
unfinished_work_->clear_resource();
if (unfinished_work_->has_config_settings()) {
// If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and
// top hosts should be left as is in |unfinished_work_|.
unfinished_work_->clear_top_host();
for (const auto& top_host : top_hosts_fetching_)
unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
for (const auto& top_host : top_hosts_to_fetch_)
unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
}
for (const auto& resource : resources_fetching_) {
auto new_resource = unfinished_work_->add_resource();
new_resource->set_url(resource.url.spec());
new_resource->set_top_host_name(resource.referrer);
}
for (const auto& resource : resources_to_fetch_) {
auto new_resource = unfinished_work_->add_resource();
new_resource->set_url(resource.url.spec());
new_resource->set_top_host_name(resource.referrer);
}
top_hosts_fetching_.clear();
top_hosts_to_fetch_.clear();
resources_fetching_.clear();
resources_to_fetch_.clear();
pool_.DeleteAll();
return std::move(unfinished_work_);
}
void PrecacheFetcher::Start() {
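// If this is a continuation of unfinished work, the config settings have
// already been fetched, so skip straight to fetching manifests.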
if (unfinished_work_->has_config_settings()) {
DCHECK(unfinished_work_->has_start_time());
DetermineManifests();
return;
}
GURL config_url =
config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
DCHECK(config_url.is_valid()) << "Config URL not valid: "
<< config_url.possibly_invalid_spec();
// Fetch the precache configuration settings from the server.
DCHECK(pool_.IsEmpty()) << "All parallel requests should be available";
pool_.Add(base::MakeUnique<Fetcher>(
request_context_.get(), config_url, std::string(),
base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()),
false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
}
void PrecacheFetcher::StartNextResourceFetch() {
DCHECK(unfinished_work_->has_config_settings());
while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
ResourceInfo& resource = resources_to_fetch_.front();
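// Cap this fetch at the smallest of the remaining daily quota, the
// per-resource byte limit, and what is left of the total byte budget.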
const size_t max_bytes = std::min(
quota_.remaining(),
std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
unfinished_work_->config_settings().max_bytes_total() -
unfinished_work_->total_bytes()));
pool_.Add(base::MakeUnique<Fetcher>(
request_context_.get(), resource.url, resource.referrer,
base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
true /* is_resource_request */, max_bytes));
resources_fetching_.push_back(std::move(resource));
resources_to_fetch_.pop_front();
}
}
void PrecacheFetcher::StartNextManifestFetches() {
// We fetch as many manifests at a time as possible, as we need all resource
// URLs in memory in order to rank them.
while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) {
ManifestHostInfo& top_host = top_hosts_to_fetch_.front();
pool_.Add(base::MakeUnique<Fetcher>(
request_context_.get(), top_host.manifest_url, top_host.hostname,
base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(),
top_host.visits),
false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
top_hosts_fetching_.push_back(std::move(top_host));
top_hosts_to_fetch_.pop_front();
}
}
void PrecacheFetcher::NotifyDone(
size_t remaining_manifest_urls_to_fetch,
size_t remaining_resource_urls_to_fetch) {
RecordCompletionStatistics(*unfinished_work_,
remaining_manifest_urls_to_fetch,
remaining_resource_urls_to_fetch);
precache_delegate_->OnDone();
}
void PrecacheFetcher::StartNextFetch() {
DCHECK(unfinished_work_->has_config_settings());
// If over the precache total size cap or daily quota, then stop prefetching.
if ((unfinished_work_->total_bytes() >
unfinished_work_->config_settings().max_bytes_total()) ||
quota_.remaining() == 0) {
pool_.DeleteAll();
NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(),
resources_to_fetch_.size() + resources_fetching_.size());
return;
}
StartNextResourceFetch();
StartNextManifestFetches();
if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() &&
pool_.IsEmpty()) {
// There are no more URLs to fetch, so end the precache cycle.
NotifyDone(0, 0);
// OnDone may have deleted this PrecacheFetcher, so don't do anything after
// it is called.
}
}
void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
UpdateStats(source.response_bytes(), source.network_response_bytes());
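// A null network fetcher means the fetch was cancelled mid-download for
// exceeding its byte cap (see OnURLFetchDownloadProgress).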
if (source.network_url_fetcher() == nullptr) {
pool_.DeleteAll(); // Cancel any other ongoing request.
} else {
// Attempt to parse the config proto. On failure, continue on with the
// default configuration.
ParseProtoFromFetchResponse(
*source.network_url_fetcher(),
unfinished_work_->mutable_config_settings());
pool_.Delete(source);
DetermineManifests();
}
}
void PrecacheFetcher::DetermineManifests() {
DCHECK(unfinished_work_->has_config_settings());
std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch;
// Keep track of the hostnames already queued for a manifest fetch, in order
// to skip duplicates.
std::set<base::StringPiece> seen_top_hosts;
int64_t rank = 0;
for (const auto& host : unfinished_work_->top_host()) {
++rank;
if (rank > unfinished_work_->config_settings().top_sites_count())
break;
if (seen_top_hosts.insert(host.hostname()).second)
top_hosts_to_fetch.emplace_back(host.hostname(), host.visits());
}
// Attempt to fetch manifests for starting hosts up to the maximum top sites
// count. If a manifest does not exist for a particular starting host, then
// the fetch will fail, and that starting host will be ignored. Starting
// hosts are not added if this is a continuation from a previous precache
// session.
if (resources_to_fetch_.empty()) {
for (const std::string& host :
unfinished_work_->config_settings().forced_site()) {
// We add a forced site with visits == 0, which means its resources will
// be downloaded last. TODO(twifkak): Consider removing support for
// forced_site.
if (seen_top_hosts.insert(host).second)
top_hosts_to_fetch.emplace_back(host, 0);
}
}
// We retrieve manifest usage and quota info from the local database before
// fetching the manifests.
PostTaskAndReplyWithResult(
db_task_runner_.get(), FROM_HERE,
base::Bind(&RetrieveManifestInfo, precache_database_,
std::move(top_hosts_to_fetch)),
base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr()));
}
void PrecacheFetcher::OnManifestInfoRetrieved(
std::deque<ManifestHostInfo> manifests_info) {
const std::string prefix = manifest_url_prefix_.empty()
? GetDefaultManifestURLPrefix()
: manifest_url_prefix_;
if (!GURL(prefix).is_valid()) {
// Don't attempt to fetch any manifests if the manifest URL prefix
// is invalid.
top_hosts_to_fetch_.clear();
unfinished_work_->set_num_manifest_urls(manifests_info.size());
NotifyDone(manifests_info.size(), resources_to_rank_.size());
return;
}
top_hosts_to_fetch_ = std::move(manifests_info);
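// Build each manifest URL by appending the escaped hostname to the prefix.
// If a manifest was fetched for this host before, also attach its id and the
// hashes of the resources that were and were not used since then.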
for (auto& manifest : top_hosts_to_fetch_) {
manifest.manifest_url =
GURL(prefix +
net::EscapeQueryParamValue(
net::EscapeQueryParamValue(manifest.hostname, false), false));
if (manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) {
manifest.manifest_url = net::AppendOrReplaceQueryParameter(
manifest.manifest_url, "manifest",
std::to_string(manifest.manifest_id));
manifest.manifest_url = net::AppendOrReplaceQueryParameter(
manifest.manifest_url, "used_resources", manifest.used_url_hash);
manifest.manifest_url = net::AppendOrReplaceQueryParameter(
manifest.manifest_url, "unused_resources", manifest.unused_url_hash);
DCHECK(manifest.manifest_url.is_valid());
}
}
unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_.size());
PostTaskAndReplyWithResult(
db_task_runner_.get(), FROM_HERE,
base::Bind(&RetrieveQuotaInfo, precache_database_),
base::Bind(&PrecacheFetcher::OnQuotaInfoRetrieved, AsWeakPtr()));
}
void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) {
quota_ = quota;
base::Time time_now = base::Time::Now();
if (IsQuotaTimeExpired(quota_, time_now)) {
// This is a new day. Reset the daily quota: it starts at local midnight today
// and expires at the end of the day.
quota_.set_start_time(time_now.LocalMidnight().ToInternalValue());
quota_.set_remaining(
unfinished_work_->config_settings().daily_quota_total());
db_task_runner_->PostTask(
FROM_HERE,
base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_));
}
StartNextFetch();
}
ManifestHostInfo::ManifestHostInfo(int64_t manifest_id,
const std::string& hostname,
int64_t visits,
const std::string& used_url_hash,
const std::string& unused_url_hash)
: manifest_id(manifest_id),
hostname(hostname),
visits(visits),
used_url_hash(used_url_hash),
unused_url_hash(unused_url_hash) {}
ManifestHostInfo::~ManifestHostInfo() {}
ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default;
ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default;
ResourceInfo::ResourceInfo(const GURL& url,
const std::string& referrer,
double weight)
: url(url), referrer(referrer), weight(weight) {}
ResourceInfo::~ResourceInfo() {}
ResourceInfo::ResourceInfo(ResourceInfo&&) = default;
ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default;
void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits,
const Fetcher& source) {
DCHECK(unfinished_work_->has_config_settings());
UpdateStats(source.response_bytes(), source.network_response_bytes());
if (source.network_url_fetcher() == nullptr) {
pool_.DeleteAll(); // Cancel any other ongoing request.
} else {
PrecacheManifest manifest;
if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) {
const int32_t len =
std::min(manifest.resource_size(),
unfinished_work_->config_settings().top_resources_count());
const uint64_t resource_bitset =
GetResourceBitset(manifest, experiment_id_);
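// Keep only the resources whose bit is set for this experiment group and
// whose weight meets the configured minimum.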
for (int i = 0; i < len; ++i) {
if (((0x1ULL << i) & resource_bitset) &&
manifest.resource(i).has_url()) {
GURL url(manifest.resource(i).url());
if (url.is_valid()) {
double weight = ResourceWeight(manifest.resource(i), host_visits);
if (weight >= unfinished_work_->config_settings().min_weight())
resources_to_rank_.emplace_back(url, source.referrer(), weight);
}
}
}
db_task_runner_->PostTask(
FROM_HERE, base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost,
precache_database_, source.referrer(),
manifest.id().id(), base::Time::Now()));
}
}
top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) {
return top_host.manifest_url == source.url();
});
pool_.Delete(source);
if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty())
QueueResourcesForFetch();
StartNextFetch();
}
void PrecacheFetcher::QueueResourcesForFetch() {
// Done fetching manifests. Now move resources_to_rank_ into
// resources_to_fetch_, so that StartNextFetch will begin fetching resources.
resources_to_fetch_ = std::move(resources_to_rank_);
if (unfinished_work_->config_settings().global_ranking()) {
// Sort resources_to_fetch_ by descending weight.
std::stable_sort(resources_to_fetch_.begin(), resources_to_fetch_.end(),
[](const ResourceInfo& first, const ResourceInfo& second) {
return first.weight > second.weight;
});
}
// Truncate to size |total_resources_count|.
const size_t num_resources = std::min(
resources_to_fetch_.size(),
static_cast<size_t>(
unfinished_work_->config_settings().total_resources_count()));
resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources,
resources_to_fetch_.end());
// Save denominator for PercentCompleted UMA.
unfinished_work_->set_num_resource_urls(resources_to_fetch_.size());
}
void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
UpdateStats(source.response_bytes(), source.network_response_bytes());
db_task_runner_->PostTask(
FROM_HERE,
base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_,
source.url(), source.referrer(), base::Time::Now(),
source.was_cached(), source.response_bytes()));
resources_fetching_.remove_if([&source](const ResourceInfo& resource) {
return resource.url == source.url();
});
pool_.Delete(source);
// The resource has already been put in the cache during the fetch process, so
// nothing more needs to be done for the resource.
StartNextFetch();
}
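// Adds the bytes from a completed fetch to the running totals and, if the
// current quota period has not expired, deducts the network bytes from the
// remaining daily quota and persists the updated quota.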
void PrecacheFetcher::UpdateStats(int64_t response_bytes,
int64_t network_response_bytes) {
DCHECK_LE(0, response_bytes);
DCHECK_LE(0, network_response_bytes);
unfinished_work_->set_total_bytes(
unfinished_work_->total_bytes() + response_bytes);
unfinished_work_->set_network_bytes(
unfinished_work_->network_bytes() + network_response_bytes);
if (!IsQuotaTimeExpired(quota_, base::Time::Now())) {
uint64_t used_bytes = static_cast<uint64_t>(network_response_bytes);
quota_.set_remaining(
used_bytes > quota_.remaining() ? 0U : quota_.remaining() - used_bytes);
db_task_runner_->PostTask(
FROM_HERE,
base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_));
}
}
} // namespace precache