// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/precache/core/precache_fetcher.h"
#include <algorithm>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/callback.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/containers/hash_tables.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram_macros.h"
#include "components/precache/core/precache_switches.h"
#include "components/precache/core/proto/precache.pb.h"
#include "components/precache/core/proto/unfinished_work.pb.h"
#include "net/base/completion_callback.h"
#include "net/base/escape.h"
#include "net/base/io_buffer.h"
#include "net/base/load_flags.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
#include "net/url_request/url_fetcher_response_writer.h"
#include "net/url_request/url_request_context_getter.h"
#include "net/url_request/url_request_status.h"
namespace precache {
// The following flags are for privacy reasons. For example, if a user clears
// their cookies, but a tracking beacon is prefetched and the beacon specifies
// its source URL in a URL param, the beacon site would be able to rebuild a
// profile of the user. All three flags should occur together, or not at all,
// per
// https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion.
const int kNoTracking =
net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
net::LOAD_DO_NOT_SEND_AUTH_DATA;
namespace {
// The maximum number of URLFetcher requests that can be in flight in parallel.
const int kMaxParallelFetches = 10;
// The maximum for the Precache.Fetch.ResponseBytes.* histograms. This is set
// to a value we expect to be roughly the 99th percentile of the recorded
// distribution.
const int kMaxResponseBytes = 500 * 1024 * 1024;
GURL GetDefaultConfigURL() {
const base::CommandLine& command_line =
*base::CommandLine::ForCurrentProcess();
if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) {
return GURL(
command_line.GetSwitchValueASCII(switches::kPrecacheConfigSettingsURL));
}
#if defined(PRECACHE_CONFIG_SETTINGS_URL)
return GURL(PRECACHE_CONFIG_SETTINGS_URL);
#else
// The precache config settings URL could not be determined, so return an
// empty, invalid GURL.
return GURL();
#endif
}
std::string GetDefaultManifestURLPrefix() {
const base::CommandLine& command_line =
*base::CommandLine::ForCurrentProcess();
if (command_line.HasSwitch(switches::kPrecacheManifestURLPrefix)) {
return command_line.GetSwitchValueASCII(
switches::kPrecacheManifestURLPrefix);
}
#if defined(PRECACHE_MANIFEST_URL_PREFIX)
return PRECACHE_MANIFEST_URL_PREFIX;
#else
// The precache manifest URL prefix could not be determined, so return an
// empty string.
return std::string();
#endif
}
// Construct the URL of the precache manifest for the given name (either host or
// URL). The server is expecting a request for a URL consisting of the manifest
// URL prefix followed by the doubly escaped name.
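// For example (illustrative values only): with a prefix of
// "https://example.com/pm/" and a name of "https://www.google.com/", one
// escape pass yields "https%3A%2F%2Fwww.google.com%2F", a second pass yields
// "https%253A%252F%252Fwww.google.com%252F", and the constructed manifest URL
// is "https://example.com/pm/https%253A%252F%252Fwww.google.com%252F".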
std::string ConstructManifestURL(const std::string& prefix,
const std::string& name) {
return prefix + net::EscapeQueryParamValue(
net::EscapeQueryParamValue(name, false), false);
}
// Attempts to parse a protobuf message from the response string of a
// URLFetcher. If parsing is successful, the message parameter will contain the
// parsed protobuf and this function will return true. Otherwise, returns false.
bool ParseProtoFromFetchResponse(const net::URLFetcher& source,
::google::protobuf::MessageLite* message) {
std::string response_string;
if (!source.GetStatus().is_success()) {
DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec();
return false;
}
if (!source.GetResponseAsString(&response_string)) {
DLOG(WARNING) << "No response string present: "
<< source.GetOriginalURL().spec();
return false;
}
if (!message->ParseFromString(response_string)) {
DLOG(WARNING) << "Unable to parse proto served from "
<< source.GetOriginalURL().spec();
return false;
}
return true;
}
// Returns the resource selection bitset from the |manifest| for the given
// |experiment_id|. If the experiment group is not found, all resources are
// selected by default.
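// For example (illustrative), a bitset of 0x5 (binary 101) selects the
// resources at indices 0 and 2 of the manifest; see the bit test in
// OnManifestFetchComplete().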
uint64_t GetResourceBitset(const PrecacheManifest& manifest,
uint32_t experiment_id) {
if (manifest.has_experiments()) {
const auto& resource_bitset_map =
manifest.experiments().resources_by_experiment_group();
const auto& resource_bitset_it = resource_bitset_map.find(experiment_id);
if (resource_bitset_it != resource_bitset_map.end())
return resource_bitset_it->second.bitset();
}
return ~0ULL;
}
// URLFetcherResponseWriter that discards the response body, in order to avoid
// unnecessary memory usage. Use it rather than the default writer when the
// response body does not need to be parsed. We use it below for requests that
// are issued only to populate the cache with the requested resources.
class URLFetcherNullWriter : public net::URLFetcherResponseWriter {
public:
int Initialize(const net::CompletionCallback& callback) override {
return net::OK;
}
int Write(net::IOBuffer* buffer,
int num_bytes,
const net::CompletionCallback& callback) override {
return num_bytes;
}
int Finish(const net::CompletionCallback& callback) override {
return net::OK;
}
};
void AppendManifestURLIfValidAndNew(
const std::string& prefix,
const std::string& name,
base::hash_set<std::string>* seen_manifest_urls,
std::list<GURL>* unique_manifest_urls) {
const std::string manifest_url = ConstructManifestURL(prefix, name);
bool first_seen = seen_manifest_urls->insert(manifest_url).second;
if (first_seen) {
GURL url(manifest_url);
if (url.is_valid())
unique_manifest_urls->push_back(url);
}
}
} // namespace
PrecacheFetcher::Fetcher::Fetcher(
net::URLRequestContextGetter* request_context,
const GURL& url,
const base::Callback<void(const Fetcher&)>& callback,
bool is_resource_request,
size_t max_bytes)
: request_context_(request_context),
url_(url),
callback_(callback),
is_resource_request_(is_resource_request),
max_bytes_(max_bytes),
response_bytes_(0),
network_response_bytes_(0) {
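  // Resource requests probe the cache first; OnURLFetchComplete() falls back
  // to a network fetch on a cache miss, or issues a revalidation when the
  // cached response carries validators. Config and manifest requests go
  // directly to the network.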
if (is_resource_request_)
LoadFromCache();
else
LoadFromNetwork();
}
PrecacheFetcher::Fetcher::~Fetcher() {}
void PrecacheFetcher::Fetcher::LoadFromCache() {
fetch_stage_ = FetchStage::CACHE;
cache_url_fetcher_ =
net::URLFetcher::Create(url_, net::URLFetcher::GET, this);
cache_url_fetcher_->SetRequestContext(request_context_);
cache_url_fetcher_->SetLoadFlags(net::LOAD_ONLY_FROM_CACHE | kNoTracking);
std::unique_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
cache_url_fetcher_->SaveResponseWithWriter(std::move(null_writer));
cache_url_fetcher_->Start();
}
void PrecacheFetcher::Fetcher::LoadFromNetwork() {
fetch_stage_ = FetchStage::NETWORK;
network_url_fetcher_ =
net::URLFetcher::Create(url_, net::URLFetcher::GET, this);
network_url_fetcher_->SetRequestContext(request_context_);
if (is_resource_request_) {
// LOAD_VALIDATE_CACHE allows us to refresh Date headers for resources
// already in the cache. The Date headers are updated from 304s as well as
// 200s.
network_url_fetcher_->SetLoadFlags(net::LOAD_VALIDATE_CACHE | kNoTracking);
// We don't need a copy of the response body for resource requests. The
// request is issued only to populate the browser cache.
std::unique_ptr<URLFetcherNullWriter> null_writer(new URLFetcherNullWriter);
network_url_fetcher_->SaveResponseWithWriter(std::move(null_writer));
} else {
// Config and manifest requests do not need to be revalidated. It's okay if
// they expire from the cache minutes after we request them.
network_url_fetcher_->SetLoadFlags(kNoTracking);
}
network_url_fetcher_->Start();
}
void PrecacheFetcher::Fetcher::OnURLFetchDownloadProgress(
const net::URLFetcher* source,
int64_t current,
int64_t total) {
  // If the fetch is going over the per-resource download cap, cancel it.
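  // Note: |total| may be -1 when the total response size is not known, so
  // std::max(current, total) uses whichever of the known content length or the
  // bytes received so far is larger.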
if (fetch_stage_ == FetchStage::NETWORK &&
// |current| is guaranteed to be non-negative, so this cast is safe.
static_cast<size_t>(std::max(current, total)) > max_bytes_) {
VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total
<< ") is over " << max_bytes_;
// Cancel the download.
network_url_fetcher_.reset();
// Call the completion callback, to attempt the next download, or to trigger
// cleanup in precache_delegate_->OnDone().
response_bytes_ = network_response_bytes_ = current;
callback_.Run(*this);
}
}
void PrecacheFetcher::Fetcher::OnURLFetchComplete(
const net::URLFetcher* source) {
CHECK(source);
if (fetch_stage_ == FetchStage::CACHE &&
(source->GetStatus().error() == net::ERR_CACHE_MISS ||
(source->GetResponseHeaders() &&
source->GetResponseHeaders()->HasValidators()))) {
// If the resource was not found in the cache, request it from the
// network.
//
// If the resource was found in the cache, but contains validators,
// request a refresh. The presence of validators increases the chance that
// we get a 304 response rather than a full one, thus allowing us to
// refresh the cache with minimal network load.
LoadFromNetwork();
return;
}
  // If any of the following is true:
  // - The request was for a config or manifest.
  // - The resource was a cache hit without validators.
  // - The response came from the network.
  // then the Fetcher is done with this URL and can return control to the
  // caller.
response_bytes_ = source->GetReceivedResponseContentLength();
network_response_bytes_ = source->GetTotalReceivedBytes();
callback_.Run(*this);
}
// static
void PrecacheFetcher::RecordCompletionStatistics(
const PrecacheUnfinishedWork& unfinished_work,
size_t remaining_manifest_urls_to_fetch,
size_t remaining_resource_urls_to_fetch) {
// These may be unset in tests.
if (!unfinished_work.has_start_time())
return;
base::TimeDelta time_to_fetch =
base::Time::Now() -
base::Time::FromInternalValue(unfinished_work.start_time());
UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch,
base::TimeDelta::FromSeconds(1),
base::TimeDelta::FromHours(4), 50);
// Number of manifests for which we have downloaded all resources.
int manifests_completed =
unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch;
// If there are resource URLs left to fetch, the last manifest is not yet
// completed.
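  // For example (illustrative numbers): with 10 manifest URLs in total and 4
  // still to fetch, 6 manifests have been started; if resource URLs are still
  // pending, only 5 count as completed, so 50% is recorded below.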
if (remaining_resource_urls_to_fetch > 0)
--manifests_completed;
DCHECK_GE(manifests_completed, 0);
int percent_completed = unfinished_work.num_manifest_urls() == 0
? 0
: (static_cast<double>(manifests_completed) /
unfinished_work.num_manifest_urls() * 100);
UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
percent_completed);
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
unfinished_work.total_bytes(),
1, kMaxResponseBytes, 100);
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
unfinished_work.network_bytes(),
1, kMaxResponseBytes,
100);
}
PrecacheFetcher::PrecacheFetcher(
net::URLRequestContextGetter* request_context,
const GURL& config_url,
const std::string& manifest_url_prefix,
std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
uint32_t experiment_id,
PrecacheFetcher::PrecacheDelegate* precache_delegate)
: request_context_(request_context),
config_url_(config_url),
manifest_url_prefix_(manifest_url_prefix),
precache_delegate_(precache_delegate),
pool_(kMaxParallelFetches),
experiment_id_(experiment_id) {
DCHECK(request_context_.get()); // Request context must be non-NULL.
DCHECK(precache_delegate_); // Precache delegate must be non-NULL.
DCHECK_NE(GURL(), GetDefaultConfigURL())
<< "Could not determine the precache config settings URL.";
DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
<< "Could not determine the default precache manifest URL prefix.";
DCHECK(unfinished_work);
// Copy manifests and resources to member variables as a convenience.
// TODO(bengr): Consider accessing these directly from the proto.
for (const auto& manifest : unfinished_work->manifest()) {
if (manifest.has_url())
manifest_urls_to_fetch_.push_back(GURL(manifest.url()));
}
for (const auto& resource : unfinished_work->resource()) {
if (resource.has_url())
resource_urls_to_fetch_.push_back(GURL(resource.url()));
}
unfinished_work_ = std::move(unfinished_work);
}
PrecacheFetcher::~PrecacheFetcher() {
}
std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
// This could get called multiple times, and it should be handled gracefully.
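  // Move every URL that has not finished fetching, including those still in
  // the fetcher pool, back into |unfinished_work_| so that a later precache
  // session can resume where this one left off.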
if (!unfinished_work_)
return nullptr;
unfinished_work_->clear_manifest();
unfinished_work_->clear_resource();
for (const auto& manifest : manifest_urls_to_fetch_)
unfinished_work_->add_manifest()->set_url(manifest.spec());
for (const auto& resource : resource_urls_to_fetch_)
unfinished_work_->add_resource()->set_url(resource.spec());
for (const auto& it : pool_.elements()) {
const Fetcher* fetcher = it.first;
if (fetcher->is_resource_request())
unfinished_work_->add_resource()->set_url(fetcher->url().spec());
else if (fetcher->url() != config_url_)
unfinished_work_->add_manifest()->set_url(fetcher->url().spec());
}
manifest_urls_to_fetch_.clear();
resource_urls_to_fetch_.clear();
pool_.DeleteAll();
return std::move(unfinished_work_);
}
void PrecacheFetcher::Start() {
if (unfinished_work_->has_config_settings()) {
DCHECK(unfinished_work_->has_start_time());
DetermineManifests();
return;
}
GURL config_url =
config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
DCHECK(config_url.is_valid()) << "Config URL not valid: "
<< config_url.possibly_invalid_spec();
// Fetch the precache configuration settings from the server.
DCHECK(pool_.IsEmpty()) << "All parallel requests should be available";
VLOG(3) << "Fetching " << config_url;
pool_.Add(base::WrapUnique(new Fetcher(
request_context_.get(), config_url,
base::Bind(&PrecacheFetcher::OnConfigFetchComplete,
base::Unretained(this)),
false /* is_resource_request */, std::numeric_limits<int32_t>::max())));
}
void PrecacheFetcher::StartNextResourceFetch() {
DCHECK(unfinished_work_->has_config_settings());
while (!resource_urls_to_fetch_.empty() && pool_.IsAvailable()) {
const size_t max_bytes =
std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
unfinished_work_->config_settings().max_bytes_total() -
unfinished_work_->total_bytes());
VLOG(3) << "Fetching " << resource_urls_to_fetch_.front();
pool_.Add(base::WrapUnique(
new Fetcher(request_context_.get(), resource_urls_to_fetch_.front(),
base::Bind(&PrecacheFetcher::OnResourceFetchComplete,
base::Unretained(this)),
true /* is_resource_request */, max_bytes)));
resource_urls_to_fetch_.pop_front();
}
}
void PrecacheFetcher::StartNextManifestFetch() {
if (manifest_urls_to_fetch_.empty() || !pool_.IsAvailable())
return;
// We only fetch one manifest at a time to keep the size of
// resource_urls_to_fetch_ as small as possible.
VLOG(3) << "Fetching " << manifest_urls_to_fetch_.front();
pool_.Add(base::WrapUnique(new Fetcher(
request_context_.get(), manifest_urls_to_fetch_.front(),
base::Bind(&PrecacheFetcher::OnManifestFetchComplete,
base::Unretained(this)),
false /* is_resource_request */, std::numeric_limits<int32_t>::max())));
manifest_urls_to_fetch_.pop_front();
}
void PrecacheFetcher::NotifyDone(
size_t remaining_manifest_urls_to_fetch,
size_t remaining_resource_urls_to_fetch) {
RecordCompletionStatistics(*unfinished_work_,
remaining_manifest_urls_to_fetch,
remaining_resource_urls_to_fetch);
precache_delegate_->OnDone();
}
void PrecacheFetcher::StartNextFetch() {
DCHECK(unfinished_work_->has_config_settings());
// If over the precache total size cap, then stop prefetching.
if (unfinished_work_->total_bytes() >
unfinished_work_->config_settings().max_bytes_total()) {
size_t pending_manifests_in_pool = 0;
size_t pending_resources_in_pool = 0;
for (const auto& element_pair : pool_.elements()) {
const Fetcher* fetcher = element_pair.first;
if (fetcher->is_resource_request())
pending_resources_in_pool++;
else if (fetcher->url() != config_url_)
pending_manifests_in_pool++;
}
pool_.DeleteAll();
NotifyDone(manifest_urls_to_fetch_.size() + pending_manifests_in_pool,
resource_urls_to_fetch_.size() + pending_resources_in_pool);
return;
}
StartNextResourceFetch();
StartNextManifestFetch();
if (pool_.IsEmpty()) {
// There are no more URLs to fetch, so end the precache cycle.
NotifyDone(0, 0);
// OnDone may have deleted this PrecacheFetcher, so don't do anything after
// it is called.
}
}
void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
UpdateStats(source.response_bytes(), source.network_response_bytes());
if (source.network_url_fetcher() == nullptr) {
pool_.DeleteAll(); // Cancel any other ongoing request.
} else {
// Attempt to parse the config proto. On failure, continue on with the
// default configuration.
ParseProtoFromFetchResponse(
*source.network_url_fetcher(),
unfinished_work_->mutable_config_settings());
pool_.Delete(source);
DetermineManifests();
}
}
void PrecacheFetcher::DetermineManifests() {
DCHECK(unfinished_work_->has_config_settings());
std::string prefix = manifest_url_prefix_.empty()
? GetDefaultManifestURLPrefix()
: manifest_url_prefix_;
DCHECK_NE(std::string(), prefix)
<< "Could not determine the precache manifest URL prefix.";
// Keep track of manifest URLs that are being fetched, in order to elide
// duplicates.
base::hash_set<std::string> seen_manifest_urls;
// Attempt to fetch manifests for starting hosts up to the maximum top sites
// count. If a manifest does not exist for a particular starting host, then
// the fetch will fail, and that starting host will be ignored. Starting
// hosts are not added if this is a continuation from a previous precache
// session.
if (manifest_urls_to_fetch_.empty() &&
resource_urls_to_fetch_.empty()) {
int64_t rank = 0;
for (const auto& host : unfinished_work_->top_host()) {
++rank;
if (rank > unfinished_work_->config_settings().top_sites_count())
break;
AppendManifestURLIfValidAndNew(prefix, host.hostname(),
&seen_manifest_urls,
&manifest_urls_to_fetch_);
}
for (const std::string& host
: unfinished_work_->config_settings().forced_site()) {
AppendManifestURLIfValidAndNew(prefix, host, &seen_manifest_urls,
&manifest_urls_to_fetch_);
}
}
unfinished_work_->set_num_manifest_urls(manifest_urls_to_fetch_.size());
StartNextFetch();
}
void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
DCHECK(unfinished_work_->has_config_settings());
UpdateStats(source.response_bytes(), source.network_response_bytes());
if (source.network_url_fetcher() == nullptr) {
pool_.DeleteAll(); // Cancel any other ongoing request.
} else {
PrecacheManifest manifest;
if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) {
const int32_t len =
std::min(manifest.resource_size(),
unfinished_work_->config_settings().top_resources_count());
const uint64_t resource_bitset =
GetResourceBitset(manifest, experiment_id_);
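      // The bitset is 64 bits wide, so it can describe the selection of at
      // most the first 64 resources in the manifest; any bit that is set
      // causes the corresponding resource to be fetched.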
for (int i = 0; i < len; ++i) {
if (((0x1ULL << i) & resource_bitset) &&
manifest.resource(i).has_url()) {
GURL url(manifest.resource(i).url());
if (url.is_valid())
resource_urls_to_fetch_.push_back(url);
}
}
}
}
pool_.Delete(source);
StartNextFetch();
}
void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
UpdateStats(source.response_bytes(), source.network_response_bytes());
pool_.Delete(source);
// The resource has already been put in the cache during the fetch process, so
// nothing more needs to be done for the resource.
StartNextFetch();
}
void PrecacheFetcher::UpdateStats(int64_t response_bytes,
int64_t network_response_bytes) {
unfinished_work_->set_total_bytes(
unfinished_work_->total_bytes() + response_bytes);
unfinished_work_->set_network_bytes(
unfinished_work_->network_bytes() + network_response_bytes);
}
} // namespace precache