components/precache/core/precache_fetcher.h - chromium/src - Git at Google

 // Copyright 2013 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_
 #define COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_

 #include <stdint.h>

 #include <algorithm>
 #include <list>
 #include <memory>
 #include <string>
 #include <vector>

 #include "base/callback.h"
 #include "base/macros.h"
 #include "base/memory/ref_counted.h"
 #include "base/time/time.h"
 #include "components/precache/core/fetcher_pool.h"
 #include "net/url_request/url_fetcher.h"
 #include "net/url_request/url_fetcher_delegate.h"
 #include "url/gurl.h"

 namespace net {
 class URLRequestContextGetter;
 }

 namespace precache {

 class PrecacheConfigurationSettings;

 // Visible for testing.
 extern const int kNoTracking;

 // Public interface to code that fetches resources that the user is likely to
 // want to fetch in the future, putting them in the network stack disk cache.
 // Precaching is intended to be done when Chrome is not actively in use, likely
 // hours ahead of the time when the resources are actually needed.
 //
 // This class takes as input a prioritized list of URL domains that the user
 // commonly visits, referred to as starting hosts. This class interacts with a
 // server, sending it the list of starting hosts sequentially. For each starting
 // host, the server returns a manifest of resource URLs that are good candidates
 // for precaching. Every resource returned is fetched, and responses are cached
 // as they are received. Destroying the PrecacheFetcher while it is precaching
 // will cancel any fetch in progress and cancel precaching.
 //
 // The URLs of the server-side component must be specified in order for the
 // PrecacheFetcher to work. This includes the URL that the precache
 // configuration settings are fetched from and the prefix of URLs where precache
 // manifests are fetched from. These can be set by using command line switches
 // or by providing default values.
 //
 // Sample interaction:
 //
 // class MyPrecacheFetcherDelegate : public PrecacheFetcher::PrecacheDelegate {
 //  public:
 //   void PrecacheResourcesForTopURLs(
 //       net::URLRequestContextGetter* request_context,
 //       const std::list<GURL>& top_urls) {
 //     fetcher_.reset(new PrecacheFetcher(request_context, top_urls, this));
 //     fetcher_->Start();
 //   }
 //
 //   virtual void OnDone() {
 //     // Do something when precaching is done.
 //   }
 //
 //  private:
 //   std::unique_ptr<PrecacheFetcher> fetcher_;
 // };
 class PrecacheFetcher {
  public:
   class PrecacheDelegate {
    public:
     // Called when the fetching of resources has finished, whether the resources
     // were fetched or not. If the PrecacheFetcher is destroyed before OnDone is
     // called, then precaching will be canceled and OnDone will not be called.
     virtual void OnDone() = 0;
   };

   // Visible for testing.
   class Fetcher;

   // Constructs a new PrecacheFetcher. The |starting_hosts| parameter is a
   // prioritized list of hosts that the user commonly visits. These hosts are
   // used by a server side component to construct a list of resource URLs that
   // the user is likely to fetch.
   PrecacheFetcher(const std::vector<std::string>& starting_hosts,
                   net::URLRequestContextGetter* request_context,
                   const GURL& config_url,
                   const std::string& manifest_url_prefix,
                   PrecacheDelegate* precache_delegate);

   virtual ~PrecacheFetcher();

   // Starts fetching resources to precache. URLs are fetched sequentially. Can
   // be called from any thread. Start should only be called once on a
   // PrecacheFetcher instance.
   void Start();

  private:
   // Fetches the next resource or manifest URL, if any remain. Fetching is done
   // sequentially and depth-first: all resources are fetched for a manifest
   // before the next manifest is fetched. This is done to limit the length of
   // the |resource_urls_to_fetch_| list, reducing the memory usage.
   void StartNextFetch();

   void StartNextManifestFetch();
   void StartNextResourceFetch();

   // Called when the precache configuration settings have been fetched.
   // Determines the list of manifest URLs to fetch according to the list of
   // |starting_hosts_| and information from the precache configuration settings.
   // If the fetch of the configuration settings fails, then precaching ends.
   void OnConfigFetchComplete(const Fetcher& source);

   // Called when a precache manifest has been fetched. Builds the list of
   // resource URLs to fetch according to the URLs in the manifest. If the fetch
   // of a manifest fails, then it skips to the next manifest.
   void OnManifestFetchComplete(const Fetcher& source);

   // Called when a resource has been fetched.
   void OnResourceFetchComplete(const Fetcher& source);

   // Adds up the response sizes.
   void UpdateStats(int64_t response_bytes, int64_t network_response_bytes);

   // The prioritized list of starting hosts that the server will pick resource
   // URLs to be precached for.
   const std::vector<std::string> starting_hosts_;

   // The request context used when fetching URLs.
   const scoped_refptr<net::URLRequestContextGetter> request_context_;

   // The custom URL to use when fetching the config. If not provided, the
   // default flag-specified URL will be used.
   const GURL config_url_;

   // The custom URL prefix to use when fetching manifests. If not provided, the
   // default flag-specified prefix will be used.
   const std::string manifest_url_prefix_;

   // Non-owning pointer. Should not be NULL.
   PrecacheDelegate* precache_delegate_;

   std::unique_ptr<PrecacheConfigurationSettings> config_;

   // Tally of the total number of bytes contained in URL fetches, including
   // config, manifests, and resources. This the number of bytes as they would be
   // compressed over the network.
   size_t total_response_bytes_;

   // Tally of the total number of bytes received over the network from URL
   // fetches (the same ones as in total_response_bytes_). This includes response
   // headers and intermediate responses such as 30xs.
   size_t network_response_bytes_;

   // Time when the prefetch was started.
   base::TimeTicks start_time_;

   int num_manifest_urls_to_fetch_;
   std::list<GURL> manifest_urls_to_fetch_;
   std::list<GURL> resource_urls_to_fetch_;

   FetcherPool<Fetcher> pool_;

   DISALLOW_COPY_AND_ASSIGN(PrecacheFetcher);
 };

 // Class that fetches a URL, and runs the specified callback when the fetch is
 // complete. This class exists so that a different method can be run in
 // response to different kinds of fetches, e.g. OnConfigFetchComplete when
 // configuration settings are fetched, OnManifestFetchComplete when a manifest
 // is fetched, etc.
 //
 // This class tries to increase freshness while limiting network usage, by using
 // the following strategy:
 // 1.  Fetch the URL from the cache.
 // 2a. If it's present and lacks revalidation headers, then stop.
 // 2b. If it's not present, or it's present and has revalidation headers, then
 //     refetch over the network.
 //
 // This allows the precache to "refresh" cache entries by increasing their
 // expiration date, but minimizes the network impact of doing so, by performing
 // only conditional GETs.
 //
 // On completion it calls the given callback. This class cancels requests whose
 // responses are or will be larger than max_bytes. In such cases,
 // network_url_fetcher() will return nullptr.
 class PrecacheFetcher::Fetcher : public net::URLFetcherDelegate {
  public:
   // Construct a new Fetcher. This will create and start a new URLFetcher for
   // the specified URL using the specified request context.
   Fetcher(net::URLRequestContextGetter* request_context,
           const GURL& url,
           const base::Callback<void(const Fetcher&)>& callback,
           bool is_resource_request,
           size_t max_bytes);
   ~Fetcher() override;
   void OnURLFetchDownloadProgress(const net::URLFetcher* source,
                                   int64_t current,
                                   int64_t total) override;
   void OnURLFetchComplete(const net::URLFetcher* source) override;
   int64_t response_bytes() const { return response_bytes_; }
   int64_t network_response_bytes() const { return network_response_bytes_; }
   const net::URLFetcher* network_url_fetcher() const {
     return network_url_fetcher_.get();
   }

  private:
   enum class FetchStage { CACHE, NETWORK };

   void LoadFromCache();
   void LoadFromNetwork();

   net::URLRequestContextGetter* const request_context_;
   const GURL url_;
   const base::Callback<void(const Fetcher&)> callback_;
   const bool is_resource_request_;
   const size_t max_bytes_;

   FetchStage fetch_stage_;
   // The cache_url_fetcher_ is kept alive until Fetcher destruction for testing.
   std::unique_ptr<net::URLFetcher> cache_url_fetcher_;
   std::unique_ptr<net::URLFetcher> network_url_fetcher_;
   int64_t response_bytes_;
   int64_t network_response_bytes_;

   DISALLOW_COPY_AND_ASSIGN(Fetcher);
 };

 }  // namespace precache

 #endif  // COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_
	// Copyright 2013 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_
	#define COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_

	#include <stdint.h>

	#include <algorithm>
	#include <list>
	#include <memory>
	#include <string>
	#include <vector>

	#include "base/callback.h"
	#include "base/macros.h"
	#include "base/memory/ref_counted.h"
	#include "base/time/time.h"
	#include "components/precache/core/fetcher_pool.h"
	#include "net/url_request/url_fetcher.h"
	#include "net/url_request/url_fetcher_delegate.h"
	#include "url/gurl.h"

	namespace net {
	class URLRequestContextGetter;
	}

	namespace precache {

	class PrecacheConfigurationSettings;

	// Visible for testing.
	extern const int kNoTracking;

	// Public interface to code that fetches resources that the user is likely to
	// want to fetch in the future, putting them in the network stack disk cache.
	// Precaching is intended to be done when Chrome is not actively in use, likely
	// hours ahead of the time when the resources are actually needed.
	//
	// This class takes as input a prioritized list of URL domains that the user
	// commonly visits, referred to as starting hosts. This class interacts with a
	// server, sending it the list of starting hosts sequentially. For each starting
	// host, the server returns a manifest of resource URLs that are good candidates
	// for precaching. Every resource returned is fetched, and responses are cached
	// as they are received. Destroying the PrecacheFetcher while it is precaching
	// will cancel any fetch in progress and cancel precaching.
	//
	// The URLs of the server-side component must be specified in order for the
	// PrecacheFetcher to work. This includes the URL that the precache
	// configuration settings are fetched from and the prefix of URLs where precache
	// manifests are fetched from. These can be set by using command line switches
	// or by providing default values.
	//
	// Sample interaction:
	//
	// class MyPrecacheFetcherDelegate : public PrecacheFetcher::PrecacheDelegate {
	// public:
	// void PrecacheResourcesForTopURLs(
	// net::URLRequestContextGetter* request_context,
	// const std::list<GURL>& top_urls) {
	// fetcher_.reset(new PrecacheFetcher(request_context, top_urls, this));
	// fetcher_->Start();
	// }
	//
	// virtual void OnDone() {
	// // Do something when precaching is done.
	// }
	//
	// private:
	// std::unique_ptr<PrecacheFetcher> fetcher_;
	// };
	class PrecacheFetcher {
	public:
	class PrecacheDelegate {
	public:
	// Called when the fetching of resources has finished, whether the resources
	// were fetched or not. If the PrecacheFetcher is destroyed before OnDone is
	// called, then precaching will be canceled and OnDone will not be called.
	virtual void OnDone() = 0;
	};

	// Visible for testing.
	class Fetcher;

	// Constructs a new PrecacheFetcher. The \|starting_hosts\| parameter is a
	// prioritized list of hosts that the user commonly visits. These hosts are
	// used by a server side component to construct a list of resource URLs that
	// the user is likely to fetch.
	PrecacheFetcher(const std::vector<std::string>& starting_hosts,
	net::URLRequestContextGetter* request_context,
	const GURL& config_url,
	const std::string& manifest_url_prefix,
	PrecacheDelegate* precache_delegate);

	virtual ~PrecacheFetcher();

	// Starts fetching resources to precache. URLs are fetched sequentially. Can
	// be called from any thread. Start should only be called once on a
	// PrecacheFetcher instance.
	void Start();

	private:
	// Fetches the next resource or manifest URL, if any remain. Fetching is done
	// sequentially and depth-first: all resources are fetched for a manifest
	// before the next manifest is fetched. This is done to limit the length of
	// the \|resource_urls_to_fetch_\| list, reducing the memory usage.
	void StartNextFetch();

	void StartNextManifestFetch();
	void StartNextResourceFetch();

	// Called when the precache configuration settings have been fetched.
	// Determines the list of manifest URLs to fetch according to the list of
	// \|starting_hosts_\| and information from the precache configuration settings.
	// If the fetch of the configuration settings fails, then precaching ends.
	void OnConfigFetchComplete(const Fetcher& source);

	// Called when a precache manifest has been fetched. Builds the list of
	// resource URLs to fetch according to the URLs in the manifest. If the fetch
	// of a manifest fails, then it skips to the next manifest.
	void OnManifestFetchComplete(const Fetcher& source);

	// Called when a resource has been fetched.
	void OnResourceFetchComplete(const Fetcher& source);

	// Adds up the response sizes.
	void UpdateStats(int64_t response_bytes, int64_t network_response_bytes);

	// The prioritized list of starting hosts that the server will pick resource
	// URLs to be precached for.
	const std::vector<std::string> starting_hosts_;

	// The request context used when fetching URLs.
	const scoped_refptr<net::URLRequestContextGetter> request_context_;

	// The custom URL to use when fetching the config. If not provided, the
	// default flag-specified URL will be used.
	const GURL config_url_;

	// The custom URL prefix to use when fetching manifests. If not provided, the
	// default flag-specified prefix will be used.
	const std::string manifest_url_prefix_;

	// Non-owning pointer. Should not be NULL.
	PrecacheDelegate* precache_delegate_;

	std::unique_ptr<PrecacheConfigurationSettings> config_;

	// Tally of the total number of bytes contained in URL fetches, including
	// config, manifests, and resources. This the number of bytes as they would be
	// compressed over the network.
	size_t total_response_bytes_;

	// Tally of the total number of bytes received over the network from URL
	// fetches (the same ones as in total_response_bytes_). This includes response
	// headers and intermediate responses such as 30xs.
	size_t network_response_bytes_;

	// Time when the prefetch was started.
	base::TimeTicks start_time_;

	int num_manifest_urls_to_fetch_;
	std::list<GURL> manifest_urls_to_fetch_;
	std::list<GURL> resource_urls_to_fetch_;

	FetcherPool<Fetcher> pool_;

	DISALLOW_COPY_AND_ASSIGN(PrecacheFetcher);
	};

	// Class that fetches a URL, and runs the specified callback when the fetch is
	// complete. This class exists so that a different method can be run in
	// response to different kinds of fetches, e.g. OnConfigFetchComplete when
	// configuration settings are fetched, OnManifestFetchComplete when a manifest
	// is fetched, etc.
	//
	// This class tries to increase freshness while limiting network usage, by using
	// the following strategy:
	// 1. Fetch the URL from the cache.
	// 2a. If it's present and lacks revalidation headers, then stop.
	// 2b. If it's not present, or it's present and has revalidation headers, then
	// refetch over the network.
	//
	// This allows the precache to "refresh" cache entries by increasing their
	// expiration date, but minimizes the network impact of doing so, by performing
	// only conditional GETs.
	//
	// On completion it calls the given callback. This class cancels requests whose
	// responses are or will be larger than max_bytes. In such cases,
	// network_url_fetcher() will return nullptr.
	class PrecacheFetcher::Fetcher : public net::URLFetcherDelegate {
	public:
	// Construct a new Fetcher. This will create and start a new URLFetcher for
	// the specified URL using the specified request context.
	Fetcher(net::URLRequestContextGetter* request_context,
	const GURL& url,
	const base::Callback<void(const Fetcher&)>& callback,
	bool is_resource_request,
	size_t max_bytes);
	~Fetcher() override;
	void OnURLFetchDownloadProgress(const net::URLFetcher* source,
	int64_t current,
	int64_t total) override;
	void OnURLFetchComplete(const net::URLFetcher* source) override;
	int64_t response_bytes() const { return response_bytes_; }
	int64_t network_response_bytes() const { return network_response_bytes_; }
	const net::URLFetcher* network_url_fetcher() const {
	return network_url_fetcher_.get();
	}

	private:
	enum class FetchStage { CACHE, NETWORK };

	void LoadFromCache();
	void LoadFromNetwork();

	net::URLRequestContextGetter* const request_context_;
	const GURL url_;
	const base::Callback<void(const Fetcher&)> callback_;
	const bool is_resource_request_;
	const size_t max_bytes_;

	FetchStage fetch_stage_;
	// The cache_url_fetcher_ is kept alive until Fetcher destruction for testing.
	std::unique_ptr<net::URLFetcher> cache_url_fetcher_;
	std::unique_ptr<net::URLFetcher> network_url_fetcher_;
	int64_t response_bytes_;
	int64_t network_response_bytes_;

	DISALLOW_COPY_AND_ASSIGN(Fetcher);
	};

	} // namespace precache

	#endif // COMPONENTS_PRECACHE_CORE_PRECACHE_FETCHER_H_