ios/chrome/browser/reading_list/url_downloader.mm - chromium/src - Git at Google

 // Copyright 2016 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "ios/chrome/browser/reading_list/url_downloader.h"

 #include <string>
 #include <vector>

 #include "base/base64.h"
 #include "base/bind.h"
 #include "base/containers/contains.h"
 #include "base/files/file_path.h"
 #include "base/files/file_util.h"
 #include "base/json/json_writer.h"
 #include "base/memory/ptr_util.h"
 #include "base/metrics/histogram_macros.h"
 #include "base/path_service.h"
 #include "base/strings/string_util.h"
 #include "base/strings/stringprintf.h"
 #include "base/task/thread_pool.h"
 #include "components/reading_list/core/offline_url_utils.h"
 #include "ios/chrome/browser/chrome_paths.h"
 #include "ios/chrome/browser/dom_distiller/distiller_viewer.h"
 #include "ios/chrome/browser/reading_list/reading_list_distiller_page.h"
 #include "ios/chrome/browser/reading_list/reading_list_distiller_page_factory.h"
 #include "net/base/load_flags.h"
 #include "net/base/mime_sniffer.h"
 #include "net/http/http_response_headers.h"
 #include "services/network/public/cpp/resource_request.h"
 #include "services/network/public/cpp/shared_url_loader_factory.h"
 #include "services/network/public/cpp/simple_url_loader.h"
 #include "services/network/public/mojom/url_response_head.mojom.h"
 #include "url/gurl.h"

 #if !defined(__has_feature) || !__has_feature(objc_arc)
 #error "This file requires ARC support."
 #endif

 namespace {
 // Script appended to saved pages to disable the context menu on img elements.
 // The pages are stored locally and long pressing on an image would trigger a
 // context menu on the file:// URL, which cannot be opened.
 // The "$1" placeholder is substituted with the CSP nonce of the distilled
 // page before the script is appended to the saved HTML.
 const char kDisableImageContextMenuScript[] =
     "<script nonce=\"$1\">"
     "document.addEventListener('DOMContentLoaded', function (event) {"
     "    var imgMenuDisabler = document.createElement('style');"
     "    imgMenuDisabler.innerHTML = 'img { -webkit-touch-callout: none; }';"
     "    document.head.appendChild(imgMenuDisabler);"
     "}, false);"
     "</script>";

 // Script appended to saved pages to replace downloaded images with data URIs.
 // "$1" is substituted with the CSP nonce and "$2" with the generated
 // `imgData[<image url>] = "<data uri>";` statements; at load time every img
 // whose src is a key of imgData gets that data URI as its new src.
 const char kReplaceDownloadedImagesScript[] =
     "<script nonce=\"$1\">"
     "document.addEventListener('DOMContentLoaded', function (event) {"
     "    var imgData = {};"
     "    $2"
     "    var imgTags = document.getElementsByTagName(\"img\");"
     "    for(image of imgTags) {"
     "        image.src = imgData[image.src] || image.src;"
     "    }"
     "}, false);"
     "</script>";

 // The maximum size, in bytes, for the distilled page (10 MiB).
 // Note that the sum of the size of the resources (the HTML plus an estimate
 // of each base64-encoded image) is used for this check, so the total size of
 // the page after processing can be slightly more than this.
 const int kMaximumTotalPageSize = 10 * 1024 * 1024;

 // The maximum size, in bytes, for a single raw (not yet base64-encoded) image
 // (1 MiB). If a bigger image is found, the page distillation is canceled
 // (page will only be available online).
 const int kMaximumImageSize = 1024 * 1024;

 }  // namespace

 // URLDownloader

 // Builds an idle downloader (|working_| starts as false).
 // |distiller_factory|, |distiller_page_factory| and |prefs| are stored and
 // used later to create distillers. |chrome_profile_path| becomes the base
 // directory under which offline data paths are resolved.
 // |download_completion| and |delete_completion| are the callbacks run when a
 // download or a delete task finishes. File operations are posted to a
 // dedicated sequenced task runner that may block, runs at best-effort
 // priority and is skipped on shutdown.
 URLDownloader::URLDownloader(
     dom_distiller::DistillerFactory* distiller_factory,
     reading_list::ReadingListDistillerPageFactory* distiller_page_factory,
     PrefService* prefs,
     base::FilePath chrome_profile_path,
     scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
     const DownloadCompletion& download_completion,
     const SuccessCompletion& delete_completion)
     : distiller_page_factory_(distiller_page_factory),
       distiller_factory_(distiller_factory),
       pref_service_(prefs),
       download_completion_(download_completion),
       delete_completion_(delete_completion),
       working_(false),
       base_directory_(chrome_profile_path),
       mime_type_(),
       url_loader_factory_(std::move(url_loader_factory)),
       task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
           {base::MayBlock(), base::TaskPriority::BEST_EFFORT,
            base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN})),
       task_tracker_() {}

 // Cancels every task still tracked by |task_tracker_|. The replies posted in
 // this file bind |this| with base::Unretained(), so presumably this keeps
 // them from running on a destroyed object.
 URLDownloader::~URLDownloader() {
   task_tracker_.TryCancelAll();
 }

 // Checks asynchronously whether |path| exists on disk. The blocking lookup is
 // posted to |task_runner_| and its boolean result is handed to |callback| as
 // the reply.
 void URLDownloader::OfflinePathExists(const base::FilePath& path,
                                       base::OnceCallback<void(bool)> callback) {
   auto path_check = base::BindOnce(&base::PathExists, path);
   task_tracker_.PostTaskAndReplyWithResult(task_runner_.get(), FROM_HERE,
                                            std::move(path_check),
                                            std::move(callback));
 }

 // Queues the deletion of the offline data stored for |url|. Download tasks
 // still pending for the same URL are dropped first, since their output would
 // be deleted anyway.
 void URLDownloader::RemoveOfflineURL(const GURL& url) {
   CancelDownloadOfflineURL(url);
   auto delete_task = std::make_pair(DELETE, url);
   tasks_.push_back(std::move(delete_task));
   HandleNextTask();
 }

 // Queues a download of |url| unless an identical download is already waiting
 // in |tasks_|. A task that is currently running has already been removed from
 // the queue and therefore does not count as a duplicate.
 void URLDownloader::DownloadOfflineURL(const GURL& url) {
   const auto download_task = std::make_pair(DOWNLOAD, url);
   if (base::Contains(tasks_, download_task)) {
     return;
   }
   tasks_.push_back(download_task);
   HandleNextTask();
 }

 // Drops every queued (not yet started) download task for |url|. Queued
 // delete tasks and a download that is already running are not affected.
 void URLDownloader::CancelDownloadOfflineURL(const GURL& url) {
   const auto cancelled_task = std::make_pair(DOWNLOAD, url);
   auto first_removed =
       std::remove(tasks_.begin(), tasks_.end(), cancelled_task);
   tasks_.erase(first_removed, tasks_.end());
 }

 // Finishes the current download task for |url|.
 // The client is notified through |download_completion_| with |success|,
 // |offline_path|, |title|, the distilled URL and the saved size; then the
 // distiller is released and the next queued task is started. When |success|
 // is ERROR, the offline directory of |url| is deleted on |task_runner_| first
 // and the notification happens as the reply of that deletion.
 void URLDownloader::DownloadCompletionHandler(
     const GURL& url,
     const std::string& title,
     const base::FilePath& offline_path,
     SuccessState success) {
   DCHECK(working_);

   // Reports the outcome and moves on to the next task.
   auto notify_and_continue = base::BindOnce(
       [](URLDownloader* downloader, const GURL& finished_url,
          const std::string& page_title, const base::FilePath& saved_path,
          SuccessState state) {
         downloader->download_completion_.Run(
             finished_url, downloader->distilled_url_, state, saved_path,
             downloader->saved_size_, page_title);
         downloader->distiller_.reset();
         downloader->working_ = false;
         downloader->HandleNextTask();
       },
       base::Unretained(this), url, title, offline_path, success);

   if (success != ERROR) {
     // Nothing to clean up: report right away.
     std::move(notify_and_continue).Run();
     return;
   }

   // The download failed: remove whatever was partially written before
   // reporting the failure.
   const base::FilePath partial_download_dir =
       reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
   auto remove_partial_download = base::BindOnce(
       [](const base::FilePath& directory) {
         base::DeletePathRecursively(directory);
       },
       partial_download_dir);
   task_tracker_.PostTaskAndReply(task_runner_.get(), FROM_HERE,
                                  std::move(remove_partial_download),
                                  std::move(notify_and_continue));
 }

 // Reply of a DELETE task: reports through |delete_completion_| whether the
 // offline directory of |url| was deleted (|success|), then marks the
 // downloader idle and starts the next queued task, if any.
 void URLDownloader::DeleteCompletionHandler(const GURL& url, bool success) {
   DCHECK(working_);
   delete_completion_.Run(url, success);
   working_ = false;
   HandleNextTask();
 }

 void URLDownloader::HandleNextTask() {
   if (working_ || tasks_.empty()) {
     return;
   }
   working_ = true;

   Task task = tasks_.front();
   tasks_.pop_front();
   GURL url = task.second;
   base::FilePath directory_path =
       reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);

   if (task.first == DELETE) {
     task_tracker_.PostTaskAndReplyWithResult(
         task_runner_.get(), FROM_HERE,
         base::BindOnce(&base::DeletePathRecursively, directory_path),
         base::BindOnce(&URLDownloader::DeleteCompletionHandler,
                        base::Unretained(this), url));
   } else if (task.first == DOWNLOAD) {
     DCHECK(!distiller_);
     OfflinePathExists(directory_path,
                       base::BindOnce(&URLDownloader::DownloadURL,
                                      base::Unretained(this), url));
   }
 }

 void URLDownloader::DownloadURL(const GURL& url, bool offline_url_exists) {
   if (offline_url_exists) {
     DownloadCompletionHandler(url, std::string(), base::FilePath(),
                               DOWNLOAD_EXISTS);
     return;
   }

   original_url_ = url;
   distilled_url_ = url;
   saved_size_ = 0;
   std::unique_ptr<reading_list::ReadingListDistillerPage>
       reading_list_distiller_page =
           distiller_page_factory_->CreateReadingListDistillerPage(url, this);

   distiller_.reset(new dom_distiller::DistillerViewer(
       distiller_factory_, std::move(reading_list_distiller_page), pref_service_,
       url,
       base::BindRepeating(&URLDownloader::DistillerCallback,
                           base::Unretained(this))));
 }

 // Records that the distillation of |page_url| (expected to be the URL being
 // downloaded) ended on |redirected_url|. That URL is later reported through
 // |download_completion_|, used for the mixed-content check on images and
 // preferred when fetching a PDF.
 void URLDownloader::DistilledPageRedirectedToURL(const GURL& page_url,
                                                  const GURL& redirected_url) {
   DCHECK(original_url_ == page_url);
   distilled_url_ = redirected_url;
 }

 // Records the MIME type reported for the page being downloaded
 // (|original_url| is expected to match it). The value routes pages that
 // produce no HTML to the PDF download path when it is "application/pdf", and
 // is compared with the MIME type of the fetched PDF response.
 void URLDownloader::DistilledPageHasMimeType(const GURL& original_url,
                                              const std::string& mime_type) {
   DCHECK(original_url_ == original_url);
   mime_type_ = mime_type;
 }

 void URLDownloader::OnURLLoadComplete(const GURL& original_url,
                                       base::FilePath response_path) {
   // At the moment, only pdf files are downloaded using URLFetcher.
   DCHECK(mime_type_ == "application/pdf");
   base::FilePath path = reading_list::OfflinePagePath(
       original_url_, reading_list::OFFLINE_TYPE_PDF);
   std::string mime_type;
   if (url_loader_->ResponseInfo()) {
     mime_type = url_loader_->ResponseInfo()->mime_type;
   }
   if (response_path.empty() || mime_type != mime_type_) {
     return DownloadCompletionHandler(original_url_, "", path, ERROR);
   }

   task_tracker_.PostTaskAndReplyWithResult(
       task_runner_.get(), FROM_HERE,
       base::BindOnce(&URLDownloader::SavePDFFile, base::Unretained(this),
                      response_path),
       base::BindOnce(&URLDownloader::DownloadCompletionHandler,
                      base::Unretained(this), original_url, "", path));

   url_loader_.reset();
 }

 // Cancels all tasks tracked by |task_tracker_| and destroys the current
 // distiller.
 // NOTE(review): |working_| and |url_loader_| are left untouched here; confirm
 // that callers do not expect the queue to resume after a cancel.
 void URLDownloader::CancelTask() {
   task_tracker_.TryCancelAll();
   distiller_.reset();
 }

 void URLDownloader::FetchPDFFile() {
   const GURL& pdf_url =
       distilled_url_.is_valid() ? distilled_url_ : original_url_;
   auto resource_request = std::make_unique<network::ResourceRequest>();
   resource_request->url = pdf_url;
   resource_request->load_flags = net::LOAD_SKIP_CACHE_VALIDATION;

   url_loader_ = network::SimpleURLLoader::Create(std::move(resource_request),
                                                  NO_TRAFFIC_ANNOTATION_YET);
   url_loader_->DownloadToTempFile(
       url_loader_factory_.get(),
       base::BindOnce(&URLDownloader::OnURLLoadComplete, base::Unretained(this),
                      pdf_url));
 }

 // Moves the downloaded PDF at |temporary_path| to the offline location of
 // |original_url_|, creating the offline directory if needed.
 // Returns DOWNLOAD_SUCCESS once the file has been moved and ERROR when the
 // directory cannot be created or the move fails. On success the size of the
 // stored file is added to |saved_size_| when it can be determined.
 URLDownloader::SuccessState URLDownloader::SavePDFFile(
     const base::FilePath& temporary_path) {
   if (!CreateOfflineURLDirectory(original_url_)) {
     return ERROR;
   }

   const base::FilePath path = reading_list::OfflinePagePath(
       original_url_, reading_list::OFFLINE_TYPE_PDF);
   const base::FilePath absolute_path =
       reading_list::OfflineURLAbsolutePathFromRelativePath(base_directory_,
                                                            path);
   if (!base::Move(temporary_path, absolute_path)) {
     return ERROR;
   }

   // The size only feeds the reported statistics. Count it only when the
   // query succeeds so a failure cannot add an indeterminate value.
   int64_t pdf_file_size = 0;
   if (base::GetFileSize(absolute_path, &pdf_file_size)) {
     saved_size_ += pdf_file_size;
   }
   return DOWNLOAD_SUCCESS;
 }

 // Invoked by the distiller once |page_url| has been processed. Non-empty
 // |html| is saved together with |images| on |task_runner_| and the outcome is
 // forwarded to DownloadCompletionHandler() along with |title|. Empty |html|
 // means the page produced no distilled content: a PDF (according to
 // |mime_type_|) is downloaded as a file instead, anything else is reported
 // as ERROR.
 void URLDownloader::DistillerCallback(
     const GURL& page_url,
     const std::string& html,
     const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
         images,
     const std::string& title) {
   if (!html.empty()) {
     auto save_page =
         base::BindOnce(&URLDownloader::SaveDistilledHTML,
                        base::Unretained(this), page_url, images, html);
     auto on_saved = base::BindOnce(
         &URLDownloader::DownloadCompletionHandler, base::Unretained(this),
         page_url, title,
         reading_list::OfflinePagePath(page_url,
                                       reading_list::OFFLINE_TYPE_HTML));
     task_tracker_.PostTaskAndReplyWithResult(task_runner_.get(), FROM_HERE,
                                              std::move(save_page),
                                              std::move(on_saved));
     return;
   }

   // The page may not be HTML. Check the mime-type to see if another handler
   // can save offline content.
   if (mime_type_ == "application/pdf") {
     // PDF handler just downloads the PDF file.
     FetchPDFFile();
     return;
   }

   // This content cannot be processed, return an error value to the client.
   DownloadCompletionHandler(page_url, std::string(), base::FilePath(), ERROR);
 }

 // Validates the size of the distilled page and writes it to disk.
 // Returns PERMANENT_ERROR (after recording a histogram) when a single image
 // exceeds kMaximumImageSize or when the HTML plus the estimated base64 size
 // of all images exceeds kMaximumTotalPageSize. Otherwise the images are
 // inlined into |html| and the page is saved under the offline directory of
 // |url|: DOWNLOAD_SUCCESS when the file was written, ERROR when the directory
 // or the file could not be created.
 URLDownloader::SuccessState URLDownloader::SaveDistilledHTML(
     const GURL& url,
     const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
         images,
     const std::string& html) {
   // Sizes are accumulated as size_t: summing them into an int could narrow
   // or wrap and let an oversized page slip past the limit below.
   size_t total_size = html.size();
   for (const auto& image : images) {
     const size_t image_size = image.data.size();
     if (image_size > static_cast<size_t>(kMaximumImageSize)) {
       UMA_HISTOGRAM_MEMORY_KB("IOS.ReadingList.ImageTooLargeFailure",
                               static_cast<int>(image_size / 1024));
       return PERMANENT_ERROR;
     }
     // Image will be base64 encoded.
     total_size += 4 * image_size / 3;
   }
   if (total_size > static_cast<size_t>(kMaximumTotalPageSize)) {
     UMA_HISTOGRAM_MEMORY_KB("IOS.ReadingList.PageTooLargeFailure",
                             static_cast<int>(total_size / 1024));
     return PERMANENT_ERROR;
   }

   if (!CreateOfflineURLDirectory(url)) {
     return ERROR;
   }
   return SaveHTMLForURL(ReplaceImagesInHTML(url, html, images), url)
              ? DOWNLOAD_SUCCESS
              : ERROR;
 }

 bool URLDownloader::CreateOfflineURLDirectory(const GURL& url) {
   base::FilePath directory_path =
       reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
   if (!DirectoryExists(directory_path)) {
     return CreateDirectoryAndGetError(directory_path, nil);
   }
   return true;
 }

 std::string URLDownloader::ReplaceImagesInHTML(
     const GURL& url,
     const std::string& html,
     const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
         images) {
   std::string mutable_html = html;
   std::string image_js;
   bool local_images_found = false;
   for (size_t i = 0; i < images.size(); i++) {
     if (images[i].url.SchemeIs(url::kDataScheme)) {
       // Data URI, the data part of the image is empty, no need to store it.
       continue;
     }
     std::string local_image_name;
     // Mixed content is HTTP images on HTTPS pages.
     bool image_is_mixed_content = distilled_url_.SchemeIsCryptographic() &&
                                   !images[i].url.SchemeIsCryptographic();
     // Only inline images if it is not mixed content and image data is valid.
     if (image_is_mixed_content || !images[i].url.is_valid() ||
         images[i].data.empty()) {
       continue;
     }

     // Try to detect the mime-type from the bytes so an arbitrary page cannot
     // be included. Returned mime-type must start with "image/".
     std::string sniffed_type;
     if (!net::SniffMimeTypeFromLocalData(images[i].data, &sniffed_type)) {
       continue;
     }

     if (!base::StartsWith(sniffed_type, "image/")) {
       continue;
     }

     std::string image_url;
     std::string image_data;
     base::Value value(images[i].url.spec());

     base::JSONWriter::Write(value, &image_url);
     base::Base64Encode(images[i].data, &image_data);

     std::string src_with_data =
         base::StringPrintf("data:image/png;base64,%s", image_data.c_str());
     image_js += "imgData[" + image_url + "] = \"" + src_with_data + "\";";

     local_images_found = true;
   }

   if (local_images_found) {
     std::vector<std::string> substitutions;
     substitutions.push_back(distiller_->GetCspNonce());

     mutable_html += base::ReplaceStringPlaceholders(
         kDisableImageContextMenuScript, substitutions, nullptr);

     substitutions.push_back(image_js);
     mutable_html += base::ReplaceStringPlaceholders(
         kReplaceDownloadedImagesScript, substitutions, nullptr);
   }

   return mutable_html;
 }

 bool URLDownloader::SaveHTMLForURL(std::string html, const GURL& url) {
   if (html.empty()) {
     return false;
   }
   base::FilePath path = reading_list::OfflineURLAbsolutePathFromRelativePath(
       base_directory_,
       reading_list::OfflinePagePath(url, reading_list::OFFLINE_TYPE_HTML));
   int written = base::WriteFile(path, html.c_str(), html.length());
   if (written <= 0) {
     return false;
   }
   saved_size_ += written;
   return true;
 }
	// Copyright 2016 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "ios/chrome/browser/reading_list/url_downloader.h"

	#include <string>
	#include <vector>

	#include "base/base64.h"
	#include "base/bind.h"
	#include "base/containers/contains.h"
	#include "base/files/file_path.h"
	#include "base/files/file_util.h"
	#include "base/json/json_writer.h"
	#include "base/memory/ptr_util.h"
	#include "base/metrics/histogram_macros.h"
	#include "base/path_service.h"
	#include "base/strings/string_util.h"
	#include "base/strings/stringprintf.h"
	#include "base/task/thread_pool.h"
	#include "components/reading_list/core/offline_url_utils.h"
	#include "ios/chrome/browser/chrome_paths.h"
	#include "ios/chrome/browser/dom_distiller/distiller_viewer.h"
	#include "ios/chrome/browser/reading_list/reading_list_distiller_page.h"
	#include "ios/chrome/browser/reading_list/reading_list_distiller_page_factory.h"
	#include "net/base/load_flags.h"
	#include "net/base/mime_sniffer.h"
	#include "net/http/http_response_headers.h"
	#include "services/network/public/cpp/resource_request.h"
	#include "services/network/public/cpp/shared_url_loader_factory.h"
	#include "services/network/public/cpp/simple_url_loader.h"
	#include "services/network/public/mojom/url_response_head.mojom.h"
	#include "url/gurl.h"

	#if !defined(__has_feature) \|\| !__has_feature(objc_arc)
	#error "This file requires ARC support."
	#endif

	namespace {
	// Script appended to saved pages to disable the context menu on img elements.
	// The pages are stored locally and long pressing on an image would trigger a
	// context menu on the file:// URL, which cannot be opened.
	// The "$1" placeholder is substituted with the CSP nonce of the distilled
	// page before the script is appended to the saved HTML.
	const char kDisableImageContextMenuScript[] =
	    "<script nonce=\"$1\">"
	    "document.addEventListener('DOMContentLoaded', function (event) {"
	    " var imgMenuDisabler = document.createElement('style');"
	    " imgMenuDisabler.innerHTML = 'img { -webkit-touch-callout: none; }';"
	    " document.head.appendChild(imgMenuDisabler);"
	    "}, false);"
	    "</script>";

	// Script appended to saved pages to replace downloaded images with data URIs.
	// "$1" is substituted with the CSP nonce and "$2" with the generated
	// `imgData[<image url>] = "<data uri>";` statements; at load time every img
	// whose src is a key of imgData gets that data URI as its new src.
	// The JavaScript `||` is written plainly: `\|` is not a valid C++ escape
	// sequence.
	const char kReplaceDownloadedImagesScript[] =
	    "<script nonce=\"$1\">"
	    "document.addEventListener('DOMContentLoaded', function (event) {"
	    " var imgData = {};"
	    " $2"
	    " var imgTags = document.getElementsByTagName(\"img\");"
	    " for(image of imgTags) {"
	    " image.src = imgData[image.src] || image.src;"
	    " }"
	    "}, false);"
	    "</script>";

	// The maximum size, in bytes, for the distilled page (10 MiB).
	// Note that the sum of the size of the resources (the HTML plus an estimate
	// of each base64-encoded image) is used for this check, so the total size of
	// the page after processing can be slightly more than this.
	const int kMaximumTotalPageSize = 10 * 1024 * 1024;

	// The maximum size, in bytes, for a single raw (not yet base64-encoded) image
	// (1 MiB). If a bigger image is found, the page distillation is canceled
	// (page will only be available online).
	const int kMaximumImageSize = 1024 * 1024;

	}  // namespace

	// URLDownloader

	// Builds an idle downloader (|working_| starts as false).
	// |distiller_factory|, |distiller_page_factory| and |prefs| are stored and
	// used later to create distillers. |chrome_profile_path| becomes the base
	// directory under which offline data paths are resolved.
	// |download_completion| and |delete_completion| are the callbacks run when a
	// download or a delete task finishes. File operations are posted to a
	// dedicated sequenced task runner that may block, runs at best-effort
	// priority and is skipped on shutdown.
	URLDownloader::URLDownloader(
	dom_distiller::DistillerFactory* distiller_factory,
	reading_list::ReadingListDistillerPageFactory* distiller_page_factory,
	PrefService* prefs,
	base::FilePath chrome_profile_path,
	scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
	const DownloadCompletion& download_completion,
	const SuccessCompletion& delete_completion)
	: distiller_page_factory_(distiller_page_factory),
	distiller_factory_(distiller_factory),
	pref_service_(prefs),
	download_completion_(download_completion),
	delete_completion_(delete_completion),
	working_(false),
	base_directory_(chrome_profile_path),
	mime_type_(),
	url_loader_factory_(std::move(url_loader_factory)),
	task_runner_(base::ThreadPool::CreateSequencedTaskRunner(
	{base::MayBlock(), base::TaskPriority::BEST_EFFORT,
	base::TaskShutdownBehavior::SKIP_ON_SHUTDOWN})),
	task_tracker_() {}

	// Cancels every task still tracked by |task_tracker_|. The replies posted in
	// this file bind |this| with base::Unretained(), so presumably this keeps
	// them from running on a destroyed object.
	URLDownloader::~URLDownloader() {
	task_tracker_.TryCancelAll();
	}

	// Checks asynchronously whether |path| exists on disk. The blocking lookup is
	// posted to |task_runner_| and its boolean result is handed to |callback| as
	// the reply.
	void URLDownloader::OfflinePathExists(const base::FilePath& path,
	                                      base::OnceCallback<void(bool)> callback) {
	  auto path_check = base::BindOnce(&base::PathExists, path);
	  task_tracker_.PostTaskAndReplyWithResult(task_runner_.get(), FROM_HERE,
	                                           std::move(path_check),
	                                           std::move(callback));
	}

	// Queues the deletion of the offline data stored for |url|. Download tasks
	// still pending for the same URL are dropped first, since their output would
	// be deleted anyway.
	void URLDownloader::RemoveOfflineURL(const GURL& url) {
	  CancelDownloadOfflineURL(url);
	  auto delete_task = std::make_pair(DELETE, url);
	  tasks_.push_back(std::move(delete_task));
	  HandleNextTask();
	}

	// Queues a download of |url| unless an identical download is already waiting
	// in |tasks_|. A task that is currently running has already been removed from
	// the queue and therefore does not count as a duplicate.
	void URLDownloader::DownloadOfflineURL(const GURL& url) {
	  const auto download_task = std::make_pair(DOWNLOAD, url);
	  if (base::Contains(tasks_, download_task)) {
	    return;
	  }
	  tasks_.push_back(download_task);
	  HandleNextTask();
	}

	// Drops every queued (not yet started) download task for |url|. Queued
	// delete tasks and a download that is already running are not affected.
	void URLDownloader::CancelDownloadOfflineURL(const GURL& url) {
	  const auto cancelled_task = std::make_pair(DOWNLOAD, url);
	  auto first_removed =
	      std::remove(tasks_.begin(), tasks_.end(), cancelled_task);
	  tasks_.erase(first_removed, tasks_.end());
	}

	// Finishes the current download task for |url|.
	// The client is notified through |download_completion_| with |success|,
	// |offline_path|, |title|, the distilled URL and the saved size; then the
	// distiller is released and the next queued task is started. When |success|
	// is ERROR, the offline directory of |url| is deleted on |task_runner_| first
	// and the notification happens as the reply of that deletion.
	void URLDownloader::DownloadCompletionHandler(
	    const GURL& url,
	    const std::string& title,
	    const base::FilePath& offline_path,
	    SuccessState success) {
	  DCHECK(working_);

	  // Reports the outcome and moves on to the next task.
	  auto notify_and_continue = base::BindOnce(
	      [](URLDownloader* downloader, const GURL& finished_url,
	         const std::string& page_title, const base::FilePath& saved_path,
	         SuccessState state) {
	        downloader->download_completion_.Run(
	            finished_url, downloader->distilled_url_, state, saved_path,
	            downloader->saved_size_, page_title);
	        downloader->distiller_.reset();
	        downloader->working_ = false;
	        downloader->HandleNextTask();
	      },
	      base::Unretained(this), url, title, offline_path, success);

	  if (success != ERROR) {
	    // Nothing to clean up: report right away.
	    std::move(notify_and_continue).Run();
	    return;
	  }

	  // The download failed: remove whatever was partially written before
	  // reporting the failure.
	  const base::FilePath partial_download_dir =
	      reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
	  auto remove_partial_download = base::BindOnce(
	      [](const base::FilePath& directory) {
	        base::DeletePathRecursively(directory);
	      },
	      partial_download_dir);
	  task_tracker_.PostTaskAndReply(task_runner_.get(), FROM_HERE,
	                                 std::move(remove_partial_download),
	                                 std::move(notify_and_continue));
	}

	// Reply of a DELETE task: reports through |delete_completion_| whether the
	// offline directory of |url| was deleted (|success|), then marks the
	// downloader idle and starts the next queued task, if any.
	void URLDownloader::DeleteCompletionHandler(const GURL& url, bool success) {
	DCHECK(working_);
	delete_completion_.Run(url, success);
	working_ = false;
	HandleNextTask();
	}

	void URLDownloader::HandleNextTask() {
	if (working_ \|\| tasks_.empty()) {
	return;
	}
	working_ = true;

	Task task = tasks_.front();
	tasks_.pop_front();
	GURL url = task.second;
	base::FilePath directory_path =
	reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);

	if (task.first == DELETE) {
	task_tracker_.PostTaskAndReplyWithResult(
	task_runner_.get(), FROM_HERE,
	base::BindOnce(&base::DeletePathRecursively, directory_path),
	base::BindOnce(&URLDownloader::DeleteCompletionHandler,
	base::Unretained(this), url));
	} else if (task.first == DOWNLOAD) {
	DCHECK(!distiller_);
	OfflinePathExists(directory_path,
	base::BindOnce(&URLDownloader::DownloadURL,
	base::Unretained(this), url));
	}
	}

	void URLDownloader::DownloadURL(const GURL& url, bool offline_url_exists) {
	if (offline_url_exists) {
	DownloadCompletionHandler(url, std::string(), base::FilePath(),
	DOWNLOAD_EXISTS);
	return;
	}

	original_url_ = url;
	distilled_url_ = url;
	saved_size_ = 0;
	std::unique_ptr<reading_list::ReadingListDistillerPage>
	reading_list_distiller_page =
	distiller_page_factory_->CreateReadingListDistillerPage(url, this);

	distiller_.reset(new dom_distiller::DistillerViewer(
	distiller_factory_, std::move(reading_list_distiller_page), pref_service_,
	url,
	base::BindRepeating(&URLDownloader::DistillerCallback,
	base::Unretained(this))));
	}

	// Records that the distillation of |page_url| (expected to be the URL being
	// downloaded) ended on |redirected_url|. That URL is later reported through
	// |download_completion_|, used for the mixed-content check on images and
	// preferred when fetching a PDF.
	void URLDownloader::DistilledPageRedirectedToURL(const GURL& page_url,
	const GURL& redirected_url) {
	DCHECK(original_url_ == page_url);
	distilled_url_ = redirected_url;
	}

	// Records the MIME type reported for the page being downloaded
	// (|original_url| is expected to match it). The value routes pages that
	// produce no HTML to the PDF download path when it is "application/pdf", and
	// is compared with the MIME type of the fetched PDF response.
	void URLDownloader::DistilledPageHasMimeType(const GURL& original_url,
	const std::string& mime_type) {
	DCHECK(original_url_ == original_url);
	mime_type_ = mime_type;
	}

	void URLDownloader::OnURLLoadComplete(const GURL& original_url,
	base::FilePath response_path) {
	// At the moment, only pdf files are downloaded using URLFetcher.
	DCHECK(mime_type_ == "application/pdf");
	base::FilePath path = reading_list::OfflinePagePath(
	original_url_, reading_list::OFFLINE_TYPE_PDF);
	std::string mime_type;
	if (url_loader_->ResponseInfo()) {
	mime_type = url_loader_->ResponseInfo()->mime_type;
	}
	if (response_path.empty() \|\| mime_type != mime_type_) {
	return DownloadCompletionHandler(original_url_, "", path, ERROR);
	}

	task_tracker_.PostTaskAndReplyWithResult(
	task_runner_.get(), FROM_HERE,
	base::BindOnce(&URLDownloader::SavePDFFile, base::Unretained(this),
	response_path),
	base::BindOnce(&URLDownloader::DownloadCompletionHandler,
	base::Unretained(this), original_url, "", path));

	url_loader_.reset();
	}

	// Cancels all tasks tracked by |task_tracker_| and destroys the current
	// distiller.
	// NOTE(review): |working_| and |url_loader_| are left untouched here; confirm
	// that callers do not expect the queue to resume after a cancel.
	void URLDownloader::CancelTask() {
	task_tracker_.TryCancelAll();
	distiller_.reset();
	}

	void URLDownloader::FetchPDFFile() {
	const GURL& pdf_url =
	distilled_url_.is_valid() ? distilled_url_ : original_url_;
	auto resource_request = std::make_unique<network::ResourceRequest>();
	resource_request->url = pdf_url;
	resource_request->load_flags = net::LOAD_SKIP_CACHE_VALIDATION;

	url_loader_ = network::SimpleURLLoader::Create(std::move(resource_request),
	NO_TRAFFIC_ANNOTATION_YET);
	url_loader_->DownloadToTempFile(
	url_loader_factory_.get(),
	base::BindOnce(&URLDownloader::OnURLLoadComplete, base::Unretained(this),
	pdf_url));
	}

	// Moves the downloaded PDF at |temporary_path| to the offline location of
	// |original_url_|, creating the offline directory if needed.
	// Returns DOWNLOAD_SUCCESS once the file has been moved and ERROR when the
	// directory cannot be created or the move fails. On success the size of the
	// stored file is added to |saved_size_| when it can be determined.
	URLDownloader::SuccessState URLDownloader::SavePDFFile(
	    const base::FilePath& temporary_path) {
	  if (!CreateOfflineURLDirectory(original_url_)) {
	    return ERROR;
	  }

	  const base::FilePath path = reading_list::OfflinePagePath(
	      original_url_, reading_list::OFFLINE_TYPE_PDF);
	  const base::FilePath absolute_path =
	      reading_list::OfflineURLAbsolutePathFromRelativePath(base_directory_,
	                                                           path);
	  if (!base::Move(temporary_path, absolute_path)) {
	    return ERROR;
	  }

	  // The size only feeds the reported statistics. Count it only when the
	  // query succeeds so a failure cannot add an indeterminate value.
	  int64_t pdf_file_size = 0;
	  if (base::GetFileSize(absolute_path, &pdf_file_size)) {
	    saved_size_ += pdf_file_size;
	  }
	  return DOWNLOAD_SUCCESS;
	}

	// Invoked by the distiller once |page_url| has been processed. Non-empty
	// |html| is saved together with |images| on |task_runner_| and the outcome is
	// forwarded to DownloadCompletionHandler() along with |title|. Empty |html|
	// means the page produced no distilled content: a PDF (according to
	// |mime_type_|) is downloaded as a file instead, anything else is reported
	// as ERROR.
	void URLDownloader::DistillerCallback(
	    const GURL& page_url,
	    const std::string& html,
	    const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
	        images,
	    const std::string& title) {
	  if (!html.empty()) {
	    auto save_page =
	        base::BindOnce(&URLDownloader::SaveDistilledHTML,
	                       base::Unretained(this), page_url, images, html);
	    auto on_saved = base::BindOnce(
	        &URLDownloader::DownloadCompletionHandler, base::Unretained(this),
	        page_url, title,
	        reading_list::OfflinePagePath(page_url,
	                                      reading_list::OFFLINE_TYPE_HTML));
	    task_tracker_.PostTaskAndReplyWithResult(task_runner_.get(), FROM_HERE,
	                                             std::move(save_page),
	                                             std::move(on_saved));
	    return;
	  }

	  // The page may not be HTML. Check the mime-type to see if another handler
	  // can save offline content.
	  if (mime_type_ == "application/pdf") {
	    // PDF handler just downloads the PDF file.
	    FetchPDFFile();
	    return;
	  }

	  // This content cannot be processed, return an error value to the client.
	  DownloadCompletionHandler(page_url, std::string(), base::FilePath(), ERROR);
	}

	// Validates the size of the distilled page and writes it to disk.
	// Returns PERMANENT_ERROR (after recording a histogram) when a single image
	// exceeds kMaximumImageSize or when the HTML plus the estimated base64 size
	// of all images exceeds kMaximumTotalPageSize. Otherwise the images are
	// inlined into |html| and the page is saved under the offline directory of
	// |url|: DOWNLOAD_SUCCESS when the file was written, ERROR when the directory
	// or the file could not be created.
	URLDownloader::SuccessState URLDownloader::SaveDistilledHTML(
	    const GURL& url,
	    const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
	        images,
	    const std::string& html) {
	  // Sizes are accumulated as size_t: summing them into an int could narrow
	  // or wrap and let an oversized page slip past the limit below.
	  size_t total_size = html.size();
	  for (const auto& image : images) {
	    const size_t image_size = image.data.size();
	    if (image_size > static_cast<size_t>(kMaximumImageSize)) {
	      UMA_HISTOGRAM_MEMORY_KB("IOS.ReadingList.ImageTooLargeFailure",
	                              static_cast<int>(image_size / 1024));
	      return PERMANENT_ERROR;
	    }
	    // Image will be base64 encoded.
	    total_size += 4 * image_size / 3;
	  }
	  if (total_size > static_cast<size_t>(kMaximumTotalPageSize)) {
	    UMA_HISTOGRAM_MEMORY_KB("IOS.ReadingList.PageTooLargeFailure",
	                            static_cast<int>(total_size / 1024));
	    return PERMANENT_ERROR;
	  }

	  if (!CreateOfflineURLDirectory(url)) {
	    return ERROR;
	  }
	  return SaveHTMLForURL(ReplaceImagesInHTML(url, html, images), url)
	             ? DOWNLOAD_SUCCESS
	             : ERROR;
	}

	bool URLDownloader::CreateOfflineURLDirectory(const GURL& url) {
	base::FilePath directory_path =
	reading_list::OfflineURLDirectoryAbsolutePath(base_directory_, url);
	if (!DirectoryExists(directory_path)) {
	return CreateDirectoryAndGetError(directory_path, nil);
	}
	return true;
	}

	std::string URLDownloader::ReplaceImagesInHTML(
	const GURL& url,
	const std::string& html,
	const std::vector<dom_distiller::DistillerViewerInterface::ImageInfo>&
	images) {
	std::string mutable_html = html;
	std::string image_js;
	bool local_images_found = false;
	for (size_t i = 0; i < images.size(); i++) {
	if (images[i].url.SchemeIs(url::kDataScheme)) {
	// Data URI, the data part of the image is empty, no need to store it.
	continue;
	}
	std::string local_image_name;
	// Mixed content is HTTP images on HTTPS pages.
	bool image_is_mixed_content = distilled_url_.SchemeIsCryptographic() &&
	!images[i].url.SchemeIsCryptographic();
	// Only inline images if it is not mixed content and image data is valid.
	if (image_is_mixed_content \|\| !images[i].url.is_valid() \|\|
	images[i].data.empty()) {
	continue;
	}

	// Try to detect the mime-type from the bytes so an arbitrary page cannot
	// be included. Returned mime-type must start with "image/".
	std::string sniffed_type;
	if (!net::SniffMimeTypeFromLocalData(images[i].data, &sniffed_type)) {
	continue;
	}

	if (!base::StartsWith(sniffed_type, "image/")) {
	continue;
	}

	std::string image_url;
	std::string image_data;
	base::Value value(images[i].url.spec());

	base::JSONWriter::Write(value, &image_url);
	base::Base64Encode(images[i].data, &image_data);

	std::string src_with_data =
	base::StringPrintf("data:image/png;base64,%s", image_data.c_str());
	image_js += "imgData[" + image_url + "] = \"" + src_with_data + "\";";

	local_images_found = true;
	}

	if (local_images_found) {
	std::vector<std::string> substitutions;
	substitutions.push_back(distiller_->GetCspNonce());

	mutable_html += base::ReplaceStringPlaceholders(
	kDisableImageContextMenuScript, substitutions, nullptr);

	substitutions.push_back(image_js);
	mutable_html += base::ReplaceStringPlaceholders(
	kReplaceDownloadedImagesScript, substitutions, nullptr);
	}

	return mutable_html;
	}

	bool URLDownloader::SaveHTMLForURL(std::string html, const GURL& url) {
	if (html.empty()) {
	return false;
	}
	base::FilePath path = reading_list::OfflineURLAbsolutePathFromRelativePath(
	base_directory_,
	reading_list::OfflinePagePath(url, reading_list::OFFLINE_TYPE_HTML));
	int written = base::WriteFile(path, html.c_str(), html.length());
	if (written <= 0) {
	return false;
	}
	saved_size_ += written;
	return true;
	}