Neutralize dangerous subresource files during Save Page.

Downloading a complete page using "Save as..." can result in downloading
hundreds of subresources. The user often isn't interested in accessing
individual resources directly while they are on disk. In addition,
scanning hundreds of files during a single save page operation isn't
currently practical.

In order to mitigate the potential risk of leaving dangerous files
around on the users' filesystem, this CL renames known dangerous files
with an additional ".download" extension. I.e. A subresource named
foo.exe would be saved as foo.exe.download.

The code review includes lists of file types that are known to be
affected by this change. Notable file types include .js, .swf, and
.class. As a side-effect of the rename, they will not receive the
correct MIME type when loaded via a file:// URL. The saved page should
still function correctly even with the renamed resources.

R=nparker@chromium.org, jam@chromium.org
BUG=599224
CQ_INCLUDE_TRYBOTS=tryserver.chromium.linux:closure_compilation

Review-Url: https://codereview.chromium.org/2060923002
Cr-Commit-Position: refs/heads/master@{#401729}
diff --git a/chrome/browser/download/chrome_download_manager_delegate.cc b/chrome/browser/download/chrome_download_manager_delegate.cc
index e36ed85..c3aa96086 100644
--- a/chrome/browser/download/chrome_download_manager_delegate.cc
+++ b/chrome/browser/download/chrome_download_manager_delegate.cc
@@ -42,6 +42,8 @@
 #include "chrome/common/chrome_constants.h"
 #include "chrome/common/features.h"
 #include "chrome/common/pref_names.h"
+#include "chrome/common/safe_browsing/file_type_policies.h"
+#include "chrome/grit/generated_resources.h"
 #include "components/pref_registry/pref_registry_syncable.h"
 #include "components/prefs/pref_member.h"
 #include "components/prefs/pref_service.h"
@@ -51,6 +53,7 @@
 #include "content/public/browser/page_navigator.h"
 #include "net/base/filename_util.h"
 #include "net/base/mime_util.h"
+#include "ui/base/l10n/l10n_util.h"
 
 #if BUILDFLAG(ANDROID_JAVA_UI)
 #include "chrome/browser/android/download/chrome_download_manager_overwrite_infobar_delegate.h"
@@ -432,6 +435,20 @@
       callback);
 }
 
+void ChromeDownloadManagerDelegate::SanitizeSavePackageResourceName(
+    base::FilePath* filename) {
+  safe_browsing::FileTypePolicies* file_type_policies =
+      safe_browsing::FileTypePolicies::GetInstance();
+
+  if (file_type_policies->GetFileDangerLevel(*filename) ==
+      safe_browsing::DownloadFileType::NOT_DANGEROUS)
+    return;
+
+  base::FilePath default_filename = base::FilePath::FromUTF8Unsafe(
+      l10n_util::GetStringUTF8(IDS_DEFAULT_DOWNLOAD_FILENAME));
+  *filename = filename->AddExtension(default_filename.BaseName().value());
+}
+
 void ChromeDownloadManagerDelegate::OpenDownloadUsingPlatformHandler(
     DownloadItem* download) {
   base::FilePath platform_path(
diff --git a/chrome/browser/download/chrome_download_manager_delegate.h b/chrome/browser/download/chrome_download_manager_delegate.h
index 04e2540b..7d738e5 100644
--- a/chrome/browser/download/chrome_download_manager_delegate.h
+++ b/chrome/browser/download/chrome_download_manager_delegate.h
@@ -80,6 +80,7 @@
       const base::FilePath::StringType& default_extension,
       bool can_save_as_complete,
       const content::SavePackagePathPickedCallback& callback) override;
+  void SanitizeSavePackageResourceName(base::FilePath* filename) override;
   void OpenDownload(content::DownloadItem* download) override;
   void ShowDownloadInShell(content::DownloadItem* download) override;
   void CheckForFileExistence(
diff --git a/chrome/browser/download/save_page_browsertest.cc b/chrome/browser/download/save_page_browsertest.cc
index 6bbe3c3..544d499 100644
--- a/chrome/browser/download/save_page_browsertest.cc
+++ b/chrome/browser/download/save_page_browsertest.cc
@@ -762,6 +762,23 @@
   EXPECT_EQ("foo", contents);
 }
 
+// If a save-page-complete operation results in creating subresources that would
+// otherwise be considered dangerous, such files should get a .download
+// extension appended so that they won't be accidentally executed by the user.
+IN_PROC_BROWSER_TEST_F(SavePageBrowserTest, DangerousSubresources) {
+  GURL url =
+      URLRequestMockHTTPJob::GetMockUrl("/save_page/dubious-subresources.html");
+
+  ui_test_utils::NavigateToURL(browser(), url);
+  base::FilePath full_file_name, dir;
+  SaveCurrentTab(url, content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML,
+                 "dubious-subresources", 2, &dir, &full_file_name);
+  ASSERT_FALSE(HasFailure());
+
+  EXPECT_TRUE(base::PathExists(full_file_name));
+  EXPECT_TRUE(base::PathExists(dir.AppendASCII("not-a-crx.crx.download")));
+}
+
 // Test that we don't crash when the page contains an iframe that
 // was handled as a download (http://crbug.com/42212).
 IN_PROC_BROWSER_TEST_F(SavePageBrowserTest, SaveDownloadableIFrame) {
diff --git a/chrome/browser/resources/safe_browsing/README.md b/chrome/browser/resources/safe_browsing/README.md
index 7b85908..c745b96 100644
--- a/chrome/browser/resources/safe_browsing/README.md
+++ b/chrome/browser/resources/safe_browsing/README.md
@@ -77,6 +77,12 @@
     `DANGEROUS_HOST`, or `DANGEROUS`, Chrome will show that more severe warning
     regardless of this setting.
 
+    This policy also affects also how subresources are handled for *"Save As
+    ..."* downloads of complete web pages. If any subresource ends up with a
+    file type that is considered `DANGEROUS` or `ALLOW_ON_USER_GESTURE`, then
+    the filename will be changed to end in `.download`. This is done to prevent
+    the file from being opened accidentally.
+
     * `NOT_DANGEROUS`: Safe to download and open, even if the download
        was accidental. No additional warnings are necessary.
     * `DANGEROUS`: Always warn the user that this file may harm their
diff --git a/chrome/test/data/save_page/dubious-subresources.html b/chrome/test/data/save_page/dubious-subresources.html
new file mode 100644
index 0000000..da0ce1cb
--- /dev/null
+++ b/chrome/test/data/save_page/dubious-subresources.html
@@ -0,0 +1,7 @@
+<!DOCTYPE html>
+<meta charset="utf-8">
+<title>Test base with dubious subresource links</title>
+<body>
+  <h1>Hello have a look at this "picture"!</h1>
+  <img src="not-a-crx.crx">
+</body>
diff --git a/chrome/test/data/save_page/not-a-crx.crx b/chrome/test/data/save_page/not-a-crx.crx
new file mode 100644
index 0000000..7f2d63e
--- /dev/null
+++ b/chrome/test/data/save_page/not-a-crx.crx
Binary files differ
diff --git a/chrome/test/data/save_page/not-a-crx.crx.mock-http-headers b/chrome/test/data/save_page/not-a-crx.crx.mock-http-headers
new file mode 100644
index 0000000..3222b3f
--- /dev/null
+++ b/chrome/test/data/save_page/not-a-crx.crx.mock-http-headers
@@ -0,0 +1,4 @@
+HTTP/1.1 200 OK
+Connection: close
+Content-Type: image/png
+Content-Length: 2241
diff --git a/content/browser/download/save_package.cc b/content/browser/download/save_package.cc
index be50bb4..da07f634 100644
--- a/content/browser/download/save_package.cc
+++ b/content/browser/download/save_package.cc
@@ -401,16 +401,20 @@
                                                    kDefaultSaveName);
 
   DCHECK(!file_path.empty());
+  if (need_html_ext)
+    file_path = file_path.ReplaceExtension(kDefaultHtmlExtension);
+
+  DownloadManagerDelegate* delegate = download_manager_->GetDelegate();
+  if (delegate)
+    delegate->SanitizeSavePackageResourceName(&file_path);
+
+  DCHECK_EQ(file_path.value(), file_path.BaseName().value())
+      << "SanitizeSavePackageResourceName should only return a basename.";
+
   base::FilePath::StringType base_name =
       file_path.RemoveExtension().BaseName().value();
   base::FilePath::StringType file_name_ext = file_path.Extension();
 
-  // If it is HTML resource, use ".html" as its extension.
-  if (need_html_ext) {
-    file_name_ext = FILE_PATH_LITERAL(".");
-    file_name_ext.append(kDefaultHtmlExtension);
-  }
-
   // Need to make sure the suggested file name is not too long.
   uint32_t max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
 
diff --git a/content/browser/download/save_package.h b/content/browser/download/save_package.h
index 0161616f..edc5484 100644
--- a/content/browser/download/save_package.h
+++ b/content/browser/download/save_package.h
@@ -208,7 +208,7 @@
   //
   // Returns true if |base_name| could be successfully adjusted to fit the
   // aforementioned constraints, or false otherwise.
-  // TODO(asanka): This funciton is wrong. |base_name| cannot be truncated
+  // TODO(asanka): This function is wrong. |base_name| cannot be truncated
   //   without knowing its encoding and truncation has to be performed on
   //   character boundaries. Also the implementation doesn't look up the actual
   //   path constraints and instead uses hard coded constants. crbug.com/618737
diff --git a/content/browser/download/save_package_unittest.cc b/content/browser/download/save_package_unittest.cc
index bdfea949..c75a28b 100644
--- a/content/browser/download/save_package_unittest.cc
+++ b/content/browser/download/save_package_unittest.cc
@@ -44,13 +44,6 @@
 const uint32_t kMaxFileNameLength = NAME_MAX;
 #endif
 
-// Used to make long filenames.
-std::string long_file_name(
-    "EFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz01234567"
-    "89ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz012345"
-    "6789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz0123"
-    "456789ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789a");
-
 bool HasOrdinalNumber(const base::FilePath::StringType& filename) {
   base::FilePath::StringType::size_type r_paren_index =
       filename.rfind(FPL(')'));
@@ -112,17 +105,11 @@
                         temp_dir_.path().AppendASCII("testfile" HTML_EXTENSION),
                         temp_dir_.path().AppendASCII("testfile_files"));
 
-    // We need to construct a path that is *almost* kMaxFilePathLength long
-    long_file_name.reserve(kMaxFilePathLength + long_file_name.length());
-    while (long_file_name.length() < kMaxFilePathLength)
-      long_file_name += long_file_name;
-    long_file_name.resize(
-        kMaxFilePathLength - 9 - temp_dir_.path().value().length());
-
+    base::FilePath::StringType long_file_name = GetLongFileName();
     save_package_fail_ = new SavePackage(
         contents(), SAVE_PAGE_TYPE_AS_COMPLETE_HTML,
-        temp_dir_.path().AppendASCII(long_file_name + HTML_EXTENSION),
-        temp_dir_.path().AppendASCII(long_file_name + "_files"));
+        temp_dir_.path().Append(long_file_name + FPL_HTML_EXTENSION),
+        temp_dir_.path().Append(long_file_name + FPL("_files")));
   }
 
   BrowserContext* CreateBrowserContext() override {
@@ -145,6 +132,13 @@
     RenderViewHostImplTestHarness::TearDown();
   }
 
+  // Returns a path that is *almost* kMaxFilePathLength long
+  base::FilePath::StringType GetLongFileName() const {
+    size_t target_length =
+        kMaxFilePathLength - 9 - temp_dir_.path().value().length();
+    return base::FilePath::StringType(target_length, FPL('a'));
+  }
+
  private:
   // SavePackage for successfully generating file name.
   scoped_refptr<SavePackage> save_package_success_;
@@ -237,19 +231,21 @@
 #endif
 TEST_F(SavePackageTest, MAYBE_TestLongSavePackageFilename) {
   const std::string base_url("http://www.google.com/");
-  const std::string long_file = long_file_name + ".css";
-  const std::string url = base_url + long_file;
+  const base::FilePath::StringType long_file_name =
+      GetLongFileName() + FPL(".css");
+  const std::string url =
+      base_url + base::FilePath(long_file_name).AsUTF8Unsafe();
 
   base::FilePath::StringType filename;
   // Test that the filename is successfully shortened to fit.
   ASSERT_TRUE(GetGeneratedFilename(true, std::string(), url, false, &filename));
-  EXPECT_TRUE(filename.length() < long_file.length());
+  EXPECT_TRUE(filename.length() < long_file_name.length());
   EXPECT_FALSE(HasOrdinalNumber(filename));
 
   // Test that the filename is successfully shortened to fit, and gets an
   // an ordinal appended.
   ASSERT_TRUE(GetGeneratedFilename(true, std::string(), url, false, &filename));
-  EXPECT_TRUE(filename.length() < long_file.length());
+  EXPECT_TRUE(filename.length() < long_file_name.length());
   EXPECT_TRUE(HasOrdinalNumber(filename));
 
   // Test that the filename is successfully shortened to fit, and gets a
@@ -257,7 +253,7 @@
   base::FilePath::StringType filename2;
   ASSERT_TRUE(
       GetGeneratedFilename(true, std::string(), url, false, &filename2));
-  EXPECT_TRUE(filename2.length() < long_file.length());
+  EXPECT_TRUE(filename2.length() < long_file_name.length());
   EXPECT_TRUE(HasOrdinalNumber(filename2));
   EXPECT_NE(filename, filename2);
 }
@@ -271,12 +267,7 @@
 TEST_F(SavePackageTest, MAYBE_TestLongSafePureFilename) {
   const base::FilePath save_dir(FPL("test_dir"));
   const base::FilePath::StringType ext(FPL_HTML_EXTENSION);
-  base::FilePath::StringType filename =
-#if defined(OS_WIN)
-      base::ASCIIToUTF16(long_file_name);
-#else
-      long_file_name;
-#endif
+  base::FilePath::StringType filename = GetLongFileName();
 
   // Test that the filename + extension doesn't exceed kMaxFileNameLength
   uint32_t max_path = SavePackage::GetMaxPathLengthForDirectory(save_dir);
diff --git a/content/public/browser/download_manager_delegate.h b/content/public/browser/download_manager_delegate.h
index 13b395721c..6acb960 100644
--- a/content/public/browser/download_manager_delegate.h
+++ b/content/public/browser/download_manager_delegate.h
@@ -119,6 +119,18 @@
       const SavePackagePathPickedCallback& callback) {
   }
 
+  // Sanitize a filename that's going to be used for saving a subresource of a
+  // SavePackage.
+  //
+  // If the delegate does nothing, the default filename already populated in
+  // |filename| will be used. Otherwise, the delegate can update |filename| to
+  // the desired filename.
+  //
+  // |filename| contains a basename with an extension, but without a path. This
+  // should be the case on return as well. I.e. |filename| cannot specify a
+  // relative path.
+  virtual void SanitizeSavePackageResourceName(base::FilePath* filename) {}
+
   // Opens the file associated with this download.
   virtual void OpenDownload(DownloadItem* download) {}