Sanitize https:// URLs before sending them to PAC scripts.

This additionally strips the path and query components for https:// URL (embedded identity and reference fragment were already being stripped).

For debugging purposes this behavior can be disabled with the command line flag --unsafe-pac-url.

BUG=593759
R=mmenke@chromium.org

Review URL: https://codereview.chromium.org/1996773002 .

Cr-Commit-Position: refs/heads/master@{#395266}
diff --git a/chrome/browser/net/proxy_service_factory.cc b/chrome/browser/net/proxy_service_factory.cc
index 26da0f6d..63e1062 100644
--- a/chrome/browser/net/proxy_service_factory.cc
+++ b/chrome/browser/net/proxy_service_factory.cc
@@ -188,5 +188,10 @@
 
   proxy_service->set_quick_check_enabled(quick_check_enabled);
 
+  if (command_line.HasSwitch(switches::kUnsafePacUrl)) {
+    proxy_service->set_sanitize_url_policy(
+        net::ProxyService::SanitizeUrlPolicy::UNSAFE);
+  }
+
   return proxy_service;
 }
diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc
index 08fb3c7..3c5d61f 100644
--- a/chrome/common/chrome_switches.cc
+++ b/chrome/common/chrome_switches.cc
@@ -1057,6 +1057,11 @@
 const char kUnsafelyTreatInsecureOriginAsSecure[] =
     "unsafely-treat-insecure-origin-as-secure";
 
+// Pass the full https:// URL to PAC (Proxy Auto Config) scripts. As opposed to
+// the default behavior which strips path and query components before passing
+// to the PAC scripts.
+const char kUnsafePacUrl[] = "unsafe-pac-url";
+
 // A string used to override the default user agent with a custom one.
 const char kUserAgent[]                     = "user-agent";
 
diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h
index 9cf07d9..49a9cda 100644
--- a/chrome/common/chrome_switches.h
+++ b/chrome/common/chrome_switches.h
@@ -290,6 +290,7 @@
 extern const char kTryChromeAgain[];
 extern const char kUnlimitedStorage[];
 extern const char kUnsafelyTreatInsecureOriginAsSecure[];
+extern const char kUnsafePacUrl[];
 extern const char kUseSimpleCacheBackend[];
 extern const char kUserAgent[];
 extern const char kUserDataDir[];
diff --git a/net/proxy/proxy_service.cc b/net/proxy/proxy_service.cc
index 5373a34..403b52c 100644
--- a/net/proxy/proxy_service.cc
+++ b/net/proxy/proxy_service.cc
@@ -347,6 +347,26 @@
 };
 #endif
 
+// Returns a sanitized copy of |url| which is safe to pass on to a PAC script.
+// The method for sanitizing is determined by |policy|. See the comments for
+// that enum for details.
+GURL SanitizeUrl(const GURL& url, ProxyService::SanitizeUrlPolicy policy) {
+  DCHECK(url.is_valid());
+
+  GURL::Replacements replacements;
+  replacements.ClearUsername();
+  replacements.ClearPassword();
+  replacements.ClearRef();
+
+  if (policy == ProxyService::SanitizeUrlPolicy::SAFE &&
+      url.SchemeIsCryptographic()) {
+    replacements.ClearPath();
+    replacements.ClearQuery();
+  }
+
+  return url.ReplaceComponents(replacements);
+}
+
 }  // namespace
 
 // ProxyService::InitProxyResolver --------------------------------------------
@@ -937,7 +957,8 @@
       net_log_(net_log),
       stall_proxy_auto_config_delay_(
           TimeDelta::FromMilliseconds(kDelayAfterNetworkChangesMs)),
-      quick_check_enabled_(true) {
+      quick_check_enabled_(true),
+      sanitize_url_policy_(SanitizeUrlPolicy::SAFE) {
   NetworkChangeNotifier::AddIPAddressObserver(this);
   NetworkChangeNotifier::AddDNSObserver(this);
   ResetConfigService(std::move(config_service));
@@ -1050,9 +1071,11 @@
   if (current_state_ == STATE_NONE)
     ApplyProxyConfigIfAvailable();
 
-  // Strip away any reference fragments and the username/password, as they
-  // are not relevant to proxy resolution.
-  GURL url = SimplifyUrlForRequest(raw_url);
+  // Sanitize the URL before passing it on to the proxy resolver (i.e. PAC
+  // script). The goal is to remove sensitive data (like embedded user names
+  // and password), and local data (i.e. reference fragment) which does not need
+  // to be disclosed to the resolver.
+  GURL url = SanitizeUrl(raw_url, sanitize_url_policy_);
 
   // Check if the request can be completed right away. (This is the case when
   // using a direct connection for example).
diff --git a/net/proxy/proxy_service.h b/net/proxy/proxy_service.h
index 635d26d..c00cada 100644
--- a/net/proxy/proxy_service.h
+++ b/net/proxy/proxy_service.h
@@ -25,6 +25,7 @@
 #include "net/proxy/proxy_config_service.h"
 #include "net/proxy/proxy_info.h"
 #include "net/proxy/proxy_server.h"
+#include "url/gurl.h"
 
 class GURL;
 
@@ -52,6 +53,25 @@
                                 public ProxyConfigService::Observer,
                                 NON_EXPORTED_BASE(public base::NonThreadSafe) {
  public:
+  // Enumerates the policy to use when sanitizing URLs for proxy resolution
+  // (before passing them off to PAC scripts).
+  enum class SanitizeUrlPolicy {
+    // Do a basic level of sanitization for URLs:
+    //   - strip embedded identities (ex: "username:password@")
+    //   - strip the fragment (ex: "#blah")
+    //
+    // This is considered "unsafe" because it does not do any additional
+    // stripping for https:// URLs.
+    UNSAFE,
+
+    // SAFE does the same sanitization as UNSAFE, but additionally strips
+    // everything but the (scheme,host,port) from cryptographic URL schemes
+    // (https:// and wss://).
+    //
+    // In other words, it strips the path and query portion of https:// URLs.
+    SAFE,
+  };
+
   static const size_t kDefaultNumPacThreads = 4;
 
   // This interface defines the set of policies for when to poll the PAC
@@ -296,6 +316,10 @@
     quick_check_enabled_ = value;
   }
 
+  void set_sanitize_url_policy(SanitizeUrlPolicy policy) {
+    sanitize_url_policy_ = policy;
+  }
+
  private:
   FRIEND_TEST_ALL_PREFIXES(ProxyServiceTest, UpdateConfigAfterFailedAutodetect);
   FRIEND_TEST_ALL_PREFIXES(ProxyServiceTest, UpdateConfigFromPACToDirect);
@@ -460,6 +484,9 @@
   // Whether child ProxyScriptDeciders should use QuickCheck
   bool quick_check_enabled_;
 
+  // The method to use for sanitizing URLs seen by the proxy resolver.
+  SanitizeUrlPolicy sanitize_url_policy_;
+
   DISALLOW_COPY_AND_ASSIGN(ProxyService);
 };
 
diff --git a/net/proxy/proxy_service_unittest.cc b/net/proxy/proxy_service_unittest.cc
index a0db685..34a0c5d 100644
--- a/net/proxy/proxy_service_unittest.cc
+++ b/net/proxy/proxy_service_unittest.cc
@@ -3480,4 +3480,224 @@
   EXPECT_EQ(0u, factory->pending_requests().size());
 }
 
+// Helper class to exercise URL sanitization using the different policies. This
+// works by submitted URLs to the ProxyService. In turn the ProxyService
+// sanitizes the URL and then passes it along to the ProxyResolver. This helper
+// returns the URL seen by the ProxyResolver.
+class SanitizeUrlHelper {
+ public:
+  SanitizeUrlHelper() {
+    std::unique_ptr<MockProxyConfigService> config_service(
+        new MockProxyConfigService("http://foopy/proxy.pac"));
+
+    factory = new MockAsyncProxyResolverFactory(false);
+
+    service_.reset(new ProxyService(std::move(config_service),
+                                    base::WrapUnique(factory), nullptr));
+
+    // Do an initial request to initialize the service (configure the PAC
+    // script).
+    GURL url("http://example.com");
+
+    ProxyInfo info;
+    TestCompletionCallback callback;
+    int rv = service_->ResolveProxy(url, std::string(), LOAD_NORMAL, &info,
+                                    callback.callback(), nullptr, nullptr,
+                                    BoundNetLog());
+    EXPECT_EQ(ERR_IO_PENDING, rv);
+
+    // First step is to download the PAC script.
+    EXPECT_EQ(GURL("http://foopy/proxy.pac"),
+              factory->pending_requests()[0]->script_data()->url());
+    factory->pending_requests()[0]->CompleteNowWithForwarder(OK, &resolver);
+
+    EXPECT_EQ(1u, resolver.pending_requests().size());
+    EXPECT_EQ(url, resolver.pending_requests()[0]->url());
+
+    // Complete the request.
+    resolver.pending_requests()[0]->results()->UsePacString("DIRECT");
+    resolver.pending_requests()[0]->CompleteNow(OK);
+    EXPECT_EQ(OK, callback.WaitForResult());
+    EXPECT_TRUE(info.is_direct());
+  }
+
+  // Changes the URL sanitization policy for the underlying ProxyService. This
+  // will affect subsequent calls to SanitizeUrl.
+  void SetSanitizeUrlPolicy(ProxyService::SanitizeUrlPolicy policy) {
+    service_->set_sanitize_url_policy(policy);
+  }
+
+  // Makes a proxy resolution request through the ProxyService, and returns the
+  // URL that was submitted to the Proxy Resolver.
+  GURL SanitizeUrl(const GURL& raw_url) {
+    // Issue a request and see what URL is sent to the proxy resolver.
+    ProxyInfo info;
+    TestCompletionCallback callback;
+    int rv = service_->ResolveProxy(raw_url, std::string(), LOAD_NORMAL, &info,
+                                    callback.callback(), nullptr, nullptr,
+                                    BoundNetLog());
+    EXPECT_EQ(ERR_IO_PENDING, rv);
+
+    EXPECT_EQ(1u, resolver.pending_requests().size());
+
+    GURL sanitized_url = resolver.pending_requests()[0]->url();
+
+    // Complete the request.
+    resolver.pending_requests()[0]->results()->UsePacString("DIRECT");
+    resolver.pending_requests()[0]->CompleteNow(OK);
+    EXPECT_EQ(OK, callback.WaitForResult());
+    EXPECT_TRUE(info.is_direct());
+
+    return sanitized_url;
+  }
+
+  // Changes the ProxyService's URL sanitization policy and then sanitizes
+  // |raw_url|.
+  GURL SanitizeUrl(const GURL& raw_url,
+                   ProxyService::SanitizeUrlPolicy policy) {
+    service_->set_sanitize_url_policy(policy);
+    return SanitizeUrl(raw_url);
+  }
+
+ private:
+  MockAsyncProxyResolver resolver;
+  MockAsyncProxyResolverFactory* factory;
+  std::unique_ptr<ProxyService> service_;
+};
+
+TEST_F(ProxyServiceTest, SanitizeUrlDefaultsToSafe) {
+  SanitizeUrlHelper helper;
+
+  // Without changing the URL sanitization policy, the default should be to
+  // strip https:// URLs.
+  EXPECT_EQ(GURL("https://example.com/"),
+            helper.SanitizeUrl(
+                GURL("https://foo:bar@example.com/foo/bar/baz?hello#sigh")));
+}
+
+// Tests URL sanitization with input URLs that have a // non-cryptographic
+// scheme (i.e. http://). The sanitized result is consistent regardless of the
+// stripping mode selected.
+TEST_F(ProxyServiceTest, SanitizeUrlForPacScriptNonCryptographic) {
+  const struct {
+    const char* raw_url;
+    const char* sanitized_url;
+  } kTests[] = {
+      // Embedded identity is stripped.
+      {
+          "http://foo:bar@example.com/", "http://example.com/",
+      },
+      {
+          "ftp://foo:bar@example.com/", "ftp://example.com/",
+      },
+      {
+          "ftp://example.com/some/path/here",
+          "ftp://example.com/some/path/here",
+      },
+      // Reference fragment is stripped.
+      {
+          "http://example.com/blah#hello", "http://example.com/blah",
+      },
+      // Query parameters are NOT stripped.
+      {
+          "http://example.com/foo/bar/baz?hello",
+          "http://example.com/foo/bar/baz?hello",
+      },
+      // Fragment is stripped, but path and query are left intact.
+      {
+          "http://foo:bar@example.com/foo/bar/baz?hello#sigh",
+          "http://example.com/foo/bar/baz?hello",
+      },
+      // Port numbers are not affected.
+      {
+          "http://example.com:88/hi", "http://example.com:88/hi",
+      },
+  };
+
+  SanitizeUrlHelper helper;
+
+  for (const auto& test : kTests) {
+    // The result of SanitizeUrlForPacScript() is the same regardless of the
+    // second parameter (sanitization mode), since the input URLs do not use a
+    // cryptographic scheme.
+    GURL raw_url(test.raw_url);
+    ASSERT_TRUE(raw_url.is_valid());
+    EXPECT_FALSE(raw_url.SchemeIsCryptographic());
+
+    EXPECT_EQ(
+        GURL(test.sanitized_url),
+        helper.SanitizeUrl(raw_url, ProxyService::SanitizeUrlPolicy::UNSAFE));
+
+    EXPECT_EQ(
+        GURL(test.sanitized_url),
+        helper.SanitizeUrl(raw_url, ProxyService::SanitizeUrlPolicy::SAFE));
+  }
+}
+
+// Tests URL sanitization using input URLs that have a cryptographic schemes
+// (i.e. https://). The sanitized result differs depending on the sanitization
+// mode chosen.
+TEST_F(ProxyServiceTest, SanitizeUrlForPacScriptCryptographic) {
+  const struct {
+    // Input URL.
+    const char* raw_url;
+
+    // Output URL when stripping of cryptographic URLs is disabled.
+    const char* sanitized_url_unstripped;
+
+    // Output URL when stripping of cryptographic URLs is enabled.
+    const char* sanitized_url;
+  } kTests[] = {
+      // Embedded identity is always stripped.
+      {
+          "https://foo:bar@example.com/", "https://example.com/",
+          "https://example.com/",
+      },
+      // Fragments are always stripped, but stripping path is conditional on the
+      // mode.
+      {
+          "https://example.com/blah#hello", "https://example.com/blah",
+          "https://example.com/",
+      },
+      // Stripping the query is conditional on the mode.
+      {
+          "https://example.com/?hello", "https://example.com/?hello",
+          "https://example.com/",
+      },
+      // The embedded identity and fragment is always stripped, however path and
+      // query are conditional on the stripping mode.
+      {
+          "https://foo:bar@example.com/foo/bar/baz?hello#sigh",
+          "https://example.com/foo/bar/baz?hello", "https://example.com/",
+      },
+      // The URL's port should not be stripped.
+      {
+          "https://example.com:88/hi", "https://example.com:88/hi",
+          "https://example.com:88/",
+      },
+      // Try a wss:// URL, to make sure it also strips (is is also a
+      // cryptographic URL).
+      {
+          "wss://example.com:88/hi", "wss://example.com:88/hi",
+          "wss://example.com:88/",
+      },
+  };
+
+  SanitizeUrlHelper helper;
+
+  for (const auto& test : kTests) {
+    GURL raw_url(test.raw_url);
+    ASSERT_TRUE(raw_url.is_valid());
+    EXPECT_TRUE(raw_url.SchemeIsCryptographic());
+
+    EXPECT_EQ(
+        GURL(test.sanitized_url_unstripped),
+        helper.SanitizeUrl(raw_url, ProxyService::SanitizeUrlPolicy::UNSAFE));
+
+    EXPECT_EQ(
+        GURL(test.sanitized_url),
+        helper.SanitizeUrl(raw_url, ProxyService::SanitizeUrlPolicy::SAFE));
+  }
+}
+
 }  // namespace net