Added characters that look like padlocks to URL unescaping blacklist.

This blacklists the following Unicode characters:
- U+1F50F LOCK WITH INK PEN
- U+1F510 CLOSED LOCK WITH KEY
- U+1F512 LOCK
- U+1F513 OPEN LOCK

This prevents LOCK characters from appearing in a URL in the Chrome UI,
where they could be mistaken for an SSL padlock icon (e.g.,
"google.com/🔒" is now displayed as "google.com/%F0%9F%94%92").
Displaying them unescaped presented a spoofing risk due to a few
complications:
1. In RTL mode, the end of the URL (path/query) is aligned right up
   against the right edge of the Omnibox, where the SSL padlock is
   usually displayed.
2. On Mac, ChromeOS, and Android, LOCK characters are displayed in
   colour, making them more convincing.

Note: These characters will still be unescaped when using the
SPOOFING_AND_CONTROL_CHARS unescape rule (previously known as
CONTROL_CHARS), which is used for decoding data URLs.

BUG=495934,421332
TBR=jam@chromium.org

Review URL: https://codereview.chromium.org/1180393003

Cr-Commit-Position: refs/heads/master@{#335870}
diff --git a/chrome/browser/safe_browsing/safe_browsing_util.cc b/chrome/browser/safe_browsing/safe_browsing_util.cc
index 6a043c59..728bd96 100644
--- a/chrome/browser/safe_browsing/safe_browsing_util.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_util.cc
@@ -272,9 +272,10 @@
   int loop_var = 0;
   do {
     old_unescaped_str = unescaped_str;
-    unescaped_str = net::UnescapeURLComponent(old_unescaped_str,
-        net::UnescapeRule::CONTROL_CHARS | net::UnescapeRule::SPACES |
-        net::UnescapeRule::URL_SPECIAL_CHARS);
+    unescaped_str = net::UnescapeURLComponent(
+        old_unescaped_str, net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS |
+                               net::UnescapeRule::SPACES |
+                               net::UnescapeRule::URL_SPECIAL_CHARS);
   } while (unescaped_str != old_unescaped_str && ++loop_var <=
            kMaxLoopIterations);
 
diff --git a/chrome/common/instant_types.cc b/chrome/common/instant_types.cc
index b2149859..c3a2dd23 100644
--- a/chrome/common/instant_types.cc
+++ b/chrome/common/instant_types.cc
@@ -97,9 +97,9 @@
   query.len = static_cast<int>(url_params.size());
 
   const net::UnescapeRule::Type unescape_rules =
-      net::UnescapeRule::CONTROL_CHARS | net::UnescapeRule::SPACES |
-      net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::NORMAL |
-      net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
+      net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS |
+      net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS |
+      net::UnescapeRule::NORMAL | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
 
   while (url::ExtractQueryKeyValue(url_params.c_str(), &query, &key, &value)) {
     if (!key.is_nonempty())
diff --git a/components/policy/core/common/cloud/device_management_service_unittest.cc b/components/policy/core/common/cloud/device_management_service_unittest.cc
index 92b1600..82ca7bb 100644
--- a/components/policy/core/common/cloud/device_management_service_unittest.cc
+++ b/components/policy/core/common/cloud/device_management_service_unittest.cc
@@ -336,7 +336,7 @@
           net::UnescapeRule::NORMAL |
           net::UnescapeRule::SPACES |
           net::UnescapeRule::URL_SPECIAL_CHARS |
-          net::UnescapeRule::CONTROL_CHARS |
+          net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS |
           net::UnescapeRule::REPLACE_PLUS_WITH_SPACE));
       if (unescaped_name == name) {
         if (found)
@@ -347,7 +347,7 @@
             net::UnescapeRule::NORMAL |
             net::UnescapeRule::SPACES |
             net::UnescapeRule::URL_SPECIAL_CHARS |
-            net::UnescapeRule::CONTROL_CHARS |
+            net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS |
             net::UnescapeRule::REPLACE_PLUS_WITH_SPACE));
         if (unescaped_value != expected_value)
           return false;
diff --git a/content/browser/web_contents/web_drag_source_mac.mm b/content/browser/web_contents/web_drag_source_mac.mm
index e6e8595..ff3a881e 100644
--- a/content/browser/web_contents/web_drag_source_mac.mm
+++ b/content/browser/web_contents/web_drag_source_mac.mm
@@ -173,7 +173,7 @@
       net::UnescapeRule::Type unescapeRules =
           net::UnescapeRule::SPACES |
           net::UnescapeRule::URL_SPECIAL_CHARS |
-          net::UnescapeRule::CONTROL_CHARS;
+          net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS;
       std::string unescapedUrlString =
           net::UnescapeURLComponent(dropData_->url.spec(), unescapeRules);
       std::string escapedUrlString =
diff --git a/extensions/browser/api/web_request/form_data_parser.cc b/extensions/browser/api/web_request/form_data_parser.cc
index 2e962dd..31b6fe8 100644
--- a/extensions/browser/api/web_request/form_data_parser.cc
+++ b/extensions/browser/api/web_request/form_data_parser.cc
@@ -350,8 +350,9 @@
 FormDataParser::FormDataParser() {}
 
 const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ =
-    net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
-    net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
+    net::UnescapeRule::URL_SPECIAL_CHARS |
+    net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS | net::UnescapeRule::SPACES |
+    net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
 
 FormDataParserUrlEncoded::FormDataParserUrlEncoded()
     : source_(NULL),
@@ -516,8 +517,8 @@
   }
 
   std::string unescaped_name = net::UnescapeURLComponent(
-      name.as_string(),
-      net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS);
+      name.as_string(), net::UnescapeRule::URL_SPECIAL_CHARS |
+                            net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS);
   result->set_name(unescaped_name);
   result->set_value(value);
 
diff --git a/net/base/data_url.cc b/net/base/data_url.cc
index d558587..e0502d3 100644
--- a/net/base/data_url.cc
+++ b/net/base/data_url.cc
@@ -99,7 +99,7 @@
   if (base64_encoded) {
     temp_data = UnescapeURLComponent(temp_data,
         UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |
-        UnescapeRule::CONTROL_CHARS);
+        UnescapeRule::SPOOFING_AND_CONTROL_CHARS);
   }
 
   // Strip whitespace.
@@ -113,7 +113,7 @@
   if (!base64_encoded) {
     temp_data = UnescapeURLComponent(temp_data,
         UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS |
-        UnescapeRule::CONTROL_CHARS);
+        UnescapeRule::SPOOFING_AND_CONTROL_CHARS);
   }
 
   if (base64_encoded) {
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 972c285..15de5e1 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -164,6 +164,38 @@
   return third_byte >= 0xA6 && third_byte <= 0xA9;
 }
 
+// Returns true if the escape sequence at |index| decodes to a banned four-byte
+// char. |first_byte| is the already-unescaped first byte of that sequence.
+template <typename STR>
+bool HasFourByteBannedCharAtIndex(const STR& escaped_text,
+                                  unsigned char first_byte,
+                                  size_t index) {
+  // The following characters are blacklisted for spoofability concerns.
+  // U+1F50F LOCK WITH INK PEN         (%F0%9F%94%8F)
+  // U+1F510 CLOSED LOCK WITH KEY      (%F0%9F%94%90)
+  // U+1F512 LOCK                      (%F0%9F%94%92)
+  // U+1F513 OPEN LOCK                 (%F0%9F%94%93)
+  if (first_byte != 0xF0)  // All four banned chars share the F0 9F 94 prefix.
+    return false;
+
+  unsigned char second_byte;  // Each escaped byte is 3 chars wide ("%XX").
+  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte) ||
+      second_byte != 0x9F) {
+    return false;
+  }
+
+  unsigned char third_byte;
+  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte) ||
+      third_byte != 0x94) {
+    return false;
+  }
+
+  unsigned char fourth_byte;  // Only the last byte tells the four chars apart.
+  return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&
+         (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||
+          fourth_byte == 0x93);
+}
+
 // Unescapes |escaped_text| according to |rules|, returning the resulting
 // string.  Fills in an |adjustments| parameter, if non-NULL, so it reflects
 // the alterations done to the string that are not one-character-to-one-
@@ -217,12 +249,20 @@
       // U+2068 FIRST STRONG ISOLATE       (%E2%81%A8)
       // U+2069 POP DIRECTIONAL ISOLATE    (%E2%81%A9)
       //
+      // The following spoofable characters are also banned, because they could
+      // be used to imitate parts of a web browser's UI.
+      //
+      // U+1F50F LOCK WITH INK PEN         (%F0%9F%94%8F)
+      // U+1F510 CLOSED LOCK WITH KEY      (%F0%9F%94%90)
+      // U+1F512 LOCK                      (%F0%9F%94%92)
+      // U+1F513 OPEN LOCK                 (%F0%9F%94%93)
+      //
       // However, some schemes such as data: and file: need to parse the exact
-      // binary data when loading the URL. For that reason, CONTROL_CHARS allows
-      // unescaping BiDi control characters.
-      // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed
-      // in the UI.
-      if (!(rules & UnescapeRule::CONTROL_CHARS)) {
+      // binary data when loading the URL. For that reason,
+      // SPOOFING_AND_CONTROL_CHARS allows unescaping all of the banned
+      // characters above. DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed
+      // URL is going to be displayed in the UI.
+      if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {
         if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
           // Keep Arabic Language Mark escaped.
           result.append(escaped_text, i, 6);
@@ -235,6 +275,12 @@
           i += 8;
           continue;
         }
+        if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
+          // Keep banned char escaped.
+          result.append(escaped_text, i, 12);
+          i += 11;
+          continue;
+        }
       }
 
       if (first_byte >= 0x80 ||  // Unescape all high-bit characters.
@@ -245,8 +291,9 @@
            // Allow any of the prohibited but non-control characters when
            // we're doing "special" chars.
            (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
-           // Additionally allow control characters if requested.
-           (first_byte < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
+           // Additionally allow non-display characters if requested.
+           (first_byte < ' ' &&
+            (rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)))) {
         // Use the unescaped version of the character.
         if (adjustments)
           adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1));
diff --git a/net/base/escape.h b/net/base/escape.h
index c4abe14..9c500b0 100644
--- a/net/base/escape.h
+++ b/net/base/escape.h
@@ -92,13 +92,14 @@
     // interpreting as a URL and want to do as much unescaping as possible.
     URL_SPECIAL_CHARS = 4,
 
-    // Unescapes control characters such as %01. This INCLUDES NULLs. This is
-    // used for rare cases such as data: URL decoding where the result is binary
-    // data. This flag also unescapes BiDi control characters.
+    // Unescapes characters that can be used in spoofing attempts (such as LOCK)
+    // and control characters (such as BiDi control characters and %01).  This
+    // INCLUDES NULLs.  This is used for rare cases such as data: URL decoding
+    // where the result is binary data.
     //
-    // DO NOT use CONTROL_CHARS if the URL is going to be displayed in the UI
-    // for security reasons.
-    CONTROL_CHARS = 8,
+    // DO NOT use SPOOFING_AND_CONTROL_CHARS if the URL is going to be displayed
+    // in the UI for security reasons.
+    SPOOFING_AND_CONTROL_CHARS = 8,
 
     // URL queries use "+" for space. This flag controls that replacement.
     REPLACE_PLUS_WITH_SPACE = 16,
diff --git a/net/base/escape_unittest.cc b/net/base/escape_unittest.cc
index 98afb8cf..b6f0229 100644
--- a/net/base/escape_unittest.cc
+++ b/net/base/escape_unittest.cc
@@ -182,10 +182,12 @@
     // Control characters.
     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
      "%01%02%03%04%05%06%07%08%09 %"},
-    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
+    {"%01%02%03%04%05%06%07%08%09 %25",
+     UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
     {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
-    {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
+    {"Hello%20%13%10%02", UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
+     "Hello%20\x13\x10\x02"},
   };
 
   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
@@ -205,7 +207,8 @@
   expected.push_back(0);
   expected.push_back(0);
   expected.append("9Test");
-  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
+  EXPECT_EQ(expected, UnescapeURLComponent(
+                          input, UnescapeRule::SPOOFING_AND_CONTROL_CHARS));
 
   // When we're not unescaping NULLs.
   expected = "Null";
@@ -233,7 +236,7 @@
      L"Some%20random text %25\xE2\x80\x84OK"},
 
     // BiDi Control characters should not be unescaped unless explicity told to
-    // do so with UnescapeRule::CONTROL_CHARS
+    // do so with UnescapeRule::SPOOFING_AND_CONTROL_CHARS
     {L"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
      L"Some%20random text %25%D8%9COK"},
     {L"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
@@ -250,32 +253,62 @@
      L"Some%20random text %25%E2%81%A6OK"},
     {L"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
      L"Some%20random text %25%E2%81%A9OK"},
-    // UnescapeRule::CONTROL_CHARS should unescape BiDi Control characters.
+    // UnescapeRule::SPOOFING_AND_CONTROL_CHARS should unescape BiDi Control
+    // characters.
     {L"Some%20random text %25%D8%9COK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xD8\x9COK"},
     {L"Some%20random text %25%E2%80%8EOK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x80\x8EOK"},
     {L"Some%20random text %25%E2%80%8FOK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x80\x8FOK"},
     {L"Some%20random text %25%E2%80%AAOK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x80\xAAOK"},
     {L"Some%20random text %25%E2%80%ABOK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x80\xABOK"},
     {L"Some%20random text %25%E2%80%AEOK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x80\xAEOK"},
     {L"Some%20random text %25%E2%81%A6OK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x81\xA6OK"},
     {L"Some%20random text %25%E2%81%A9OK",
-     UnescapeRule::NORMAL | UnescapeRule::CONTROL_CHARS,
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Some%20random text %25\xE2\x81\xA9OK"},
 
+    // Certain banned characters should not be unescaped unless explicitly told
+    // to do so with UnescapeRule::SPOOFING_AND_CONTROL_CHARS.
+    // U+1F50F LOCK WITH INK PEN
+    {L"Some%20random text %25%F0%9F%94%8FOK", UnescapeRule::NORMAL,
+     L"Some%20random text %25%F0%9F%94%8FOK"},
+    // U+1F510 CLOSED LOCK WITH KEY
+    {L"Some%20random text %25%F0%9F%94%90OK", UnescapeRule::NORMAL,
+     L"Some%20random text %25%F0%9F%94%90OK"},
+    // U+1F512 LOCK
+    {L"Some%20random text %25%F0%9F%94%92OK", UnescapeRule::NORMAL,
+     L"Some%20random text %25%F0%9F%94%92OK"},
+    // U+1F513 OPEN LOCK
+    {L"Some%20random text %25%F0%9F%94%93OK", UnescapeRule::NORMAL,
+     L"Some%20random text %25%F0%9F%94%93OK"},
+    // UnescapeRule::SPOOFING_AND_CONTROL_CHARS should unescape banned
+    // characters.
+    {L"Some%20random text %25%F0%9F%94%8FOK",
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
+     L"Some%20random text %25\xF0\x9F\x94\x8FOK"},
+    {L"Some%20random text %25%F0%9F%94%90OK",
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
+     L"Some%20random text %25\xF0\x9F\x94\x90OK"},
+    {L"Some%20random text %25%F0%9F%94%92OK",
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
+     L"Some%20random text %25\xF0\x9F\x94\x92OK"},
+    {L"Some%20random text %25%F0%9F%94%93OK",
+     UnescapeRule::NORMAL | UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
+     L"Some%20random text %25\xF0\x9F\x94\x93OK"},
+
     {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
      L"Some random text %25-OK"},
     {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
@@ -298,12 +331,13 @@
     // Control characters.
     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
      L"%01%02%03%04%05%06%07%08%09 %"},
-    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
+    {L"%01%02%03%04%05%06%07%08%09 %25",
+     UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
     {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
-    {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
+    {L"Hello%20%13%10%02", UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Hello%20\x13\x10\x02"},
-    {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
+    {L"Hello\x9824\x9827", UnescapeRule::SPOOFING_AND_CONTROL_CHARS,
      L"Hello\x9824\x9827"},
   };
 
@@ -324,7 +358,8 @@
   expected.push_back(0);
   expected.push_back(0);
   expected.append(base::ASCIIToUTF16("9Test"));
-  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
+  EXPECT_EQ(expected, UnescapeURLComponent(
+                          input, UnescapeRule::SPOOFING_AND_CONTROL_CHARS));
 
   // When we're not unescaping NULLs.
   expected = base::WideToUTF16(L"Null");
diff --git a/storage/common/fileapi/file_system_util.cc b/storage/common/fileapi/file_system_util.cc
index f67c864..98b6cc8 100644
--- a/storage/common/fileapi/file_system_util.cc
+++ b/storage/common/fileapi/file_system_util.cc
@@ -182,7 +182,7 @@
 
   std::string path = net::UnescapeURLComponent(url.path(),
       net::UnescapeRule::SPACES | net::UnescapeRule::URL_SPECIAL_CHARS |
-      net::UnescapeRule::CONTROL_CHARS);
+      net::UnescapeRule::SPOOFING_AND_CONTROL_CHARS);
 
   // Ensure the path is relative.
   while (!path.empty() && path[0] == '/')