Do not percent-encode '&' and ';' in URL queries.

The URL spec has specified that U+0026 ('&') and U+003B (';') should not be
percent-encoded since
https://github.com/whatwg/url/commit/bbeacfe00317554daab0ece983f42692d42487fc,
so follow suit a few years late and match what Gecko has already been doing.

Bug: 795733
Change-Id: Icc28699fc14b63ef92751e81d94fb3eeee876781
Reviewed-on: https://chromium-review.googlesource.com/829313
Reviewed-by: Joshua Bell <jsbell@chromium.org>
Commit-Queue: Raphael Kubo da Costa <raphael.kubo.da.costa@intel.com>
Cr-Commit-Position: refs/heads/master@{#525101}

commit: 27eaa9817b9837aa478e0667158292a0295985df [log] [tgz]
author: Raphael Kubo da Costa <raphael.kubo.da.costa@intel.com> Tue Dec 19 19:21:02 2017
committer: Commit Bot <commit-bot@chromium.org> Tue Dec 19 19:21:02 2017
tree: ca89feb42d81748c0634eaa28c33a533e77d5fce
parent: 70e9f46252e6732b25744ca62f7cf4af73d028aa [diff]
diff --git a/third_party/WebKit/LayoutTests/external/wpt/XMLHttpRequest/open-url-encoding-expected.txt b/third_party/WebKit/LayoutTests/external/wpt/XMLHttpRequest/open-url-encoding-expected.txt
deleted file mode 100644
index ba9f4c4f..0000000
--- a/third_party/WebKit/LayoutTests/external/wpt/XMLHttpRequest/open-url-encoding-expected.txt
+++ /dev/null

@@ -1,5 +0,0 @@
-This is a testharness.js-based test.
-PASS percent encode characters
-FAIL lone surrogate assert_equals: expected "&%2365533;" but got "%26%2365533%3B"
-Harness: the test ran to completion.
-

diff --git a/third_party/WebKit/LayoutTests/external/wpt/encoding/big5-encoder-expected.txt b/third_party/WebKit/LayoutTests/external/wpt/encoding/big5-encoder-expected.txt
deleted file mode 100644
index af399bc..0000000
--- a/third_party/WebKit/LayoutTests/external/wpt/encoding/big5-encoder-expected.txt
+++ /dev/null

@@ -1,16 +0,0 @@
-This is a testharness.js-based test.
-PASS big5 encoder: very basic
-FAIL big5 encoder: Highest-pointer BMP character excluded from encoder assert_equals: expected "X&%2340614;X" but got "X%26%2340614%3BX"
-FAIL big5 encoder: Highest-pointer character excluded from encoder assert_equals: expected "X&%23156267;X" but got "X%26%23156267%3BX"
-PASS big5 encoder: Lowest-pointer character included in encoder
-PASS big5 encoder: Euro; the highest-pointer character before a range of 30 unmapped pointers
-PASS big5 encoder: The lowest-pointer character after the range of 30 unmapped pointers
-PASS big5 encoder: The highest-pointer character before a range of 41 unmapped pointers
-PASS big5 encoder: The lowest-pointer character after the range of 41 unmapped pointers
-PASS big5 encoder: The last character in the index
-FAIL big5 encoder: The canonical BMP test character that is not in the index assert_equals: expected "X&%239731;X" but got "X%26%239731%3BX"
-FAIL big5 encoder: The canonical astral test character that is not in the index assert_equals: expected "X&%23128169;X" but got "X%26%23128169%3BX"
-PASS big5 encoder: A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer
-PASS big5 encoder: A duplicate-mapped code point that prefers the highest pointer in the encoder
-Harness: the test ran to completion.
-

diff --git a/third_party/WebKit/LayoutTests/external/wpt/encoding/gbk-encoder-expected.txt b/third_party/WebKit/LayoutTests/external/wpt/encoding/gbk-encoder-expected.txt
deleted file mode 100644
index c4b35d1..0000000
--- a/third_party/WebKit/LayoutTests/external/wpt/encoding/gbk-encoder-expected.txt
+++ /dev/null

@@ -1,9 +0,0 @@
-This is a testharness.js-based test.
-PASS gbk encoder: very basic
-PASS gbk encoder: Euro
-PASS gbk encoder: character
-PASS gbk encoder: PUA
-PASS gbk encoder: PUA #2
-FAIL gbk encoder: poo assert_equals: expected "&%23128169;" but got "%26%23128169%3B"
-Harness: the test ran to completion.
-

diff --git a/third_party/WebKit/LayoutTests/fast/url/query-expected.txt b/third_party/WebKit/LayoutTests/fast/url/query-expected.txt
index e4816a5..54e4382 100644
--- a/third_party/WebKit/LayoutTests/fast/url/query-expected.txt
+++ b/third_party/WebKit/LayoutTests/fast/url/query-expected.txt

@@ -7,8 +7,8 @@
 PASS canonicalize('http://www.example.com/?as?df') is 'http://www.example.com/?as?df'
 PASS canonicalize('http://www.example.com/?\x02hello bye') is 'http://www.example.com/?%02hello%7F%20bye'
 PASS canonicalize('http://www.example.com/?%40%41123') is 'http://www.example.com/?%40%41123'
-PASS canonicalize('http://www.example.com/?q=你好') is 'http://www.example.com/?q=%26%2320320%3B%26%2322909%3B'
-PASS canonicalize('http://www.example.com/?q=\ud800\ud800') is 'http://www.example.com/?q=%26%2355296%3B%26%2355296%3B'
+PASS canonicalize('http://www.example.com/?q=你好') is 'http://www.example.com/?q=&%2320320;&%2322909;'
+PASS canonicalize('http://www.example.com/?q=\ud800\ud800') is 'http://www.example.com/?q=&%2355296;&%2355296;'
 PASS canonicalize('http://www.example.com/?q=<asdf>') is 'http://www.example.com/?q=%3Casdf%3E'
 PASS canonicalize('http://www.example.com/?q="asdf"') is 'http://www.example.com/?q=%22asdf%22'
 PASS successfullyParsed is true

diff --git a/third_party/WebKit/LayoutTests/fast/url/script-tests/query.js b/third_party/WebKit/LayoutTests/fast/url/script-tests/query.js
index dcb6028..0f3c153 100644
--- a/third_party/WebKit/LayoutTests/fast/url/script-tests/query.js
+++ b/third_party/WebKit/LayoutTests/fast/url/script-tests/query.js

@@ -12,9 +12,9 @@
   ["\\x02hello\x7f bye", "%02hello%7F%20bye"],
   ["%40%41123", "%40%41123"],
   // Chinese input/output
-  ["q=\u4F60\u597D", "q=%26%2320320%3B%26%2322909%3B"],
+  ["q=\u4F60\u597D", "q=&%2320320;&%2322909;"],
   // Invalid UTF-8/16 input should be replaced with invalid characters.
-  ["q=\\ud800\\ud800", "q=%26%2355296%3B%26%2355296%3B"],
+  ["q=\\ud800\\ud800", "q=&%2355296;&%2355296;"],
   // Don't allow < or > because sometimes they are used for XSS if the
   // URL is echoed in content. Firefox does this, IE doesn't.
   ["q=<asdf>", "q=%3Casdf%3E"],

diff --git a/third_party/WebKit/LayoutTests/http/tests/uri/escaped-entity-expected.txt b/third_party/WebKit/LayoutTests/http/tests/uri/escaped-entity-expected.txt
index 251d339..6ac700b 100644
--- a/third_party/WebKit/LayoutTests/http/tests/uri/escaped-entity-expected.txt
+++ b/third_party/WebKit/LayoutTests/http/tests/uri/escaped-entity-expected.txt

@@ -2,9 +2,9 @@
 
 Note that this exact page won't work in IE or Firefox. Firefox seems to always use UTF-8 for local files, and IE actually preserves the Unicode in the URL when we get it from JS, so we don't know what would get sent over the wire. However, both browsers will send %26%231758%3B over HTTP for the query.
 
-"/uri/intercept/print/script.js?%26%231758%3B" (no target charset specified, should be Big5)
-"/uri/intercept/print/script.js?%26%231758%3B" (Big5 specified)
+"/uri/intercept/print/script.js?&%231758;" (no target charset specified, should be Big5)
+"/uri/intercept/print/script.js?&%231758;" (Big5 specified)
 Show the source attribute of the scripts.
-"http://127.0.0.1:8000/uri/intercept/print/script.js?%26%231758%3B"
-"http://127.0.0.1:8000/uri/intercept/print/script.js?%26%231758%3B"
+"http://127.0.0.1:8000/uri/intercept/print/script.js?&%231758;"
+"http://127.0.0.1:8000/uri/intercept/print/script.js?&%231758;"
 

diff --git a/third_party/WebKit/Source/platform/wtf/text/TextCodec.cpp b/third_party/WebKit/Source/platform/wtf/text/TextCodec.cpp
index ddcafffe..67ddf59 100644
--- a/third_party/WebKit/Source/platform/wtf/text/TextCodec.cpp
+++ b/third_party/WebKit/Source/platform/wtf/text/TextCodec.cpp

@@ -42,10 +42,9 @@
                code_point);
       return static_cast<int>(strlen(replacement));
     case kURLEncodedEntitiesForUnencodables:
-      snprintf(replacement, sizeof(UnencodableReplacementArray),
-               "%%26%%23%u%%3B", code_point);
+      snprintf(replacement, sizeof(UnencodableReplacementArray), "&%%23%u;",
+               code_point);
       return static_cast<int>(strlen(replacement));
-
     case kCSSEncodedEntitiesForUnencodables:
       snprintf(replacement, sizeof(UnencodableReplacementArray), "\\%x ",
                code_point);

diff --git a/third_party/WebKit/Source/platform/wtf/text/TextCodec.h b/third_party/WebKit/Source/platform/wtf/text/TextCodec.h
index dddd7d74..f18c149 100644
--- a/third_party/WebKit/Source/platform/wtf/text/TextCodec.h
+++ b/third_party/WebKit/Source/platform/wtf/text/TextCodec.h

@@ -46,7 +46,7 @@
 
   // Encodes the character as en entity as above, but escaped
   // non-alphanumeric characters. This is used in URLs.
-  // For example, U+6DE would be "%26%231758%3B".
+  // For example, U+6DE would be "&%231758;".
   kURLEncodedEntitiesForUnencodables,
 
   // Encodes the character as a CSS entity.  For example U+06DE

diff --git a/third_party/WebKit/Source/platform/wtf/text/TextCodecTest.cpp b/third_party/WebKit/Source/platform/wtf/text/TextCodecTest.cpp
index 21a7249..d9cf9bfe 100644
--- a/third_party/WebKit/Source/platform/wtf/text/TextCodecTest.cpp
+++ b/third_party/WebKit/Source/platform/wtf/text/TextCodecTest.cpp

@@ -49,9 +49,9 @@
   UnencodableReplacementArray replacement;
   int size = TextCodec::GetUnencodableReplacement(
       0xE003, kURLEncodedEntitiesForUnencodables, replacement);
-  EXPECT_EQ(size, 14);
-  EXPECT_EQ(std::string(replacement), "%26%2357347%3B");
-  EXPECT_EQ(replacement[14], 0);
+  EXPECT_EQ(size, 10);
+  EXPECT_EQ(std::string(replacement), "&%2357347;");
+  EXPECT_EQ(replacement[10], 0);
 }
 
 TEST(TextCodec, CSSEntityEncoding) {
commit	27eaa9817b9837aa478e0667158292a0295985df	[log] [tgz]
author	Raphael Kubo da Costa <raphael.kubo.da.costa@intel.com>	Tue Dec 19 19:21:02 2017
committer	Commit Bot <commit-bot@chromium.org>	Tue Dec 19 19:21:02 2017
tree	ca89feb42d81748c0634eaa28c33a533e77d5fce
parent	70e9f46252e6732b25744ca62f7cf4af73d028aa [diff]