Escape "--" in the page URL at page serialization

This patch makes the page serializer escape "--" in the page URL that is
embedded in an HTML comment of the resulting HTML[1], so that text from the
URL cannot be inserted as HTML. It introduces a static member function,
|PageSerializer::markOfTheWebDeclaration()|, which is shared by the
|PageSerializer| and |WebPageSerializer| classes.

[1] We use the following format for serialized HTML:
saved from url=(${lengthOfURL})${URL}

BUG=503217
TEST=webkit_unit_tests --gtest_filter=PageSerializerTest.markOfTheWebDeclaration
TEST=webkit_unit_tests --gtest_filter=WebPageSerializerTest.fromUrlWithMinusMinus

Review URL: https://codereview.chromium.org/1371323003

Cr-Commit-Position: refs/heads/master@{#351736}
diff --git a/third_party/WebKit/Source/core/page/PageSerializer.cpp b/third_party/WebKit/Source/core/page/PageSerializer.cpp
index a847813..33498c6 100644
--- a/third_party/WebKit/Source/core/page/PageSerializer.cpp
+++ b/third_party/WebKit/Source/core/page/PageSerializer.cpp
@@ -253,9 +253,7 @@
         // Add MOTW (Mark of the Web) declaration before html tag.
         // See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
         result.append('\n');
-        MarkupFormatter::appendComment(result, String::format(" saved from url=(%04d)%s ",
-            static_cast<int>(document().url().string().utf8().length()),
-            document().url().string().utf8().data()));
+        MarkupFormatter::appendComment(result, PageSerializer::markOfTheWebDeclaration(document().url()));
         result.append('\n');
     }
 
@@ -577,4 +575,26 @@
     return m_delegate.get();
 }
 
+// Returns the text of the MOTW (Mark of the Web) comment emitted before the
+// html tag: "saved from url=(%04d)%s", where %s is |url| with the second '-'
+// of each "--" written as "%2D" and %04d is the length of that escaped URL.
+// See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
+String PageSerializer::markOfTheWebDeclaration(const KURL& url)
+{
+    const CString originalUrl = url.string().ascii();
+    StringBuilder escaped;
+    bool previousWasMinus = false;
+    for (const char* cursor = originalUrl.data(); *cursor; ++cursor) {
+        const bool completesMinusPair = previousWasMinus && *cursor == '-';
+        if (completesMinusPair)
+            escaped.append("%2D");
+        else
+            escaped.append(*cursor);
+        // An escaped '-' never starts a new pair, so "---" yields "-%2D-".
+        previousWasMinus = !completesMinusPair && *cursor == '-';
+    }
+    const CString escapedUrl = escaped.toString().ascii();
+    return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl.length()), escapedUrl.data());
+}
+
 } // namespace blink
diff --git a/third_party/WebKit/Source/core/page/PageSerializer.h b/third_party/WebKit/Source/core/page/PageSerializer.h
index 402752d..86d17ba 100644
--- a/third_party/WebKit/Source/core/page/PageSerializer.h
+++ b/third_party/WebKit/Source/core/page/PageSerializer.h
@@ -84,6 +84,8 @@
 
     Delegate* delegate();
 
+    static String markOfTheWebDeclaration(const KURL&);
+
 private:
     void serializeFrame(LocalFrame*);
 
diff --git a/third_party/WebKit/Source/web/WebPageSerializer.cpp b/third_party/WebKit/Source/web/WebPageSerializer.cpp
index f995480..d8df886 100644
--- a/third_party/WebKit/Source/web/WebPageSerializer.cpp
+++ b/third_party/WebKit/Source/web/WebPageSerializer.cpp
@@ -267,19 +267,23 @@
 
 WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& charset)
 {
+    // TODO(yosin): Call |PageSerializer::metaCharsetDeclarationOf()| here instead of building the <meta> tag by hand.
     String charsetString = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=" + static_cast<const String&>(charset) + "\">";
     return charsetString;
 }
 
 WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url)
 {
-    return String::format("\n<!-- saved from url=(%04d)%s -->\n",
-                          static_cast<int>(url.spec().length()),
-                          url.spec().data());
+    // Wrap the shared MOTW text, in which "--" is already escaped, in an HTML
+    // comment placed on a line of its own.
+    const String declaration = PageSerializer::markOfTheWebDeclaration(url);
+    String markup = "\n<!-- " + declaration + " -->\n";
+    return markup;
 }
 
 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
 {
+    // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|.
     if (baseTarget.isEmpty())
         return String("<base href=\".\">");
     String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">";
diff --git a/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp b/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp
index 34cc912..877da4b 100644
--- a/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp
+++ b/third_party/WebKit/Source/web/tests/PageSerializerTest.cpp
@@ -496,4 +496,15 @@
     EXPECT_GT(getSerializedData("namespace_element.html", "text/html").length(), 0U);
 }
 
+TEST_F(PageSerializerTest, markOfTheWebDeclaration)
+{
+    EXPECT_EQ("saved from url=(0015)http://foo.com/", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://foo.com"))); // No '-' in the URL.
+    EXPECT_EQ("saved from url=(0015)http://f-o.com/", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://f-o.com"))); // A lone '-' is kept as is.
+    EXPECT_EQ("saved from url=(0019)http://foo.com-%2D/", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://foo.com--"))); // "--" in the host.
+    EXPECT_EQ("saved from url=(0024)http://f-%2D.com-%2D%3E/", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://f--.com-->"))); // "-->" must not survive; '>' comes out as "%3E".
+    EXPECT_EQ("saved from url=(0020)http://foo.com/?-%2D", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://foo.com?--"))); // "--" in the query.
+    EXPECT_EQ("saved from url=(0020)http://foo.com/#-%2D", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://foo.com#--"))); // "--" in the fragment.
+    EXPECT_EQ("saved from url=(0026)http://foo.com/#bar-%2Dbaz", PageSerializer::markOfTheWebDeclaration(KURL(ParsedURLString, "http://foo.com#bar--baz"))); // "--" in the middle of the fragment.
+}
+
 } // namespace blink
diff --git a/third_party/WebKit/Source/web/tests/WebPageSerializerTest.cpp b/third_party/WebKit/Source/web/tests/WebPageSerializerTest.cpp
index 3ab3b47..0c2821a 100644
--- a/third_party/WebKit/Source/web/tests/WebPageSerializerTest.cpp
+++ b/third_party/WebKit/Source/web/tests/WebPageSerializerTest.cpp
@@ -40,6 +40,7 @@
 #include "public/platform/WebUnitTestSupport.h"
 #include "public/web/WebDocument.h"
 #include "public/web/WebFrame.h"
+#include "public/web/WebPageSerializerClient.h"
 #include "public/web/WebView.h"
 #include "web/tests/FrameTestHelpers.h"
 #include <gtest/gtest.h>
@@ -49,6 +50,22 @@
 
 namespace blink {
 
+namespace {
+// Serializer client for tests: concatenates every chunk it receives.
+class SimpleWebPageSerializerClient : public WebPageSerializerClient {
+public:
+    // Returns everything received so far, in the order it arrived.
+    std::string toString() const { return m_string; }
+
+private:
+    // Only |data| is kept; the frame URL and the status are ignored.
+    void didSerializeDataForFrame(const WebURL&, const WebCString& data, PageSerializationStatus) final { m_string += data; }
+
+    std::string m_string;
+};
+
+} // namespace
+
 class WebPageSerializerTest : public testing::Test {
 public:
     WebPageSerializerTest() : m_supportedSchemes(static_cast<size_t>(3))
@@ -134,6 +151,20 @@
     EXPECT_TRUE(webVectorContains(resources, "file://c/my_folder/file.gif"));
 }
 
+TEST_F(WebPageSerializerTest, fromUrlWithMinusMinus)
+{
+    // Serialize a page whose URL has "--" in its query; the MOTW comment must carry each pair as "-%2D".
+    WebURL pageURL = toKURL("http://www.test.com?--x--");
+    registerMockedURLLoad(pageURL.spec(), WebString::fromUTF8("text_only_page.html"));
+    loadURLInTopFrame(pageURL);
+    SimpleWebPageSerializerClient client;
+    WebVector<WebURL> links(&pageURL, 1);
+    WebVector<WebString> localPaths(&"local", 1);
+    WebPageSerializer::serialize(webView()->mainFrame()->toWebLocalFrame(), &client, links, localPaths, "");
+    // The expected comment is 60 characters long and is compared from offset 1 of the output.
+    EXPECT_EQ("<!-- saved from url=(0030)http://www.test.com/?-%2Dx-%2D -->", client.toString().substr(1, 60));
+}
+
 TEST_F(WebPageSerializerTest, MultipleFrames)
 {
     // Register the mocked frames.
diff --git a/third_party/WebKit/Source/web/tests/data/pageserialization/text_only_page.html b/third_party/WebKit/Source/web/tests/data/pageserialization/text_only_page.html
new file mode 100644
index 0000000..257cc564
--- /dev/null
+++ b/third_party/WebKit/Source/web/tests/data/pageserialization/text_only_page.html
@@ -0,0 +1 @@
+foo