blob: 37f992a46e98db8299f6dde12cc35f95889a5024 [file] [log] [blame]
/*
* Copyright (C) 2014 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <map>
#include "build/build_config.h"
#include "core/dom/ClassCollection.h"
#include "core/dom/Document.h"
#include "core/dom/Element.h"
#include "core/dom/ElementShadow.h"
#include "core/frame/FrameTestHelpers.h"
#include "core/frame/LocalFrame.h"
#include "core/frame/Location.h"
#include "core/frame/WebLocalFrameImpl.h"
#include "core/page/Page.h"
#include "platform/SerializedResource.h"
#include "platform/SharedBuffer.h"
#include "platform/mhtml/MHTMLArchive.h"
#include "platform/mhtml/MHTMLParser.h"
#include "platform/testing/TestingPlatformSupport.h"
#include "platform/testing/URLTestHelpers.h"
#include "platform/testing/UnitTestHelpers.h"
#include "platform/weborigin/KURL.h"
#include "platform/weborigin/SchemeRegistry.h"
#include "public/platform/WebString.h"
#include "public/platform/WebURL.h"
#include "public/platform/WebURLLoaderMockFactory.h"
#include "public/platform/WebURLRequest.h"
#include "public/platform/WebURLResponse.h"
#include "public/web/WebDocument.h"
#include "public/web/WebFrame.h"
#include "public/web/WebView.h"
#include "testing/gtest/include/gtest/gtest.h"
using blink::URLTestHelpers::ToKURL;
namespace blink {
// Hands out the lines of a text buffer one at a time. Lines are delimited by
// CRLF ("\r\n"); a lone '\r' or '\n' is treated as ordinary line content.
class LineReader {
 public:
  // Keeps a private copy of |text|. The constructor is explicit so that a
  // std::string is never silently converted into a LineReader.
  explicit LineReader(const std::string& text) : text_(text), index_(0) {}

  // Stores the next line, without its CRLF terminator, in |line| and returns
  // true. Once all the text has been consumed, |line| is left empty and false
  // is returned. A trailing segment that is not terminated by CRLF is still
  // returned as a line.
  bool GetNextLine(std::string* line) {
    line->clear();
    if (index_ >= text_.length())
      return false;

    const size_t end_of_line_index = text_.find("\r\n", index_);
    const bool has_terminator = end_of_line_index != std::string::npos;

    // Without a terminator the remainder of the text is the last line.
    line->assign(text_, index_,
                 has_terminator ? end_of_line_index - index_
                                : std::string::npos);
    // Step over the two-character CRLF, or park at the end of the text.
    index_ = has_terminator ? end_of_line_index + 2 : text_.length();
    return true;
  }

 private:
  std::string text_;
  // Offset in |text_| of the first character of the next line to return.
  size_t index_;
};
// Fixture shared by the MHTML tests. It owns a WebViewHelper (|helper_|) used
// to load archives through the mocked URL loader, and a list of
// SerializedResources (|resources_|) that Serialize() turns into MHTML data.
class MHTMLTest : public ::testing::Test {
 public:
  // Remembers the "mhtml/" test-data directory that ReadFile() resolves file
  // names against.
  MHTMLTest() { file_path_ = testing::CoreTestDataPath("mhtml/"); }

 protected:
  void SetUp() override { helper_.Initialize(); }

  // Unregisters every mocked URL and clears the memory cache after each test.
  void TearDown() override {
    platform_->GetURLLoaderMockFactory()
        ->UnregisterAllURLsAndClearMemoryCache();
  }

  // Registers |url| with the mocked URL loader so that it is backed by the
  // test-data file "mhtml/<file_name>", using "multipart/related" as the
  // third argument (presumably the response MIME type).
  void RegisterMockedURLLoad(const std::string& url,
                             const std::string& file_name) {
    URLTestHelpers::RegisterMockedURLLoad(
        ToKURL(url),
        testing::CoreTestDataPath(WebString::FromUTF8("mhtml/" + file_name)),
        WebString::FromUTF8("multipart/related"));
  }

  // Loads |url| in the main frame of the fixture's WebView.
  void LoadURLInTopFrame(const WebURL& url) {
    FrameTestHelpers::LoadFrame(helper_.GetWebView()->MainFrameImpl(),
                                url.GetString().Utf8().data());
  }

  Page* GetPage() const { return helper_.GetWebView()->GetPage(); }

  // Appends a resource built from |url|, |mime| and |data| to |resources_|.
  void AddResource(const char* url,
                   const char* mime,
                   scoped_refptr<SharedBuffer> data) {
    SerializedResource resource(ToKURL(url), mime, std::move(data));
    resources_.push_back(resource);
  }

  // Same as above, but takes the resource body from the test-data file
  // |file_name|.
  void AddResource(const char* url, const char* mime, const char* file_name) {
    AddResource(url, mime, ReadFile(file_name));
  }

  // Adds the twelve resources (one HTML page, three style sheets and eight
  // images) that make up the CSS test page.
  void AddTestResources() {
    AddResource("http://www.test.com", "text/html", "css_test_page.html");
    AddResource("http://www.test.com/link_styles.css", "text/css",
                "link_styles.css");
    AddResource("http://www.test.com/import_style_from_link.css", "text/css",
                "import_style_from_link.css");
    AddResource("http://www.test.com/import_styles.css", "text/css",
                "import_styles.css");
    AddResource("http://www.test.com/red_background.png", "image/png",
                "red_background.png");
    AddResource("http://www.test.com/orange_background.png", "image/png",
                "orange_background.png");
    AddResource("http://www.test.com/yellow_background.png", "image/png",
                "yellow_background.png");
    AddResource("http://www.test.com/green_background.png", "image/png",
                "green_background.png");
    AddResource("http://www.test.com/blue_background.png", "image/png",
                "blue_background.png");
    AddResource("http://www.test.com/purple_background.png", "image/png",
                "purple_background.png");
    AddResource("http://www.test.com/ul-dot.png", "image/png", "ul-dot.png");
    AddResource("http://www.test.com/ol-dot.png", "image/png", "ol-dot.png");
  }

  // Parses the leading header section of |mhtml_data| into a key/value map.
  // Parsing stops at the first empty line or at the end of the data. A line
  // that starts with a space or tab (and has further content) is treated as a
  // continuation of the previous header: its first character is dropped and
  // the rest is appended. The key is the text before the first ':' and the
  // value starts two characters after it, i.e. after the colon and the
  // separator that follows. When a key occurs more than once, the first
  // occurrence wins. Lines without a ':' are skipped.
  static std::map<std::string, std::string> ExtractMHTMLHeaders(
      scoped_refptr<RawData> mhtml_data) {
    std::map<std::string, std::string> mhtml_headers;
    LineReader line_reader(
        std::string(mhtml_data->data(), mhtml_data->length()));
    std::string line;
    line_reader.GetNextLine(&line);
    while (line.length()) {
      // Peek at the following lines and fold every continuation line into
      // the current one. |next_line| ends up holding the first line that is
      // not a continuation (empty once the reader is exhausted).
      std::string next_line;
      while (true) {
        line_reader.GetNextLine(&next_line);
        if (next_line.length() > 1 &&
            (next_line[0] == ' ' || next_line[0] == '\t')) {
          line.append(next_line, 1, std::string::npos);
          continue;
        }
        break;
      }

      const std::string::size_type pos = line.find(':');
      if (pos != std::string::npos) {
        const std::string key = line.substr(0, pos);
        // A header such as "Key:" has nothing after the colon; clamp the
        // offset so substr() is never asked to start past the end.
        const std::string value =
            pos + 2 <= line.length() ? line.substr(pos + 2) : std::string();
        mhtml_headers.emplace(key, value);
      }

      // Always advance, including for lines without a colon; otherwise the
      // same line would be examined forever.
      line = next_line;
    }
    return mhtml_headers;
  }

  // Builds a complete MHTML document (header, one part per entry of
  // |resources|, footer) and returns the raw bytes. The result is also run
  // through MHTMLParser, and an expectation failure is recorded when the
  // parsed archive comes back empty.
  static scoped_refptr<RawData> GenerateMHTMLData(
      const Vector<SerializedResource>& resources,
      MHTMLArchive::EncodingPolicy encoding_policy,
      const KURL& url,
      const String& title,
      const String& mime_type) {
    // This boundary is as good as any other. Plus it gets used in almost
    // all the examples in the MHTML spec - RFC 2557.
    String boundary = String::FromUTF8("boundary-example");
    scoped_refptr<RawData> mhtml_data = RawData::Create();
    MHTMLArchive::GenerateMHTMLHeader(boundary, url, title, mime_type,
                                      *mhtml_data->MutableData());
    for (const auto& resource : resources) {
      MHTMLArchive::GenerateMHTMLPart(boundary, String(), encoding_policy,
                                      resource, *mhtml_data->MutableData());
    }
    MHTMLArchive::GenerateMHTMLFooterForTesting(boundary,
                                                *mhtml_data->MutableData());

    // Validate the generated MHTML.
    MHTMLParser parser(
        SharedBuffer::Create(mhtml_data->data(), mhtml_data->length()));
    EXPECT_FALSE(parser.ParseArchive().IsEmpty())
        << "Generated MHTML is malformed";
    return mhtml_data;
  }

  // Serializes the resources added so far into MHTML data.
  scoped_refptr<RawData> Serialize(
      const KURL& url,
      const String& title,
      const String& mime,
      MHTMLArchive::EncodingPolicy encoding_policy) {
    return GenerateMHTMLData(resources_, encoding_policy, url, title, mime);
  }

 private:
  // Reads the test-data file |file_name| from the directory in |file_path_|.
  scoped_refptr<SharedBuffer> ReadFile(const char* file_name) {
    String file_path = file_path_ + file_name;
    return testing::ReadFromFile(file_path);
  }

  String file_path_;
  Vector<SerializedResource> resources_;
  FrameTestHelpers::WebViewHelper helper_;
  ScopedTestingPlatformSupport<TestingPlatformSupport> platform_;
};
// Verifies that a document loaded from an MHTML archive reports the URL of
// the archive file as its location, and that the domain of its security
// origin is not "localhost".
TEST_F(MHTMLTest, CheckDomain) {
  const char kFileURL[] = "file:///simple_test.mht";

  // Back the file URL with the archive on disk, then navigate to it.
  RegisterMockedURLLoad(kFileURL, "simple_test.mht");
  LoadURLInTopFrame(ToKURL(kFileURL));

  Page* page = GetPage();
  ASSERT_TRUE(page);
  LocalFrame* main_frame = ToLocalFrame(page->MainFrame());
  ASSERT_TRUE(main_frame);
  Document* loaded_document = main_frame->GetDocument();
  ASSERT_TRUE(loaded_document);

  EXPECT_STREQ(kFileURL,
               main_frame->DomWindow()->location()->toString().Ascii().data());
  EXPECT_STRNE(
      "localhost",
      loaded_document->GetSecurityOrigin()->Domain().Ascii().data());
}
// A title made only of printable ASCII characters must be written to the
// Subject header verbatim.
TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingAllPrintableCharacters) {
  const char kURL[] = "http://www.example.com/";
  const char kTitle[] = "abc";

  AddTestResources();
  const KURL page_url = ToKURL(kURL);
  const String page_title = String::FromUTF8(kTitle);
  scoped_refptr<RawData> mhtml = Serialize(page_url, page_title, "text/html",
                                           MHTMLArchive::kUseDefaultEncoding);

  std::map<std::string, std::string> headers = ExtractMHTMLHeaders(mhtml);

  EXPECT_EQ("<Saved by Blink>", headers["From"]);
  EXPECT_FALSE(headers["Date"].empty());
  EXPECT_EQ(
      "multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
      headers["Content-Type"]);
  EXPECT_EQ("abc", headers["Subject"]);
  EXPECT_EQ(kURL, headers["Snapshot-Content-Location"]);
}
// A title containing whitespace, '=' and non-ASCII characters must be
// written to the Subject header as a single "=?utf-8?Q?...?=" encoded word.
TEST_F(MHTMLTest, TestMHTMLHeadersWithTitleContainingNonPrintableCharacters) {
  const char kURL[] = "http://www.example.com/";
  const char kTitle[] = "abc \t=\xe2\x98\x9d\xf0\x9f\x8f\xbb";

  AddTestResources();
  const KURL page_url = ToKURL(kURL);
  const String page_title = String::FromUTF8(kTitle);
  scoped_refptr<RawData> mhtml = Serialize(page_url, page_title, "text/html",
                                           MHTMLArchive::kUseDefaultEncoding);

  std::map<std::string, std::string> headers = ExtractMHTMLHeaders(mhtml);

  EXPECT_EQ("<Saved by Blink>", headers["From"]);
  EXPECT_FALSE(headers["Date"].empty());
  EXPECT_EQ(
      "multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
      headers["Content-Type"]);
  EXPECT_EQ("=?utf-8?Q?abc=20=09=3D=E2=98=9D=F0=9F=8F=BB?=",
            headers["Subject"]);
  EXPECT_EQ(kURL, headers["Snapshot-Content-Location"]);
}
// A long title that needs encoding must be split across several
// "=?utf-8?Q?...?=" encoded words in the Subject header.
TEST_F(MHTMLTest,
       TestMHTMLHeadersWithLongTitleContainingNonPrintableCharacters) {
  const char kURL[] = "http://www.example.com/";
  const char kTitle[] =
      "01234567890123456789012345678901234567890123456789"
      "01234567890123456789012345678901234567890123456789"
      " \t=\xe2\x98\x9d\xf0\x9f\x8f\xbb";

  AddTestResources();
  const KURL page_url = ToKURL(kURL);
  const String page_title = String::FromUTF8(kTitle);
  scoped_refptr<RawData> mhtml = Serialize(page_url, page_title, "text/html",
                                           MHTMLArchive::kUseDefaultEncoding);

  std::map<std::string, std::string> headers = ExtractMHTMLHeaders(mhtml);

  EXPECT_EQ("<Saved by Blink>", headers["From"]);
  EXPECT_FALSE(headers["Date"].empty());
  EXPECT_EQ(
      "multipart/related;type=\"text/html\";boundary=\"boundary-example\"",
      headers["Content-Type"]);
  // The header extractor joins folded lines, so the encoded words appear
  // back to back here.
  EXPECT_EQ(
      "=?utf-8?Q?012345678901234567890123456789"
      "012345678901234567890123456789012?="
      "=?utf-8?Q?345678901234567890123456789"
      "0123456789=20=09=3D=E2=98=9D=F0=9F?="
      "=?utf-8?Q?=8F=BB?=",
      headers["Subject"]);
  EXPECT_EQ(kURL, headers["Snapshot-Content-Location"]);
}
// Serializes the test resources and checks that every part announces the
// transfer encoding that matches its content type: quoted-printable for
// text/* parts and base64 for image/* parts.
TEST_F(MHTMLTest, TestMHTMLEncoding) {
  const char kURL[] = "http://www.example.com";
  AddTestResources();
  scoped_refptr<RawData> mhtml =
      Serialize(ToKURL(kURL), "Test Serialization", "text/html",
                MHTMLArchive::kUseDefaultEncoding);

  // Walk the output line by line. A part's Content-Type line decides which
  // encoding the Content-Transfer-Encoding line that follows must name.
  LineReader reader(std::string(mhtml->data(), mhtml->length()));
  const char* pending_encoding = nullptr;
  int parts_verified = 0;
  std::string current;
  while (reader.GetNextLine(&current)) {
    if (current.compare(0, 13, "Content-Type:") == 0) {
      // The previous part must already have been matched with its encoding.
      ASSERT_FALSE(pending_encoding);
      // The multipart/related line belongs to the MHTML header, not to a
      // part, so it sets no expectation.
      if (current.find("multipart/related;") == std::string::npos) {
        if (current.find("text/") != std::string::npos) {
          pending_encoding = "quoted-printable";
        } else if (current.find("image/") != std::string::npos) {
          pending_encoding = "base64";
        } else {
          FAIL() << "Unexpected Content-Type: " << current;
        }
      }
    } else if (current.compare(0, 26, "Content-Transfer-Encoding:") == 0) {
      ASSERT_TRUE(pending_encoding);
      EXPECT_NE(current.find(pending_encoding), std::string::npos);
      pending_encoding = nullptr;
      ++parts_verified;
    }
  }

  // One check per resource added by AddTestResources().
  EXPECT_EQ(12, parts_verified);
}
// Checks which URL schemes MHTMLArchive::Create() accepts.
TEST_F(MHTMLTest, MHTMLFromScheme) {
  const char kURL[] = "http://www.example.com";
  AddTestResources();
  scoped_refptr<RawData> serialized =
      Serialize(ToKURL(kURL), "Test Serialization", "text/html",
                MHTMLArchive::kUseDefaultEncoding);
  scoped_refptr<SharedBuffer> archive_bytes =
      SharedBuffer::Create(serialized->data(), serialized->length());
  SharedBuffer* const bytes = archive_bytes.get();

  const KURL http_location = ToKURL("http://www.example.com");
  const KURL content_location = ToKURL("content://foo");
  const KURL file_location = ToKURL("file://foo");
  const KURL custom_location = ToKURL("fooscheme://bar");

  // An archive can only be created for local schemes, http/https schemes and
  // the content scheme (Android only).
  EXPECT_NE(nullptr, MHTMLArchive::Create(http_location, bytes));
#if defined(OS_ANDROID)
  EXPECT_NE(nullptr, MHTMLArchive::Create(content_location, bytes));
#else
  EXPECT_EQ(nullptr, MHTMLArchive::Create(content_location, bytes));
#endif
  EXPECT_NE(nullptr, MHTMLArchive::Create(file_location, bytes));

  // An unknown scheme is rejected until it has been registered as local.
  EXPECT_EQ(nullptr, MHTMLArchive::Create(custom_location, bytes));
  SchemeRegistry::RegisterURLSchemeAsLocal("fooscheme");
  EXPECT_NE(nullptr, MHTMLArchive::Create(custom_location, bytes));
}
// Verifies that a document loaded from MHTML, and its first subframe, are
// fully sandboxed apart from the popup-related flags.
TEST_F(MHTMLTest, EnforceSandboxFlags) {
  const char kURL[] = "http://www.example.com";

  // Everything is sandboxed except opening new top-level windows.
  const auto expected_flags =
      kSandboxAll &
      ~(kSandboxPopups | kSandboxPropagatesToAuxiliaryBrowsingContexts);

  // Serve the archive for kURL and navigate the main frame to it.
  RegisterMockedURLLoad(kURL, "page_with_javascript.mht");
  LoadURLInTopFrame(ToKURL(kURL));

  ASSERT_TRUE(GetPage());
  LocalFrame* main_frame = ToLocalFrame(GetPage()->MainFrame());
  ASSERT_TRUE(main_frame);
  Document* main_document = main_frame->GetDocument();
  ASSERT_TRUE(main_document);

  EXPECT_EQ(expected_flags, main_document->GetSandboxFlags());
  // The document lives in a unique origin.
  EXPECT_TRUE(main_document->GetSecurityOrigin()->IsUnique());
  // Scripts may not run...
  EXPECT_FALSE(main_document->CanExecuteScripts(kNotAboutToExecuteScript));
  // ...so the element the page's script would have created is missing.
  EXPECT_FALSE(main_document->getElementById("mySpan"));

  // The same restrictions apply to the subframe.
  LocalFrame* sub_frame = ToLocalFrame(main_frame->Tree().FirstChild());
  ASSERT_TRUE(sub_frame);
  Document* sub_document = sub_frame->GetDocument();
  ASSERT_TRUE(sub_document);

  EXPECT_EQ(expected_flags, sub_document->GetSandboxFlags());
  // The subframe document lives in a unique origin as well.
  EXPECT_TRUE(sub_document->GetSecurityOrigin()->IsUnique());
  // Scripts may not run...
  EXPECT_FALSE(sub_document->CanExecuteScripts(kNotAboutToExecuteScript));
  // ...so the element the subframe's script would have created is missing.
  EXPECT_FALSE(sub_document->getElementById("mySpan"));
}
// Verifies that the document loaded from the "xslt.mht" archive is sandboxed
// like any other MHTML document.
TEST_F(MHTMLTest, EnforceSandboxFlagsInXSLT) {
  const char kURL[] = "http://www.example.com";

  // Everything is sandboxed except opening new top-level windows.
  const auto expected_flags =
      kSandboxAll &
      ~(kSandboxPopups | kSandboxPropagatesToAuxiliaryBrowsingContexts);

  // Serve the archive for kURL and navigate the main frame to it.
  RegisterMockedURLLoad(kURL, "xslt.mht");
  LoadURLInTopFrame(ToKURL(kURL));

  ASSERT_TRUE(GetPage());
  LocalFrame* main_frame = ToLocalFrame(GetPage()->MainFrame());
  ASSERT_TRUE(main_frame);
  Document* main_document = main_frame->GetDocument();
  ASSERT_TRUE(main_document);

  EXPECT_EQ(expected_flags, main_document->GetSandboxFlags());
  // The document lives in a unique origin.
  EXPECT_TRUE(main_document->GetSecurityOrigin()->IsUnique());
  // Scripts may not run.
  EXPECT_FALSE(main_document->CanExecuteScripts(kNotAboutToExecuteScript));
}
// Verifies that shadow trees stored in an MHTML archive are rebuilt on load,
// including a nested one, and that script inside a shadow template stays
// blocked.
TEST_F(MHTMLTest, ShadowDom) {
  const char kURL[] = "http://www.example.com";

  // Serve the archive for kURL and navigate the main frame to it.
  RegisterMockedURLLoad(kURL, "shadow.mht");
  LoadURLInTopFrame(ToKURL(kURL));

  ASSERT_TRUE(GetPage());
  LocalFrame* main_frame = ToLocalFrame(GetPage()->MainFrame());
  ASSERT_TRUE(main_frame);
  Document* doc = main_frame->GetDocument();
  ASSERT_TRUE(doc);

  EXPECT_TRUE(IsShadowHost(doc->getElementById("h1")));
  EXPECT_TRUE(IsShadowHost(doc->getElementById("h2")));

  // "h3" lives inside the shadow tree of "h2" and hosts a shadow tree itself.
  Element* outer_host = doc->getElementById("h2");
  EXPECT_TRUE(IsShadowHost(
      outer_host->Shadow()->OldestShadowRoot().getElementById("h3")));

  EXPECT_TRUE(IsShadowHost(doc->getElementById("h4")));

  // The static element from the shadow DOM template is present.
  Element* template_host = doc->getElementById("h4");
  EXPECT_TRUE(
      template_host->Shadow()->OldestShadowRoot().getElementById("s1"));

  // The element that the template's script would have created is absent,
  // because the script is blocked.
  template_host = doc->getElementById("h4");
  EXPECT_FALSE(
      template_host->Shadow()->OldestShadowRoot().getElementById("s2"));
}
// Verifies that every element with class "fc" in the loaded archive is
// reported as a disabled form control, while the "h1" and "fm" elements are
// not.
TEST_F(MHTMLTest, FormControlElements) {
  const char kURL[] = "http://www.example.com";

  // Serve the archive for kURL and navigate the main frame to it.
  RegisterMockedURLLoad(kURL, "form.mht");
  LoadURLInTopFrame(ToKURL(kURL));

  ASSERT_TRUE(GetPage());
  LocalFrame* main_frame = ToLocalFrame(GetPage()->MainFrame());
  ASSERT_TRUE(main_frame);
  Document* doc = main_frame->GetDocument();
  ASSERT_TRUE(doc);

  ClassCollection* form_controls = doc->getElementsByClassName("fc");
  ASSERT_TRUE(form_controls);
  for (Element* control : *form_controls) {
    EXPECT_TRUE(control->IsDisabledFormControl());
  }

  EXPECT_FALSE(doc->getElementById("h1")->IsDisabledFormControl());
  EXPECT_FALSE(doc->getElementById("fm")->IsDisabledFormControl());
}
// Verifies that an archive whose header and body lines are split by soft
// line breaks still loads into a usable document.
TEST_F(MHTMLTest, LoadMHTMLContainingSoftLineBreaks) {
  const char kURL[] = "http://www.example.com";

  // Serve the archive for kURL and navigate the main frame to it.
  RegisterMockedURLLoad(kURL, "soft_line_break.mht");
  LoadURLInTopFrame(ToKURL(kURL));

  ASSERT_TRUE(GetPage());
  LocalFrame* main_frame = ToLocalFrame(GetPage()->MainFrame());
  ASSERT_TRUE(main_frame);

  // A document is only available if the header lines split by soft line
  // breaks were joined correctly.
  Document* doc = main_frame->GetDocument();
  ASSERT_TRUE(doc);

  // The long ID is only found if the body lines split by soft line breaks
  // were joined correctly too.
  EXPECT_TRUE(doc->getElementById(
      "AVeryLongID012345678901234567890123456789012345678901234567890End"));
}
} // namespace blink