blob: a13f4b889a8636db7c420c113e6177e0092e4a7f [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/path_service.h"
#include "base/stl_util.h"
#include "base/strings/stringprintf.h"
#include "base/threading/thread_restrictions.h"
#include "build/build_config.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/ui/browser.h"
#include "chrome/browser/ui/browser_commands.h"
#include "chrome/browser/ui/tabs/tab_strip_model.h"
#include "chrome/common/chrome_paths.h"
#include "chrome/common/pref_names.h"
#include "chrome/test/base/in_process_browser_test.h"
#include "chrome/test/base/ui_test_utils.h"
#include "components/prefs/pref_service.h"
#include "content/public/browser/download_manager.h"
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/notification_service.h"
#include "content/public/browser/notification_source.h"
#include "content/public/browser/notification_types.h"
#include "content/public/browser/web_contents.h"
#include "content/public/test/download_test_observer.h"
#include "content/public/test/test_navigation_observer.h"
#include "net/test/embedded_test_server/embedded_test_server.h"
namespace {

// Pairs a test page with the encoding name the browser is expected to report
// once that page has been loaded.
struct EncodingTestData {
  const char* file_name;      // Name of the page inside the alias test dir.
  const char* encoding_name;  // Encoding name expected for that page.
};

// Parameter table for the alias-mapping test: one entry per test page.
constexpr EncodingTestData kEncodingTestDatas[] = {
    {"Big5.html", "Big5"},
    {"EUC-JP.html", "EUC-JP"},
    {"gb18030.html", "gb18030"},
    {"iso-8859-1.html", "windows-1252"},
    {"ISO-8859-2.html", "ISO-8859-2"},
    {"ISO-8859-4.html", "ISO-8859-4"},
    {"ISO-8859-5.html", "ISO-8859-5"},
    {"ISO-8859-6.html", "ISO-8859-6"},
    {"ISO-8859-7.html", "ISO-8859-7"},
    {"ISO-8859-8.html", "ISO-8859-8"},
    {"ISO-8859-13.html", "ISO-8859-13"},
    {"ISO-8859-15.html", "ISO-8859-15"},
    {"KOI8-R.html", "KOI8-R"},
    {"KOI8-U.html", "KOI8-U"},
    {"macintosh.html", "macintosh"},
    {"Shift-JIS.html", "Shift_JIS"},
    {"US-ASCII.html", "windows-1252"},  // http://crbug.com/15801
    {"UTF-8.html", "UTF-8"},
    {"UTF-16LE.html", "UTF-16LE"},
    {"windows-874.html", "windows-874"},
    {"EUC-KR.html", "EUC-KR"},
    {"windows-1250.html", "windows-1250"},
    {"windows-1251.html", "windows-1251"},
    {"windows-1252.html", "windows-1252"},
    {"windows-1253.html", "windows-1253"},
    {"windows-1254.html", "windows-1254"},
    {"windows-1255.html", "windows-1255"},
    {"windows-1256.html", "windows-1256"},
    {"windows-1257.html", "windows-1257"},
    {"windows-1258.html", "windows-1258"},
};

}  // namespace
// Directory holding the encoding test pages and their expected results. It is
// used both as a path below the served test data directory and as the base
// directory passed to ui_test_utils::GetTestFilePath().
// The pointer itself is const so the global cannot be re-pointed at runtime.
static const base::FilePath::CharType* const kTestDir =
    FILE_PATH_LITERAL("encoding_tests");
// Browser-test fixture for the encoding tests. During setup it serves the
// Chrome test data directory through the embedded test server and creates a
// unique temporary directory into which pages are saved for comparison.
class BrowserEncodingTest
    : public InProcessBrowserTest,
      public testing::WithParamInterface<EncodingTestData> {
 protected:
  BrowserEncodingTest() = default;

  // Saves the page in the active tab as |filename_to_write| inside the
  // temporary save directory and expects the saved contents to equal the
  // expected-result file |expected| (resolved relative to |kTestDir|).
  // |url| is the URL the page was loaded from; its length and spec are
  // substituted into the expected contents before the comparison.
  void SaveAndCompare(const char* filename_to_write,
                      const base::FilePath& expected,
                      const GURL& url) {
    const base::FilePath full_file_name =
        save_dir_.AppendASCII(filename_to_write);

    // The page is saved as a complete HTML file, which requires a directory
    // name for sub resources. These test pages have no sub resources, but the
    // directory name is still required.
    scoped_refptr<content::MessageLoopRunner> loop_runner(
        new content::MessageLoopRunner);
    content::SavePackageFinishedObserver observer(
        content::BrowserContext::GetDownloadManager(browser()->profile()),
        loop_runner->QuitClosure());
    browser()->tab_strip_model()->GetActiveWebContents()->SavePage(
        full_file_name, temp_sub_resource_dir_,
        content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
    // Run until the quit closure handed to the observer is invoked.
    loop_runner->Run();

    const base::FilePath expected_file_name =
        ui_test_utils::GetTestFilePath(base::FilePath(kTestDir), expected);

    std::string actual_contents;
    std::string expected_contents;
    {
      // Reading the files blocks, so allow blocking within this scope only.
      base::ScopedAllowBlockingForTesting allow_blocking;
      ASSERT_TRUE(base::ReadFileToString(full_file_name, &actual_contents));
      ASSERT_TRUE(
          base::ReadFileToString(expected_file_name, &expected_contents));
    }

    // Add the "Mark of the Web" with the source URL: the expected file is used
    // as a printf-style template that receives the URL length and the URL.
    // The length is cast to int because passing a size_t through varargs to an
    // int conversion is a type mismatch.
    // NOTE(review): assumes the template's length placeholder is an int
    // conversion such as "%04d" -- confirm against the expected-result files.
    const std::string& url_spec = url.spec();
    expected_contents = base::StringPrintf(
        expected_contents.c_str(), static_cast<int>(url_spec.length()),
        url_spec.c_str());
    EXPECT_EQ(expected_contents, actual_contents);
  }

  // Starts the embedded test server on the Chrome test data directory and
  // prepares the temporary directories used when saving pages.
  void SetUpOnMainThread() override {
    base::FilePath test_data_dir;
    ASSERT_TRUE(base::PathService::Get(chrome::DIR_TEST_DATA, &test_data_dir));
    embedded_test_server()->ServeFilesFromDirectory(test_data_dir);
    ASSERT_TRUE(embedded_test_server()->Start());

    ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
    save_dir_ = temp_dir_.GetPath();
    temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files");
  }

  // Owns the unique temporary directory created in SetUpOnMainThread().
  base::ScopedTempDir temp_dir_;
  // Path of |temp_dir_|; saved pages are written here.
  base::FilePath save_dir_;
  // Sub-resource directory passed to SavePage(); located below |save_dir_|.
  base::FilePath temp_sub_resource_dir_;
};
// TODO(jnd): 1. Some encodings are missing here. They will be added later. See
// http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
// encoding name. WebKit layout tests cover some, but testing in the UI test is
// also necessary.
// Loads the parameter's page from the alias_mapping directory through the
// embedded test server and expects the encoding reported by the active tab to
// equal the parameter's encoding name.
IN_PROC_BROWSER_TEST_P(BrowserEncodingTest, TestEncodingAliasMapping) {
  const EncodingTestData& test_case = GetParam();

  // Location of the page relative to the served test data directory.
  const base::FilePath page_path = base::FilePath(kTestDir)
                                       .AppendASCII("alias_mapping")
                                       .AppendASCII(test_case.file_name);
  const GURL page_url =
      embedded_test_server()->GetURL("/" + page_path.MaybeAsASCII());

  ui_test_utils::NavigateToURL(browser(), page_url);

  content::WebContents* const active_tab =
      browser()->tab_strip_model()->GetActiveWebContents();
  EXPECT_EQ(test_case.encoding_name, active_tab->GetEncoding());
}
// Instantiates the parameterized BrowserEncodingTest tests under the
// "EncodingAliases" prefix, with one parameter per kEncodingTestDatas entry.
INSTANTIATE_TEST_SUITE_P(EncodingAliases,
BrowserEncodingTest,
testing::ValuesIn(kEncodingTestDatas));
// Loads pages that do not declare an encoding, expects the browser to report
// the listed encoding for each, then saves each page and compares it with the
// corresponding expected-result file.
//
// The following encodings are excluded from the auto-detection test because
// it's a known issue that the current encoding detector does not detect them:
// ISO-8859-4
// ISO-8859-13
// KOI8-U
// macintosh
// windows-874
// windows-1252
// windows-1253
// windows-1257
// windows-1258
IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, TestEncodingAutoDetect) {
  struct EncodingAutoDetectTestData {
    const char* test_file_name;     // File name of test data.
    const char* expected_result;    // File name of expected results.
    const char* expected_encoding;  // Expected encoding.
  };
  const EncodingAutoDetectTestData kTestDatas[] = {
      {"Big5_with_no_encoding_specified.html",
       "expected_Big5_saved_from_no_encoding_specified.html",
       "Big5"},
      {"GBK_with_no_encoding_specified.html",
       "expected_GBK_saved_from_no_encoding_specified.html",
       "GBK"},
      {"iso-8859-1_with_no_encoding_specified.html",
       "expected_iso-8859-1_saved_from_no_encoding_specified.html",
       "windows-1252"},
      {"ISO-8859-5_with_no_encoding_specified.html",
       "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
       "ISO-8859-5"},
      {"ISO-8859-6_with_no_encoding_specified.html",
       "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
       "ISO-8859-6"},
      {"ISO-8859-7_with_no_encoding_specified.html",
       "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
       "ISO-8859-7"},
      {"ISO-8859-8-I_with_no_encoding_specified.html",
       "expected_ISO-8859-8-I_saved_from_no_encoding_specified.html",
       "windows-1255"},
      {"KOI8-R_with_no_encoding_specified.html",
       "expected_KOI8-R_saved_from_no_encoding_specified.html",
       "KOI8-R"},
      {"Shift-JIS_with_no_encoding_specified.html",
       "expected_Shift-JIS_saved_from_no_encoding_specified.html",
       "Shift_JIS"},
      {"EUC-KR_with_no_encoding_specified.html",
       "expected_EUC-KR_saved_from_no_encoding_specified.html",
       "EUC-KR"},
      {"windows-1251_with_no_encoding_specified.html",
       "expected_windows-1251_saved_from_no_encoding_specified.html",
       "windows-1251"},
      {"windows-1254_with_no_encoding_specified.html",
       "expected_windows-1254_saved_from_no_encoding_specified.html",
       "windows-1254"},
      {"windows-1255_with_no_encoding_specified.html",
       "expected_windows-1255_saved_from_no_encoding_specified.html",
       "windows-1255"},
      {"windows-1256_with_no_encoding_specified.html",
       "expected_windows-1256_saved_from_no_encoding_specified.html",
       "windows-1256"},
  };
  const char* const kAutoDetectDir = "auto_detect";
  // Directory of the files of expected results.
  const char* const kExpectedResultDir = "expected_results";

  // Directory of the test pages, relative to the served test data directory.
  const base::FilePath test_dir_path =
      base::FilePath(kTestDir).AppendASCII(kAutoDetectDir);
  // Directory of the expected results, relative to |kTestDir|. It is the same
  // for every page, so it is built once outside the loop.
  const base::FilePath expected_dir_path =
      base::FilePath().AppendASCII(kAutoDetectDir).AppendASCII(
          kExpectedResultDir);

  // Set the default charset to one of the encodings not supported by the
  // current auto-detector (see the list above), so that a page decoded with
  // the default charset would be decoded incorrectly. ISO-8859-4 is used.
  browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset,
                                              "ISO-8859-4");

  content::WebContents* web_contents =
      browser()->tab_strip_model()->GetActiveWebContents();

  for (const EncodingAutoDetectTestData& test_data : kTestDatas) {
    // Attribute any failure below to the page currently being tested.
    SCOPED_TRACE(test_data.test_file_name);

    const base::FilePath test_file_path =
        test_dir_path.AppendASCII(test_data.test_file_name);
    const GURL url =
        embedded_test_server()->GetURL("/" + test_file_path.MaybeAsASCII());
    ui_test_utils::NavigateToURL(browser(), url);

    // Get the encoding of the page. It should be the real encoding now.
    EXPECT_EQ(test_data.expected_encoding, web_contents->GetEncoding());

    // Dump the page; the dumped content should equal the expected-result
    // file.
    SaveAndCompare(test_data.test_file_name,
                   expected_dir_path.AppendASCII(test_data.expected_result),
                   url);
  }
}