<!-- blob: 746792fb78b34fbce500f2c8d30105c51ba2a57d [file] [log] [blame] -->
<!doctype html>
<title>document.characterSet (inputEncoding and charset as aliases) normalization tests</title>
<link rel=author title="Aryeh Gregor" href=ayg@aryeh.name>
<meta name=timeout content=long>
<div id=log></div>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<style>iframe { display: none }</style>
<script>
"use strict";
// Map from encoding name to the labels that are expected to resolve to it.
// Each key is the exact string the tests below expect document.characterSet,
// document.inputEncoding and document.charset to report; each array entry is
// a label handed to encoding.py through its "label" query parameter.
// The table is taken straight from https://encoding.spec.whatwg.org/, except
// for the entries whose inline comment notes a <meta>-specific remapping.
var encodingMap = {
"UTF-8": [
"unicode-1-1-utf-8",
"utf-8",
"utf8",
// As we use <meta>, utf-16 will map to utf-8 per
// https://html.spec.whatwg.org/multipage/#documentEncoding
"utf-16",
"utf-16le",
"utf-16be",
],
"IBM866": [
"866",
"cp866",
"csibm866",
"ibm866",
],
"ISO-8859-2": [
"csisolatin2",
"iso-8859-2",
"iso-ir-101",
"iso8859-2",
"iso88592",
"iso_8859-2",
"iso_8859-2:1987",
"l2",
"latin2",
],
"ISO-8859-3": [
"csisolatin3",
"iso-8859-3",
"iso-ir-109",
"iso8859-3",
"iso88593",
"iso_8859-3",
"iso_8859-3:1988",
"l3",
"latin3",
],
"ISO-8859-4": [
"csisolatin4",
"iso-8859-4",
"iso-ir-110",
"iso8859-4",
"iso88594",
"iso_8859-4",
"iso_8859-4:1988",
"l4",
"latin4",
],
"ISO-8859-5": [
"csisolatincyrillic",
"cyrillic",
"iso-8859-5",
"iso-ir-144",
"iso8859-5",
"iso88595",
"iso_8859-5",
"iso_8859-5:1988",
],
"ISO-8859-6": [
"arabic",
"asmo-708",
"csiso88596e",
"csiso88596i",
"csisolatinarabic",
"ecma-114",
"iso-8859-6",
"iso-8859-6-e",
"iso-8859-6-i",
"iso-ir-127",
"iso8859-6",
"iso88596",
"iso_8859-6",
"iso_8859-6:1987",
],
"ISO-8859-7": [
"csisolatingreek",
"ecma-118",
"elot_928",
"greek",
"greek8",
"iso-8859-7",
"iso-ir-126",
"iso8859-7",
"iso88597",
"iso_8859-7",
"iso_8859-7:1987",
"sun_eu_greek",
],
"ISO-8859-8": [
"csiso88598e",
"csisolatinhebrew",
"hebrew",
"iso-8859-8",
"iso-8859-8-e",
"iso-ir-138",
"iso8859-8",
"iso88598",
"iso_8859-8",
"iso_8859-8:1988",
"visual",
],
"ISO-8859-8-I": [
"csiso88598i",
"iso-8859-8-i",
"logical",
],
"ISO-8859-10": [
"csisolatin6",
"iso-8859-10",
"iso-ir-157",
"iso8859-10",
"iso885910",
"l6",
"latin6",
],
"ISO-8859-13": [
"iso-8859-13",
"iso8859-13",
"iso885913",
],
"ISO-8859-14": [
"iso-8859-14",
"iso8859-14",
"iso885914",
],
"ISO-8859-15": [
"csisolatin9",
"iso-8859-15",
"iso8859-15",
"iso885915",
"iso_8859-15",
"l9",
],
"ISO-8859-16": [
"iso-8859-16",
],
"KOI8-R": [
"cskoi8r",
"koi",
"koi8",
"koi8-r",
"koi8_r",
],
"KOI8-U": [
"koi8-ru",
"koi8-u",
],
"macintosh": [
"csmacintosh",
"mac",
"macintosh",
"x-mac-roman",
],
"windows-874": [
"dos-874",
"iso-8859-11",
"iso8859-11",
"iso885911",
"tis-620",
"windows-874",
],
"windows-1250": [
"cp1250",
"windows-1250",
"x-cp1250",
],
"windows-1251": [
"cp1251",
"windows-1251",
"x-cp1251",
],
"windows-1252": [
"ansi_x3.4-1968",
"ascii",
"cp1252",
"cp819",
"csisolatin1",
"ibm819",
"iso-8859-1",
"iso-ir-100",
"iso8859-1",
"iso88591",
"iso_8859-1",
"iso_8859-1:1987",
"l1",
"latin1",
"us-ascii",
"windows-1252",
"x-cp1252",
// As we use <meta>, x-user-defined will map to windows-1252 per
// https://html.spec.whatwg.org/multipage/#documentEncoding
"x-user-defined"
],
"windows-1253": [
"cp1253",
"windows-1253",
"x-cp1253",
],
"windows-1254": [
"cp1254",
"csisolatin5",
"iso-8859-9",
"iso-ir-148",
"iso8859-9",
"iso88599",
"iso_8859-9",
"iso_8859-9:1989",
"l5",
"latin5",
"windows-1254",
"x-cp1254",
],
"windows-1255": [
"cp1255",
"windows-1255",
"x-cp1255",
],
"windows-1256": [
"cp1256",
"windows-1256",
"x-cp1256",
],
"windows-1257": [
"cp1257",
"windows-1257",
"x-cp1257",
],
"windows-1258": [
"cp1258",
"windows-1258",
"x-cp1258",
],
"x-mac-cyrillic": [
"x-mac-cyrillic",
"x-mac-ukrainian",
],
"GBK": [
"chinese",
"csgb2312",
"csiso58gb231280",
"gb2312",
"gb_2312",
"gb_2312-80",
"gbk",
"iso-ir-58",
"x-gbk",
],
"gb18030": [
"gb18030",
],
"Big5": [
"big5",
"big5-hkscs",
"cn-big5",
"csbig5",
"x-x-big5",
],
"EUC-JP": [
"cseucpkdfmtjapanese",
"euc-jp",
"x-euc-jp",
],
"ISO-2022-JP": [
"csiso2022jp",
"iso-2022-jp",
],
"Shift_JIS": [
"csshiftjis",
"ms932",
"ms_kanji",
"shift-jis",
"shift_jis",
"sjis",
"windows-31j",
"x-sjis",
],
"EUC-KR": [
"cseuckr",
"csksc56011987",
"euc-kr",
"iso-ir-149",
"korean",
"ks_c_5601-1987",
"ks_c_5601-1989",
"ksc5601",
"ksc_5601",
"windows-949",
],
"replacement": [
"csiso2022kr",
"hz-gb-2312",
"iso-2022-cn",
"iso-2022-cn-ext",
"iso-2022-kr",
],
};
// Label expansion.
//
// When true, every label is additionally tested in upper case, in alternating
// case and wrapped in ASCII whitespace. It is off, which matches the state of
// the code this replaces (the variant pushes were commented out there), so
// only the labels listed in encodingMap are tested.
// NOTE(review): the whitespace-wrapped variant contains tab, line feed, form
// feed and carriage return characters. Before enabling this, confirm that the
// code building the request URL from a label percent-encodes it.
var includeLabelVariants = false;

// Returns |label| with the characters at even indices upper-cased and the
// characters at odd indices left as they are (e.g. "utf-8" -> "UtF-8").
function alternateCase(label) {
  var result = "";
  for (var i = 0; i < label.length; i++) {
    result += (i % 2 === 0) ? label[i].toUpperCase() : label[i];
  }
  return result;
}

// Returns the labels to test for |label|. The list always starts with |label|
// itself. When includeLabelVariants is set it continues with the upper-case
// form, the alternating-case form (each skipped when it would duplicate an
// earlier entry) and finally |label| wrapped in " \t\n\f\r" on both sides.
function expandLabel(label) {
  var labels = [label];
  if (!includeLabelVariants) {
    return labels;
  }
  var upper = label.toUpperCase();
  var mixed = alternateCase(label);
  if (upper !== label) {
    labels.push(upper);
  }
  if (mixed !== label && mixed !== upper) {
    labels.push(mixed);
  }
  labels.push(" \t\n\f\r" + label + " \t\n\f\r");
  return labels;
}

// Replace each label list in encodingMap with a fresh array holding the
// expanded labels, keeping the original label order.
Object.keys(encodingMap).forEach(function(name) {
  var expanded = [];
  encodingMap[name].forEach(function(label) {
    Array.prototype.push.apply(expanded, expandLabel(label));
  });
  encodingMap[name] = expanded;
});
// For every (encoding name, label) pair in encodingMap, load encoding.py with
// that label in an iframe and check that document.characterSet and its two
// aliases, inputEncoding and charset, all report the encoding name.
Object.keys(encodingMap).forEach(function(name) {
  encodingMap[name].forEach(function(label) {
    var iframe = document.createElement("iframe");

    // One async test per alias, created before the iframe starts loading.
    var checks = ["characterSet", "inputEncoding", "charset"].map(
        function(attribute) {
      return {
        attribute: attribute,
        test: async_test("Name " + format_value(name) +
                         " has label " + format_value(label) +
                         " (" + attribute + ")"),
      };
    });

    // Percent-encode the label instead of splicing it into the URL raw, so
    // that characters such as ":" (and tabs, newlines or spaces, should
    // whitespace-padded labels ever be tested) are not reinterpreted or
    // dropped by URL parsing.
    // NOTE(review): assumes encoding.py reads the percent-decoded "label"
    // query parameter -- confirm against encoding.py.
    iframe.src = "encoding.py?label=" + encodeURIComponent(label);

    iframe.onload = function() {
      // Same order as before: run all three assertions while the iframe is
      // still in the document, then detach it, then complete the tests.
      checks.forEach(function(check) {
        check.test.step(function() {
          assert_equals(iframe.contentDocument[check.attribute], name);
        });
      });
      document.body.removeChild(iframe);
      checks.forEach(function(check) {
        check.test.done();
      });
    };
    document.body.appendChild(iframe);
  });
});
</script>
<!-- vim: set expandtab tabstop=2 shiftwidth=2: -->