<!doctype html>
<title>document.characterSet (inputEncoding and charset as aliases) normalization tests</title>
<link rel=author title="Aryeh Gregor" href=ayg@aryeh.name>
<meta name=timeout content=long>
<div id=log></div>
<script src=/resources/testharness.js></script>
<script src=/resources/testharnessreport.js></script>
<style>iframe { display: none }</style>
<script>
"use strict";

// Maps each encoding name (the value the document's characterSet,
// inputEncoding and charset attributes are expected to report) to the labels
// that must resolve to that name.
//
// Names and labels are taken straight from https://encoding.spec.whatwg.org/
// The two commented exceptions below exist because the label is declared
// through <meta>, which overrides what the Encoding Standard alone would say.
var encodingMap = {
  "UTF-8": [
    "unicode-1-1-utf-8",
    "unicode11utf8",
    "unicode20utf8",
    "utf-8",
    "utf8",
    "x-unicode20utf8",
    // As we use <meta>, utf-16 will map to utf-8 per
    // https://html.spec.whatwg.org/multipage/#documentEncoding
    "utf-16",
    "utf-16le",
    "utf-16be",
  ],
  "IBM866": [
    "866",
    "cp866",
    "csibm866",
    "ibm866",
  ],
  "ISO-8859-2": [
    "csisolatin2",
    "iso-8859-2",
    "iso-ir-101",
    "iso8859-2",
    "iso88592",
    "iso_8859-2",
    "iso_8859-2:1987",
    "l2",
    "latin2",
  ],
  "ISO-8859-3": [
    "csisolatin3",
    "iso-8859-3",
    "iso-ir-109",
    "iso8859-3",
    "iso88593",
    "iso_8859-3",
    "iso_8859-3:1988",
    "l3",
    "latin3",
  ],
  "ISO-8859-4": [
    "csisolatin4",
    "iso-8859-4",
    "iso-ir-110",
    "iso8859-4",
    "iso88594",
    "iso_8859-4",
    "iso_8859-4:1988",
    "l4",
    "latin4",
  ],
  "ISO-8859-5": [
    "csisolatincyrillic",
    "cyrillic",
    "iso-8859-5",
    "iso-ir-144",
    "iso8859-5",
    "iso88595",
    "iso_8859-5",
    "iso_8859-5:1988",
  ],
  "ISO-8859-6": [
    "arabic",
    "asmo-708",
    "csiso88596e",
    "csiso88596i",
    "csisolatinarabic",
    "ecma-114",
    "iso-8859-6",
    "iso-8859-6-e",
    "iso-8859-6-i",
    "iso-ir-127",
    "iso8859-6",
    "iso88596",
    "iso_8859-6",
    "iso_8859-6:1987",
  ],
  "ISO-8859-7": [
    "csisolatingreek",
    "ecma-118",
    "elot_928",
    "greek",
    "greek8",
    "iso-8859-7",
    "iso-ir-126",
    "iso8859-7",
    "iso88597",
    "iso_8859-7",
    "iso_8859-7:1987",
    "sun_eu_greek",
  ],
  "ISO-8859-8": [
    "csiso88598e",
    "csisolatinhebrew",
    "hebrew",
    "iso-8859-8",
    "iso-8859-8-e",
    "iso-ir-138",
    "iso8859-8",
    "iso88598",
    "iso_8859-8",
    "iso_8859-8:1988",
    "visual",
  ],
  "ISO-8859-8-I": [
    "csiso88598i",
    "iso-8859-8-i",
    "logical",
  ],
  "ISO-8859-10": [
    "csisolatin6",
    "iso-8859-10",
    "iso-ir-157",
    "iso8859-10",
    "iso885910",
    "l6",
    "latin6",
  ],
  "ISO-8859-13": [
    "iso-8859-13",
    "iso8859-13",
    "iso885913",
  ],
  "ISO-8859-14": [
    "iso-8859-14",
    "iso8859-14",
    "iso885914",
  ],
  "ISO-8859-15": [
    "csisolatin9",
    "iso-8859-15",
    "iso8859-15",
    "iso885915",
    "iso_8859-15",
    "l9",
  ],
  "ISO-8859-16": [
    "iso-8859-16",
  ],
  "KOI8-R": [
    "cskoi8r",
    "koi",
    "koi8",
    "koi8-r",
    "koi8_r",
  ],
  "KOI8-U": [
    "koi8-ru",
    "koi8-u",
  ],
  "macintosh": [
    "csmacintosh",
    "mac",
    "macintosh",
    "x-mac-roman",
  ],
  "windows-874": [
    "dos-874",
    "iso-8859-11",
    "iso8859-11",
    "iso885911",
    "tis-620",
    "windows-874",
  ],
  "windows-1250": [
    "cp1250",
    "windows-1250",
    "x-cp1250",
  ],
  "windows-1251": [
    "cp1251",
    "windows-1251",
    "x-cp1251",
  ],
  "windows-1252": [
    "ansi_x3.4-1968",
    "ascii",
    "cp1252",
    "cp819",
    "csisolatin1",
    "ibm819",
    "iso-8859-1",
    "iso-ir-100",
    "iso8859-1",
    "iso88591",
    "iso_8859-1",
    "iso_8859-1:1987",
    "l1",
    "latin1",
    "us-ascii",
    "windows-1252",
    "x-cp1252",
    // As we use <meta>, x-user-defined will map to windows-1252 per
    // https://html.spec.whatwg.org/multipage/#documentEncoding
    "x-user-defined",
  ],
  "windows-1253": [
    "cp1253",
    "windows-1253",
    "x-cp1253",
  ],
  "windows-1254": [
    "cp1254",
    "csisolatin5",
    "iso-8859-9",
    "iso-ir-148",
    "iso8859-9",
    "iso88599",
    "iso_8859-9",
    "iso_8859-9:1989",
    "l5",
    "latin5",
    "windows-1254",
    "x-cp1254",
  ],
  "windows-1255": [
    "cp1255",
    "windows-1255",
    "x-cp1255",
  ],
  "windows-1256": [
    "cp1256",
    "windows-1256",
    "x-cp1256",
  ],
  "windows-1257": [
    "cp1257",
    "windows-1257",
    "x-cp1257",
  ],
  "windows-1258": [
    "cp1258",
    "windows-1258",
    "x-cp1258",
  ],
  "x-mac-cyrillic": [
    "x-mac-cyrillic",
    "x-mac-ukrainian",
  ],
  "GBK": [
    "chinese",
    "csgb2312",
    "csiso58gb231280",
    "gb2312",
    "gb_2312",
    "gb_2312-80",
    "gbk",
    "iso-ir-58",
    "x-gbk",
  ],
  "gb18030": [
    "gb18030",
  ],
  "Big5": [
    "big5",
    "big5-hkscs",
    "cn-big5",
    "csbig5",
    "x-x-big5",
  ],
  "EUC-JP": [
    "cseucpkdfmtjapanese",
    "euc-jp",
    "x-euc-jp",
  ],
  "ISO-2022-JP": [
    "csiso2022jp",
    "iso-2022-jp",
  ],
  "Shift_JIS": [
    "csshiftjis",
    "ms932",
    "ms_kanji",
    "shift-jis",
    "shift_jis",
    "sjis",
    "windows-31j",
    "x-sjis",
  ],
  "EUC-KR": [
    "cseuckr",
    "csksc56011987",
    "euc-kr",
    "iso-ir-149",
    "korean",
    "ks_c_5601-1987",
    "ks_c_5601-1989",
    "ksc5601",
    "ksc_5601",
    "windows-949",
  ],
  "replacement": [
    "csiso2022kr",
    "hz-gb-2312",
    "iso-2022-cn",
    "iso-2022-cn-ext",
    "iso-2022-kr",
    "replacement",
  ],
};

// Add spaces and mix up case.
//
// Each label can be expanded into extra spellings that must normalize to the
// same encoding: all upper case, alternating case, and padded with ASCII
// whitespace.  The expansion is switched off, so only the labels exactly as
// listed in encodingMap end up being tested.
// NOTE(review): presumably disabled to keep the number of generated tests
// manageable -- confirm before enabling.  The padded spelling contains tabs
// and newlines, which the URL parser strips from an unencoded URL, so it is
// only meaningful if the label is percent-encoded when the test URL is built.
var testLabelVariants = false;

Object.keys(encodingMap).forEach(function(name) {
  var labels = [];
  encodingMap[name].forEach(function(label) {
    labels.push(label);
    if (!testLabelVariants) {
      return;
    }

    var upper = label.toUpperCase();
    // Upper-case every other character, starting with the first one.
    var mixed = label.split("").map(function(ch, index) {
      return index % 2 === 0 ? ch.toUpperCase() : ch;
    }).join("");

    // Skip spellings that collapse into one already queued (e.g. a label made
    // only of digits is identical in every case variant).
    if (label !== upper) {
      labels.push(upper);
    }
    if (label !== mixed && upper !== mixed) {
      labels.push(mixed);
    }
    labels.push(" \t\n\f\r" + label + " \t\n\f\r");
  });
  encodingMap[name] = labels;
});

// For every (name, label) pair, request encoding.py with that label in an
// iframe (hidden by the stylesheet above) and check that characterSet,
// inputEncoding and charset of the loaded document all report `name`.
Object.keys(encodingMap).forEach(function(name) {
  encodingMap[name].forEach(function(label) {
    var iframe = document.createElement("iframe");

    // One async test per attribute, so that a failure names the exact alias
    // that reported the wrong value.
    var checks = ["characterSet", "inputEncoding", "charset"].map(function(attribute) {
      return {
        attribute: attribute,
        test: async_test("Name " + format_value(name) +
                         " has label " + format_value(label) +
                         " (" + attribute + ")"),
      };
    });

    // Percent-encode the label so that characters with a special meaning in
    // URLs (whitespace, "&", "#", "+", ...) cannot corrupt the query string.
    // NOTE(review): assumes encoding.py percent-decodes its query parameters,
    // as is usual for query strings -- the script is not visible here.
    iframe.src = "encoding.py?label=" + encodeURIComponent(label);

    iframe.onload = function() {
      checks.forEach(function(check) {
        // The lookup happens inside step() so that a thrown error (for
        // instance a missing contentDocument) fails this test instead of
        // escaping the load handler.
        check.test.step(function() {
          assert_equals(iframe.contentDocument[check.attribute], name);
        });
      });
      // Detach the iframe only after every attribute has been read from it.
      document.body.removeChild(iframe);
      checks.forEach(function(check) {
        check.test.done();
      });
    };
    document.body.appendChild(iframe);
  });
});
</script>
<!-- vim: set expandtab tabstop=2 shiftwidth=2: -->