blob: 0b261bcd108b7662be0c1c44ad5896f06e6253c0 [file] [log] [blame]
// Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
'use strict';
/**
* @fileoverview
* This JavaScript library is ported from the wcwidth.js module of node.js.
* The original implementation can be found at:
* https://npmjs.org/package/wcwidth.js
*/
/**
* JavaScript porting of Markus Kuhn's wcwidth() implementation
*
* The following explanation comes from the original C implementation:
*
* This is an implementation of wcwidth() and wcswidth() (defined in
* IEEE Std 1002.1-2001) for Unicode.
*
* https://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
* https://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
*
* In fixed-width output devices, Latin characters all occupy a single
* "cell" position of equal width, whereas ideographic CJK characters
* occupy two such cells. Interoperability between terminal-line
* applications and (teletype-style) character terminals using the
* UTF-8 encoding requires agreement on which character should advance
* the cursor by how many cell positions. No established formal
* standards exist at present on which Unicode character shall occupy
* how many cell positions on character terminals. These routines are
* a first attempt of defining such behavior based on simple rules
* applied to data provided by the Unicode Consortium.
*
* For some graphical characters, the Unicode standard explicitly
* defines a character-cell width via the definition of the East Asian
* FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
* In all these cases, there is no ambiguity about which width a
* terminal shall use. For characters in the East Asian Ambiguous (A)
* class, the width choice depends purely on a preference of backward
* compatibility with either historic CJK or Western practice.
* Choosing single-width for these characters is easy to justify as
* the appropriate long-term solution, as the CJK practice of
* displaying these characters as double-width comes from historic
* implementation simplicity (8-bit encoded characters were displayed
* single-width and 16-bit ones double-width, even for Greek,
* Cyrillic, etc.) and not any typographic considerations.
*
* Much less clear is the choice of width for the Not East Asian
* (Neutral) class. Existing practice does not dictate a width for any
* of these characters. It would nevertheless make sense
* typographically to allocate two character cells to characters such
* as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
* represented adequately with a single-width glyph. The following
* routines at present merely assign a single-cell width to all
* neutral characters, in the interest of simplicity. This is not
* entirely satisfactory and should be reconsidered before
* establishing a formal standard in this area. At the moment, the
* decision which Not East Asian (Neutral) characters should be
* represented by double-width glyphs cannot yet be answered by
* applying a simple rule from the Unicode database content. Setting
* up a proper standard for the behavior of UTF-8 character terminals
* will require a careful analysis not only of each Unicode character,
* but also of each presentation form, something the author of these
* routines has avoided to do so far.
*
* https://www.unicode.org/unicode/reports/tr11/
*
* Markus Kuhn -- 2007-05-26 (Unicode 5.0)
*
* Permission to use, copy, modify, and distribute this software
* for any purpose and without fee is hereby granted. The author
* disclaims all warranties with regard to this software.
*
* Latest version: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
*/
/**
* The following function defines the column width of an ISO 10646 character
* as follows:
*
* - The null character (U+0000) has a column width of 0.
* - Other C0/C1 control characters and DEL have a column width of
* lib.wc.controlWidth (0 by default; the original C implementation
* documented a return value of -1).
* - Non-spacing and enclosing combining characters (general category code Mn
* or Me in the Unicode database) have a column width of 0.
* - SOFT HYPHEN (U+00AD) is listed in lib.wc.combining and so has a column
* width of 0 (the original C implementation documented a width of 1).
* - Other format characters (general category code Cf in the Unicode database)
* and ZERO WIDTH SPACE (U+200B) have a column width of 0.
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) have a
* column width of 0.
* - Spacing characters in the East Asian Wide (W) or East Asian Full-width (F)
* category as defined in Unicode Technical Report #11 have a column width of
* 2.
* - East Asian Ambiguous characters are taken into account if
* regardCjkAmbiguous flag is enabled. They have a column width of 2.
* - All remaining characters (including all printable ISO 8859-1 and WGL4
* characters, Unicode control characters, etc.) have a column width of 1.
*
* This implementation assumes that characters are encoded in ISO 10646.
*/
lib.wc = {
  // Column width reported for the nul character (U+0000).
  nulWidth: 0,

  // Column width reported for control characters.
  controlWidth: 0,

  // Whether East Asian Ambiguous characters get their own width.
  regardCjkAmbiguous: false,

  // Column width reported for East Asian Ambiguous characters when
  // regardCjkAmbiguous is enabled.
  cjkAmbiguousWidth: 2,
};
// Sorted list of non-overlapping intervals of non-spacing characters
// generated by the `./ranges.py` helper.
//
// Each entry is a [first, last] pair of code points; the table search treats
// both ends as inclusive.  lib.wc.isSpace() searches this table, and
// lib.wc.charWidthDisregardAmbiguous() reports a column width of 0 for every
// code point found in it.
lib.wc.combining = [
[0x00ad, 0x00ad], [0x0300, 0x036f], [0x0483, 0x0489],
[0x0591, 0x05bd], [0x05bf, 0x05bf], [0x05c1, 0x05c2],
[0x05c4, 0x05c5], [0x05c7, 0x05c7], [0x0610, 0x061a],
[0x061c, 0x061c], [0x064b, 0x065f], [0x0670, 0x0670],
[0x06d6, 0x06dc], [0x06df, 0x06e4], [0x06e7, 0x06e8],
[0x06ea, 0x06ed], [0x0711, 0x0711], [0x0730, 0x074a],
[0x07a6, 0x07b0], [0x07eb, 0x07f3], [0x07fd, 0x07fd],
[0x0816, 0x0819], [0x081b, 0x0823], [0x0825, 0x0827],
[0x0829, 0x082d], [0x0859, 0x085b], [0x08d3, 0x08e1],
[0x08e3, 0x0902], [0x093a, 0x093a], [0x093c, 0x093c],
[0x0941, 0x0948], [0x094d, 0x094d], [0x0951, 0x0957],
[0x0962, 0x0963], [0x0981, 0x0981], [0x09bc, 0x09bc],
[0x09c1, 0x09c4], [0x09cd, 0x09cd], [0x09e2, 0x09e3],
[0x09fe, 0x09fe], [0x0a01, 0x0a02], [0x0a3c, 0x0a3c],
[0x0a41, 0x0a42], [0x0a47, 0x0a48], [0x0a4b, 0x0a4d],
[0x0a51, 0x0a51], [0x0a70, 0x0a71], [0x0a75, 0x0a75],
[0x0a81, 0x0a82], [0x0abc, 0x0abc], [0x0ac1, 0x0ac5],
[0x0ac7, 0x0ac8], [0x0acd, 0x0acd], [0x0ae2, 0x0ae3],
[0x0afa, 0x0aff], [0x0b01, 0x0b01], [0x0b3c, 0x0b3c],
[0x0b3f, 0x0b3f], [0x0b41, 0x0b44], [0x0b4d, 0x0b4d],
[0x0b55, 0x0b56], [0x0b62, 0x0b63], [0x0b82, 0x0b82],
[0x0bc0, 0x0bc0], [0x0bcd, 0x0bcd], [0x0c00, 0x0c00],
[0x0c04, 0x0c04], [0x0c3e, 0x0c40], [0x0c46, 0x0c48],
[0x0c4a, 0x0c4d], [0x0c55, 0x0c56], [0x0c62, 0x0c63],
[0x0c81, 0x0c81], [0x0cbc, 0x0cbc], [0x0cbf, 0x0cbf],
[0x0cc6, 0x0cc6], [0x0ccc, 0x0ccd], [0x0ce2, 0x0ce3],
[0x0d00, 0x0d01], [0x0d3b, 0x0d3c], [0x0d41, 0x0d44],
[0x0d4d, 0x0d4d], [0x0d62, 0x0d63], [0x0d81, 0x0d81],
[0x0dca, 0x0dca], [0x0dd2, 0x0dd4], [0x0dd6, 0x0dd6],
[0x0e31, 0x0e31], [0x0e34, 0x0e3a], [0x0e47, 0x0e4e],
[0x0eb1, 0x0eb1], [0x0eb4, 0x0ebc], [0x0ec8, 0x0ecd],
[0x0f18, 0x0f19], [0x0f35, 0x0f35], [0x0f37, 0x0f37],
[0x0f39, 0x0f39], [0x0f71, 0x0f7e], [0x0f80, 0x0f84],
[0x0f86, 0x0f87], [0x0f8d, 0x0f97], [0x0f99, 0x0fbc],
[0x0fc6, 0x0fc6], [0x102d, 0x1030], [0x1032, 0x1037],
[0x1039, 0x103a], [0x103d, 0x103e], [0x1058, 0x1059],
[0x105e, 0x1060], [0x1071, 0x1074], [0x1082, 0x1082],
[0x1085, 0x1086], [0x108d, 0x108d], [0x109d, 0x109d],
[0x1160, 0x11ff], [0x135d, 0x135f], [0x1712, 0x1714],
[0x1732, 0x1734], [0x1752, 0x1753], [0x1772, 0x1773],
[0x17b4, 0x17b5], [0x17b7, 0x17bd], [0x17c6, 0x17c6],
[0x17c9, 0x17d3], [0x17dd, 0x17dd], [0x180b, 0x180e],
[0x1885, 0x1886], [0x18a9, 0x18a9], [0x1920, 0x1922],
[0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193b],
[0x1a17, 0x1a18], [0x1a1b, 0x1a1b], [0x1a56, 0x1a56],
[0x1a58, 0x1a5e], [0x1a60, 0x1a60], [0x1a62, 0x1a62],
[0x1a65, 0x1a6c], [0x1a73, 0x1a7c], [0x1a7f, 0x1a7f],
[0x1ab0, 0x1ac0], [0x1b00, 0x1b03], [0x1b34, 0x1b34],
[0x1b36, 0x1b3a], [0x1b3c, 0x1b3c], [0x1b42, 0x1b42],
[0x1b6b, 0x1b73], [0x1b80, 0x1b81], [0x1ba2, 0x1ba5],
[0x1ba8, 0x1ba9], [0x1bab, 0x1bad], [0x1be6, 0x1be6],
[0x1be8, 0x1be9], [0x1bed, 0x1bed], [0x1bef, 0x1bf1],
[0x1c2c, 0x1c33], [0x1c36, 0x1c37], [0x1cd0, 0x1cd2],
[0x1cd4, 0x1ce0], [0x1ce2, 0x1ce8], [0x1ced, 0x1ced],
[0x1cf4, 0x1cf4], [0x1cf8, 0x1cf9], [0x1dc0, 0x1df9],
[0x1dfb, 0x1dff], [0x200b, 0x200f], [0x202a, 0x202e],
[0x2060, 0x2064], [0x2066, 0x206f], [0x20d0, 0x20f0],
[0x2cef, 0x2cf1], [0x2d7f, 0x2d7f], [0x2de0, 0x2dff],
[0x302a, 0x302d], [0x3099, 0x309a], [0xa66f, 0xa672],
[0xa674, 0xa67d], [0xa69e, 0xa69f], [0xa6f0, 0xa6f1],
[0xa802, 0xa802], [0xa806, 0xa806], [0xa80b, 0xa80b],
[0xa825, 0xa826], [0xa82c, 0xa82c], [0xa8c4, 0xa8c5],
[0xa8e0, 0xa8f1], [0xa8ff, 0xa8ff], [0xa926, 0xa92d],
[0xa947, 0xa951], [0xa980, 0xa982], [0xa9b3, 0xa9b3],
[0xa9b6, 0xa9b9], [0xa9bc, 0xa9bd], [0xa9e5, 0xa9e5],
[0xaa29, 0xaa2e], [0xaa31, 0xaa32], [0xaa35, 0xaa36],
[0xaa43, 0xaa43], [0xaa4c, 0xaa4c], [0xaa7c, 0xaa7c],
[0xaab0, 0xaab0], [0xaab2, 0xaab4], [0xaab7, 0xaab8],
[0xaabe, 0xaabf], [0xaac1, 0xaac1], [0xaaec, 0xaaed],
[0xaaf6, 0xaaf6], [0xabe5, 0xabe5], [0xabe8, 0xabe8],
[0xabed, 0xabed], [0xfb1e, 0xfb1e], [0xfe00, 0xfe0f],
[0xfe20, 0xfe2f], [0xfeff, 0xfeff], [0xfff9, 0xfffb],
[0x101fd, 0x101fd], [0x102e0, 0x102e0], [0x10376, 0x1037a],
[0x10a01, 0x10a03], [0x10a05, 0x10a06], [0x10a0c, 0x10a0f],
[0x10a38, 0x10a3a], [0x10a3f, 0x10a3f], [0x10ae5, 0x10ae6],
[0x10d24, 0x10d27], [0x10eab, 0x10eac], [0x10f46, 0x10f50],
[0x11001, 0x11001], [0x11038, 0x11046], [0x1107f, 0x11081],
[0x110b3, 0x110b6], [0x110b9, 0x110ba], [0x11100, 0x11102],
[0x11127, 0x1112b], [0x1112d, 0x11134], [0x11173, 0x11173],
[0x11180, 0x11181], [0x111b6, 0x111be], [0x111c9, 0x111cc],
[0x111cf, 0x111cf], [0x1122f, 0x11231], [0x11234, 0x11234],
[0x11236, 0x11237], [0x1123e, 0x1123e], [0x112df, 0x112df],
[0x112e3, 0x112ea], [0x11300, 0x11301], [0x1133b, 0x1133c],
[0x11340, 0x11340], [0x11366, 0x1136c], [0x11370, 0x11374],
[0x11438, 0x1143f], [0x11442, 0x11444], [0x11446, 0x11446],
[0x1145e, 0x1145e], [0x114b3, 0x114b8], [0x114ba, 0x114ba],
[0x114bf, 0x114c0], [0x114c2, 0x114c3], [0x115b2, 0x115b5],
[0x115bc, 0x115bd], [0x115bf, 0x115c0], [0x115dc, 0x115dd],
[0x11633, 0x1163a], [0x1163d, 0x1163d], [0x1163f, 0x11640],
[0x116ab, 0x116ab], [0x116ad, 0x116ad], [0x116b0, 0x116b5],
[0x116b7, 0x116b7], [0x1171d, 0x1171f], [0x11722, 0x11725],
[0x11727, 0x1172b], [0x1182f, 0x11837], [0x11839, 0x1183a],
[0x1193b, 0x1193c], [0x1193e, 0x1193e], [0x11943, 0x11943],
[0x119d4, 0x119d7], [0x119da, 0x119db], [0x119e0, 0x119e0],
[0x11a01, 0x11a0a], [0x11a33, 0x11a38], [0x11a3b, 0x11a3e],
[0x11a47, 0x11a47], [0x11a51, 0x11a56], [0x11a59, 0x11a5b],
[0x11a8a, 0x11a96], [0x11a98, 0x11a99], [0x11c30, 0x11c36],
[0x11c38, 0x11c3d], [0x11c3f, 0x11c3f], [0x11c92, 0x11ca7],
[0x11caa, 0x11cb0], [0x11cb2, 0x11cb3], [0x11cb5, 0x11cb6],
[0x11d31, 0x11d36], [0x11d3a, 0x11d3a], [0x11d3c, 0x11d3d],
[0x11d3f, 0x11d45], [0x11d47, 0x11d47], [0x11d90, 0x11d91],
[0x11d95, 0x11d95], [0x11d97, 0x11d97], [0x11ef3, 0x11ef4],
[0x13430, 0x13438], [0x16af0, 0x16af4], [0x16b30, 0x16b36],
[0x16f4f, 0x16f4f], [0x16f8f, 0x16f92], [0x16fe4, 0x16fe4],
[0x1bc9d, 0x1bc9e], [0x1bca0, 0x1bca3], [0x1d167, 0x1d169],
[0x1d173, 0x1d182], [0x1d185, 0x1d18b], [0x1d1aa, 0x1d1ad],
[0x1d242, 0x1d244], [0x1da00, 0x1da36], [0x1da3b, 0x1da6c],
[0x1da75, 0x1da75], [0x1da84, 0x1da84], [0x1da9b, 0x1da9f],
[0x1daa1, 0x1daaf], [0x1e000, 0x1e006], [0x1e008, 0x1e018],
[0x1e01b, 0x1e021], [0x1e023, 0x1e024], [0x1e026, 0x1e02a],
[0x1e130, 0x1e136], [0x1e2ec, 0x1e2ef], [0x1e8d0, 0x1e8d6],
[0x1e944, 0x1e94a], [0xe0001, 0xe0001], [0xe0020, 0xe007f],
[0xe0100, 0xe01ef],
];
// Sorted list of non-overlapping intervals of East Asian Ambiguous characters
// generated by the `./ranges.py` helper.
//
// Each entry is a [first, last] pair of code points; the table search treats
// both ends as inclusive.  lib.wc.isCjkAmbiguous() searches this table.
//
// Note that the list is broader than its name suggests: it also contains
// ranges that appear in lib.wc.unambiguous (e.g. [0x1100, 0x115f]) and ranges
// that appear in lib.wc.combining (e.g. [0x0300, 0x036f]).
lib.wc.ambiguous = [
[0x00a1, 0x00a1], [0x00a4, 0x00a4], [0x00a7, 0x00a8],
[0x00aa, 0x00aa], [0x00ad, 0x00ae], [0x00b0, 0x00b4],
[0x00b6, 0x00ba], [0x00bc, 0x00bf], [0x00c6, 0x00c6],
[0x00d0, 0x00d0], [0x00d7, 0x00d8], [0x00de, 0x00e1],
[0x00e6, 0x00e6], [0x00e8, 0x00ea], [0x00ec, 0x00ed],
[0x00f0, 0x00f0], [0x00f2, 0x00f3], [0x00f7, 0x00fa],
[0x00fc, 0x00fc], [0x00fe, 0x00fe], [0x0101, 0x0101],
[0x0111, 0x0111], [0x0113, 0x0113], [0x011b, 0x011b],
[0x0126, 0x0127], [0x012b, 0x012b], [0x0131, 0x0133],
[0x0138, 0x0138], [0x013f, 0x0142], [0x0144, 0x0144],
[0x0148, 0x014b], [0x014d, 0x014d], [0x0152, 0x0153],
[0x0166, 0x0167], [0x016b, 0x016b], [0x01ce, 0x01ce],
[0x01d0, 0x01d0], [0x01d2, 0x01d2], [0x01d4, 0x01d4],
[0x01d6, 0x01d6], [0x01d8, 0x01d8], [0x01da, 0x01da],
[0x01dc, 0x01dc], [0x0251, 0x0251], [0x0261, 0x0261],
[0x02c4, 0x02c4], [0x02c7, 0x02c7], [0x02c9, 0x02cb],
[0x02cd, 0x02cd], [0x02d0, 0x02d0], [0x02d8, 0x02db],
[0x02dd, 0x02dd], [0x02df, 0x02df], [0x0300, 0x036f],
[0x0391, 0x03a1], [0x03a3, 0x03a9], [0x03b1, 0x03c1],
[0x03c3, 0x03c9], [0x0401, 0x0401], [0x0410, 0x044f],
[0x0451, 0x0451], [0x1100, 0x115f], [0x2010, 0x2010],
[0x2013, 0x2016], [0x2018, 0x2019], [0x201c, 0x201d],
[0x2020, 0x2022], [0x2024, 0x2027], [0x2030, 0x2030],
[0x2032, 0x2033], [0x2035, 0x2035], [0x203b, 0x203b],
[0x203e, 0x203e], [0x2074, 0x2074], [0x207f, 0x207f],
[0x2081, 0x2084], [0x20ac, 0x20ac], [0x2103, 0x2103],
[0x2105, 0x2105], [0x2109, 0x2109], [0x2113, 0x2113],
[0x2116, 0x2116], [0x2121, 0x2122], [0x2126, 0x2126],
[0x212b, 0x212b], [0x2153, 0x2154], [0x215b, 0x215e],
[0x2160, 0x216b], [0x2170, 0x2179], [0x2189, 0x2189],
[0x2190, 0x2199], [0x21b8, 0x21b9], [0x21d2, 0x21d2],
[0x21d4, 0x21d4], [0x21e7, 0x21e7], [0x2200, 0x2200],
[0x2202, 0x2203], [0x2207, 0x2208], [0x220b, 0x220b],
[0x220f, 0x220f], [0x2211, 0x2211], [0x2215, 0x2215],
[0x221a, 0x221a], [0x221d, 0x2220], [0x2223, 0x2223],
[0x2225, 0x2225], [0x2227, 0x222c], [0x222e, 0x222e],
[0x2234, 0x2237], [0x223c, 0x223d], [0x2248, 0x2248],
[0x224c, 0x224c], [0x2252, 0x2252], [0x2260, 0x2261],
[0x2264, 0x2267], [0x226a, 0x226b], [0x226e, 0x226f],
[0x2282, 0x2283], [0x2286, 0x2287], [0x2295, 0x2295],
[0x2299, 0x2299], [0x22a5, 0x22a5], [0x22bf, 0x22bf],
[0x2312, 0x2312], [0x231a, 0x231b], [0x2329, 0x232a],
[0x23e9, 0x23ec], [0x23f0, 0x23f0], [0x23f3, 0x23f3],
[0x2460, 0x24e9], [0x24eb, 0x254b], [0x2550, 0x2573],
[0x2580, 0x258f], [0x2592, 0x2595], [0x25a0, 0x25a1],
[0x25a3, 0x25a9], [0x25b2, 0x25b3], [0x25b6, 0x25b7],
[0x25bc, 0x25bd], [0x25c0, 0x25c1], [0x25c6, 0x25c8],
[0x25cb, 0x25cb], [0x25ce, 0x25d1], [0x25e2, 0x25e5],
[0x25ef, 0x25ef], [0x25fd, 0x25fe], [0x2605, 0x2606],
[0x2609, 0x2609], [0x260e, 0x260f], [0x2614, 0x2615],
[0x261c, 0x261c], [0x261e, 0x261e], [0x2640, 0x2640],
[0x2642, 0x2642], [0x2648, 0x2653], [0x2660, 0x2661],
[0x2663, 0x2665], [0x2667, 0x266a], [0x266c, 0x266d],
[0x266f, 0x266f], [0x267f, 0x267f], [0x2693, 0x2693],
[0x269e, 0x269f], [0x26a1, 0x26a1], [0x26aa, 0x26ab],
[0x26bd, 0x26bf], [0x26c4, 0x26e1], [0x26e3, 0x26e3],
[0x26e8, 0x26ff], [0x2705, 0x2705], [0x270a, 0x270b],
[0x2728, 0x2728], [0x273d, 0x273d], [0x274c, 0x274c],
[0x274e, 0x274e], [0x2753, 0x2755], [0x2757, 0x2757],
[0x2776, 0x277f], [0x2795, 0x2797], [0x27b0, 0x27b0],
[0x27bf, 0x27bf], [0x2b1b, 0x2b1c], [0x2b50, 0x2b50],
[0x2b55, 0x2b59], [0x2e80, 0x2fdf], [0x2ff0, 0x303e],
[0x3040, 0x4dbf], [0x4e00, 0xa4cf], [0xa960, 0xa97f],
[0xac00, 0xd7a3], [0xe000, 0xfaff], [0xfe00, 0xfe19],
[0xfe30, 0xfe6f], [0xff01, 0xff60], [0xffe0, 0xffe6],
[0xfffd, 0xfffd], [0x16fe0, 0x16fe4], [0x16ff0, 0x16ff1],
[0x17000, 0x18cd5], [0x18d00, 0x18d08], [0x1b000, 0x1b12f],
[0x1b150, 0x1b152], [0x1b164, 0x1b167], [0x1b170, 0x1b2ff],
[0x1f004, 0x1f004], [0x1f0cf, 0x1f0cf], [0x1f100, 0x1f10a],
[0x1f110, 0x1f12d], [0x1f130, 0x1f169], [0x1f170, 0x1f1ac],
[0x1f200, 0x1f202], [0x1f210, 0x1f23b], [0x1f240, 0x1f248],
[0x1f250, 0x1f251], [0x1f260, 0x1f265], [0x1f300, 0x1f320],
[0x1f32d, 0x1f335], [0x1f337, 0x1f37c], [0x1f37e, 0x1f393],
[0x1f3a0, 0x1f3ca], [0x1f3cf, 0x1f3d3], [0x1f3e0, 0x1f3f0],
[0x1f3f4, 0x1f3f4], [0x1f3f8, 0x1f43e], [0x1f440, 0x1f440],
[0x1f442, 0x1f4fc], [0x1f4ff, 0x1f53d], [0x1f54b, 0x1f54e],
[0x1f550, 0x1f567], [0x1f57a, 0x1f57a], [0x1f595, 0x1f596],
[0x1f5a4, 0x1f5a4], [0x1f5fb, 0x1f64f], [0x1f680, 0x1f6c5],
[0x1f6cc, 0x1f6cc], [0x1f6d0, 0x1f6d2], [0x1f6d5, 0x1f6d7],
[0x1f6eb, 0x1f6ec], [0x1f6f4, 0x1f6fc], [0x1f7e0, 0x1f7eb],
[0x1f90c, 0x1f93a], [0x1f93c, 0x1f945], [0x1f947, 0x1f978],
[0x1f97a, 0x1f9cb], [0x1f9cd, 0x1f9ff], [0x1fa70, 0x1fa74],
[0x1fa78, 0x1fa7a], [0x1fa80, 0x1fa86], [0x1fa90, 0x1faa8],
[0x1fab0, 0x1fab6], [0x1fac0, 0x1fac2], [0x1fad0, 0x1fad6],
[0x20000, 0x2fffd], [0x30000, 0x3fffd], [0xe0100, 0xe01ef],
[0xf0000, 0xffffd], [0x100000, 0x10fffd],
];
// Sorted list of non-overlapping intervals of East Asian Unambiguous characters
// generated by the `./ranges.py` helper.
//
// Each entry is a [first, last] pair of code points; the table search treats
// both ends as inclusive.  lib.wc.charWidthDisregardAmbiguous() reports a
// column width of 2 for code points found here (after its control-character
// and zero-width checks have been applied).
lib.wc.unambiguous = [
[0x1100, 0x115f], [0x231a, 0x231b], [0x2329, 0x232a],
[0x23e9, 0x23ec], [0x23f0, 0x23f0], [0x23f3, 0x23f3],
[0x25fd, 0x25fe], [0x2614, 0x2615], [0x2648, 0x2653],
[0x267f, 0x267f], [0x2693, 0x2693], [0x26a1, 0x26a1],
[0x26aa, 0x26ab], [0x26bd, 0x26be], [0x26c4, 0x26c5],
[0x26ce, 0x26ce], [0x26d4, 0x26d4], [0x26ea, 0x26ea],
[0x26f2, 0x26f3], [0x26f5, 0x26f5], [0x26fa, 0x26fa],
[0x26fd, 0x26fd], [0x2705, 0x2705], [0x270a, 0x270b],
[0x2728, 0x2728], [0x274c, 0x274c], [0x274e, 0x274e],
[0x2753, 0x2755], [0x2757, 0x2757], [0x2795, 0x2797],
[0x27b0, 0x27b0], [0x27bf, 0x27bf], [0x2b1b, 0x2b1c],
[0x2b50, 0x2b50], [0x2b55, 0x2b55], [0x2e80, 0x2fdf],
[0x2ff0, 0x303e], [0x3040, 0x3247], [0x3250, 0x4dbf],
[0x4e00, 0xa4cf], [0xa960, 0xa97f], [0xac00, 0xd7a3],
[0xf900, 0xfaff], [0xfe10, 0xfe19], [0xfe30, 0xfe6f],
[0xff01, 0xff60], [0xffe0, 0xffe6], [0x16fe0, 0x16fe4],
[0x16ff0, 0x16ff1], [0x17000, 0x18cd5], [0x18d00, 0x18d08],
[0x1b000, 0x1b12f], [0x1b150, 0x1b152], [0x1b164, 0x1b167],
[0x1b170, 0x1b2ff], [0x1f004, 0x1f004], [0x1f0cf, 0x1f0cf],
[0x1f18e, 0x1f18e], [0x1f191, 0x1f19a], [0x1f200, 0x1f202],
[0x1f210, 0x1f23b], [0x1f240, 0x1f248], [0x1f250, 0x1f251],
[0x1f260, 0x1f265], [0x1f300, 0x1f320], [0x1f32d, 0x1f335],
[0x1f337, 0x1f37c], [0x1f37e, 0x1f393], [0x1f3a0, 0x1f3ca],
[0x1f3cf, 0x1f3d3], [0x1f3e0, 0x1f3f0], [0x1f3f4, 0x1f3f4],
[0x1f3f8, 0x1f43e], [0x1f440, 0x1f440], [0x1f442, 0x1f4fc],
[0x1f4ff, 0x1f53d], [0x1f54b, 0x1f54e], [0x1f550, 0x1f567],
[0x1f57a, 0x1f57a], [0x1f595, 0x1f596], [0x1f5a4, 0x1f5a4],
[0x1f5fb, 0x1f64f], [0x1f680, 0x1f6c5], [0x1f6cc, 0x1f6cc],
[0x1f6d0, 0x1f6d2], [0x1f6d5, 0x1f6d7], [0x1f6eb, 0x1f6ec],
[0x1f6f4, 0x1f6fc], [0x1f7e0, 0x1f7eb], [0x1f90c, 0x1f93a],
[0x1f93c, 0x1f945], [0x1f947, 0x1f978], [0x1f97a, 0x1f9cb],
[0x1f9cd, 0x1f9ff], [0x1fa70, 0x1fa74], [0x1fa78, 0x1fa7a],
[0x1fa80, 0x1fa86], [0x1fa90, 0x1faa8], [0x1fab0, 0x1fab6],
[0x1fac0, 0x1fac2], [0x1fad0, 0x1fad6], [0x20000, 0x2fffd],
[0x30000, 0x3fffd],
];
/**
 * Binary search to check if the given unicode character is in the table.
 *
 * Every table entry is a two-element [first, last] array describing a range
 * of code points with both ends included.  The search relies on the table
 * being sorted in ascending order with no overlapping ranges.
 *
 * @param {number} ucs A unicode character code.
 * @param {!Array<!Array<number>>} table A sorted list of intervals to match
 *     against.
 * @return {boolean} True if the given character is in the table.
 */
lib.wc.binaryTableSearch_ = function(ucs, table) {
  // An empty table matches nothing; return before dereferencing table[0].
  if (table.length === 0) {
    return false;
  }

  let low = 0;
  let high = table.length - 1;

  // Cheap rejection of anything outside the span covered by the whole table.
  if (ucs < table[low][0] || ucs > table[high][1]) {
    return false;
  }

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);
    if (ucs > table[mid][1]) {
      // Past the end of this range: continue in the upper half.
      low = mid + 1;
    } else if (ucs < table[mid][0]) {
      // Before the start of this range: continue in the lower half.
      high = mid - 1;
    } else {
      return true;
    }
  }
  return false;
};
/**
 * Check whether the given unicode character is listed in lib.wc.combining.
 *
 * Despite the name, this is not a whitespace test: the lookup goes against
 * the table of non-spacing characters.
 *
 * @param {number} ucs A unicode character code.
 * @return {boolean} True if the given character is found in the table; false
 *     otherwise.
 */
lib.wc.isSpace = function(ucs) {
  const nonSpacingRanges = lib.wc.combining;
  return lib.wc.binaryTableSearch_(ucs, nonSpacingRanges);
};
/**
 * Check whether the given unicode character is listed in lib.wc.ambiguous,
 * the table used for East Asian Ambiguous handling.
 *
 * @param {number} ucs A unicode character code.
 * @return {boolean} True if the given character is found in the table.
 */
lib.wc.isCjkAmbiguous = function(ucs) {
  const ambiguousRanges = lib.wc.ambiguous;
  return lib.wc.binaryTableSearch_(ucs, ambiguousRanges);
};
/**
 * Determine the column width of the given character.
 *
 * Dispatches on lib.wc.regardCjkAmbiguous each time it is called, so flipping
 * the flag takes effect for the next lookup.
 *
 * @param {number} ucs A unicode character code.
 * @return {number} The column width of the given character.
 */
lib.wc.charWidth = function(ucs) {
  return lib.wc.regardCjkAmbiguous
      ? lib.wc.charWidthRegardAmbiguous(ucs)
      : lib.wc.charWidthDisregardAmbiguous(ucs);
};
/**
 * Determine the column width of the given character without considering East
 * Asian Ambiguous characters.
 *
 * The checks run in this order: printable ASCII, nul, control characters,
 * the zero-width table, and finally the wide table.
 *
 * @param {number} ucs A unicode character code.
 * @return {number} The column width of the given character.
 */
lib.wc.charWidthDisregardAmbiguous = function(ucs) {
  // ASCII fast path.  U+007F (DEL) is deliberately excluded here and picked
  // up by the control check that follows.
  if (ucs < 0x7f) {
    if (ucs >= 0x20) {
      return 1;
    }
    return ucs == 0 ? lib.wc.nulWidth : lib.wc.controlWidth;
  }

  // DEL and the 8-bit control characters.
  if (ucs < 0xa0) {
    return lib.wc.controlWidth;
  }

  // Anything in the non-spacing table occupies no cell.
  if (lib.wc.isSpace(ucs)) {
    return 0;
  }

  // Wide characters take two cells; everything else takes one.
  const isWide = lib.wc.binaryTableSearch_(ucs, lib.wc.unambiguous);
  return isWide ? 2 : 1;
};
/**
 * Determine the column width of the given character considering East Asian
 * Ambiguous characters.
 *
 * @param {number} ucs A unicode character code.
 * @return {number} The column width of the given character: 0 for characters
 *     in the non-spacing table, lib.wc.cjkAmbiguousWidth for other characters
 *     in the ambiguous table, and the result of
 *     lib.wc.charWidthDisregardAmbiguous() for everything else.
 */
lib.wc.charWidthRegardAmbiguous = function(ucs) {
  // The ambiguous table overlaps the non-spacing table (for example the
  // combining diacritics U+0300-U+036F and the variation selectors
  // U+FE00-U+FE0F).  Those must stay zero width, otherwise a combining mark
  // would claim cells of its own instead of sharing the base character's, so
  // the zero-width lookup has to win over the ambiguous lookup.
  if (lib.wc.isSpace(ucs)) {
    return 0;
  }

  if (lib.wc.isCjkAmbiguous(ucs)) {
    return lib.wc.cjkAmbiguousWidth;
  }

  return lib.wc.charWidthDisregardAmbiguous(ucs);
};
/**
 * Determine the column width of the given string.
 *
 * The widths of the individual code points are summed up.  If any code point
 * reports a negative width, the whole string is reported as -1.
 *
 * @param {string} str A string.
 * @return {number} The column width of the given string.
 */
lib.wc.strWidth = function(str) {
  let total = 0;
  // Iterating a string yields whole code points, so surrogate pairs are
  // measured once rather than per UTF-16 code unit.
  for (const ch of str) {
    const cells = lib.wc.charWidth(ch.codePointAt(0));
    if (cells < 0) {
      return -1;
    }
    total += cells;
  }
  return total;
};
/**
 * Get the substring at the given column offset of the given column width.
 *
 * Offsets and widths are measured in display columns (as reported by
 * lib.wc.charWidth), not in string indices.
 *
 * @param {string} str The string to get substring from.
 * @param {number} start The starting column offset to get substring.
 * @param {number=} opt_width The column width of the substring.
 * @return {string} The substring.
 */
lib.wc.substr = function(str, start, opt_width) {
  // Walks code points starting at string index `from` and returns the index
  // of the first code point whose width pushes the running total above
  // `columns`, or str.length when the string runs out first.
  const walkColumns = (from, columns) => {
    let index = from;
    let used = 0;
    while (index < str.length) {
      const codePoint = str.codePointAt(index);
      used += lib.wc.charWidth(codePoint);
      if (used > columns) {
        break;
      }
      // Code points above the BMP occupy two UTF-16 code units.
      index += (codePoint > 0xffff) ? 2 : 1;
    }
    return index;
  };

  // Zero width code points (like combining characters) belong to the code
  // point before them, so the walk skips leading ones and keeps trailing
  // ones.  The exception is a substring starting at column 0: there is no
  // previous code point, so leading zero width code points are kept.  That
  // also means the common start=0 request needs no walk at all.
  const startIndex = start ? walkColumns(0, start) : 0;

  if (opt_width != undefined) {
    return str.substring(startIndex, walkColumns(startIndex, opt_width));
  }
  return str.slice(startIndex);
};
/**
 * Get substring at the given start and end column offset.
 *
 * This is a thin wrapper that converts the column range into a start offset
 * plus a width and hands it to lib.wc.substr().
 *
 * @param {string} str The string to get substring from.
 * @param {number} start The starting column offset.
 * @param {number} end The ending column offset.
 * @return {string} The substring.
 */
lib.wc.substring = function(str, start, end) {
  const columns = end - start;
  return lib.wc.substr(str, start, columns);
};