| import htmlTrie from "./generated/encode-html.js"; |
| import { xmlReplacer, getCodePoint } from "./escape.js"; |
// Selects the characters that `encodeHTML` replaces: tab, newline, form feed,
// the ASCII ranges `!`-`,`, `:`-`@`, `[`-backtick and `{`-`}`, the characters
// `.` and `/`, and every UTF-16 code unit from U+0080 to U+FFFF. ASCII letters,
// digits, space, `-` and `~` are not matched. (`$` is listed explicitly but
// already falls inside `!`-`,`.)
// The `g` flag makes this regex stateful: the encoder loop walks the input by
// calling `exec` repeatedly and adjusting `lastIndex`.
| const htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g; |
/**
 * Encodes the input using HTML entities, replacing every character selected
 * by `htmlReplacer`. That set includes characters that are valid ASCII
 * characters in HTML documents, such as `#`.
 *
 * For more compact output, consider `encodeNonAsciiHTML`, which only encodes
 * characters that are not valid in HTML documents, as well as non-ASCII
 * characters.
 *
 * A character that has no equivalent entity is written as a numeric
 * hexadecimal reference (eg. `&#xfc;`).
 *
 * @param data Text to encode.
 * @returns The encoded text.
 */
export function encodeHTML(data) {
    const encoded = encodeHTMLTrieRe(htmlReplacer, data);
    return encoded;
}
/**
 * Encodes all non-ASCII characters, as well as characters that are not valid
 * in HTML documents, using HTML entities. Characters that are valid in HTML
 * documents, such as `#`, are left as they are. The exact set of encoded
 * characters is whatever `xmlReplacer` (imported from `./escape.js`) matches.
 *
 * A character that has no equivalent entity is written as a numeric
 * hexadecimal reference (eg. `&#xfc;`).
 *
 * @param data Text to encode.
 * @returns The encoded text.
 */
export function encodeNonAsciiHTML(data) {
    const encoded = encodeHTMLTrieRe(xmlReplacer, data);
    return encoded;
}
/**
 * Replaces every character of `str` that `regExp` matches with an HTML entity.
 *
 * Each match is looked up in `htmlTrie` by its UTF-16 code unit. A trie entry
 * is either the entity string itself or a branch node `{ v, n, o }`:
 *  - `v` is the entity for the character on its own (may be missing);
 *  - `n` / `o` describe entities that cover the character together with the
 *    one that follows it: `n` is either a single code unit whose entity is
 *    `o`, or a map from the following code unit to its entity.
 * Matches without any entity are written as hexadecimal numeric references
 * (`&#x…;`), consuming both halves of a surrogate pair.
 *
 * @param {RegExp} regExp Regular expression selecting the characters to
 *     encode. It must carry the `g` flag: the loop advances via `lastIndex`.
 * @param {string} str Text to encode.
 * @returns {string} `str` with every matched character replaced.
 */
function encodeHTMLTrieRe(regExp, str) {
    // The replacers are shared, stateful `g` regexes. If an earlier call threw
    // part-way through (e.g. it was handed a non-string), `lastIndex` still
    // points into that old input. Start from 0 so that no leading characters
    // of this input are skipped and returned unencoded.
    regExp.lastIndex = 0;
    let ret = "";
    let lastIdx = 0;
    let match;
    while ((match = regExp.exec(str)) !== null) {
        const { index } = match;
        // Copy the untouched text between the previous match and this one.
        ret += str.substring(lastIdx, index);
        const char = str.charCodeAt(index);
        let entity = htmlTrie.get(char);
        if (typeof entity === "object") {
            // We are in a branch. Try to match the next char.
            if (index + 1 < str.length) {
                const nextChar = str.charCodeAt(index + 1);
                const pairEntity =
                    typeof entity.n === "number"
                        ? entity.n === nextChar
                            ? entity.o
                            : undefined
                        : entity.n.get(nextChar);
                if (pairEntity !== undefined) {
                    ret += pairEntity;
                    // The entity covers two code units; step over the second.
                    regExp.lastIndex += 1;
                    lastIdx = regExp.lastIndex;
                    continue;
                }
            }
            entity = entity.v;
        }
        // We might have a tree node without a value; skip and use a numeric entity.
        if (entity !== undefined) {
            ret += entity;
            lastIdx = index + 1;
        }
        else {
            const cp = getCodePoint(str, index);
            ret += `&#x${cp.toString(16)};`;
            // A code point different from the code unit means a surrogate
            // pair: step over the low surrogate as well.
            if (cp !== char) {
                regExp.lastIndex += 1;
            }
            lastIdx = regExp.lastIndex;
        }
    }
    return ret + str.substring(lastIdx);
}
| //# sourceMappingURL=encode.js.map |