|  | #! /usr/bin/env python | 
|  |  | 
|  | # Copyright (C) 2016-2019 Apple Inc. All rights reserved. | 
|  | # | 
|  | # Redistribution and use in source and binary forms, with or without | 
|  | # modification, are permitted provided that the following conditions | 
|  | # are met: | 
|  | # | 
|  | # 1.  Redistributions of source code must retain the above copyright | 
|  | #     notice, this list of conditions and the following disclaimer. | 
|  | # 2.  Redistributions in binary form must reproduce the above copyright | 
|  | #     notice, this list of conditions and the following disclaimer in the | 
|  | #     documentation and/or other materials provided with the distribution. | 
|  | # | 
|  | # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY | 
|  | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
|  | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
|  | # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY | 
|  | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
|  | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
|  | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | 
|  | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
|  | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  |  | 
# This tool processes the Unicode Character Database file CaseFolding.txt to create
# the canonicalization tables as described in the ECMAScript 6 standard, section
# "21.2.2.8.2 Runtime Semantics: Canonicalize()", step 2.
|  |  | 
|  | import optparse | 
|  | import os | 
|  | import re | 
|  | import sys | 
|  |  | 
# Text written at the top of the generated C++ file: license, "do not edit"
# banner, includes, and the opening of the JSC::Yarr namespaces.
header = """/*
* Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1.  Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
* 2.  Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in the
*     documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// DO NOT EDIT! - This file was generated by generateYarrCanonicalizeUnicode

#include "config.h"
#include "YarrCanonicalize.h"

namespace JSC { namespace Yarr {

"""

# Text written at the end of the generated file; closes both namespaces.
footer = """} } // JSC::Yarr
"""

# Highest code point covered by the generated tables.
MaxUnicode = 0x10ffff

# Matches CaseFolding.txt entries of the form "<code>; <status>; <mapping>"
# whose status is C (common) or S (simple).  Entries with any other status
# (for example F or T) do not match and are therefore ignored by the reader.
commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE)
|  |  | 
def openOrExit(path, mode):
    """Open ``path`` with ``mode``, creating its parent directory if needed.

    On Python 3 the file is opened with UTF-8 encoding; on Python 2 the
    plain built-in ``open`` is used.  If the directory cannot be created or
    the file cannot be opened, an error message is printed and the process
    exits with status 1.

    Args:
        path: Path of the file to open.
        mode: Mode string handed through to ``open``.

    Returns:
        The opened file object.
    """
    try:
        dirname = os.path.dirname(path)
        # A bare file name has an empty dirname, and os.makedirs("") fails,
        # so only create the directory when the path actually names one.
        if dirname and not os.path.isdir(dirname):
            os.makedirs(dirname)
        if sys.version_info.major >= 3:
            return open(path, mode, encoding="UTF-8")
        return open(path, mode)
    except (IOError, OSError) as e:
        # os.makedirs raises OSError, which is not an IOError on Python 2.
        print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror))
        # sys.exit works even when the site module's exit() helper is absent.
        sys.exit(1)
|  |  | 
class Canonicalize:
    """Builds canonicalization tables from CaseFolding.txt entries.

    Code points are grouped by the code point they fold to.  Each group is
    then classified as unique (one member), a pair (two members, stored as a
    delta or as an alternating pair when adjacent) or a set (three or more
    members), and the classification is written out as C++ tables.
    """

    def __init__(self):
        # Maps a folded code point to the list of code points that fold to it.
        self.canonicalGroups = {}

    def addMapping(self, code, mapping):
        """Record that code point ``code`` folds to ``mapping``."""
        self.canonicalGroups.setdefault(mapping, []).append(code)

    def readCaseFolding(self, file):
        """Populate the groups from an iterable of CaseFolding.txt lines.

        Only lines matched by ``commonAndSimpleLinesRE`` contribute a
        mapping; comments, blank lines and all other entries are skipped.
        Every code point up to ``MaxUnicode`` that had no matching entry is
        then recorded as folding to itself.

        Args:
            file: Iterable yielding the lines of CaseFolding.txt.
        """
        codesSeen = set()
        for line in file:
            # Drop a trailing "# ..." comment and any trailing whitespace.
            line = line.split('#', 1)[0].rstrip()
            if not line:
                continue

            fields = commonAndSimpleLinesRE.match(line)
            if not fields:
                continue

            code = int(fields.group('code'), 16)
            mapping = int(fields.group('mapping'), 16)

            codesSeen.add(code)
            self.addMapping(code, mapping)

        # Code points without an entry are their own canonical form.
        for code in range(MaxUnicode + 1):
            if code not in codesSeen:
                self.addMapping(code, code)

    def createTables(self, file):
        """Write the character-set and range tables as C++ source to ``file``.

        Args:
            file: Object with a ``write`` method receiving the generated text.

        Raises:
            ValueError: If some code point up to ``MaxUnicode`` belongs to no
                group.  This is detected before anything is written.
        """
        # Per code point: (canonicalization type name, value), or None while
        # the code point has not been classified.
        typeInfo = [None] * (MaxUnicode + 1)
        characterSets = []

        for mapping in sorted(self.canonicalGroups):
            characters = self.canonicalGroups[mapping]
            if len(characters) == 1:
                typeInfo[characters[0]] = ("CanonicalizeUnique", 0)
                continue

            # Sort a copy so that generating tables does not reorder the groups.
            characters = sorted(characters)
            if len(characters) > 2:
                # The value of a set entry is its index in characterSets.
                setEntry = ("CanonicalizeSet", len(characterSets))
                for ch in characters:
                    typeInfo[ch] = setEntry
                characterSets.append(characters)
                continue

            low, high = characters
            delta = high - low
            if delta == 1:
                # Adjacent pair: the type records whether the lower member is odd or even.
                pairEntry = ("CanonicalizeAlternatingUnaligned" if low & 1 else "CanonicalizeAlternatingAligned", 0)
                typeInfo[low] = pairEntry
                typeInfo[high] = pairEntry
            else:
                typeInfo[low] = ("CanonicalizeRangeLo", delta)
                typeInfo[high] = ("CanonicalizeRangeHi", delta)

        # Collapse runs of consecutive code points sharing the same entry
        # into (begin, end, entry) ranges.
        rangeInfo = []
        begin = 0
        for code in range(1, MaxUnicode + 2):
            if code <= MaxUnicode and typeInfo[code] == typeInfo[begin]:
                continue
            if typeInfo[begin] is None:
                raise ValueError("code point U+{0:04X} has no canonicalization group; call readCaseFolding() first".format(begin))
            rangeInfo.append((begin, code - 1, typeInfo[begin]))
            begin = code

        for index, characters in enumerate(characterSets):
            members = "".join("0x{character:04x}, ".format(character=ch) for ch in characters)
            file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=index, characters=members))

        file.write("\n")
        file.write("static constexpr size_t UNICODE_CANONICALIZATION_SETS = {setCount:d};\n".format(setCount=len(characterSets)))
        file.write("const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {\n")

        for index in range(len(characterSets)):
            file.write("    unicodeCharacterSet{setNumber:d},\n".format(setNumber=index))

        file.write("};\n")
        file.write("\n")
        file.write("const size_t UNICODE_CANONICALIZATION_RANGES = {rangeCount:d};\n".format(rangeCount=len(rangeInfo)))
        file.write("const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {\n")

        for rangeBegin, rangeEnd, (typeName, value) in rangeInfo:
            file.write("    {{ 0x{begin:04x}, 0x{end:04x}, 0x{value:04x}, {type} }},\n".format(begin=rangeBegin, end=rangeEnd, value=value, type=typeName))

        file.write("};\n")
        file.write("\n")
|  |  | 
|  |  | 
def main():
    """Generate the canonicalization tables named on the command line.

    Expects exactly two positional arguments: the path of the CaseFolding.txt
    input and the path of the file to generate.  Any other argument count is
    reported through the option parser's error handling.

    Returns:
        0 once the output file has been written.
    """
    parser = optparse.OptionParser(usage = "usage: %prog  <CaseFolding.txt> <YarrCanonicalizeUnicode.h>")
    _, args = parser.parse_args()

    if len(args) != 2:
        parser.error("<CaseFolding.txt> <YarrCanonicalizeUnicode.h>")

    caseFoldingTxtPath, canonicalizeHPath = args

    # Both files are opened before any processing, so an unusable path is
    # reported before the input is parsed.
    caseFoldingTxtFile = openOrExit(caseFoldingTxtPath, "r")
    canonicalizeHFile = openOrExit(canonicalizeHPath, "w")

    # The with-statement closes both files even if table generation raises.
    with caseFoldingTxtFile, canonicalizeHFile:
        canonicalize = Canonicalize()
        canonicalize.readCaseFolding(caseFoldingTxtFile)

        canonicalizeHFile.write(header)
        canonicalize.createTables(canonicalizeHFile)
        canonicalizeHFile.write(footer)

    return 0


if __name__ == "__main__":
    sys.exit(main())