| #! /usr/bin/env python | 
 |  | 
 | # Copyright (C) 2016-2019 Apple Inc. All rights reserved. | 
 | # | 
 | # Redistribution and use in source and binary forms, with or without | 
 | # modification, are permitted provided that the following conditions | 
 | # are met: | 
 | # | 
 | # 1.  Redistributions of source code must retain the above copyright | 
 | #     notice, this list of conditions and the following disclaimer.  | 
 | # 2.  Redistributions in binary form must reproduce the above copyright | 
 | #     notice, this list of conditions and the following disclaimer in the | 
 | #     documentation and/or other materials provided with the distribution.  | 
 | # | 
 | # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY | 
 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
 | # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY | 
 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | 
 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 |  | 
# This tool processes the Unicode Character Database file CaseFolding.txt to create
# the canonicalization table as described in the ECMAScript 6 standard, section
# "21.2.2.8.2 Runtime Semantics: Canonicalize()", step 2.
 |  | 
 | import optparse | 
 | import os | 
 | import re | 
 | import sys | 
 |  | 
 | header = """/* | 
 | * Copyright (C) 2016 Apple Inc. All rights reserved. | 
 | * | 
 | * Redistribution and use in source and binary forms, with or without | 
 | * modification, are permitted provided that the following conditions | 
 | * are met: | 
 | * | 
 | * 1.  Redistributions of source code must retain the above copyright | 
 | *     notice, this list of conditions and the following disclaimer.  | 
 | * 2.  Redistributions in binary form must reproduce the above copyright | 
 | *     notice, this list of conditions and the following disclaimer in the | 
 | *     documentation and/or other materials provided with the distribution.  | 
 | * | 
 | * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY | 
 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
 | * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY | 
 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | 
 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | */ | 
 |  | 
 | // DO NO EDIT! - This file was generated by generateYarrCanonicalizeUnicode | 
 |  | 
 | #include "config.h" | 
 | #include "YarrCanonicalize.h" | 
 |  | 
 | namespace JSC { namespace Yarr { | 
 |  | 
 | """ | 
 |  | 
 | footer = """} } // JSC::Yarr | 
 | """ | 
 |  | 
# Highest code point handled; the generated tables cover every value from 0
# through MaxUnicode inclusive.
MaxUnicode = 0x10ffff
# Matches a line that starts with "<hex code>; <status>; <hex mapping>" where
# the status is C or S, capturing the two hex fields as `code` and `mapping`.
# It is applied with match(), so lines with any other status letter simply do
# not match. IGNORECASE lets the hex digits (and the status letter) appear in
# either case.
commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE)
 |  | 
 | def openOrExit(path, mode): | 
 |     try: | 
 |         dirname = os.path.dirname(path) | 
 |         if not os.path.isdir(dirname): | 
 |             os.makedirs(dirname) | 
 |         if sys.version_info.major >= 3: | 
 |             return open(path, mode, encoding="UTF-8") | 
 |         else: | 
 |             return open(path, mode) | 
 |     except IOError as e: | 
 |         print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror)) | 
 |         exit(1) | 
 |  | 
 | class Canonicalize: | 
 |     def __init__(self): | 
 |         self.canonicalGroups = {}; | 
 |  | 
 |     def addMapping(self, code, mapping): | 
 |         if mapping not in self.canonicalGroups: | 
 |             self.canonicalGroups[mapping] = [] | 
 |         self.canonicalGroups[mapping].append(code) | 
 |          | 
 |     def readCaseFolding(self, file): | 
 |         codesSeen = set() | 
 |         for line in file: | 
 |             line = line.split('#', 1)[0] | 
 |             line = line.rstrip() | 
 |             if (not len(line)): | 
 |                 continue | 
 |  | 
 |             fields = commonAndSimpleLinesRE.match(line) | 
 |             if (not fields): | 
 |                 continue | 
 |  | 
 |             code = int(fields.group('code'), 16) | 
 |             mapping = int(fields.group('mapping'), 16) | 
 |  | 
 |             codesSeen.add(code) | 
 |             self.addMapping(code, mapping) | 
 |  | 
 |         for i in range(MaxUnicode + 1): | 
 |             if i in codesSeen: | 
 |                 continue; | 
 |  | 
 |             self.addMapping(i, i) | 
 |  | 
 |     def createTables(self, file): | 
 |         typeInfo = [""] * (MaxUnicode + 1) | 
 |         characterSets = [] | 
 |  | 
 |         for mapping in sorted(self.canonicalGroups.keys()): | 
 |             characters = self.canonicalGroups[mapping] | 
 |             if len(characters) == 1: | 
 |                 typeInfo[characters[0]] = "CanonicalizeUnique:0" | 
 |             else: | 
 |                 characters.sort() | 
 |                 if len(characters) > 2: | 
 |                     for ch in characters: | 
 |                         typeInfo[ch] = "CanonicalizeSet:%d" % len(characterSets) | 
 |                     characterSets.append(characters) | 
 |                 else: | 
 |                     low = characters[0] | 
 |                     high = characters[1] | 
 |                     delta = high - low | 
 |                     if delta == 1: | 
 |                         type = "CanonicalizeAlternatingUnaligned:0" if low & 1 else "CanonicalizeAlternatingAligned:0" | 
 |                         typeInfo[low] = type | 
 |                         typeInfo[high] = type | 
 |                     else: | 
 |                         typeInfo[low] = "CanonicalizeRangeLo:%d" % delta | 
 |                         typeInfo[high] = "CanonicalizeRangeHi:%d" % delta | 
 |  | 
 |         rangeInfo = [] | 
 |         end = 0 | 
 |         while end <= MaxUnicode: | 
 |             begin = end | 
 |             type = typeInfo[end] | 
 |             while end < MaxUnicode and typeInfo[end + 1] == type: | 
 |                 end = end + 1 | 
 |             rangeInfo.append({"begin": begin, "end": end, "type": type}) | 
 |             end = end + 1 | 
 |          | 
 |         for i in range(len(characterSets)): | 
 |             characters = "" | 
 |             cur_set = characterSets[i] | 
 |             for ch in cur_set: | 
 |                 characters = characters + "0x{character:04x}, ".format(character=ch) | 
 |             file.write("const UChar32 unicodeCharacterSet{index:d}[] = {{ {characters}0 }};\n".format(index=i, characters=characters)) | 
 |  | 
 |         file.write("\n") | 
 |         file.write("static constexpr size_t UNICODE_CANONICALIZATION_SETS = {setCount:d};\n".format(setCount=len(characterSets))) | 
 |         file.write("const UChar32* const unicodeCharacterSetInfo[UNICODE_CANONICALIZATION_SETS] = {\n") | 
 |  | 
 |         for i in range(len(characterSets)): | 
 |             file.write("    unicodeCharacterSet{setNumber:d},\n".format(setNumber=i)) | 
 |  | 
 |         file.write("};\n") | 
 |         file.write("\n") | 
 |         file.write("const size_t UNICODE_CANONICALIZATION_RANGES = {rangeCount:d};\n".format(rangeCount=len(rangeInfo))) | 
 |         file.write("const CanonicalizationRange unicodeRangeInfo[UNICODE_CANONICALIZATION_RANGES] = {\n") | 
 |  | 
 |         for info in rangeInfo: | 
 |             typeAndValue = info["type"].split(":") | 
 |             file.write("    {{ 0x{begin:04x}, 0x{end:04x}, 0x{value:04x}, {type} }},\n".format(begin=info["begin"], end=info["end"], value=int(typeAndValue[1]), type=typeAndValue[0])) | 
 |  | 
 |         file.write("};\n") | 
 |         file.write("\n") | 
 |  | 
 |          | 
 | if __name__ == "__main__": | 
 |     parser = optparse.OptionParser(usage = "usage: %prog  <CaseFolding.txt> <YarrCanonicalizeUnicode.h>") | 
 |     (options, args) = parser.parse_args() | 
 |  | 
 |     if len(args) != 2: | 
 |         parser.error("<CaseFolding.txt> <YarrCanonicalizeUnicode.h>") | 
 |  | 
 |     caseFoldingTxtPath = args[0] | 
 |     canonicalizeHPath = args[1] | 
 |     caseFoldingTxtFile = openOrExit(caseFoldingTxtPath, "r") | 
 |     canonicalizeHFile = openOrExit(canonicalizeHPath, "w") | 
 |  | 
 |     canonicalize = Canonicalize() | 
 |     canonicalize.readCaseFolding(caseFoldingTxtFile) | 
 |  | 
 |     canonicalizeHFile.write(header); | 
 |     canonicalize.createTables(canonicalizeHFile) | 
 |     canonicalizeHFile.write(footer); | 
 |  | 
 |     caseFoldingTxtFile.close() | 
 |     canonicalizeHFile.close() | 
 |  | 
 |     exit(0) |