|  | #!/usr/bin/env python | 
|  |  | 
|  | # Copyright (C) 2017 Apple Inc. All rights reserved. | 
|  | # | 
|  | # Redistribution and use in source and binary forms, with or without | 
|  | # modification, are permitted provided that the following conditions | 
|  | # are met: | 
|  | # | 
|  | # 1.  Redistributions of source code must retain the above copyright | 
|  | #     notice, this list of conditions and the following disclaimer. | 
|  | # 2.  Redistributions in binary form must reproduce the above copyright | 
|  | #     notice, this list of conditions and the following disclaimer in the | 
|  | #     documentation and/or other materials provided with the distribution. | 
|  | # | 
|  | # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY | 
|  | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
|  | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
|  | # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY | 
|  | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
|  | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
|  | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | 
|  | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
|  | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  |  | 
|  | # This tool processes Unicode Character Database (UCD) files such as UnicodeData.txt, Scripts.txt, | 
|  | # ScriptExtensions.txt, PropList.txt and the Derived*.txt property files to generate the character | 
|  | # class tables used by RegExp Unicode property escapes (\p{...} and \P{...}), as described in the | 
|  | # ECMAScript specification. | 
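|  | # | 
|  | # Usage: <this script> <UCD-Directory> <YarrUnicodePropertyData.h> | 
|  | # <UCD-Directory> must contain the UCD files listed in RequiredUCDFiles below; the second argument | 
|  | # is the path of the header file to generate. | 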
|  |  | 
|  | import sys | 
|  | import copy | 
|  | import optparse | 
|  | import os | 
|  | import re | 
|  | from hasher import stringHash | 
|  |  | 
|  | header = """/* | 
|  | * Copyright (C) 2017-2019 Apple Inc. All rights reserved. | 
|  | * | 
|  | * Redistribution and use in source and binary forms, with or without | 
|  | * modification, are permitted provided that the following conditions | 
|  | * are met: | 
|  | * | 
|  | * 1.  Redistributions of source code must retain the above copyright | 
|  | *     notice, this list of conditions and the following disclaimer. | 
|  | * 2.  Redistributions in binary form must reproduce the above copyright | 
|  | *     notice, this list of conditions and the following disclaimer in the | 
|  | *     documentation and/or other materials provided with the distribution. | 
|  | * | 
|  | * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY | 
|  | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
|  | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
|  | * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY | 
|  | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
|  | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
|  | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | 
|  | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
|  | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 
|  | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
|  | */ | 
|  |  | 
|  | // DO NOT EDIT! - This file was generated by """ + __file__ + """ | 
|  | """ | 
|  |  | 
|  |  | 
|  | footer = """ | 
|  | """ | 
|  |  | 
|  | RequiredUCDFiles = ["DerivedBinaryProperties.txt", "DerivedCoreProperties.txt", "DerivedNormalizationProps.txt", "PropList.txt", "PropertyAliases.txt", "PropertyValueAliases.txt", "ScriptExtensions.txt", "Scripts.txt", "UnicodeData.txt", "emoji-data.txt"] | 
|  | UCDDirectoryPath = None | 
|  |  | 
|  | SupportedBinaryProperties = [ | 
|  | "Alphabetic", "Any", "ASCII", "ASCII_Hex_Digit", "Assigned", "Bidi_Control", "Bidi_Mirrored", "Case_Ignorable", | 
|  | "Cased", "Changes_When_Casefolded", "Changes_When_Casemapped", "Changes_When_Lowercased", "Changes_When_NFKC_Casefolded", | 
|  | "Changes_When_Titlecased", "Changes_When_Uppercased", "Dash", "Default_Ignorable_Code_Point", "Deprecated", | 
|  | "Diacritic", "Emoji", "Emoji_Component", "Emoji_Modifier_Base", "Emoji_Modifier", "Emoji_Presentation", | 
|  | "Extended_Pictographic", "Extender", "Grapheme_Base", "Grapheme_Extend", "Hex_Digit", "ID_Continue", "ID_Start", | 
|  | "Ideographic", "IDS_Binary_Operator", "IDS_Trinary_Operator", "Join_Control", "Logical_Order_Exception", "Lowercase", | 
|  | "Math", "Noncharacter_Code_Point", "Pattern_Syntax", "Pattern_White_Space", "Quotation_Mark", "Radical", | 
|  | "Regional_Indicator", "Sentence_Terminal", "Soft_Dotted", "Terminal_Punctuation", "Unified_Ideograph", "Uppercase", | 
|  | "Variation_Selector", "White_Space", "XID_Continue", "XID_Start"] | 
|  |  | 
|  | lastASCIICodePoint = 0x7f | 
|  | firstUnicodeCodePoint = 0x80 | 
|  | MaxUnicode = 0x10ffff | 
|  | MaxBMP = 0xffff | 
|  | commonAndSimpleLinesRE = re.compile(r"(?P<code>[0-9A-F]+)\s*;\s*[CS]\s*;\s*(?P<mapping>[0-9A-F]+)", re.IGNORECASE) | 
|  | aliases = None | 
|  |  | 
|  |  | 
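|  | # Open a file (as UTF-8 under Python 3) or exit with a diagnostic if it cannot be opened. | 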
|  | def openOrExit(path, mode): | 
|  | try: | 
|  | if sys.version_info.major >= 3: | 
|  | return open(path, mode, encoding="UTF-8") | 
|  | else: | 
|  | return open(path, mode) | 
|  | except IOError as e: | 
|  | print("I/O error opening {0}, ({1}): {2}".format(path, e.errno, e.strerror)) | 
|  | exit(1) | 
|  |  | 
|  |  | 
|  | def openUCDFileOrExit(path): | 
|  | if not UCDDirectoryPath: | 
|  | exit(1) | 
|  |  | 
|  | return openOrExit(os.path.join(UCDDirectoryPath, path), 'r') | 
|  |  | 
|  |  | 
|  | def verifyUCDFilesExist(): | 
|  | if not UCDDirectoryPath: | 
|  | exit(1) | 
|  |  | 
|  | missingFileCount = 0 | 
|  | for file in RequiredUCDFiles: | 
|  | fullPath = os.path.join(UCDDirectoryPath, file) | 
|  | if not os.path.exists(fullPath): | 
|  | print("Couldn't find UCD file {0} at {1}".format(file, fullPath)) | 
|  | missingFileCount = missingFileCount + 1 | 
|  | if missingFileCount: | 
|  | exit(1) | 
|  |  | 
|  |  | 
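|  | # Round a size up to the next power of two, e.g. ceilingToPowerOf2(5) == 8. Used below to size the | 
|  | # generated property-name hash tables so that the hash mask is simply (size - 1). | 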
|  | def ceilingToPowerOf2(size): | 
|  | powerOf2 = 1 | 
|  | while size > powerOf2: | 
|  | powerOf2 = powerOf2 << 1 | 
|  |  | 
|  | return powerOf2 | 
|  |  | 
|  |  | 
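|  | # Aliases collects the name <-> alias mappings from PropertyAliases.txt and PropertyValueAliases.txt: | 
|  | # global property aliases, General_Category aliases ("gc") and Script aliases ("sc"). The generated | 
|  | # hash tables key each character class by its full name and every alias. | 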
|  | class Aliases: | 
|  | def __init__(self): | 
|  | self.globalNameToAliases = {} | 
|  | self.generalCategoryToAliases = {} | 
|  | self.aliasToGeneralCategory = {} | 
|  | self.scriptToAliases = {} | 
|  | self.aliasToScript = {} | 
|  |  | 
|  | def parsePropertyAliasesFile(self, file): | 
|  | for line in file: | 
|  | line = line.split('#', 1)[0] | 
|  | line = line.rstrip() | 
|  | if (not len(line)): | 
|  | continue | 
|  |  | 
|  | fields = line.split(';') | 
|  | if (not fields): | 
|  | continue | 
|  |  | 
|  | aliases = [fields[0].strip()] | 
|  | fullName = fields[1].strip() | 
|  | for otherAlias in fields[2:]: | 
|  | aliases.append(otherAlias.strip()) | 
|  |  | 
|  | if fullName in self.globalNameToAliases: | 
|  | print("Error, already an alias for {}".format(fullName)) | 
|  | else: | 
|  | self.globalNameToAliases[fullName] = aliases | 
|  |  | 
|  | def parsePropertyValueAliasesFile(self, file): | 
|  | for line in file: | 
|  | line = line.split('#', 1)[0] | 
|  | line = line.rstrip() | 
|  | if (not len(line)): | 
|  | continue | 
|  |  | 
|  | fields = line.split(';') | 
|  | if (not fields): | 
|  | continue | 
|  |  | 
|  | propertyType = fields[0].strip() | 
|  |  | 
|  | if propertyType == "gc": | 
|  | mapToModify = self.generalCategoryToAliases | 
|  | reverseMapToModify = self.aliasToGeneralCategory | 
|  | elif propertyType == "sc": | 
|  | mapToModify = self.scriptToAliases | 
|  | reverseMapToModify = self.aliasToScript | 
|  | else: | 
|  | continue | 
|  |  | 
|  | primaryAlias = fields[1].strip() | 
|  | fullName = fields[2].strip() | 
|  | aliases = [primaryAlias] | 
|  | for otherAlias in fields[3:]: | 
|  | aliases.append(otherAlias.strip()) | 
|  |  | 
|  | if fullName in mapToModify: | 
|  | print("Error, already an {} alias for {}".format(propertyType, fullName)) | 
|  | else: | 
|  | mapToModify[fullName] = aliases | 
|  | if reverseMapToModify != None: | 
|  | reverseMapToModify[primaryAlias] = fullName | 
|  |  | 
|  | def globalAliasesFor(self, name): | 
|  | if name not in self.globalNameToAliases: | 
|  | return [] | 
|  | return self.globalNameToAliases[name] | 
|  |  | 
|  | def generalCategoryAliasesFor(self, name): | 
|  | if name not in self.generalCategoryToAliases: | 
|  | return [] | 
|  | return self.generalCategoryToAliases[name] | 
|  |  | 
|  | def generalCategoryForAlias(self, name): | 
|  | if name not in self.aliasToGeneralCategory: | 
|  | return "" | 
|  | return self.aliasToGeneralCategory[name] | 
|  |  | 
|  | def scriptAliasesFor(self, name): | 
|  | if name not in self.scriptToAliases: | 
|  | return [] | 
|  | return self.scriptToAliases[name] | 
|  |  | 
|  | def scriptNameForAlias(self, name): | 
|  | if name not in self.aliasToScript: | 
|  | return "" | 
|  | return self.aliasToScript[name] | 
|  |  | 
|  |  | 
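|  | # PropertyData accumulates the code points that match one character class. ASCII code points are kept | 
|  | # in matches/ranges and everything above 0x7f in unicodeMatches/unicodeRanges, mirroring the argument | 
|  | # order of the generated CharacterClass constructor. Each instance also remembers whether it contains | 
|  | # BMP and/or non-BMP characters so dump() can emit the right CharacterClassWidths value. | 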
|  | class PropertyData: | 
|  | allPropertyData = [] | 
|  |  | 
|  | def __init__(self, name): | 
|  | self.name = name | 
|  | self.aliases = [] | 
|  | self.index = len(PropertyData.allPropertyData) | 
|  | self.hasBMPCharacters = False | 
|  | self.hasNonBMPCharacters = False | 
|  | self.matches = [] | 
|  | self.ranges = [] | 
|  | self.unicodeMatches = [] | 
|  | self.unicodeRanges = [] | 
|  | self.codePointCount = 0 | 
|  | PropertyData.allPropertyData.append(self) | 
|  |  | 
|  | def setAliases(self, aliases): | 
|  | self.aliases = aliases | 
|  |  | 
|  | def makeCopy(self): | 
|  | result = copy.deepcopy(self) | 
|  | result.index = len(PropertyData.allPropertyData) | 
|  | PropertyData.allPropertyData.append(result) | 
|  | return result | 
|  |  | 
|  | def getIndex(self): | 
|  | return self.index | 
|  |  | 
|  | def getCreateFuncName(self): | 
|  | return "createCharacterClass{}".format(self.index) | 
|  |  | 
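|  | # addMatch() and addRange() assume code points arrive in ascending order, extending the last match or | 
|  | # range where possible; when an out-of-order code point shows up (e.g. while merging script extension | 
|  | # data into a copied script class) they fall back to the slower unordered insertion paths below. | 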
|  | def addMatch(self, codePoint): | 
|  | if codePoint <= MaxBMP: | 
|  | self.hasBMPCharacters = True | 
|  | else: | 
|  | self.hasNonBMPCharacters = True | 
|  | if codePoint <= lastASCIICodePoint: | 
|  | if (len(self.matches) and self.matches[-1] > codePoint) or (len(self.ranges) and self.ranges[-1][1] > codePoint): | 
|  | self.addMatchUnordered(codePoint) | 
|  | return | 
|  |  | 
|  | self.codePointCount = self.codePointCount + 1 | 
|  | if len(self.matches) and self.matches[-1] == (codePoint - 1): | 
|  | lowCodePoint = self.matches.pop() | 
|  | self.ranges.append((lowCodePoint, codePoint)) | 
|  | elif len(self.ranges) and self.ranges[-1][1] == (codePoint - 1): | 
|  | priorRange = self.ranges.pop() | 
|  | self.ranges.append((priorRange[0], codePoint)) | 
|  | else: | 
|  | self.matches.append(codePoint) | 
|  | else: | 
|  | if (len(self.unicodeMatches) and self.unicodeMatches[-1] > codePoint) or (len(self.unicodeRanges) and self.unicodeRanges[-1][1] > codePoint): | 
|  | self.addMatchUnordered(codePoint) | 
|  | return | 
|  |  | 
|  | self.codePointCount = self.codePointCount + 1 | 
|  | if len(self.unicodeMatches) and self.unicodeMatches[-1] == (codePoint - 1): | 
|  | lowCodePoint = self.unicodeMatches.pop() | 
|  | self.unicodeRanges.append((lowCodePoint, codePoint)) | 
|  | elif len(self.unicodeRanges) and self.unicodeRanges[-1][1] == (codePoint - 1): | 
|  | priorRange = self.unicodeRanges.pop() | 
|  | self.unicodeRanges.append((priorRange[0], codePoint)) | 
|  | else: | 
|  | self.unicodeMatches.append(codePoint) | 
|  |  | 
|  | def addRange(self, lowCodePoint, highCodePoint): | 
|  | if lowCodePoint <= MaxBMP: | 
|  | self.hasBMPCharacters = True | 
|  | if highCodePoint > MaxBMP: | 
|  | self.hasNonBMPCharacters = True | 
|  | if highCodePoint <= lastASCIICodePoint: | 
|  | if (len(self.matches) and self.matches[-1] > lowCodePoint) or (len(self.ranges) and self.ranges[-1][1] > lowCodePoint): | 
|  | self.addRangeUnordered(lowCodePoint, highCodePoint) | 
|  | return | 
|  |  | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | if len(self.matches) and self.matches[-1] == (lowCodePoint - 1): | 
|  | lowCodePoint = self.matches.pop() | 
|  | elif len(self.ranges) and self.ranges[-1][1] == (lowCodePoint - 1): | 
|  | priorRange = self.ranges.pop() | 
|  | lowCodePoint = priorRange[0] | 
|  | self.ranges.append((lowCodePoint, highCodePoint)) | 
|  | elif lowCodePoint <= lastASCIICodePoint: | 
|  | if lowCodePoint == lastASCIICodePoint: | 
|  | self.addMatch(lowCodePoint) | 
|  | else: | 
|  | self.addRange(lowCodePoint, lastASCIICodePoint) | 
|  | if highCodePoint == firstUnicodeCodePoint: | 
|  | self.addMatch(highCodePoint) | 
|  | else: | 
|  | self.addRange(firstUnicodeCodePoint, highCodePoint) | 
|  | else: | 
|  | if (len(self.unicodeMatches) and self.unicodeMatches[-1] > lowCodePoint) or (len(self.unicodeRanges) and self.unicodeRanges[-1][1] > lowCodePoint): | 
|  | self.addRangeUnordered(lowCodePoint, highCodePoint) | 
|  | return | 
|  |  | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | if len(self.unicodeMatches) and self.unicodeMatches[-1] == (lowCodePoint - 1): | 
|  | lowCodePoint = self.unicodeMatches.pop() | 
|  | self.unicodeRanges.append((lowCodePoint, highCodePoint)) | 
|  | elif len(self.unicodeRanges) and self.unicodeRanges[-1][1] == (lowCodePoint - 1): | 
|  | priorRange = self.unicodeRanges.pop() | 
|  | self.unicodeRanges.append((priorRange[0], highCodePoint)) | 
|  | else: | 
|  | self.unicodeRanges.append((lowCodePoint, highCodePoint)) | 
|  |  | 
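|  | # The *Unordered* helpers insert into already-built match/range lists: adjacent matches are merged into | 
|  | # ranges, overlapping or touching ranges are coalesced, and codePointCount is kept in sync. | 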
|  | def addMatchUnorderedForMatchesAndRanges(self, codePoint, matches, ranges): | 
|  | if codePoint in matches: | 
|  | return | 
|  | insertLocation = None | 
|  | lowCodePoint = None | 
|  | highCodePoint = None | 
|  | for idx in range(len(matches)): | 
|  | match = matches[idx] | 
|  | if codePoint == match + 1: | 
|  | lowCodePoint = match | 
|  | if idx < (len(matches) - 1) and codePoint == matches[idx + 1] - 1: | 
|  | highCodePoint = matches[idx + 1] | 
|  | del matches[idx + 1] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | else: | 
|  | highCodePoint = codePoint | 
|  | del matches[idx] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | break | 
|  | elif codePoint == match - 1: | 
|  | lowCodePoint = codePoint | 
|  | highCodePoint = match | 
|  | del matches[idx] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | break | 
|  | elif codePoint < match: | 
|  | insertLocation = idx | 
|  | break | 
|  |  | 
|  | if insertLocation is None: | 
|  | insertLocation = len(matches) | 
|  | if lowCodePoint is None: | 
|  | lowCodePoint = codePoint | 
|  | highCodePoint = codePoint | 
|  |  | 
|  | for idx in range(len(ranges)): | 
|  | cur_range = ranges[idx] | 
|  | if lowCodePoint >= cur_range[0] and highCodePoint <= cur_range[1]: | 
|  | return | 
|  | if lowCodePoint <= (cur_range[1] + 1) and highCodePoint >= (cur_range[0] - 1): | 
|  | while idx < len(ranges) and highCodePoint >= (ranges[idx][0] - 1): | 
|  | cur_range = ranges[idx] | 
|  | lowCodePoint = min(lowCodePoint, cur_range[0]) | 
|  | highCodePoint = max(highCodePoint, cur_range[1]) | 
|  | del ranges[idx] | 
|  | self.codePointCount = self.codePointCount - (cur_range[1] - cur_range[0]) - 1 | 
|  |  | 
|  | ranges.insert(idx, (lowCodePoint, highCodePoint)) | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | return | 
|  | elif highCodePoint < cur_range[0]: | 
|  | if lowCodePoint != highCodePoint: | 
|  | ranges.insert(idx, (lowCodePoint, highCodePoint)) | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | return | 
|  | break | 
|  |  | 
|  | if lowCodePoint != highCodePoint: | 
|  | ranges.append((lowCodePoint, highCodePoint)) | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | return | 
|  |  | 
|  | if insertLocation is not None: | 
|  | matches.insert(insertLocation, codePoint) | 
|  | self.codePointCount = self.codePointCount + 1 | 
|  |  | 
|  | def addRangeUnorderedForMatchesAndRanges(self, lowCodePoint, highCodePoint, matches, ranges): | 
|  | if len(matches) and highCodePoint >= matches[0] and lowCodePoint <= matches[-1]: | 
|  | for idx in range(len(matches)): | 
|  | match = matches[idx] | 
|  | if lowCodePoint <= match and highCodePoint >= match: | 
|  | while idx < len(matches) and highCodePoint >= matches[idx]: | 
|  | del matches[idx] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | if idx < (len(matches) - 1) and highCodePoint == matches[idx + 1] - 1: | 
|  | highCodePoint = matches[idx + 1] | 
|  | del matches[idx + 1] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | break | 
|  | elif lowCodePoint == match + 1: | 
|  | lowCodePoint = match | 
|  | while idx < len(matches) and highCodePoint >= matches[idx]: | 
|  | del matches[idx] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  |  | 
|  | if idx < (len(matches) - 1) and highCodePoint == matches[idx + 1] - 1: | 
|  | highCodePoint = matches[idx + 1] | 
|  | del matches[idx + 1] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | break | 
|  | elif highCodePoint == match - 1: | 
|  | highCodePoint = match | 
|  | del matches[idx] | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | break | 
|  | elif highCodePoint < match: | 
|  | break | 
|  |  | 
|  | for idx in range(len(ranges)): | 
|  | cur_range = ranges[idx] | 
|  | if lowCodePoint >= cur_range[0] and highCodePoint <= cur_range[1]: | 
|  | return | 
|  | if lowCodePoint <= (cur_range[1] + 1) and highCodePoint >= (cur_range[0] - 1): | 
|  | while idx < len(ranges) and highCodePoint >= (ranges[idx][0] - 1): | 
|  | cur_range = ranges[idx] | 
|  | lowCodePoint = min(lowCodePoint, cur_range[0]) | 
|  | highCodePoint = max(highCodePoint, cur_range[1]) | 
|  | del ranges[idx] | 
|  | self.codePointCount = self.codePointCount - (cur_range[1] - cur_range[0]) - 1 | 
|  |  | 
|  | ranges.insert(idx, (lowCodePoint, highCodePoint)) | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | return | 
|  | elif highCodePoint < cur_range[0]: | 
|  | ranges.insert(idx, (lowCodePoint, highCodePoint)) | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  | return | 
|  |  | 
|  | ranges.append((lowCodePoint, highCodePoint)) | 
|  | self.codePointCount = self.codePointCount + (highCodePoint - lowCodePoint) + 1 | 
|  |  | 
|  | def addMatchUnordered(self, codePoint): | 
|  | if codePoint <= lastASCIICodePoint: | 
|  | self.addMatchUnorderedForMatchesAndRanges(codePoint, self.matches, self.ranges) | 
|  | else: | 
|  | self.addMatchUnorderedForMatchesAndRanges(codePoint, self.unicodeMatches, self.unicodeRanges) | 
|  |  | 
|  | def addRangeUnordered(self, lowCodePoint, highCodePoint): | 
|  | if highCodePoint <= lastASCIICodePoint: | 
|  | self.addRangeUnorderedForMatchesAndRanges(lowCodePoint, highCodePoint, self.matches, self.ranges) | 
|  | elif lowCodePoint >= firstUnicodeCodePoint: | 
|  | self.addRangeUnorderedForMatchesAndRanges(lowCodePoint, highCodePoint, self.unicodeMatches, self.unicodeRanges) | 
|  | else: | 
|  | if lowCodePoint == lastASCIICodePoint: | 
|  | self.addMatchUnorderedForMatchesAndRanges(lowCodePoint, self.matches, self.ranges) | 
|  | else: | 
|  | self.addRangeUnorderedForMatchesAndRanges(lowCodePoint, lastASCIICodePoint, self.matches, self.ranges) | 
|  | if highCodePoint == firstUnicodeCodePoint: | 
|  | self.addMatchUnorderedForMatchesAndRanges(highCodePoint, self.unicodeMatches, self.unicodeRanges) | 
|  | else: | 
|  | self.addRangeUnorderedForMatchesAndRanges(firstUnicodeCodePoint, highCodePoint, self.unicodeMatches, self.unicodeRanges) | 
|  |  | 
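|  | # removeMatch()/removeMatchFromRanges() drop a single code point, splitting a range in two if needed. | 
|  | # They are used when building Script_Extensions: a code point with explicit extensions is removed from | 
|  | # the copied Common and Inherited classes. | 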
|  | def removeMatchFromRanges(self, codePoint, ranges): | 
|  | for idx in range(len(ranges)): | 
|  | cur_range = ranges[idx] | 
|  | if cur_range[0] <= codePoint and codePoint <= cur_range[1]: | 
|  | ranges.pop(idx) | 
|  | if cur_range[0] < codePoint and codePoint < cur_range[1]: | 
|  | lowRange = (cur_range[0], codePoint - 1) | 
|  | highRange = (codePoint + 1, cur_range[1]) | 
|  | # Since list.insert inserts before the index given, handle the high range first | 
|  | if highRange[0] == highRange[1]: | 
|  | self.addMatchUnordered(highRange[0]) | 
|  | else: | 
|  | ranges.insert(idx, highRange) | 
|  | if lowRange[0] == lowRange[1]: | 
|  | self.addMatchUnordered(lowRange[0]) | 
|  | else: | 
|  | ranges.insert(idx, lowRange) | 
|  | else: | 
|  | if cur_range[0] == codePoint: | 
|  | cur_range = (codePoint + 1, cur_range[1]) | 
|  | else: | 
|  | cur_range = (cur_range[0], codePoint - 1) | 
|  | if cur_range[0] == cur_range[1]: | 
|  | self.addMatchUnordered(cur_range[0]) | 
|  | else: | 
|  | ranges.insert(idx, cur_range) | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | return | 
|  |  | 
|  | def removeMatch(self, codePoint): | 
|  | if codePoint <= lastASCIICodePoint: | 
|  | if codePoint in self.matches: | 
|  | self.matches.remove(codePoint) | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | else: | 
|  | self.removeMatchFromRanges(codePoint, self.ranges) | 
|  | else: | 
|  | if codePoint in self.unicodeMatches: | 
|  | self.unicodeMatches.remove(codePoint) | 
|  | self.codePointCount = self.codePointCount - 1 | 
|  | else: | 
|  | self.removeMatchFromRanges(codePoint, self.unicodeRanges) | 
|  |  | 
|  | def dumpMatchData(self, file, valuesPerLine, dataList, formatter): | 
|  | valuesThisLine = 0 | 
|  | firstValue = True | 
|  |  | 
|  | file.write("{") | 
|  | for elem in dataList: | 
|  | if firstValue: | 
|  | firstValue = False | 
|  | else: | 
|  | file.write(", ") | 
|  | valuesThisLine = valuesThisLine + 1 | 
|  | if valuesThisLine > valuesPerLine: | 
|  | file.write("\n                 ") | 
|  | valuesThisLine = 1 | 
|  | formatter(file, elem) | 
|  | file.write("}") | 
|  |  | 
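|  | # dump() writes a createCharacterClassN() factory that builds the YARR CharacterClass for this class: | 
|  | # ASCII matches, ASCII ranges, non-ASCII matches, non-ASCII ranges and the BMP/non-BMP width flags. | 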
|  | def dump(self, file, commaAfter): | 
|  | file.write("static std::unique_ptr<CharacterClass> {}()\n{{\n".format(self.getCreateFuncName())) | 
|  | file.write("    // Name = {}, number of codePoints: {}\n".format(self.name, self.codePointCount)) | 
|  | file.write("    auto characterClass = makeUnique<CharacterClass>(\n") | 
|  | file.write("        std::initializer_list<UChar32>(") | 
|  | self.dumpMatchData(file, 8, self.matches, lambda file, match: (file.write("{0:0=#4x}".format(match)))) | 
|  | file.write("),\n") | 
|  | file.write("        std::initializer_list<CharacterRange>(") | 
|  | self.dumpMatchData(file, 4, self.ranges, lambda file, range: (file.write("{{{0:0=#4x}, {1:0=#4x}}}".format(range[0], range[1])))) | 
|  | file.write("),\n") | 
|  | file.write("        std::initializer_list<UChar32>(") | 
|  | self.dumpMatchData(file, 8, self.unicodeMatches, lambda file, match: (file.write("{0:0=#6x}".format(match)))) | 
|  | file.write("),\n") | 
|  | file.write("        std::initializer_list<CharacterRange>(") | 
|  | self.dumpMatchData(file, 4, self.unicodeRanges, lambda file, range: (file.write("{{{0:0=#6x}, {1:0=#6x}}}".format(range[0], range[1])))) | 
|  | file.write("),\n") | 
|  |  | 
|  | file.write("        CharacterClassWidths::{});\n".format(("Unknown", "HasBMPChars", "HasNonBMPChars", "HasBothBMPAndNonBMP")[int(self.hasNonBMPCharacters) * 2 + int(self.hasBMPCharacters)])) | 
|  | file.write("    return characterClass;\n}\n\n") | 
|  |  | 
|  | @classmethod | 
|  | def dumpAll(cls, file): | 
|  | for propertyData in cls.allPropertyData: | 
|  | propertyData.dump(file, propertyData != cls.allPropertyData[-1]) | 
|  |  | 
|  | file.write("typedef std::unique_ptr<CharacterClass> (*CreateCharacterClass)();\n") | 
|  | file.write("static CreateCharacterClass createFunctions[{}] = {{\n   ".format(len(cls.allPropertyData))) | 
|  | functionsOnThisLine = 0 | 
|  | for propertyData in cls.allPropertyData: | 
|  | file.write(" {},".format(propertyData.getCreateFuncName())) | 
|  | functionsOnThisLine = functionsOnThisLine + 1 | 
|  | if functionsOnThisLine == 4: | 
|  | file.write("\n   ") | 
|  | functionsOnThisLine = 0 | 
|  |  | 
|  | file.write("};\n\n") | 
|  |  | 
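|  | # Emit a name -> character class index lookup table. The table is a power-of-two sized hash table keyed | 
|  | # by stringHash() (see hasher.py); colliding entries are chained through extra HashIndex slots appended | 
|  | # after the initial hashSize entries, each slot holding an index into the HashValue array plus the index | 
|  | # of the next slot in its chain (or -1). The C++ lookup code is expected to use a matching string hash. | 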
|  | @classmethod | 
|  | def createAndDumpHashTable(cls, file, propertyDict, tablePrefix): | 
|  | propertyKeys = propertyDict.keys() | 
|  | numberOfKeys = len(propertyKeys) | 
|  | hashSize = ceilingToPowerOf2(numberOfKeys * 2) | 
|  | hashMask = hashSize - 1 | 
|  | hashTable = [None] * hashSize | 
|  | valueTable = [] | 
|  | tableSize = hashSize | 
|  |  | 
|  | keyValuesToHash = [] | 
|  | for propertyName in propertyKeys: | 
|  | propertyData = propertyDict[propertyName] | 
|  | keyValuesToHash.append((propertyName, propertyData.getIndex())) | 
|  | for alias in propertyData.aliases: | 
|  | keyValuesToHash.append((alias, propertyData.getIndex())) | 
|  |  | 
|  | for keyValue in keyValuesToHash: | 
|  | key = keyValue[0] | 
|  | hash = stringHash(key) % hashSize | 
|  | while hashTable[hash] is not None: | 
|  | if hashTable[hash][1] is not None: | 
|  | hash = hashTable[hash][1] | 
|  | else: | 
|  | hashTable[hash] = (hashTable[hash][0], tableSize) | 
|  | hashTable.append(None) | 
|  | hash = tableSize | 
|  | tableSize = tableSize + 1 | 
|  |  | 
|  | hashTable[hash] = (len(valueTable), None) | 
|  | valueTable.append((key, keyValue[1])) | 
|  |  | 
|  | file.write("static const struct HashIndex {}TableIndex[{}] = {{\n".format(tablePrefix, len(hashTable))) | 
|  |  | 
|  | for tableIndex in hashTable: | 
|  | value = -1 | 
|  | next = -1 | 
|  | if tableIndex is not None: | 
|  | value = tableIndex[0] | 
|  | if tableIndex[1] is not None: | 
|  | next = tableIndex[1] | 
|  |  | 
|  | file.write("    {{ {}, {} }},\n".format(value, next)) | 
|  |  | 
|  | file.write("};\n\n") | 
|  |  | 
|  | file.write("static const struct HashValue {}TableValue[{}] = {{\n".format(tablePrefix, len(valueTable))) | 
|  | for value in valueTable: | 
|  | file.write("    {{ \"{}\", {} }},\n".format(value[0], value[1])) | 
|  | file.write("};\n\n") | 
|  |  | 
|  | file.write("static const struct HashTable {}HashTable = \n".format(tablePrefix)) | 
|  | file.write("    {{ {}, {}, {}TableValue, {}TableIndex }};\n\n".format(len(valueTable), hashMask, tablePrefix, tablePrefix)) | 
|  |  | 
|  |  | 
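|  | # Scripts builds one PropertyData per script from Scripts.txt, synthesizes the Unknown script as the | 
|  | # complement of every assigned range, and then derives the Script_Extensions classes by copying the | 
|  | # plain script data and folding in the extra code points from ScriptExtensions.txt. | 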
|  | class Scripts: | 
|  | def __init__(self): | 
|  | self.allPropertyData = [] | 
|  | self.scriptsByName = {} | 
|  | self.scriptExtensionsByName = {} | 
|  | self.unknownScript = PropertyData("Unknown") | 
|  | self.unknownScript.setAliases(aliases.scriptAliasesFor("Unknown")) | 
|  | self.allPropertyData.append(self.unknownScript) | 
|  | self.scriptsParsed = False | 
|  |  | 
|  | def parseScriptsFile(self, file): | 
|  | currentScriptName = None | 
|  | currentPropertyData = None | 
|  | # To calculate the Unknown script, we gather all the code points assigned to a script as ranges, | 
|  | # sort them, and then walk the list to create the inverse of the assigned ranges. | 
|  | assignedCodePointRanges = [] | 
|  |  | 
|  | for line in file: | 
|  | line = line.split('#', 1)[0] | 
|  | line = line.rstrip() | 
|  | if (not len(line)): | 
|  | continue | 
|  |  | 
|  | fields = line.split(';') | 
|  | if (not fields): | 
|  | continue | 
|  |  | 
|  | codePoints = fields[0].strip() | 
|  | scriptName = fields[1].strip() | 
|  |  | 
|  | if scriptName != currentScriptName: | 
|  | currentScriptName = scriptName | 
|  | currentPropertyData = PropertyData(scriptName) | 
|  | currentPropertyData.setAliases(aliases.scriptAliasesFor(scriptName)) | 
|  | self.allPropertyData.append(currentPropertyData) | 
|  | self.scriptsByName[scriptName] = currentPropertyData | 
|  |  | 
|  | dotDot = codePoints.find("..") | 
|  | if dotDot == -1: | 
|  | codePoint = int(codePoints, 16) | 
|  | currentPropertyData.addMatch(codePoint) | 
|  | assignedCodePointRanges.append((codePoint, codePoint)) | 
|  | else: | 
|  | lowCodePoint = int(codePoints[:dotDot], 16) | 
|  | highCodePoint = int(codePoints[dotDot + 2:], 16) | 
|  | currentPropertyData.addRange(lowCodePoint, highCodePoint) | 
|  | assignedCodePointRanges.append((lowCodePoint, highCodePoint)) | 
|  |  | 
|  | assignedCodePointRanges.sort(key=lambda range: range[0]) | 
|  | lastAssignedCodePoint = 0 | 
|  |  | 
|  | for range in assignedCodePointRanges: | 
|  | if range[0] - lastAssignedCodePoint > 1: | 
|  | if range[0] - lastAssignedCodePoint == 2: | 
|  | self.unknownScript.addMatch(lastAssignedCodePoint + 1) | 
|  | else: | 
|  | self.unknownScript.addRange(lastAssignedCodePoint + 1, range[0] - 1) | 
|  | lastAssignedCodePoint = range[1] | 
|  |  | 
|  | if lastAssignedCodePoint < MaxUnicode: | 
|  | if MaxUnicode - lastAssignedCodePoint == 1: | 
|  | self.unknownScript.addMatch(MaxUnicode) | 
|  | else: | 
|  | self.unknownScript.addRange(lastAssignedCodePoint + 1, MaxUnicode) | 
|  |  | 
|  | self.scriptsParsed = True | 
|  |  | 
|  | def parseScriptExtensionsFile(self, file): | 
|  | currentPropertyData = None | 
|  |  | 
|  | if not self.scriptsParsed: | 
|  | print("Error: parsing ScriptExtensions.txt before Scripts.txt") | 
|  | exit(1) | 
|  |  | 
|  | commonScriptExtensionPropertyData = None | 
|  | inheritedScriptExtensionPropertyData = None | 
|  |  | 
|  | scriptName = "Common" | 
|  | if scriptName in self.scriptsByName: | 
|  | commonScriptExtensionPropertyData = self.scriptsByName[scriptName].makeCopy() | 
|  | else: | 
|  | commonScriptExtensionPropertyData = PropertyData(scriptName) | 
|  | commonScriptExtensionPropertyData.setAliases(aliases.scriptAliasesFor(scriptName)) | 
|  | self.allPropertyData.append(commonScriptExtensionPropertyData) | 
|  | self.scriptExtensionsByName[scriptName] = commonScriptExtensionPropertyData | 
|  |  | 
|  | scriptName = "Inherited" | 
|  | if scriptName in self.scriptsByName: | 
|  | inheritedScriptExtensionPropertyData = self.scriptsByName[scriptName].makeCopy() | 
|  | else: | 
|  | inheritedScriptExtensionPropertyData = PropertyData(scriptName) | 
|  | inheritedScriptExtensionPropertyData.setAliases(aliases.scriptAliasesFor(scriptName)) | 
|  | self.allPropertyData.append(inheritedScriptExtensionPropertyData) | 
|  | self.scriptExtensionsByName[scriptName] = inheritedScriptExtensionPropertyData | 
|  |  | 
|  | for line in file: | 
|  | line = line.split('#', 1)[0] | 
|  | line = line.rstrip() | 
|  | if (not len(line)): | 
|  | continue | 
|  |  | 
|  | fields = line.split(';') | 
|  | if (not fields): | 
|  | continue | 
|  |  | 
|  | codePoints = fields[0].strip() | 
|  | scriptAliasList = fields[1].strip() | 
|  |  | 
|  | for scriptAlias in scriptAliasList.split(' '): | 
|  | scriptName = aliases.scriptNameForAlias(scriptAlias) | 
|  | currentPropertyData = None | 
|  |  | 
|  | if scriptName not in self.scriptExtensionsByName: | 
|  | currentPropertyData = self.scriptsByName[scriptName].makeCopy() | 
|  | self.allPropertyData.append(currentPropertyData) | 
|  | self.scriptExtensionsByName[scriptName] = currentPropertyData | 
|  | else: | 
|  | currentPropertyData = self.scriptExtensionsByName[scriptName] | 
|  |  | 
|  | dotDot = codePoints.find("..") | 
|  | if dotDot == -1: | 
|  | codePoint = int(codePoints, 16) | 
|  | currentPropertyData.addMatch(codePoint) | 
|  | commonScriptExtensionPropertyData.removeMatch(codePoint) | 
|  | inheritedScriptExtensionPropertyData.removeMatch(codePoint) | 
|  | else: | 
|  | lowCodePoint = int(codePoints[:dotDot], 16) | 
|  | highCodePoint = int(codePoints[dotDot + 2:], 16) | 
|  | currentPropertyData.addRange(lowCodePoint, highCodePoint) | 
|  | for codePoint in range(lowCodePoint, highCodePoint + 1): | 
|  | commonScriptExtensionPropertyData.removeMatch(codePoint) | 
|  | inheritedScriptExtensionPropertyData.removeMatch(codePoint) | 
|  |  | 
|  | # For scripts that don't have any additional Script_Extensions code points, reuse the plain script | 
|  | # data as the script extension of the same name. | 
|  | for scriptName, propertyData in self.scriptsByName.items(): | 
|  | if scriptName not in self.scriptExtensionsByName: | 
|  | self.scriptExtensionsByName[scriptName] = propertyData | 
|  |  | 
|  | def dump(self, file): | 
|  | file.write("// Scripts:\n") | 
|  | PropertyData.createAndDumpHashTable(file, self.scriptsByName, "script") | 
|  |  | 
|  | file.write("// Script_Extensions:\n") | 
|  | PropertyData.createAndDumpHashTable(file, self.scriptExtensionsByName, "scriptExtension") | 
|  |  | 
|  |  | 
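|  | # GeneralCategory parses UnicodeData.txt. Every code point (or ", First>" / ", Last>" range pair) is | 
|  | # added both to its specific category (Lu, Nd, ...) and to its one-letter group category (L, N, ...); | 
|  | # Ll/Lt/Lu also feed the LC (Cased_Letter) class. The synthetic Any, ASCII and Assigned classes are | 
|  | # built here too, and gaps between assigned code points are added to Cn (Unassigned). | 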
|  | class GeneralCategory: | 
|  | def __init__(self, file): | 
|  | self.file = file | 
|  | self.allPropertyData = [] | 
|  | self.propertyDataByCategory = {} | 
|  | self.createSpecialPropertyData("Any", (0, MaxUnicode)) | 
|  | self.createSpecialPropertyData("ASCII", (0, lastASCIICodePoint)) | 
|  | self.assignedPropertyData = self.createSpecialPropertyData("Assigned") | 
|  | self.unassignedPropertyData = self.findPropertyGroupFor("Cn")[1] | 
|  | self.casedLetterPropertyData = self.findPropertyGroupFor("LC")[1] | 
|  | self.lastAddedCodePoint = 0 | 
|  |  | 
|  | def createSpecialPropertyData(self, name, range=None): | 
|  | propertyData = PropertyData(name) | 
|  | self.allPropertyData.append(propertyData) | 
|  | self.propertyDataByCategory[name] = propertyData | 
|  | if range: | 
|  | propertyData.addRange(range[0], range[1]) | 
|  |  | 
|  | return propertyData | 
|  |  | 
|  | def findPropertyGroupFor(self, categoryAlias): | 
|  | category = aliases.generalCategoryForAlias(categoryAlias) | 
|  | allCategoryAliases = aliases.generalCategoryAliasesFor(category) | 
|  | categoryGroupAlias = categoryAlias[0] | 
|  | categoryGroup = aliases.generalCategoryForAlias(categoryGroupAlias) | 
|  | allCategoryGroupAlias = aliases.generalCategoryAliasesFor(categoryGroup) | 
|  | groupPropertyData = None | 
|  | propertyData = None | 
|  |  | 
|  | if categoryGroup not in self.propertyDataByCategory: | 
|  | groupPropertyData = PropertyData(categoryGroup) | 
|  | groupPropertyData.setAliases(allCategoryGroupAlias) | 
|  | self.allPropertyData.append(groupPropertyData) | 
|  | self.propertyDataByCategory[categoryGroup] = groupPropertyData | 
|  | else: | 
|  | groupPropertyData = self.propertyDataByCategory[categoryGroup] | 
|  |  | 
|  | if category not in self.propertyDataByCategory: | 
|  | propertyData = PropertyData(category) | 
|  | propertyData.setAliases(allCategoryAliases) | 
|  | self.allPropertyData.append(propertyData) | 
|  | self.propertyDataByCategory[category] = propertyData | 
|  | else: | 
|  | propertyData = self.propertyDataByCategory[category] | 
|  |  | 
|  | return (groupPropertyData, propertyData) | 
|  |  | 
|  | def addNextCodePoints(self, categoryAlias, codePoint, highCodePoint=None): | 
|  | if codePoint - self.lastAddedCodePoint > 1: | 
|  | propertyDatas = self.findPropertyGroupFor("Cn") | 
|  | if codePoint - self.lastAddedCodePoint == 2: | 
|  | propertyDatas[0].addMatch(self.lastAddedCodePoint + 1) | 
|  | propertyDatas[1].addMatch(self.lastAddedCodePoint + 1) | 
|  | else: | 
|  | propertyDatas[0].addRange(self.lastAddedCodePoint + 1, codePoint - 1) | 
|  | propertyDatas[1].addRange(self.lastAddedCodePoint + 1, codePoint - 1) | 
|  |  | 
|  | propertyDatas = self.findPropertyGroupFor(categoryAlias) | 
|  | if highCodePoint: | 
|  | propertyDatas[0].addRange(codePoint, highCodePoint) | 
|  | propertyDatas[1].addRange(codePoint, highCodePoint) | 
|  | if categoryAlias == "Ll" or categoryAlias == "Lt" or categoryAlias == "Lu": | 
|  | self.casedLetterPropertyData.addRange(codePoint, highCodePoint) | 
|  | self.assignedPropertyData.addRange(codePoint, highCodePoint) | 
|  |  | 
|  | self.lastAddedCodePoint = highCodePoint | 
|  | else: | 
|  | propertyDatas[0].addMatch(codePoint) | 
|  | propertyDatas[1].addMatch(codePoint) | 
|  | if categoryAlias == "Ll" or categoryAlias == "Lt" or categoryAlias == "Lu": | 
|  | self.casedLetterPropertyData.addMatch(codePoint) | 
|  | self.assignedPropertyData.addMatch(codePoint) | 
|  |  | 
|  | self.lastAddedCodePoint = codePoint | 
|  |  | 
|  | def parse(self): | 
|  | lastLineFirstOfRange = None | 
|  | lastLineCodePoint = 0 | 
|  | for line in self.file: | 
|  | line = line.split('#', 1)[0] | 
|  | line = line.rstrip() | 
|  | if (not len(line)): | 
|  | continue | 
|  |  | 
|  | fields = line.split(';') | 
|  | if (not fields): | 
|  | continue | 
|  |  | 
|  | codePoint = int(fields[0].strip(), 16) | 
|  | description = fields[1].strip() | 
|  | categoryAlias = fields[2].strip() | 
|  |  | 
|  | if lastLineFirstOfRange: | 
|  | if description[-5:-1] == "Last": | 
|  | self.addNextCodePoints(categoryAlias, lastLineFirstOfRange, codePoint) | 
|  | lastLineFirstOfRange = None | 
|  | continue | 
|  | else: | 
|  | print("Malformed First..Last pair of lines") | 
|  |  | 
|  | if description[-6:-1] == "First": | 
|  | lastLineFirstOfRange = codePoint | 
|  | continue | 
|  |  | 
|  | self.addNextCodePoints(categoryAlias, codePoint) | 
|  |  | 
|  | if self.lastAddedCodePoint < MaxUnicode: | 
|  | propertyDatas = self.findPropertyGroupFor("Cn") | 
|  | if MaxUnicode - self.lastAddedCodePoint == 1: | 
|  | propertyDatas[0].addMatch(MaxUnicode) | 
|  | propertyDatas[1].addMatch(MaxUnicode) | 
|  | else: | 
|  | propertyDatas[0].addRange(self.lastAddedCodePoint + 1, MaxUnicode) | 
|  | propertyDatas[1].addRange(self.lastAddedCodePoint + 1, MaxUnicode) | 
|  |  | 
|  | def dump(self, file): | 
|  | file.write("// General_Category:\n") | 
|  | PropertyData.createAndDumpHashTable(file, self.propertyDataByCategory, "generalCategory") | 
|  |  | 
|  |  | 
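|  | # BinaryProperty parses the "codePoint(s) ; PropertyName" files (PropList.txt, DerivedCoreProperties.txt, | 
|  | # DerivedBinaryProperties.txt, DerivedNormalizationProps.txt and emoji-data.txt), keeping only the | 
|  | # properties listed in SupportedBinaryProperties. | 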
|  | class BinaryProperty: | 
|  | def __init__(self): | 
|  | self.allPropertyData = [] | 
|  | self.propertyDataByProperty = {} | 
|  |  | 
|  | def parsePropertyFile(self, file): | 
|  | currentPropertyName = None | 
|  | currentPropertyData = None | 
|  |  | 
|  | for line in file: | 
|  | line = line.split('#', 1)[0] | 
|  | line = line.rstrip() | 
|  | if (not len(line)): | 
|  | continue | 
|  |  | 
|  | fields = line.split(';') | 
|  | if (not fields): | 
|  | continue | 
|  |  | 
|  | codePoints = fields[0].strip() | 
|  | propertyName = fields[1].strip() | 
|  |  | 
|  | if propertyName != currentPropertyName: | 
|  | if propertyName not in SupportedBinaryProperties: | 
|  | continue | 
|  |  | 
|  | currentPropertyName = propertyName | 
|  | currentPropertyData = PropertyData(propertyName) | 
|  | currentPropertyData.setAliases(aliases.globalAliasesFor(propertyName)) | 
|  | self.allPropertyData.append(currentPropertyData) | 
|  | self.propertyDataByProperty[propertyName] = currentPropertyData | 
|  |  | 
|  | dotDot = codePoints.find("..") | 
|  | if dotDot == -1: | 
|  | currentPropertyData.addMatch(int(codePoints, 16)) | 
|  | else: | 
|  | currentPropertyData.addRange(int(codePoints[:dotDot], 16), int(codePoints[dotDot + 2:], 16)) | 
|  |  | 
|  | def dump(self, file): | 
|  | file.write("// binary properties:\n") | 
|  | PropertyData.createAndDumpHashTable(file, self.propertyDataByProperty, "binaryProperty") | 
|  |  | 
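|  | # Main driver: verify that the required UCD files are present, parse the alias files first (the other | 
|  | # parsers rely on them), then build the General_Category, binary property and script character classes, | 
|  | # and finally write the generated header: one createCharacterClassN() factory per class followed by the | 
|  | # name -> index hash tables. | 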
|  | if __name__ == "__main__": | 
|  | parser = optparse.OptionParser(usage="usage: %prog <UCD-Directory> <YarrUnicodePropertyData.h>") | 
|  | (options, args) = parser.parse_args() | 
|  |  | 
|  | if len(args) != 2: | 
|  | parser.error("<UCD-Directory> <YarrUnicodePropertyData.h>") | 
|  |  | 
|  | UCDDirectoryPath = args[0] | 
|  | unicodePropertyDataHPath = args[1] | 
|  |  | 
|  | verifyUCDFilesExist() | 
|  |  | 
|  | propertyAliasesFile = openUCDFileOrExit("PropertyAliases.txt") | 
|  | propertyValueAliasesFile = openUCDFileOrExit("PropertyValueAliases.txt") | 
|  | scriptsFile = openUCDFileOrExit("Scripts.txt") | 
|  | scriptExtensionsFile = openUCDFileOrExit("ScriptExtensions.txt") | 
|  | unicodeDataFile = openUCDFileOrExit("UnicodeData.txt") | 
|  | derivedBinaryPropertiesFile = openUCDFileOrExit("DerivedBinaryProperties.txt") | 
|  | derivedCorePropertiesFile = openUCDFileOrExit("DerivedCoreProperties.txt") | 
|  | derivedNormalizationPropertiesFile = openUCDFileOrExit("DerivedNormalizationProps.txt") | 
|  | propListFile = openUCDFileOrExit("PropList.txt") | 
|  | emojiDataFile = openUCDFileOrExit("emoji-data.txt") | 
|  |  | 
|  | aliases = Aliases() | 
|  |  | 
|  | propertyDataHFile = openOrExit(unicodePropertyDataHPath, "w") | 
|  |  | 
|  | propertyDataHFile.write(header) | 
|  |  | 
|  | aliases.parsePropertyAliasesFile(propertyAliasesFile) | 
|  | aliases.parsePropertyValueAliasesFile(propertyValueAliasesFile) | 
|  |  | 
|  | generalCategory = GeneralCategory(unicodeDataFile) | 
|  | generalCategory.parse() | 
|  |  | 
|  | binaryProperty = BinaryProperty() | 
|  | binaryProperty.parsePropertyFile(derivedBinaryPropertiesFile) | 
|  | binaryProperty.parsePropertyFile(derivedCorePropertiesFile) | 
|  | binaryProperty.parsePropertyFile(derivedNormalizationPropertiesFile) | 
|  | binaryProperty.parsePropertyFile(propListFile) | 
|  | binaryProperty.parsePropertyFile(emojiDataFile) | 
|  |  | 
|  | scripts = Scripts() | 
|  | scripts.parseScriptsFile(scriptsFile) | 
|  | scripts.parseScriptExtensionsFile(scriptExtensionsFile) | 
|  |  | 
|  | PropertyData.dumpAll(propertyDataHFile) | 
|  | generalCategory.dump(propertyDataHFile) | 
|  | binaryProperty.dump(propertyDataHFile) | 
|  | scripts.dump(propertyDataHFile) | 
|  |  | 
|  | propertyDataHFile.write(footer) | 
|  |  | 
|  | exit(0) |