chrome/browser/resources/chromeos/emoji_picker/tools/symbol_data.py - chromium/src - Git at Google

 #!/usr/bin/env python3
 # Copyright 2022 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import argparse
 import dataclasses
 import json
 import logging
 import os
 import sys
 import unicodedata
 from typing import Any, Dict, Generator, List, Optional, Sequence, Set, Tuple

 # Add extra dependencies to the python path.
 # Going by its path, this script sits six directories below the source root
 # (chrome/browser/resources/chromeos/emoji_picker/tools/), so joining six
 # parent-directory components onto the script directory yields that root.
 _SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
 _CHROME_SOURCE = os.path.realpath(
     os.path.join(_SCRIPT_DIR, *[os.path.pardir] * 6))
 # Presumably the directory that holds the `util` package imported below.
 sys.path.append(os.path.join(_CHROME_SOURCE, 'build/android/gyp'))

 from util import build_utils

 # Initialize logger.
 # Log records are sent to stdout and the root level is ERROR; `main` switches
 # LOGGER itself to DEBUG when --verbose is passed.
 logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
 LOGGER = logging.getLogger(__name__)


 # List of unicode ranges for each symbol group (ranges are inclusive).
 # Maps a group name to a list of (first code point, last code point) tuples.
 SYMBOLS_GROUPS = {
     'Arrows': [
         # Arrows Unicode Block.
         (0x2190, 0x21ff),
         # Supplemental Arrows-C Unicode Block.
         # Note: There are unassigned code points in the block which are
         # automatically skipped by the script.
         (0x1f800, 0x1f8ff),
     ],
     'Bullet/Stars': [
         # Some rows from Miscellaneous Symbols and Arrows Unicode block.
         (0x2b20, 0x2b2f),
         (0x2b50, 0x2b5f),
         (0x2b90, 0x2b9f),
         (0x2bb0, 0x2bbf),
         (0x2bc0, 0x2bcf),
     ],
     'Currency': [
         # Currency Symbols Unicode Block.
         (0x20a0, 0x20bf),
     ],
     'Letterlike': [
         # First row of the Letterlike Symbols Unicode Block; the following
         # rows are listed in SEARCH_ONLY_SYMBOLS_GROUPS.
         (0x2100, 0x210f),
     ],
     'Math': [
         # Greek letters from the Greek and Coptic Unicode Block. Each range
         # below spans 26 consecutive code points.
         # Normal Capital Letters.
         (0x0391, 0x0391 + 25),
         # Normal Small Letters.
         (0x03b1, 0x03b1 + 25),
         # Parts of the Mathematical Operators Unicode Block; the gaps between
         # them are listed in SEARCH_ONLY_SYMBOLS_GROUPS.
         (0x2200, 0x2235),
         (0x2260, 0x228b),
     ],
     'Miscellaneous': [
         # Miscellaneous Symbols Unicode Block.
         (0x2600, 0x26ff)
     ],
 }

 # List of unicode ranges (inclusive) for each search only symbol group.
 # Same shape as SYMBOLS_GROUPS: group name -> list of (first, last) tuples.
 SEARCH_ONLY_SYMBOLS_GROUPS = {
     'Letterlike': [
         # Letterlike Symbols Unicode Block, from its second row onwards.
         (0x2110, 0x214f),
     ],
     'Math': [
         # Greek Letters and Symbols from Mathematical Alphanumeric
         # Symbols Unicode Block. Each range spans 26 consecutive code points.
         # Bold Capital Letters.
         (0x1D6A8, 0x1D6A8 + 25),
         # Italic Capital Letters.
         (0x1D6E2, 0x1D6E2 + 25),
         # Bold-Italic Capital Letters.
         (0x1D71C, 0x1D71C + 25),
         # Mathematical Operators
         (0x2236, 0x225f),
         (0x228c, 0x22df),
     ],
     'Miscellaneous': [
         # Part of the Miscellaneous Technical Unicode Block.
         (0x2300, 0x23cf)
     ],
 }


 # Unicode symbols that do not render with the fonts available on ChromeOS.
 # `main` adds them to the filter set so they are never emitted.
 INVALID_SYMBOLS = {
     '\u2BBA',
     '\u2BBB',
     '\u2BBC',
     '\u2B97',
     '\u2BC9',
     '\U0001F8B0',
     '\U0001F8B1',
 }


 @dataclasses.dataclass
 class EmojiPickerChar:
     """A single character shown in EmojiPicker.

     Attributes:
         string: The unicode character itself.
         name: Name of the unicode character.
         keywords: Search keywords related to the unicode character; a fresh
             empty list for every instance unless given.
     """
     string: str
     name: str
     keywords: List[str] = dataclasses.field(default_factory=list)

 @dataclasses.dataclass
 class EmojiPickerEmoji:
     """An emoji/emoticon/symbol entry in EmojiPicker.

     Attributes:
         base: The base emoji.
         alternates: Variants and alternative emojis of the base emoji; a
             fresh empty list for every instance unless given.
     """
     base: EmojiPickerChar
     alternates: List[EmojiPickerChar] = dataclasses.field(default_factory=list)


 @dataclasses.dataclass
 class EmojiPickerGroup:
     """A named group of emoji/emoticon/symbols.

     Attributes:
         group: Name of the group.
         emoji: The emojis that belong to the group.
         search_only: Whether the group is search-only. Defaults to False.
     """
     group: str
     emoji: List[EmojiPickerEmoji]
     search_only: bool = False


 def _convert_snake_case_to_camel_case(snake_case_input: str) -> str:
     """Converts a snake-case string to camel-case.

     The text before the first underscore is kept unchanged; every following
     underscore-separated word is title-cased and appended to it.

     Args:
         snake_case_input: String that is snake case.

     Returns:
         The camel-case version of the input.
     """
     # `str.split` always returns at least one item, so `head` always exists.
     head, *tail = snake_case_input.split('_')
     return head + ''.join(map(str.title, tail))


 def _emoji_data_dict_factory(
         data: Sequence[Tuple[str, Any]]) -> Dict[str, Any]:
     """Builds a dictionary for emoji data; usable as a `dict_factory`.

     Entries whose value is an empty list are left out, and the snake-case
     keys of the remaining entries are converted to camel-case.

     Args:
         data: A sequence of (key, value) pairs.

     Returns:
         A dictionary with camel-case keys that holds every input pair except
         those whose value is an empty list.
     """
     converted = {}
     for key, value in data:
         # Only empty lists are dropped; other falsy values (False, '', 0)
         # are kept.
         if isinstance(value, list) and not value:
             continue
         converted[_convert_snake_case_to_camel_case(key)] = value
     return converted


 def _load_emoji_characters_from_files(data_paths: List[str]) -> Set[str]:
     """Loads a set of emoji characters from a list of data file paths.

     Each file must contain a JSON list of groups, where every group has an
     'emoji' list whose items carry the character under ['base']['string'].

     Args:
         data_paths: A list of emoji data files.

     Returns:
         The set of emoji unicode characters read from the data. The set is
         empty when `data_paths` is empty.
     """
     emoji_character_set = set()
     for data_path in data_paths:
         # The files hold non-ASCII characters. Decode them explicitly as
         # UTF-8 (the encoding this script uses for its own JSON output)
         # rather than with the platform-dependent default encoding.
         with open(data_path, 'r', encoding='utf-8') as data_file:
             emoji_groups = json.load(data_file)
         emoji_character_set.update(
             emoji['base']['string']
             for emoji_group in emoji_groups
             for emoji in emoji_group['emoji'])
     return emoji_character_set


 def _convert_unicode_ranges_to_emoji_chars(
         unicode_ranges: List[Tuple[int, int]],
         ignore_errors: bool = True) -> Generator[EmojiPickerChar, None, None]:
     """Converts unicode ranges to `EmojiPickerChar` instances.

     Given a list of unicode ranges, it iterates over all code points in all
     the ranges and yields an instance of `EmojiPickerChar` for each one that
     has a name in `unicodedata`.

     Args:
         unicode_ranges: A list of inclusive (start, end) code point ranges.
         ignore_errors: If True, code points that raise `ValueError` while
             being converted or looked up are logged as warnings and skipped.

     Raises:
         ValueError: If a unicode character does not exist in the data source
             and `ignore_errors` is false.

     Yields:
         An `EmojiPickerChar` holding the character and its lower-cased
         unicode name, for each valid code point in the input ranges.
     """

     LOGGER.info(
         'generating EmojiPickerChar instances for ranges: [%s].',
         ', '.join(
             '(U+{:02x}, U+{:02x})'.format(*rng)
             for rng in unicode_ranges))

     num_returned = 0
     num_ignored = 0

     # Iterate over the input unicode ranges.
     for (start_code_point, end_code_point) in unicode_ranges:
         LOGGER.debug(
             'generating EmojiPickerChar instances '
             'for range (U+%02x to U+%02x).',
             start_code_point,
             end_code_point)

         # Iterate over all code points in the range.
         for code_point in range(start_code_point, end_code_point + 1):
             try:
                 # Both calls can raise ValueError: `chr` for an out-of-range
                 # code point and `unicodedata.name` for an unnamed one.
                 unicode_character = chr(code_point)
                 name = unicodedata.name(unicode_character)
             except ValueError:
                 # If ignore_errors is False, raise the exception.
                 if not ignore_errors:
                     raise
                 num_ignored += 1
                 LOGGER.warning(
                     'invalid code point U+%02x.', code_point)
                 continue

             # Count only the instances that are really handed out, so the
             # final stats do not include ignored code points.
             num_returned += 1
             yield EmojiPickerChar(
                 string=unicode_character,
                 name=name.lower())

     LOGGER.info(
         'stats: #returned instances: %d, #ignored code points: %d',
         num_returned,
         num_ignored)


 def get_symbols_groups(
         group_unicode_ranges: Dict[str, List[Tuple[int, int]]],
         search_only: bool = False, ignore_errors: bool = True,
         filter_set: Optional[Set[str]] = None) -> List[EmojiPickerGroup]:
     """Creates symbols data from predefined groups and their unicode ranges.

     Args:
         group_unicode_ranges: A base mapping of group names to unicode ranges.
         search_only: If True, the groups are marked as search-only.
         ignore_errors: If True, unicode characters that cannot be processed
             are skipped instead of raising.
         filter_set: If not None, the characters that exist in this set are
             excluded from output symbol groups.

     Returns:
         One `EmojiPickerGroup` per entry of `group_unicode_ranges`, in the
         iteration order of the mapping.

     Raises:
         ValueError: If a unicode character does not exist in the data source
             and `ignore_errors` is false.
     """
     symbol_groups = []
     for name, ranges in group_unicode_ranges.items():
         LOGGER.info('generating symbols for group %s.', name)
         members = []
         for char in _convert_unicode_ranges_to_emoji_chars(
                 ranges, ignore_errors=ignore_errors):
             # Skip characters the caller asked to exclude.
             if filter_set is not None and char.string in filter_set:
                 continue
             members.append(EmojiPickerEmoji(base=char))

         symbol_groups.append(
             EmojiPickerGroup(
                 group=name, emoji=members, search_only=search_only))
     return symbol_groups


 def main(argv: List[str]) -> None:
     """Generates the symbols data and writes it as a compact JSON file.

     Args:
         argv: Command line arguments, without the program name.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument(
         '--output',
         required=True,
         type=str,
         help='Path to write the output JSON file.')
     parser.add_argument(
         '--verbose',
         required=False,
         default=False,
         action='store_true',
         help="Set the logging level to Debug.")
     parser.add_argument('--filter-data-paths', action='append', nargs='+')
     args = parser.parse_args(argv)

     if args.verbose:
         LOGGER.setLevel(level=logging.DEBUG)

     # Every occurrence of --filter-data-paths contributes its own list of
     # paths; flatten them into a single list (empty if the flag is absent).
     filter_data_paths = [
         data_path
         for data_path_element in (args.filter_data_paths or [])
         for data_path in data_path_element]

     # Characters of other emoji data files must be excluded from symbols,
     # as must the symbols that don't render on ChromeOS.
     filter_set = _load_emoji_characters_from_files(
         data_paths=filter_data_paths).union(INVALID_SYMBOLS)

     # Regular symbol groups come first, followed by the search-only ones.
     symbols_groups = get_symbols_groups(
         group_unicode_ranges=SYMBOLS_GROUPS,
         filter_set=filter_set,
         search_only=False)
     symbols_groups += get_symbols_groups(
         group_unicode_ranges=SEARCH_ONLY_SYMBOLS_GROUPS,
         filter_set=filter_set,
         search_only=True)

     # Convert the dataclass instances to plain dictionaries.
     symbols_groups_dicts = [
         dataclasses.asdict(
             symbol_group, dict_factory=_emoji_data_dict_factory)
         for symbol_group in symbols_groups]

     # Write the result to output path as json file.
     with build_utils.AtomicOutput(args.output) as tmp_file:
         serialized = json.dumps(
             symbols_groups_dicts,
             separators=(',', ':'),
             ensure_ascii=False)
         tmp_file.write(serialized.encode('utf-8'))

 # Run only when executed as a script; the program name (argv[0]) is dropped
 # before the arguments are handed to `main`.
 if __name__ == "__main__":
     main(sys.argv[1:])
	#!/usr/bin/env python3
	# Copyright 2022 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import argparse
	import dataclasses
	import json
	import logging
	import os
	import sys
	import unicodedata
	from typing import Any, Dict, Generator, List, Optional, Sequence, Set, Tuple

	# Add extra dependencies to the python path.
	# The source root is reached by joining six parent-directory components
	# onto the directory that contains this script.
	_SCRIPT_DIR = os.path.realpath(os.path.dirname(__file__))
	_CHROME_SOURCE = os.path.realpath(
	    os.path.join(_SCRIPT_DIR, *[os.path.pardir] * 6))
	# Presumably the directory that holds the `util` package imported below.
	sys.path.append(os.path.join(_CHROME_SOURCE, 'build/android/gyp'))

	from util import build_utils

	# Initialize logger.
	# Log records are sent to stdout and the root level is ERROR; `main` switches
	# LOGGER itself to DEBUG when --verbose is passed.
	logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
	LOGGER = logging.getLogger(__name__)


	# List of unicode ranges for each symbol group (ranges are inclusive).
	# Maps a group name to a list of (first code point, last code point) tuples.
	SYMBOLS_GROUPS = {
	    'Arrows': [
	        # Arrows Unicode Block.
	        (0x2190, 0x21ff),
	        # Supplemental Arrows-C Unicode Block.
	        # Note: There are unassigned code points in the block which are
	        # automatically skipped by the script.
	        (0x1f800, 0x1f8ff),
	    ],
	    'Bullet/Stars': [
	        # Some rows from Miscellaneous Symbols and Arrows Unicode block.
	        (0x2b20, 0x2b2f),
	        (0x2b50, 0x2b5f),
	        (0x2b90, 0x2b9f),
	        (0x2bb0, 0x2bbf),
	        (0x2bc0, 0x2bcf),
	    ],
	    'Currency': [
	        # Currency Symbols Unicode Block.
	        (0x20a0, 0x20bf),
	    ],
	    'Letterlike': [
	        # First row of the Letterlike Symbols Unicode Block; the following
	        # rows are listed in SEARCH_ONLY_SYMBOLS_GROUPS.
	        (0x2100, 0x210f),
	    ],
	    'Math': [
	        # Greek letters from the Greek and Coptic Unicode Block. Each range
	        # below spans 26 consecutive code points.
	        # Normal Capital Letters.
	        (0x0391, 0x0391 + 25),
	        # Normal Small Letters.
	        (0x03b1, 0x03b1 + 25),
	        # Parts of the Mathematical Operators Unicode Block; the gaps
	        # between them are listed in SEARCH_ONLY_SYMBOLS_GROUPS.
	        (0x2200, 0x2235),
	        (0x2260, 0x228b),
	    ],
	    'Miscellaneous': [
	        # Miscellaneous Symbols Unicode Block.
	        (0x2600, 0x26ff),
	    ],
	}

	# List of unicode ranges (inclusive) for each search only symbol group.
	# Same shape as SYMBOLS_GROUPS: group name -> list of (first, last) tuples.
	SEARCH_ONLY_SYMBOLS_GROUPS = {
	    'Letterlike': [
	        # Letterlike Symbols Unicode Block, from its second row onwards.
	        (0x2110, 0x214f),
	    ],
	    'Math': [
	        # Greek Letters and Symbols from Mathematical Alphanumeric
	        # Symbols Unicode Block. Each range spans 26 consecutive code
	        # points.
	        # Bold Capital Letters.
	        (0x1D6A8, 0x1D6A8 + 25),
	        # Italic Capital Letters.
	        (0x1D6E2, 0x1D6E2 + 25),
	        # Bold-Italic Capital Letters.
	        (0x1D71C, 0x1D71C + 25),
	        # Mathematical Operators
	        (0x2236, 0x225f),
	        (0x228c, 0x22df),
	    ],
	    'Miscellaneous': [
	        # Part of the Miscellaneous Technical Unicode Block.
	        (0x2300, 0x23cf),
	    ],
	}


	# Unicode symbols that do not render with the fonts available on ChromeOS.
	# `main` adds them to the filter set so they are never emitted.
	INVALID_SYMBOLS = {
	    '\u2BBA',
	    '\u2BBB',
	    '\u2BBC',
	    '\u2B97',
	    '\u2BC9',
	    '\U0001F8B0',
	    '\U0001F8B1',
	}


	@dataclasses.dataclass
	class EmojiPickerChar:
	    """A single character shown in EmojiPicker.

	    Attributes:
	        string: The unicode character itself.
	        name: Name of the unicode character.
	        keywords: Search keywords related to the unicode character; a fresh
	            empty list for every instance unless given.
	    """
	    string: str
	    name: str
	    keywords: List[str] = dataclasses.field(default_factory=list)


	@dataclasses.dataclass
	class EmojiPickerEmoji:
	    """An emoji/emoticon/symbol entry in EmojiPicker.

	    Attributes:
	        base: The base emoji.
	        alternates: Variants and alternative emojis of the base emoji; a
	            fresh empty list for every instance unless given.
	    """
	    base: EmojiPickerChar
	    alternates: List[EmojiPickerChar] = dataclasses.field(
	        default_factory=list)


	@dataclasses.dataclass
	class EmojiPickerGroup:
	    """A named group of emoji/emoticon/symbols.

	    Attributes:
	        group: Name of the group.
	        emoji: The emojis that belong to the group.
	        search_only: Whether the group is search-only. Defaults to False.
	    """
	    group: str
	    emoji: List[EmojiPickerEmoji]
	    search_only: bool = False


	def _convert_snake_case_to_camel_case(snake_case_input: str) -> str:
	    """Converts a snake-case string to camel-case.

	    The text before the first underscore is kept unchanged; every following
	    underscore-separated word is title-cased and appended to it.

	    Args:
	        snake_case_input: String that is snake case.

	    Returns:
	        The camel-case version of the input.
	    """
	    # `str.split` always returns at least one item, so `head` always exists.
	    head, *tail = snake_case_input.split('_')
	    return head + ''.join(map(str.title, tail))


	def _emoji_data_dict_factory(
	        data: Sequence[Tuple[str, Any]]) -> Dict[str, Any]:
	    """Builds a dictionary for emoji data; usable as a `dict_factory`.

	    Entries whose value is an empty list are left out, and the snake-case
	    keys of the remaining entries are converted to camel-case.

	    Args:
	        data: A sequence of (key, value) pairs.

	    Returns:
	        A dictionary with camel-case keys that holds every input pair
	        except those whose value is an empty list.
	    """
	    # Only empty lists are dropped; other falsy values (False, '', 0) are
	    # kept.
	    return {
	        _convert_snake_case_to_camel_case(key): value
	        for (key, value) in data
	        if not isinstance(value, list) or value
	    }


	def _load_emoji_characters_from_files(data_paths: List[str]) -> Set[str]:
	    """Loads a set of emoji characters from a list of data file paths.

	    Each file must contain a JSON list of groups, where every group has an
	    'emoji' list whose items carry the character under ['base']['string'].

	    Args:
	        data_paths: A list of emoji data files.

	    Returns:
	        The set of emoji unicode characters read from the data. The set is
	        empty when `data_paths` is empty.
	    """
	    emoji_character_set = set()
	    for data_path in data_paths:
	        # The files hold non-ASCII characters. Decode them explicitly as
	        # UTF-8 (the encoding this script uses for its own JSON output)
	        # rather than with the platform-dependent default encoding.
	        with open(data_path, 'r', encoding='utf-8') as data_file:
	            emoji_groups = json.load(data_file)
	        emoji_character_set.update(
	            emoji['base']['string']
	            for emoji_group in emoji_groups
	            for emoji in emoji_group['emoji'])
	    return emoji_character_set


	def _convert_unicode_ranges_to_emoji_chars(
	        unicode_ranges: List[Tuple[int, int]],
	        ignore_errors: bool = True
	) -> Generator[EmojiPickerChar, None, None]:
	    """Converts unicode ranges to `EmojiPickerChar` instances.

	    Given a list of unicode ranges, it iterates over all code points in all
	    the ranges and yields an instance of `EmojiPickerChar` for each one
	    that has a name in `unicodedata`.

	    Args:
	        unicode_ranges: A list of inclusive (start, end) code point ranges.
	        ignore_errors: If True, code points that raise `ValueError` while
	            being converted or looked up are logged as warnings and
	            skipped.

	    Raises:
	        ValueError: If a unicode character does not exist in the data
	            source and `ignore_errors` is false.

	    Yields:
	        An `EmojiPickerChar` holding the character and its lower-cased
	        unicode name, for each valid code point in the input ranges.
	    """

	    LOGGER.info(
	        'generating EmojiPickerChar instances for ranges: [%s].',
	        ', '.join(
	            '(U+{:02x}, U+{:02x})'.format(*rng)
	            for rng in unicode_ranges))

	    num_returned = 0
	    num_ignored = 0

	    # Iterate over the input unicode ranges.
	    for (start_code_point, end_code_point) in unicode_ranges:
	        LOGGER.debug(
	            'generating EmojiPickerChar instances '
	            'for range (U+%02x to U+%02x).',
	            start_code_point,
	            end_code_point)

	        # Iterate over all code points in the range.
	        for code_point in range(start_code_point, end_code_point + 1):
	            try:
	                # Both calls can raise ValueError: `chr` for an
	                # out-of-range code point and `unicodedata.name` for an
	                # unnamed one.
	                unicode_character = chr(code_point)
	                name = unicodedata.name(unicode_character)
	            except ValueError:
	                # If ignore_errors is False, raise the exception.
	                if not ignore_errors:
	                    raise
	                num_ignored += 1
	                LOGGER.warning(
	                    'invalid code point U+%02x.', code_point)
	                continue

	            # Count only the instances that are really handed out, so the
	            # final stats do not include ignored code points.
	            num_returned += 1
	            yield EmojiPickerChar(
	                string=unicode_character,
	                name=name.lower())

	    LOGGER.info(
	        'stats: #returned instances: %d, #ignored code points: %d',
	        num_returned,
	        num_ignored)


	def get_symbols_groups(
	        group_unicode_ranges: Dict[str, List[Tuple[int, int]]],
	        search_only: bool = False, ignore_errors: bool = True,
	        filter_set: Optional[Set[str]] = None) -> List[EmojiPickerGroup]:
	    """Creates symbols data from predefined groups and their unicode ranges.

	    Args:
	        group_unicode_ranges: A base mapping of group names to unicode
	            ranges.
	        search_only: If True, the groups are marked as search-only.
	        ignore_errors: If True, unicode characters that cannot be processed
	            are skipped instead of raising.
	        filter_set: If not None, the characters that exist in this set are
	            excluded from output symbol groups.

	    Returns:
	        One `EmojiPickerGroup` per entry of `group_unicode_ranges`, in the
	        iteration order of the mapping.

	    Raises:
	        ValueError: If a unicode character does not exist in the data
	            source and `ignore_errors` is false.
	    """
	    emoji_groups = []
	    for (group_name, unicode_ranges) in group_unicode_ranges.items():
	        LOGGER.info('generating symbols for group %s.', group_name)
	        emoji_chars = _convert_unicode_ranges_to_emoji_chars(
	            unicode_ranges, ignore_errors=ignore_errors)
	        # Keep every character unless the caller asked to exclude it.
	        emoji = [
	            EmojiPickerEmoji(base=emoji_char)
	            for emoji_char in emoji_chars
	            if filter_set is None or emoji_char.string not in filter_set]

	        emoji_group = EmojiPickerGroup(
	            group=group_name, emoji=emoji, search_only=search_only)
	        emoji_groups.append(emoji_group)
	    return emoji_groups


	def main(argv: List[str]) -> None:
	    """Generates the symbols data and writes it as a compact JSON file.

	    Args:
	        argv: Command line arguments, without the program name.
	    """
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        '--output', required=True, type=str,
	        help='Path to write the output JSON file.')
	    parser.add_argument(
	        '--verbose', required=False, default=False,
	        action='store_true',
	        help="Set the logging level to Debug.")
	    parser.add_argument(
	        '--filter-data-paths', action='append', nargs='+')

	    args = parser.parse_args(argv)

	    if args.verbose:
	        LOGGER.setLevel(level=logging.DEBUG)

	    # Every occurrence of --filter-data-paths contributes its own list of
	    # paths; flatten them into a single list (empty if the flag is absent).
	    filter_data_paths = []
	    if args.filter_data_paths is not None:
	        for data_path_element in args.filter_data_paths:
	            filter_data_paths.extend(data_path_element)

	    # Loads a list of other emoji characters that must be
	    # excluded from symbols.
	    filter_set = _load_emoji_characters_from_files(
	        data_paths=filter_data_paths)

	    # Explicitly remove individual symbols that don't render on ChromeOS
	    filter_set |= INVALID_SYMBOLS

	    # Add symbol groups.
	    symbols_groups = get_symbols_groups(
	        group_unicode_ranges=SYMBOLS_GROUPS,
	        filter_set=filter_set,
	        search_only=False)

	    # Add search-only symbol groups.
	    symbols_groups.extend(
	        get_symbols_groups(
	            group_unicode_ranges=SEARCH_ONLY_SYMBOLS_GROUPS,
	            filter_set=filter_set,
	            search_only=True)
	    )

	    # Convert the dataclass instances to plain dictionaries.
	    symbols_groups_dicts = [
	        dataclasses.asdict(
	            symbol_group, dict_factory=_emoji_data_dict_factory)
	        for symbol_group in symbols_groups]

	    # Write the result to output path as json file.
	    with build_utils.AtomicOutput(args.output) as tmp_file:
	        tmp_file.write(
	            json.dumps(
	                symbols_groups_dicts,
	                separators=(',', ':'),
	                ensure_ascii=False).encode('utf-8'))


	# Run only when executed as a script; the program name (argv[0]) is dropped
	# before the arguments are handed to `main`.
	if __name__ == "__main__":
	    main(sys.argv[1:])