# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function
import argparse
import glob as pyglob
import json
import os
import sys
from . import *
from .comment_stripper import CommentStripper
from .request_types import CopyRequest
from .renderers import makefile, unix_exec, windows_exec
from . import filtration, utils
import BUILDRULES
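# Note: BUILDRULES is not part of this package; it is a bare import and must be
# reachable on sys.path, e.g. via the PYTHONPATH=data setting shown in the help
# text below, which presumably points at the directory containing BUILDRULES.py.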
flag_parser = argparse.ArgumentParser(
    description = """Generates rules for building ICU binary data files from text
and other input files in source control.

Use the --mode option to declare how to execute those rules, either exporting
the rules to a Makefile or spawning child processes to run them immediately:

  --mode=gnumake prints a Makefile to standard out.
  --mode=unix-exec spawns child processes in a Unix-like environment.
  --mode=windows-exec spawns child processes in a Windows-like environment.

Tips for --mode=unix-exec
=========================

Create two empty directories for out_dir and tmp_dir. They will get filled
with a lot of intermediate files.

Set LD_LIBRARY_PATH to include the lib directory; e.g., from icu4c/source:

  $ LD_LIBRARY_PATH=lib PYTHONPATH=data python3 -m buildtool ...

Once buildtool finishes, you have compiled the data, but you have not packaged
it into a .dat or .so file. This is done by the separate pkgdata tool in bin.
Read the docs of pkgdata:

  $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help

Example command line to call pkgdata:

  $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
      -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst

where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
The above command will create icudt63l.dat in the tmpdir.

Command-Line Arguments
======================
""",
    formatter_class = argparse.RawDescriptionHelpFormatter
)
arg_group_required = flag_parser.add_argument_group("required arguments")
arg_group_required.add_argument(
    "--mode",
    help = "What to do with the generated rules.",
    choices = ["gnumake", "unix-exec", "windows-exec"],
    required = True
)

flag_parser.add_argument(
    "--src_dir",
    help = "Path to data source folder (icu4c/source/data).",
    default = "."
)
flag_parser.add_argument(
    "--filter_file",
    metavar = "PATH",
    help = "Path to an ICU data filter JSON file.",
    default = None
)
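# Illustrative sketch of a filter file, limited to the two keys this module
# reads directly (collationUCAData and fileReplacements); any other keys are
# validated against filtration_schema.json when the optional 'jsonschema'
# package is installed. Comments are allowed in the file because it is parsed
# with hjson or CommentStripper. The paths and file names below are
# hypothetical:
#
#   {
#       "collationUCAData": "implicithan",
#       "fileReplacements": {
#           "directory": "my/replacement/dir",
#           "replacements": [
#               { "src": "my_zoneinfo64.txt", "dest": "misc/zoneinfo64.txt" }
#           ]
#       }
#   }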
flag_parser.add_argument(
    "--include_uni_core_data",
    help = "Include the full Unicode core data in the dat file.",
    default = False,
    action = "store_true"
)
flag_parser.add_argument(
    "--seqmode",
    help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
    choices = ["sequential", "parallel"],
    default = "sequential"
)

arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes")
arg_group_exec.add_argument(
    "--out_dir",
    help = "Path to where to save output data files.",
    default = "icudata"
)
arg_group_exec.add_argument(
    "--tmp_dir",
    help = "Path to where to save temporary files.",
    default = "icutmp"
)
arg_group_exec.add_argument(
    "--tool_dir",
    help = "Path to where to find binary tools (genrb, etc).",
    default = "../bin"
)
arg_group_exec.add_argument(
    "--tool_cfg",
    help = "The build configuration of the tools. Used in 'windows-exec' mode only.",
    default = "x86/Debug"
)
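# Illustrative invocation (the paths are assumptions and depend on where ICU
# was built and where you run this from); e.g., from icu4c/source after the
# ICU tools have been built:
#
#   $ mkdir -p icudata icutmp
#   $ LD_LIBRARY_PATH=lib PYTHONPATH=data python3 -m buildtool \
#         --mode=unix-exec \
#         --src_dir=data \
#         --filter_file=filters.json \
#         --out_dir=icudata \
#         --tmp_dir=icutmp \
#         --tool_dir=bin
#
# --filter_file is optional; omit it to build the unfiltered data.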
class Config(object):

    def __init__(self, args):
        # Process arguments
        self.max_parallel = (args.seqmode == "parallel")

        # Boolean: Whether to include core Unicode data files in the .dat file
        self.include_uni_core_data = args.include_uni_core_data

        # Default fields before processing filter file
        self.filters_json_data = {}

        # Process filter file
        if args.filter_file:
            try:
                with open(args.filter_file, "r") as f:
                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
                    self._parse_filter_file(f)
            except IOError:
                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
                exit(1)

        # Either "unihan" or "implicithan"
        self.coll_han_type = "unihan"
        if "collationUCAData" in self.filters_json_data:
            self.coll_han_type = self.filters_json_data["collationUCAData"]

    def _parse_filter_file(self, f):
        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
        try:
            import hjson
            self.filters_json_data = hjson.load(f)
        except ImportError:
            self.filters_json_data = json.load(CommentStripper(f))

        # Optionally pre-validate the JSON schema before further processing.
        # Some schema errors will be caught later, but this step ensures
        # maximal validity.
        try:
            import jsonschema
            schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
            with open(schema_path) as schema_f:
                schema = json.load(CommentStripper(schema_f))
            validator = jsonschema.Draft4Validator(schema)
            for error in validator.iter_errors(self.filters_json_data, schema):
                print("WARNING: ICU data filter JSON file:", error.message,
                    "at", "".join(
                        "[%d]" % part if isinstance(part, int) else ".%s" % part
                        for part in error.absolute_path
                    ),
                    file=sys.stderr)
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
            pass

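# Rough sketch of what add_copy_input_requests() below produces for the
# hypothetical "fileReplacements" example near the top of this file: each
# replacement directive becomes one CopyRequest from
# LocalFile("my/replacement/dir", "my_zoneinfo64.txt") to
# InFile("misc/zoneinfo64.txt"), and every remaining InFile consumed by the
# requests gets its own CopyRequest from the corresponding SrcFile, so that
# unreplaced input files are also available under the redirected IN_DIR.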
def add_copy_input_requests(requests, config, common_vars):
    files_to_copy = set()
    for request in requests:
        for f in request.all_input_files():
            if isinstance(f, InFile):
                files_to_copy.add(f)

    result = []
    id = 0

    json_data = config.filters_json_data["fileReplacements"]
    dirname = json_data["directory"]
    for directive in json_data["replacements"]:
        input_file = LocalFile(dirname, directive["src"])
        output_file = InFile(directive["dest"])
        result += [
            CopyRequest(
                name = "input_copy_%d" % id,
                input_file = input_file,
                output_file = output_file
            )
        ]
        files_to_copy.remove(output_file)
        id += 1

    for f in files_to_copy:
        result += [
            CopyRequest(
                name = "input_copy_%d" % id,
                input_file = SrcFile(f.filename),
                output_file = f
            )
        ]
        id += 1

    result += requests
    return result

def main():
    args = flag_parser.parse_args()
    config = Config(args)

    if args.mode == "gnumake":
        makefile_vars = {
            "SRC_DIR": "$(srcdir)",
            "IN_DIR": "$(srcdir)",
            "INDEX_NAME": "res_index"
        }
        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
        common = {
            key: "$(%s)" % key
            for key in list(makefile_vars.keys()) + makefile_env
        }
        common["GLOB_DIR"] = args.src_dir
    else:
        # In exec modes there is no Makefile, so there are no Makefile variables.
        makefile_vars = None
        common = {
            # GLOB_DIR is used now, whereas IN_DIR is used during the execution
            # phase. There is no useful distinction in unix-exec or windows-exec mode.
            "GLOB_DIR": args.src_dir,
            "SRC_DIR": args.src_dir,
            "IN_DIR": args.src_dir,
            "OUT_DIR": args.out_dir,
            "TMP_DIR": args.tmp_dir,
            "INDEX_NAME": "res_index",
            # TODO: Pull this from the configure script:
            "ICUDATA_CHAR": "l"
        }

    def glob(pattern):
        result_paths = pyglob.glob("{GLOB_DIR}/{PATTERN}".format(
            GLOB_DIR = args.src_dir,
            PATTERN = pattern
        ))

        # For the purposes of buildtool, force Unix-style directory separators.
        return [v.replace("\\", "/")[len(args.src_dir)+1:] for v in sorted(result_paths)]
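    # For example (file names are illustrative), glob("locales/*.txt") returns
    # paths relative to --src_dir with forward slashes, such as
    # ["locales/af.txt", "locales/am.txt", ...].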
    requests = BUILDRULES.generate(config, glob, common)
    requests = filtration.apply_filters(requests, config)
    requests = utils.flatten_requests(requests, config, common)

    if "fileReplacements" in config.filters_json_data:
        tmp_in_dir = "{TMP_DIR}/in".format(**common)
        if makefile_vars:
            makefile_vars["IN_DIR"] = tmp_in_dir
        else:
            common["IN_DIR"] = tmp_in_dir
        requests = add_copy_input_requests(requests, config, common)

    build_dirs = utils.compute_directories(requests)

    if args.mode == "gnumake":
        print(makefile.get_gnumake_rules(
            build_dirs,
            requests,
            makefile_vars,
            common_vars = common
        ))
    elif args.mode == "windows-exec":
        return windows_exec.run(
            build_dirs = build_dirs,
            requests = requests,
            common_vars = common,
            tool_dir = args.tool_dir,
            tool_cfg = args.tool_cfg
        )
    elif args.mode == "unix-exec":
        return unix_exec.run(
            build_dirs = build_dirs,
            requests = requests,
            common_vars = common,
            tool_dir = args.tool_dir
        )
    else:
        print("Mode not supported: %s" % args.mode)
        return 1
    return 0

if __name__ == "__main__":
    exit(main())
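# Illustrative gnumake-mode invocation; --mode=gnumake prints the Makefile to
# standard out, so redirect it wherever your data Makefile expects it (the
# rules.mk name here is an assumption):
#
#   $ PYTHONPATH=data python3 -m buildtool --mode=gnumake --src_dir=data > data/rules.mk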