libdot/bin/concat - apps/libapps - Git at Google

 #!/usr/bin/env python3
 # Copyright 2018 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 r"""Concatenate a list of files into a single file.

 The list of files to concat is supplied via a text file using the --input-from
 option.  All filenames in that list are treated relative to the list file
 itself.  Absolute paths also work, but should be avoided when the input file is
 intended to be used by others.

 Long lines may be wrapped using the standard \ notation.

 There are a few directives that can be specified in the input file.  They are...

 @file FILE

   Include the specified file verbatim in the output.  The file is relative to
   the input file.  For convenience, the @file may be omitted.

 @include FILE

   This can be used to include an additional list of files.  It's useful when
   you want to include a list of files specified by a separate project, or
   any time you want to compose lists of dependencies.

   If an included file specifies a file that is already part of the result it
   will not be duplicated.

   When an included file is being processed this script will change the current
   directory to the directory where the FILE was found.  This is to make certain
   that any scripts executed by an included @resource directive happen relative
   to a known location.

 @resource NAME TYPE SOURCE

   NAME - The resource NAME is the name that you'd use to fetch the resource
   with lib.resource.get(name).

   TYPE - If the resource type is 'raw' then the resource will be included
   without any munging.  Otherwise, the resource will be wrapped in a
   JavaScript string. If you specify the type as a valid mimetype then you'll
   be able to get the resource as a 'data:' url easily from
   lib.resource.getDataUrl(...).  If the type ends with ';base64', then the
   input will automatically be base64 encoded.

   SOURCE - There are a variety of source formats that are supported.
   '< FILENAME' is interpreted as a file to read.
   'date <format>' uses the current date/time with strftime.
   'changelog <date|version> [../doc/ChangeLog.md]' loads changelog details.
   'git-rev REV' expands the symbolic revision using `git rev-parse`.
   'head -1 FILENAME' reads the first line from a file.
   'grep REGEX FILENAME' reads all lines from a file that match a regex.

   This includes a resource in the output.  Resources are output as JavaScript
   strings by default but can also be the raw contents, which is useful when you
   want to include a JSON resource.

   The resource directive depends on libdot/js/lib_resource.js, but the
   dependency is not automatically injected.  It's up to your input file to
   include it.
 """

 import argparse
 import base64
 import datetime
 import logging
 import os
 import re
 import subprocess
 import sys
 import textwrap

 import libdot


 def echo_results(output_to, data, header_files):
     """Emit the generated output to |output_to|, or to stdout if it is unset.

     A banner comment naming every entry of |header_files|, followed by a
     single 'use strict' directive, is placed ahead of |data|.
     """
     banner = [
         '// This file was generated by libdot/bin/concat.sh.\n',
         '// It has been marked read-only for your safety.  Rather than\n',
         '// edit it directly, please modify one of these source files.\n',
         '//\n',
     ]
     banner.extend('// %s\n' % (source,) for source in header_files)
     # One strict directive covers the whole generated file.
     banner.append("\n'use strict';\n")

     text = ''.join(banner) + '\n' + data

     if output_to:
         # NOTE(review): libdot.unlink presumably clears out a previous
         # (read-only) output before it is rewritten -- confirm in libdot.
         libdot.unlink(output_to)
         with open(output_to, 'w', encoding='utf-8') as out:
             out.write(text)
         return

     print(text)


 def append_string(s):
     """Create a JavaScript string expression from |s|.

     Every output line is surrounded by single quote ("'") characters and the
     lines are joined with a plus ("+").  Single quotes found in the input are
     escaped.

     Input with an embedded backslash or space is split on its existing line
     boundaries and is never re-wrapped; any other input is wrapped by textwrap
     at a width of 76.  Newlines in the input are dropped either way.

     Args:
       s: The text to convert.

     Returns:
       JavaScript source for the string.  Input that produces no lines (empty,
       or whitespace-only on the wrapping path) yields the empty literal ''
       so the caller always receives a valid expression.
     """
     if '\\' in s or ' ' in s:
         lines = s.splitlines()
     else:
         lines = textwrap.wrap(s, 76)

     if not lines:
         # Returning nothing would leave the caller's lib.resource.add(...) call
         # without its data argument.
         return "''"

     return ' +\n'.join(
         "'%s'" % (line.replace("'", r'\''),) for line in lines)


 def append_resource(name, mime, resource, header_files):
     """Convert a @resource SOURCE into a lib.resource.add(...) JavaScript call.

     This makes the resource available via lib.resource.get(...), and depends
     on libdot/js/lib_resource.js.

     Args:
       name: The resource name to register.
       mime: The resource type.  'raw' emits the data unquoted; anything else
           emits it as a JavaScript string.  A ';base64' parameter base64
           encodes the contents of a '<' file source.
       resource: The SOURCE specification: '< FILE', 'date FORMAT',
           'changelog KEY [PATH]', 'git-rev REV', 'head -1 FILE' or
           'grep REGEX FILE'.
       header_files: List of names for the output header.  A '<' file source
           is appended to it, relative to libdot.LIBAPPS_DIR.

     Returns:
       The JavaScript snippet that registers the resource.

     Raises:
       ValueError: If |resource| is malformed or names an unknown source.
     """
     mode, sep, arg = resource.partition(' ')
     if not sep:
         raise ValueError('Missing arguments to resource: %s' % (resource,))

     if mode == '<':
         # Resource is the contents of an existing file.  A path starting with
         # a dot is resolved against the current directory, anything else
         # against libdot.LIBAPPS_DIR.
         if arg.startswith('.'):
             source = os.path.realpath(arg)
         else:
             source = os.path.join(libdot.LIBAPPS_DIR, arg)

         header_files.append(os.path.relpath(source, libdot.LIBAPPS_DIR))

         with open(source, 'rb') as f:
             data = f.read()

         if 'base64' in mime.split(';'):
             data = base64.b64encode(data)

         # Since we're going to be appending the content directly to the output
         # below, we need to make sure it's a string.  If the input is not in a
         # compatible format (e.g. binary), then we won't be able to append it
         # to the output directly anyways (and why we have base64 support).
         data = data.decode('utf-8')

         # For text formats, escape characters that the textwrap module will
         # consume on us.  This preserves the content when we output it.
         if mime == 'text/plain':
             # Backslashes are doubled since JS handles that escape itself, and
             # code points 9 through 13 become \xNN escapes.  A single pass
             # keeps the backslashes of the new escapes from being doubled.
             escapes = {codepoint: r'\x%02x' % codepoint
                        for codepoint in range(9, 14)}
             escapes[ord('\\')] = '\\\\'
             data = data.translate(escapes)

     elif mode == 'date':
         # Resource is a date/time stamp.  The naive UTC time is derived from
         # an aware one because datetime.utcnow() is deprecated.
         now = datetime.datetime.now(datetime.timezone.utc)
         data = now.replace(tzinfo=None).strftime(arg)
         logging.info('  -> %s', data)

     elif mode == 'changelog':
         # Resource is querying a changelog file.
         argv = arg.split()
         if not argv:
             raise ValueError('Missing changelog key')
         if len(argv) > 2:
             raise ValueError('Too many args to changelog: %s' % (arg,))

         key = argv[0]
         path = argv[1] if len(argv) == 2 else '../doc/ChangeLog.md'

         # The first line of a changelog should look like:
         # # 0.8.44.1, 2018-06-22, Extension startup fix.
         with open(path, encoding='utf-8') as f:
             line = f.readline()
         fields = line.lstrip('#').split(',')

         index = {'version': 0, 'date': 1}.get(key)
         if index is None:
             raise ValueError('Unknown changelog key: %s' % (key,))
         if index >= len(fields):
             # Report a usable error instead of a bare IndexError.
             raise ValueError(
                 'No %s in first line of %s: %r' % (key, path, line))

         data = fields[index].strip()
         logging.info('  -> %s', data)

     elif mode == 'git-rev':
         # Look up current git revision.
         argv = arg.split()
         if not argv:
             raise ValueError('Missing git rev')
         if len(argv) != 1:
             raise ValueError('Too many args to git-rev: %s' % (arg,))
         rev = argv[0]

         # See if there's an env var (e.g. ebuilds).
         vcsid = os.getenv('VCSID')
         if vcsid:
             data = vcsid
             logging.info('  -> %s [$VCSID]', data)
         else:
             # Query git directly.
             output = subprocess.check_output(['git', 'rev-parse', rev])
             data = output.decode('utf-8').strip()
             logging.info('  -> %s', data)

     elif mode == 'head':
         # Peel off the first line.
         argv = arg.split()
         if len(argv) < 2:
             raise ValueError('Missing head settings')
         if len(argv) > 2:
             raise ValueError('Too many args to head: %s' % (arg,))
         count, path = argv

         # We can expand this if need comes up.
         if count != '-1':
             raise ValueError('Only count==-1 supported currently')

         with open(path, encoding='utf-8') as f:
             data = f.readline().strip()
         logging.info('  -> %s', data)

     elif mode == 'grep':
         # Return all lines matching a regex.
         argv = arg.split()
         if len(argv) < 2:
             raise ValueError('Missing grep settings')
         if len(argv) > 2:
             raise ValueError('Too many args to grep: %s' % (arg,))
         regex, path = argv

         # Lines are tested with match(), so the regex is anchored at the
         # start of each line.
         matcher = re.compile(regex)
         with open(path, encoding='utf-8') as f:
             data = ''.join(x for x in f if matcher.match(x))
         logging.info('  -> %s ...', data[:80])

     else:
         raise ValueError('Unknown resource: %s' % (resource,))

     ret = "lib.resource.add('%s', '%s',\n" % (name, mime)

     if mime == 'raw':
         # The resource should be the raw contents of the file or command
         # output.  Great for json data.
         ret += data
     else:
         # Resource should be wrapped in a JS string.
         ret += append_string(data)

     return ret + '\n);\n\n'


 def process_concat_line(line, seen_files, header_files):
     """Process a single line from a concat file.

     Args:
       line: The directive to process.  A line that does not start with '@' is
           treated as '@file LINE'.
       seen_files: Set of resolved paths already emitted; updated in place so
           each file is only output once.
       header_files: List of source names for the output header; updated in
           place.

     Returns:
       The text to add to the output ('' for a file that was already seen).

     Raises:
       ValueError: If the directive is unknown or is missing fields.
     """
     # If the input line doesn't start with an "@", it's just a file to include
     # in the output.
     if not line.startswith('@'):
         line = '@file ' + line

     logging.info(line)

     mode, sep, arg = line.partition(' ')
     if mode not in ('@file', '@resource'):
         raise ValueError('Unknown directive: %s' % (line,))
     if not sep:
         raise ValueError('Missing argument to directive: %s' % (line,))

     if mode == '@resource':
         fields = arg.split(' ', 2)
         if len(fields) != 3:
             raise ValueError('@resource needs NAME TYPE SOURCE: %s' % (line,))
         name, mime, resource = fields
         return append_resource(name, mime, resource, header_files)

     path = os.path.realpath(os.path.join(libdot.LIBAPPS_DIR, arg))
     if path in seen_files:
         return ''
     seen_files.add(path)
     header_files.append(arg)

     with open(path, encoding='utf-8') as f:
         lines = f.readlines()

     # Strip out the 'use strict' directive as we add it ourselves and
     # a lot of tools get confused when it shows up more than once.  Only the
     # first 10 lines are searched.
     try:
         i = lines[0:10].index("'use strict';\n")
     except ValueError:
         pass
     else:
         del lines[i]
         # Drop the blank lines that followed the directive too.  The bounds
         # check matters when the directive was the end of the file.
         while i < len(lines) and lines[i] == '\n':
             del lines[i]

     ret = '// SOURCE FILE: %s\n' % (arg,) + ''.join(lines)

     # Make sure we always emit a trailing newline even if the input didn't.
     return ret.rstrip() + '\n'


 def process_concat_file(path, seen_files, header_files):
     """Process a concat file specified by absolute path.

     The current directory is switched to the directory holding |path| while
     the file is processed, and is restored afterwards even if processing
     fails.

     Args:
       path: Absolute path to the concat list file.
       seen_files: Set of files already emitted; handed on to each directive.
       header_files: List of source names for the output header; handed on to
           each directive.

     Returns:
       The concatenated output for every entry in the file.
     """
     oldwd = os.getcwd()
     os.chdir(os.path.abspath(os.path.dirname(path)))
     try:
         with open(path, encoding='utf-8') as f:
             lines = f.readlines()

         chunks = []
         i = 0
         while i < len(lines):
             line = lines[i].strip()

             # Handle trailing escape as line continuation.
             while line.endswith('\\'):
                 # Strip trailing escape.
                 line = line[:-1]
                 i += 1
                 if i >= len(lines):
                     # A continuation on the last line has nothing to join.
                     line = line.rstrip()
                     break
                 line += lines[i].strip()
             i += 1

             if not line or line.startswith('#'):
                 # Skip blank lines and comments.
                 continue

             if line.startswith('@include '):
                 _, inc_path = line.split(' ', 1)
                 logging.info(inc_path)
                 inc_path = os.path.join(libdot.LIBAPPS_DIR, inc_path)
                 chunks.append(
                     process_concat_file(inc_path, seen_files, header_files))
             else:
                 chunks.append(
                     process_concat_line(line, seen_files, header_files))
     finally:
         # Never leave the caller stranded in the list file's directory.
         os.chdir(oldwd)

     return ''.join(chunks)


 def concat(input_file, output_file):
     """Build |output_file| from the list of sources named in |input_file|."""
     logging.info('Creating %s', output_file)

     # Paths that have already been emitted, so nothing is processed twice.
     seen_files = set()

     # Everything that ends up in the output, for listing in the generated
     # header.  Entries are kept as written in the concat source list so they
     # stay short.
     header_files = []

     output = process_concat_file(input_file, seen_files, header_files)
     echo_results(output_file, output, header_files)


 def get_parser():
     """Build the command line parser for this tool."""
     # The module docstring doubles as the --help description, so keep its
     # layout intact with the raw formatter.
     parser = libdot.ArgumentParser(
         formatter_class=argparse.RawDescriptionHelpFormatter,
         description=__doc__)
     parser.add_argument(
         '-i', '--input-from', required=True,
         help='File containing a list of files to concatenate.')
     parser.add_argument(
         '-o', '--output',
         help='The output file.')
     return parser


 def main(argv):
     """Parse |argv| and run the concatenation.

     Args:
       argv: Command line arguments, without the program name.
     """
     parser = get_parser()
     opts = parser.parse_args(argv)

     input_from = os.path.abspath(opts.input_from)
     # --output is optional.  Leave it unset so the result is written to stdout
     # instead of handing None to os.path.abspath, which raises TypeError.
     output_to = os.path.abspath(opts.output) if opts.output else None
     concat(input_from, output_to)


 if __name__ == '__main__':
     sys.exit(main(sys.argv[1:]))
	#!/usr/bin/env python3
	# Copyright 2018 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	r"""Concatenate a list of files into a single file.

	The list of files to concat is supplied via a text file using the --input-from
	option. All filenames in that list are treated relative to the list file
	itself. Absolute paths also work, but should be avoided when the input file is
	intended to be used by others.

	Long lines may be wrapped using the standard \ notation.

	There are a few directives that can be specified in the input file. They are...

	@file FILE

	Include the specified file verbatim in the output. The file is relative to
	the input file. For convenience, the @file may be omitted.

	@include FILE

	This can be used to include an additional list of files. It's useful when
	you want to include a list of files specified by a separate project, or
	any time you want to compose lists of dependencies.

	If an included file specifies a file that is already part of the result it
	will not be duplicated.

	When an included file is being processed this script will change the current
	directory to the directory where the FILE was found. This is to make certain
	that any scripts executed by an included @resource directive happen relative
	to a known location.

	@resource NAME TYPE SOURCE

	NAME - The resource NAME is the name that you'd use to fetch the resource
	with lib.resource.get(name).

	TYPE - If the resource type is 'raw' then the resource will be included
	without any munging. Otherwise, the resource will be wrapped in a
	JavaScript string. If you specify the type as a valid mimetype then you'll
	be able to get the resource as a 'data:' url easily from
	lib.resource.getDataUrl(...). If the type ends with ';base64', then the
	input will automatically be base64 encoded.

	SOURCE - There are a variety of source formats that are supported.
	'< FILENAME' is interpreted as a file to read.
	'date <format>' uses the current date/time with strftime.
	'changelog <date\|version> [../doc/ChangeLog.md]' loads changelog details.
	'git-rev REV' expands the symbolic revision using `git rev-parse`.
	'head -1 FILENAME' reads the first line from a file.
	'grep REGEX FILENAME' reads all lines from a file that match a regex.

	This includes a resource in the output. Resources are output as JavaScript
	strings by default but can also be the raw contents, which is useful when you
	want to include a JSON resource.

	The resource directive depends on libdot/js/lib_resource.js, but the
	dependency is not automatically injected. It's up to your input file to
	include it.
	"""

	import argparse
	import base64
	import datetime
	import logging
	import os
	import re
	import subprocess
	import sys
	import textwrap

	import libdot


	def echo_results(output_to, data, header_files):
	    """Write |data| to |output_to|, or to stdout if |output_to| is unset.

	    A banner comment naming every entry of |header_files|, followed by a
	    single 'use strict' directive, is placed ahead of |data|.

	    Args:
	      output_to: Path of the file to write, or a false value for stdout.
	      data: The concatenated output text.
	      header_files: Names of the sources to list in the banner.
	    """
	    header = (
	        '// This file was generated by libdot/bin/concat.sh.\n'
	        '// It has been marked read-only for your safety. Rather than\n'
	        '// edit it directly, please modify one of these source files.\n'
	        '//\n'
	    )
	    for f in header_files:
	        header += '// %s\n' % (f,)

	    # Insert the strict directive for the whole file.
	    header += "\n'use strict';\n"

	    data = header + '\n' + data

	    if not output_to:
	        print(data)
	    else:
	        # NOTE(review): libdot.unlink presumably clears out a previous
	        # (read-only) output before it is rewritten -- confirm in libdot.
	        libdot.unlink(output_to)
	        with open(output_to, 'w', encoding='utf-8') as f:
	            f.write(data)


	def append_string(s):
	    """Create a JavaScript string expression from |s|.

	    Every output line is surrounded by single quote ("'") characters and the
	    lines are joined with a plus ("+").  Single quotes found in the input are
	    escaped.

	    Input with an embedded backslash or space is split on its existing line
	    boundaries and is never re-wrapped; any other input is wrapped by textwrap
	    at a width of 76.  Newlines in the input are dropped either way.

	    Args:
	      s: The text to convert.

	    Returns:
	      JavaScript source for the string.  Input that produces no lines (empty,
	      or whitespace-only on the wrapping path) yields the empty literal ''
	      so the caller always receives a valid expression.
	    """
	    if '\\' in s or ' ' in s:
	        lines = s.splitlines()
	    else:
	        lines = textwrap.wrap(s, 76)

	    if not lines:
	        # Returning nothing would leave the caller's lib.resource.add(...) call
	        # without its data argument.
	        return "''"

	    return ' +\n'.join(
	        "'%s'" % (line.replace("'", r'\''),) for line in lines)


	def append_resource(name, mime, resource, header_files):
	    """Convert a @resource SOURCE into a lib.resource.add(...) JavaScript call.

	    This makes the resource available via lib.resource.get(...), and depends
	    on libdot/js/lib_resource.js.

	    Args:
	      name: The resource name to register.
	      mime: The resource type.  'raw' emits the data unquoted; anything else
	          emits it as a JavaScript string.  A ';base64' parameter base64
	          encodes the contents of a '<' file source.
	      resource: The SOURCE specification: '< FILE', 'date FORMAT',
	          'changelog KEY [PATH]', 'git-rev REV', 'head -1 FILE' or
	          'grep REGEX FILE'.
	      header_files: List of names for the output header.  A '<' file source
	          is appended to it, relative to libdot.LIBAPPS_DIR.

	    Returns:
	      The JavaScript snippet that registers the resource.

	    Raises:
	      ValueError: If |resource| is malformed or names an unknown source.
	    """
	    mode, sep, arg = resource.partition(' ')
	    if not sep:
	        raise ValueError('Missing arguments to resource: %s' % (resource,))

	    if mode == '<':
	        # Resource is the contents of an existing file.  A path starting with
	        # a dot is resolved against the current directory, anything else
	        # against libdot.LIBAPPS_DIR.
	        if arg.startswith('.'):
	            source = os.path.realpath(arg)
	        else:
	            source = os.path.join(libdot.LIBAPPS_DIR, arg)

	        header_files.append(os.path.relpath(source, libdot.LIBAPPS_DIR))

	        with open(source, 'rb') as f:
	            data = f.read()

	        if 'base64' in mime.split(';'):
	            data = base64.b64encode(data)

	        # Since we're going to be appending the content directly to the output
	        # below, we need to make sure it's a string.  If the input is not in a
	        # compatible format (e.g. binary), then we won't be able to append it
	        # to the output directly anyways (and why we have base64 support).
	        data = data.decode('utf-8')

	        # For text formats, escape characters that the textwrap module will
	        # consume on us.  This preserves the content when we output it.
	        if mime == 'text/plain':
	            # Backslashes are doubled since JS handles that escape itself, and
	            # code points 9 through 13 become \xNN escapes.  A single pass
	            # keeps the backslashes of the new escapes from being doubled.
	            escapes = {codepoint: r'\x%02x' % codepoint
	                       for codepoint in range(9, 14)}
	            escapes[ord('\\')] = '\\\\'
	            data = data.translate(escapes)

	    elif mode == 'date':
	        # Resource is a date/time stamp.  The naive UTC time is derived from
	        # an aware one because datetime.utcnow() is deprecated.
	        now = datetime.datetime.now(datetime.timezone.utc)
	        data = now.replace(tzinfo=None).strftime(arg)
	        logging.info(' -> %s', data)

	    elif mode == 'changelog':
	        # Resource is querying a changelog file.
	        argv = arg.split()
	        if not argv:
	            raise ValueError('Missing changelog key')
	        if len(argv) > 2:
	            raise ValueError('Too many args to changelog: %s' % (arg,))

	        key = argv[0]
	        path = argv[1] if len(argv) == 2 else '../doc/ChangeLog.md'

	        # The first line of a changelog should look like:
	        # # 0.8.44.1, 2018-06-22, Extension startup fix.
	        with open(path, encoding='utf-8') as f:
	            line = f.readline()
	        fields = line.lstrip('#').split(',')

	        index = {'version': 0, 'date': 1}.get(key)
	        if index is None:
	            raise ValueError('Unknown changelog key: %s' % (key,))
	        if index >= len(fields):
	            # Report a usable error instead of a bare IndexError.
	            raise ValueError(
	                'No %s in first line of %s: %r' % (key, path, line))

	        data = fields[index].strip()
	        logging.info(' -> %s', data)

	    elif mode == 'git-rev':
	        # Look up current git revision.
	        argv = arg.split()
	        if not argv:
	            raise ValueError('Missing git rev')
	        if len(argv) != 1:
	            raise ValueError('Too many args to git-rev: %s' % (arg,))
	        rev = argv[0]

	        # See if there's an env var (e.g. ebuilds).
	        vcsid = os.getenv('VCSID')
	        if vcsid:
	            data = vcsid
	            logging.info(' -> %s [$VCSID]', data)
	        else:
	            # Query git directly.
	            output = subprocess.check_output(['git', 'rev-parse', rev])
	            data = output.decode('utf-8').strip()
	            logging.info(' -> %s', data)

	    elif mode == 'head':
	        # Peel off the first line.
	        argv = arg.split()
	        if len(argv) < 2:
	            raise ValueError('Missing head settings')
	        if len(argv) > 2:
	            raise ValueError('Too many args to head: %s' % (arg,))
	        count, path = argv

	        # We can expand this if need comes up.
	        if count != '-1':
	            raise ValueError('Only count==-1 supported currently')

	        with open(path, encoding='utf-8') as f:
	            data = f.readline().strip()
	        logging.info(' -> %s', data)

	    elif mode == 'grep':
	        # Return all lines matching a regex.
	        argv = arg.split()
	        if len(argv) < 2:
	            raise ValueError('Missing grep settings')
	        if len(argv) > 2:
	            raise ValueError('Too many args to grep: %s' % (arg,))
	        regex, path = argv

	        # Lines are tested with match(), so the regex is anchored at the
	        # start of each line.
	        matcher = re.compile(regex)
	        with open(path, encoding='utf-8') as f:
	            data = ''.join(x for x in f if matcher.match(x))
	        logging.info(' -> %s ...', data[:80])

	    else:
	        raise ValueError('Unknown resource: %s' % (resource,))

	    ret = "lib.resource.add('%s', '%s',\n" % (name, mime)

	    if mime == 'raw':
	        # The resource should be the raw contents of the file or command
	        # output.  Great for json data.
	        ret += data
	    else:
	        # Resource should be wrapped in a JS string.
	        ret += append_string(data)

	    return ret + '\n);\n\n'


	def process_concat_line(line, seen_files, header_files):
	    """Process a single line from a concat file.

	    Args:
	      line: The directive to process.  A line that does not start with '@' is
	          treated as '@file LINE'.
	      seen_files: Set of resolved paths already emitted; updated in place so
	          each file is only output once.
	      header_files: List of source names for the output header; updated in
	          place.

	    Returns:
	      The text to add to the output ('' for a file that was already seen).

	    Raises:
	      ValueError: If the directive is unknown or is missing fields.
	    """
	    # If the input line doesn't start with an "@", it's just a file to include
	    # in the output.
	    if not line.startswith('@'):
	        line = '@file ' + line

	    logging.info(line)

	    mode, sep, arg = line.partition(' ')
	    if mode not in ('@file', '@resource'):
	        raise ValueError('Unknown directive: %s' % (line,))
	    if not sep:
	        raise ValueError('Missing argument to directive: %s' % (line,))

	    if mode == '@resource':
	        fields = arg.split(' ', 2)
	        if len(fields) != 3:
	            raise ValueError('@resource needs NAME TYPE SOURCE: %s' % (line,))
	        name, mime, resource = fields
	        return append_resource(name, mime, resource, header_files)

	    path = os.path.realpath(os.path.join(libdot.LIBAPPS_DIR, arg))
	    if path in seen_files:
	        return ''
	    seen_files.add(path)
	    header_files.append(arg)

	    with open(path, encoding='utf-8') as f:
	        lines = f.readlines()

	    # Strip out the 'use strict' directive as we add it ourselves and
	    # a lot of tools get confused when it shows up more than once.  Only the
	    # first 10 lines are searched.
	    try:
	        i = lines[0:10].index("'use strict';\n")
	    except ValueError:
	        pass
	    else:
	        del lines[i]
	        # Drop the blank lines that followed the directive too.  The bounds
	        # check matters when the directive was the end of the file.
	        while i < len(lines) and lines[i] == '\n':
	            del lines[i]

	    ret = '// SOURCE FILE: %s\n' % (arg,) + ''.join(lines)

	    # Make sure we always emit a trailing newline even if the input didn't.
	    return ret.rstrip() + '\n'


	def process_concat_file(path, seen_files, header_files):
	    """Process a concat file specified by absolute path.

	    The current directory is switched to the directory holding |path| while
	    the file is processed, and is restored afterwards even if processing
	    fails.

	    Args:
	      path: Absolute path to the concat list file.
	      seen_files: Set of files already emitted; handed on to each directive.
	      header_files: List of source names for the output header; handed on to
	          each directive.

	    Returns:
	      The concatenated output for every entry in the file.
	    """
	    oldwd = os.getcwd()
	    os.chdir(os.path.abspath(os.path.dirname(path)))
	    try:
	        with open(path, encoding='utf-8') as f:
	            lines = f.readlines()

	        chunks = []
	        i = 0
	        while i < len(lines):
	            line = lines[i].strip()

	            # Handle trailing escape as line continuation.
	            while line.endswith('\\'):
	                # Strip trailing escape.
	                line = line[:-1]
	                i += 1
	                if i >= len(lines):
	                    # A continuation on the last line has nothing to join.
	                    line = line.rstrip()
	                    break
	                line += lines[i].strip()
	            i += 1

	            if not line or line.startswith('#'):
	                # Skip blank lines and comments.
	                continue

	            if line.startswith('@include '):
	                _, inc_path = line.split(' ', 1)
	                logging.info(inc_path)
	                inc_path = os.path.join(libdot.LIBAPPS_DIR, inc_path)
	                chunks.append(
	                    process_concat_file(inc_path, seen_files, header_files))
	            else:
	                chunks.append(
	                    process_concat_line(line, seen_files, header_files))
	    finally:
	        # Never leave the caller stranded in the list file's directory.
	        os.chdir(oldwd)

	    return ''.join(chunks)


	def concat(input_file, output_file):
	    """Process the concat |input_file| and write the result to |output_file|.

	    Args:
	      input_file: Path to the concat list file.
	      output_file: Path to write, or a false value to print to stdout.
	    """
	    logging.info('Creating %s', output_file)

	    # List of files we've included in the output to be included in the header
	    # of the output.  These paths are kept as specified in the concat source
	    # list so they stay short.
	    header_files = []

	    # Files that we've seen but don't want to process more than once.
	    seen_files = set()

	    data = process_concat_file(input_file, seen_files, header_files)
	    echo_results(output_file, data, header_files)


	def get_parser():
	    """Get a command line parser.

	    Returns:
	      The parser built by libdot.ArgumentParser, with the module docstring
	      as its description and the -i/--input-from and -o/--output options.
	    """
	    parser = libdot.ArgumentParser(
	        description=__doc__,
	        formatter_class=argparse.RawDescriptionHelpFormatter)
	    parser.add_argument('-i', '--input-from', required=True,
	                        help='File containing a list of files to concatenate.')
	    parser.add_argument('-o', '--output',
	                        help='The output file.')
	    return parser


	def main(argv):
	    """Parse |argv| and run the concatenation.

	    Args:
	      argv: Command line arguments, without the program name.
	    """
	    parser = get_parser()
	    opts = parser.parse_args(argv)

	    input_from = os.path.abspath(opts.input_from)
	    # --output is optional.  Leave it unset so the result is written to stdout
	    # instead of handing None to os.path.abspath, which raises TypeError.
	    output_to = os.path.abspath(opts.output) if opts.output else None
	    concat(input_from, output_to)


	if __name__ == '__main__':
	    sys.exit(main(sys.argv[1:]))