| #!/usr/bin/env python3 |
| # Copyright 2018 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| r"""Concatenate a list of files into a single file. |
| |
| The list of files to concat is supplied via a text file using the --input-from |
| option. All filenames in that list are treated relative to the list file |
| itself. Absolute paths also work, but should be avoided when the input file is |
| intended to be used by others. |
| |
| Long lines may be wrapped using the standard \ notation. |
| |
| There are a few directives that can be specified in the input file. They are... |
| |
| @file FILE |
| |
| Include the specified file verbatim in the output. The file is relative to |
| the input file. For convenience, the @file may be omitted. |
| |
| @include FILE |
| |
| This can be used to include an additional list of files. It's useful when |
| you want to include a list of files specified by a separate project, or |
| any time you want to compose lists of dependencies. |
| |
| If an included file specifies a file that is already part of the result it |
| will not be duplicated. |
| |
| When an included file is being processed this script will change the current |
| directory to the directory where the FILE was found. This is to make certain |
| that any scripts executed by an included @resource directive happen relative |
| to a known location. |
| |
| @resource NAME TYPE SOURCE |
| |
| NAME - The resource NAME is the name that you'd use to fetch the resource |
| with lib.resource.get(name). |
| |
| TYPE - If the resource type is 'raw' then the resource will be included |
| without any munging. Otherwise, the resource will be wrapped in a |
| JavaScript string. If you specify the type as a valid mimetype then you'll |
| be able to get the resource as a 'data:' url easily from |
| lib.resource.getDataUrl(...). If the type ends with ';base64', then the |
| input will automatically be base64 encoded. |
| |
| SOURCE - There are a variety of source formats that are supported. |
| '< FILENAME' is interpreted as a file to read. |
| 'date <format>' uses the current date/time with strftime. |
| 'changelog <date|version> [../doc/ChangeLog.md]' loads changelog details. |
| 'git-rev REV' expands the symbolic revision using `git rev-parse`. |
| 'head -1 FILENAME' reads the first line from a file. |
| 'grep REGEX FILENAME' reads all lines from a file that match a regex. |
| |
| This includes a resource in the output. Resources are output as JavaScript |
| strings by default but can also be the raw contents, which is useful when you |
| want to include a JSON resource. |
| |
| The resource directive depends on libdot/js/lib_resource.js, but the |
| dependency is not automatically injected. It's up to your input file to |
| include it. |
| """ |
| |
| import argparse |
| import base64 |
| import datetime |
| import logging |
| import os |
| import re |
| import subprocess |
| import sys |
| import textwrap |
| |
| import libdot |
| |
| |
def echo_results(output_to, data, header_files):
    """Emit |data| behind a generated-file banner.

    Args:
      output_to: Path of the file to write.  When false (None or ''), the
          result is printed to stdout instead.
      data: The already concatenated body text.
      header_files: Names of the source files to list in the banner, one
          comment line each.
    """
    banner = [
        '// This file was generated by libdot/bin/concat.sh.\n',
        '// It has been marked read-only for your safety. Rather than\n',
        '// edit it directly, please modify one of these source files.\n',
        '//\n',
    ]
    banner.extend('// %s\n' % (name,) for name in header_files)

    # A single strict directive covers the whole generated file.
    banner.append("\n'use strict';\n")

    text = ''.join(banner) + '\n' + data

    if output_to:
        # NOTE(review): presumably clears a previous (possibly read-only)
        # output before rewriting it -- confirm against libdot.unlink.
        libdot.unlink(output_to)
        with open(output_to, 'w', encoding='utf-8') as out:
            out.write(text)
        return

    print(text)
| |
| |
def append_string(s):
    """Create a JavaScript string expression from |s|.

    Each output line is surrounded in single quote ("'") characters, and
    consecutive lines are joined with a plus ("+") so the result is one
    JavaScript string concatenation.

    Input containing a backslash or a space is never re-wrapped: it is only
    split at its existing line breaks.  Any other input is wrapped with
    textwrap at 76 characters of content per line (before quoting).

    Newlines in the input are stripped.

    Single quotes found in the input are escaped.

    Args:
      s: The text to convert.

    Returns:
      The JavaScript expression as a str.  Input that yields no lines at all
      (empty or whitespace-only) produces the empty literal '' so the caller
      always receives a valid expression.
    """
    if '\\' in s or ' ' in s:
        lines = s.splitlines()
    else:
        lines = textwrap.wrap(s, 76)

    if not lines:
        # Without this the result would be an empty str, leaving the caller's
        # generated lib.resource.add(...) call without its string argument.
        return "''"

    return ' +\n'.join(
        "'%s'" % (line.replace("'", r'\''),) for line in lines)
| |
| |
def append_resource(name, mime, resource, header_files):
    """Convert data into a format that can be included in JavaScript.

    This makes the resource available via lib.resource.get(...), and depends
    on libdot/js/lib_resource.js.

    Args:
      name: The resource name passed to lib.resource.add().
      mime: The resource type.  'raw' emits the data unmodified; anything else
          is wrapped in a JavaScript string.  A ';base64' component base64
          encodes file contents, and 'text/plain' escapes backslashes and the
          whitespace control characters \x09-\x0d of file contents.
      resource: The source specification in the form "MODE ARGS", where MODE
          is one of '<', 'date', 'changelog', 'git-rev', 'head', or 'grep'.
      header_files: List that the path of any file read via '<' is appended
          to (relative to libdot.LIBAPPS_DIR).

    Returns:
      The JavaScript lib.resource.add(...) statement as a str.

    Raises:
      ValueError: The resource specification is malformed or unknown.
    """
    mode, sep, arg = resource.partition(' ')
    if not sep:
        # Every mode takes at least one argument; fail with a clear message
        # rather than a tuple-unpacking error.
        raise ValueError('Missing resource arguments: %s' % (resource,))

    if mode == '<':
        # Resource is the contents of an existing file.
        source = arg

        # Paths starting with '.' resolve against the current directory;
        # everything else is looked up under LIBAPPS_DIR.
        if source.startswith('.'):
            source = os.path.realpath(source)
        else:
            source = os.path.join(libdot.LIBAPPS_DIR, source)

        header_files.append(os.path.relpath(source, libdot.LIBAPPS_DIR))

        with open(source, 'rb') as f:
            data = f.read()

        if 'base64' in mime.split(';'):
            data = base64.b64encode(data)

        # Since we're going to be appending the content directly to the output
        # below, we need to make sure it's a string.  If the input is not in a
        # compatible format (e.g. binary), then we won't be able to append it
        # to the output directly anyways (and why we have base64 support).
        data = data.decode('utf-8')

        # For text formats, escape characters that the textwrap module will
        # consume on us.  This preserves the content when we output it.
        if mime == 'text/plain':
            # Escape \ first since JS handles that itself.
            data = data.replace('\\', '\\\\')
            for codepoint in range(9, 14):
                data = data.replace(chr(codepoint), r'\x%02x' % codepoint)

    elif mode == 'date':
        # Resource is a date/time stamp.  datetime.utcnow() is deprecated, so
        # take an aware UTC time and drop the tzinfo to keep the formatted
        # output (including %z/%Z) identical to the naive value used before.
        now = datetime.datetime.now(datetime.timezone.utc)
        data = now.replace(tzinfo=None).strftime(arg)
        logging.info(' -> %s', data)

    elif mode == 'changelog':
        # Resource is querying a changelog file.
        argv = arg.split()
        if not argv:
            raise ValueError('Missing changelog key')

        key = argv[0]
        if len(argv) == 1:
            path = '../doc/ChangeLog.md'
        elif len(argv) == 2:
            path = argv[1]
        else:
            raise ValueError('Too many args to changelog: %s' % (arg,))

        # The first line of a changelog should look like:
        # # 0.8.44.1, 2018-06-22, Extension startup fix.
        with open(path, encoding='utf-8') as f:
            line = f.readline().lstrip('#')

        fields = line.split(',')
        if key == 'version':
            data = fields[0]
        elif key == 'date':
            data = fields[1]
        else:
            raise ValueError('Unknown changelog key: %s' % (key,))

        data = data.strip()
        logging.info(' -> %s', data)

    elif mode == 'git-rev':
        # Look up current git revision.
        argv = arg.split()
        if not argv:
            raise ValueError('Missing git rev')
        if len(argv) != 1:
            raise ValueError('Too many args to git-rev: %s' % (arg,))
        rev = argv[0]

        # See if there's an env var (e.g. ebuilds).
        vcsid = os.getenv('VCSID')
        if vcsid:
            data = vcsid
            logging.info(' -> %s [$VCSID]', data)
        else:
            # Query git directly.
            output = subprocess.check_output(['git', 'rev-parse', rev])
            data = output.decode('utf-8').strip()
            logging.info(' -> %s', data)

    elif mode == 'head':
        # Peel off the first line.
        argv = arg.split()
        if len(argv) < 2:
            raise ValueError('Missing head settings')
        if len(argv) > 2:
            raise ValueError('Too many args to head: %s' % (arg,))
        count, path = argv

        # We can expand this if need comes up.
        if count != '-1':
            raise ValueError('Only count==-1 supported currently')

        with open(path, encoding='utf-8') as f:
            data = f.readline().strip()
        logging.info(' -> %s', data)

    elif mode == 'grep':
        # Return all lines matching a regex.
        argv = arg.split()
        if len(argv) < 2:
            raise ValueError('Missing grep settings')
        if len(argv) > 2:
            raise ValueError('Too many args to grep: %s' % (arg,))
        regex, path = argv

        # NOTE(review): re.match only matches at the start of each line,
        # unlike grep(1) which searches anywhere in the line.  Left as-is
        # since existing input files may rely on it.
        with open(path, encoding='utf-8') as f:
            data = ''.join(x for x in f if re.match(regex, x))
        logging.info(' -> %s ...', data[:80])

    else:
        raise ValueError('Unknown resource: %s' % (resource,))

    ret = "lib.resource.add('%s', '%s',\n" % (name, mime)

    if mime == 'raw':
        # The resource should be the raw contents of the file or command
        # output.  Great for json data.
        ret += data
    else:
        # Resource should be wrapped in a JS string.
        ret += append_string(data)

    return ret + '\n);\n\n'
| |
| |
def process_concat_line(line, seen_files, header_files):
    """Process a single line from a concat file.

    Args:
      line: The (already stripped) line.  A line that does not start with
          "@" is treated as "@file LINE".
      seen_files: Set of resolved paths already emitted; a file found in it
          is skipped, otherwise its path is added.
      header_files: List that the names of included files are appended to.

    Returns:
      The text to add to the output as a str ('' for a duplicate file).

    Raises:
      ValueError: The directive is unknown or is missing its arguments.
    """
    # If the input line doesn't start with an "@", it's just a file to include
    # in the output.
    if not line.startswith('@'):
        line = '@file ' + line

    logging.info(line)

    mode, _, arg = line.partition(' ')
    if mode not in ('@file', '@resource'):
        raise ValueError('Unknown directive: %s' % (line,))
    if not arg:
        raise ValueError('Missing argument for directive: %s' % (line,))

    if mode == '@resource':
        fields = arg.split(' ', 2)
        if len(fields) != 3:
            raise ValueError(
                '@resource requires NAME TYPE SOURCE: %s' % (line,))
        name, mime, resource = fields
        return append_resource(name, mime, resource, header_files)

    # Everything else is an @file directive.
    path = os.path.realpath(os.path.join(libdot.LIBAPPS_DIR, arg))
    if path in seen_files:
        return ''
    seen_files.add(path)
    header_files.append(arg)

    with open(path, encoding='utf-8') as f:
        lines = f.readlines()

    # Strip out the 'use strict' directive as we add it ourselves and
    # a lot of tools get confused when it shows up more than once.  Only the
    # first ten lines are searched.
    try:
        i = lines[0:10].index("'use strict';\n")
    except ValueError:
        pass
    else:
        lines.pop(i)
        # Drop the blank lines that followed the directive.  The bounds check
        # matters when the directive was the last non-blank line of the file.
        while i < len(lines) and lines[i] == '\n':
            lines.pop(i)

    ret = '// SOURCE FILE: %s\n' % (arg,) + ''.join(lines)

    # Make sure we always emit a trailing newline even if the input didn't.
    return ret.rstrip() + '\n'
| |
| |
def process_concat_file(path, seen_files, header_files):
    """Process a concat file and return the combined output text.

    The current directory is switched to the directory holding |path| while
    the file is processed, and is restored afterwards even if processing
    fails.

    Args:
      path: The concat (list) file to process.  Relative paths are resolved
          against the current directory before it is changed.
      seen_files: Set of files already emitted, shared across includes.
      header_files: List collecting the names of the files that were used.

    Returns:
      The concatenated text produced by every line of the file, as a str.
    """
    # Resolve first: once we chdir, a relative path would no longer open.
    path = os.path.abspath(path)

    oldwd = os.getcwd()
    os.chdir(os.path.dirname(path))
    try:
        with open(path, encoding='utf-8') as f:
            lines = f.readlines()

        chunks = []
        i = 0
        while i < len(lines):
            line = lines[i].strip()

            # Handle trailing escape as line continuation.
            while line.endswith('\\'):
                # Strip trailing escape.
                line = line[:-1]
                if i + 1 >= len(lines):
                    # Dangling continuation on the last line: nothing left
                    # to join, so use what we have.
                    break
                i += 1
                line += lines[i].strip()

            i += 1

            if not line or line.startswith('#'):
                # Skip blank lines and comments.
                continue

            if line.startswith('@include '):
                _, inc_path = line.split(' ', 1)
                logging.info(inc_path)
                inc_path = os.path.join(libdot.LIBAPPS_DIR, inc_path)
                chunks.append(
                    process_concat_file(inc_path, seen_files, header_files))
            else:
                chunks.append(
                    process_concat_line(line, seen_files, header_files))
    finally:
        # Always put the caller back where it started.
        os.chdir(oldwd)

    return ''.join(chunks)
| |
| |
def concat(input_file, output_file):
    """Build the output for the concat |input_file| and emit it.

    Args:
      input_file: The concat list file handed to process_concat_file().
      output_file: The destination handed to echo_results().
    """
    logging.info('Creating %s', output_file)

    # seen_files: files already emitted, so nothing is processed twice.
    # header_files: names of the files that went into the output; they are
    # listed in the header of the generated result.
    seen_files, header_files = set(), []

    body = process_concat_file(input_file, seen_files, header_files)
    echo_results(output_file, body, header_files)
| |
| |
def get_parser():
    """Build the command line parser for this script.

    Returns:
      A libdot.ArgumentParser with the --input-from (required) and --output
      options registered, using the module docstring as its description.
    """
    parser = libdot.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (flags, keyword settings) for each supported option.
    options = (
        (('-i', '--input-from'),
         {'required': True,
          'help': 'File containing a list of files to concatenate.'}),
        (('-o', '--output'),
         {'help': 'The output file.'}),
    )
    for flags, settings in options:
        parser.add_argument(*flags, **settings)

    return parser
| |
| |
def main(argv):
    """Parse |argv| and run the concatenation.

    Args:
      argv: The command line arguments (without the program name).
    """
    opts = get_parser().parse_args(argv)

    input_from = os.path.abspath(opts.input_from)

    # --output is optional: echo_results() prints to stdout when no output
    # path is given, so leave it unset rather than passing None to abspath()
    # (which raises TypeError).
    # NOTE(review): assumes libdot.ArgumentParser defaults a missing option to
    # None like argparse does -- confirm.
    output_to = os.path.abspath(opts.output) if opts.output else None

    concat(input_from, output_to)
| |
| |
if __name__ == '__main__':
    # Run as a script: hand main() the arguments after the program name and
    # use its return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)