blob: dcbe21a9558abfa454fc8085e1aa2535ba26d3bb [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
r"""Concatenate a list of files into a single file.
The list of files to concat is supplied via a text file using the --input-from
option. All filenames in that list are treated relative to the list file
itself. Absolute paths also work, but should be avoided when the input file is
intended to be used by others.
Long lines may be wrapped using the standard \ notation.
There are a few directives that can be specified in the input file. They are...
@file FILE
Include the specified file verbatim in the output. The file is relative to
the input file. For convenience, the @file may be omitted.
@include FILE
This can be used to include an additional list of files. It's useful when
you want to include a list of files specified by a separate project, or
any time you want to compose lists of dependencies.
If an included file specifies a file that is already part of the result it
will not be duplicated.
When an included file is being processed this script will change the current
directory to the directory where the FILE was found. This is to make certain
that any scripts executed by an included @resource directive happen relative
to a known location.
@resource NAME TYPE SOURCE
NAME - The resource NAME is the name that you'd use to fetch the resource
with lib.resource.get(name).
TYPE - If the resource type is 'raw' then the resource will be included
without any munging. Otherwise, the resource will be wrapped in a
JavaScript string. If you specify the type as a valid mimetype then you'll
be able to get the resource as a 'data:' url easily from
lib.resource.getDataUrl(...). If the type ends with ';base64', then the
input will automatically be base64 encoded.
SOURCE - There are a variety of source formats that are supported.
'< FILENAME' is interpreted as a file to read.
'date <format>' uses the current date/time with strftime.
'changelog <date|version> [../doc/ChangeLog.md]' loads changelog details.
'git-rev REV' expands the symbolic revision using `git rev-parse`.
'head -1 FILENAME' reads the first line from a file.
'grep REGEX FILENAME' reads all lines from a file that match a regex.
This includes a resource in the output. Resources are output as JavaScript
strings by default but can also be the raw contents, which is useful when you
want to include a JSON resource.
The resource directive depends on libdot/js/lib_resource.js, but the
dependency is not automatically injected. It's up to your input file to
include it.
"""
import argparse
import base64
import datetime
import logging
import os
import re
import subprocess
import sys
import textwrap
import libdot
def echo_results(output_to, data, header_files):
    """Emit the generated file to |output_to|, or to stdout if it is falsy.

    A banner comment listing every entry of |header_files| and a single
    'use strict' directive are placed in front of |data|.  When writing to a
    path, whatever is currently there is first removed with libdot.unlink()
    and the content is then written as UTF-8.
    """
    banner = [
        '// This file was generated by libdot/bin/concat.sh.\n',
        '// It has been marked read-only for your safety. Rather than\n',
        '// edit it directly, please modify one of these source files.\n',
        '//\n',
    ]
    banner.extend('// %s\n' % (name,) for name in header_files)
    # A single strict directive covers the whole concatenated output.
    banner.append("\n'use strict';\n")
    content = ''.join(banner) + '\n' + data

    if output_to:
        libdot.unlink(output_to)
        with open(output_to, 'w', encoding='utf-8') as outfile:
            outfile.write(content)
    else:
        print(content)
def append_string(s):
    """Create a JavaScript string expression from |s|.

    Each output line is surrounded by single quote ("'") characters, and
    consecutive lines are joined with a plus ("+") so the result is a single
    JavaScript expression.

    Input containing a backslash or a space is split on its existing line
    boundaries only; any other input is wrapped so that each piece holds at
    most 76 characters.  Newlines in the input are stripped.  Single quotes
    found in the input are escaped.

    Args:
        s: The text to convert.

    Returns:
        The JavaScript source for the string.  Input that yields no lines at
        all (e.g. the empty string) produces the empty literal '' so that the
        caller always receives a valid expression.
    """
    if '\\' in s or ' ' in s:
        # Embedded escapes or spaces: don't risk re-flowing the content.
        lines = s.splitlines()
    else:
        lines = textwrap.wrap(s, 76)

    if not lines:
        # Without this the caller would emit a call with a missing argument.
        return "''"

    return ' +\n'.join(
        "'%s'" % (line.replace("'", r'\''),) for line in lines)
def _file_resource(arg, mime, header_files):
    """Return the text of the file named by |arg| (the '< FILENAME' source)."""
    if not arg:
        raise ValueError('Missing resource filename')

    source = arg
    if source.startswith('.'):
        # Dot-relative paths are resolved against the current directory.
        source = os.path.realpath(source)
    else:
        source = os.path.join(libdot.LIBAPPS_DIR, source)
    header_files.append(os.path.relpath(source, libdot.LIBAPPS_DIR))

    with open(source, 'rb') as f:
        data = f.read()
    if 'base64' in mime.split(';'):
        data = base64.b64encode(data)

    # Since we're going to be appending the content directly to the output,
    # we need to make sure it's a string.  If the input is not in a
    # compatible format (e.g. binary), then we won't be able to append it
    # to the output directly anyways (and why we have base64 support).
    data = data.decode('utf-8')

    # For text formats, escape characters that the textwrap module will
    # consume on us.  This preserves the content when we output it.
    if mime == 'text/plain':
        # Escape \ first since JS handles that itself.
        data = data.replace('\\', '\\\\')
        for codepoint in range(9, 14):
            data = data.replace(chr(codepoint), r'\x%02x' % codepoint)
    return data


def _date_resource(arg):
    """Return the current UTC date/time formatted with strftime |arg|."""
    if not arg:
        raise ValueError('Missing date format')
    # Drop the tzinfo again so the value is a naive UTC timestamp and format
    # codes such as %z and %Z expand exactly as they would for utcnow().
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    data = now.strftime(arg)
    logging.info(' -> %s', data)
    return data


def _changelog_resource(arg):
    """Return the version or date field from the first line of a changelog."""
    argv = arg.split()
    if not argv:
        raise ValueError('Missing changelog key')
    if len(argv) > 2:
        raise ValueError('Too many args to changelog: %s' % (arg,))
    key = argv[0]
    path = argv[1] if len(argv) == 2 else '../doc/ChangeLog.md'

    # The first line of a changelog should look like:
    # # 0.8.44.1, 2018-06-22, Extension startup fix.
    with open(path, encoding='utf-8') as f:
        line = f.readline()
    fields = line.lstrip('#').split(',')

    if key == 'version':
        data = fields[0]
    elif key == 'date':
        data = fields[1]
    else:
        raise ValueError('Unknown changelog key: %s' % (key,))
    data = data.strip()
    logging.info(' -> %s', data)
    return data


def _git_rev_resource(arg):
    """Return $VCSID if set, else the output of `git rev-parse` on |arg|."""
    argv = arg.split()
    if not argv:
        raise ValueError('Missing git rev')
    if len(argv) != 1:
        raise ValueError('Too many args to git-rev: %s' % (arg,))
    rev = argv[0]

    # See if there's an env var (e.g. ebuilds).
    vcsid = os.getenv('VCSID')
    if vcsid:
        logging.info(' -> %s [$VCSID]', vcsid)
        return vcsid

    # Query git directly.
    output = subprocess.check_output(['git', 'rev-parse', rev])
    data = output.decode('utf-8').strip()
    logging.info(' -> %s', data)
    return data


def _head_resource(arg):
    """Return the stripped first line of a file (the 'head -1 FILE' source)."""
    argv = arg.split()
    if not argv:
        raise ValueError('Missing head settings')
    if len(argv) != 2:
        raise ValueError(
            'Expected 2 args to head, got %d: %s' % (len(argv), arg))
    count, path = argv
    # We can expand this if need comes up.
    if count != '-1':
        raise ValueError('Only count==-1 supported currently')
    with open(path, encoding='utf-8') as f:
        data = f.readline().strip()
    logging.info(' -> %s', data)
    return data


def _grep_resource(arg):
    """Return every line of a file that re.match() accepts for the regex."""
    argv = arg.split()
    if not argv:
        raise ValueError('Missing grep settings')
    if len(argv) != 2:
        raise ValueError(
            'Expected 2 args to grep, got %d: %s' % (len(argv), arg))
    regex, path = argv
    # Note: re.match anchors at the start of each line.
    matches = re.compile(regex).match
    with open(path, encoding='utf-8') as f:
        data = ''.join(x for x in f if matches(x))
    logging.info(' -> %s ...', data[:80])
    return data


def append_resource(name, mime, resource, header_files):
    """Convert data into a format that can be included in JavaScript.

    This makes the resource available via lib.resource.get(...), and depends
    on libdot/js/lib_resource.js.

    Args:
        name: The name the resource is registered under.
        mime: The resource type.  'raw' emits the data verbatim; anything
            else wraps it in a JavaScript string.  For file sources, a
            'base64' component (e.g. 'image/png;base64') base64 encodes the
            file and 'text/plain' escapes backslashes and the whitespace
            control characters.
        resource: The source specification: a mode word ('<', 'date',
            'changelog', 'git-rev', 'head' or 'grep') followed by a space and
            that mode's arguments.
        header_files: List that file sources append their path to (relative
            to libdot.LIBAPPS_DIR).

    Returns:
        The JavaScript lib.resource.add(...) statement for the resource.

    Raises:
        ValueError: The mode is unknown or its arguments are missing or
            malformed.
    """
    # partition() rather than split() so that a mode with no arguments still
    # reaches the per-mode validation and gets a meaningful error message.
    mode, _, arg = resource.partition(' ')

    if mode == '<':
        data = _file_resource(arg, mime, header_files)
    elif mode == 'date':
        data = _date_resource(arg)
    elif mode == 'changelog':
        data = _changelog_resource(arg)
    elif mode == 'git-rev':
        data = _git_rev_resource(arg)
    elif mode == 'head':
        data = _head_resource(arg)
    elif mode == 'grep':
        data = _grep_resource(arg)
    else:
        raise ValueError('Unknown resource: %s' % (resource,))

    ret = "lib.resource.add('%s', '%s',\n" % (name, mime)
    if mime == 'raw':
        # The resource should be the raw contents of the file or command
        # output.  Great for json data.
        ret += data
    else:
        # Resource should be wrapped in a JS string.
        ret += append_string(data)
    return ret + '\n);\n\n'
def process_concat_line(line, seen_files, header_files):
    """Process a single line from a concat file.

    Args:
        line: The directive to process.  A line that does not start with "@"
            is treated as "@file <line>".
        seen_files: Set of resolved paths already emitted; a @file whose path
            is in the set produces no output, otherwise the path is added.
        header_files: List that the names of included files are appended to.

    Returns:
        The text to add to the output ('' for a file that was already seen).

    Raises:
        ValueError: The directive is unknown or is missing arguments.
    """
    ret = ''

    # If the input line doesn't start with an "@", it's just a file to include
    # in the output.
    if not line.startswith('@'):
        line = '@file ' + line

    logging.info(line)

    mode, sep, arg = line.partition(' ')
    if not sep:
        raise ValueError('Missing argument: %s' % (line,))

    if mode == '@file':
        path = os.path.realpath(os.path.join(libdot.LIBAPPS_DIR, arg))
        if path in seen_files:
            return ''
        seen_files.add(path)

        header_files.append(arg)
        ret += '// SOURCE FILE: %s\n' % (arg,)
        with open(path, encoding='utf-8') as f:
            lines = f.readlines()

        # Strip out the 'use strict' directive as we add it ourselves and
        # a lot of tools get confused when it shows up more than once.
        try:
            i = lines[0:10].index("'use strict';\n")
        except ValueError:
            pass
        else:
            del lines[i]
            # Also drop the blank lines that followed the directive.  The
            # bounds check matters when the directive ends the file.
            while i < len(lines) and lines[i] == '\n':
                del lines[i]
        ret += ''.join(lines)

        # Make sure we always emit a trailing newline even if the input didn't.
        ret = ret.rstrip() + '\n'
    elif mode == '@resource':
        parts = arg.split(' ', 2)
        if len(parts) != 3:
            raise ValueError('Malformed @resource directive: %s' % (line,))
        name, mime, resource = parts
        ret += append_resource(name, mime, resource, header_files)
    else:
        raise ValueError('Unknown directive: %s' % (line,))

    return ret
def process_concat_file(path, seen_files, header_files):
    """Process a concat file specified by absolute path.

    The current directory is switched to the directory holding |path| while
    the file is processed and is always restored afterwards, even when a
    directive fails.

    Args:
        path: Absolute path of the concat file to read.
        seen_files: Set of files already emitted; passed through to
            process_concat_line() and to nested @include files.
        header_files: List collecting the names of the files that were
            included; passed through in the same way.

    Returns:
        The concatenated output for every directive in the file.
    """
    oldwd = os.getcwd()
    os.chdir(os.path.abspath(os.path.dirname(path)))
    try:
        with open(path, encoding='utf-8') as f:
            lines = f.readlines()

        chunks = []
        pending = iter(lines)
        for raw in pending:
            line = raw.strip()

            # Handle trailing escape as line continuation.  A continuation on
            # the very last line has nothing to join with, so it is treated
            # as being followed by an empty line.
            while line.endswith('\\'):
                line = line[:-1] + next(pending, '').strip()

            if not line or line.startswith('#'):
                # Skip blank lines and comments.
                continue

            if line.startswith('@include '):
                _, inc_path = line.split(' ', 1)
                logging.info(inc_path)
                inc_path = os.path.join(libdot.LIBAPPS_DIR, inc_path)
                chunks.append(
                    process_concat_file(inc_path, seen_files, header_files))
            else:
                chunks.append(
                    process_concat_line(line, seen_files, header_files))

        return ''.join(chunks)
    finally:
        os.chdir(oldwd)
def concat(input_file, output_file):
    """Build |output_file| from the concat list in |input_file|."""
    logging.info('Creating %s', output_file)

    # Paths already emitted, so that nothing is processed more than once.
    seen_files = set()

    # Names of everything that went into the output, for the generated
    # header.  These are recorded as written in the concat source list so
    # they stay short and relative to the LIBDOT_SEARCH_PATH.
    header_files = []

    body = process_concat_file(input_file, seen_files, header_files)
    echo_results(output_file, body, header_files)
def get_parser():
    """Build the command line parser for this tool.

    The module docstring doubles as the help description, so the raw
    formatter is used to keep its layout intact.
    """
    parser = libdot.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    parser.add_argument(
        '-i', '--input-from', required=True,
        help='File containing a list of files to concatenate.')
    parser.add_argument(
        '-o', '--output',
        help='The output file.')
    return parser
def main(argv):
    """Run the concat tool.

    Args:
        argv: The command line arguments, without the program name.
    """
    parser = get_parser()
    opts = parser.parse_args(argv)

    input_from = os.path.abspath(opts.input_from)
    # --output is optional.  Leave it as None when omitted so the result is
    # written to stdout; os.path.abspath(None) would raise TypeError.
    output_to = os.path.abspath(opts.output) if opts.output else None

    concat(input_from, output_to)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))