| #!/usr/bin/env python3 |
| # Copyright 2021 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Extracts licenses from a Chrome apk. |
| |
| 1) Extracts assets/resources.pak |
| 2) Extracts entries using tools/grit/pak_util.py |
| 3) Finds about_credits.html |
| 4) Converts it to plaintext |
| |
| This script requires the "brotli" executable. |
| Option 1) sudo apt-get install brotli |
| Option 2) "ninja clang_x64/brotli", and use "--brotli clang_x64/brotli" |
| """ |
| import argparse |
| import os |
| import pathlib |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| import zipfile |
| |
# Directory two levels above the one containing this script; pak_util.py is
# located relative to it.
_DIR_SOURCE_ROOT = pathlib.Path(__file__).parents[2]
_PAK_UTIL = _DIR_SOURCE_ROOT.joinpath('tools', 'grit', 'pak_util.py')
| |
| |
def _extract_pak(pak_path, output_dir, brotli):
  """Runs pak_util.py's "extract" command on a .pak file.

  Args:
    pak_path: Path of the .pak file to extract.
    output_dir: Directory passed to pak_util.py via --output-dir.
    brotli: Path to the brotli executable, or a falsy value to omit the
      --brotli flag.

  Exits with status 1 (after writing the error to stderr) if the subprocess
  returns a non-zero exit code.
  """
  brotli_args = ['--brotli', brotli] if brotli else []
  argv = [
      sys.executable, _PAK_UTIL, 'extract', pak_path, '--output-dir',
      output_dir, *brotli_args
  ]
  try:
    subprocess.run(argv, check=True)
  except subprocess.CalledProcessError as err:
    sys.stderr.write(f'{err}\n')
    sys.exit(1)
| |
| |
def _find_licenses_file(output_dir):
  """Returns the credits HTML found among the files in |output_dir|.

  A file is treated as the credits page when the marker
  "Generated by licenses.py" appears within its first 100 bytes.

  Args:
    output_dir: Directory whose direct children are searched.

  Returns:
    The full contents of the first matching file (in sorted name order),
    decoded as UTF-8.

  Exits with status 1 (after writing to stderr) if no file matches.
  """
  marker = b'Generated by licenses.py'
  # Sorted so that the result does not depend on directory listing order.
  for name in sorted(os.listdir(output_dir)):
    path = os.path.join(output_dir, name)
    # Only regular files can be the credits page; open() on a directory
    # would raise instead of letting the search continue.
    if not os.path.isfile(path):
      continue
    with open(path, 'rb') as f:
      head = f.read(100)
      if marker in head:
        # |head| already holds the first bytes; append the remainder.
        return (head + f.read()).decode('utf8')
  sys.stderr.write('Could not find credits html in pak file.\n')
  sys.exit(1)
| |
| |
def _extract_licenses(chrome_apk, brotli):
  """Returns the credits HTML stored in an apk's assets/resources.pak.

  Args:
    chrome_apk: Path to the .apk (opened as a zip archive).
    brotli: Path to the brotli executable, or a falsy value; forwarded to
      _extract_pak().

  Returns:
    The credits page as returned by _find_licenses_file().
  """
  with tempfile.NamedTemporaryFile() as pak_file:
    with zipfile.ZipFile(chrome_apk) as apk:
      pak_bytes = apk.read('assets/resources.pak')
      pak_file.write(pak_bytes)
    # Flush so the extraction subprocess, which opens the file by name, sees
    # everything that was written.
    pak_file.flush()
    with tempfile.TemporaryDirectory() as unpack_dir:
      _extract_pak(pak_file.name, unpack_dir, brotli)
      credits_html = _find_licenses_file(unpack_dir)
  return credits_html
| |
| |
def _transform_html(html):
  """Converts the credits HTML page into a plain-text license listing.

  Each library in the page is expected to look like:
    <span class="title">TITLE</span>
    <span class="homepage"><a href="URL">homepage</a></span>
    <pre>LICENSE</pre>

  Args:
    html: Contents of the credits page as a str.

  Returns:
    A str made of a header line followed by one "Project / URL / license
    text" entry per library, with consecutive parts separated by a line of
    80 "=" characters. HTML character references (e.g. "&lt;", "&amp;") in
    the title, URL and license text are decoded.

  Exits with status 1 (after writing to stderr) if the number of parsed
  entries differs from the number of "</pre>" tags in the page.
  """
  # The parameter shadows the stdlib module name, so import the helper
  # directly rather than the module.
  from html import unescape

  pattern = re.compile(r'"title".*?>(.*?)<.*?href="(.*?)".*?<pre>(.*?)</pre>',
                       re.DOTALL)
  matches = pattern.findall(html)

  # Sanity check: every "</pre>" should have produced exactly one entry.
  actual_count = len(matches)
  expected_count = html.count('</pre>')
  if expected_count != actual_count:
    sys.stderr.write(
        f'Parsed {actual_count} but should have parsed {expected_count}\n')
    sys.exit(1)

  entries = ['Open-source libraries used by Chrome:\n']
  for title, url, text in matches:
    # The captured groups are raw HTML; decode character references so the
    # output is real plain text.
    entry = [
        'Project: ' + unescape(title), 'URL: ' + unescape(url), '',
        unescape(text)
    ]
    entries.append('\n'.join(entry))

  sep = '\n' + '=' * 80 + '\n'
  return sep.join(entries)
| |
| |
def main():
  """Command-line entry point: writes an apk's license text to a file.

  Parses --brotli, --chrome-apk, --output and --raw, extracts the credits
  page from the apk and writes it to --output, either converted to plain
  text or, with --raw, unchanged.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--brotli',
                      help='Path to brotli executable if not on PATH.')
  parser.add_argument('--chrome-apk',
                      required=True,
                      help='Path to .apk with assets/resources.pak in it.')
  parser.add_argument('--output', required=True, help='Output file path.')
  parser.add_argument('--raw',
                      action='store_true',
                      help='Do not convert to plain text.')
  args = parser.parse_args()

  data = _extract_licenses(args.chrome_apk, args.brotli)
  # Convert before opening the output: _transform_html() exits on a parse
  # mismatch, and opening with 'w' first would leave a truncated file behind.
  if not args.raw:
    data = _transform_html(data)
  with open(args.output, 'w', encoding='utf8') as f:
    f.write(data)
| |
| |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
  main()