blob: ea2ffbc4d834ed9126dfe619ce3976dde3d8dc2e [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Extracts licenses from a Chrome apk.
1) Extracts assets/resources.pak
2) Extracts entries using tools/grit/pak_util.py
3) Finds about_credits.html
4) Converts it to plaintext
This script requires the "brotli" executable.
Option 1) sudo apt-get install brotli
Option 2) "ninja clang_x64/brotli", and use "--brotli clang_x64/brotli"
"""
import argparse
import os
import pathlib
import re
import subprocess
import sys
import tempfile
import zipfile
# Directory two levels above the one containing this script. The path is
# resolved first because __file__ may be relative (e.g. for a script run by
# path on Python < 3.9), in which case it would not have enough parent
# components and indexing .parents[2] would raise IndexError.
_DIR_SOURCE_ROOT = pathlib.Path(__file__).resolve().parents[2]
# Location of pak_util.py, which is run to unpack .pak files.
_PAK_UTIL = _DIR_SOURCE_ROOT / 'tools' / 'grit' / 'pak_util.py'
def _extract_pak(pak_path, output_dir, brotli):
  """Unpacks |pak_path| into |output_dir| by running pak_util.py.

  Args:
    pak_path: Path of the .pak file to unpack.
    output_dir: Directory handed to pak_util.py via --output-dir.
    brotli: Path of the brotli executable, forwarded via --brotli. A falsy
        value omits the flag.

  Prints the error to stderr and exits with status 1 if pak_util.py returns
  a non-zero exit code.
  """
  brotli_args = ['--brotli', brotli] if brotli else []
  argv = [
      sys.executable,
      _PAK_UTIL,
      'extract',
      pak_path,
      '--output-dir',
      output_dir,
      *brotli_args,
  ]
  try:
    subprocess.run(argv, check=True)
  except subprocess.CalledProcessError as err:
    sys.stderr.write(f'{err}\n')
    sys.exit(1)
def _find_licenses_file(output_dir):
  """Returns the contents of the credits HTML found in |output_dir|.

  The credits file is identified by the marker "Generated by licenses.py"
  appearing within its first 100 bytes.

  Args:
    output_dir: Directory containing the entries extracted from the pak.

  Returns:
    The full contents of the first matching file, decoded as UTF-8.

  Prints an error to stderr and exits with status 1 if no file matches.
  """
  marker = b'Generated by licenses.py'
  for name in os.listdir(output_dir):
    path = os.path.join(output_dir, name)
    # Directories (and other non-regular entries) cannot be opened for
    # reading; skip them rather than crashing.
    if not os.path.isfile(path):
      continue
    with open(path, 'rb') as f:
      # Only sniff the header so large binary resources are not read fully.
      if marker in f.read(100):
        f.seek(0)
        return f.read().decode('utf8')
  sys.stderr.write('Could not find credits html in pak file.\n')
  sys.exit(1)
def _extract_licenses(chrome_apk, brotli):
  """Returns the credits HTML stored in the resources.pak of |chrome_apk|.

  Args:
    chrome_apk: Path of an .apk (zip) containing assets/resources.pak.
    brotli: Path of the brotli executable to pass to pak_util.py, or a
        falsy value to not pass one.

  Returns:
    The credits HTML as a str.

  Prints an error to stderr and exits with status 1 if the apk has no
  assets/resources.pak entry (or if a later extraction step fails).
  """
  pak_entry = 'assets/resources.pak'
  # Everything lives in one temporary directory. The pak is written to a
  # regular file that is closed before pak_util.py opens it, since an open
  # NamedTemporaryFile cannot be re-opened by name on every platform.
  with tempfile.TemporaryDirectory() as temp_d:
    pak_path = os.path.join(temp_d, 'resources.pak')
    output_dir = os.path.join(temp_d, 'extracted')
    os.mkdir(output_dir)
    with zipfile.ZipFile(chrome_apk) as z:
      try:
        pak_data = z.read(pak_entry)
      except KeyError:
        # ZipFile.read() raises KeyError for a missing member; report it in
        # the same way as the other failures in this script.
        sys.stderr.write(f'Could not find {pak_entry} in {chrome_apk}.\n')
        sys.exit(1)
    with open(pak_path, 'wb') as f:
      f.write(pak_data)
    _extract_pak(pak_path, output_dir, brotli)
    return _find_licenses_file(output_dir)
def _transform_html(html):
  """Converts the credits HTML into a plain-text listing of licenses.

  Each entry in the HTML is expected to look like:
    <span class="title">TITLE</span>
    <span class="homepage"><a href="URL">homepage</a></span>
    <pre>LICENSE</pre>

  Args:
    html: Contents of the credits page as a str.

  Returns:
    A str made of a header line followed by one section per entry
    ("Project: ...", "URL: ...", blank line, license text), with sections
    separated by a line of 80 '=' characters. HTML character references in
    the title, URL and license text are decoded.

  Prints an error to stderr and exits with status 1 if the number of parsed
  entries differs from the number of </pre> tags in |html|.
  """
  # The |html| parameter shadows the stdlib module of the same name, so
  # import just the function that is needed.
  from html import unescape

  pattern = re.compile(r'"title".*?>(.*?)<.*?href="(.*?)".*?<pre>(.*?)</pre>',
                       re.DOTALL)
  entries = ['Open-source libraries used by Chrome:\n']
  for title, url, text in pattern.findall(html):
    # Decode entities such as &amp; and &lt; so the output is plain text
    # rather than HTML-escaped text.
    entry = [
        'Project: ' + unescape(title),
        'URL: ' + unescape(url),
        '',
        unescape(text),
    ]
    entries.append('\n'.join(entry))

  # Sanity check: every <pre> block should have produced exactly one entry.
  actual_count = len(entries) - 1
  expected_count = html.count('</pre>')
  if expected_count != actual_count:
    sys.stderr.write(
        f'Parsed {actual_count} but should have parsed {expected_count}\n')
    sys.exit(1)

  sep = '\n' + '=' * 80 + '\n'
  return sep.join(entries)
def main():
  """Parses command-line flags and writes the extracted licenses to a file.

  Flags:
    --brotli: Optional path to the brotli executable.
    --chrome-apk: Required path to the .apk to read.
    --output: Required path of the file to write.
    --raw: Write the credits HTML as-is instead of converting to plain text.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--brotli',
                      help='Path to brotli executable if not on PATH.')
  parser.add_argument('--chrome-apk',
                      required=True,
                      help='Path to .apk with assets/resources.pak in it.')
  parser.add_argument('--output', required=True, help='Output file path.')
  parser.add_argument('--raw',
                      action='store_true',
                      help='Do not convert to plain text.')
  args = parser.parse_args()

  data = _extract_licenses(args.chrome_apk, args.brotli)
  # Convert before opening the output so that a conversion failure (which
  # exits the process) does not leave a truncated or empty output file.
  if not args.raw:
    data = _transform_html(data)
  with open(args.output, 'w', encoding='utf8') as f:
    f.write(data)


if __name__ == '__main__':
  main()