| #!/usr/bin/env python3 | 
 | # Copyright 2021 The Chromium Authors | 
 | # Use of this source code is governed by a BSD-style license that can be | 
 | # found in the LICENSE file. | 
 | """Extracts licenses from a Chrome apk. | 
 |  | 
 | 1) Extracts assets/resources.pak | 
 | 2) Extracts entries using tools/grit/pak_util.py | 
 | 3) Finds about_credits.html | 
 | 4) Converts it to plaintext | 
 |  | 
 | This script requires the "brotli" executable. | 
 | Option 1) sudo apt-get install brotli | 
 | Option 2) "ninja clang_x64/brotli", and use "--brotli clang_x64/brotli" | 
 | """ | 
 | import argparse | 
 | import os | 
 | import pathlib | 
 | import re | 
 | import subprocess | 
 | import sys | 
 | import tempfile | 
 | import zipfile | 
 |  | 
 | _DIR_SOURCE_ROOT = pathlib.Path(__file__).parents[2] | 
 | _PAK_UTIL = _DIR_SOURCE_ROOT / 'tools' / 'grit' / 'pak_util.py' | 
 |  | 
 |  | 
 | def _extract_pak(pak_path, output_dir, brotli): | 
 |   cmd = [ | 
 |       sys.executable, _PAK_UTIL, 'extract', pak_path, '--output-dir', output_dir | 
 |   ] | 
 |   if brotli: | 
 |     cmd += ['--brotli', brotli] | 
 |   try: | 
 |     subprocess.run(cmd, check=True) | 
 |   except subprocess.CalledProcessError as e: | 
 |     sys.stderr.write(str(e) + '\n') | 
 |     sys.exit(1) | 
 |  | 
 |  | 
 | def _find_licenses_file(output_dir): | 
 |   for subpath in os.listdir(output_dir): | 
 |     with open(os.path.join(output_dir, subpath), 'rb') as f: | 
 |       data = f.read(100) | 
 |       if b'Generated by licenses.py' in data: | 
 |         f.seek(0) | 
 |         return f.read().decode('utf8') | 
 |   sys.stderr.write('Could not find credits html in pak file.\n') | 
 |   sys.exit(1) | 
 |  | 
 |  | 
 | def _extract_licenses(chrome_apk, brotli): | 
 |   with tempfile.NamedTemporaryFile() as temp_pak: | 
 |     with zipfile.ZipFile(chrome_apk) as z: | 
 |       temp_pak.write(z.read('assets/resources.pak')) | 
 |       temp_pak.flush() | 
 |     with tempfile.TemporaryDirectory() as temp_d: | 
 |       _extract_pak(temp_pak.name, temp_d, brotli) | 
 |       return _find_licenses_file(temp_d) | 
 |  | 
 |  | 
 | def _transform_html(html): | 
 |   # <span class="title">TITLE</span> | 
 |   # <span class="homepage"><a href="URL">homepage</a></span> | 
 |   # <pre>LICENSE</pre> | 
 |   pattern = re.compile(r'"title".*?>(.*?)<.*?href="(.*?)".*?<pre>(.*?)</pre>', | 
 |                        re.DOTALL) | 
 |   entries = ['Open-source libraries used by Chrome:\n'] | 
 |   for title, url, text in pattern.findall(html): | 
 |     entry = ['Project: ' + title, 'URL: ' + url, '', text] | 
 |     entries.append('\n'.join(entry)) | 
 |  | 
 |   actual_count = len(entries) - 1 | 
 |   expected_count = html.count('</pre>') | 
 |   if expected_count != actual_count: | 
 |     sys.stderr.write( | 
 |         f'Parsed {actual_count} but should have parse {expected_count}\n') | 
 |     sys.exit(1) | 
 |  | 
 |   sep = '\n' + '=' * 80 + '\n' | 
 |   return sep.join(entries) | 
 |  | 
 |  | 
 | def main(): | 
 |   parser = argparse.ArgumentParser() | 
 |   parser.add_argument('--brotli', | 
 |                       help='Path to brotli executable if not on PATH.') | 
 |   parser.add_argument('--chrome-apk', | 
 |                       required=True, | 
 |                       help='Path to .apk with assets/resources.pak in it.') | 
 |   parser.add_argument('--output', required=True, help='Output file path.') | 
 |   parser.add_argument('--raw', | 
 |                       action='store_true', | 
 |                       help='Do not convert to plain text.') | 
 |   args = parser.parse_args() | 
 |  | 
 |   data = _extract_licenses(args.chrome_apk, args.brotli) | 
 |   with open(args.output, 'w', encoding='utf8') as f: | 
 |     if not args.raw: | 
 |       data = _transform_html(data) | 
 |     f.write(data) | 
 |  | 
 |  | 
 | if __name__ == '__main__': | 
 |   main() |