android/extract_licenses_from_apk.py - chromium/src/tools - Git at Google

 #!/usr/bin/env python3
 # Copyright 2021 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Extracts licenses from a Chrome apk.

 1) Extracts assets/resources.pak
 2) Extracts entries using tools/grit/pak_util.py
 3) Finds about_credits.html
 4) Converts it to plaintext

 This script requires the "brotli" executable.
 Option 1) sudo apt-get install brotli
 Option 2) "ninja clang_x64/brotli", and use "--brotli clang_x64/brotli"
 """
 import argparse
 import os
 import pathlib
 import re
 import subprocess
 import sys
 import tempfile
 import zipfile

 # Directory three levels above this file, used as the source root. The path is
 # resolved first because a relative __file__ (e.g. when the script is launched
 # from its own directory) has too few components for parents[2].
 _DIR_SOURCE_ROOT = pathlib.Path(__file__).resolve().parents[2]
 # Location of the pak utility that is invoked to unpack resources.pak.
 _PAK_UTIL = _DIR_SOURCE_ROOT / 'tools' / 'grit' / 'pak_util.py'


 def _extract_pak(pak_path, output_dir, brotli):
   """Runs pak_util.py to unpack a .pak file into a directory.

   Args:
     pak_path: Path of the .pak file to unpack.
     output_dir: Directory passed to pak_util.py via --output-dir.
     brotli: Optional path to the brotli executable; when falsy, no --brotli
       flag is passed.

   Exits the process with status 1 if the pak_util.py invocation fails.
   """
   brotli_args = ['--brotli', brotli] if brotli else []
   argv = [
       sys.executable,
       _PAK_UTIL,
       'extract',
       pak_path,
       '--output-dir',
       output_dir,
       *brotli_args,
   ]
   try:
     subprocess.run(argv, check=True)
   except subprocess.CalledProcessError as err:
     # Report the failed command on stderr and stop with a failure status.
     sys.stderr.write(f'{err}\n')
     sys.exit(1)


 def _find_licenses_file(output_dir):
   """Returns the text of the extracted file that carries the licenses marker.

   Every entry of |output_dir| is opened and its first 100 bytes are checked
   for the marker "Generated by licenses.py".

   Args:
     output_dir: Directory containing the extracted pak entries.

   Returns:
     The full contents of the first matching file, decoded as UTF-8.

   Exits the process with status 1 if no entry contains the marker.
   """
   marker = b'Generated by licenses.py'
   for name in os.listdir(output_dir):
     with open(os.path.join(output_dir, name), 'rb') as candidate:
       header = candidate.read(100)
       if marker not in header:
         continue
       # The header was already consumed; append the remainder of the file.
       return (header + candidate.read()).decode('utf8')
   sys.stderr.write('Could not find credits html in pak file.\n')
   sys.exit(1)


 def _extract_licenses(chrome_apk, brotli):
   """Pulls the credits page out of an apk's assets/resources.pak.

   Args:
     chrome_apk: Path to the .apk (opened as a zip archive).
     brotli: Optional path to the brotli executable, forwarded to
       _extract_pak().

   Returns:
     The value returned by _find_licenses_file() for the unpacked pak.
   """
   with tempfile.NamedTemporaryFile() as pak_file:
     with zipfile.ZipFile(chrome_apk) as apk:
       pak_bytes = apk.read('assets/resources.pak')
     pak_file.write(pak_bytes)
     # Flush so the subprocess reading the file by name sees the full contents.
     pak_file.flush()
     with tempfile.TemporaryDirectory() as unpack_dir:
       _extract_pak(pak_file.name, unpack_dir, brotli)
       licenses_html = _find_licenses_file(unpack_dir)
   return licenses_html


 def _transform_html(html):
   """Converts the credits HTML page into a plain-text license listing.

   Each entry in the page is expected to look like:
     <span class="title">TITLE</span>
     <span class="homepage"><a href="URL">homepage</a></span>
     <pre>LICENSE</pre>

   HTML character references (e.g. &amp;, &lt;) in the title, URL and license
   text are decoded so that the output really is plain text.

   Args:
     html: Contents of the credits page as a str.

   Returns:
     A str made of a header line followed by one "Project / URL / license"
     section per entry, separated by lines of 80 '=' characters.

   Exits the process with status 1 if the number of parsed entries differs
   from the number of </pre> tags in the page.
   """
   # Imported locally under another name: the |html| parameter shadows the
   # standard-library module of the same name inside this function.
   from html import unescape as unescape_entities

   pattern = re.compile(r'"title".*?>(.*?)<.*?href="(.*?)".*?<pre>(.*?)</pre>',
                        re.DOTALL)
   matches = pattern.findall(html)

   # Every entry ends in exactly one </pre>, so the tag count is used as a
   # sanity check that the regex did not skip or merge any entry.
   actual_count = len(matches)
   expected_count = html.count('</pre>')
   if expected_count != actual_count:
     sys.stderr.write(
         f'Parsed {actual_count} but should have parsed {expected_count}\n')
     sys.exit(1)

   entries = ['Open-source libraries used by Chrome:\n']
   for title, url, text in matches:
     entry = [
         'Project: ' + unescape_entities(title),
         'URL: ' + unescape_entities(url),
         '',
         unescape_entities(text),
     ]
     entries.append('\n'.join(entry))

   sep = '\n' + '=' * 80 + '\n'
   return sep.join(entries)


 def main():
   """Parses command-line flags, extracts the licenses and writes the output.

   Flags:
     --brotli: Optional path to the brotli executable.
     --chrome-apk: Required path to the .apk containing assets/resources.pak.
     --output: Required path of the file to write.
     --raw: Write the credits HTML as-is instead of converting to plain text.
   """
   parser = argparse.ArgumentParser()
   parser.add_argument('--brotli',
                       help='Path to brotli executable if not on PATH.')
   parser.add_argument('--chrome-apk',
                       required=True,
                       help='Path to .apk with assets/resources.pak in it.')
   parser.add_argument('--output', required=True, help='Output file path.')
   parser.add_argument('--raw',
                       action='store_true',
                       help='Do not convert to plain text.')
   args = parser.parse_args()

   data = _extract_licenses(args.chrome_apk, args.brotli)
   # Convert before opening the output: a parse failure exits the process, and
   # doing so with the file already open would leave an empty output behind.
   if not args.raw:
     data = _transform_html(data)
   with open(args.output, 'w', encoding='utf8') as f:
     f.write(data)


 # Run the command-line entry point only when executed as a script.
 if __name__ == '__main__':
   main()
	#!/usr/bin/env python3
	# Copyright 2021 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Extracts licenses from a Chrome apk.

	1) Extracts assets/resources.pak
	2) Extracts entries using tools/grit/pak_util.py
	3) Finds about_credits.html
	4) Converts it to plaintext

	This script requires the "brotli" executable.
	Option 1) sudo apt-get install brotli
	Option 2) "ninja clang_x64/brotli", and use "--brotli clang_x64/brotli"
	"""
	import argparse
	import os
	import pathlib
	import re
	import subprocess
	import sys
	import tempfile
	import zipfile

	# Directory three levels above this file, used as the source root. The path is
	# resolved first because a relative __file__ (e.g. when the script is launched
	# from its own directory) has too few components for parents[2].
	_DIR_SOURCE_ROOT = pathlib.Path(__file__).resolve().parents[2]
	# Location of the pak utility that is invoked to unpack resources.pak.
	_PAK_UTIL = _DIR_SOURCE_ROOT / 'tools' / 'grit' / 'pak_util.py'


	def _extract_pak(pak_path, output_dir, brotli):
	  """Runs pak_util.py to unpack a .pak file into a directory.

	  Args:
	    pak_path: Path of the .pak file to unpack.
	    output_dir: Directory passed to pak_util.py via --output-dir.
	    brotli: Optional path to the brotli executable; when falsy, no --brotli
	      flag is passed.

	  Exits the process with status 1 if the pak_util.py invocation fails.
	  """
	  cmd = [
	      sys.executable, _PAK_UTIL, 'extract', pak_path, '--output-dir', output_dir
	  ]
	  if brotli:
	    cmd += ['--brotli', brotli]
	  try:
	    subprocess.run(cmd, check=True)
	  except subprocess.CalledProcessError as e:
	    # Report the failed command on stderr and stop with a failure status.
	    sys.stderr.write(str(e) + '\n')
	    sys.exit(1)


	def _find_licenses_file(output_dir):
	  """Returns the text of the extracted file that carries the licenses marker.

	  Every entry of |output_dir| is opened and its first 100 bytes are checked
	  for the marker "Generated by licenses.py".

	  Args:
	    output_dir: Directory containing the extracted pak entries.

	  Returns:
	    The full contents of the first matching file, decoded as UTF-8.

	  Exits the process with status 1 if no entry contains the marker.
	  """
	  for subpath in os.listdir(output_dir):
	    with open(os.path.join(output_dir, subpath), 'rb') as f:
	      data = f.read(100)
	      if b'Generated by licenses.py' in data:
	        # Rewind so the returned text includes the bytes already inspected.
	        f.seek(0)
	        return f.read().decode('utf8')
	  sys.stderr.write('Could not find credits html in pak file.\n')
	  sys.exit(1)


	def _extract_licenses(chrome_apk, brotli):
	  """Pulls the credits page out of an apk's assets/resources.pak.

	  Args:
	    chrome_apk: Path to the .apk (opened as a zip archive).
	    brotli: Optional path to the brotli executable, forwarded to
	      _extract_pak().

	  Returns:
	    The value returned by _find_licenses_file() for the unpacked pak.
	  """
	  with tempfile.NamedTemporaryFile() as temp_pak:
	    with zipfile.ZipFile(chrome_apk) as z:
	      temp_pak.write(z.read('assets/resources.pak'))
	      # Flush so the subprocess reading the file by name sees all of it.
	      temp_pak.flush()
	    with tempfile.TemporaryDirectory() as temp_d:
	      _extract_pak(temp_pak.name, temp_d, brotli)
	      return _find_licenses_file(temp_d)


	def _transform_html(html):
	  """Converts the credits HTML page into a plain-text license listing.

	  Each entry in the page is expected to look like:
	    <span class="title">TITLE</span>
	    <span class="homepage"><a href="URL">homepage</a></span>
	    <pre>LICENSE</pre>

	  HTML character references (e.g. &amp;, &lt;) in the title, URL and license
	  text are decoded so that the output really is plain text.

	  Args:
	    html: Contents of the credits page as a str.

	  Returns:
	    A str made of a header line followed by one "Project / URL / license"
	    section per entry, separated by lines of 80 '=' characters.

	  Exits the process with status 1 if the number of parsed entries differs
	  from the number of </pre> tags in the page.
	  """
	  # Imported locally under another name: the |html| parameter shadows the
	  # standard-library module of the same name inside this function.
	  from html import unescape as unescape_entities

	  # Non-greedy ".*?" groups are required: each field spans many characters
	  # and must stop at the first following delimiter.
	  pattern = re.compile(r'"title".*?>(.*?)<.*?href="(.*?)".*?<pre>(.*?)</pre>',
	                       re.DOTALL)
	  matches = pattern.findall(html)

	  # Every entry ends in exactly one </pre>, so the tag count is used as a
	  # sanity check that the regex did not skip or merge any entry.
	  actual_count = len(matches)
	  expected_count = html.count('</pre>')
	  if expected_count != actual_count:
	    sys.stderr.write(
	        f'Parsed {actual_count} but should have parsed {expected_count}\n')
	    sys.exit(1)

	  entries = ['Open-source libraries used by Chrome:\n']
	  for title, url, text in matches:
	    entry = [
	        'Project: ' + unescape_entities(title),
	        'URL: ' + unescape_entities(url),
	        '',
	        unescape_entities(text),
	    ]
	    entries.append('\n'.join(entry))

	  sep = '\n' + '=' * 80 + '\n'
	  return sep.join(entries)


	def main():
	  """Parses command-line flags, extracts the licenses and writes the output.

	  Flags:
	    --brotli: Optional path to the brotli executable.
	    --chrome-apk: Required path to the .apk containing assets/resources.pak.
	    --output: Required path of the file to write.
	    --raw: Write the credits HTML as-is instead of converting to plain text.
	  """
	  parser = argparse.ArgumentParser()
	  parser.add_argument('--brotli',
	                      help='Path to brotli executable if not on PATH.')
	  parser.add_argument('--chrome-apk',
	                      required=True,
	                      help='Path to .apk with assets/resources.pak in it.')
	  parser.add_argument('--output', required=True, help='Output file path.')
	  parser.add_argument('--raw',
	                      action='store_true',
	                      help='Do not convert to plain text.')
	  args = parser.parse_args()

	  data = _extract_licenses(args.chrome_apk, args.brotli)
	  # Convert before opening the output: a parse failure exits the process, and
	  # doing so with the file already open would leave an empty output behind.
	  if not args.raw:
	    data = _transform_html(data)
	  with open(args.output, 'w', encoding='utf8') as f:
	    f.write(data)


	# Run the command-line entry point only when executed as a script.
	if __name__ == '__main__':
	  main()