| #!/usr/bin/python |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Checks third-party licenses for the purposes of the Android WebView build. |
| |
| The Android tree includes a snapshot of Chromium in order to power the system |
| WebView. This tool checks that all code uses open-source licenses compatible |
| with Android, and that we meet the requirements of those licenses. It can also |
| be used to generate an Android NOTICE file for the third-party code. |
| |
| It makes use of src/tools/licenses.py and the README.chromium files on which |
| it depends. It also makes use of a data file, third_party_files_whitelist.txt, |
| which whitelists individual files which contain third-party code but which |
| aren't in a third-party directory with a README.chromium file. |
| """ |
| |
| import glob |
| import imp |
| import multiprocessing |
| import optparse |
| import os |
| import re |
| import subprocess |
| import sys |
| import textwrap |
| |
| |
# Absolute path of the checkout root: two directory levels above the
# directory that contains this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..', '..'))

# third_party/PRESUBMIT.py is loaded by explicit path through imp so that an
# unrelated PRESUBMIT.py reachable some other way can never be picked up.
# Bytecode writing is switched off first so that loading it does not leave a
# .pyc file behind.
sys.dont_write_bytecode = True
third_party = imp.load_source(
    'PRESUBMIT',
    os.path.join(REPOSITORY_ROOT, 'third_party', 'PRESUBMIT.py'))

# Make <root>/tools importable before importing licenses.
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
import licenses

import known_issues
| |
class InputApi(object):
  """Stand-in object handed to third_party.LicenseIsCompatibleWithAndroid().

  The only thing it exposes is the standard regular expression module, as
  the instance attribute `re`.
  """

  def __init__(self):
    # Expose the `re` module imported at the top of this file.
    self.re = re
| |
def GetIncompatibleDirectories():
  """Lists third-party directories whose license is incompatible with Android.

  This is used by the snapshot tool. A directory is reported when it is
  listed in known_issues.KNOWN_ISSUES, or when its parsed metadata neither
  sets 'License Android Compatible' to yes (compared case-insensitively) nor
  names a 'License' accepted by third_party.LicenseIsCompatibleWithAndroid().

  Returns:
    A list of directories.
  Raises:
    licenses.LicenseError: re-raised, after printing the directory name, when
      licenses.ParseDir fails for a directory.
  """
  incompatible = []
  for directory in _FindThirdPartyDirs():
    if directory not in known_issues.KNOWN_ISSUES:
      try:
        metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
                                     require_license_file=False)
      except licenses.LicenseError:
        # Say which directory failed before letting the error propagate.
        print('Got LicenseError while scanning ' + directory)
        raise
      # An explicit opt-in in the metadata wins over the license name.
      if metadata.get('License Android Compatible', 'no').upper() == 'YES':
        continue
      # Strip a trailing " license" / " licenses" word before the check.
      license_name = re.split(' [Ll]icenses?$', metadata['License'])[0]
      if third_party.LicenseIsCompatibleWithAndroid(InputApi(), license_name):
        continue
    incompatible.append(directory)
  return incompatible
| |
def GetUnknownIncompatibleDirectories():
  """Lists incompatibly licensed directories missing from known_issues.py.

  Takes the result of GetIncompatibleDirectories() and removes every path
  derived from known_issues.KNOWN_INCOMPATIBLE. That mapping goes from a base
  path to a list of exclude entries. An entry whose last component is a glob
  is reduced to its directory; an entry whose directory part is itself a glob
  is reported and skipped.
  This is used by the AOSP bot.

  Returns:
    A frozenset of directories.
  """
  incompatible_directories = frozenset(GetIncompatibleDirectories())
  known_incompatible = set()
  for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.items():
    for exclude in exclude_list:
      if glob.has_magic(exclude):
        exclude_dirname = os.path.dirname(exclude)
        if glob.has_magic(exclude_dirname):
          print('Exclude path %s contains an unexpected glob expression,'
                ' skipping.' % exclude)
          # Actually skip the entry, as the message says: a path that still
          # contains a glob cannot name a real directory.
          continue
        exclude = exclude_dirname
      known_incompatible.add(os.path.normpath(os.path.join(path, exclude)))
  return incompatible_directories.difference(known_incompatible)
| |
| |
class ScanResult(object):
  """Outcome codes of a scan.

  main() returns these values and the script passes them to sys.exit(), so
  they double as the process exit status.
  """
  # Everything is in order.
  Ok = 0
  # Non-fatal problems were found.
  Warnings = 1
  # Fatal problems were found.
  Errors = 2
| |
# Needs to be a top-level function for multiprocessing
def _FindCopyrights(files_to_scan):
  """Finds files carrying a copyright other than the standard Chromium one.

  Runs android_webview/tools/find_copyrights.pl from the repository root,
  feeding it files_to_scan on standard input. Each output line is parsed as
  '<path><TAB><copyrights>', where the copyrights are separated by ' / '.

  Args:
    files_to_scan: The text to write to the script's standard input.
  Returns:
    A list of normalized paths of files that have at least one non-empty
    copyright entry which is neither '*No copyright*' nor a Chromium Authors
    notice. Lines marked 'GENERATED FILE' are ignored.
  """
  args = [os.path.join('android_webview', 'tools', 'find_copyrights.pl')]
  p = subprocess.Popen(
      args=args, cwd=REPOSITORY_ROOT,
      stdin=subprocess.PIPE, stdout=subprocess.PIPE)
  lines = p.communicate(files_to_scan)[0].splitlines()

  # Raw strings keep the regex escapes (\*, \.) out of Python's own string
  # escape processing; the resulting pattern text is unchanged.
  allowed_copyrights_re = re.compile(
      r'^(?:\*No copyright\*'
      r'|20[0-9][0-9](?:-20[0-9][0-9])? The Chromium Authors\. '
      r'All rights reserved.*)$')

  offending_files = []
  for line in lines:
    entries = line.split('\t')
    if entries[1] == "GENERATED FILE":
      continue
    copyrights = entries[1].split(' / ')
    # One unrecognised, non-empty copyright is enough to flag the file.
    if any(c and not allowed_copyrights_re.match(c) for c in copyrights):
      offending_files.append(os.path.normpath(entries[0]))
  return offending_files
| |
| def _ShardString(s, delimiter, shard_len): |
| result = [] |
| index = 0 |
| last_pos = 0 |
| for m in re.finditer(delimiter, s): |
| index += 1 |
| if index % shard_len == 0: |
| result.append(s[last_pos:m.end()]) |
| last_pos = m.end() |
| if not index % shard_len == 0: |
| result.append(s[last_pos:]) |
| return result |
| |
def _CheckLicenseHeaders(excluded_dirs_list, whitelisted_files):
  """Checks that all files which are not in a listed third-party directory,
  and which do not use the standard Chromium license, are whitelisted.

  The files to examine are listed by android_webview/tools/find_files.pl,
  run from the repository root, and are then scanned for copyrights in
  parallel by _FindCopyrights.

  Args:
    excluded_dirs_list: The list of directories to exclude from scanning.
      The caller's list is not modified.
    whitelisted_files: The whitelist of files, as paths relative to the
      repository root.
  Returns:
    ScanResult.Ok if all files with non-standard license headers are
    whitelisted and the whitelist contains no stale entries;
    ScanResult.Warnings if there are stale entries or whitelisted files that
    do not exist;
    ScanResult.Errors if new non-whitelisted entries found.
  """
  # Individual third-party directories are replaced by the single
  # 'third_party' pattern below, which makes the ignore regexp shorter.
  excluded_dirs_list = [d for d in excluded_dirs_list
                        if 'third_party' not in d]
  excluded_dirs_list.extend([
      'third_party',
      # VCS dirs
      '.git',
      '.svn',
      # Build output
      'out/Debug',
      'out/Release',
      # 'Copyright' appears in license agreements
      'chrome/app/resources',
      # Quickoffice js files from internal src used on buildbots.
      # crbug.com/350472.
      'chrome/browser/resources/chromeos/quickoffice',
      # This is a test output directory
      'chrome/tools/test/reference_build',
      # Blink-style copyright headers.
      'content/shell/renderer/test_runner',
      # Blink-style copyright headers.
      'content/shell/tools/plugin',
      # This is a tests directory, doesn't exist in the snapshot
      'content/test/data',
      # This is a tests directory that doesn't exist in the shipped product.
      'gin/test',
      # This is a test output directory
      'data/dom_perf',
      # This is a tests directory that doesn't exist in the shipped product.
      'tools/perf/page_sets',
      'tools/perf/page_sets/tough_animation_cases',
      # Histogram tools, doesn't exist in the snapshot
      'tools/histograms',
      # Swarming tools, doesn't exist in the snapshot
      'tools/swarming_client',
      # Arm sysroot tools, doesn't exist in the snapshot
      'arm-sysroot',
      # Data is not part of open source chromium, but is included on some
      # bots.
      'data',
      # This is not part of open source chromium, but is included on some
      # bots.
      'skia/tools/clusterfuzz-data',
  ])

  args = [os.path.join('android_webview', 'tools', 'find_files.pl'),
          '.'
          ] + excluded_dirs_list
  p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
  files_to_scan = p.communicate()[0]

  # Scan the file list in shards of 2000 lines, one worker call per shard.
  sharded_files_to_scan = _ShardString(files_to_scan, '\n', 2000)
  pool = multiprocessing.Pool()
  # NOTE(review): the very large timeout passed to get() is presumably there
  # so that the wait can be interrupted from the keyboard -- confirm.
  offending_files_chunks = pool.map_async(
      _FindCopyrights, sharded_files_to_scan).get(999999)
  pool.close()
  pool.join()
  # Flatten out the result
  offending_files = set(
      item for sublist in offending_files_chunks for item in sublist)
  whitelisted = set(whitelisted_files)

  unknown = offending_files - whitelisted
  if unknown:
    print('The following files contain a third-party license but are not in '
          'a listed third-party directory and are not whitelisted. You must '
          'add the following files to the whitelist.\n%s' %
          '\n'.join(sorted(unknown)))

  stale = whitelisted - offending_files
  if stale:
    print('The following files are whitelisted unnecessarily. You must '
          'remove the following files from the whitelist.\n%s' %
          '\n'.join(sorted(stale)))

  # Whitelist entries are relative to the repository root, so resolve them
  # against it rather than against the current working directory.
  missing = [f for f in whitelisted_files
             if not os.path.exists(os.path.join(REPOSITORY_ROOT, f))]
  if missing:
    print('The following files are whitelisted, but do not exist.\n%s' %
          '\n'.join(sorted(missing)))

  if unknown:
    return ScanResult.Errors
  elif stale or missing:
    return ScanResult.Warnings
  else:
    return ScanResult.Ok
| |
| |
def _ReadFile(path):
  """Reads a file from disk.

  Args:
    path: The path of the file to read, relative to the root of the
      repository.
  Returns:
    The contents of the file as a string; the file is opened in binary mode.
  """
  # The with-statement closes the file as soon as it has been read instead
  # of leaving that to the garbage collector.
  with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
    return f.read()
| |
| |
def _FindThirdPartyDirs():
  """Gets the list of third-party directories.

  The directories come from licenses.FindThirdPartyDirs, called with a fixed
  set of paths to prune, and are then passed through
  licenses.FilterDirsWithFiles.

  Returns:
    The list of third-party directories.
  """
  # Please don't add here paths that have problems with license files,
  # as they will end up included in Android WebView snapshot.
  # Instead, add them into known_issues.py.
  pruned_components = (
      # Temporary until we figure out how not to check out quickoffice on the
      # Android license check bot. Tracked in crbug.com/350472.
      ('chrome', 'browser', 'resources', 'chromeos', 'quickoffice'),
      # Placeholder directory, no third-party code.
      ('third_party', 'adobe'),
      # Apache 2.0 license. See
      # https://code.google.com/p/chromium/issues/detail?id=140478.
      ('third_party', 'bidichecker'),
      # Isn't checked out on clients
      ('third_party', 'gles2_conform'),
      # The llvm-build doesn't exist for non-clang builder
      ('third_party', 'llvm-build'),
      # Binaries that don't apply to android
      ('third_party', 'widevine'),
      # third_party directories in this tree aren't actually third party, but
      # provide a way to shadow experimental buildfiles into those
      # directories.
      ('build', 'secondary'),
      # Not shipped, Chromium code
      ('tools', 'swarming_client'),
  )
  prune_paths = [os.path.join(*components) for components in pruned_components]
  candidates = licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
  return licenses.FilterDirsWithFiles(candidates, REPOSITORY_ROOT)
| |
| |
def _Scan():
  """Checks that license meta-data is present for all third-party code and
  that all non third-party code doesn't contain external copyrighted code.

  Both checks are always run, so that problems of either kind are printed
  in a single pass.

  Returns:
    ScanResult.Ok if everything is in order;
    ScanResult.Warnings if there are non-fatal problems (e.g. stale whitelist
      entries)
    ScanResult.Errors otherwise.
  """
  third_party_dirs = _FindThirdPartyDirs()

  # First, check designated third-party directories using
  # src/tools/licenses.py. Failures in directories listed in
  # known_issues.KNOWN_ISSUES are tolerated silently.
  all_licenses_valid = True
  for path in sorted(third_party_dirs):
    try:
      licenses.ParseDir(path, REPOSITORY_ROOT)
    except licenses.LicenseError as e:
      if path not in known_issues.KNOWN_ISSUES:
        print('Got LicenseError "%s" while scanning %s' % (e, path))
        all_licenses_valid = False

  # Second, check for non-standard license text.
  files_data = _ReadFile(os.path.join('android_webview', 'tools',
                                      'third_party_files_whitelist.txt'))
  # A whitelist entry is the leading run of characters on a line up to the
  # first whitespace or '#'. Lines that start with either yield no entry.
  entry_re = re.compile(r'([^#\s]+)')
  whitelisted_files = []
  for line in files_data.splitlines():
    match = entry_re.match(line)
    if match:
      whitelisted_files.append(match.group(1))
  licenses_check = _CheckLicenseHeaders(third_party_dirs, whitelisted_files)

  return licenses_check if all_licenses_valid else ScanResult.Errors
| |
| |
def GenerateNoticeFile():
  """Generates the contents of an Android NOTICE file for the third-party code.

  The result is Chromium's own LICENSE file followed by the license file of
  every third-party directory, in sorted directory order, joined by newlines.
  Directories whose 'License File' metadata is empty or equal to
  licenses.NOT_SHIPPED contribute nothing.
  This is used by the snapshot tool.

  Returns:
    The contents of the NOTICE file.
  """
  third_party_dirs = _FindThirdPartyDirs()

  # Don't forget Chromium's LICENSE file
  sections = [_ReadFile('LICENSE')]

  # We provide attribution for all third-party directories.
  # TODO(steveblock): Limit this to only code used by the WebView binary.
  for directory in sorted(third_party_dirs):
    license_file = licenses.ParseDir(
        directory, REPOSITORY_ROOT,
        require_license_file=False)['License File']
    if not license_file or license_file == licenses.NOT_SHIPPED:
      continue
    sections.append(_ReadFile(license_file))

  return '\n'.join(sections)
| |
| |
def _ProcessIncompatibleResult(incompatible_directories):
  """Reports incompatibly licensed directories and maps them to a result.

  Args:
    incompatible_directories: A collection of directory paths; may be empty.
  Returns:
    ScanResult.Ok if the collection is empty; otherwise ScanResult.Errors,
    after printing the directories in sorted order.
  """
  if not incompatible_directories:
    return ScanResult.Ok
  listing = "\n".join(sorted(incompatible_directories))
  print("Incompatibly licensed directories found:\n" + listing)
  return ScanResult.Errors
| |
def main():
  """Parses the command line and runs the requested command.

  Exactly one positional argument is expected: 'scan', 'notice',
  'incompatible_directories' or 'all_incompatible_directories'. Anything
  else prints the help text.

  Returns:
    A ScanResult value, suitable for use as the process exit status.
  """
  class FormatterWithNewLines(optparse.IndentedHelpFormatter):
    """Help formatter that wraps every line of the description on its own,
    keeping the line breaks of the description text."""

    def format_description(self, description):
      wrapped = []
      for para in description.split('\n'):
        wrapped.append(textwrap.fill(para, self.width))
      return '\n'.join(wrapped) + '\n'

  commands_help = (
      '\nCommands:\n'
      ' scan Check licenses.\n'
      ' notice Generate Android NOTICE file on stdout.\n'
      ' incompatible_directories Scan for incompatibly'
      ' licensed directories.\n'
      ' all_incompatible_directories Scan for incompatibly'
      ' licensed directories (even those in'
      ' known_issues.py).\n')
  parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                 usage='%prog [options]')
  parser.description = __doc__ + commands_help
  (_, args) = parser.parse_args()

  if len(args) == 1:
    command = args[0]
    if command == 'scan':
      scan_result = _Scan()
      if scan_result == ScanResult.Ok:
        print('OK!')
      return scan_result
    if command == 'notice':
      print(GenerateNoticeFile())
      return ScanResult.Ok
    if command == 'incompatible_directories':
      return _ProcessIncompatibleResult(GetUnknownIncompatibleDirectories())
    if command == 'all_incompatible_directories':
      return _ProcessIncompatibleResult(GetIncompatibleDirectories())

  # Wrong number of arguments or an unknown command.
  parser.print_help()
  return ScanResult.Errors
| |
# When run as a script, the ScanResult value returned by main() becomes the
# process exit status.
if __name__ == '__main__':
  sys.exit(main())