blob: 82938626c7e48a5c1473af30ea894ba71bff4e31 [file] [log] [blame]
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Checks third-party licenses for the purposes of the Android WebView build.
The Android tree includes a snapshot of Chromium in order to power the system
WebView. This tool checks that all code uses open-source licenses compatible
with Android, and that we meet the requirements of those licenses. It can also
be used to generate an Android NOTICE file for the third-party code.
It makes use of src/tools/licenses.py and the README.chromium files on which
it depends. It also makes use of a data file, third_party_files_whitelist.txt,
which whitelists individual files which contain third-party code but which
aren't in a third-party directory with a README.chromium file.
"""
import optparse
import os
import re
import subprocess
import sys
import textwrap
# Absolute path of the repository root: two directory levels above the
# directory that contains this script.
REPOSITORY_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir))

# The 'licenses' module lives in <root>/tools, so that directory has to be on
# sys.path before the import below can succeed.
sys.path.append(os.path.join(REPOSITORY_ROOT, 'tools'))
import licenses
def GetIncompatibleDirectories():
"""Gets a list of third-party directories which use licenses incompatible
with Android. This is used by the snapshot tool.

Returns:
A list of directories.
"""
# Regular-expression fragments for the license names that are accepted. They
# are joined into one anchored alternation ('^(a|b|...)$') just below.
# NOTE(review): the closing bracket of this list is not visible in this copy
# of the file, and entries may have been lost as well - confirm against the
# upstream source.
whitelist = [
'Apache( Version)? 2(\.0)?',
'(New )?BSD( 3-Clause)?( with advertising clause)?',
'L?GPL ?v?2(\.[01])?( or later)?',
'MPL 1\.1 ?/ ?GPL 2(\.0)? ?/ ?LGPL 2\.1',
'Microsoft Limited Public License',
'Microsoft Permissive License',
'Public Domain',
'SGI Free Software License B',
regex = '^(%s)$' % '|'.join(whitelist)
result = []
for directory in _FindThirdPartyDirs():
# NOTE(review): this call is cut off after the second argument; the remaining
# argument(s) and the closing parenthesis are not visible here.
metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
# Directories whose metadata explicitly sets 'License Android Compatible' to
# 'yes' are special-cased; a missing key is treated as 'no'.
# NOTE(review): the body of this 'if' is missing - presumably it skips the
# directory (continue); confirm.
if metadata.get('License Android Compatible', 'no') == 'yes':
# Drop a trailing ' license' / ' licenses' word from the declared license.
license = re.split(' [Ll]icenses?$', metadata['License'])[0]
# A declaration may name several licenses separated by ' and ' or ','; each
# name is checked on its own. Note that the emptiness filter runs on the
# unstripped piece, so a whitespace-only piece yields an empty token.
tokens = [x.strip() for x in re.split(' and |,', license) if len(x) > 0]
for token in tokens:
# Case-insensitive match of the whole token against the accepted names.
# NOTE(review): the body of this 'if' is missing - presumably it records the
# directory in 'result' and stops checking further tokens; confirm.
if not re.match(regex, token, re.IGNORECASE):
return result
def _CheckLicenseHeaders(directory_list, whitelisted_files):
"""Checks that all files which are not in a listed third-party directory,
and which do not use the standard Chromium license, are whitelisted.

Args:
directory_list: The list of directories.
whitelisted_files: The whitelist of files.

Returns:
True if all files with non-standard license headers are whitelisted and the
whitelist contains no stale entries, otherwise false.
"""
# Matches one of ...
# - '[Cc]opyright', but not when followed by
# ' 20[0-9][0-9] The Chromium Authors.', with optional (c) and date range
# - '([Cc]) (19|20)[0-9][0-9]', but not when preceded by the word copyright,
# as this is handled above
regex = '[Cc]opyright(?!( \(c\))? 20[0-9][0-9](-20[0-9][0-9])? ' \
'The Chromium Authors\. All rights reserved\.)' \
'|' \
'(?<!(pyright |opyright))\([Cc]\) (19|20)[0-9][0-9]'
# grep is run from the repository root, skipping patch leftovers (*.orig,
# *.rej) and the third_party, out, .git and .svn directories.
# NOTE(review): the argument list is cut off in this copy - the grep option
# flags, the pattern ('regex' above) and the search path are not visible.
# Each output line is later treated as a file path, so grep is presumably
# asked to list matching file names only; confirm.
args = ['grep',
'--exclude', '*.orig',
'--exclude', '*.rej',
'--exclude-dir', 'third_party',
'--exclude-dir', 'out',
'--exclude-dir', '.git',
'--exclude-dir', '.svn',
p = subprocess.Popen(args=args, cwd=REPOSITORY_ROOT, stdout=subprocess.PIPE)
files = p.communicate()[0].splitlines()
# Work on a shallow copy of the caller's list.
# NOTE(review): each comment below presumably introduced a statement adding
# one more directory to directory_list; those statements are missing from this
# copy of the file.
directory_list = directory_list[:]
# Ignore these tools.
# This is a build intermediate directory.
# This is tests directory, doesn't exist in the snapshot
# This is a test output directory.
# This is a test output directory.
# 'Copyright' appears in strings.
# This is a Chrome on Linux reference build, doesn't exist in the snapshot
# Remoting internal tools, doesn't exist in the snapshot
# Histogram tools, doesn't exist in the snapshot
# Exclude files under listed directories and some known offenders.
offending_files = []
for x in files:
x = os.path.normpath(x)
is_in_listed_directory = False
# A file counts as listed when its normalized path starts with any listed
# directory (plain string-prefix test).
for y in directory_list:
if x.startswith(y):
is_in_listed_directory = True
# NOTE(review): the body of this 'if' is missing - presumably the file is
# added to offending_files; confirm.
if not is_in_listed_directory:
all_files_valid = True
# Files that matched but are neither in a listed directory nor whitelisted.
unknown = set(offending_files) - set(whitelisted_files)
if unknown:
# NOTE(review): the right-hand operand of the '%' below is missing.
print 'The following files contain a third-party license but are not in ' \
'a listed third-party directory and are not whitelisted. You must ' \
'add the following files to the whitelist.\n%s' % \
all_files_valid = False
# Whitelist entries that no longer correspond to an offending file.
stale = set(whitelisted_files) - set(offending_files)
if stale:
# NOTE(review): the right-hand operand of the '%' below is missing.
print 'The following files are whitelisted unnecessarily. You must ' \
' remove the following files from the whitelist.\n%s' % \
all_files_valid = False
return all_files_valid
def _ReadFile(path):
    """Reads a file from disk.

    Args:
      path: The path of the file to read, relative to the root of the
        repository.

    Returns:
      The contents of the file as a string.
    """
    # Use a context manager so the handle is closed deterministically instead
    # of being left to the garbage collector.
    with open(os.path.join(REPOSITORY_ROOT, path), 'rb') as f:
        return f.read()
def _FindThirdPartyDirs():
    """Gets the list of third-party directories.

    Returns:
      The list of third-party directories, as returned by
      licenses.FindThirdPartyDirs() for the repository root with the paths
      listed below pruned.
    """
    prune_paths = [
        # Placeholder directory, no third-party code.
        os.path.join('third_party', 'adobe'),
        # Apache 2.0 license.
        # NOTE(review): the original comment continued with a "See ..."
        # reference that is missing from this copy of the file.
        os.path.join('third_party', 'bidichecker'),
        # Isn't checked out on clients.
        os.path.join('third_party', 'gles2_conform'),
        # llvm-build doesn't exist on non-clang builders.
        os.path.join('third_party', 'llvm-build'),
        # Binaries that don't apply to Android.
        os.path.join('third_party', 'widevine'),
    ]
    return licenses.FindThirdPartyDirs(prune_paths, REPOSITORY_ROOT)
def _Scan():
    """Checks that license meta-data is present for all third-party code.

    Returns:
      Whether the check succeeded.
    """
    third_party_dirs = _FindThirdPartyDirs()

    # First, check designated third-party directories using
    # src/tools/licenses.py. Every directory is parsed even after a failure so
    # that all problems are reported in one run.
    all_licenses_valid = True
    for path in sorted(third_party_dirs):
        try:
            licenses.ParseDir(path, REPOSITORY_ROOT)
        except licenses.LicenseError as e:
            print('Got LicenseError "%s" while scanning %s' % (e, path))
            all_licenses_valid = False

    # Second, check for non-standard license text.
    # NOTE(review): the file name is taken from the module docstring, which
    # names third_party_files_whitelist.txt as this tool's data file; the
    # original argument was cut off - confirm the exact path.
    files_data = _ReadFile(os.path.join('android_webview', 'tools',
                                        'third_party_files_whitelist.txt'))
    whitelisted_files = []
    for line in files_data.splitlines():
        # The first run of characters that is neither whitespace nor '#' is
        # the entry; blank lines and lines starting with '#' do not match.
        match = re.match(r'([^#\s]+)', line)
        if match:
            whitelisted_files.append(match.group(1))

    # The header check is evaluated first so that it always runs (and prints
    # its findings) even when a LicenseError was already seen.
    return _CheckLicenseHeaders(third_party_dirs, whitelisted_files) \
        and all_licenses_valid
def GenerateNoticeFile():
"""Generates the contents of an Android NOTICE file for the third-party code.
This is used by the snapshot tool.

Returns:
The contents of the NOTICE file.
"""
third_party_dirs = _FindThirdPartyDirs()
# Don't forget Chromium's LICENSE file
content = [_ReadFile('LICENSE')]
# We provide attribution for all third-party directories.
# TODO(steveblock): Limit this to only code used by the WebView binary.
for directory in third_party_dirs:
# NOTE(review): this call is cut off after the second argument; the remaining
# argument(s) and the closing parenthesis are not visible here.
metadata = licenses.ParseDir(directory, REPOSITORY_ROOT,
license_file = metadata['License File']
# Only directories that have a license file, and whose 'License File' value is
# not licenses.NOT_SHIPPED, contribute to the notice.
# NOTE(review): the body of this 'if' is missing - presumably it reads the
# license file and appends its text to 'content'; confirm.
if license_file and license_file != licenses.NOT_SHIPPED:
# The collected texts are joined with a single newline between them.
return '\n'.join(content)
def main():
    """Runs the command named on the command line.

    Exactly one positional argument is expected: 'scan' runs the license
    check and prints 'OK!' when it passes, 'notice' prints the generated
    NOTICE contents. For any other command line the help text is printed so
    that the failure is not silent.

    Returns:
      The exit status: 0 on success, 1 if the scan fails or the command line
      is not understood.
    """
    class FormatterWithNewLines(optparse.IndentedHelpFormatter):
        """Help formatter that preserves the description's line breaks.

        Each line of the description is wrapped on its own instead of the
        whole description being re-flowed into one paragraph.
        """

        def format_description(self, description):
            paras = description.split('\n')
            formatted_paras = [textwrap.fill(para, self.width)
                               for para in paras]
            return '\n'.join(formatted_paras) + '\n'

    parser = optparse.OptionParser(formatter=FormatterWithNewLines(),
                                   usage='%prog [options]')
    # __doc__ is None when the module has no docstring; fall back to an empty
    # string so that the concatenation cannot raise TypeError.
    parser.description = ((__doc__ or '') +
                          '\nCommands:\n'
                          ' scan Check licenses.\n'
                          ' notice Generate Android NOTICE file on stdout')
    _, args = parser.parse_args()
    if len(args) != 1:
        parser.print_help()
        return 1

    command = args[0]
    if command == 'scan':
        if _Scan():
            print('OK!')
            return 0
        return 1
    if command == 'notice':
        print(GenerateNoticeFile())
        return 0

    # Unknown command.
    parser.print_help()
    return 1
if __name__ == '__main__':