# blob: 8ee1a10dbfe79d9c2714174317c7d4f24b4d68b9 [file] [log] [blame]
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Top-level presubmit script for the Git repo backing chromium.org.
See https://www.chromium.org/developers/how-tos/depottools/presubmit-scripts
for more details about the presubmit API built into depot_tools.
"""
import re
from typing import NamedTuple
import urllib.parse
# Presubmit API version requested from depot_tools.
# NOTE(review): with a 2.x version depot_tools presumably runs every
# top-level Check* function in this file rather than looking for
# CheckChangeOnUpload/CheckChangeOnCommit -- confirm against the depot_tools
# presubmit documentation linked in the module docstring.
PRESUBMIT_VERSION = '2.0.0'
# This line is 'magic' in that git-cl looks for it to decide whether to
# use Python3 instead of Python2 when running the code in this file.
USE_PYTHON3 = True
# File-name suffixes of binary files ("lobs") that CheckForLobs rejects
# unless the file is being deleted.
#
# The entries are compared with str.endswith(), so matching is
# case-sensitive ('.PNG' is listed separately from '.png') and an entry does
# not have to start with a '.' (see 'jpg', 'pdf', 'png' and '_trace').
#
# This list must be kept in sync with the lists in //.eleventy.js and
# //scripts/upload_lobs.py.
# TODO(crbug.com/1457683): Figure out how to share these lists to eliminate
# the duplication and need to keep them in sync.
LOB_EXTENSIONS = [
    '.ai',
    '.bin',
    '.bmp',
    '.brd',
    '.bz2',
    '.crx',
    '.config',
    '.dia',
    '.gif',
    '.graffle',
    '.ico',
    '.jpg',
    'jpg',  # Some files are missing the '.' :(.
    '.jpeg',
    '.mp4',
    '.msi',
    '.pdf',
    'pdf',  # Some files are missing the '.' :(.
    '.png',
    'png',  # Some files are missing the '.' :(.
    '.PNG',
    '.swf',
    '.svg',
    '.tar.gz',
    '.tiff',
    '_trace',
    '.webp',
    '.xcf',
    '.xlsx',
    '.zip'
]
def CheckPatchFormatted(input_api, output_api):
    """Delegate to the canned CheckPatchFormatted presubmit check.

    NOTE(review): a second CheckPatchFormatted is defined further down in
    this file and replaces this one when the module is loaded; one of the
    two definitions can be dropped.

    Args:
        input_api: presubmit input API; provides canned_checks.
        output_api: presubmit output API, forwarded to the canned check.

    Returns:
        Whatever the canned check returns.
    """
    canned_check = input_api.canned_checks.CheckPatchFormatted
    return canned_check(input_api, output_api)
def CheckChangeHasDescription(input_api, output_api):
    """Delegate to the canned CheckChangeHasDescription presubmit check.

    Args:
        input_api: presubmit input API; provides canned_checks.
        output_api: presubmit output API, forwarded to the canned check.

    Returns:
        Whatever the canned check returns.
    """
    canned_check = input_api.canned_checks.CheckChangeHasDescription
    return canned_check(input_api, output_api)
def CheckForLobs(input_api, output_api):
    """Flag binary files (lobs) that are being committed directly to git.

    Every affected file whose name ends with one of LOB_EXTENSIONS, and which
    is not being deleted, produces one error telling the author to run
    scripts/upload_lobs.py and to commit <file>.sha1 instead.

    Args:
        input_api: presubmit input API; change.AffectedFiles() is inspected.
        output_api: presubmit output API; PresubmitError builds the results.

    Returns:
        A list of output_api.PresubmitError results, at most one per file.
    """
    # Multi-part suffixes such as '.tar.gz' rule out a lookup keyed on the
    # last extension; str.endswith() with a tuple tests every suffix at once.
    lob_suffixes = tuple(LOB_EXTENSIONS)
    message_template = (
        'The file \'{file_name}\' is a binary that has not been '
        'uploaded to GCE. Please run:\n\tscripts/upload_lobs.py '
        '"{file_name}"\nand commit {file_name}.sha1 instead\n'
        'Run:\n\tgit rm --cached "{file_name}"\n'
        'to remove the lob from git')

    found = []
    for affected in input_api.change.AffectedFiles():
        if not str(affected).endswith(lob_suffixes):
            continue
        if affected.Action() == 'D':
            # Deleted files are never reported.
            continue
        found.append(
            output_api.PresubmitError(
                message_template.format(file_name=affected.LocalPath())))
    return found
def CheckLobIgnores(input_api, output_api):
    """Check that every lob listed in site/.gitignore still has a .sha1 file.

    Only the lines between the '#start_lob_ignore' and '#end_lob_ignore'
    marker lines of site/.gitignore are considered. For each entry there,
    'site/<entry>.sha1' must exist; otherwise an error asks for the stale
    entry to be removed. Blank lines and '#' comment lines inside the section
    are not entries and are skipped.

    Args:
        input_api: presubmit input API; os_path is used for path handling.
        output_api: presubmit output API; PresubmitError builds the results.

    Returns:
        A list of output_api.PresubmitError results, one per stale entry, in
        the order the entries appear in the file.

    Raises:
        OSError: if site/.gitignore cannot be opened.
        ValueError: if either marker line is missing from the file.
    """
    with open('site/.gitignore', 'r', encoding='utf-8') as ignore_file:
        lines = [line.rstrip() for line in ignore_file]

    section_start = lines.index('#start_lob_ignore') + 1
    section_end = lines.index('#end_lob_ignore')
    # dict.fromkeys() drops duplicate entries while keeping file order, so
    # the errors come out in a deterministic order.
    ignored_lobs = dict.fromkeys(lines[section_start:section_end])

    output_status = []
    for ignored_lob in ignored_lobs:
        # The comment test has to look at the entry itself: the joined path
        # below always starts with 'site', never with '#'.
        if not ignored_lob or ignored_lob.startswith('#'):
            continue
        lob_sha_file = input_api.os_path.join('site', ignored_lob + '.sha1')
        if input_api.os_path.exists(lob_sha_file):
            continue
        error_msg = (
            'The sha1 file \'{removed_file}\' no longer exists, '
            'please remove "{ignored_file}" from site/.gitignore'.format(
                removed_file=lob_sha_file, ignored_file=ignored_lob))
        output_status.append(output_api.PresubmitError(error_msg))
    return output_status
def CheckPatchFormatted(input_api, output_api):
    """Check formatting of files via the canned CheckPatchFormatted check.

    NOTE(review): this is the second definition of CheckPatchFormatted in
    this file; it replaces the earlier one when the module is loaded.

    Args:
        input_api: presubmit input API; provides canned_checks.
        output_api: presubmit output API, forwarded to the canned check.

    Returns:
        Whatever the canned check returns.
    """
    canned = input_api.canned_checks
    results = canned.CheckPatchFormatted(input_api, output_api)
    return results
class _MdLink(NamedTuple):
"""Link found in markdown."""
# The file link is found in.
file: str
# The actual link.
uri: str
# Whether the link supports local/relative paths like /dir/foo.md.
relative_ok: bool
# What line was the link found on?
line_num: int
# Mapping of preferred host names.  If we find people using <key>, we'll
# make them use <value> instead.
# CheckLinks compares each key against the whole host part (netloc) of a
# parsed link, so only exact matches are rewritten.
_MD_HOST_ALIASES = {
    # keep-sorted start
    'b': 'issuetracker.google.com',
    'chromium.org': 'www.chromium.org',
    'dev.chromium.org': 'www.chromium.org',
    'goto': 'go',
    'goto.google.com': 'go',
    'www.youtube.com': 'youtube.com',
    # keep-sorted end
}
# These hosts should always use https://
# CheckLinks reports any http:// link whose host part is exactly one of
# these entries.
# This isn't an exhaustive list, just hosts we commonly refer to.
# TODO(vapier): Require https:// on all hosts by default, and require any
# actual http:// hosts be enumerated below.  This requires a large cleanup
# of existing docs first.
_MD_HTTPS_HOSTS = {
    # keep-sorted start
    'crbug.com',
    'crrev.com',
    'en.wikipedia.org',
    'github.com',
    'google.com',
    'issuetracker.google.com',
    'www.chromium.org',
    'www.google.com',
    'www.w3.org',
    'youtu.be',
    'youtube.com',
    # keep-sorted end
}
# These hosts should always use http://
# CheckLinks reports any https:// link whose host part is exactly one of
# these entries.
_MD_HTTP_HOSTS = {
    # keep-sorted start
    'g',
    'go',
    # keep-sorted end
}
def CheckLinks(input_api, output_api):
    """Check links used in affected markdown (.md) files.

    Links are pulled out of each line of the new file contents in three
    forms:
        [text](link)       the link may not contain spaces
        [anchor]: link     anything after the link is ignored
        <link>             http:// and https:// links only
    Bare URLs in running text are not extracted.

    Every extracted link is then checked for:
      * http:// used with a host listed in _MD_HTTPS_HOSTS;
      * https:// used with a host listed in _MD_HTTP_HOSTS;
      * a host that has a preferred spelling in _MD_HOST_ALIASES;
      * for files under site/: a www.chromium.org link where a local path
        is allowed, and local paths that begin with /site/.

    Args:
        input_api: presubmit input API; AffectedFiles() supplies the files.
        output_api: presubmit output API; PresubmitError builds the results.

    Returns:
        A list with one output_api.PresubmitError per problem found.  Each
        error carries the original and the suggested link in its long_text.
    """
    md_files = input_api.AffectedFiles(
        file_filter=lambda f: f.LocalPath().endswith('.md'))

    # Pass 1: collect every link, remembering where it came from.
    found = []
    for md_file in md_files:
        path = md_file.LocalPath()
        for line_num, text in enumerate(md_file.NewContents(), start=1):
            # [text](link)
            # The opening [ is not matched because it can sit on an earlier
            # line; only the ](...) part has to be on this line.
            for uri in re.findall(r'\]\(([^) ]+)\)', text):
                found.append(_MdLink(path, uri, True, line_num))

            # [anchor]: link
            ref = re.match(r'^\[[^]]+\]:\s*(\S+)', text)
            if ref is not None:
                found.append(_MdLink(path, ref.group(1), True, line_num))

            # <link>
            for uri in re.findall(r'<(https?://[^>]+)>', text):
                found.append(_MdLink(path, uri, False, line_num))

    # Pass 2: validate the collected links.
    problems = []

    def _report(link, msg, want_uri) -> None:
        """Record an error for |link| together with the suggested rewrite."""
        want_link = urllib.parse.urlunparse(want_uri)
        error = output_api.PresubmitError(
            f'{link.file}:{link.line_num}: {msg}',
            long_text=f'- {link.uri}\n+ {want_link}')
        problems.append(error)

    for link in found:
        parsed = urllib.parse.urlparse(link.uri)
        host = parsed.netloc
        in_site = link.file.startswith('site/')

        # Check bad http:// usage.
        if parsed.scheme == 'http' and host in _MD_HTTPS_HOSTS:
            _report(link, 'Always use https:// with this host',
                    parsed._replace(scheme='https'))

        # Check bad https:// usage.
        if parsed.scheme == 'https' and host in _MD_HTTP_HOSTS:
            _report(link, 'Always use http:// with this host',
                    parsed._replace(scheme='http'))

        # Check host aliases.  Keys are unique, so a single lookup finds the
        # only entry that can match.
        newhost = _MD_HOST_ALIASES.get(host)
        if newhost is not None:
            _report(link, f'Use {newhost} in links',
                    parsed._replace(netloc=newhost))

        # Have people use relative /foo/bar links instead of
        # https://www.chromium.org/foo/bar so we can check target links, and
        # so navigating via the sandbox website works correctly.
        if link.relative_ok and host == 'www.chromium.org' and in_site:
            _report(
                link, 'Use local paths instead of www.chromium.org in links',
                parsed._replace(scheme='', netloc='',
                                path=parsed.path or '/'))

        # Check relative links for generated docs (under site/).
        # The /site/ prefix is removed in generated content, but works when
        # viewing under gitiles, so sometimes people test the wrong page.
        is_local = parsed.scheme == '' and host == ''
        if is_local and in_site and parsed.path.startswith('/site/'):
            # Drop the leading '/site' but keep the '/' that follows it.
            _report(link, 'Omit the /site/ prefix in local paths',
                    parsed._replace(path=parsed.path[len('/site'):]))

    return problems