# Source blob: 8ee1a10dbfe79d9c2714174317c7d4f24b4d68b9
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Top-level presubmit script for the Git repo backing
this website. See the depot_tools presubmit documentation
for more details about the presubmit API built into depot_tools.
"""
import re
from typing import NamedTuple
import urllib.parse
# This line is 'magic' in that git-cl looks for it to decide whether to
# use Python3 instead of Python2 when running the code in this file.
# This list must be kept in sync with the lists in //.eleventy.js and
# //scripts/
# NOTE(review): the script name after //scripts/ and the TODO owner below
# appear to be missing from this comment -- restore them from upstream.
# TODO( Figure out how to share these lists to eliminate
# the duplication and need to keep them in sync.
# NOTE(review): the list opener (CheckForLobs reads this list as
# LOB_EXTENSIONS), any earlier entries and the closing bracket are not visible
# here; only the three dot-less suffixes below remain. Entries are matched
# with str.endswith(), so a dot-less suffix also matches names that merely end
# in those letters.
'jpg', # Some files are missing the '.' :(.
'pdf', # Some files are missing the '.' :(.
'png', # Some files are missing the '.' :(.
def CheckPatchFormatted(input_api, output_api):
    """Check formatting of files.

    Thin wrapper that delegates to the canned check of the same name.

    NOTE(review): this function is defined a second time further down in this
    file; at import time the later definition replaces this one.

    Args:
        input_api: Presubmit input API; provides ``canned_checks``.
        output_api: Presubmit output API, passed through to the canned check.

    Returns:
        Whatever ``canned_checks.CheckPatchFormatted`` returns.
    """
    return input_api.canned_checks.CheckPatchFormatted(input_api, output_api)
def CheckChangeHasDescription(input_api, output_api):
    """Check that the change has a description.

    Thin wrapper that delegates to the canned check of the same name.

    Args:
        input_api: Presubmit input API; provides ``canned_checks``.
        output_api: Presubmit output API, passed through to the canned check.

    Returns:
        Whatever ``canned_checks.CheckChangeHasDescription`` returns.
    """
    return input_api.canned_checks.CheckChangeHasDescription(
        input_api, output_api)
def CheckForLobs(input_api, output_api):
    """Reject binary files ("lobs") that are being committed directly.

    Every affected file that is not being deleted and whose name ends with one
    of the ``LOB_EXTENSIONS`` suffixes produces one error telling the author
    to commit the ``.sha1`` file instead.

    Args:
        input_api: Presubmit input API; ``change.AffectedFiles()`` supplies
            the files to inspect.
        output_api: Presubmit output API; ``PresubmitError`` builds results.

    Returns:
        A list with one ``output_api.PresubmitError`` per offending file.
    """
    # Suffixes such as 'tar.gz' rule out a lookup keyed on the last extension,
    # but str.endswith() accepts a tuple, which tests every suffix in one call
    # and reports a file once even when several suffixes match it.
    lob_suffixes = tuple(LOB_EXTENSIONS)

    output_status = []
    for file in input_api.change.AffectedFiles():
        file_name = str(file)
        # Deleting a lob is fine; only added or modified files are rejected.
        if file.Action() == 'D' or not file_name.endswith(lob_suffixes):
            continue
        # NOTE(review): the command after 'scripts/' in this message looks
        # incomplete (no script name) -- confirm the intended text.
        error_msg = (
            'The file \'{file_name}\' is a binary that has not been '
            'uploaded to GCE. Please run:\n\tscripts/ '
            '"{file_name}"\nand commit {file_name}.sha1 instead\n'
            'Run:\n\tgit rm --cached "{file_name}"\n'
            'to remove the lob from git'.format(file_name=file_name))
        output_status.append(output_api.PresubmitError(error_msg))
    return output_status
def CheckLobIgnores(input_api, output_api):
    """Check that every lob listed in site/.gitignore still has a .sha1 file.

    Reads the entries that follow the ``#start_lob_ignore`` line of
    ``site/.gitignore`` (opened relative to the current working directory)
    and expects ``site/<entry>.sha1`` to exist for each of them.

    Args:
        input_api: Presubmit input API; only ``os_path`` is used.
        output_api: Presubmit output API; ``PresubmitError`` builds results.

    Returns:
        A list with one ``output_api.PresubmitError`` per stale entry, in the
        order the entries appear in the file.

    Raises:
        ValueError: If the file has no ``#start_lob_ignore`` line.
    """
    with open('site/.gitignore', 'r', encoding='utf-8') as ignore_file:
        lines = [line.rstrip() for line in ignore_file]

    start = lines.index('#start_lob_ignore') + 1
    # NOTE(review): the end of this slice was missing from the code; an
    # '#end_lob_ignore' marker is assumed by symmetry with the start marker
    # and treated as optional -- confirm against site/.gitignore.
    try:
        end = lines.index('#end_lob_ignore', start)
    except ValueError:
        end = len(lines)

    # dict.fromkeys() de-duplicates while keeping file order, so the errors
    # are reported in a deterministic order.
    ignored_lobs = dict.fromkeys(lines[start:end])

    output_status = []
    for ignored_lob in ignored_lobs:
        # Skip blank lines and comments. The comment test has to look at the
        # entry itself: the joined path always begins with 'site', never '#'.
        if not ignored_lob or ignored_lob.startswith('#'):
            continue
        lob_sha_file = input_api.os_path.join('site', ignored_lob + '.sha1')
        if not input_api.os_path.exists(lob_sha_file):
            error_msg = (
                'The sha1 file \'{removed_file}\' no longer exists, '
                'please remove "{ignored_file}" from site/.gitignore'.
                format(removed_file=lob_sha_file, ignored_file=ignored_lob))
            output_status.append(output_api.PresubmitError(error_msg))
    return output_status
def CheckPatchFormatted(input_api, output_api):
    """Check formatting of files.

    Thin wrapper that delegates to the canned check of the same name.

    NOTE(review): an identical function with this name is also defined earlier
    in this file; this later definition is the one that stays bound.

    Args:
        input_api: Presubmit input API; provides ``canned_checks``.
        output_api: Presubmit output API, passed through to the canned check.

    Returns:
        Whatever ``canned_checks.CheckPatchFormatted`` returns.
    """
    return input_api.canned_checks.CheckPatchFormatted(input_api, output_api)
class _MdLink(NamedTuple):
"""Link found in markdown."""
# The file link is found in.
file: str
# The actual link.
uri: str
# Whether the link supports local/relative paths like /dir/
relative_ok: bool
# What line was the link found on?
line_num: int
# Mapping of preferred host names. If we find people using <key>, we'll
# make them use <value> instead.
# NOTE(review): the dict opener (CheckLinks reads this table as
# _MD_HOST_ALIASES) and its closing brace are not visible here, and most keys
# and values below are empty strings -- presumably host names that were lost.
# Restore them from upstream before relying on this table.
# keep-sorted start
'b': '',
'': '',
'': '',
'goto': 'go',
'': 'go',
'': '',
# keep-sorted end
# These hosts should always use https://
# This isn't an exhaustive list, just hosts we commonly refer to.
# TODO(vapier): Require https:// on all hosts by default, and require any
# actual http:// hosts be enumerated below. This requires a large cleanup
# of existing docs first.
# NOTE(review): CheckLinks reads this collection as _MD_HTTPS_HOSTS; neither
# its definition nor its entries are visible between the markers below.
# keep-sorted start
# keep-sorted end
# These hosts should always use http://
# NOTE(review): CheckLinks reads this collection as _MD_HTTP_HOSTS; neither
# its definition nor its entries are visible between the markers below.
# keep-sorted start
# keep-sorted end
def CheckLinks(input_api, output_api):
    """Check links used in markdown.

    Scans every affected ``.md`` file for links and reports those that use the
    wrong scheme for a known host, use a discouraged host alias, point at the
    site's own host where a local path would do, or keep the ``/site/`` prefix
    in a local path.

    Args:
        input_api: Presubmit input API; ``AffectedFiles(file_filter=...)``
            selects the markdown files of the change.
        output_api: Presubmit output API; ``PresubmitError`` builds results.

    Returns:
        A list with one ``output_api.PresubmitError`` per problem found, each
        carrying a ``- current`` / ``+ wanted`` suggestion as its long text.
    """
    # NOTE(review): the host literal was blank in the code this replaces; the
    # site's canonical host is assumed here -- confirm against upstream.
    site_host = 'www.chromium.org'

    # Compiled once up front rather than looked up again for every line.
    # [text](link): the opening [ is not matched because the text can span
    # multiple lines; the ](...) part has to be on one line.
    inline_link_re = re.compile(r'\]\(([^) ]+)\)')
    # [anchor]: link, optionally followed by "extra text".
    reference_link_re = re.compile(r'^\[[^]]+\]:\s*(\S+)')
    # <link>
    autolink_re = re.compile(r'<(https?://[^>]+)>')

    # Build up the files to analyze.
    affected_files = input_api.AffectedFiles(
        file_filter=lambda x: x.LocalPath().endswith('.md'))

    # Extract the links from the files. Bare URLs are not extracted.
    links = []
    for affected_file in affected_files:
        file = affected_file.LocalPath()
        for i, line in enumerate(affected_file.NewContents(), start=1):
            links.extend(
                _MdLink(file, x, True, i)
                for x in inline_link_re.findall(line))

            m = reference_link_re.match(line)
            if m:
                links.append(_MdLink(file, m.group(1), True, i))

            # Autolinks are always absolute, so relative paths do not apply.
            links.extend(
                _MdLink(file, x, False, i)
                for x in autolink_re.findall(line))

    # Check links.
    results = []

    def _create_result(link, msg, want_uri) -> None:
        """Record an error for `link` that suggests `want_uri` instead."""
        want_link = urllib.parse.urlunparse(want_uri)
        results.append(
            output_api.PresubmitError(
                f'{link.file}:{link.line_num}: {msg}',
                long_text=f'- {link.uri}\n+ {want_link}'))

    for link in links:
        o = urllib.parse.urlparse(link.uri)

        # Check bad http:// usage.
        if o.scheme == 'http' and o.netloc in _MD_HTTPS_HOSTS:
            _create_result(link, 'Always use https:// with this host',
                           o._replace(scheme='https'))

        # Check bad https:// usage.
        if o.scheme == 'https' and o.netloc in _MD_HTTP_HOSTS:
            _create_result(link, 'Always use http:// with this host',
                           o._replace(scheme='http'))

        # Check host aliases. A direct lookup matches the same hosts as
        # comparing against every key in turn.
        if o.netloc in _MD_HOST_ALIASES:
            newhost = _MD_HOST_ALIASES[o.netloc]
            _create_result(link, f'Use {newhost} in links',
                           o._replace(netloc=newhost))

        # Have people use relative /foo/bar links instead of absolute links
        # to the site itself so we can check target links, and so navigating
        # via the sandbox website works correctly.
        if (link.relative_ok and o.netloc == site_host
                and link.file.startswith('site/')):
            _create_result(
                link, f'Use local paths instead of {site_host} in links',
                o._replace(scheme='', netloc='', path=o.path or '/'))

        # Check relative links for generated docs (under site/).
        if o.scheme == o.netloc == '' and link.file.startswith('site/'):
            # The /site/ prefix is removed in generated content, but works when
            # viewing under gitiles, so sometimes people test the wrong page.
            if o.path.startswith('/site/'):
                _create_result(link, 'Omit the /site/ prefix in local paths',
                               o._replace(path=o.path[len('/site'):]))

    return results