| # Copyright 2021 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Top-level presubmit script for the Git repo backing chromium.org. |
| |
| See https://www.chromium.org/developers/how-tos/depottools/presubmit-scripts |
| for more details about the presubmit API built into depot_tools. |
| """ |
| |
| import re |
| from typing import NamedTuple |
| import urllib.parse |
| |
PRESUBMIT_VERSION = '2.0.0'

# git-cl looks for the assignment below to decide whether to run the code in
# this file with Python3 instead of Python2, which makes the line 'magic'.
USE_PYTHON3 = True
| |
# Keep this list in sync with the matching lists in //.eleventy.js and
# //scripts/upload_lobs.py.
# TODO(crbug.com/1457683): Figure out how to share these lists to eliminate
# the duplication and need to keep them in sync.
LOB_EXTENSIONS = [
    '.ai',
    '.bin',
    '.bmp',
    '.brd',
    '.bz2',
    '.crx',
    '.config',
    '.dia',
    '.gif',
    '.graffle',
    '.ico',
    '.jpg',
    'jpg',  # Some files lack the leading '.' :(.
    '.jpeg',
    '.mp4',
    '.msi',
    '.pdf',
    'pdf',  # Some files lack the leading '.' :(.
    '.png',
    'png',  # Some files lack the leading '.' :(.
    '.PNG',
    '.swf',
    '.svg',
    '.tar.gz',
    '.tiff',
    '_trace',
    '.webp',
    '.xcf',
    '.xlsx',
    '.zip',
]
| |
| |
def CheckPatchFormatted(input_api, output_api):
    """Run the canned patch-formatting check and return its results."""
    # NOTE: a function with this same name is defined again further down in
    # this file; both forward to the same canned check.
    canned = input_api.canned_checks
    return canned.CheckPatchFormatted(input_api, output_api)
| |
| |
def CheckChangeHasDescription(input_api, output_api):
    """Run the canned change-description check and return its results."""
    canned = input_api.canned_checks
    return canned.CheckChangeHasDescription(input_api, output_api)
| |
| |
def CheckForLobs(input_api, output_api):
    """Report every non-deleted affected file whose name ends in a LOB suffix.

    Args:
        input_api: Presubmit input API; only ``change.AffectedFiles()`` is
            used here.
        output_api: Presubmit output API; only ``PresubmitError`` is used.

    Returns:
        A list with one ``output_api.PresubmitError`` per offending file, in
        the order the files are reported by ``AffectedFiles()``.  The list is
        empty when no affected file matches.
    """
    # Suffixes such as '.tar.gz' rule out a plain extension -> bool lookup
    # table, but str.endswith() accepts a tuple and tests every suffix in a
    # single call.
    lob_suffixes = tuple(LOB_EXTENSIONS)

    output_status = []
    for affected_file in input_api.change.AffectedFiles():
        if not str(affected_file).endswith(lob_suffixes):
            continue
        # Deleting a LOB from git is exactly what we want; don't flag it.
        if affected_file.Action() == 'D':
            continue

        error_msg = (
            'The file \'{file_name}\' is a binary that has not been '
            'uploaded to GCE. Please run:\n\tscripts/upload_lobs.py '
            '"{file_name}"\nand commit {file_name}.sha1 instead\n'
            'Run:\n\tgit rm --cached "{file_name}"\n'
            'to remove the lob from git'.format(
                file_name=affected_file.LocalPath()))
        output_status.append(output_api.PresubmitError(error_msg))

    return output_status
| |
| |
def CheckLobIgnores(input_api, output_api):
    """Check that every LOB listed in site/.gitignore still has a .sha1 file.

    Reads ``site/.gitignore`` (relative to the current working directory),
    takes the lines between the ``#start_lob_ignore`` and ``#end_lob_ignore``
    marker lines, and reports an error for each entry whose
    ``site/<entry>.sha1`` file does not exist.  Blank lines and ``#`` comment
    lines inside that section are not treated as entries.

    Args:
        input_api: Presubmit input API; only ``os_path`` is used here.
        output_api: Presubmit output API; only ``PresubmitError`` is used.

    Returns:
        A list with one ``output_api.PresubmitError`` per stale entry, ordered
        by entry name.  Empty when every entry still has its .sha1 file.

    Raises:
        OSError: If site/.gitignore cannot be opened.
        ValueError: If either marker line is missing from site/.gitignore.
    """
    with open('site/.gitignore', 'r', encoding='utf-8') as ignore_file:
        lines = [line.rstrip() for line in ignore_file]

    section_start = lines.index('#start_lob_ignore') + 1
    section_end = lines.index('#end_lob_ignore')
    # De-duplicate, then sort so that errors are reported in a stable order
    # (plain set iteration order varies between runs).
    ignored_lobs = sorted(set(lines[section_start:section_end]))

    output_status = []
    for ignored_lob in ignored_lobs:
        # Skip blank lines and comments.  The comment test has to look at the
        # entry itself: the joined path below always begins with 'site', so
        # testing it for a leading '#' can never succeed.
        if not ignored_lob or ignored_lob.startswith('#'):
            continue

        lob_sha_file = input_api.os_path.join('site', ignored_lob + '.sha1')
        if input_api.os_path.exists(lob_sha_file):
            continue

        error_msg = (
            'The sha1 file \'{removed_file}\' no longer exists, '
            'please remove "{ignored_file}" from site/.gitignore'.format(
                removed_file=lob_sha_file, ignored_file=ignored_lob))
        output_status.append(output_api.PresubmitError(error_msg))
    return output_status
| |
| |
def CheckPatchFormatted(input_api, output_api):
    """Check formatting of files via the canned presubmit check."""
    canned = input_api.canned_checks
    return canned.CheckPatchFormatted(input_api, output_api)
| |
| |
class _MdLink(NamedTuple):
    """A single link extracted from a markdown file."""

    # Path of the file the link appears in.
    file: str

    # The link target itself.
    uri: str

    # True when local/relative paths such as /dir/foo.md are acceptable for
    # this kind of link.
    relative_ok: bool

    # Number of the line the link was found on.
    line_num: int
| |
| |
# Preferred host names: a link whose host is one of the keys gets flagged,
# and the matching value is suggested as the host to use instead.
_MD_HOST_ALIASES = {
    # keep-sorted start
    'b': 'issuetracker.google.com',
    'chromium.org': 'www.chromium.org',
    'dev.chromium.org': 'www.chromium.org',
    'goto': 'go',
    'goto.google.com': 'go',
    'www.youtube.com': 'youtube.com',
    # keep-sorted end
}
| |
# Hosts that must always be linked with https://
# Not exhaustive; these are just the hosts we commonly refer to.
# TODO(vapier): Require https:// on all hosts by default, and require any
# actual http:// hosts be enumerated below.  This requires a large cleanup
# of existing docs first.
_MD_HTTPS_HOSTS = {
    # keep-sorted start
    'crbug.com',
    'crrev.com',
    'en.wikipedia.org',
    'github.com',
    'google.com',
    'issuetracker.google.com',
    'www.chromium.org',
    'www.google.com',
    'www.w3.org',
    'youtu.be',
    'youtube.com',
    # keep-sorted end
}
| |
# Hosts that must always be linked with http://
_MD_HTTP_HOSTS = {
    # keep-sorted start
    'g',
    'go',
    # keep-sorted end
}
| |
| |
def CheckLinks(input_api, output_api):
    """Check links used in markdown.

    Collects links from every affected file whose path ends in ``.md`` and
    reports an error for each link that:
      * uses http:// with a host listed in _MD_HTTPS_HOSTS,
      * uses https:// with a host listed in _MD_HTTP_HOSTS,
      * uses a host that has a preferred alias in _MD_HOST_ALIASES,
      * points at www.chromium.org from a file under site/ where a local
        path would work, or
      * is a local path starting with /site/ inside a file under site/.

    Args:
        input_api: Presubmit input API; only ``AffectedFiles()`` is used here.
        output_api: Presubmit output API; only ``PresubmitError`` is used.

    Returns:
        A list of ``output_api.PresubmitError`` results, one per problem, in
        the order the links appear.  Each carries a ``long_text`` showing the
        original link and the suggested replacement.
    """
    # Link styles seen in markdown:
    #   [text](link)
    #   [anchor]: link
    #   [anchor]: link "extra text"
    #   <link>
    #   link
    # Bare links (the last style) are not extracted.
    #
    # Compile the patterns once instead of looking them up for every line.
    # [text](link): the opening [ is not matched because it can span multiple
    # lines; the ](...) part has to be on one line.
    inline_re = re.compile(r'\]\(([^) ]+)\)')
    # [anchor]: link
    reference_re = re.compile(r'^\[[^]]+\]:\s*(\S+)')
    # <link>
    autolink_re = re.compile(r'<(https?://[^>]+)>')

    # Build up the files to analyze.
    affected_files = input_api.AffectedFiles(
        file_filter=lambda x: x.LocalPath().endswith('.md'))

    # Extract the links from the files.
    links = []
    for affected_file in affected_files:
        path = affected_file.LocalPath()
        for line_num, line in enumerate(affected_file.NewContents(), start=1):
            links.extend(
                _MdLink(path, uri, True, line_num)
                for uri in inline_re.findall(line))
            m = reference_re.match(line)
            if m:
                links.append(_MdLink(path, m.group(1), True, line_num))
            # <link> only matches absolute URLs, so relative paths are not
            # an option for this style.
            links.extend(
                _MdLink(path, uri, False, line_num)
                for uri in autolink_re.findall(line))

    # Check links.
    results = []

    def _create_result(link, msg, want_uri) -> None:
        """Record an error for |link| suggesting |want_uri| instead."""
        want_link = urllib.parse.urlunparse(want_uri)
        results.append(
            output_api.PresubmitError(f'{link.file}:{link.line_num}: {msg}',
                                      long_text=f'- {link.uri}\n+ {want_link}'))

    for link in links:
        o = urllib.parse.urlparse(link.uri)
        # Files under site/ are the source of the generated docs.
        in_site = link.file.startswith('site/')

        # Check bad http:// usage.
        if o.scheme == 'http' and o.netloc in _MD_HTTPS_HOSTS:
            _create_result(link, 'Always use https:// with this host',
                           o._replace(scheme='https'))

        # Check bad https:// usage.
        if o.scheme == 'https' and o.netloc in _MD_HTTP_HOSTS:
            _create_result(link, 'Always use http:// with this host',
                           o._replace(scheme='http'))

        # Check host aliases.
        newhost = _MD_HOST_ALIASES.get(o.netloc)
        if newhost is not None:
            _create_result(link, f'Use {newhost} in links',
                           o._replace(netloc=newhost))

        # Have people use relative /foo/bar links instead of
        # https://www.chromium.org/foo/bar so we can check target links, and
        # so navigating via the sandbox website works correctly.
        if link.relative_ok and o.netloc == 'www.chromium.org' and in_site:
            _create_result(
                link, 'Use local paths instead of www.chromium.org in links',
                o._replace(scheme='', netloc='', path=o.path or '/'))

        # Check relative links for generated docs (under site/).
        if o.scheme == o.netloc == '' and in_site:
            # The /site/ prefix is removed in generated content, but works
            # when viewing under gitiles, so sometimes people test the wrong
            # page.
            if o.path.startswith('/site/'):
                # Drop the leading '/site' (5 chars) but keep the '/' after.
                _create_result(link, 'Omit the /site/ prefix in local paths',
                               o._replace(path=o.path[5:]))

    return results