# PRESUBMIT.py - website - Git at Google

 # Copyright 2021 The Chromium Authors
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """Top-level presubmit script for the Git repo backing chromium.org.

 See https://www.chromium.org/developers/how-tos/depottools/presubmit-scripts
 for more details about the presubmit API built into depot_tools.
 """

 import re
 from typing import NamedTuple
 import urllib.parse

 # NOTE(review): presumably selects the depot_tools presubmit API version
 # (see the documentation linked in the module docstring) -- confirm before
 # changing.
 PRESUBMIT_VERSION = '2.0.0'

 # This line is 'magic' in that git-cl looks for it to decide whether to
 # use Python3 instead of Python2 when running the code in this file.
 USE_PYTHON3 = True

 # This list must be kept in sync with the lists in //.eleventy.js and
 # //scripts/upload_lobs.py.
 # TODO(crbug.com/1457683): Figure out how to share these lists to eliminate
 # the duplication and need to keep them in sync.
 #
 # CheckForLobs() compares every entry against the end of an affected file's
 # path with str.endswith(), so the entries are plain suffixes rather than
 # strict extensions, and the comparison is case-sensitive (hence the
 # separate '.PNG' entry).

 LOB_EXTENSIONS = [
     '.ai',
     '.bin',
     '.bmp',
     '.brd',
     '.bz2',
     '.crx',
     '.config',
     '.dia',
     '.gif',
     '.graffle',
     '.ico',
     '.jpg',
     'jpg',  # Some files are missing the '.' :(.
     '.jpeg',
     '.mp4',
     '.msi',
     '.pdf',
     'pdf',  # Some files are missing the '.' :(.
     '.png',
     'png',  # Some files are missing the '.' :(.
     '.PNG',
     '.swf',
     '.svg',
     '.tar.gz',
     '.tiff',
     '_trace',
     '.webp',
     '.xcf',
     '.xlsx',
     '.zip'
 ]


 def CheckPatchFormatted(input_api, output_api):
     """Run the canned patch-formatting check and return its results.

     Delegates to input_api.canned_checks.CheckPatchFormatted.

     NOTE: a function with the same name is defined again further down in
     this file; that later definition replaces this one when the module is
     loaded.
     """
     canned = input_api.canned_checks
     return canned.CheckPatchFormatted(input_api, output_api)


 def CheckChangeHasDescription(input_api, output_api):
     """Run the canned change-description check and return its results.

     Delegates to input_api.canned_checks.CheckChangeHasDescription.
     """
     canned = input_api.canned_checks
     return canned.CheckChangeHasDescription(input_api, output_api)


 def CheckForLobs(input_api, output_api):
     """Reject large binary objects (lobs) that are committed directly.

     An affected file is reported when its path ends with one of the
     LOB_EXTENSIONS suffixes and its action is not 'D'.

     Args:
         input_api: presubmit input API; change.AffectedFiles() is consulted.
         output_api: presubmit output API used to build the results.

     Returns:
         A list with one output_api.PresubmitError per offending file.
     """
     # str.endswith() takes a tuple of candidates, so multi-part suffixes
     # such as '.tar.gz' need no special handling.
     lob_suffixes = tuple(LOB_EXTENSIONS)
     message_template = (
         'The file \'{file_name}\' is a binary that has not been '
         'uploaded to GCE. Please run:\n\tscripts/upload_lobs.py '
         '"{file_name}"\nand commit {file_name}.sha1 instead\n'
         'Run:\n\tgit rm --cached "{file_name}"\n'
         'to remove the lob from git')

     problems = []
     for affected in input_api.change.AffectedFiles():
         if not str(affected).endswith(lob_suffixes):
             continue
         if affected.Action() == 'D':
             continue
         message = message_template.format(file_name=affected.LocalPath())
         problems.append(output_api.PresubmitError(message))
     return problems


 def CheckLobIgnores(input_api, output_api):
     """Check that every lob listed in site/.gitignore still has a .sha1 file.

     Reads the entries between the '#start_lob_ignore' and '#end_lob_ignore'
     marker lines of site/.gitignore (path relative to the current working
     directory) and expects 'site/<entry>.sha1' to exist for each of them.
     Blank lines and '#' comment lines inside the section are skipped.

     Args:
         input_api: presubmit input API; only its os_path attribute is used.
         output_api: presubmit output API used to build the results.

     Returns:
         A list with one output_api.PresubmitError per stale entry, in the
         order the entries appear in the file.

     Raises:
         OSError: if site/.gitignore cannot be opened.
         ValueError: if either marker line is missing from the file.
     """
     with open("site/.gitignore", 'r') as ignore_file:
         lines = [line.rstrip() for line in ignore_file]

     first = lines.index('#start_lob_ignore') + 1
     last = lines.index('#end_lob_ignore')

     output_status = []
     # dict.fromkeys() drops duplicates while keeping file order, so the
     # errors are reported in a stable order (iterating a set is not).
     for ignored_lob in dict.fromkeys(lines[first:last]):
         # Skip blank lines and comments.  The test has to look at the raw
         # entry: the joined 'site/...' path can never start with '#'.
         if not ignored_lob or ignored_lob.startswith('#'):
             continue

         lob_sha_file = input_api.os_path.join('site', ignored_lob + '.sha1')
         if input_api.os_path.exists(lob_sha_file):
             continue

         error_msg = (
             'The sha1 file \'{removed_file}\' no longer exists, '
             'please remove "{ignored_file}" from site/.gitignore'.format(
                 removed_file=lob_sha_file, ignored_file=ignored_lob))
         output_status.append(output_api.PresubmitError(error_msg))
     return output_status


 def CheckPatchFormatted(input_api, output_api):
     """Check formatting of files.

     Delegates to input_api.canned_checks.CheckPatchFormatted and returns its
     results unchanged.
     """
     canned = input_api.canned_checks
     return canned.CheckPatchFormatted(input_api, output_api)


 class _MdLink(NamedTuple):
     """One link extracted from a markdown file."""

     file: str  # Path of the file the link was found in.
     uri: str  # The link target itself.
     relative_ok: bool  # Can a local path like /dir/foo.md be used here?
     line_num: int  # Line the link was found on.


 # Mapping of preferred host names.  If we find people using <key>, we'll
 # make them use <value> instead.  CheckLinks() compares the keys with the
 # parsed netloc of each link, so a key only matches that exact host string.
 _MD_HOST_ALIASES = {
     # keep-sorted start
     'b': 'issuetracker.google.com',
     'chromium.org': 'www.chromium.org',
     'dev.chromium.org': 'www.chromium.org',
     'goto': 'go',
     'goto.google.com': 'go',
     'www.youtube.com': 'youtube.com',
     # keep-sorted end
 }

 # These hosts should always use https://
 # This isn't an exhaustive list, just hosts we commonly refer to.
 # Membership is tested against the exact netloc, so subdomains of a listed
 # host are not covered unless they are listed too.
 # TODO(vapier): Require https:// on all hosts by default, and require any
 # actual http:// hosts be enumerated below.  This requires a large cleanup
 # of existing docs first.
 _MD_HTTPS_HOSTS = {
     # keep-sorted start
     'crbug.com',
     'crrev.com',
     'en.wikipedia.org',
     'github.com',
     'google.com',
     'issuetracker.google.com',
     'www.chromium.org',
     'www.google.com',
     'www.w3.org',
     'youtu.be',
     'youtube.com',
     # keep-sorted end
 }

 # These hosts should always use http://
 # NOTE(review): 'g' and 'go' look like bare short-link hosts -- confirm.
 _MD_HTTP_HOSTS = {
     # keep-sorted start
     'g',
     'go',
     # keep-sorted end
 }


 def CheckLinks(input_api, output_api):
     """Check links used in markdown.

     Every affected *.md file is scanned for links, and an error is reported
     for each link that:
       * uses http:// with a host listed in _MD_HTTPS_HOSTS,
       * uses https:// with a host listed in _MD_HTTP_HOSTS,
       * uses a host that has a preferred name in _MD_HOST_ALIASES,
       * points at www.chromium.org from a file under site/ where a local
         path could be used instead,
       * is a local path starting with /site/ in a file under site/,
       * cannot be parsed by urllib.parse at all.

     Args:
         input_api: presubmit input API; AffectedFiles() is consulted.
         output_api: presubmit output API used to build the results.

     Returns:
         A list of output_api.PresubmitError results (empty if nothing is
         wrong).
     """
     # Build up the files to analyze.
     affected_files = input_api.AffectedFiles(
         file_filter=lambda x: x.LocalPath().endswith('.md'))

     # The link styles that are extracted.  Bare URLs without any of this
     # markup are not picked up.  Patterns are compiled once up front rather
     # than being looked up again for every line.
     #
     # [text](link)
     # We don't match the opening [ because it can span multiple lines.
     # The ](...) part has to be on one line.
     inline_re = re.compile(r'\]\(([^) ]+)\)')
     # [anchor]: link
     # [anchor]: link "extra text"
     reference_re = re.compile(r'^\[[^]]+\]:\s*(\S+)')
     # <link>
     autolink_re = re.compile(r'<(https?://[^>]+)>')

     # Extract the links from the files.
     links = []
     for affected_file in affected_files:
         path = affected_file.LocalPath()
         for i, line in enumerate(affected_file.NewContents(), start=1):
             links.extend(
                 _MdLink(path, x, True, i) for x in inline_re.findall(line))
             m = reference_re.match(line)
             if m:
                 links.append(_MdLink(path, m.group(1), True, i))
             links.extend(
                 _MdLink(path, x, False, i)
                 for x in autolink_re.findall(line))

     # Check links.
     results = []

     def _create_result(link, msg, want_uri) -> None:
         want_link = urllib.parse.urlunparse(want_uri)
         results.append(
             output_api.PresubmitError(f'{link.file}:{link.line_num}: {msg}',
                                       long_text=f'- {link.uri}\n+ {want_link}'))

     for link in links:
         try:
             o = urllib.parse.urlparse(link.uri)
         except ValueError as e:
             # urlparse() rejects malformed hosts (e.g. unbalanced brackets).
             # Report the link instead of letting the whole presubmit crash.
             results.append(
                 output_api.PresubmitError(
                     f'{link.file}:{link.line_num}: Unable to parse link',
                     long_text=f'{link.uri}: {e}'))
             continue

         in_site = link.file.startswith('site/')

         # Check bad http:// usage.
         if o.scheme == 'http' and o.netloc in _MD_HTTPS_HOSTS:
             _create_result(link, 'Always use https:// with this host',
                            o._replace(scheme='https'))

         # Check bad https:// usage.
         if o.scheme == 'https' and o.netloc in _MD_HTTP_HOSTS:
             _create_result(link, 'Always use http:// with this host',
                            o._replace(scheme='http'))

         # Check host aliases.
         newhost = _MD_HOST_ALIASES.get(o.netloc)
         if newhost is not None:
             _create_result(link, f'Use {newhost} in links',
                            o._replace(netloc=newhost))

         # Have people use relative /foo/bar links instead of
         # https://www.chromium.org/foo/bar so we can check target links, and
         # so navigating via the sandbox website works correctly.
         if link.relative_ok and o.netloc == 'www.chromium.org' and in_site:
             _create_result(
                 link, 'Use local paths instead of www.chromium.org in links',
                 o._replace(scheme='', netloc='', path=o.path or '/'))

         # Check relative links for generated docs (under site/).
         if o.scheme == o.netloc == '' and in_site:
             # The /site/ prefix is removed in generated content, but works when
             # viewing under gitiles, so sometimes people test the wrong page.
             if o.path.startswith('/site/'):
                 # Drop '/site' but keep the slash that follows it.
                 _create_result(link, 'Omit the /site/ prefix in local paths',
                                o._replace(path=o.path[len('/site'):]))

     return results
	# Copyright 2021 The Chromium Authors
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""Top-level presubmit script for the Git repo backing chromium.org.

	See https://www.chromium.org/developers/how-tos/depottools/presubmit-scripts
	for more details about the presubmit API built into depot_tools.
	"""

	import re
	from typing import NamedTuple
	import urllib.parse

	# NOTE(review): presumably selects the depot_tools presubmit API version
	# (see the documentation linked in the module docstring) -- confirm before
	# changing.
	PRESUBMIT_VERSION = '2.0.0'

	# This line is 'magic' in that git-cl looks for it to decide whether to
	# use Python3 instead of Python2 when running the code in this file.
	USE_PYTHON3 = True

	# This list must be kept in sync with the lists in //.eleventy.js and
	# //scripts/upload_lobs.py.
	# TODO(crbug.com/1457683): Figure out how to share these lists to eliminate
	# the duplication and need to keep them in sync.
	#
	# CheckForLobs() compares every entry against the end of an affected file's
	# path with str.endswith(), so the entries are plain suffixes rather than
	# strict extensions, and the comparison is case-sensitive (hence the
	# separate '.PNG' entry).

	LOB_EXTENSIONS = [
	'.ai',
	'.bin',
	'.bmp',
	'.brd',
	'.bz2',
	'.crx',
	'.config',
	'.dia',
	'.gif',
	'.graffle',
	'.ico',
	'.jpg',
	'jpg', # Some files are missing the '.' :(.
	'.jpeg',
	'.mp4',
	'.msi',
	'.pdf',
	'pdf', # Some files are missing the '.' :(.
	'.png',
	'png', # Some files are missing the '.' :(.
	'.PNG',
	'.swf',
	'.svg',
	'.tar.gz',
	'.tiff',
	'_trace',
	'.webp',
	'.xcf',
	'.xlsx',
	'.zip'
	]


	def CheckPatchFormatted(input_api, output_api):
	    """Run the canned patch-formatting check and return its results.

	    Delegates to input_api.canned_checks.CheckPatchFormatted.

	    NOTE: a function with the same name is defined again further down in
	    this file; that later definition replaces this one when the module is
	    loaded.
	    """
	    canned = input_api.canned_checks
	    return canned.CheckPatchFormatted(input_api, output_api)


	def CheckChangeHasDescription(input_api, output_api):
	    """Run the canned change-description check and return its results.

	    Delegates to input_api.canned_checks.CheckChangeHasDescription.
	    """
	    canned = input_api.canned_checks
	    return canned.CheckChangeHasDescription(input_api, output_api)


	def CheckForLobs(input_api, output_api):
	    """Reject large binary objects (lobs) that are committed directly.

	    An affected file is reported when its path ends with one of the
	    LOB_EXTENSIONS suffixes and its action is not 'D'.

	    Args:
	        input_api: presubmit input API; change.AffectedFiles() is consulted.
	        output_api: presubmit output API used to build the results.

	    Returns:
	        A list with one output_api.PresubmitError per offending file.
	    """
	    # str.endswith() takes a tuple of candidates, so multi-part suffixes
	    # such as '.tar.gz' need no special handling.
	    lob_suffixes = tuple(LOB_EXTENSIONS)
	    message_template = (
	        'The file \'{file_name}\' is a binary that has not been '
	        'uploaded to GCE. Please run:\n\tscripts/upload_lobs.py '
	        '"{file_name}"\nand commit {file_name}.sha1 instead\n'
	        'Run:\n\tgit rm --cached "{file_name}"\n'
	        'to remove the lob from git')

	    problems = []
	    for affected in input_api.change.AffectedFiles():
	        if not str(affected).endswith(lob_suffixes):
	            continue
	        if affected.Action() == 'D':
	            continue
	        message = message_template.format(file_name=affected.LocalPath())
	        problems.append(output_api.PresubmitError(message))
	    return problems


	def CheckLobIgnores(input_api, output_api):
	    """Check that every lob listed in site/.gitignore still has a .sha1 file.

	    Reads the entries between the '#start_lob_ignore' and '#end_lob_ignore'
	    marker lines of site/.gitignore (path relative to the current working
	    directory) and expects 'site/<entry>.sha1' to exist for each of them.
	    Blank lines and '#' comment lines inside the section are skipped.

	    Args:
	        input_api: presubmit input API; only its os_path attribute is used.
	        output_api: presubmit output API used to build the results.

	    Returns:
	        A list with one output_api.PresubmitError per stale entry, in the
	        order the entries appear in the file.

	    Raises:
	        OSError: if site/.gitignore cannot be opened.
	        ValueError: if either marker line is missing from the file.
	    """
	    with open("site/.gitignore", 'r') as ignore_file:
	        lines = [line.rstrip() for line in ignore_file]

	    first = lines.index('#start_lob_ignore') + 1
	    last = lines.index('#end_lob_ignore')

	    output_status = []
	    # dict.fromkeys() drops duplicates while keeping file order, so the
	    # errors are reported in a stable order (iterating a set is not).
	    for ignored_lob in dict.fromkeys(lines[first:last]):
	        # Skip blank lines and comments.  The test has to look at the raw
	        # entry: the joined 'site/...' path can never start with '#'.
	        if not ignored_lob or ignored_lob.startswith('#'):
	            continue

	        lob_sha_file = input_api.os_path.join('site', ignored_lob + '.sha1')
	        if input_api.os_path.exists(lob_sha_file):
	            continue

	        error_msg = (
	            'The sha1 file \'{removed_file}\' no longer exists, '
	            'please remove "{ignored_file}" from site/.gitignore'.format(
	                removed_file=lob_sha_file, ignored_file=ignored_lob))
	        output_status.append(output_api.PresubmitError(error_msg))
	    return output_status


	def CheckPatchFormatted(input_api, output_api):
	    """Check formatting of files.

	    Delegates to input_api.canned_checks.CheckPatchFormatted and returns its
	    results unchanged.
	    """
	    canned = input_api.canned_checks
	    return canned.CheckPatchFormatted(input_api, output_api)


	class _MdLink(NamedTuple):
	    """One link extracted from a markdown file."""

	    file: str  # Path of the file the link was found in.
	    uri: str  # The link target itself.
	    relative_ok: bool  # Can a local path like /dir/foo.md be used here?
	    line_num: int  # Line the link was found on.


	# Mapping of preferred host names. If we find people using <key>, we'll
	# make them use <value> instead. CheckLinks() compares the keys with the
	# parsed netloc of each link, so a key only matches that exact host string.
	_MD_HOST_ALIASES = {
	# keep-sorted start
	'b': 'issuetracker.google.com',
	'chromium.org': 'www.chromium.org',
	'dev.chromium.org': 'www.chromium.org',
	'goto': 'go',
	'goto.google.com': 'go',
	'www.youtube.com': 'youtube.com',
	# keep-sorted end
	}

	# These hosts should always use https://
	# This isn't an exhaustive list, just hosts we commonly refer to.
	# Membership is tested against the exact netloc, so subdomains of a listed
	# host are not covered unless they are listed too.
	# TODO(vapier): Require https:// on all hosts by default, and require any
	# actual http:// hosts be enumerated below. This requires a large cleanup
	# of existing docs first.
	_MD_HTTPS_HOSTS = {
	# keep-sorted start
	'crbug.com',
	'crrev.com',
	'en.wikipedia.org',
	'github.com',
	'google.com',
	'issuetracker.google.com',
	'www.chromium.org',
	'www.google.com',
	'www.w3.org',
	'youtu.be',
	'youtube.com',
	# keep-sorted end
	}

	# These hosts should always use http://
	# NOTE(review): 'g' and 'go' look like bare short-link hosts -- confirm.
	_MD_HTTP_HOSTS = {
	# keep-sorted start
	'g',
	'go',
	# keep-sorted end
	}


	def CheckLinks(input_api, output_api):
	    """Check links used in markdown.

	    Every affected *.md file is scanned for links, and an error is reported
	    for each link that:
	      * uses http:// with a host listed in _MD_HTTPS_HOSTS,
	      * uses https:// with a host listed in _MD_HTTP_HOSTS,
	      * uses a host that has a preferred name in _MD_HOST_ALIASES,
	      * points at www.chromium.org from a file under site/ where a local
	        path could be used instead,
	      * is a local path starting with /site/ in a file under site/,
	      * cannot be parsed by urllib.parse at all.

	    Args:
	        input_api: presubmit input API; AffectedFiles() is consulted.
	        output_api: presubmit output API used to build the results.

	    Returns:
	        A list of output_api.PresubmitError results (empty if nothing is
	        wrong).
	    """
	    # Build up the files to analyze.
	    affected_files = input_api.AffectedFiles(
	        file_filter=lambda x: x.LocalPath().endswith('.md'))

	    # The link styles that are extracted.  Bare URLs without any of this
	    # markup are not picked up.  Patterns are compiled once up front rather
	    # than being looked up again for every line.
	    #
	    # [text](link)
	    # We don't match the opening [ because it can span multiple lines.
	    # The ](...) part has to be on one line.
	    inline_re = re.compile(r'\]\(([^) ]+)\)')
	    # [anchor]: link
	    # [anchor]: link "extra text"
	    reference_re = re.compile(r'^\[[^]]+\]:\s*(\S+)')
	    # <link>
	    autolink_re = re.compile(r'<(https?://[^>]+)>')

	    # Extract the links from the files.
	    links = []
	    for affected_file in affected_files:
	        path = affected_file.LocalPath()
	        for i, line in enumerate(affected_file.NewContents(), start=1):
	            links.extend(
	                _MdLink(path, x, True, i) for x in inline_re.findall(line))
	            m = reference_re.match(line)
	            if m:
	                links.append(_MdLink(path, m.group(1), True, i))
	            links.extend(
	                _MdLink(path, x, False, i)
	                for x in autolink_re.findall(line))

	    # Check links.
	    results = []

	    def _create_result(link, msg, want_uri) -> None:
	        want_link = urllib.parse.urlunparse(want_uri)
	        results.append(
	            output_api.PresubmitError(f'{link.file}:{link.line_num}: {msg}',
	                                      long_text=f'- {link.uri}\n+ {want_link}'))

	    for link in links:
	        try:
	            o = urllib.parse.urlparse(link.uri)
	        except ValueError as e:
	            # urlparse() rejects malformed hosts (e.g. unbalanced brackets).
	            # Report the link instead of letting the whole presubmit crash.
	            results.append(
	                output_api.PresubmitError(
	                    f'{link.file}:{link.line_num}: Unable to parse link',
	                    long_text=f'{link.uri}: {e}'))
	            continue

	        in_site = link.file.startswith('site/')

	        # Check bad http:// usage.
	        if o.scheme == 'http' and o.netloc in _MD_HTTPS_HOSTS:
	            _create_result(link, 'Always use https:// with this host',
	                           o._replace(scheme='https'))

	        # Check bad https:// usage.
	        if o.scheme == 'https' and o.netloc in _MD_HTTP_HOSTS:
	            _create_result(link, 'Always use http:// with this host',
	                           o._replace(scheme='http'))

	        # Check host aliases.
	        newhost = _MD_HOST_ALIASES.get(o.netloc)
	        if newhost is not None:
	            _create_result(link, f'Use {newhost} in links',
	                           o._replace(netloc=newhost))

	        # Have people use relative /foo/bar links instead of
	        # https://www.chromium.org/foo/bar so we can check target links, and
	        # so navigating via the sandbox website works correctly.
	        if link.relative_ok and o.netloc == 'www.chromium.org' and in_site:
	            _create_result(
	                link, 'Use local paths instead of www.chromium.org in links',
	                o._replace(scheme='', netloc='', path=o.path or '/'))

	        # Check relative links for generated docs (under site/).
	        if o.scheme == o.netloc == '' and in_site:
	            # The /site/ prefix is removed in generated content, but works when
	            # viewing under gitiles, so sometimes people test the wrong page.
	            if o.path.startswith('/site/'):
	                # Drop '/site' but keep the slash that follows it.
	                _create_result(link, 'Omit the /site/ prefix in local paths',
	                               o._replace(path=o.path[len('/site'):]))

	    return results