blob: 6be3ede726b1e64bebafd58df94260e254ba04e4 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from __future__ import print_function
import difflib
import logging
import os
import re
import textwrap
def ProcessIncludesInContent(lines, fname):
'''ProcessIncludesInContent expands any INCLUDE directives found in a markdown file.
Parameters:
lines : a list of strings with line endings that represent the contents
of the Markdown file.
fname : the name of the file including the path if applicable. Includes
are resolved relative to this path.
Include directives take the form:
<!-- INCLUDE <relative_path> [(<line_count> lines)] [fenced as <fence_type>] -->
The parts of the directive in brackets are optional.
The <relative_path> is an unquoted string with no spaces in it that
specifies the relative path to the markdown file to be included. The lack
of spaces in the paths can be easily remedied, but is not expected to be an
issue for our use case in CEL.
<fence_type> can be used to indicate that the content of the file should be
included as a fenced block of the specified type. Note that the type is
mandatory for the fence specification. If specified, the contents of the
file will be included in the file as follows:
``` <fence_type>
<contents of the file>
```
Once expanded, the "(<line_count> lines)" portion will be inserted to
indicate how many lines were added during the expansion. Do not change the
line count or the included contents.
'''
class Include(object):
def __init__(self, fn, lc, at, ft):
self.filename = fn
self.line_count = int(lc) if lc else 0
self.at = at
self.fence_type = ft
dirname = os.path.dirname(fname)
replacements = []
for i, l in enumerate(lines):
if l.startswith('<!-- INCLUDE '):
m = re.match(
r'<!-- INCLUDE +(?P<fn>[^ ]*) +(?:\((?P<lc>\d+) lines\) +|)(?:fenced as (?P<ft>\w+) +|)-->',
l)
if m is None:
raise Exception("improperly formatted INCLUDE line: {}".format(l))
replacements.append(
Include(m.group('fn'), m.group('lc'), i, m.group('ft')))
for r in reversed(replacements):
newlines = []
with open(os.path.join(dirname, r.filename), 'r') as f:
newlines = f.readlines()
for l in newlines:
if l.startswith('<!-- INCLUDE '):
raise Exception('''Recursive includes are not supported.''')
if r.fence_type is not None:
newlines[0:0] = ['``` {}\n'.format(r.fence_type)]
newlines.append('```\n')
lines[r.at] = '<!-- INCLUDE {} ({} lines){} -->\n'.format(
r.filename, len(newlines),
' fenced as {}'.format(r.fence_type) if r.fence_type else '')
lines[r.at + 1:r.at + 1 + r.line_count] = newlines
def FixOldStyleLinks(lines, fname):
'''FixOldStyleLinks replaces links of the form [foo] with [foo][]
The former style is accepted by Gitiles, but is not valid CommonMark. Hence
this function replaces it with the equivalent latter form. This replacement
ensures that the links are correctly handled by editors and viewers other
than Gitiles.
'''
bad_link_re = re.compile(r'(^|[^]])\[(?P<ref>[^]]+)\](?=$|[^[:(])')
in_pre = False
for i, l in enumerate(lines):
if '```' in l:
in_pre = not in_pre
if in_pre:
continue
if "[TOC]" in l:
continue
lines[i] = re.sub(bad_link_re, r'\1[\g<ref>][]', l)
def FixTrailingWhitespace(lines, fname):
'''FixTrailingWhitespace does what it says and removes trailing whitespace.
'''
trailing_ws_re = re.compile(r'\s+(?=\n)$')
for i, l in enumerate(lines):
lines[i] = re.sub(trailing_ws_re, '', l)
def CheckLinksInContent(lines, fname):
'''CheckLinksInContent verifies that reference style links are defined in the
same document.
Reference style links are links of the form [foo][], or [Foo][foo] where
[foo] needs to be defiend somewhere else in the document as:
[foo]: https://example.com/foo
This function raises an exception with a suitable description if a reference
style link is not defined.
'''
links = set()
link_re = re.compile(r"\[(?P<ref>[^]]+)\]: ")
for l in lines:
m = link_re.match(l)
if m is None:
continue
links.add(m.group('ref'))
whole_thing = re.sub(r'\s+', ' ', ''.join(lines), count=0)
whole_thing = re.sub(r'`[^`]*`', '', whole_thing, count=0)
not_found = set()
for m in re.finditer(r'\[(?P<a>[^]]*)\]\[(?P<ref>[^]]*)\]', whole_thing):
ref = m.group('ref') if m.group('ref') != '' else m.group('a')
if ref not in links:
not_found.add(ref)
if len(not_found) != 0:
raise Exception(
textwrap.dedent('''\
The following list of links were unresolved in {}:
{}
'''.format(fname, ','.join(list(not_found)))))
def FormatMarkdown(fname, dry_run=False):
'''FormatMarkdown resolves any includes, fixes links, corrects trailing
whitespace, and verifies that reference style links are defined in Markdown
document specified by the filename in |fname|.
if |dry_run| is True, then checks whether the contents in the file at |fname|
would be modified by the function.
Returns True if the file at |fname| was modified (or in the case of
|dry_run==True|, would be modified). False otherwise. A return value of False
can be safely assumed to mean that the file contents were not modified.
For additional information about the changes that are applied see the
documentation in:
ProcessIncludesInContent()
FixOldStyleLinks()
FixTrailingWhitespace()
CheckLinksInContent()
'''
lines = []
with open(fname, 'r') as f:
lines = f.readlines()
for l in lines:
if "-- Skip validation --" in l:
logging.info("Skipping %s due to skip validation directive", fname)
return False
unmodified = lines[:]
ProcessIncludesInContent(lines, fname)
FixOldStyleLinks(lines, fname)
FixTrailingWhitespace(lines, fname)
CheckLinksInContent(lines, fname)
if lines == unmodified:
logging.info("%s is already correctly formatted", fname)
return False
if dry_run:
print("Would write %s with the following changes:", fname)
for l in difflib.unified_diff(unmodified, lines, fname + " (original)",
fname + " (modified)"):
print(l, end='')
print('\n')
else:
logging.info("Writing %s", fname)
with open(fname, 'w') as f:
f.writelines(lines)
return True