blob: a50b84886bd5510e465fd488d9affc8716291a9c [file] [log] [blame]
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file or at
# https://developers.google.com/open-source/licenses/bsd
"""Autolink helps auto-link references to artifacts in text.
This class maintains a registry of artifact autolink syntax specs and
callbacks. The structure of that registry is:
{ component_name: (lookup_callback,
match_to_reference_callback,
{ regex: substitution_callback, ...}),
...
}
For example:
{ 'tracker':
(GetReferencedIssues,
ExtractProjectAndIssueIds,
{_ISSUE_REF_RE: ReplaceIssueRef}),
'versioncontrol':
(GetReferencedRevisions,
ExtractProjectAndRevNum,
{_GIT_HASH_RE: ReplaceRevisionRef}),
}
The dictionary of regexes is used here because, in the future, we
might add more regexes for each component rather than have one complex
regex per component.
"""
import logging
import re
import urllib
import urlparse
import settings
from framework import template_helpers
from framework import validate
from proto import project_pb2
from tracker import tracker_helpers
# Matches an HTML closing tag (e.g. "</b>") at the very end of a string.
_CLOSING_TAG_RE = re.compile('</[a-z0-9]+>$', re.IGNORECASE)
# URL schemes that the autolinker recognizes as the start of a link.
_LINKIFY_SCHEMES = r'(https?://|ftp://|mailto:)'
# Also count a start-tag '<' as a URL delimiter, since the autolinker
# is sometimes run against HTML fragments.
_IS_A_LINK_RE = re.compile(r'(%s)([^\s<]+)' % _LINKIFY_SCHEMES, re.UNICODE)
# These are allowed in links, but if any of the closing delimiters appear
# at the end of the link, and the opening one is not part of the link,
# then trim off the closing delimiters.
# Each entry is (opening delimiter or None, closing delimiter). A None
# opening delimiter means the closing character is always trimmed when it
# is the last character of the link. Linkify walks this list once, in order.
_LINK_TRAILING_CHARS = [
(None, ':'),
(None, '.'),
(None, ','),
('<', '>'),
('"', '"'),
('(', ')'),
('[', ']'),
('{', '}'),
]
def Linkify(_mr, autolink_regex_match,
            _component_ref_artifacts):
    """Examine a textual reference and replace it with a hyperlink or not.

    This is a callback for use with the autolink feature.

    Args:
      _mr: common info parsed from the user HTTP request (unused).
      autolink_regex_match: regex match for the textual reference.
      _component_ref_artifacts: unused value.

    Returns:
      A list of TextRuns. When the matched text (minus any trailing
      punctuation or closing tag) validates as a URL or email address, the
      first run has tag=a and links to it, and an optional second plain run
      holds the trimmed trailing text. Otherwise a single plain run holding
      the complete matched text is returned.
    """
    matched_text = autolink_regex_match.group(0)
    hyperlink = matched_text

    # Peel closing delimiters off the end of the link unless the matching
    # opening delimiter appears earlier inside the link itself.
    trailing = ''
    for begin, end in _LINK_TRAILING_CHARS:
        if hyperlink.endswith(end):
            if not begin or hyperlink[:-len(end)].find(begin) == -1:
                trailing = end + trailing
                hyperlink = hyperlink[:-len(end)]

    # A closing HTML tag glued onto the end is not part of the link either.
    tag_match = _CLOSING_TAG_RE.search(hyperlink)
    if tag_match:
        trailing = hyperlink[tag_match.start(0):] + trailing
        hyperlink = hyperlink[:tag_match.start(0)]

    if (not validate.IsValidURL(hyperlink) and
            not validate.IsValidEmail(hyperlink)):
        # Not linkable: emit everything that was matched. Returning only the
        # trimmed hyperlink here would silently drop the trailing characters
        # from the rendered comment.
        return [template_helpers.TextRun(matched_text)]

    result = [template_helpers.TextRun(hyperlink, tag='a', href=hyperlink)]
    if trailing:
        result.append(template_helpers.TextRun(trailing))

    return result
# Regular expression to detect git hashes.
# Used to auto-link to Git hashes when displaying issue details.
# Matches a hexadecimal string N of exactly 40 chars that sits on word
# boundaries, optionally preceded by "r", "revision " or "revision #"
# (e.g. "N", "rN", "revision N" and "revision #N").
_GIT_HASH_RE = re.compile(
r'\b(?P<prefix>r(evision\s+#?)?)?(?P<revnum>([a-f0-9]{40}))\b',
re.IGNORECASE | re.MULTILINE)
# This is for SVN revisions and Git commit positions: a mandatory "r",
# "revision " or "revision #" prefix followed by 1 to 7 decimal digits.
_SVN_REF_RE = re.compile(
r'\b(?P<prefix>r(evision\s+#?)?)(?P<revnum>([0-9]{1,7}))\b',
re.IGNORECASE | re.MULTILINE)
def GetReferencedRevisions(_mr, _refs):
    """Look up the revisions referenced in comments (currently a no-op).

    Args:
      _mr: common info parsed from the user HTTP request (unused).
      _refs: revision references collected from the comment text (unused).

    Returns:
      Always None: any revision reference is autolinked without checking
      that such a revision actually exists.
    """
    # TODO(jrobbins): Hit crrev.com and check that the revision exists
    # and show a rollover with revision info.
    return None
def ExtractRevNums(_mr, autolink_regex_match):
    """Convert a revision regex match into its internal reference form.

    Args:
      _mr: common info parsed from the user HTTP request (unused).
      autolink_regex_match: regex match that has a 'revnum' named group.

    Returns:
      A one-element list holding the text of the 'revnum' group.
    """
    revnum = autolink_regex_match.group('revnum')
    logging.debug('revision ref = %s', revnum)
    return [revnum]
def ReplaceRevisionRef(
        mr, autolink_regex_match, _component_ref_artifacts):
    """Build the hyperlink run for one revision reference.

    Args:
      mr: common info parsed from the user HTTP request; mr.project selects
          the revision URL format.
      autolink_regex_match: regex match with 'prefix' and 'revnum' groups.
      _component_ref_artifacts: unused value.

    Returns:
      A one-element list with a TextRun (tag=a) whose text is the optional
      prefix followed by the revision, linking to the revision URL.
    """
    prefix, revnum = autolink_regex_match.group('prefix', 'revnum')
    # Keep whatever prefix the user typed (e.g. "r") in the link text.
    link_text = '%s%s' % (prefix, revnum) if prefix else revnum
    link_target = _GetRevisionURLFormat(mr.project).format(revnum=revnum)
    return [template_helpers.TextRun(link_text, tag='a', href=link_target)]
def _GetRevisionURLFormat(project):
    """Return the URL format string used to link to a revision.

    Args:
      project: object with a revision_url_format attribute.

    Returns:
      The project's own revision_url_format when it is set (truthy),
      otherwise the site-wide settings.revision_url_format.
    """
    # TODO(jrobbins): Expose a UI to customize it to point to whatever site
    # hosts the source code. Also, site-wide default.
    project_format = project.revision_url_format
    if project_format:
        return project_format
    return settings.revision_url_format
# Regular expression to detect issue references.
# Used to auto-link to other issues when displaying issue details.
# Matches the word "issue(s)" or "bug(s)" when it is not part of a larger
# word, optionally followed by ":" or "=", and then one or more issue
# numbers.  Each number may be preceded by a "project:" or "project#"
# qualifier and/or a "#", and numbers may be separated by ",", "and" or
# "or", e.g. "issues 1, 2 and chromium:3".
_ISSUE_REF_RE = re.compile(r"""
(?P<prefix>\b(issues?|bugs?)[ \t]*(:|=)?)
([ \t]*(?P<project_name>\b[-a-z0-9]+[:\#])?
(?P<number_sign>\#?)
(?P<local_id>\d+)\b
(,?[ \t]*(and|or)?)?)+""", re.IGNORECASE | re.VERBOSE)
# Matches one individual issue reference: an optional "issue"/"bug" prefix,
# an optional project qualifier, an optional "#", and the numeric local ID.
# It is applied to text that _ISSUE_REF_RE already matched in order to
# split a multi-issue reference into its individual issues.
_SINGLE_ISSUE_REF_RE = re.compile(r"""
(?P<prefix>\b(issue|bug)[ \t]*)?
(?P<project_name>\b[-a-z0-9]+[:\#])?
(?P<number_sign>\#?)
(?P<local_id>\d+)\b""", re.IGNORECASE | re.VERBOSE)
def CurryGetReferencedIssues(services):
    """Bind the persistence services into an issue-lookup callback.

    Currying gives the callback access to the persist objects without
    requiring that every possible persist object be passed through the
    autolink registry and functions.

    Args:
      services: connection to issue, config, and project persistence layers.

    Returns:
      A ready-to-use function that accepts the arguments that autolink
      expects to pass to it.
    """

    def GetReferencedIssues(mr, ref_tuples):
        """Look up the open and closed issues referenced by these comments.

        Args:
          mr: commonly used info parsed from the request.
          ref_tuples: list of (project_name, local_id) tuples for each issue
              that is mentioned in the comment text.  The project_name may
              be None, in which case the issue is assumed to be in the
              current project.

        Returns:
          A pair (open_dict, closed_dict).  Each dict maps the key built by
          _IssueProjectKey to the corresponding issue.
        """
        # References without an explicit project fall back to the current one.
        project_names = [
            (ref_pn or mr.project_name) for ref_pn, _ in ref_tuples]
        ref_projects = services.project.GetProjectsByName(
            mr.cnxn, project_names)
        issue_ids = services.issue.ResolveIssueRefs(
            mr.cnxn, ref_projects, mr.project_name, ref_tuples)
        open_issues, closed_issues = (
            tracker_helpers.GetAllowedOpenedAndClosedIssues(
                mr, issue_ids, services))

        open_dict = {
            _IssueProjectKey(issue.project_name, issue.local_id): issue
            for issue in open_issues}
        closed_dict = {
            _IssueProjectKey(issue.project_name, issue.local_id): issue
            for issue in closed_issues}

        logging.info('autolinking dicts %r and %r', open_dict, closed_dict)
        return open_dict, closed_dict

    return GetReferencedIssues
def _ParseProjectNameMatch(project_name):
    """Reduce a regex-matched project qualifier to the bare project name.

    Args:
      project_name: a string with the project name matched in a regex, or
          a falsy value when nothing was matched.

    Returns:
      The project name without leading whitespace and without any trailing
      '#', ':', space, tab or newline characters; None when project_name is
      empty or None.
    """
    if project_name:
        without_leading_space = project_name.lstrip()
        return without_leading_space.rstrip('#: \t\n')
    return None
def ExtractProjectAndIssueIds(_mr, autolink_regex_match):
    """Convert a regex match for a textual reference into our internal form.

    Args:
      _mr: common info parsed from the user HTTP request (unused).
      autolink_regex_match: regex match whose full text may mention one or
          more issues.

    Returns:
      A list of (project_name, local_id) tuples, one per issue found in the
      matched text.  project_name is None when no project was given.
    """
    matched_text = autolink_regex_match.group(0)
    refs = []
    for single in _SINGLE_ISSUE_REF_RE.finditer(matched_text):
        project_name = _ParseProjectNameMatch(single.group('project_name'))
        local_id = int(single.group('local_id'))
        ref = (project_name, local_id)
        refs.append(ref)
        logging.info('issue ref = %s', ref)

    return refs
# Keyed on project name rather than project ID so that callers without a
# services object do not need to do a project lookup.
def _IssueProjectKey(project_name, local_id):
    """Build the dictionary key that identifies a referenced issue.

    Args:
      project_name: name of the project that the issue belongs to.
      local_id: integer ID of the issue within that project.

    Returns:
      A string of the form 'project_name:local_id'.
    """
    key = '%s:%d' % (project_name, local_id)
    return key
class IssueRefRun(object):
    """A text run that links to a referenced issue."""

    def __init__(self, issue, is_closed, project_name, prefix):
        """Compute the link attributes for one issue reference.

        Args:
          issue: referenced issue; its summary and local_id are read.
          is_closed: True if the issue is closed, which selects the
              'closed_ref' CSS class.
          project_name: project name used to build the issue URL.
          prefix: text that preceded the issue number in the comment,
              possibly the empty string.
        """
        local_id = issue.local_id
        link_text = '%s%d' % (prefix, local_id)
        if is_closed and not prefix:
            # A closed reference without a prefix is padded with spaces.
            link_text = ' %s ' % link_text

        self.tag = 'a'
        if is_closed:
            self.css_class = 'closed_ref'
        else:
            self.css_class = None
        self.title = issue.summary
        self.href = '/p/%s/issues/detail?id=%d' % (project_name, local_id)
        self.content = link_text
def ReplaceIssueRef(mr, autolink_regex_match, component_ref_artifacts):
    """Examine a textual reference and replace it with an autolink or not.

    Args:
      mr: commonly used info parsed from the request.
      autolink_regex_match: regex match for the textual reference.
      component_ref_artifacts: result of earlier call to GetReferencedIssues,
          i.e. an (open_dict, closed_dict) pair.

    Returns:
      A list of IssueRefRuns and TextRuns to replace the textual
      reference.  Each individual issue that can be autolinked becomes an
      HTML hyperlink run; everything else is kept as the original plain
      text.
    """
    open_dict, closed_dict = component_ref_artifacts
    matched_text = autolink_regex_match.group(0)
    logging.info('called ReplaceIssueRef on %r', matched_text)

    runs = []
    cursor = 0  # Index just past the last text that was already emitted.
    for single in _SINGLE_ISSUE_REF_RE.finditer(matched_text):
        start = single.start()
        if start < cursor:
            continue
        # Preserve any text between the previous reference and this one.
        between = matched_text[cursor:start]
        if between:
            runs.append(template_helpers.TextRun(between))
        runs.append(
            _ReplaceSingleIssueRef(mr, single, open_dict, closed_dict))
        cursor = single.end()

    # Preserve any text after the last reference.
    tail = matched_text[cursor:]
    if tail:
        runs.append(template_helpers.TextRun(tail))

    return runs
def _ReplaceSingleIssueRef(mr, submatch, open_dict, closed_dict):
    """Replace one issue reference with a link, or the original text.

    Args:
      mr: commonly used info parsed from the request; mr.project_name is
          used when the reference names no project.
      submatch: regex match with 'prefix', 'project_name', 'number_sign'
          and 'local_id' groups.
      open_dict: dict of referenced open issues, keyed by _IssueProjectKey.
      closed_dict: dict of referenced closed issues, keyed the same way.

    Returns:
      An IssueRefRun when the issue is found in either dict, otherwise a
      plain TextRun that reproduces the reference text.
    """
    # Everything the user typed before the number goes into the link text.
    prefix = submatch.group('prefix') or ''
    qualifier = submatch.group('project_name')
    if qualifier:
        prefix += qualifier
        project_name = qualifier.lstrip().rstrip(':#')
    else:
        # We need project_name for the URL, even if it is not in the text.
        project_name = mr.project_name

    number_sign = submatch.group('number_sign')
    if number_sign:
        prefix += number_sign

    local_id = int(submatch.group('local_id'))
    issue_key = _IssueProjectKey(project_name or mr.project_name, local_id)

    # Open issues take precedence over closed ones.
    for issues_by_key, is_closed in ((open_dict, False), (closed_dict, True)):
        if issue_key in issues_by_key:
            return IssueRefRun(
                issues_by_key[issue_key], is_closed, project_name, prefix)

    # Don't link to non-existent issues.
    return template_helpers.TextRun('%s%d' % (prefix, local_id))
class Autolink(object):
    """Maintains a registry of autolink syntax and can apply it to comments."""

    def __init__(self):
        # {component_name: (artifact_lookup_function,
        #                   match_to_reference_function,
        #                   {regex: substitution_function, ...})}
        self.registry = {}

    def RegisterComponent(self, component_name, artifact_lookup_function,
                          match_to_reference_function, autolink_re_subst_dict):
        """Register all the autolink info for a software component.

        Registering the same component_name again replaces the earlier entry.

        Args:
          component_name: string name of software component, must be unique.
          artifact_lookup_function: function to batch lookup all artifacts
              that might have been referenced in a set of comments:
              function(mr, refs) -> referenced_artifacts
              the referenced_artifacts will be passed to each subst function.
          match_to_reference_function: convert a regex match object to
              some internal representation of the artifact reference:
              function(mr, match) -> iterable of hashable refs, or None
          autolink_re_subst_dict: dictionary of compiled regular expressions
              and the substitution function that should be called for each
              match:
              function(mr, match, referenced_artifacts) -> list of text runs
        """
        self.registry[component_name] = (artifact_lookup_function,
                                         match_to_reference_function,
                                         autolink_re_subst_dict)

    def GetAllReferencedArtifacts(self, mr, comment_text_list):
        """Call callbacks to lookup all artifacts possibly referenced.

        Args:
          mr: information parsed out of the user HTTP request.
          comment_text_list: list of comment content strings.

        Returns:
          Opaque object that can be passed to MarkupAutolinks.  Its
          structure happens to be {component_name: artifact_list, ...}.
        """
        all_referenced_artifacts = {}
        # items() rather than iteritems(): works on Python 2 and Python 3.
        for comp, (lookup, match_to_refs, re_dict) in self.registry.items():
            # Collect refs across all comments first so that each component
            # does a single batch lookup.
            refs = set()
            for comment_text in comment_text_list:
                for regex in re_dict:
                    for match in regex.finditer(comment_text):
                        additional_refs = match_to_refs(mr, match)
                        if additional_refs:
                            refs.update(additional_refs)

            all_referenced_artifacts[comp] = lookup(mr, refs)

        return all_referenced_artifacts

    def MarkupAutolinks(self, mr, text_runs, all_referenced_artifacts):
        """Loop over components and regexes, applying all substitutions.

        Args:
          mr: info parsed from the user's HTTP request.
          text_runs: List of text runs for the user's comment.
          all_referenced_artifacts: result of previous call to
              GetAllReferencedArtifacts.

        Returns:
          List of text runs for the entire user comment, some of which may
          have attributes that cause them to render as links in
          render-rich-text.ezt.

        Raises:
          KeyError: if a registered component has no entry in
              all_referenced_artifacts.
        """
        # Process components in determinate alphabetical order.  sorted()
        # works on Python 2 and 3, unlike calling .sort() on items().
        for component in sorted(self.registry):
            _lookup, _match_ref, re_subst_dict = self.registry[component]
            component_ref_artifacts = all_referenced_artifacts[component]
            for regex, subst_fun in re_subst_dict.items():
                text_runs = self._ApplySubstFunctionToRuns(
                    text_runs, regex, subst_fun, mr, component_ref_artifacts)

        return text_runs

    def _ApplySubstFunctionToRuns(
            self, text_runs, regex, subst_fun, mr, component_ref_artifacts):
        """Apply autolink regex and substitution function to each text run.

        Args:
          text_runs: list of TextRun objects with parts of the original
              comment.
          regex: Regular expression for detecting textual references to
              artifacts.
          subst_fun: function to return autolink markup, or original text.
          mr: common info parsed from the user HTTP request.
          component_ref_artifacts: already-looked-up destination artifacts
              to use when computing substitution text.

        Returns:
          A new list with more and smaller runs, some of which may have tag
          and link attributes set.
        """
        result_runs = []
        for run in text_runs:
            content = run.content
            if run.tag:
                # This chunk has already been substituted, don't allow nested
                # autolinking to mess up our output.
                result_runs.append(run)
            else:
                pos = 0
                for match in regex.finditer(content):
                    if match.start() > pos:
                        # Keep the plain text that precedes this match.
                        result_runs.append(template_helpers.TextRun(
                            content[pos: match.start()]))
                    replacement_runs = subst_fun(
                        mr, match, component_ref_artifacts)
                    result_runs.extend(replacement_runs)
                    pos = match.end()

                if content[pos:]:  # Keep any text after the last match.
                    result_runs.append(
                        template_helpers.TextRun(content[pos:]))

        # TODO(jrobbins): ideally we would merge consecutive plain text runs
        # so that regexes can match across those run boundaries.

        return result_runs
def RegisterAutolink(services):
    """Register all the autolink hooks.

    The numeric prefixes in the component names determine the order in
    which the components are applied, because components are processed in
    alphabetical order of their names.

    Args:
      services: object whose autolink attribute provides RegisterComponent.
    """
    # Plain links need no artifact lookup, so both callbacks are no-ops.
    services.autolink.RegisterComponent(
        '01-linkify',
        lambda _mr, _refs: None,
        lambda _mr, _match: None,
        {_IS_A_LINK_RE: Linkify})

    tracker_substitutions = {_ISSUE_REF_RE: ReplaceIssueRef}
    services.autolink.RegisterComponent(
        '02-tracker',
        CurryGetReferencedIssues(services),
        ExtractProjectAndIssueIds,
        tracker_substitutions)

    revision_substitutions = {
        _GIT_HASH_RE: ReplaceRevisionRef,
        _SVN_REF_RE: ReplaceRevisionRef,
    }
    services.autolink.RegisterComponent(
        '03-versioncontrol',
        GetReferencedRevisions,
        ExtractRevNums,
        revision_substitutions)