blob: 825de30a1751746848c6916ff1cde40cebf61075 [file] [log] [blame]
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import re
import urlparse
RIETVELD_CODE_REVIEW_URL_PATTERN = re.compile(
'^(?:Review URL|Review-Url): (?P<url>.*/(?P<change_id>\d+)).*$',
re.IGNORECASE)
COMMIT_POSITION_PATTERN = (
r'^Cr-Commit-Position: %s@{#(?P<commit_position>\d+)}$')
REVERTED_REVISION_PATTERN = re.compile(
'^> Committed: https://.+/(?P<revision>[0-9a-fA-F]{40})$', re.IGNORECASE)
GERRIT_CHANGE_ID_PATTERN = re.compile('^Change-Id: (?P<change_id>.*)$',
re.IGNORECASE)
GERRIT_REVIEW_URL_PATTERN = re.compile('^Reviewed-on: (?P<url>.*/\d+).*$',
re.IGNORECASE)
CHANGE_INFO_PATTERN = re.compile('^.*:.*$', re.IGNORECASE)
def ExtractChangeInfo(message, ref):
"""Returns the commit position and code review url in the commit message.
A "commit position" is something similar to SVN version ids; i.e.,
numeric identifiers which are issued in sequential order. The reason
we care about them is that they're easier for humans to read than
the hashes that Git uses internally for identifying commits. We
should never actually use them for *identifying* commits; they're
only for pretty printing to humans.
Returns:
change_info (dict): information about a change. For example:
Gerrit:
{
'commit_position': 12345,
'code_review_url':
'https://chromium-review.googlesource.com/54322',
'host': 'chromium-review.googlesource.com',
'change_id': 'Iaa1234567fer890'
}
Rietveld:
{
'commit_position': 12345,
'code_review_url': 'https://codereview.chromium.org/1234567890',
'host': 'codereview.chromium.org',
'change_id': '1234567890'
}
"""
change_info = {
'commit_position': None,
'code_review_url': None,
'host': None,
'change_id': None
}
if not message:
return change_info
lines = message.strip().split('\n')
lines.reverse()
for line in lines:
if (line != '' and not line.isspace() and
not CHANGE_INFO_PATTERN.match(line)):
# Breaks when hit the first first non-empty and non-space line
# which is not in format "footer-name:value".
break
if not change_info['commit_position']:
commit_position_pattern = re.compile(COMMIT_POSITION_PATTERN % ref,
re.IGNORECASE)
match = commit_position_pattern.match(line)
if match:
change_info['commit_position'] = int(match.group('commit_position'))
if not change_info['host']:
match = RIETVELD_CODE_REVIEW_URL_PATTERN.match(line)
if match:
change_info['code_review_url'] = match.group('url')
change_info['host'] = urlparse.urlparse(match.group('url')).netloc
change_info['change_id'] = match.group('change_id')
else:
match = GERRIT_REVIEW_URL_PATTERN.match(line)
if match:
change_info['host'] = urlparse.urlparse(match.group('url')).netloc
if not change_info['change_id']:
match = GERRIT_CHANGE_ID_PATTERN.match(line)
if match:
change_info['change_id'] = match.group('change_id')
if (not change_info['code_review_url'] and change_info['host'] and
change_info['change_id']):
# For code review urls for Gerrit CLs, we want to unify them in
# 'https://host/q/change_id' format.
change_info['code_review_url'] = 'https://%s/q/%s' % (
change_info['host'], change_info['change_id'])
return change_info
def NormalizeEmail(email):
"""Normalizes the email from git repo.
Some email is like: test@chromium.org@bbb929c8-8fbe-4397-9dbb-9b2b20218538.
"""
parts = email.split('@')
return '@'.join(parts[0:2])
def GetRevertedRevision(message):
"""Parse message to get the reverted revision if there is one."""
lines = message.strip().splitlines()
if not lines:
return None
if not lines[0].lower().startswith('revert'):
return None
for line in reversed(lines): # pragma: no cover
# TODO: Handle cases where no reverted_revision in reverting message.
reverted_revision_match = REVERTED_REVISION_PATTERN.match(line)
if reverted_revision_match:
return reverted_revision_match.group(1)
# TODO(katesonia): Deprecate this copy (there is a copy in min_distance
# feature), after scorer-based classfier got depecated.
def DistanceBetweenLineRanges((start1, end1), (start2, end2)):
"""Given two ranges, compute the (unsigned) distance between them.
Args:
start1 (int): the first line included in the first range.
end1 (int): the last line included in the first range. Must be
greater than or equal to ``start1``.
start2 (int): the first line included in the second range.
end2 (int): the last line included in the second range. Must be
greater than or equal to ``start1``.
Returns:
If the end of the earlier range comes before the start of the later
range, then the difference between those points. Otherwise, returns
zero (because the ranges overlap).
"""
if end1 < start1:
raise ValueError('the first range is empty: %d < %d' % (end1, start1))
if end2 < start2:
raise ValueError('the second range is empty: %d < %d' % (end2, start2))
# There are six possible cases, but in all the cases where the two
# ranges overlap, the latter two differences will be negative.
return max(0, start2 - end1, start1 - end2)