blob: 5967ed4fc434ed55a6262c94490171f0098473ec [file] [log] [blame]
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for extracting emails and components from OWNERS files."""
import extract_histograms
import os
import re
_EMAIL_PATTERN = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'
_OWNERS = 'OWNERS'
# Three '..' are used because calling dirname() yields the path to this
# module's directory, histograms, and the directory above tools, which may or
# may not be src depending on the machine running the code, is up three
# directory levels from the histograms directory.
_DIR_ABOVE_TOOLS = [os.path.dirname(__file__), '..', '..', '..']
_SRC = 'src/'
class Error(Exception):
pass
def _AddTextNodeWithNewLineAndIndent(histogram, node_to_insert_before):
"""Creates and adds a DOM Text Node before the given node in the histogram.
Args:
histogram: The histogram node in which to insert a text node.
node_to_insert_before: A node before which to add the text node.
"""
histogram.insertBefore(
histogram.ownerDocument.createTextNode('\n '),
node_to_insert_before)
def _IsEmailOrPlaceholder(is_first_owner, owner_tag_text, histogram_name):
"""Returns true if |owner_tag_text| is an email or the placeholder text.
Also, verifies that a histogram's first owner tag contains either an email
address, e.g. 'ali@chromium.org' or the placeholder text.
Args:
is_first_owner: True if a histogram's first owner tag is being checked.
owner_tag_text: The text of the owner tag being checked, e.g.
'julie@google.com' or 'src/ios/net/cookies/OWNERS'.
histogram_name: The string name of the histogram.
Raises:
Error: Raised if (A) the text is from the first owner tag and (B) the text
is not a primary owner.
"""
is_email_or_placeholder = (re.match(_EMAIL_PATTERN, owner_tag_text) or
owner_tag_text == extract_histograms.OWNER_PLACEHOLDER)
if is_first_owner and not is_email_or_placeholder:
raise Error('The histogram {} must have a valid first owner, i.e. an '
'individual\'s email address.'
.format(histogram_name))
return is_email_or_placeholder
def _IsWellFormattedFilePath(path):
"""Returns True if the given path begins with 'src/' and ends with 'OWNERS'.
Args:
path: The path to an OWNERS file, e.g. 'src/gin/OWNERS'.
"""
return path.startswith(_SRC) and path.endswith(_OWNERS)
def _GetHigherLevelOwnersFilePath(path):
"""Returns a path to an OWNERS file at a higher level than the given path.
Returns an empty string if an OWNERS file path in a higher level directory
cannot be found.
Suppose the given path is //stuff/chromium/src/jam/tea/milk/OWNERS. The
path //stuff/chromium/src/jam/tea/OWNERS will then be generated, and if it
exists, it will be returned. If not, the path //stuff/chromium/src/jam/OWNERS
will be generated, and if it exists, it will be returned.
Args:
path: The path to an OWNERS file.
"""
# The highest directory that is searched for component information is one
# directory lower than the directory above tools. Depending on the machine
# running this code, the directory above tools may or may not be src.
path_to_limiting_dir = os.path.abspath(os.path.join(*_DIR_ABOVE_TOOLS))
limiting_dir = path_to_limiting_dir.split(os.sep)[-1]
owners_file_limit = (os.sep).join([limiting_dir, _OWNERS])
if path.endswith(owners_file_limit):
return ''
parent_directory = os.path.dirname(os.path.dirname(path))
parent_owners_file_path = os.path.join(parent_directory, _OWNERS)
if (os.path.exists(parent_owners_file_path) and
os.path.isfile(parent_owners_file_path)):
return parent_owners_file_path
return _GetHigherLevelOwnersFilePath(parent_owners_file_path)
def _GetOwnersFilePath(path):
"""Returns an absolute path that can be opened.
Args:
path: A well-formatted path to an OWNERS file, e.g. 'src/courgette/OWNERS'.
Raises:
Error: Raised if the given path is not well-formatted.
"""
if _IsWellFormattedFilePath(path):
# _SRC is removed because the file system on the machine running the code
# may not have a(n) src directory.
path_without_src = path[len(_SRC):]
return os.path.abspath(
os.path.join(*(_DIR_ABOVE_TOOLS + path_without_src.split(os.sep))))
else:
raise Error('The given path {} is not well-formatted.'
'Well-formatted paths begin with "src/" and end with "OWNERS"'
.format(path))
def _ExtractEmailAddressesFromOWNERS(path, depth=0):
"""Returns a list of email addresses in the given file.
Args:
path: The path to an OWNERS file.
depth: The depth of the recursion, which is used to fail fast in the rare
case that the OWNERS file path results in a loop.
Raises:
Error: Raised in two situations. First, raised if (A) the OWNERS file with
the given path has a file directive and (B) the OWNERS file indicated by
the directive does not exist. Second, raised if the depth reaches a
certain limit.
"""
# It is unlikely that any chain of OWNERS files will exceed 10 redirections
# via file:// directives.
limit = 10
if (depth > limit):
raise Error('_ExtractEmailAddressesFromOWNERS has been called {} times. The'
' path {} may be part of an OWNERS loop.'.format(limit, path))
directive = 'file://'
email_pattern = re.compile(_EMAIL_PATTERN)
extracted_emails = []
with open(path, 'r') as owners_file:
for line in [line.lstrip()
for line in owners_file.read().splitlines() if line]:
index = line.find(' ')
first_word = line[:index] if index != -1 else line
if email_pattern.match(first_word):
extracted_emails.append(first_word)
elif first_word.startswith(directive):
next_path = _GetOwnersFilePath(
os.path.join(_SRC, first_word[len(directive):]))
if os.path.exists(next_path) and os.path.isfile(next_path):
extracted_emails.extend(
_ExtractEmailAddressesFromOWNERS(next_path, depth + 1))
else:
raise Error('The path derived from {} does not exist. '
'Derived path: {}'.format(first_word, next_path))
return extracted_emails
def _ExtractComponentFromOWNERS(path):
"""Returns the string component associated with the file at the given path.
Examples are 'Blink>Storage>FileAPI' and 'UI'.
Returns an empty string if no component can be extracted from the OWNERS file
located at path or OWNERS files in higher level directories.
Args:
path: The path to an OWNERS file, e.g. 'src/storage/OWNERS'.
"""
with open(path, 'r') as owners_file:
for line in [line.lstrip()
for line in owners_file.read().splitlines() if line]:
if line.startswith('# COMPONENT: '):
# A typical line is '# COMPONENT: UI>Browser>Bubbles''. The colon is
# always followed by exactly one space. And the symbol >, if present,
# is never preceded or followed by any spaces.
words = line.split(': ')
if len(words) == 2:
return words[1].rstrip()
raise Error('The component info in {} is poorly formatted.'
.format(path))
higher_level_owners_file_path = _GetHigherLevelOwnersFilePath(path)
if higher_level_owners_file_path:
return _ExtractComponentFromOWNERS(higher_level_owners_file_path)
return ''
def _MakeOwners(document, path, emails_with_dom_elements):
"""Makes DOM Elements for owners and returns the elements.
The owners are extracted from the OWNERS file with the given path and
deduped using the given set emails_with_dom_elements. This set has email
addresses that were explicitly listed as histogram owners, e.g.
<owner>liz@chromium.org</owner>. If a histogram has multiple OWNERS file
paths, e.g. <owner>src/cc/OWNERS</owner> and <owner>src/ui/OWNERS</owner>,
then the given set also contains any email addresses that have already been
extracted from OWNERS files.
New owners that are extracted from the given file are also added to
emails_with_dom_elements.
Args:
document: The Document to which the new owners elements will belong.
path: The absolute path to an OWNERS file.
emails_with_dom_elements: The set of email addresses that already have
corresponding DOM Elements.
Returns:
A collection of DOM Elements made from owners in the given OWNERS file.
"""
owner_elements = []
# TODO(crbug.com/987709): An OWNERS file API would be ideal.
emails_from_owners_file = _ExtractEmailAddressesFromOWNERS(path)
# A list is used to respect the order of email addresses in the OWNERS file.
deduped_emails_from_owners_file = []
for email in emails_from_owners_file:
if email not in emails_with_dom_elements:
deduped_emails_from_owners_file.append(email)
emails_with_dom_elements.add(email)
for email in deduped_emails_from_owners_file:
owner_element = document.createElement('owner')
owner_element.appendChild(document.createTextNode(email))
owner_elements.append(owner_element)
return owner_elements
def _UpdateHistogramOwners(histogram, owner_to_replace, owners_to_add):
"""Replaces |owner_to_replace| with |owners_to_add| for the given histogram.
Args:
histogram: The DOM Element to update.
owner: The DOM Element to be replaced. This is a child node of histogram,
and its text is a file path to an OWNERS file, e.g. 'src/mojo/OWNERS'
owners_to_add: A collection of DOM Elements with which to replace
owner_to_replace.
"""
node_after_owners_file = owner_to_replace.nextSibling
replacement_done = False
for owner_to_add in owners_to_add:
if not replacement_done:
histogram.replaceChild(owner_to_add, owner_to_replace)
replacement_done = True
else:
_AddTextNodeWithNewLineAndIndent(histogram, node_after_owners_file)
histogram.insertBefore(owner_to_add, node_after_owners_file)
def _AddHistogramComponent(histogram, component):
"""Makes a DOM Element for the component and adds it to the given histogram.
Args:
histogram: The DOM Element to update.
component: A string component to add, e.g. 'Internals>Network' or 'Build'.
"""
node_to_insert_before = histogram.lastChild
_AddTextNodeWithNewLineAndIndent(histogram, node_to_insert_before)
document = histogram.ownerDocument
component_element = document.createElement('component')
component_element.appendChild(document.createTextNode(component))
histogram.insertBefore(component_element, node_to_insert_before)
def ExpandHistogramsOWNERS(histograms):
"""Updates the given DOM Element's descendants, if necessary.
When a histogram has an owner node whose text is an OWNERS file path rather
than an email address, e.g. <owner>src/base/android/OWNERS</owner> instead of
<owner>joy@chromium.org</owner>, then (A) the histogram's owners need to be
updated and (B) a component may be added.
If the text of an owner node is an OWNERS file path, then this node is
replaced by owner nodes for the emails derived from the OWNERS file. If a
component, e.g. UI>GFX, can be derived from the OWNERS file or an OWNERS file
in a higher-level directory, then a component tag will be added to the
histogram, e.g. <component>UI&gt;GFX</component>.
Args:
histograms: The DOM Element whose descendants may be updated.
Raises:
Error: Raised if the OWNERS file with the given path does not exist.
"""
email_pattern = re.compile(_EMAIL_PATTERN)
for histogram in histograms.getElementsByTagName('histogram'):
owners = histogram.getElementsByTagName('owner')
# owner is a DOM Element with a single child, which is a DOM Text Node.
emails_with_dom_elements = set([
owner.childNodes[0].data
for owner in owners
if email_pattern.match(owner.childNodes[0].data)])
# component is a DOM Element with a single child, which is a DOM Text Node.
components_with_dom_elements = set([
extract_histograms.NormalizeString(component.childNodes[0].data)
for component in histogram.getElementsByTagName('component')])
for index in range(len(owners)):
owner = owners[index]
owner_text = owner.childNodes[0].data
name = histogram.getAttribute('name')
if _IsEmailOrPlaceholder(index == 0, owner_text, name):
continue
path = _GetOwnersFilePath(owner_text)
if not os.path.exists(path) or not os.path.isfile(path):
raise Error('The file at {} does not exist.'.format(path))
owners_to_add = _MakeOwners(
owner.ownerDocument, path, emails_with_dom_elements)
if not owners_to_add:
raise Error('No emails could be derived from {}.'.format(path))
_UpdateHistogramOwners(histogram, owner, owners_to_add)
component = _ExtractComponentFromOWNERS(path)
if component and component not in components_with_dom_elements:
components_with_dom_elements.add(component)
_AddHistogramComponent(histogram, component)