blob: 631b0da844c1c624a36c38bbbda15fe0bedf187a [file] [log] [blame]
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Migrates histogram_suffixes to patterned histograms"""
import argparse
import logging
import os
from xml.dom import minidom
import extract_histograms
import histogram_configuration_model
import histogram_paths
import path_util
HISTOGRAM_SUFFIXES_LIST_PATH = path_util.GetInputFile(
'tools/metrics/histograms/metadata/histogram_suffixes_list.xml')
def _ExtractOwnerNodes(node):
"""Extracts all owners from |node|. Returns None if not exists."""
return node.getElementsByTagName('owner')
def _RemoveSuffixesComment(node, histogram_suffixes_name):
"""Remove suffixes related comments from |node|."""
for child in node.childNodes:
if child.nodeType == minidom.Node.COMMENT_NODE:
if ('Name completed by' in child.data
and histogram_suffixes_name in child.data):
node.removeChild(child)
def _UpdateSummary(histogram, histogram_suffixes_name):
"""Appends a placeholder string to the |histogram|'s summary node."""
summary = histogram.getElementsByTagName('summary')
assert len(summary) == 1, 'A histogram should have a single summary node.'
summary = summary[0]
if summary.firstChild.nodeType != summary.TEXT_NODE:
raise ValueError('summary_node doesn\'t contain text.')
summary.firstChild.replaceWholeText(
'%s {%s}' % (summary.firstChild.data.strip(), histogram_suffixes_name))
def _AreAllAffectedHistogramsFound(affected_histograms, histograms):
"""Checks that are all affected histograms found in |histograms|."""
histogram_names = [histogram.getAttribute('name') for histogram in histograms]
return all(
affected_histogram.getAttribute('name') in histogram_names
for affected_histogram in affected_histograms)
def _GetSuffixesDict(nodes, all_histograms):
"""Gets a dict of simple histogram-suffixes to be used in the migration.
Returns two dicts of histogram-suffixes to be migrated to the new patterned
histograms syntax.
The first dict: the keys are the histogram-suffixes' affected histogram name
and the values are the histogram_suffixes nodes that have only one
affected-histogram. These histograms-suffixes can be converted to inline
patterned histograms.
The second dict: the keys are the histogram_suffixes name and the values
are the histogram_suffixes nodes whose affected-histograms are all present in
the |all_histograms|. These histogram suffixes can be converted to out-of-line
variants.
Args:
nodes: A Nodelist of histograms_suffixes nodes.
all_histograms: A Nodelist of all chosen histograms.
Returns:
A dict of histograms-suffixes nodes keyed by their names.
"""
single_affected = {}
all_affected_found = {}
for histogram_suffixes in nodes:
affected_histograms = histogram_suffixes.getElementsByTagName(
'affected-histogram')
if len(affected_histograms) == 1:
affected_histogram = affected_histograms[0].getAttribute('name')
single_affected[affected_histogram] = histogram_suffixes
elif _AreAllAffectedHistogramsFound(affected_histograms, all_histograms):
for affected_histogram in affected_histograms:
affected_histogram = affected_histogram.getAttribute('name')
if affected_histogram in all_affected_found:
logging.warning(
'Histogram %s is already associated with other suffixes. '
'Please manually migrate it.', affected_histogram)
continue
all_affected_found[affected_histogram] = histogram_suffixes
return single_affected, all_affected_found
def _GetBaseVariant(doc, histogram):
"""Returns a <variant> node whose name is an empty string as the base variant.
Args:
doc: A Document object which is used to create a new <variant> node.
histogram: The <histogram> node to check whether its base is true or not.
Returns:
A <variant> node, or None if base="true" is set.
"""
base_variant = doc.createElement('variant')
base_variant.setAttribute('name', '')
return base_variant
def _PopulateVariantsWithSuffixes(doc, node, histogram_suffixes):
"""Populates <variant> nodes to |node| from <suffix>.
If this function returns false, the caller's histogram node will not be
updated. This is mainly because base suffix is a much more complicated case
and thus it is not automatically updated by this script.
Args:
doc: A Document object which is used to create a new <variant> node.
node: The node to be populated. it should be either <token> for inline
variants or <variants> for out-of-line variants.
histogram_suffixes: A <histogram_suffixes> node.
Returns:
True if the node can be updated automatically.
"""
separator = histogram_suffixes.getAttribute('separator')
suffixes_owners = _ExtractOwnerNodes(histogram_suffixes)
suffixes_name = histogram_suffixes.getAttribute('name')
for suffix in histogram_suffixes.getElementsByTagName('suffix'):
suffix_name = suffix.getAttribute('name')
# Suffix name might be empty. In this case, in order not to collide with the
# base variant, remove the base variant first before populating this.
if not suffix_name:
logging.warning(
'histogram suffixes: %s contains empty string suffix and thus we '
'have to manually update the empty string variant in these base '
'histograms: %s.', suffixes_name, ','.join(
h.getAttribute('name') for h in
histogram_suffixes.getElementsByTagName('affected-histogram')))
return False
variant = doc.createElement('variant')
if histogram_suffixes.hasAttribute('ordering'):
variant.setAttribute('name', suffix_name + separator)
else:
variant.setAttribute('name', separator + suffix_name)
if suffix.hasAttribute('label'):
variant.setAttribute('summary', suffix.getAttribute('label'))
# Populate owner's node from histogram suffixes to each new variant.
for owner in suffixes_owners:
variant.appendChild(owner.cloneNode(deep=True))
node.appendChild(variant)
return True
def _UpdateHistogramName(histogram, histogram_suffixes):
"""Adds histogram_suffixes's placeholder to the histogram name."""
histogram_name = histogram.getAttribute('name')
histogram_suffixes_name = histogram_suffixes.getAttribute('name')
ordering = histogram_suffixes.getAttribute('ordering')
if not ordering:
histogram.setAttribute('name',
'%s{%s}' % (histogram_name, histogram_suffixes_name))
else:
parts = ordering.split(',')
placement = 1
if len(parts) > 1:
placement = int(parts[1])
sections = histogram_name.split('.')
cluster = '.'.join(sections[0:placement]) + '.'
reminder = '.'.join(sections[placement:])
histogram.setAttribute(
'name', '%s{%s}%s' % (cluster, histogram_suffixes_name, reminder))
def MigrateToInlinePatterenedHistogram(doc, histogram, histogram_suffixes):
"""Migates a single histogram suffixes to an inline patterned histogram."""
# Keep a deep copy in case when the |histogram| fails to be migrated.
old_histogram = histogram.cloneNode(deep=True)
# Update histogram's name with the histogram_suffixes' name.
histogram_suffixes_name = histogram_suffixes.getAttribute('name')
_UpdateHistogramName(histogram, histogram_suffixes)
# Append |histogram_suffixes_name| placeholder string to the summary text.
_UpdateSummary(histogram, histogram_suffixes_name)
# Create an inline <token> node.
token = doc.createElement('token')
token.setAttribute('key', histogram_suffixes_name)
base_variant = _GetBaseVariant(doc, histogram)
if base_variant:
token.appendChild(base_variant)
# Populate <variant>s to the inline <token> node.
if not _PopulateVariantsWithSuffixes(doc, token, histogram_suffixes):
logging.warning('histogram_suffixes: %s needs manually effort',
histogram_suffixes_name)
histograms = histogram.parentNode
histograms.removeChild(histogram)
# Restore old histogram when we the script fails to migrate it.
histograms.appendChild(old_histogram)
else:
histogram.appendChild(token)
histogram_suffixes.parentNode.removeChild(histogram_suffixes)
# Remove obsolete comments from the histogram node.
_RemoveSuffixesComment(histogram, histogram_suffixes_name)
def MigrateToOutOflinePatterenedHistogram(doc, histogram, histogram_suffixes):
"""Migates a histogram suffixes to out-of-line patterned histogram."""
# Update histogram's name with the histogram_suffixes' name.
histogram_suffixes_name = histogram_suffixes.getAttribute('name')
_UpdateHistogramName(histogram, histogram_suffixes)
# Append |histogram_suffixes_name| placeholder string to the summary text.
_UpdateSummary(histogram, histogram_suffixes_name)
# Create a <token> node that links to an out-of-line <variants>.
base_variant = _GetBaseVariant(doc, histogram)
if base_variant:
token = doc.createElement('token')
token.setAttribute('key', histogram_suffixes_name)
token.setAttribute('variants', histogram_suffixes_name)
token.appendChild(base_variant)
histogram.appendChild(token)
# Remove obsolete comments from the histogram node.
_RemoveSuffixesComment(histogram, histogram_suffixes_name)
def _MigrateOutOfLineVariants(doc, histograms, suffixes_to_convert):
"""Converts a histogram-suffixes node to an out-of-line variants."""
histograms_node = histograms.getElementsByTagName('histograms')
assert len(histograms_node) == 1, (
'Every histograms.xml should have only one <histograms> node.')
for suffixes in suffixes_to_convert:
histogram_suffixes_name = suffixes.getAttribute('name')
variants = doc.createElement('variants')
variants.setAttribute('name', histogram_suffixes_name)
if not _PopulateVariantsWithSuffixes(doc, variants, suffixes):
logging.warning('histogram_suffixes: %s needs manually effort',
histogram_suffixes_name)
else:
histograms_node[0].appendChild(variants)
suffixes.parentNode.removeChild(suffixes)
def ChooseFiles(args):
"""Chooses a set of files to process so that we can migrate incrementally."""
paths = []
for path in sorted(histogram_paths.HISTOGRAMS_XMLS):
if 'metadata' in path and path.endswith('histograms.xml'):
name = os.path.basename(os.path.dirname(path))
if args.start <= name[0] <= args.end:
paths.append(path)
return paths
def SuffixesToVariantsMigration(args):
"""Migates all histogram suffixes to patterned histograms."""
histogram_suffixes_list = minidom.parse(open(HISTOGRAM_SUFFIXES_LIST_PATH))
histogram_suffixes_nodes = histogram_suffixes_list.getElementsByTagName(
'histogram_suffixes')
doc = minidom.Document()
for histograms_file in ChooseFiles(args):
histograms = minidom.parse(open(histograms_file))
single_affected, all_affected_found = _GetSuffixesDict(
histogram_suffixes_nodes, histograms.getElementsByTagName('histogram'))
suffixes_to_convert = set()
for histogram in histograms.getElementsByTagName('histogram'):
name = histogram.getAttribute('name')
# Migrate inline patterned histograms.
if name in single_affected.keys():
MigrateToInlinePatterenedHistogram(doc, histogram,
single_affected[name])
elif name in all_affected_found.keys():
suffixes_to_convert.add(all_affected_found[name])
MigrateToOutOflinePatterenedHistogram(doc, histogram,
all_affected_found[name])
_MigrateOutOfLineVariants(doc, histograms, suffixes_to_convert)
# Update histograms.xml with patterned histograms.
with open(histograms_file, 'w') as f:
pretty_xml_string = histogram_configuration_model.PrettifyTree(histograms)
f.write(pretty_xml_string)
# Remove histogram_suffixes that have already been migrated.
with open(HISTOGRAM_SUFFIXES_LIST_PATH, 'w') as f:
pretty_xml_string = histogram_configuration_model.PrettifyTree(
histogram_suffixes_list)
f.write(pretty_xml_string)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--start',
help='Start migration from a certain character (inclusive).',
default='a')
parser.add_argument('--end',
help='End migration at a certain character (inclusive).',
default='z')
args = parser.parse_args()
assert len(args.start) == 1 and len(args.end) == 1, (
'start and end flag should only contain a single letter.')
SuffixesToVariantsMigration(args)