| # Copyright 2013 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Extract histogram names from the description XML file. |
| |
| For more information on the format of the XML file, which is self-documenting, |
| see histograms.xml; example path: |
| tools/metrics/histograms/metadata/uma/histograms.xml |
| """ |
| |
| import bisect |
| import copy |
| import datetime |
| import itertools |
| |
| import logging |
| import os |
| import re |
| import sys |
| from typing import Any, TypedDict |
| import xml.dom.minidom |
| |
| import histogram_configuration_model |
| |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'common')) |
| import xml_utils |
| |
# Loose pattern used to decide whether the text of an <owner> tag looks like an
# email address.
BASIC_EMAIL_REGEXP = r'^[\w\-\+\%\.]+\@[\w\-\+\%\.]+$'

# Maximum number of times a <histogram_suffixes> node is re-queued while its
# affected histograms are not available yet.
MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH = 5

# strptime() format accepted for date-based expires_after values.
EXPIRY_DATE_PATTERN = '%Y-%m-%d'
# Milestone-based expires_after values: 'M' followed by two or three digits.
EXPIRY_MILESTONE_RE = re.compile(r'M[0-9]{2,3}\Z')
| |
| |
class Error(Exception):
  """Raised when histogram definitions cannot be loaded without errors."""
| |
| |
class ExtractionErrors(list[str]):
  """A list of error strings, with new entries also logged.

  Only entries added through AppendAndLog() are logged; the regular list
  methods (append, extend, ...) store entries without logging them.
  """

  def AppendAndLog(self, error: str) -> None:
    """Appends an error to the list after logging it.

    Args:
      error: The error message to log with logging.error() and to store.
    """
    logging.error(error)
    self.append(error)
| |
| |
| # Public dictionaries representing data structures derived from histogram.xml |
| # files, also referenced in google3. |
| |
| |
class VariantDict(TypedDict, total=False):
  """A dict representing a variant.

  Declared with total=False, so any of the keys may be absent.
  """

  # Value of the variant's name attribute; substituted for the token in
  # expanded histogram names.
  name: str
  # Value of the summary attribute, or the name when there is no such
  # attribute; substituted for the token in expanded histogram summaries.
  summary: str
  # Obsoletion reason of the variant; only set when one was found.
  obsolete: str
  # Owner email addresses; only set when the variant has a valid owner.
  owners: list[str]
| |
| |
class TokenDict(TypedDict):
  """A dict representing a token."""

  # The token key, i.e. the text between braces in a histogram name such as
  # Foo.{Key}.
  key: str
  # The variants that can be substituted for the token.
  variants: list[VariantDict]
| |
| |
class _BucketDict(TypedDict):
  """A dict representing a bucket in an enum."""

  # Integer parsed from the value attribute of the <int> element.
  key: int
  # Value of the label attribute of the <int> element.
  label: str
  # Text extracted from the child nodes of the <int> element.
  summary: str
| |
| |
class EnumDict(TypedDict, total=False):
  """A dict representing an enum.

  Declared with total=False, so any of the keys may be absent.
  """

  # Value of the enum's name attribute.
  name: str
  # Value of the enum's type attribute, or None when missing or empty.
  type: str | None
  # The accepted <int> entries of the enum, sorted by key.
  buckets: list[_BucketDict]
  # Text of the enum's <summary>; only set when the enum has one.
  summary: str
| |
| |
# TODO: crbug.com/443050753 - Rename CapCase names to use snake_case.
class HistogramDict(TypedDict, total=False):
  """A dict representing a histogram.

  Declared with total=False, so any of the keys may be absent.
  """

  # Name of the histogram.
  histogramName: str
  # Text of the histogram's <summary>, or 'TBD' when it has none.
  description: str
  # Owner email addresses.
  owners: list[str]
  # Metadata of the enum named by the histogram's enum attribute.
  enumDetails: EnumDict
  # Value of the histogram's units attribute.
  units: str
  # Text of each <component> tag of the histogram.
  components: list[str]
  # Reason why the histogram is obsolete.
  obsoletionMessage: str
  # Validated expiry: 'never', a milestone (e.g. M100) or a YYYY-MM-DD date.
  expires_after: str
  # Direction attribute of the histogram's <improvement> tag.
  improvement: str
| |
| |
def ExpandHistogramNameWithSuffixes(
    suffix_name: str,
    histogram_name: str,
    histogram_suffixes_node: xml.dom.minidom.Element,
) -> tuple[str | None, ExtractionErrors]:
  """Creates a new histogram name based on a histogram suffix.

  The node's 'separator' attribute (default '_') is placed between the
  histogram name and the suffix. The node's 'ordering' attribute (default
  'suffix') is either 'suffix', 'prefix' or 'prefix,N', where N is the number of
  dot-separated sections of the histogram name that stay in front of the
  inserted suffix name (default 1).

  Args:
    suffix_name: The suffix string to apply to the histogram name. May be empty.
    histogram_name: The name of the histogram. May be of the form Group.BaseName
      or BaseName.
    histogram_suffixes_node: The histogram_suffixes XML node.

  Returns:
    A tuple with:
      * A string with the expanded histogram name, or None if the name could
        not be expanded.
      * Any errors accumulated during this process.
  """
  errors = ExtractionErrors()

  if histogram_suffixes_node.hasAttribute('separator'):
    separator = histogram_suffixes_node.getAttribute('separator')
  else:
    separator = '_'

  if histogram_suffixes_node.hasAttribute('ordering'):
    ordering = histogram_suffixes_node.getAttribute('ordering')
  else:
    ordering = 'suffix'
  parts = ordering.split(',')
  ordering = parts[0]
  placement = 1
  if len(parts) > 1:
    # Report a malformed placement as an extraction error rather than letting
    # the ValueError from int() abort the whole extraction.
    try:
      placement = int(parts[1])
    except ValueError:
      errors.AppendAndLog(
          f'ordering placement needs to be an integer, value is {parts[1]}')
      return None, errors
  if ordering not in ['prefix', 'suffix']:
    errors.AppendAndLog(
        f'ordering needs to be prefix or suffix, value is {ordering}')
    return None, errors

  # An empty suffix name leaves the histogram name untouched.
  if not suffix_name:
    return histogram_name, errors

  if ordering == 'suffix':
    return histogram_name + separator + suffix_name, errors

  # For prefixes, the suffix_name is inserted between the "cluster" and the
  # "remainder", e.g. Foo.BarHist expanded with gamma becomes Foo.gamma_BarHist.
  sections = histogram_name.split('.')
  if len(sections) <= placement:
    suffixes_name = histogram_suffixes_node.getAttribute('name')
    errors.AppendAndLog(
        'Prefix histogram_suffixes expansions require histogram names which '
        f'include a dot separator. Histogram name is {histogram_name}, '
        f'histogram_suffixes is {suffixes_name}, and placement is {placement}')
    return None, errors

  cluster = '.'.join(sections[0:placement]) + '.'
  remainder = '.'.join(sections[placement:])
  return cluster + suffix_name + separator + remainder, errors
| |
| |
def ExtractEnumsFromXmlTree(
    tree: xml.dom.minidom.Element,
) -> tuple[dict[str, EnumDict], ExtractionErrors]:
  """Extracts all <enum> nodes in the tree into a dictionary.

  Args:
    tree: The XML dom tree.

  Returns:
    A tuple with:
      * A mapping of enum name -> enum metadata dictionary.
      * Any errors accumulated during extraction.

    An enum metadata dictionary looks like this:
      {
        'name': string,
        'type': string or None,
        'buckets': [  # Sorted by 'key'.
          {
            'key': int,
            'label': string,
            'summary': string,
          },
          ...
        ],
        'summary': string,  # Only present if the enum has a <summary>.
      }
  """
  enums = {}
  errors = ExtractionErrors()

  for enum in xml_utils.IterElementsWithTag(tree, 'enum'):
    name = enum.getAttribute('name')
    if name in enums:
      errors.AppendAndLog(f'Duplicate enum {name}')
      continue

    enum_dict = {}
    enum_dict['name'] = name
    enum_dict['type'] = enum.getAttribute('type') or None
    enum_dict['buckets'] = []
    labels = set()
    processed_keys = set()
    # Every value that parsed as an int, in document order. Used by the
    # ordering check below so that values already reported as invalid are not
    # parsed (and do not raise) a second time.
    values_in_xml_order = []

    nodes = list(xml_utils.IterElementsWithTag(enum, 'int'))

    obsolete_nodes = list(xml_utils.IterElementsWithTag(enum, 'obsolete', 1))
    if not nodes and not obsolete_nodes:
      errors.AppendAndLog(
          f'Non-obsolete enum {name} should have at least one <int>')
      continue

    for int_tag in nodes:
      try:
        int_value = int(int_tag.getAttribute('value'))
      except ValueError:
        errors.AppendAndLog(
            f'Invalid enum value "{int_tag.getAttribute("value")}" for enum'
            f' {name}')
        continue
      values_in_xml_order.append(int_value)
      if int_value in processed_keys:
        errors.AppendAndLog(f'Duplicate enum value {int_value} for enum {name}')
        continue
      processed_keys.add(int_value)

      label = int_tag.getAttribute('label')
      if label in labels:
        errors.AppendAndLog(f'Duplicate enum label "{label}" for enum {name}')
        continue
      labels.add(label)

      bucket = {
          'key': int_value,
          'label': label,
          'summary': xml_utils.GetTextFromChildNodes(int_tag),
      }
      enum_dict['buckets'].append(bucket)

    # Sort buckets by key
    enum_dict['buckets'].sort(key=lambda b: b['key'])
    enum_int_values = [b['key'] for b in enum_dict['buckets']]

    # Check that the int values are in numerical order in the XML.
    last_int_value = None
    for int_value in values_in_xml_order:
      if last_int_value is not None and int_value < last_int_value:
        errors.AppendAndLog(f'Enum {name} int values {last_int_value} and '
                            f'{int_value} are not in numerical order')
        # Hint at where the out-of-order value belongs among the sorted buckets.
        left_item_index = bisect.bisect_left(enum_int_values, int_value)
        if left_item_index == 0:
          logging.warning('Insert value %d at the beginning', int_value)
        else:
          left_int_value = enum_int_values[left_item_index - 1]
          left_label = enum_dict['buckets'][left_item_index - 1]['label']
          logging.warning(
              'Insert value %d after %d ("%s")',
              int_value,
              left_int_value,
              left_label,
          )
      else:
        last_int_value = int_value

    # Only the first <summary> element is used.
    for summary in xml_utils.IterElementsWithTag(enum, 'summary'):
      enum_dict['summary'] = xml_utils.GetTextFromChildNodes(summary)
      break

    enums[name] = enum_dict

  return enums, errors
| |
| |
def _ExtractOwners(node: xml.dom.minidom.Element) -> tuple[list[str], bool]:
  """Collects the owners of |node| that look like email addresses.

  Only the first child of each <owner> element is considered, and only if it is
  a text node whose value matches BASIC_EMAIL_REGEXP.

  Args:
    node: A DOM Element.

  Returns:
    A tuple of owner-related info, e.g. (['alice@chromium.org'], True)

    The first element is the list of accepted owner email addresses.
    The second element is True iff at least one owner was accepted.
  """
  email_re = re.compile(BASIC_EMAIL_REGEXP)
  owners = []

  for owner_node in xml_utils.IterElementsWithTag(node, 'owner', 1):
    first_child = owner_node.firstChild
    if isinstance(first_child, xml.dom.minidom.Text):
      text = first_child.nodeValue
    else:
      text = ''
    if email_re.match(text):
      owners.append(text)

  return owners, bool(owners)
| |
| |
def _ExtractImprovementDirection(
    histogram_node: xml.dom.minidom.Element,
) -> tuple[str | None, ExtractionErrors]:
  """Reads the direction of the histogram's <improvement> tag, if present.

  Args:
    histogram_node: A DOM Element corresponding to a histogram.

  Returns:
    A tuple (direction, errors). |direction| is None when the histogram has no
    <improvement> tag, has more than one, or has one whose direction is not
    among the valid values; the latter two cases also add an error.
  """
  errors = ExtractionErrors()
  improvement_nodes = histogram_node.getElementsByTagName('improvement')
  node_count = len(improvement_nodes)
  if node_count == 0:
    return None, errors

  histogram_name = histogram_node.getAttribute('name')
  if node_count > 1:
    errors.AppendAndLog(
        f'Histogram "{histogram_name}" has multiple <improvement> tags.')
    return None, errors

  direction = improvement_nodes[0].getAttribute('direction')
  valid_values = (
      histogram_configuration_model.IMPROVEMENT_DIRECTION_VALID_VALUES)
  if direction in valid_values:
    return direction, errors

  errors.AppendAndLog(
      f'Histogram "{histogram_name}" has an invalid direction '
      f'"{direction}" in its <improvement> tag.')
  return None, errors
| |
| |
def _ExtractComponents(histogram: xml.dom.minidom.Element) -> list[str]:
  """Returns the text of every <component> tag found in the histogram.

  Components are present when a histogram has a component tag, e.g.
  <component>UI>Browser</component>. Components may also be present when an
  OWNERS file is given as a histogram owner, e.g. <owner>src/dir/OWNERS</owner>;
  in this case the component is extracted from adjacent DIR_METADATA files.
  See _ExtractComponentViaDirmd() in the following file for details:
  chromium/src/tools/metrics/histograms/expand_owners.py.

  Args:
    histogram: A DOM Element corresponding to a histogram.

  Returns:
    A list of the components associated with the histogram, e.g.
    ['UI>Browser>Spellcheck'].
  """
  components = []
  for component_node in histogram.getElementsByTagName('component'):
    components.append(xml_utils.GetTextFromChildNodes(component_node))
  return components
| |
| |
def _ValidateDateString(date_str: str) -> bool:
  """Tells whether |date_str| can be parsed with EXPIRY_DATE_PATTERN.

  Args:
    date_str: The candidate date string, expected to look like 'YYYY-MM-DD'.

  Returns:
    True iff datetime.strptime() accepts |date_str| for that pattern.
  """
  try:
    datetime.datetime.strptime(date_str, EXPIRY_DATE_PATTERN)
  except ValueError:
    return False
  else:
    return True
| |
| |
def _ValidateMilestoneString(milestone_str: str) -> bool:
  """Tells whether |milestone_str| is a milestone such as 'M100'."""
  return bool(EXPIRY_MILESTONE_RE.match(milestone_str))
| |
| |
def ExtractTokens(
    histogram: xml.dom.minidom.Element,
    variants_dict: dict[str, list[VariantDict]],
) -> tuple[list[TokenDict], ExtractionErrors]:
  """Builds the list of tokens referenced by the given histogram element.

  Tokens come from two sources: explicit <token> tags, and {Key} patterns in
  the histogram name that have no matching <token> tag.

  Args:
    histogram: A DOM Element corresponding to a histogram.
    variants_dict: A dictionary of variants extracted from the tree.

  Returns:
    A tuple where the first element is a list of extracted Tokens, and the
    second is the list of errors detected while extracting them.
  """
  errors = ExtractionErrors()
  histogram_name = histogram.getAttribute('name')
  seen_keys = set()
  tokens = []

  # First pass: explicit <token> tags.
  for token_node in xml_utils.IterElementsWithTag(histogram, 'token', 1):
    key = token_node.getAttribute('key')
    if key in seen_keys:
      errors.AppendAndLog(
          f'Histogram {histogram_name} contains duplicate token key '
          f'{key}, please ensure token keys are unique.')
      continue
    seen_keys.add(key)

    if '{' + key + '}' not in histogram_name:
      errors.AppendAndLog(
          f'Histogram {histogram_name} includes a token tag but the token key '
          'is not present in histogram name. Please insert the token key into '
          'the histogram name in order for the token to be added.')
      continue

    # Start from the out-of-line <variants> named by the 'variants' attribute,
    # if any. The list is copied because inline variants are appended below.
    variants = []
    if token_node.hasAttribute('variants'):
      variants_name = token_node.getAttribute('variants')
      shared_variants = variants_dict.get(variants_name)
      if shared_variants:
        variants = list(shared_variants)
      else:
        errors.AppendAndLog(
            f'The variants attribute {variants_name} of token key {key} '
            f'of histogram {histogram_name} does not have a corresponding '
            '<variants> tag.')
    # Inline and out-of-line variants can be combined.
    variants.extend(_ExtractVariantNodes(token_node))

    tokens.append({'key': key, 'variants': variants})

  # Second pass: a histogram name may also reference external tokens
  # implicitly, when the name includes patterns (e.g. Foo.{Bar}) without a
  # corresponding token tag. These are treated as implicit references to the
  # variants with the same name.
  for key in re.findall(r'\{(.+?)\}', histogram_name):
    # Keys seen already were handled above (e.g. via an explicit <token> tag)
    # or earlier in this loop.
    if key in seen_keys:
      continue
    seen_keys.add(key)
    implicit_variants = variants_dict.get(key)
    if not implicit_variants:
      errors.AppendAndLog(
          f'Could not find variant "{key}" specified by histogram'
          f' "{histogram_name}".')
      implicit_variants = []
    tokens.append({'key': key, 'variants': implicit_variants})

  return tokens, errors
| |
| |
def _ExtractVariantNodes(node: xml.dom.minidom.Element) -> list[VariantDict]:
  """Converts the <variant> elements of a node into variant dictionaries.

  Args:
    node: A DOM element holding <variant> elements, e.g. a <token> or a
      <variants> node.

  Returns:
    A list of Variants.
  """
  variants = []
  for variant_node in xml_utils.IterElementsWithTag(node, 'variant', 1):
    name = variant_node.getAttribute('name')
    # The name doubles as the summary when no summary attribute is given.
    if variant_node.hasAttribute('summary'):
      summary = variant_node.getAttribute('summary')
    else:
      summary = name
    variant = {'name': name, 'summary': summary}

    obsolete_reason = _GetObsoleteReason(variant_node)
    if obsolete_reason:
      variant['obsolete'] = obsolete_reason

    owners, has_owner = _ExtractOwners(variant_node)
    if has_owner:
      variant['owners'] = owners

    variants.append(variant)

  return variants
| |
| |
def ExtractHistogramsFromXmlTree(
    tree: xml.dom.minidom.Element, enums: dict[str, EnumDict]
) -> tuple[dict[str, HistogramDict], dict[str, list[TokenDict]],
           ExtractionErrors]:
  """Extracts histogram definitions from an XML tree.

  Parses all <histogram> nodes in the XML `tree` to extract definitions before
  suffix or token expansion. It also validates core requirements like owners,
  expiry, and units/enum.

  Args:
    tree: The <histograms> XML element.
    enums: A dict of parsed enum definitions, used to link to histograms.

  Returns:
    A tuple (histograms, tokens_dict, errors):
      - histograms: A dict mapping histogram names to their definitions.
      - tokens_dict: A dict mapping tokenized histogram names to their tokens
        for later expansion.
      - errors: A list of any validation errors, including those found while
        extracting the <variants> of the tree.
  """
  histograms = {}
  tokens_dict = {}
  variants_dict, errors = ExtractVariantsFromXmlTree(tree)

  for histogram in xml_utils.IterElementsWithTag(tree, 'histogram'):
    name = histogram.getAttribute('name')
    if name in histograms:
      errors.AppendAndLog(f'Duplicate histogram definition {name}')
      continue
    histograms[name] = histogram_entry = {}
    histogram_entry['histogramName'] = name

    # Handle expiry attribute.
    if histogram.hasAttribute('expires_after'):
      expiry_str = histogram.getAttribute('expires_after')
      if (expiry_str == 'never' or _ValidateMilestoneString(expiry_str)
          or _ValidateDateString(expiry_str)):
        histogram_entry['expires_after'] = expiry_str
      else:
        errors.AppendAndLog(
            f'Expiry of histogram {name} does not match expected date format '
            f'("{EXPIRY_DATE_PATTERN}"), milestone format (M*), or "never": '
            f'found {expiry_str}')
    else:
      errors.AppendAndLog(f'Your histogram {name} must have an expiry date.')

    # Handle <owner> tags.
    owners, has_owner = _ExtractOwners(histogram)
    if owners:
      histogram_entry['owners'] = owners

    # Handle <improvement> tags.
    improvement_direction, improvement_errors = _ExtractImprovementDirection(
        histogram)
    errors.extend(improvement_errors)
    if improvement_direction:
      histogram_entry['improvement'] = improvement_direction

    # Find <component> tag.
    components = _ExtractComponents(histogram)
    if components:
      histogram_entry['components'] = components

    # Find <summary> tag. 'TBD' is the placeholder for a missing summary.
    summary_nodes = list(xml_utils.IterElementsWithTag(histogram, 'summary'))
    if summary_nodes:
      summary_text = xml_utils.GetTextFromChildNodes(summary_nodes[0])
    else:
      summary_text = 'TBD'
    histogram_entry['description'] = summary_text

    # Find <obsolete> tag.
    obsolete_nodes = list(
        xml_utils.IterElementsWithTag(histogram, 'obsolete', 1))
    if obsolete_nodes:
      reason = xml_utils.GetTextFromChildNodes(obsolete_nodes[0])
      histogram_entry['obsoletionMessage'] = reason

    # Non-obsolete histograms should provide a non-empty <summary>.
    has_summary = (histogram_entry.get('description')
                   and histogram_entry.get('description') != 'TBD')
    if not obsolete_nodes and not has_summary:
      errors.AppendAndLog(
          f'histogram {name} should provide a non-empty <summary>')

    # Non-obsolete histograms should specify <owner>s.
    if not obsolete_nodes and not has_owner:
      errors.AppendAndLog(f'histogram {name} should specify <owner>s')

    # Histograms should have either units or enum.
    if (not histogram.hasAttribute('units')
        and not histogram.hasAttribute('enum')):
      errors.AppendAndLog(f'histogram {name} should have either units or enum')

    # Histograms should not have both units and enum.
    if histogram.hasAttribute('units') and histogram.hasAttribute('enum'):
      errors.AppendAndLog(
          f'histogram {name} should not have both units and enum')

    # Handle units.
    if histogram.hasAttribute('units'):
      histogram_entry['units'] = histogram.getAttribute('units')

    # Handle enum types.
    if histogram.hasAttribute('enum'):
      enum_name = histogram.getAttribute('enum')
      if enum_name not in enums:
        errors.AppendAndLog(f'Unknown enum {enum_name} in histogram {name}.')
      else:
        histogram_entry['enumDetails'] = enums[enum_name]

    # Find <token> tag.
    tokens, token_errors = ExtractTokens(histogram, variants_dict)
    if tokens:
      tokens_dict[name] = tokens
    errors.extend(token_errors)

  return histograms, tokens_dict, errors
| |
| |
def ExtractVariantsFromXmlTree(
    tree: xml.dom.minidom.Element,
) -> tuple[dict[str, list[VariantDict]], ExtractionErrors]:
  """Builds a dictionary out of all the <variants> nodes in the tree.

  When several <variants> nodes share a name, the first one is kept and an
  error is recorded for each of the others.

  Args:
    tree: A DOM Element containing histograms and variants nodes.

  Returns:
    A tuple where the first element is a dictionary of extracted Variants, where
    the key is the variants name and the value is a list of Variant objects. The
    second element is the list of errors detected while extracting them.
  """
  errors = ExtractionErrors()
  variants_by_name = {}
  for variants_node in xml_utils.IterElementsWithTag(tree, 'variants'):
    variants_name = variants_node.getAttribute('name')
    if variants_name not in variants_by_name:
      variants_by_name[variants_name] = _ExtractVariantNodes(variants_node)
    else:
      errors.AppendAndLog(f'Duplicate variants definition {variants_name}')

  return variants_by_name, errors
| |
| |
def _GetObsoleteReason(node: xml.dom.minidom.Element) -> str | None:
  """Returns the text of the node's <obsolete> child, if it has one.

  Only direct children of |node| are inspected.

  Args:
    node: A DOM Element, e.g. a histogram, a variant or a suffix.

  Returns:
    The text of the first <obsolete> child of |node|, or None if |node| has no
    such child.
  """
  # There can be at most 1 obsolete element per node, so the first one wins.
  obsolete_child = next(
      (child for child in node.childNodes if child.localName == 'obsolete'),
      None)
  if obsolete_child is None:
    return None
  return xml_utils.GetTextFromChildNodes(obsolete_child)
| |
| |
def UpdateHistogramsWithSuffixes(
    tree: xml.dom.minidom.Element,
    histograms: dict[str, HistogramDict]) -> ExtractionErrors:
  """Processes <histogram_suffixes> tags and combines with affected histograms.

  The histograms dictionary will be updated in-place by adding new histograms
  created by combining histograms themselves with histogram_suffixes targeting
  these histograms.

  A <histogram_suffixes> node is only applied once every one of its
  <affected-histogram> entries can be resolved against |histograms|. Until
  then it is re-queued, at most MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH times,
  after which the missing dependency is reported as an error.

  Args:
    tree: XML dom tree.
    histograms: a dictionary of histograms previously extracted from the tree;

  Returns:
    A list of error messages if any errors were found.
  """
  errors = ExtractionErrors()

  histogram_suffix_tag = 'histogram_suffixes'
  suffix_tag = 'suffix'
  with_tag = 'with-suffix'

  # histogram_suffixes can depend on other histogram_suffixes, so we need to be
  # careful. histogram_suffixes whose dependencies have not yet been processed
  # get relegated to this queue to be processed later.
  reprocess_queue = []

  def GenerateHistogramSuffixes():
    """Yields (reprocess_count, node) for tree nodes, then re-queued nodes."""
    for f in xml_utils.IterElementsWithTag(tree, histogram_suffix_tag):
      yield 0, f
    # Entries appended to the queue during iteration are picked up as well.
    for r, f in reprocess_queue:
      yield r, f

  for reprocess_count, histogram_suffixes in GenerateHistogramSuffixes():
    # Check dependencies first. Every affected histogram must be resolvable, so
    # the loop only stops early when one of them is missing.
    dependencies_valid = True
    missing_dependency = None
    affected_histograms = list(
        xml_utils.IterElementsWithTag(histogram_suffixes, 'affected-histogram',
                                      1))
    for affected_histogram in affected_histograms:
      histogram_name = affected_histogram.getAttribute('name')
      # Check if the affected histogram name is a pattern or exists directly.
      is_pattern = '{' in histogram_name
      found = histogram_name in histograms
      if not found and is_pattern:
        # Check if any current histogram matches the pattern
        prefix = histogram_name.split('{', 1)[0]
        found = any(
            existing_name.startswith(prefix) for existing_name in histograms)

      if not found:
        # Base histogram is missing.
        dependencies_valid = False
        missing_dependency = histogram_name
        break
    if not dependencies_valid:
      if reprocess_count < MAX_HISTOGRAM_SUFFIX_DEPENDENCY_DEPTH:
        reprocess_queue.append((reprocess_count + 1, histogram_suffixes))
        continue
      else:
        suffixes_name = histogram_suffixes.getAttribute('name')
        errors.AppendAndLog(
            f'histogram_suffixes {suffixes_name} is missing its '
            f'dependency {missing_dependency}')
        continue

    # If the suffix group has an obsolete tag, all suffixes it generates inherit
    # its reason.
    group_obsolete_reason = _GetObsoleteReason(histogram_suffixes)

    name = histogram_suffixes.getAttribute('name')
    suffix_nodes = list(
        xml_utils.IterElementsWithTag(histogram_suffixes, suffix_tag, 1))
    # Every <suffix> is required to carry a label.
    for suffix in suffix_nodes:
      if not suffix.hasAttribute('label'):
        suffix_name = suffix.getAttribute('name')
        errors.AppendAndLog(f'suffix {suffix_name} in histogram_suffixes '
                            f'{name} should have a label')
    # Find owners list under current histogram_suffixes tag.
    owners, _ = _ExtractOwners(histogram_suffixes)

    for affected_histogram in affected_histograms:
      # <with-suffix> children restrict the suffixes applied to this histogram;
      # without them, all the suffixes of the group apply.
      with_suffixes = list(
          xml_utils.IterElementsWithTag(affected_histogram, with_tag, 1))
      if with_suffixes:
        suffixes_to_add = with_suffixes
      else:
        suffixes_to_add = suffix_nodes

      affected_name = affected_histogram.getAttribute('name')
      histograms_to_process = []
      if '{' in affected_name:
        # Pattern, find all matching expanded histograms. The list is built
        # before |histograms| gets modified below.
        pattern_prefix = affected_name.split('{', 1)[0]
        histograms_to_process = [
            existing_name for existing_name in histograms
            if existing_name.startswith(pattern_prefix)
        ]
      elif affected_name in histograms:
        histograms_to_process.append(affected_name)

      for histogram_name in histograms_to_process:
        for suffix in suffixes_to_add:
          suffix_name = suffix.getAttribute('name')
          new_histogram_name, expand_errors = ExpandHistogramNameWithSuffixes(
              suffix_name, histogram_name, histogram_suffixes)
          errors.extend(expand_errors)
          if new_histogram_name is None:
            continue
          if new_histogram_name != histogram_name:
            if histogram_name not in histograms:
              # This can happen if a previous suffix operation renamed it.
              continue
            new_histogram = copy.deepcopy(histograms[histogram_name])
            histograms[new_histogram_name] = new_histogram

          histogram_entry = histograms[new_histogram_name]

          # If no owners are added for this histogram-suffixes, it inherits the
          # owners of its parents.
          if owners:
            histogram_entry['owners'] = owners

          # If a suffix has an obsolete node, it's marked as obsolete for the
          # specified reason, overwriting its group's obsoletion reason if the
          # group itself was obsolete as well.
          obsolete_reason = _GetObsoleteReason(suffix)
          if not obsolete_reason:
            obsolete_reason = group_obsolete_reason

          # If the suffix has an obsolete tag, all histograms it generates
          # inherit it.
          if obsolete_reason:
            histogram_entry['obsoletionMessage'] = obsolete_reason

  return errors
| |
| |
class TokenAssignment:
  """Assignment of a Variant for each Token of histogram pattern.

  Attributes:
    pairings: A token_name to Variant map.
  """

  def __init__(self, pairings):
    """Stores the given token_name to Variant map as |pairings|."""
    self.pairings = pairings
| |
| |
def GetTokenAssignments(tokens: list[TokenDict]) -> list[TokenAssignment]:
  """Enumerates every way of picking one variant per listed token.

  The result is the cross product of the variants of all the tokens, so it is
  empty as soon as one token has no variants.

  Args:
    tokens: The list of Tokens to create assignments for.

  Returns:
    A list of TokenAssignments.
  """
  keys = []
  variant_lists = []
  for token in tokens:
    keys.append(token['key'])
    variant_lists.append(token['variants'])

  assignments = []
  for combination in itertools.product(*variant_lists):
    pairings = dict(zip(keys, combination))
    assignments.append(TokenAssignment(pairings=pairings))
  return assignments
| |
| |
def _AddHistogramOrExpandedVariants(
    histogram_name: str,
    histogram_node: HistogramDict,
    tokens: list[TokenDict],
    new_histograms_dict: dict[str, HistogramDict],
) -> ExtractionErrors:
  """Adds histogram or all variant expanded histograms to |new_histograms_dict|.

  If the histogram does not reference any variants, it's added directly to the
  new histograms dict. Else, the tokens are expanded to produce all the
  variants of that histogram and these are added to the new histogram dict.

  Args:
    histogram_name: The name of the histogram.
    histogram_node: The histogram node to add.
    tokens: The list of tokens to expand.
    new_histograms_dict: The dictionary of histograms to add to.

  Returns:
    List of errors, if any.
  """
  errors = ExtractionErrors()

  if not tokens:
    # If the histogram references no variants, simply copy it over.
    new_histograms_dict[histogram_name] = histogram_node
    return errors

  # |token_assignments| contains all the cross-product combinations of token
  # variants, representing all the possible histogram names that could be
  # generated.
  token_assignments = GetTokenAssignments(tokens)
  summary_text = histogram_node['description']

  # The summary formatting error is only reported once per histogram.
  summary_error_reported = False

  # Each |token_assignment| contains one of the cross-product combinations and
  # corresponds to one new generated histogram.
  for token_assignment in token_assignments:
    new_obsolete_reason = ''
    new_owners = []
    # Dictionaries of pairings used for string formatting of histogram name and
    # summary.
    token_name_pairings = {}
    token_summary_pairings = {}

    for token_key, variant in token_assignment.pairings.items():
      token_name_pairings[token_key] = variant['name']
      token_summary_pairings[token_key] = variant['summary']

      # If a variant has owner(s), append to |new_owners|, overwriting the
      # owners of the original histogram.
      if 'owners' in variant:
        new_owners += variant['owners']

      # If a variant has an obsolete reason, the new reason overwrites the
      # obsolete reason of the original histogram.
      if 'obsolete' in variant:
        new_obsolete_reason = variant['obsolete']

    # Replace token in histogram name with variant name.
    new_histogram_name = histogram_name.format(**token_name_pairings)
    if new_histogram_name in new_histograms_dict:
      errors.AppendAndLog(
          f'Duplicate histogram name {new_histogram_name} generated. '
          'Please remove identical variants in different tokens in '
          f'{histogram_name}.')
      continue

    # Replace token in summary with variant summary. Besides KeyError (unknown
    # {Token}), str.format() raises IndexError for positional fields such as
    # {} and ValueError for unbalanced braces; all of them are reported as
    # errors rather than aborting the extraction.
    try:
      new_summary_text = summary_text.format(**token_summary_pairings)
    except (KeyError, IndexError, ValueError):
      if not summary_error_reported:
        summary_error_reported = True
        errors.AppendAndLog(
            'Could not format summary text when expanding histogram %s. Please '
            "check that it's not using {Token} syntax for unknown tokens." %
            (histogram_name))
      continue

    # Only copy the node once the expansion is known to be valid.
    new_histogram_node = copy.deepcopy(histogram_node)
    new_histogram_node['description'] = new_summary_text

    if new_owners:
      new_histogram_node['owners'] = new_owners

    if new_obsolete_reason:
      new_histogram_node['obsoletionMessage'] = new_obsolete_reason

    new_histograms_dict[new_histogram_name] = new_histogram_node

  return errors
| |
| |
def _UpdateHistogramsWithTokens(
    histograms_dict: dict[str, HistogramDict],
    tokens_dict: dict[str, list[TokenDict]],
) -> tuple[dict[str, HistogramDict], ExtractionErrors]:
  """Expands tokenized histograms into one histogram per variant combination.

  Args:
    histograms_dict: A dictionary of all the histograms extracted from the tree.
    tokens_dict: A dictionary mapping a histogram name to its tokens.

  Returns:
    A tuple where the first element is the new histograms dictionary,
    containing the original histograms without tokens and histograms whose
    tokens are replaced by newly variant combinations. The second element is a
    list of errors detected while extracting them.
  """
  errors = ExtractionErrors()
  # A fresh dict is filled instead of modifying |histograms_dict| in place,
  # because new histograms are generated while iterating over it.
  expanded_histograms = {}
  for histogram_name, histogram_node in histograms_dict.items():
    tokens = tokens_dict.get(histogram_name, [])
    if not tokens:
      # Histograms without tokens are carried over unchanged.
      expanded_histograms[histogram_name] = histogram_node
      continue
    expansion_errors = _AddHistogramOrExpandedVariants(
        histogram_name, histogram_node, tokens, expanded_histograms)
    errors.extend(expansion_errors)

  return expanded_histograms, errors
| |
| |
def ExtractHistogramsFromDom(
    tree: xml.dom.minidom.Document,
) -> tuple[dict[str, HistogramDict], ExtractionErrors]:
  """Computes the histogram names and descriptions from the XML representation.

  Enums and histograms are extracted first; tokens are then expanded, and
  finally histogram suffixes are applied.

  Args:
    tree: A DOM tree of XML content.

  Returns:
    a tuple of (histograms, errors) where histograms is a dictionary mapping
    histogram names to dictionaries containing histogram descriptions and
    errors is a list of errors encountered in processing, if any.
  """
  xml_utils.NormalizeAllAttributeValues(tree)

  enums_tree = xml_utils.GetTagSubTree(tree, 'enums', 2)
  histograms_tree = xml_utils.GetTagSubTree(tree, 'histograms', 2)
  histogram_suffixes_tree = xml_utils.GetTagSubTree(
      tree, 'histogram_suffixes_list', 2)

  errors = ExtractionErrors()

  enums, enum_errors = ExtractEnumsFromXmlTree(enums_tree)
  errors.extend(enum_errors)

  histograms, tokens_dict, histogram_errors = ExtractHistogramsFromXmlTree(
      histograms_tree, enums)
  errors.extend(histogram_errors)

  histograms, update_token_errors = _UpdateHistogramsWithTokens(
      histograms, tokens_dict)
  errors.extend(update_token_errors)

  # Only expand suffixes if there were no token errors.
  if not update_token_errors:
    errors.extend(
        UpdateHistogramsWithSuffixes(histogram_suffixes_tree, histograms))

  return histograms, errors
| |
| |
def ExtractHistograms(filename: str) -> dict[str, HistogramDict]:
  """Loads histogram definitions from a disk file.

  Args:
    filename: a file path to load data from.

  Returns:
    a dictionary of histogram descriptions.

  Raises:
    Error: if the file is not well-formatted.
  """
  # Decode explicitly as UTF-8 so that the result does not depend on the
  # default encoding of the platform the script runs on.
  with open(filename, 'r', encoding='utf-8') as f:
    tree = xml.dom.minidom.parse(f)
    histograms, errors = ExtractHistogramsFromDom(tree)
    if errors:
      logging.error('Error parsing %s', filename)
      # The individual errors were already logged when they were recorded.
      raise Error(f'Error parsing {filename}')
    return histograms
| |
| |
| def ExtractNames(histograms: dict[str, HistogramDict]) -> list[str]: |
| """Returns all the names of extracted histograms.""" |
| return sorted(histograms.keys()) |