|  | # -*- coding: utf-8 -*- | 
|  | # Copyright 2021 The Chromium Authors | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  | """Utilities for parsing structured.xml. | 
|  |  | 
|  | Functions in this module raise an error if constraints on the format of the | 
|  | structured.xml file are not met. | 
|  |  | 
|  | Functions use the concept of 'compound' and 'text' XML nodes. | 
|  | - compound nodes can have attributes and child nodes, but no text | 
|  | - text nodes can have text, but no attributes or child nodes | 
|  | """ | 
|  |  | 
|  | import collections | 
|  | import re | 
|  |  | 
|  |  | 
|  | def error(elem, msg): | 
|  | """Raise a nicely formatted error with some context.""" | 
|  | name = elem.attrib.get("name", None) | 
|  | name = name + " " if name else "" | 
|  | msg = "Structured metrics error, {} node {}: {}.".format(elem.tag, name, msg) | 
|  | raise ValueError(msg) | 
|  |  | 
|  |  | 
|  | def get_attr(elem, tag, regex=None): | 
|  | """Get an attribute. | 
|  |  | 
|  | Error if it is missing, optionally error if it doesn't match the provided | 
|  | regex. | 
|  | """ | 
|  | attr = elem.attrib.get(tag, None) | 
|  | if not attr: | 
|  | error(elem, "missing attribute '{}'".format(tag)) | 
|  | if regex and not re.match(regex, attr): | 
|  | error(elem, ("has '{}' attribute '{}' which does " | 
|  | "not match regex '{}'").format(tag, attr, regex)) | 
|  | return attr | 
|  |  | 
|  |  | 
|  | def get_optional_attr(elem, tag, regex=None): | 
|  | """Get an attribute. | 
|  |  | 
|  | Returns None if it doesn't exist. | 
|  | """ | 
|  | attr = elem.attrib.get(tag) | 
|  | if not attr: | 
|  | return None | 
|  | if regex and not re.match(regex, attr): | 
|  | error(elem, ("has '{}' attribute '{}' which does " | 
|  | "not match regex '{}'").format(tag, attr, regex)) | 
|  | return attr | 
|  |  | 
|  |  | 
|  | def get_compound_children(elem, tag, allow_missing_children=False): | 
|  | """Get all child nodes of `elem` with tag `tag`. | 
|  |  | 
|  | Error if none exist, or a child is not a compound node. | 
|  | """ | 
|  | children = elem.findall(tag) | 
|  | if not children and not allow_missing_children: | 
|  | error(elem, "missing node '{}'".format(tag)) | 
|  | for child in children: | 
|  | if child.text and child.text.strip(): | 
|  | error(child, "contains text, but shouldn't") | 
|  | return children | 
|  |  | 
|  |  | 
|  | def get_compound_child(elem, tag): | 
|  | """Get the child of `elem` with tag `tag`. | 
|  |  | 
|  | Error if there isn't exactly one matching child, or it isn't compound. | 
|  | """ | 
|  | children = elem.findall(tag) | 
|  | if len(children) != 1: | 
|  | error(elem, "needs exactly one '{}' node".format(tag)) | 
|  | return children[0] | 
|  |  | 
|  |  | 
|  | def get_text_children(elem, tag, regex=None): | 
|  | """Get the text of all child nodes of `elem` with tag `tag`. | 
|  |  | 
|  | Error if none exist, or a child is not a text node. Optionally ensure the | 
|  | text matches `regex`. | 
|  | """ | 
|  | children = elem.findall(tag) | 
|  | if not children: | 
|  | error(elem, "missing node '{}'".format(tag)) | 
|  |  | 
|  | result = [] | 
|  | for child in children: | 
|  | check_attributes(child, set()) | 
|  | check_children(child, set()) | 
|  | text = child.text.strip() | 
|  | if not text: | 
|  | error(elem, "missing text in '{}'".format(tag)) | 
|  | if regex and not re.match(regex, text): | 
|  | error(elem, ("has '{}' node '{}' which does " | 
|  | "not match regex '{}'").format(tag, text, regex)) | 
|  | result.append(text) | 
|  | return result | 
|  |  | 
|  |  | 
|  | def get_text_child(elem, tag, regex=None): | 
|  | """Get the text of the child of `elem` with tag `tag`. | 
|  |  | 
|  | Error if there isn't exactly one matching child, or it isn't a text node. | 
|  | Optionally ensure the text matches `regex`. | 
|  | """ | 
|  | result = get_text_children(elem, tag, regex) | 
|  | if len(result) != 1: | 
|  | error(elem, "needs exactly one '{}' node".format(tag)) | 
|  | return result[0] | 
|  |  | 
|  |  | 
|  | def check_attributes(elem, expected_attrs, optional_attrs=None): | 
|  | """Ensure `elem` has no attributes except those in `expected_attrs`.""" | 
|  | actual_attrs = set(elem.attrib.keys()) | 
|  | unexpected_attrs = actual_attrs - set(expected_attrs) | 
|  | if optional_attrs: | 
|  | unexpected_attrs = unexpected_attrs - set(optional_attrs) | 
|  | if unexpected_attrs: | 
|  | attrs = " ".join(unexpected_attrs) | 
|  | error(elem, "has unexpected attributes: " + attrs) | 
|  |  | 
|  |  | 
|  | def check_children(elem, expected_children): | 
|  | """Ensure all children in `expected_children` are in `elem`.""" | 
|  | actual_children = {child.tag for child in elem} | 
|  | unexpected_children = set(expected_children) - actual_children | 
|  | if unexpected_children: | 
|  | children = " ".join(unexpected_children) | 
|  | error(elem, "is missing nodes: " + children) | 
|  |  | 
|  |  | 
|  | def check_child_names_unique(elem, tag): | 
|  | """Ensure uniqueness of the 'name' of all children of `elem` with `tag`.""" | 
|  | names = [child.attrib.get("name", None) for child in elem if child.tag == tag] | 
|  | name_counts = collections.Counter(names) | 
|  | has_duplicates = any(c > 1 for c in name_counts.values()) | 
|  | if has_duplicates: | 
|  | error(elem, "has {} nodes with duplicate names".format(tag)) |