| # -*- coding: utf-8 -*- |
| # Copyright 2021 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Utilities for parsing structured.xml. |
| |
| Functions in this module raise an error if constraints on the format of the |
| structured.xml file are not met. |
| |
| Functions use the concept of 'compound' and 'text' XML nodes. |
| - compound nodes can have attributes and child nodes, but no text |
| - text nodes can have text, but no attributes or child nodes |
| """ |
| |
| import collections |
| import re |
| |
| |
def error(elem, msg):
    """Raise a ValueError describing a problem found on `elem`.

    The message names the node's tag and, when the node carries a non-empty
    'name' attribute, that name as well, followed by `msg`.

    Args:
      elem: The XML node the problem was found on. Its `tag` and `attrib`
        are read.
      msg: Text describing what is wrong with the node.

    Raises:
      ValueError: Always.
    """
    node_name = elem.attrib.get('name', None)
    if node_name:
        # Trailing space separates the name from the message that follows.
        label = node_name + ' '
    else:
        label = ''
    raise ValueError('Structured metrics error, {} node {}{}.'.format(
        elem.tag, label, msg))
| |
| |
def get_attr(elem, tag, regex=None):
    """Return the value of attribute `tag` on `elem`.

    Args:
      elem: The XML node whose attributes are read.
      tag: Name of the attribute to fetch.
      regex: Optional pattern; when given, the value must match it from the
        start of the string (`re.match` semantics).

    Returns:
      The attribute value.

    Raises:
      ValueError: Via `error`, if the attribute is absent or empty, or if it
        does not match `regex`.
    """
    value = elem.attrib.get(tag, None)
    if not value:
        error(elem, "missing attribute '{}'".format(tag))
    if regex:
        if re.match(regex, value) is None:
            template = "has '{}' attribute '{}' which does not match regex '{}'"
            error(elem, template.format(tag, value, regex))
    return value
| |
| |
def get_compound_children(elem, tag):
    """Return every direct child of `elem` whose tag is `tag`.

    Args:
      elem: The parent XML node.
      tag: Tag name of the children to collect.

    Returns:
      The list of matching child nodes, in the order `findall` yields them.

    Raises:
      ValueError: Via `error`, if there is no matching child, or if a
        matching child carries non-whitespace text (so is not compound).
    """
    matches = elem.findall(tag)
    if not matches:
        error(elem, "missing node '{}'".format(tag))
    # Report the first child that has real (non-whitespace) text, if any.
    offender = next(
        (node for node in matches if node.text and node.text.strip()), None)
    if offender is not None:
        error(offender, "contains text, but shouldn't")
    return matches
| |
| |
def get_compound_child(elem, tag):
    """Return the single direct child of `elem` whose tag is `tag`.

    Args:
      elem: The parent XML node.
      tag: Tag name of the child to fetch.

    Returns:
      The one matching child node.

    Raises:
      ValueError: Via `error`, if there isn't exactly one matching child, or
        if that child carries non-whitespace text (so is not compound).
    """
    matches = elem.findall(tag)
    if len(matches) != 1:
        error(elem, "needs exactly one '{}' node".format(tag))
    child = matches[0]
    # Enforce the documented "must be compound" constraint, using the same
    # check and message as get_compound_children.
    if child.text and child.text.strip():
        error(child, "contains text, but shouldn't")
    return child
| |
| |
def get_text_children(elem, tag, regex=None):
    """Return the stripped text of every direct child of `elem` tagged `tag`.

    Args:
      elem: The parent XML node.
      tag: Tag name of the children to read.
      regex: Optional pattern; when given, each child's stripped text must
        match it from the start of the string (`re.match` semantics).

    Returns:
      A list with the stripped text of each matching child, in the order
      `findall` yields them.

    Raises:
      ValueError: Via `error`, if there is no matching child, if a child has
        attributes or child nodes (so is not a text node), if a child has no
        text or only whitespace, or if its text does not match `regex`.
    """
    children = elem.findall(tag)
    if not children:
        error(elem, "missing node '{}'".format(tag))

    result = []
    for child in children:
        # Text nodes may have neither attributes nor child nodes.
        check_attributes(child, set())
        check_children(child, set())
        # `child.text` is None for an empty element such as <tag/>; treat that
        # as empty text so it produces the "missing text" error below rather
        # than an AttributeError.
        text = (child.text or '').strip()
        if not text:
            error(elem, "missing text in '{}'".format(tag))
        if regex and not re.match(regex, text):
            error(elem, ("has '{}' node '{}' which does "
                         "not match regex '{}'").format(tag, text, regex))
        result.append(text)
    return result
| |
| |
def get_text_child(elem, tag, regex=None):
    """Return the stripped text of the single child of `elem` tagged `tag`.

    Validation of the children is delegated to `get_text_children`, which is
    called with the same arguments.

    Args:
      elem: The parent XML node.
      tag: Tag name of the child to read.
      regex: Optional pattern forwarded to `get_text_children`.

    Returns:
      The text of the one matching child.

    Raises:
      ValueError: Via `error`, if `get_text_children` rejects the children or
        if more than one child matches.
    """
    texts = get_text_children(elem, tag, regex)
    if len(texts) != 1:
        error(elem, "needs exactly one '{}' node".format(tag))
    (only_text,) = texts
    return only_text
| |
| |
def check_attributes(elem, expected_attrs):
    """Ensure `elem` has no attributes except those in `expected_attrs`.

    Args:
      elem: The XML node whose attribute names are checked.
      expected_attrs: Iterable of attribute names that are allowed.

    Raises:
      ValueError: Via `error`, if `elem` has an attribute that is not in
        `expected_attrs`. The offending names are listed in sorted order.
    """
    unexpected_attrs = set(elem.attrib) - set(expected_attrs)
    if unexpected_attrs:
        # Sort so the message does not depend on set iteration order.
        attrs = ' '.join(sorted(unexpected_attrs))
        error(elem, 'has unexpected attributes: ' + attrs)
| |
| |
def check_children(elem, expected_children):
    """Ensure `elem` has no children without tags in `expected_children`.

    Args:
      elem: The XML node whose direct children are checked.
      expected_children: Iterable of child tag names that are allowed.

    Raises:
      ValueError: Via `error`, if `elem` has a direct child whose tag is not
        in `expected_children`. The offending tags are listed in sorted
        order.
    """
    actual_children = {child.tag for child in elem}
    unexpected_children = actual_children - set(expected_children)
    if unexpected_children:
        # Sort so the message does not depend on set iteration order.
        children = ' '.join(sorted(unexpected_children))
        error(elem, 'has unexpected nodes: ' + children)
| |
| |
def check_child_names_unique(elem, tag):
    """Ensure no two direct children of `elem` tagged `tag` share a 'name'.

    Children without a 'name' attribute are compared as having the name
    None, so two such children count as duplicates of each other.

    Args:
      elem: The parent XML node.
      tag: Tag name of the children whose names are compared.

    Raises:
      ValueError: Via `error`, if at least two matching children have the
        same 'name'.
    """
    seen_names = []
    for node in elem:
        if node.tag == tag:
            seen_names.append(node.attrib.get('name', None))
    # A duplicate exists exactly when de-duplicating shrinks the collection.
    if len(set(seen_names)) != len(seen_names):
        error(elem, 'has {} nodes with duplicate names'.format(tag))