blob: af1afc414493a45248f61c1db99c595d2327ee11 [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2021 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities for parsing structured.xml.
Functions in this module raise an error if constraints on the format of the
structured.xml file are not met.
Functions use the concept of 'compound' and 'text' XML nodes.
- compound nodes can have attributes and child nodes, but no text
- text nodes can have text, but no attributes or child nodes
"""
import collections
import re
def error(elem, msg):
  """Raise a ValueError describing a problem with `elem`.

  The message names the node's tag and, when the node has a non-empty 'name'
  attribute, that name as well, followed by `msg`.

  Args:
    elem: The XML node the problem was found in.
    msg: Description of the problem, appended to the node description.

  Raises:
    ValueError: Always.
  """
  node_name = elem.attrib.get('name')
  # The trailing space separates the name from `msg`; nameless nodes get no
  # prefix at all.
  prefix = node_name + ' ' if node_name else ''
  template = 'Structured metrics error, {} node {}{}.'
  raise ValueError(template.format(elem.tag, prefix, msg))
def get_attr(elem, tag, regex=None):
  """Return the value of attribute `tag` on `elem`.

  Args:
    elem: The XML node to read the attribute from.
    tag: Name of the attribute.
    regex: Optional pattern the value must match (checked with `re.match`, so
      the pattern is anchored at the start of the value only).

  Returns:
    The attribute value.

  Raises:
    ValueError: If the attribute is missing or empty, or if `regex` is given
      and the value does not match it.
  """
  value = elem.attrib.get(tag)
  if not value:
    error(elem, "missing attribute '{}'".format(tag))
  if regex:
    if re.match(regex, value) is None:
      mismatch = "has '{}' attribute '{}' which does not match regex '{}'"
      error(elem, mismatch.format(tag, value, regex))
  return value
def get_compound_children(elem, tag):
  """Return every direct child of `elem` whose tag is `tag`.

  Args:
    elem: The parent XML node.
    tag: Tag of the children to collect.

  Returns:
    A non-empty list of the matching child nodes.

  Raises:
    ValueError: If no child matches, or if a matching child contains
      non-whitespace text (i.e. it is not a compound node).
  """
  matches = elem.findall(tag)
  if len(matches) == 0:
    error(elem, "missing node '{}'".format(tag))
  for node in matches:
    # Whitespace-only text (indentation between nested tags) is allowed.
    stripped = (node.text or '').strip()
    if stripped:
      error(node, "contains text, but shouldn't")
  return matches
def get_compound_child(elem, tag):
  """Get the child of `elem` with tag `tag`.

  Args:
    elem: The parent XML node.
    tag: Tag of the child to fetch.

  Returns:
    The single matching child node.

  Raises:
    ValueError: If there isn't exactly one matching child, or if that child
      contains non-whitespace text (i.e. it isn't a compound node).
  """
  children = elem.findall(tag)
  if len(children) != 1:
    error(elem, "needs exactly one '{}' node".format(tag))
  child = children[0]
  # Enforce the documented "must be compound" constraint, using the same check
  # and message as get_compound_children. Whitespace-only text is allowed.
  if child.text and child.text.strip():
    error(child, "contains text, but shouldn't")
  return child
def get_text_children(elem, tag, regex=None):
  """Get the text of all child nodes of `elem` with tag `tag`.

  Args:
    elem: The parent XML node.
    tag: Tag of the children to read.
    regex: Optional pattern each child's stripped text must match (checked
      with `re.match`).

  Returns:
    A non-empty list with the whitespace-stripped text of each matching
    child, in document order.

  Raises:
    ValueError: If no child matches, if a child has attributes or child
      nodes of its own, if a child has no text, or if `regex` is given and a
      child's text does not match it.
  """
  children = elem.findall(tag)
  if not children:
    error(elem, "missing node '{}'".format(tag))
  result = []
  for child in children:
    # A text node may carry neither attributes nor nested nodes.
    check_attributes(child, set())
    check_children(child, set())
    # `child.text` is None for an empty element such as <tag/>; treat that as
    # empty text so it is reported through error() rather than crashing with
    # an AttributeError.
    text = (child.text or '').strip()
    if not text:
      error(elem, "missing text in '{}'".format(tag))
    if regex and not re.match(regex, text):
      error(elem, ("has '{}' node '{}' which does "
                   "not match regex '{}'").format(tag, text, regex))
    result.append(text)
  return result
def get_text_child(elem, tag, regex=None):
  """Return the text of the only child of `elem` tagged `tag`.

  Validation of the individual children is delegated to get_text_children;
  this function additionally requires that exactly one child matched.

  Args:
    elem: The parent XML node.
    tag: Tag of the child to read.
    regex: Optional pattern forwarded to get_text_children.

  Returns:
    The text of the single matching child, as returned by get_text_children.

  Raises:
    ValueError: If get_text_children rejects the children, or if more than
      one child matched.
  """
  texts = get_text_children(elem, tag, regex)
  exactly_one = len(texts) == 1
  if not exactly_one:
    error(elem, "needs exactly one '{}' node".format(tag))
  return texts[0]
def check_attributes(elem, expected_attrs):
  """Ensure `elem` has no attributes except those in `expected_attrs`.

  Args:
    elem: The XML node to check.
    expected_attrs: Iterable of attribute names that are allowed on `elem`.

  Raises:
    ValueError: If `elem` has any attribute not listed in `expected_attrs`.
      The offending names are listed in sorted order.
  """
  unexpected_attrs = set(elem.attrib) - set(expected_attrs)
  if unexpected_attrs:
    # Sort so the message is deterministic; set iteration order for strings
    # can differ between interpreter runs.
    attrs = ' '.join(sorted(unexpected_attrs))
    error(elem, 'has unexpected attributes: ' + attrs)
def check_children(elem, expected_children):
  """Ensure `elem` has no children without tags in `expected_children`.

  Args:
    elem: The XML node whose direct children are checked.
    expected_children: Iterable of tags that are allowed as children.

  Raises:
    ValueError: If any direct child of `elem` has a tag not listed in
      `expected_children`. The offending tags are listed in sorted order,
      each tag once.
  """
  actual_children = {child.tag for child in elem}
  unexpected_children = actual_children - set(expected_children)
  if unexpected_children:
    # Sort so the message is deterministic; set iteration order for strings
    # can differ between interpreter runs.
    children = ' '.join(sorted(unexpected_children))
    error(elem, 'has unexpected nodes: ' + children)
def check_child_names_unique(elem, tag):
  """Verify that no two `tag` children of `elem` share a 'name' attribute.

  Children without a 'name' attribute are all treated as having the name
  None, so more than one such child counts as a duplicate.

  Args:
    elem: The parent XML node.
    tag: Tag of the direct children whose names are compared.

  Raises:
    ValueError: If two or more matching children have the same name.
  """
  names = [node.attrib.get('name') for node in elem if node.tag == tag]
  # Collapsing to a set drops repeats, so a size mismatch means some name
  # occurred more than once.
  if len(set(names)) != len(names):
    error(elem, 'has {} nodes with duplicate names'.format(tag))