blob: 78fd4b7eca844b8767d003f1dbddc63802c9400f [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Parses a markdown file, extracts documentation for UMA metrics from the doc,
and writes that into histograms.xml file.
The syntax for the markdown this script processes is as follows:
. The first line for each UMA metric should be: '## [metric name]'.
. The following lines should include the additional information about the
metric, in a markdown list, in '[name]: [value]' format. For example:
* units: pixels
* owners: first@chromium.org, second@example.com
. The description, and explanation, of the metric should be after an empty
line after the list of attributes.
. Each UMA metric section should end with a line '---'. If there are non-UMA
sections at the beginning of the doc, then the first UMA section should be
preceded by a '---' line.
A complete example:
=== sample.md
# A sample markdown document.
This is a sample markdown. It has some documentation for UMA metrics too.
# Motivation
The purpose of this sample doc is to be a guide for writing such docs.
---
## ExampleMetric.First
* units: smiles
* owners: firstowner@chromium.org, second@example.org
* os: windows, mac
* added: 2018-03-01
* expires: 2023-01-01
ExampleMetric.First measures the first example.
---
## ExampleMetric.Second
* units: happiness
This measures the second example.
"""
import datetime
import os
import re
import sys
import time
import xml.dom.minidom
sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import path_util
sys.path.append(os.path.join(os.path.dirname(__file__), 'histograms'))
import pretty_print
# Attribute names that may appear in a metric's '* [name]: [value]' list.
SupportedTags = [
  "added",
  "expires",
  "enum",
  "os",
  "owners",
  "tags",
  "units",
]


def IsTagKnown(tag):
  """Returns True if |tag| is one of the attribute names in SupportedTags."""
  return tag in SupportedTags


def IsTagValid(tag, value):
  """Returns whether |value| is an acceptable value for the known tag |tag|.

  'added' and 'expires' must be either a milestone ('M' followed by two or
  three digits, e.g. 'M65') or a real calendar date in YYYY-MM-DD form. Values
  of every other known tag are accepted as they are.

  Args:
    tag: The attribute name. Must satisfy IsTagKnown().
    value: The attribute value, as a string.

  Returns:
    True if the value is valid for the tag, False otherwise.
  """
  assert IsTagKnown(tag)
  if tag not in ('added', 'expires'):
    return True
  if re.match(r'^M[0-9]{2,3}$', value):
    return True
  date = re.match(r'^([0-9]{4})-([0-9]{2})-([0-9]{2})$', value)
  if not date:
    return False
  try:
    # datetime.date() raises ValueError for impossible dates such as
    # 2018-13-45. Report those as invalid instead of letting the exception
    # escape, so that callers can produce their own error message.
    datetime.date(int(date.group(1)), int(date.group(2)), int(date.group(3)))
  except ValueError:
    return False
  return True
class Trace(object):
  """Context manager that prints a progress message and the elapsed time.

  On entry it writes '<msg> ...' to stdout. On exit it writes ' Done (N sec)'
  if the body completed normally, or ' Failed (N sec)' if the body raised
  (including SystemExit from sys.exit()).
  """

  def __init__(self, msg):
    self.msg_ = msg
    self.start_ = None

  def __enter__(self):
    self.start_ = time.time()
    sys.stdout.write('%s ...' % (self.msg_))
    # Flush so the message is visible while the traced work is running.
    sys.stdout.flush()
    # Return the instance so that 'with Trace(...) as t' binds the tracer
    # rather than None.
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    status = 'Done' if exc_type is None else 'Failed'
    sys.stdout.write(' %s (%.3f sec)\n' % (status, time.time() - self.start_))
    # Implicitly returns None, so an exception raised by the body propagates.
def GetMetricsFromMdFile(mdfile):
  """Returns a list of metrics parsed from the markdown file.

  See the top of the file for documentation on the format of the markdown
  file.

  Args:
    mdfile: Path to the markdown file to parse.

  Returns:
    A list of dicts, one per metric section, in document order. Each dict has
    a 'name' key (the header text), a 'summary' key (the text following the
    attribute list), and one key per attribute listed in the section.

  Raises:
    AssertionError: If a metric section has a malformed, unknown or invalid
      attribute line, has no summary, has no 'owners', or has neither 'units'
      nor 'enum'.
  """
  with open(mdfile) as f:
    raw_md = f.read()

  metrics = []
  tag_pattern = re.compile(r'^\* ([^:]*): (.*)$')
  for section in re.split(r'\n---+\n', raw_md):
    lines = section.strip().split('\n')
    # Only a section whose first line is a '##' (or deeper) header describes a
    # metric. Anything else, including an empty section, is skipped without
    # abandoning the sections that follow it.
    header_match = re.match(r'^##+ ', lines[0])
    if not header_match:
      continue

    name = lines[0][len(header_match.group(0)):]
    metric = {'name': name}

    # The attribute list runs from the line after the header up to the first
    # blank line. The summary is everything after that blank line.
    summary_start = None
    for index, line in enumerate(lines[1:], 1):
      if not line.strip():
        summary_start = index + 1
        break
      match = tag_pattern.match(line)
      assert match, 'Malformed attribute line "%s" for "%s".' % (line, name)
      tag, value = match.groups()
      assert IsTagKnown(tag), 'Unknown tag: "%s".' % (tag)
      assert IsTagValid(tag, value), 'Invalid value "%s" ' \
          'for tag "%s".' % (value, tag)
      metric[tag] = value

    # The section was stripped, so a blank line is never the last line. If a
    # separator was found, at least one summary line follows it.
    assert summary_start is not None, 'No summary found for "%s"' % name
    metric['summary'] = '\n'.join(lines[summary_start:])

    assert 'owners' in metric, 'Must have owners for "%s"' % name
    assert 'enum' in metric or 'units' in metric, 'Metric "%s" must have ' \
        'a unit listed in "enum" or "units".' % name
    metrics.append(metric)
  return metrics
def CreateNode(tree, tag, text):
  """Returns a new, unattached <tag> element whose only child is |text|.

  Args:
    tree: The DOM document used to create the element and its text node.
    tag: The tag name for the new element.
    text: The text content to place inside the element.
  """
  element = tree.createElement(tag)
  text_node = tree.createTextNode(text)
  element.appendChild(text_node)
  return element
def _CreateHistogramNode(tree, metric):
  """Builds an unattached <histogram> element describing |metric|."""
  node = tree.createElement('histogram')
  node.setAttribute('name', metric['name'])
  if 'units' in metric:
    node.setAttribute('units', metric['units'])
  elif 'enum' in metric:
    node.setAttribute('enum', metric['enum'])
  # Owners are written as 'a@x.org, b@y.org'. Strip the whitespace around each
  # address and ignore empty entries (e.g. from a trailing comma) so that the
  # <owner> text is exactly the address.
  for owner in metric['owners'].split(','):
    owner = owner.strip()
    if owner:
      node.appendChild(CreateNode(tree, 'owner', owner))
  node.appendChild(CreateNode(tree, 'summary', metric['summary']))
  return node


def main():
  """Adds the metrics documented in a markdown file to histograms.xml.

  Expects exactly one command-line argument: the path to the markdown file.
  Exits with status 1 on a usage error, or if histograms.xml does not contain
  exactly one 'histograms' element.
  """
  if len(sys.argv) != 2:
    sys.stderr.write('Usage: %s <path-to-md-file>\n' % (sys.argv[0]))
    sys.exit(1)
  md_path = sys.argv[1]

  with Trace('Reading histograms.xml'):
    xml_path = path_util.GetHistogramsFile()
    with open(xml_path, 'rb') as f:
      raw_xml = f.read()

  with Trace('Parsing xml'):
    tree = xml.dom.minidom.parseString(raw_xml)
    histograms = tree.getElementsByTagName('histograms')
    if histograms.length != 1:
      sys.stderr.write('histograms.xml should have exactly one "histograms" '
                       'section.\n')
      sys.exit(1)
    histograms = histograms[0]

  with Trace('Parsing md file %s' % (md_path)):
    metrics = GetMetricsFromMdFile(md_path)

  with Trace('Adding parsed metrics'):
    for metric in metrics:
      # TODO(sad): This always appends the metric to the list. This should
      # also update if there is an already existing metric, instead of adding
      # a new one.
      histograms.appendChild(_CreateHistogramNode(tree, metric))

  with Trace('Pretty printing into histograms.xml'):
    new_xml = pretty_print.PrettyPrintHistogramsTree(tree)
    with open(xml_path, 'wb') as f:
      f.write(new_xml)


if __name__ == '__main__':
  main()