blob: 1b2f0b05800b9b1e4df519263a09564d7312ccae [file] [log] [blame]
import ast
import math
import re
from functools import reduce
def _geom_mean_and_std_dev_from_histogram(histogram): # pragma: no cover
# Copied from: https://code.google.com/p/chromium/codesearch#chromium/build/scripts/common/chromium_utils.py&l=222
# TODO(robertocn): Remove this code duplication from common.chromium_utils
if not 'buckets' in histogram:
return 0.0, 0.0
count = 0
sum_of_logs = 0
for bucket in histogram['buckets']:
if 'high' in bucket:
bucket['mean'] = (bucket['low'] + bucket['high']) / 2.0
else:
bucket['mean'] = bucket['low']
if bucket['mean'] > 0:
sum_of_logs += math.log(bucket['mean']) * bucket['count']
count += bucket['count']
if count == 0:
return 0.0, 0.0
sum_of_squares = 0
geom_mean = math.exp(sum_of_logs / count)
for bucket in histogram['buckets']:
if bucket['mean'] > 0:
sum_of_squares += (bucket['mean'] - geom_mean) ** 2 * bucket['count']
return geom_mean, math.sqrt(sum_of_squares / count)
def parse_chartjson_metric(results, metric):  # pragma: no cover
  """Interpret results-chart.json, finding the needed values.

  The chart and trace names in `metric` are compared against the escaped form
  (see escape_chars below) of the names found in `results`, and are mapped
  back to the unescaped names before the lookup. A trace name equal to the
  chart name selects the 'summary' trace.

  Args:
    results: The dictionary parsed from the chartjson file.
    metric: A pair of strings indicating chart and trace names.

  Returns:
    A triple (valid_values, values, all_results) where valid_values is a
    boolean, values is a list of floating point numbers, and all_results is
    the `results` dictionary that was passed in. For a
    'list_of_scalar_values' trace, values holds the single average of the
    trace's values; for a 'histogram' trace it holds the geometric mean.
    (False, [], results) is returned when the metric cannot be found.
  """
  def escape_chars(original_string):
    return re.sub(r'[\:|=/#&,]', '_', original_string)

  chart_name, trace_name = metric
  if trace_name == chart_name:
    trace_name = 'summary'
  try:
    charts = results['charts']
    for chart in charts:
      if escape_chars(chart) == chart_name:
        chart_name = chart  # Unescaping
        break
    # Use .get() here: a chart that does not exist under its plain name must
    # still reach the tir_label workaround below instead of raising KeyError.
    for trace in charts.get(chart_name, {}):
      if escape_chars(trace) == trace_name:
        trace_name = trace  # Unescaping
        break
    # This can happen if trace_name is meant to be a tir_label. This workaround
    # is necessary because test paths are ambiguous, such that a 2-part test
    # name can represent a tir_label-level summary or a story-level summary.
    if trace_name not in charts.get(chart_name, {}):
      chart_name = trace_name + '@@' + chart_name
      trace_name = 'summary'
    trace_data = charts[chart_name][trace_name]
    if trace_data['type'] == 'list_of_scalar_values':
      values = trace_data['values']
      if values:
        # Force float division so integer samples are not truncated.
        return True, [sum(values) / float(len(values))], results
    if trace_data['type'] == 'histogram':
      geom_mean = _geom_mean_and_std_dev_from_histogram(trace_data)[0]
      return True, [geom_mean], results
  except KeyError:  # e.g. metric not found
    pass
  return False, [], results
# The following has largely been copied from bisect_perf_regression.py
def parse_metric(out, err, metric):  # pragma: no cover
  """Tries to parse the output in RESULT line format or HISTOGRAM format.

  The RESULT format is tried first; the HISTOGRAM format is only tried when
  no RESULT values were found.

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    out, err: stdout and stderr that may contain the output to be parsed.
        Either may be None or empty; both are concatenated before parsing.

  Returns:
    A pair (valid_values, values) where valid_values is a boolean and values is
    a list of floating point numbers. (False, []) when nothing was found.
  """
  text = (out or '') + (err or '')
  if not text:
    # Nothing to parse. Answer here rather than depending on how the helper
    # parsers treat empty input.
    return False, []
  result = _parse_result_values_from_output(metric, text)
  if not result:
    result = _parse_histogram_values_from_output(metric, text)
  return bool(result), result
# TODO: Deprecate the text parsing approach to get results in favor of
# chartjson.
def _parse_result_values_from_output(metric, text): # pragma: no cover
"""Attempts to parse a metric in the format RESULT <graph>: <trace>= ...
Args:
metric: The metric as a list of [<trace>, <value>] string pairs.
text: The text to parse the metric values from.
Returns:
A list of floating point numbers found.
"""
if not text:
return [False, None]
# Format is: RESULT <graph>: <trace>= <value> <units>
metric_re = re.escape('RESULT %s: %s=' % (metric[0], metric[1]))
# The log will be parsed looking for format:
# <*>RESULT <graph_name>: <trace_name>= <value>
single_result_re = re.compile(
metric_re + r'\s*(?P<VALUE>[-]?\d*(\.\d*)?)')
# The log will be parsed looking for format:
# <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...]
multi_results_re = re.compile(
metric_re + r'\s*\[\s*(?P<VALUES>[-]?[\d\., ]+)\s*\]')
# The log will be parsed looking for format:
# <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>}
mean_stddev_re = re.compile(
metric_re +
r'\s*\{\s*(?P<MEAN>[-]?\d*(\.\d*)?),\s*(?P<STDDEV>\d+(\.\d*)?)\s*\}')
text_lines = text.split('\n')
values_list = []
for current_line in text_lines:
# Parse the output from the performance test for the metric we're
# interested in.
single_result_match = single_result_re.search(current_line)
multi_results_match = multi_results_re.search(current_line)
mean_stddev_match = mean_stddev_re.search(current_line)
if (single_result_match is not None and single_result_match.group('VALUE')):
values_list += [single_result_match.group('VALUE')]
elif (multi_results_match is not None and
multi_results_match.group('VALUES')):
metric_values = multi_results_match.group('VALUES')
values_list += metric_values.split(',')
elif (mean_stddev_match is not None and
mean_stddev_match.group('MEAN')):
values_list += [mean_stddev_match.group('MEAN')]
list_of_floats = []
# It seems the pythonic way to do this is to try to cast and catch the error.
for v in values_list:
try:
list_of_floats.append(float(v))
except ValueError:
pass
return list_of_floats
def _parse_histogram_values_from_output(metric, text): # pragma: no cover
"""Attempts to parse a metric in the format HISTOGRAM <graph: <trace>.
Args:
metric: The metric as a list of [<trace>, <value>] strings.
text: The text to parse the metric values from.
Returns:
A list of floating point numbers found, [] if none were found.
"""
metric_formatted = 'HISTOGRAM %s: %s= ' % (metric[0], metric[1])
text_lines = text.split('\n')
values_list = []
for current_line in text_lines:
if metric_formatted in current_line:
current_line = current_line[len(metric_formatted):]
try:
histogram_values = eval(current_line)
for b in histogram_values['buckets']:
average_for_bucket = float(b['high'] + b['low']) * 0.5
# Extends the list with N-elements with the average for that bucket.
values_list.extend([average_for_bucket] * b['count'])
except Exception:
pass
return values_list