scripts/slave/recipe_modules/bisect_tester/parse_metric.py - chromium/tools/build - Git at Google

 import math
 import re
 from functools import reduce


 def _geom_mean_and_std_dev_from_histogram(histogram):  # pragma: no cover
   # Copied from: https://code.google.com/p/chromium/codesearch#chromium/build/scripts/common/chromium_utils.py&l=222
   # TODO(robertocn): Remove this code duplication from common.chromium_utils
   if not 'buckets' in histogram:
     return 0.0, 0.0
   count = 0
   sum_of_logs = 0
   for bucket in histogram['buckets']:
     if 'high' in bucket:
       bucket['mean'] = (bucket['low'] + bucket['high']) / 2.0
     else:
       bucket['mean'] = bucket['low']
     if bucket['mean'] > 0:
       sum_of_logs += math.log(bucket['mean']) * bucket['count']
       count += bucket['count']

   if count == 0:
     return 0.0, 0.0

   sum_of_squares = 0
   geom_mean = math.exp(sum_of_logs / count)
   for bucket in histogram['buckets']:
     if bucket['mean'] > 0:
       sum_of_squares += (bucket['mean'] - geom_mean) ** 2 * bucket['count']
   return geom_mean, math.sqrt(sum_of_squares / count)


 def parse_chartjson_metric(results, metric):  # pragma: no cover
   """Interpret results-chart.json, finding the needed values.

   Args:
     results: The dictionary parsed from the chartjson file.
     metric: A pair of strings indicating chart and trace names.

   Returns:
     A triple (valid_values, values, all_results) where valid_values is a
     boolean, values is a list of floating point numbers, and all_results is a
     dictionary containing all the results originally in results_str.
   """
   def escape_chars(original_string):
     return re.sub(r'[\:|=/#&,]', '_', original_string)

   chart_name, trace_name = metric
   if trace_name == chart_name:
     trace_name = 'summary'
   try:
     for chart in results['charts']:
       if escape_chars(chart) == chart_name:
         chart_name = chart  # Unescaping
         break
     for trace in results['charts'][chart_name]:
       if escape_chars(trace) == trace_name:
         trace_name = trace  # Unescaping
         break

     # This can happen if trace_name is meant to be a tir_label. This workaround
     # is necessary because test paths are ambiguous, such that a 2-part test
     # name can represent a tir_label-level summary or a story-level summary.
     if trace_name not in results['charts'].get(chart_name, {}):
       chart_name = trace_name + '@@' + chart_name
       trace_name = 'summary'

     if (results['charts'][chart_name][trace_name]['type'] ==
         'list_of_scalar_values'):
       values = results['charts'][chart_name][trace_name]['values']
       if values:
         avg_value = [sum(values) / len(values)]
         return True, avg_value, results
     if results['charts'][chart_name][trace_name]['type'] == 'histogram':
       return True, [_geom_mean_and_std_dev_from_histogram(
           results['charts'][chart_name][trace_name])[0]], results
   except KeyError:  # e.g. metric not found
     pass
   return False, [], results


 # The following has largely been copied from bisect_perf_regression.py
 def parse_metric(out, err, metric):  # pragma: no cover
   """Tries to parse the output in RESULT line format or HISTOGRAM format.

   Args:
     metric: The metric as a list of [<trace>, <value>] string pairs.
     out, err: stdout and stderr that may contain the output to be parsed

   Returns:
     A pair (valid_values, values) where valid_values is a boolean and values is
     a list of floating point numbers.
   """
   text = (out or '') + (err or '')
   result = _parse_result_values_from_output(metric, text)
   if not result:
     result = _parse_histogram_values_from_output(metric, text)
   return bool(len(result)), result


 # TODO: Deprecate the text parsing approach to get results in favor of
 #       chartjson.
 def _parse_result_values_from_output(metric, text):  # pragma: no cover
   """Attempts to parse a metric in the format RESULT <graph>: <trace>= ...

   Args:
     metric: The metric as a list of [<trace>, <value>] string pairs.
     text: The text to parse the metric values from.

   Returns:
     A list of floating point numbers found.
   """
   if not text:
     return [False, None]
   # Format is: RESULT <graph>: <trace>= <value> <units>
   metric_re = re.escape('RESULT %s: %s=' % (metric[0], metric[1]))

   # The log will be parsed looking for format:
   # <*>RESULT <graph_name>: <trace_name>= <value>
   single_result_re = re.compile(
       metric_re + r'\s*(?P<VALUE>[-]?\d*(\.\d*)?)')

   # The log will be parsed looking for format:
   # <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...]
   multi_results_re = re.compile(
       metric_re + r'\s*\[\s*(?P<VALUES>[-]?[\d\., ]+)\s*\]')

   # The log will be parsed looking for format:
   # <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>}
   mean_stddev_re = re.compile(
       metric_re +
       r'\s*\{\s*(?P<MEAN>[-]?\d*(\.\d*)?),\s*(?P<STDDEV>\d+(\.\d*)?)\s*\}')

   text_lines = text.split('\n')
   values_list = []
   for current_line in text_lines:
     # Parse the output from the performance test for the metric we're
     # interested in.
     single_result_match = single_result_re.search(current_line)
     multi_results_match = multi_results_re.search(current_line)
     mean_stddev_match = mean_stddev_re.search(current_line)
     if (single_result_match is not None and single_result_match.group('VALUE')):
       values_list += [single_result_match.group('VALUE')]
     elif (multi_results_match is not None and
           multi_results_match.group('VALUES')):
       metric_values = multi_results_match.group('VALUES')
       values_list += metric_values.split(',')
     elif (mean_stddev_match is not None and
           mean_stddev_match.group('MEAN')):
       values_list += [mean_stddev_match.group('MEAN')]

   list_of_floats = []
   # It seems the pythonic way to do this is to try to cast and catch the error.
   for v in values_list:
     try:
       list_of_floats.append(float(v))
     except ValueError:
       pass
   return list_of_floats


 def _parse_histogram_values_from_output(metric, text):  # pragma: no cover
   """Attempts to parse a metric in the format HISTOGRAM <graph: <trace>.

   Args:
     metric: The metric as a list of [<trace>, <value>] strings.
     text: The text to parse the metric values from.

   Returns:
     A list of floating point numbers found, [] if none were found.
   """
   metric_formatted = 'HISTOGRAM %s: %s= ' % (metric[0], metric[1])

   text_lines = text.split('\n')
   values_list = []

   for current_line in text_lines:
     if metric_formatted in current_line:
       current_line = current_line[len(metric_formatted):]

       try:
         histogram_values = eval(current_line)

         for b in histogram_values['buckets']:
           average_for_bucket = float(b['high'] + b['low']) * 0.5
           # Extends the list with N-elements with the average for that bucket.
           values_list.extend([average_for_bucket] * b['count'])
       except Exception:
         pass

   return values_list
	import math
	import re
	from functools import reduce


	def _geom_mean_and_std_dev_from_histogram(histogram): # pragma: no cover
	# Copied from: https://code.google.com/p/chromium/codesearch#chromium/build/scripts/common/chromium_utils.py&l=222
	# TODO(robertocn): Remove this code duplication from common.chromium_utils
	if not 'buckets' in histogram:
	return 0.0, 0.0
	count = 0
	sum_of_logs = 0
	for bucket in histogram['buckets']:
	if 'high' in bucket:
	bucket['mean'] = (bucket['low'] + bucket['high']) / 2.0
	else:
	bucket['mean'] = bucket['low']
	if bucket['mean'] > 0:
	sum_of_logs += math.log(bucket['mean']) * bucket['count']
	count += bucket['count']

	if count == 0:
	return 0.0, 0.0

	sum_of_squares = 0
	geom_mean = math.exp(sum_of_logs / count)
	for bucket in histogram['buckets']:
	if bucket['mean'] > 0:
	sum_of_squares += (bucket['mean'] - geom_mean) ** 2 * bucket['count']
	return geom_mean, math.sqrt(sum_of_squares / count)


	def parse_chartjson_metric(results, metric): # pragma: no cover
	"""Interpret results-chart.json, finding the needed values.

	Args:
	results: The dictionary parsed from the chartjson file.
	metric: A pair of strings indicating chart and trace names.

	Returns:
	A triple (valid_values, values, all_results) where valid_values is a
	boolean, values is a list of floating point numbers, and all_results is a
	dictionary containing all the results originally in results_str.
	"""
	def escape_chars(original_string):
	return re.sub(r'[\:\|=/#&,]', '_', original_string)

	chart_name, trace_name = metric
	if trace_name == chart_name:
	trace_name = 'summary'
	try:
	for chart in results['charts']:
	if escape_chars(chart) == chart_name:
	chart_name = chart # Unescaping
	break
	for trace in results['charts'][chart_name]:
	if escape_chars(trace) == trace_name:
	trace_name = trace # Unescaping
	break

	# This can happen if trace_name is meant to be a tir_label. This workaround
	# is necessary because test paths are ambiguous, such that a 2-part test
	# name can represent a tir_label-level summary or a story-level summary.
	if trace_name not in results['charts'].get(chart_name, {}):
	chart_name = trace_name + '@@' + chart_name
	trace_name = 'summary'

	if (results['charts'][chart_name][trace_name]['type'] ==
	'list_of_scalar_values'):
	values = results['charts'][chart_name][trace_name]['values']
	if values:
	avg_value = [sum(values) / len(values)]
	return True, avg_value, results
	if results['charts'][chart_name][trace_name]['type'] == 'histogram':
	return True, [_geom_mean_and_std_dev_from_histogram(
	results['charts'][chart_name][trace_name])[0]], results
	except KeyError: # e.g. metric not found
	pass
	return False, [], results


	# The following has largely been copied from bisect_perf_regression.py
	def parse_metric(out, err, metric): # pragma: no cover
	"""Tries to parse the output in RESULT line format or HISTOGRAM format.

	Args:
	metric: The metric as a list of [<trace>, <value>] string pairs.
	out, err: stdout and stderr that may contain the output to be parsed

	Returns:
	A pair (valid_values, values) where valid_values is a boolean and values is
	a list of floating point numbers.
	"""
	text = (out or '') + (err or '')
	result = _parse_result_values_from_output(metric, text)
	if not result:
	result = _parse_histogram_values_from_output(metric, text)
	return bool(len(result)), result


	# TODO: Deprecate the text parsing approach to get results in favor of
	# chartjson.
	def _parse_result_values_from_output(metric, text): # pragma: no cover
	"""Attempts to parse a metric in the format RESULT <graph>: <trace>= ...

	Args:
	metric: The metric as a list of [<trace>, <value>] string pairs.
	text: The text to parse the metric values from.

	Returns:
	A list of floating point numbers found.
	"""
	if not text:
	return [False, None]
	# Format is: RESULT <graph>: <trace>= <value> <units>
	metric_re = re.escape('RESULT %s: %s=' % (metric[0], metric[1]))

	# The log will be parsed looking for format:
	# <*>RESULT <graph_name>: <trace_name>= <value>
	single_result_re = re.compile(
	metric_re + r'\s(?P<VALUE>[-]?\d(\.\d*)?)')

	# The log will be parsed looking for format:
	# <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...]
	multi_results_re = re.compile(
	metric_re + r'\s\[\s(?P<VALUES>[-]?[\d\., ]+)\s*\]')

	# The log will be parsed looking for format:
	# <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>}
	mean_stddev_re = re.compile(
	metric_re +
	r'\s\{\s(?P<MEAN>[-]?\d(\.\d)?),\s(?P<STDDEV>\d+(\.\d)?)\s*\}')

	text_lines = text.split('\n')
	values_list = []
	for current_line in text_lines:
	# Parse the output from the performance test for the metric we're
	# interested in.
	single_result_match = single_result_re.search(current_line)
	multi_results_match = multi_results_re.search(current_line)
	mean_stddev_match = mean_stddev_re.search(current_line)
	if (single_result_match is not None and single_result_match.group('VALUE')):
	values_list += [single_result_match.group('VALUE')]
	elif (multi_results_match is not None and
	multi_results_match.group('VALUES')):
	metric_values = multi_results_match.group('VALUES')
	values_list += metric_values.split(',')
	elif (mean_stddev_match is not None and
	mean_stddev_match.group('MEAN')):
	values_list += [mean_stddev_match.group('MEAN')]

	list_of_floats = []
	# It seems the pythonic way to do this is to try to cast and catch the error.
	for v in values_list:
	try:
	list_of_floats.append(float(v))
	except ValueError:
	pass
	return list_of_floats


	def _parse_histogram_values_from_output(metric, text): # pragma: no cover
	"""Attempts to parse a metric in the format HISTOGRAM <graph: <trace>.

	Args:
	metric: The metric as a list of [<trace>, <value>] strings.
	text: The text to parse the metric values from.

	Returns:
	A list of floating point numbers found, [] if none were found.
	"""
	metric_formatted = 'HISTOGRAM %s: %s= ' % (metric[0], metric[1])

	text_lines = text.split('\n')
	values_list = []

	for current_line in text_lines:
	if metric_formatted in current_line:
	current_line = current_line[len(metric_formatted):]

	try:
	histogram_values = eval(current_line)

	for b in histogram_values['buckets']:
	average_for_bucket = float(b['high'] + b['low']) * 0.5
	# Extends the list with N-elements with the average for that bucket.
	values_list.extend([average_for_bucket] * b['count'])
	except Exception:
	pass

	return values_list