blob: db4fd97801b49b93b4378370090e8b089aba3af6 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""generate_perf_report.py is to be used after comparative_tester.py has been
executed and written some test data into the location specified by
target_spec.py. It writes to results_dir and reads all present test info from
raw_data_dir. Using this script should just be a matter of invoking it from
chromium/src while raw test data exists in raw_data_dir."""
import json
import logging
import math
import os
import sys
from typing import List, Dict, Set, Tuple, Optional, Any, TypeVar, Callable
import target_spec
from test_results import (TargetResult, ReadTargetFromJson, TestResult,
ResultLine)
class LineStats(object):
  """Aggregated statistics for one named output line of a test."""

  def __init__(self, desc: str, unit: str, time_avg: float, time_dev: float,
               cv: float, samples: int) -> None:
    """Stores the summary statistics computed for a single output line.

    Args:
      desc (str): Descriptive text identifying the line.
      unit (str): The unit of measure the line's values are expressed in.
      time_avg (float): The mean of the measurements.
      time_dev (float): The standard deviation of the measurements.
      cv (float): The coefficient of variation of the measurements.
      samples (int): How many measurements were aggregated.
    """
    self.desc = desc
    self.unit = unit
    self.sample_num = samples
    self.time_avg = time_avg
    self.time_dev = time_dev
    self.cv = cv

  def ToString(self) -> str:
    """Renders the statistics as a single human-readable line.

    Returns:
      str: The full summary (mean, deviation, unit, sample count and cv) when
        more than one sample was aggregated; otherwise only the description
        and the mean, flagged as coming from a single sample.
    """
    if self.sample_num > 1:
      fields = (self.desc, self.time_avg, self.time_dev, self.unit,
                self.sample_num, self.cv)
      return "{}: {:.5f} σ={:.5f} {} with n={} cv={}".format(*fields)
    return "{}: {:.5f} with only one sample".format(self.desc, self.time_avg)
def LineFromList(lines: List[ResultLine]) -> LineStats:
  """Summarizes repeated samples of one output line into a LineStats.

  The description and unit are taken from the first element; the list must
  therefore contain at least one line.

  Args:
    lines (List[ResultLine]): The samples of the line to summarize.

  Returns:
    LineStats: The mean, deviation, cv and sample count of the measurements.
  """
  first = lines[0]
  desc = first.desc
  unit = first.unit
  mean, std_dev, coeff_var = GenStats([sample.meas for sample in lines])
  return LineStats(desc, unit, mean, std_dev, coeff_var, len(lines))
class TestStats(object):
  """Aggregated statistics for one test across several executions."""

  def __init__(self, name: str, time_avg: float, time_dev: float, cv: float,
               samples: int, lines: List[LineStats]) -> None:
    """Stores the summary statistics computed for a single test.

    Args:
      name (str): The name of the test whose runs were aggregated.
      time_avg (float): The mean execution time of the test.
      time_dev (float): The standard deviation of the execution time.
      cv (float): The coefficient of variation of the execution time.
      samples (int): How many runs of the test were aggregated.
      lines (List[LineStats]): Statistics for each output line of the test.
    """
    self.name = name
    self.sample_num = samples
    self.time_avg = time_avg
    self.time_dev = time_dev
    self.cv = cv
    self.lines = lines

  def ToLines(self) -> List[str]:
    """Renders this test and its output lines in a human-readable format.

    Returns:
      List[str]: A summary line for the test itself, followed by one prefixed
        entry per constituent LineStats.
    """
    if self.sample_num > 1:
      summary = "{}: {:.5f} σ={:.5f}ms with n={} cv={}".format(
          self.name, self.time_avg, self.time_dev, self.sample_num, self.cv)
    else:
      summary = "{}: {:.5f} with only one sample".format(
          self.name, self.time_avg)
    return [summary] + [" {}".format(stat.ToString()) for stat in self.lines]
def TestFromList(tests: List[TestResult]) -> TestStats:
  """Folds several runs of the same test into one TestStats object.

  All runs must share the name of the first run. Output lines are grouped by
  their description so that each distinct line gets its own statistics.

  Args:
    tests (List[TestResult]): The runs of the test; must not be empty.

  Returns:
    TestStats: Timing statistics for the test plus per-line statistics.
  """
  test_name = tests[0].name
  mean, std_dev, coeff_var = GenStats([run.time for run in tests])
  lines_by_desc = {}  # type: Dict[str, List[ResultLine]]
  for run in tests:
    assert run.name == test_name
    for line in run.lines:
      lines_by_desc.setdefault(line.desc, []).append(line)
  line_stats = [
      stats for stats in map(LineFromList, lines_by_desc.values()) if stats
  ]
  return TestStats(test_name, mean, std_dev, coeff_var, len(tests), line_stats)
class TargetStats(object):
  """Aggregated statistics for a test target that was run several times."""

  def __init__(self, name: str, samples: int, tests: List[TestStats]) -> None:
    """Stores the per-test statistics collected for one target.

    Args:
      name (str): The name of the target that was built and run.
      samples (int): The number of times the target was executed.
      tests (List[TestStats]): Statistics for each test in the target.
    """
    self.tests = tests
    self.sample_num = samples
    self.name = name

  def ToLines(self) -> List[str]:
    """Renders the whole target in a human-readable format.

    Returns:
      List[str]: A header line naming the target, followed by the prefixed
        lines of every contained test.
    """
    header = "{}: " if self.sample_num > 1 else "{}: with only one sample"
    rendered = [header.format(self.name)]
    for test in self.tests:
      rendered.extend(" {}".format(text) for text in test.ToLines())
    return rendered

  def __format__(self, format_spec):
    """Formats the target as its ToLines() output joined by newlines.

    The format_spec argument is accepted for protocol compatibility and is
    not consulted.
    """
    return "\n".join(self.ToLines())
def TargetFromList(results: List[TargetResult]) -> TargetStats:
  """Coalesces a list of TargetResults into a single collection of stats.

  All results must share the name of the first result.

  Args:
    results (List[TargetResult]): The sampling of target executions to
      generate stats for; must not be empty.

  Returns:
    TargetStats: The body of stats for the sample given.
  """
  name = results[0].name
  tests = {}  # type: Dict[str, List[TestResult]]
  for result in results:
    assert result.name == name
    # Tests are grouped by name so that each one is averaged independently;
    # if a test flakes out of some runs, its statistics are still computed
    # from exactly the runs in which it appeared.
    for test in result.tests:
      # setdefault() adds each run exactly once. The previous
      # "create the list, then append" sequence counted the first run of
      # every test twice, which skewed the mean, deviation and sample count.
      tests.setdefault(test.name, []).append(test)
  test_stats = [TestFromList(test_list) for test_list in tests.values()]
  return TargetStats(name, len(results), test_stats)
def GenStats(corpus: List[float]) -> Tuple[float, float, float]:
  """Generates statistics from a list of values.

  The deviation is the population standard deviation, i.e. the squared
  differences from the mean are divided by len(corpus).

  Args:
    corpus (List[float]): The set of data to generate statistics for.

  Returns:
    Tuple[float, float, float]: The mean, standard deviation, and coefficient
      of variation for the given sample data. When the mean is zero the
      coefficient of variation is mathematically undefined: 0.0 is returned
      if the deviation is also zero (all values are zero), and math.inf
      otherwise. Note that json.dump serializes math.inf as "Infinity".

  Raises:
    ValueError: If corpus is empty.
  """
  if not corpus:
    raise ValueError("GenStats requires at least one value")
  avg = sum(corpus) / len(corpus)
  squared_diffs = sum(diff * diff for diff in (item - avg for item in corpus))
  dev = math.sqrt(squared_diffs / len(corpus))
  if avg == 0:
    # Avoid dividing by a zero mean (e.g. every measurement was 0).
    cv = 0.0 if dev == 0 else math.inf
  else:
    cv = dev / avg
  return avg, dev, cv
def DirectoryStats(directory: str) -> List[TargetStats]:
  """Builds per-target statistics from the result files in a directory.

  Every entry returned by os.listdir(directory) is read with
  ReadTargetFromJson, and the parsed results are grouped by target name
  before being averaged.

  Args:
    directory (str): The directory to scan for result JSONs.

  Returns:
    List[TargetStats]: One element per distinct target name, aggregated over
      all of that target's executions.
  """
  runs_by_target = {}  # type: Dict[str, List[TargetResult]]
  for entry in os.listdir(directory):
    parsed = ReadTargetFromJson("{}/{}".format(directory, entry))
    runs_by_target.setdefault(parsed.name, []).append(parsed)
  return [TargetFromList(runs) for runs in runs_by_target.values()]
def CompareTargets(linux: TargetStats, fuchsia: TargetStats) -> Dict[str, Any]:
  """Pairs up the tests of a Linux and a Fuchsia target and compares them.

  Args:
    linux (TargetStats): Statistics from Linux, or None if the target is
      missing there.
    fuchsia (TargetStats): Statistics from Fuchsia, or None if the target is
      missing there.

  Returns:
    Dict[str, Any]: A JSON-dumpable dict holding the target name and the
      per-test comparisons keyed by test name, or None (after logging an
      error) when only one platform produced the target.
  """
  if not (linux and fuchsia):
    # At least one side must be present, given how ZipListsByPredicate
    # builds its pairs.
    assert linux or fuchsia
    if linux:
      logging.error("Fuchsia was missing test target {}".format(linux.name))
    else:
      logging.error("Linux was missing test target {}".format(fuchsia.name))
    return None

  assert linux.name == fuchsia.name
  tests_by_name = ZipListsByPredicate(linux.tests, fuchsia.tests,
                                      lambda test: test.name)
  compared_tests = MapDictValues(tests_by_name, CompareTests)
  return {"name": linux.name, "tests": compared_tests}
def CompareTests(linux: TestStats, fuchsia: TestStats) -> Dict[str, Any]:
  """As CompareTargets, but at the test level.

  Args:
    linux (TestStats): Statistics of the test on Linux, or None if Linux
      produced no output for it.
    fuchsia (TestStats): Statistics of the test on Fuchsia, or None if
      Fuchsia produced no output for it.

  Returns:
    Dict[str, Any]: A JSON-dumpable dict with the test name, the timing
      statistics of both platforms (in ms) and the per-line comparisons keyed
      by line description. An empty dict is returned, after logging an error,
      when either platform is missing.
  """
  if not linux and not fuchsia:
    logging.error("Two null TestStats objects were passed to CompareTests.")
    return {}

  if not linux or not fuchsia:
    if linux:
      name = linux.name
      failing_os = "Fuchsia"
    else:
      name = fuchsia.name
      failing_os = "Linux"
    logging.error("%s failed to produce output for the test %s",
                  failing_os, name)
    return {}

  # Both platforms are known to be present from here on, so the result can be
  # built directly; no further None handling is needed.
  assert linux.name == fuchsia.name
  paired_lines = ZipListsByPredicate(linux.lines, fuchsia.lines,
                                     lambda line: line.desc)
  return {
      "lines": MapDictValues(paired_lines, CompareLines),
      "unit": "ms",
      "name": linux.name,
      "linux_avg": linux.time_avg,
      "linux_dev": linux.time_dev,
      "linux_cv": linux.cv,
      "fuchsia_avg": fuchsia.time_avg,
      "fuchsia_dev": fuchsia.time_dev,
      "fuchsia_cv": fuchsia.cv,
  }  # type: Dict[str, Any]
def CompareLines(linux: LineStats, fuchsia: LineStats) -> Dict[str, Any]:
  """CompareLines wraps two LineStats objects up as a JSON-dumpable dict.

  It also logs a warning every time a line is given which can't be matched up,
  regardless of which platform is the one missing it. If both lines passed are
  None, or their units or descriptions are not the same (which should never
  happen), an assertion fails.

  Args:
    linux (LineStats): Statistics of the line on Linux, or None if absent.
    fuchsia (LineStats): Statistics of the line on Fuchsia, or None if absent.

  Returns:
    Dict[str, Any]: The description and unit of the line, plus the average,
      deviation and cv of each platform that produced it.
  """
  assert linux is not None or fuchsia is not None
  if linux is not None and fuchsia is not None:
    assert linux.desc == fuchsia.desc
    assert linux.unit == fuchsia.unit

  # It doesn't matter which side the description and unit come from, as long
  # as one of them provides the values.
  ref_line = linux if linux is not None else fuchsia
  result = {"desc": ref_line.desc, "unit": ref_line.unit}

  if fuchsia is None:
    logging.warning("Fuchsia is missing test line %s", linux.desc)
  else:
    result["fuchsia_avg"] = fuchsia.time_avg
    result["fuchsia_dev"] = fuchsia.time_dev
    result["fuchsia_cv"] = fuchsia.cv

  if linux is None:
    # Previously a line missing on Linux was accepted silently.
    logging.warning("Linux is missing test line %s", fuchsia.desc)
  else:
    result["linux_avg"] = linux.time_avg
    result["linux_dev"] = linux.time_dev
    result["linux_cv"] = linux.cv

  return result
T = TypeVar("T")
R = TypeVar("R")


def ZipListsByPredicate(left: List[T], right: List[T],
                        pred: Callable[[T], R]) -> Dict[R, Tuple[T, T]]:
  """Pairs up the items of two lists that share the same key.

  The predicate is applied to every item of both lists to obtain its key. The
  returned dictionary maps each key to a (left item, right item) tuple, with
  None standing in for a side that has no item for that key. The predicate
  should not map multiple values from one list to the same key.
  """
  zipped = {}  # type: Dict [R, Tuple[T, T]]
  for left_item in left:
    left_key = pred(left_item)
    # Two items of the left list must never map to the same key.
    assert left_key not in zipped
    zipped[left_key] = (left_item, None)
  for right_item in right:
    right_key = pred(right_item)
    # Keep the left item already stored under this key, if any, and fill in
    # the right-hand slot of the pair.
    matched_left, _ = zipped.get(right_key, (None, None))
    zipped[right_key] = (matched_left, right_item)
  return zipped
U = TypeVar("U")
V = TypeVar("V")


def MapDictValues(dct: Dict[T, Tuple[R, U]],
                  predicate: Callable[[R, U], V]) -> Dict[T, V]:
  """Builds a new dictionary by transforming every value of dct.

  Each value is unpacked into positional arguments for the predicate, and the
  predicate's result is stored under the original key. The input dictionary
  is left unmodified.
  """
  return {key: predicate(*pair) for key, pair in dct.items()}
def main():
  """Compares the Linux and Fuchsia results and writes one JSON per target.

  Statistics are read from target_spec.raw_linux_dir and
  target_spec.raw_fuchsia_dir; each target for which CompareTargets yields a
  comparison is written to <target_spec.results_dir>/<target name>.json.
  """
  linux_stats = DirectoryStats(target_spec.raw_linux_dir)
  fuchsia_stats = DirectoryStats(target_spec.raw_fuchsia_dir)
  targets_by_name = ZipListsByPredicate(linux_stats, fuchsia_stats,
                                        lambda target: target.name)
  for target_name, (linux_target, fuchsia_target) in targets_by_name.items():
    report = CompareTargets(linux_target, fuchsia_target)
    if not report:
      # Nothing is written for a target that could not be compared.
      continue
    out_path = "{}/{}.json".format(target_spec.results_dir, target_name)
    with open(out_path, "w") as outfile:
      json.dump(report, outfile, indent=2)


if __name__ == "__main__":
  sys.exit(main())