Add optional time per test field to Chromium results (#33387)
diff --git a/tools/wptrunner/wptrunner/formatters/chromium.py b/tools/wptrunner/wptrunner/formatters/chromium.py
index f2f9a63..5e7f4bd 100644
--- a/tools/wptrunner/wptrunner/formatters/chromium.py
+++ b/tools/wptrunner/wptrunner/formatters/chromium.py
@@ -23,9 +23,15 @@
# A map of test status to the number of tests that had that status.
self.num_failures_by_status = defaultdict(int)
- # Start time, expressed as offset since UNIX epoch in seconds.
+ # Start time, expressed as offset since UNIX epoch in seconds. Measured
+ # from the first `suite_start` event.
self.start_timestamp_seconds = None
+ # A map of test names to test start timestamps, expressed in seconds
+ # since UNIX epoch. Only contains tests that are currently running
+ # (i.e., have not received the `test_end` event).
+ self.test_starts = {}
+
# Trie of test results. Each directory in the test name is a node in
# the trie and the leaf contains the dict of per-test data.
self.tests = {}
@@ -94,7 +100,7 @@
def _store_test_result(self, name, actual, expected, actual_metadata,
messages, wpt_actual, subtest_failure,
- reftest_screenshots=None):
+ duration=None, reftest_screenshots=None):
"""
Stores the result of a single test in |self.tests|
@@ -105,6 +111,7 @@
:param list messages: a list of test messages.
:param str wpt_actual: actual status reported by wpt, may differ from |actual|.
:param bool subtest_failure: whether this test failed because of subtests.
+ :param Optional[float] duration: time it took in seconds to run this test.
:param Optional[list] reftest_screenshots: see executors/base.py for definition.
"""
# The test name can contain a leading / which will produce an empty
@@ -116,6 +123,8 @@
cur_dict = cur_dict.setdefault(name_part, {})
cur_dict["actual"] = actual
cur_dict["expected"] = expected
+ if duration is not None:
+ cur_dict["time"] = duration
if subtest_failure:
self._append_artifact(cur_dict, "wpt_subtest_failure", "true")
if wpt_actual != actual:
@@ -197,14 +206,37 @@
expected_statuses = " ".join(sorted(all_statsues))
return expected_statuses
+ def _get_time(self, data):
+ """Get the timestamp of a message in seconds since the UNIX epoch."""
+ maybe_timestamp_millis = data.get("time")
+ if maybe_timestamp_millis is not None:
+ return float(maybe_timestamp_millis) / 1000
+ return time.time()
+
+ def _time_test(self, test_name, data):
+ """Time how long a test took to run.
+
+ :param str test_name: the name of the test to time
+ :param data: a data dictionary to extract the test end timestamp from
+ :return Optional[float]: a nonnegative duration in seconds or None if
+ the measurement is unavailable or invalid
+ """
+ test_start = self.test_starts.pop(test_name, None)
+ if test_start is not None:
+ # The |data| dictionary only provides millisecond resolution
+ # anyway, so further nonzero digits are unlikely to be meaningful.
+ duration = round(self._get_time(data) - test_start, 3)
+ if duration >= 0:
+ return duration
+ return None
+
def suite_start(self, data):
- # |data| contains a timestamp in milliseconds, while time.time() gives
- # it in seconds.
if self.start_timestamp_seconds is None:
- if 'time' in data:
- self.start_timestamp_seconds = float(data["time"]) / 1000
- else:
- self.start_timestamp_seconds = time.time()
+ self.start_timestamp_seconds = self._get_time(data)
+
+ def test_start(self, data):
+ test_name = data["test"]
+ self.test_starts[test_name] = self._get_time(data)
def test_status(self, data):
test_name = data["test"]
@@ -228,6 +260,7 @@
wpt_actual_status = data["status"]
actual_status = self._map_status_name(wpt_actual_status)
expected_statuses = self._get_expected_status_from_data(actual_status, data)
+ duration = self._time_test(test_name, data)
subtest_failure = False
if test_name in self.tests_with_subtest_fails:
subtest_failure = True
@@ -248,6 +281,7 @@
self.messages[test_name],
wpt_actual_status,
subtest_failure,
+ duration,
data.get("extra", {}).get("reftest_screenshots"))
# Remove the test from dicts to avoid accumulating too many.
diff --git a/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py b/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
index 14e0788..77002d3 100644
--- a/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
+++ b/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
@@ -60,6 +60,38 @@
assert "expected" in test_obj
+def test_time_per_test(logger, capfd):
+ # Test that the formatter measures time per test correctly.
+
+ # Set up the handler.
+ output = StringIO()
+ logger.add_handler(handlers.StreamHandler(output, ChromiumFormatter()))
+
+    # Log a run of two tests: test-id-1 takes 200ms; test-id-2's end
+    # timestamp precedes its start timestamp (an invalid measurement).
+ logger.suite_start(["test-id-1", "test-id-2"], run_info={}, time=50)
+ logger.test_start("test-id-1", time=100)
+ logger.test_start("test-id-2", time=200)
+ logger.test_end("test-id-1", status="PASS", expected="PASS", time=300)
+    logger.test_end("test-id-2", status="PASS", expected="PASS", time=199)  # ends "before" it starts
+ logger.suite_end()
+ logger.shutdown()
+
+ # check nothing got output to stdout/stderr
+ # (note that mozlog outputs exceptions during handling to stderr!)
+ captured = capfd.readouterr()
+ assert captured.out == ""
+ assert captured.err == ""
+
+ # check the actual output of the formatter
+ output.seek(0)
+ output_obj = json.load(output)
+
+ test1_obj = output_obj["tests"]["test-id-1"]
+ test2_obj = output_obj["tests"]["test-id-2"]
+ assert pytest.approx(test1_obj["time"]) == 0.2 # 300ms - 100ms = 0.2s
+    assert "time" not in test2_obj  # negative duration (199 - 200) is discarded
+
+
def test_chromium_test_name_trie(logger, capfd):
# Ensure test names are broken into directories and stored in a trie with
# test results at the leaves.