[wptrunner/chromium] De-dupe merged expected statuses (#26674)
Merge expected and known_intermittent into a set and sort it to produce
stable, unique expected statuses.
We suspect two sources of duplication:
* known_intermittent from mozlog seems to include the expected status.
* Some statuses map to the same Chromium status (e.g. FAIL and ERROR).
diff --git a/tools/wptrunner/wptrunner/formatters/chromium.py b/tools/wptrunner/wptrunner/formatters/chromium.py
index cef01c4..3c52f76 100644
--- a/tools/wptrunner/wptrunner/formatters/chromium.py
+++ b/tools/wptrunner/wptrunner/formatters/chromium.py
@@ -172,7 +172,7 @@
to the actual status if it's not.
If the test has multiple statuses, it will have other statuses listed as
- "known_intermittent" in |data|. If these exist, they will be appended to
+ "known_intermittent" in |data|. If these exist, they will be added to
the returned status with spaces in between.
:param str actual_status: the actual status of the test
@@ -181,8 +181,9 @@
"""
expected_statuses = self._map_status_name(data["expected"]) if "expected" in data else actual_status
if data.get("known_intermittent"):
- expected_statuses += " " + " ".join(
- [self._map_status_name(other_status) for other_status in data["known_intermittent"]])
+ all_statsues = {self._map_status_name(other_status) for other_status in data["known_intermittent"]}
+ all_statsues.add(expected_statuses)
+ expected_statuses = " ".join(sorted(all_statsues))
return expected_statuses
def suite_start(self, data):
diff --git a/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py b/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
index 8f8e732..807f583 100644
--- a/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
+++ b/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
@@ -426,8 +426,8 @@
test_obj = output_json["tests"]["t1"]
# The test's statuses are all mapped, changing ERROR->FAIL and OK->PASS
assert test_obj["actual"] == "FAIL"
- # All the possible statuses are concatenated together into expected.
- assert test_obj["expected"] == "PASS FAIL TIMEOUT"
+ # All the possible statuses are merged and sorted together into expected.
+ assert test_obj["expected"] == "FAIL PASS TIMEOUT"
# ...this is not a regression or unexpected because the actual status is one
# of the expected ones
assert "is_regression" not in test_obj
@@ -462,7 +462,7 @@
test_obj = output_json["tests"]["t1"]
# The test's statuses are all mapped, changing ERROR->FAIL and OK->PASS
assert test_obj["actual"] == "FAIL"
- # All the possible statuses are concatenated together into expected.
+ # All the possible statuses are merged and sorted together into expected.
assert test_obj["expected"] == "PASS TIMEOUT"
# ...this is a regression and unexpected because the actual status is not
# one of the expected ones
@@ -536,6 +536,39 @@
assert test_obj["expected"] == "PASS"
+def test_known_intermittent_duplicate(capfd):
+ # We don't want to have duplicate statuses in the final "expected" field.
+
+ # Set up the handler.
+ output = StringIO()
+ logger = structuredlog.StructuredLogger("test_a")
+ logger.add_handler(handlers.StreamHandler(output, ChromiumFormatter()))
+
+ # There are two duplications in this input:
+ # 1. known_intermittent already contains expected;
+ # 2. both statuses in known_intermittent map to FAIL in Chromium.
+ # In the end, we should only get one FAIL in Chromium "expected".
+ logger.suite_start(["t1"], run_info={}, time=123)
+ logger.test_start("t1")
+ logger.test_end("t1", status="ERROR", expected="ERROR", known_intermittent=["FAIL", "ERROR"])
+ logger.suite_end()
+
+ # Check nothing got output to stdout/stderr.
+ # (Note that mozlog outputs exceptions during handling to stderr!)
+ captured = capfd.readouterr()
+ assert captured.out == ""
+ assert captured.err == ""
+
+ # Check the actual output of the formatter.
+ output.seek(0)
+ output_json = json.load(output)
+
+ test_obj = output_json["tests"]["t1"]
+ assert test_obj["actual"] == "FAIL"
+ # No duplicate "FAIL" in "expected".
+ assert test_obj["expected"] == "FAIL"
+
+
def test_reftest_screenshots(capfd):
# reftest_screenshots, if present, should be plumbed into artifacts.