[wptrunner/chromium] De-dupe merged expected statuses (#26674)

Merge expected and known_intermittent into a set and sort it to produce
stable, unique expected statuses.

We suspect two sources of duplication:
* known_intermittent from mozlog seems to include the expected status.
* Some statuses map to the same Chromium status (e.g. FAIL and ERROR).
diff --git a/tools/wptrunner/wptrunner/formatters/chromium.py b/tools/wptrunner/wptrunner/formatters/chromium.py
index cef01c4..3c52f76 100644
--- a/tools/wptrunner/wptrunner/formatters/chromium.py
+++ b/tools/wptrunner/wptrunner/formatters/chromium.py
@@ -172,7 +172,7 @@
         to the actual status if it's not.
 
         If the test has multiple statuses, it will have other statuses listed as
-        "known_intermittent" in |data|. If these exist, they will be appended to
-        the returned status with spaced in between.
+        "known_intermittent" in |data|. If these exist, they will be added to
+        the returned status with spaces in between.
 
         :param str actual_status: the actual status of the test
@@ -181,8 +181,9 @@
         """
         expected_statuses = self._map_status_name(data["expected"]) if "expected" in data else actual_status
         if data.get("known_intermittent"):
-            expected_statuses += " " + " ".join(
-                [self._map_status_name(other_status) for other_status in data["known_intermittent"]])
+            all_statuses = {self._map_status_name(other_status) for other_status in data["known_intermittent"]}
+            all_statuses.add(expected_statuses)
+            expected_statuses = " ".join(sorted(all_statuses))
         return expected_statuses
 
     def suite_start(self, data):
diff --git a/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py b/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
index 8f8e732..807f583 100644
--- a/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
+++ b/tools/wptrunner/wptrunner/formatters/tests/test_chromium.py
@@ -426,8 +426,8 @@
     test_obj = output_json["tests"]["t1"]
     # The test's statuses are all mapped, changing ERROR->FAIL and OK->PASS
     assert test_obj["actual"] == "FAIL"
-    # All the possible statuses are concatenated together into expected.
-    assert test_obj["expected"] == "PASS FAIL TIMEOUT"
+    # All the possible statuses are merged, de-duplicated, and sorted into expected.
+    assert test_obj["expected"] == "FAIL PASS TIMEOUT"
     # ...this is not a regression or unexpected because the actual status is one
     # of the expected ones
     assert "is_regression" not in test_obj
@@ -462,7 +462,7 @@
     test_obj = output_json["tests"]["t1"]
     # The test's statuses are all mapped, changing ERROR->FAIL and OK->PASS
     assert test_obj["actual"] == "FAIL"
-    # All the possible statuses are concatenated together into expected.
+    # All the possible statuses are merged, de-duplicated, and sorted into expected.
     assert test_obj["expected"] == "PASS TIMEOUT"
     # ...this is a regression and unexpected because the actual status is not
     # one of the expected ones
@@ -536,6 +536,39 @@
     assert test_obj["expected"] == "PASS"
 
 
+def test_known_intermittent_duplicate(capfd):
+    # We don't want to have duplicate statuses in the final "expected" field.
+
+    # Set up the handler.
+    output = StringIO()
+    logger = structuredlog.StructuredLogger("test_a")
+    logger.add_handler(handlers.StreamHandler(output, ChromiumFormatter()))
+
+    # There are two duplications in this input:
+    # 1. known_intermittent already contains expected;
+    # 2. both statuses in known_intermittent map to FAIL in Chromium.
+    # In the end, we should only get one FAIL in Chromium "expected".
+    logger.suite_start(["t1"], run_info={}, time=123)
+    logger.test_start("t1")
+    logger.test_end("t1", status="ERROR", expected="ERROR", known_intermittent=["FAIL", "ERROR"])
+    logger.suite_end()
+
+    # Check nothing got output to stdout/stderr.
+    # (Note that mozlog outputs exceptions during handling to stderr!)
+    captured = capfd.readouterr()
+    assert captured.out == ""
+    assert captured.err == ""
+
+    # Check the actual output of the formatter.
+    output.seek(0)
+    output_json = json.load(output)
+
+    test_obj = output_json["tests"]["t1"]
+    assert test_obj["actual"] == "FAIL"
+    # No duplicate "FAIL" in "expected".
+    assert test_obj["expected"] == "FAIL"
+
+
 def test_reftest_screenshots(capfd):
     # reftest_screenshots, if present, should be plumbed into artifacts.