[wptrunner] Plumb expectations `wpttest.{Test -> (Subtest)Result}` (#44424)

This allows vendors to override expectations at runtime via
browser-specific result converters. Instead of constructing
`(Subtest)Result`s directly, executors should use the newly introduced
`Test.make_{subtest_,}result()`, which have the same signatures but
default to passing through the testloader-read expectations.

This is a pure refactor; no functional changes intended.

Successor to: https://github.com/web-platform-tests/wpt/pull/44134#issuecomment-1920055927
diff --git a/tools/wptrunner/wptrunner/executors/base.py b/tools/wptrunner/wptrunner/executors/base.py
index 7119c0b..763b6fc 100644
--- a/tools/wptrunner/wptrunner/executors/base.py
+++ b/tools/wptrunner/wptrunner/executors/base.py
@@ -90,9 +90,9 @@
         """Convert a JSON result into a (TestResult, [SubtestResult]) tuple"""
         result_url, status, message, stack, subtest_results = result
         assert result_url == test.url, (f"Got results from {result_url}, expected {test.url}")
-        harness_result = test.result_cls(self.harness_codes[status], message, extra=extra, stack=stack)
+        harness_result = test.make_result(self.harness_codes[status], message, extra=extra, stack=stack)
         return (harness_result,
-                [test.subtest_result_cls(st_name, self.test_codes[st_status], st_message, st_stack)
+                [test.make_subtest_result(st_name, self.test_codes[st_status], st_message, st_stack)
                  for st_name, st_status, st_message, st_stack in subtest_results])
 
 
@@ -152,7 +152,7 @@
 def reftest_result_converter(self, test, result):
     extra = result.get("extra", {})
     _ensure_hash_in_reftest_screenshots(extra)
-    return (test.result_cls(
+    return (test.make_result(
         result["status"],
         result["message"],
         extra=extra,
@@ -165,14 +165,14 @@
     if subtest_data is None:
         subtest_data = []
 
-    harness_result = test.result_cls(*harness_data)
-    subtest_results = [test.subtest_result_cls(*item) for item in subtest_data]
+    harness_result = test.make_result(*harness_data)
+    subtest_results = [test.make_subtest_result(*item) for item in subtest_data]
 
     return (harness_result, subtest_results)
 
 
 def crashtest_result_converter(self, test, result):
-    return test.result_cls(**result), []
+    return test.make_result(**result), []
 
 
 class ExecutorException(Exception):
@@ -352,7 +352,7 @@
         if message:
             message += "\n"
         message += exception_string
-        return test.result_cls(status, message), []
+        return test.make_result(status, message), []
 
     def wait(self):
         return self.protocol.base.wait()
@@ -648,7 +648,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(*data), [])
+        return (test.make_result(*data), [])
 
     def do_wdspec(self, path, timeout):
         session_config = {"host": self.browser.host,
diff --git a/tools/wptrunner/wptrunner/executors/executorchrome.py b/tools/wptrunner/wptrunner/executors/executorchrome.py
index 7a54eb0..f564147 100644
--- a/tools/wptrunner/wptrunner/executors/executorchrome.py
+++ b/tools/wptrunner/wptrunner/executors/executorchrome.py
@@ -46,7 +46,7 @@
                         status = result["status"]
                         if status == "PASS":
                             status = "OK"
-                        harness_result = test.result_cls(status, result["message"])
+                        harness_result = test.make_result(status, result["message"])
                         # Don't report subtests.
                         return harness_result, []
                     # `crashtest` statuses are a subset of `(print-)reftest`
diff --git a/tools/wptrunner/wptrunner/executors/executorcontentshell.py b/tools/wptrunner/wptrunner/executors/executorcontentshell.py
index 5c4068c..82a6aeb 100644
--- a/tools/wptrunner/wptrunner/executors/executorcontentshell.py
+++ b/tools/wptrunner/wptrunner/executors/executorcontentshell.py
@@ -213,14 +213,14 @@
     """Converts our TimeoutError and CrashError exceptions into test results.
     """
     if isinstance(exception, TimeoutError):
-        return (test.result_cls("EXTERNAL-TIMEOUT", errors), [])
+        return (test.make_result("EXTERNAL-TIMEOUT", errors), [])
     if isinstance(exception, CrashError):
-        return (test.result_cls("CRASH", errors), [])
+        return (test.make_result("CRASH", errors), [])
     if isinstance(exception, LeakError):
         # TODO: the internal error is to force a restart, but it doesn't correctly
         # describe what the issue is. Need to find a way to return a "FAIL",
         # and restart the content_shell after the test run.
-        return (test.result_cls("INTERNAL-ERROR", errors), [])
+        return (test.make_result("INTERNAL-ERROR", errors), [])
     raise exception
 
 
@@ -312,7 +312,7 @@
                                                                timeout_for_test(self, test))
             errors = self.protocol.content_shell_errors.read_errors()
             if not text:
-                return (test.result_cls("ERROR", errors), [])
+                return (test.make_result("ERROR", errors), [])
 
             result_url, status, message, stack, subtest_results = json.loads(text)
             if result_url != test.url:
diff --git a/tools/wptrunner/wptrunner/executors/executormarionette.py b/tools/wptrunner/wptrunner/executors/executormarionette.py
index 21be5e8..a5bf61d 100644
--- a/tools/wptrunner/wptrunner/executors/executormarionette.py
+++ b/tools/wptrunner/wptrunner/executors/executormarionette.py
@@ -979,7 +979,7 @@
         if success:
             return self.convert_result(test, data, extra=extra)
 
-        return (test.result_cls(extra=extra, *data), [])
+        return (test.make_result(extra=extra, *data), [])
 
     def do_testharness(self, protocol, url, timeout):
         parent_window = protocol.testharness.close_old_windows(self.last_environment["protocol"])
@@ -1277,7 +1277,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(extra=extra, *data), [])
+        return (test.make_result(extra=extra, *data), [])
 
     def do_crashtest(self, protocol, url, timeout):
         if self.protocol.coverage.is_enabled:
diff --git a/tools/wptrunner/wptrunner/executors/executorselenium.py b/tools/wptrunner/wptrunner/executors/executorselenium.py
index 128e648..cf5ac2a 100644
--- a/tools/wptrunner/wptrunner/executors/executorselenium.py
+++ b/tools/wptrunner/wptrunner/executors/executorselenium.py
@@ -369,7 +369,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(*data), [])
+        return (test.make_result(*data), [])
 
     def do_testharness(self, protocol, url, timeout):
         format_map = {"url": strip_server(url)}
diff --git a/tools/wptrunner/wptrunner/executors/executorservo.py b/tools/wptrunner/wptrunner/executors/executorservo.py
index dd1ea1a..9c938b6 100644
--- a/tools/wptrunner/wptrunner/executors/executorservo.py
+++ b/tools/wptrunner/wptrunner/executors/executorservo.py
@@ -136,10 +136,10 @@
                     result = self.convert_result(test, self.result_data)
                 else:
                     self.proc.wait()
-                    result = (test.result_cls("CRASH", None), [])
+                    result = (test.make_result("CRASH", None), [])
                     proc_is_running = False
             else:
-                result = (test.result_cls("TIMEOUT", None), [])
+                result = (test.make_result("TIMEOUT", None), [])
 
             if proc_is_running:
                 if self.pause_after_test:
@@ -312,7 +312,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(*data), [])
+        return (test.make_result(*data), [])
 
     def do_crashtest(self, protocol, url, timeout):
         self.command = self.build_servo_command(self.test, extra_args=["-x"])
diff --git a/tools/wptrunner/wptrunner/executors/executorservodriver.py b/tools/wptrunner/wptrunner/executors/executorservodriver.py
index 0a939c5..5d7d55f 100644
--- a/tools/wptrunner/wptrunner/executors/executorservodriver.py
+++ b/tools/wptrunner/wptrunner/executors/executorservodriver.py
@@ -201,7 +201,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(*data), [])
+        return (test.make_result(*data), [])
 
     def do_testharness(self, session, url, timeout):
         session.url = url
@@ -257,15 +257,15 @@
             result = self.implementation.run_test(test)
             return self.convert_result(test, result)
         except OSError:
-            return test.result_cls("CRASH", None), []
+            return test.make_result("CRASH", None), []
         except TimeoutError:
-            return test.result_cls("TIMEOUT", None), []
+            return test.make_result("TIMEOUT", None), []
         except Exception as e:
             message = getattr(e, "message", "")
             if message:
                 message += "\n"
             message += traceback.format_exc()
-            return test.result_cls("INTERNAL-ERROR", message), []
+            return test.make_result("INTERNAL-ERROR", message), []
 
     def screenshot(self, test, viewport_size, dpi, page_ranges):
         # https://github.com/web-platform-tests/wpt/issues/7135
diff --git a/tools/wptrunner/wptrunner/executors/executorwebdriver.py b/tools/wptrunner/wptrunner/executors/executorwebdriver.py
index 05a26bc..b49b9e2 100644
--- a/tools/wptrunner/wptrunner/executors/executorwebdriver.py
+++ b/tools/wptrunner/wptrunner/executors/executorwebdriver.py
@@ -586,7 +586,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(*data), [])
+        return (test.make_result(*data), [])
 
     def do_testharness(self, protocol, url, timeout):
         # The previous test may not have closed its old windows (if something
@@ -752,7 +752,7 @@
         if success:
             return self.convert_result(test, data)
 
-        return (test.result_cls(*data), [])
+        return (test.make_result(*data), [])
 
     def do_crashtest(self, protocol, url, timeout):
         protocol.base.load(url)
diff --git a/tools/wptrunner/wptrunner/executors/executorwktr.py b/tools/wptrunner/wptrunner/executors/executorwktr.py
index b240bca..3c02fa7 100644
--- a/tools/wptrunner/wptrunner/executors/executorwktr.py
+++ b/tools/wptrunner/wptrunner/executors/executorwktr.py
@@ -172,9 +172,9 @@
     """Converts our TimeoutError and CrashError exceptions into test results.
     """
     if isinstance(exception, TimeoutError):
-        return (test.result_cls("EXTERNAL-TIMEOUT", errors), [])
+        return (test.make_result("EXTERNAL-TIMEOUT", errors), [])
     if isinstance(exception, CrashError):
-        return (test.result_cls("CRASH", errors), [])
+        return (test.make_result("CRASH", errors), [])
     raise exception
 
 
@@ -245,7 +245,7 @@
 
             errors = self.protocol.wktr_errors.read_errors()
             if not text:
-                return (test.result_cls("ERROR", errors), [])
+                return (test.make_result("ERROR", errors), [])
 
             output = None
             output_prefix = "CONSOLE MESSAGE: WPTRUNNER OUTPUT:"
@@ -255,10 +255,10 @@
                     if output is None:
                         output = line[len(output_prefix):]
                     else:
-                        return (test.result_cls("ERROR", "multiple wptrunner outputs"), [])
+                        return (test.make_result("ERROR", "multiple wptrunner outputs"), [])
 
             if output is None:
-                return (test.result_cls("ERROR", "no wptrunner output"), [])
+                return (test.make_result("ERROR", "no wptrunner output"), [])
 
             return self.convert_result(test, json.loads(output))
         except BaseException as exception:
diff --git a/tools/wptrunner/wptrunner/testrunner.py b/tools/wptrunner/wptrunner/testrunner.py
index 63f856d..28d06f8 100644
--- a/tools/wptrunner/wptrunner/testrunner.py
+++ b/tools/wptrunner/wptrunner/testrunner.py
@@ -663,9 +663,9 @@
         self.inject_message(
             "test_ended",
             test,
-            (test.result_cls("EXTERNAL-TIMEOUT",
-                             "TestRunner hit external timeout "
-                             "(this may indicate a hang)"), []),
+            (test.make_result("EXTERNAL-TIMEOUT",
+                              "TestRunner hit external timeout "
+                              "(this may indicate a hang)"), []),
         )
 
     def test_ended(self, test, results):
@@ -691,8 +691,8 @@
         for result in test_results:
             if test.disabled(result.name):
                 continue
-            expected = test.expected(result.name)
-            known_intermittent = test.known_intermittent(result.name)
+            expected = result.expected
+            known_intermittent = result.known_intermittent
             is_unexpected = expected != result.status and result.status not in known_intermittent
             is_expected_notrun = (expected == "NOTRUN" or "NOTRUN" in known_intermittent)
 
@@ -728,8 +728,8 @@
                                     stack=result.stack,
                                     subsuite=self.state.subsuite)
 
-        expected = test.expected()
-        known_intermittent = test.known_intermittent()
+        expected = file_result.expected
+        known_intermittent = file_result.known_intermittent
         status = file_result.status
 
         if self.browser.check_crash(test.id) and status != "CRASH":
diff --git a/tools/wptrunner/wptrunner/wpttest.py b/tools/wptrunner/wptrunner/wpttest.py
index 6df1c4c..2e3fd97 100644
--- a/tools/wptrunner/wptrunner/wpttest.py
+++ b/tools/wptrunner/wptrunner/wpttest.py
@@ -4,7 +4,7 @@
 import sys
 from abc import ABC
 from collections import defaultdict
-from typing import Any, ClassVar, Dict, Optional, Type
+from typing import Any, ClassVar, Dict, Optional, Set, Type
 from urllib.parse import urljoin
 
 from .wptmanifest.parser import atoms
@@ -14,6 +14,9 @@
 
 
 class Result(ABC):
+    default_expected: ClassVar[str]
+    statuses: Set[str]
+
     def __init__(self,
                  status,
                  message,
@@ -25,7 +28,7 @@
             raise ValueError("Unrecognised status %s" % status)
         self.status = status
         self.message = message
-        self.expected = expected
+        self.expected = expected if expected is not None else self.default_expected
         self.known_intermittent = known_intermittent if known_intermittent is not None else []
         self.extra = extra if extra is not None else {}
         self.stack = stack
@@ -238,6 +241,25 @@
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    def make_result(self,
+                    status,
+                    message,
+                    expected=None,
+                    extra=None,
+                    stack=None,
+                    known_intermittent=None):
+        if expected is None:
+            expected = self.expected()
+            known_intermittent = self.known_intermittent()
+        return self.result_cls(status, message, expected, extra, stack, known_intermittent)
+
+    def make_subtest_result(self, name, status, message, stack=None, expected=None,
+                            known_intermittent=None):
+        if expected is None:
+            expected = self.expected(name)
+            known_intermittent = self.known_intermittent(name)
+        return self.subtest_result_cls(name, status, message, stack, expected, known_intermittent)
+
     def update_metadata(self, metadata=None):
         if metadata is None:
             metadata = {}