[wptrunner] Run larger groups first (#39613)

For multi-process runs, this change orders the groups within each test
type in the test queue by decreasing total timeout. This heuristic
attempts to distribute work to test runners more evenly by scheduling
the largest groups first, so that smaller groups can fill in around
them.

A large group that runs near the end of the suite cannot be split, so
some runners may become idle while waiting for stragglers. For example,
[0] shows a run where there is a 1 min gap between the first and the
last processes becoming idle in the initial run.

Empirically, in Chromium CI, this change reduces total test runtime
meaningfully:
  Chrome on Linux: ~120 min [1] -> ~115 min [2]
  Content Shell on Linux: ~100 min [3] -> ~90 min [4]

[0]: https://chromium-swarm.appspot.com/task?id=62b302bbdd0bbf10
[1]: https://ci.chromium.org/ui/p/chromium/builders/ci/linux-wpt-fyi-rel/42930/overview
[2]: https://ci.chromium.org/ui/p/chromium/builders/try/linux-wpt-fyi-rel/2035/overview
[3]: https://ci.chromium.org/ui/p/chromium/builders/ci/linux-wpt-content-shell-fyi-rel/9981/overview
[4]: https://ci.chromium.org/ui/p/chromium/builders/try/linux-wpt-content-shell-fyi-rel/1850/overview
diff --git a/tools/wptrunner/wptrunner/testloader.py b/tools/wptrunner/wptrunner/testloader.py
index 950273c..8cd24f2 100644
--- a/tools/wptrunner/wptrunner/testloader.py
+++ b/tools/wptrunner/wptrunner/testloader.py
@@ -55,6 +55,7 @@
     def __getitem__(self, key):
         return self._data[key]
 
+
 def read_include_from_file(file):
     new_include = []
     with open(file) as f:
@@ -66,11 +67,11 @@
                 new_include.append(line)
     return new_include
 
+
 def update_include_for_groups(test_groups, include):
     if include is None:
         # We're just running everything
         return
-
     new_include = []
     for item in include:
         if item in test_groups:
@@ -400,9 +401,29 @@
         if self.logger is None:
             self.logger = structured.structuredlog.StructuredLogger("TestSource")
 
+    @classmethod
+    def make_queue(cls, tests_by_type, **kwargs):
+        mp = mpcontext.get_context()
+        test_queue = mp.Queue()
+        groups = cls.make_groups(tests_by_type, **kwargs)
+        processes = cls.process_count(kwargs["processes"], len(groups))
+        if processes > 1:
+            groups.sort(key=lambda group: (
+                # Place groups of the same test type together to minimize
+                # browser restarts.
+                group.test_type,
+                # Next, run larger groups first to avoid straggler runners. Use
+                # timeout to give slow tests greater relative weight.
+                -sum(test.timeout for test in group.group),
+            ))
+        for item in groups:
+            test_queue.put(item)
+        cls.add_sentinal(test_queue, processes)
+        return test_queue, processes
+
     @abstractmethod
     #@classmethod (doesn't compose with @abstractmethod in < 3.3)
-    def make_queue(cls, tests_by_type, **kwargs):  # noqa: N805
+    def make_groups(cls, tests_by_type, **kwargs):  # noqa: N805
         pass
 
     @abstractmethod
@@ -442,29 +463,17 @@
         raise NotImplementedError
 
     @classmethod
-    def make_queue(cls, tests_by_type, **kwargs):
-        mp = mpcontext.get_context()
-        test_queue = mp.Queue()
-        groups = []
-
-        state = {}
-
+    def make_groups(cls, tests_by_type, **kwargs):
+        groups, state = [], {}
         for test_type, tests in tests_by_type.items():
             for test in tests:
                 if cls.new_group(state, test_type, test, **kwargs):
                     group_metadata = cls.group_metadata(state)
                     groups.append(TestGroup(deque(), test_type, group_metadata))
-
                 group, _, metadata = groups[-1]
                 group.append(test)
                 test.update_metadata(metadata)
-
-        for item in groups:
-            test_queue.put(item)
-
-        processes = cls.process_count(kwargs["processes"], len(groups))
-        cls.add_sentinal(test_queue, processes)
-        return test_queue, processes
+        return groups
 
     @classmethod
     def tests_by_group(cls, tests_by_type, **kwargs):
@@ -481,10 +490,8 @@
 
 class SingleTestSource(TestSource):
     @classmethod
-    def make_queue(cls, tests_by_type, **kwargs):
-        mp = mpcontext.get_context()
-        test_queue = mp.Queue()
-        num_test_groups = 0
+    def make_groups(cls, tests_by_type, **kwargs):
+        groups = []
         for test_type, tests in tests_by_type.items():
             processes = kwargs["processes"]
             queues = [deque([]) for _ in range(processes)]
@@ -498,12 +505,8 @@
 
             for item in zip(queues, itertools.repeat(test_type), metadatas):
                 if len(item[0]) > 0:
-                    test_queue.put(TestGroup(*item))
-                    num_test_groups += 1
-
-        processes = cls.process_count(kwargs["processes"], num_test_groups)
-        cls.add_sentinal(test_queue, processes)
-        return test_queue, processes
+                    groups.append(TestGroup(*item))
+        return groups
 
     @classmethod
     def tests_by_group(cls, tests_by_type, **kwargs):
@@ -531,32 +534,21 @@
 
 class GroupFileTestSource(TestSource):
     @classmethod
-    def make_queue(cls, tests_by_type, **kwargs):
-        mp = mpcontext.get_context()
-        test_queue = mp.Queue()
-        num_test_groups = 0
-
+    def make_groups(cls, tests_by_type, **kwargs):
+        groups = []
         for test_type, tests in tests_by_type.items():
             tests_by_group = cls.tests_by_group({test_type: tests},
                                                 **kwargs)
-
             ids_to_tests = {test.id: test for test in tests}
-
             for group_name, test_ids in tests_by_group.items():
                 group_metadata = {"scope": group_name}
                 group = deque()
-
                 for test_id in test_ids:
                     test = ids_to_tests[test_id]
                     group.append(test)
                     test.update_metadata(group_metadata)
-
-                test_queue.put(TestGroup(group, test_type, group_metadata))
-                num_test_groups += 1
-
-        processes = cls.process_count(kwargs["processes"], num_test_groups)
-        cls.add_sentinal(test_queue, processes)
-        return test_queue, processes
+                groups.append(TestGroup(group, test_type, group_metadata))
+        return groups
 
     @classmethod
     def tests_by_group(cls, tests_by_type, **kwargs):