[wptrunner] Run larger groups first (#39613)
For multi-process runs, this change sorts the groups in the test queue
by test type and, within each test type, by decreasing total timeout.
This heuristic attempts to distribute work across test runners more
evenly: the larger groups are started first, and the smaller groups are
then scheduled around them to fill the remaining time.
A large group that runs near the end of the suite cannot be split, so
some runners may become idle while waiting for stragglers. For example,
[0] shows a run where, in the initial run, there is a 1 min gap between
the time the first process becomes idle and the time the last one does.
Empirically, in Chromium CI, this change reduces total test runtime
meaningfully:
Chrome on Linux: ~120 min [1] -> ~115 min [2]
Content Shell on Linux: ~100 min [3] -> ~90 min [4]
[0]: https://chromium-swarm.appspot.com/task?id=62b302bbdd0bbf10
[1]: https://ci.chromium.org/ui/p/chromium/builders/ci/linux-wpt-fyi-rel/42930/overview
[2]: https://ci.chromium.org/ui/p/chromium/builders/try/linux-wpt-fyi-rel/2035/overview
[3]: https://ci.chromium.org/ui/p/chromium/builders/ci/linux-wpt-content-shell-fyi-rel/9981/overview
[4]: https://ci.chromium.org/ui/p/chromium/builders/try/linux-wpt-content-shell-fyi-rel/1850/overview
diff --git a/tools/wptrunner/wptrunner/testloader.py b/tools/wptrunner/wptrunner/testloader.py
index 950273c..8cd24f2 100644
--- a/tools/wptrunner/wptrunner/testloader.py
+++ b/tools/wptrunner/wptrunner/testloader.py
@@ -55,6 +55,7 @@
def __getitem__(self, key):
return self._data[key]
+
def read_include_from_file(file):
new_include = []
with open(file) as f:
@@ -66,11 +67,11 @@
new_include.append(line)
return new_include
+
def update_include_for_groups(test_groups, include):
if include is None:
# We're just running everything
return
-
new_include = []
for item in include:
if item in test_groups:
@@ -400,9 +401,29 @@
if self.logger is None:
self.logger = structured.structuredlog.StructuredLogger("TestSource")
+ @classmethod
+ def make_queue(cls, tests_by_type, **kwargs):
+ mp = mpcontext.get_context()
+ test_queue = mp.Queue()
+ groups = cls.make_groups(tests_by_type, **kwargs)
+ processes = cls.process_count(kwargs["processes"], len(groups))
+ if processes > 1:
+ groups.sort(key=lambda group: (
+ # Place groups of the same test type together to minimize
+ # browser restarts.
+ group.test_type,
+ # Next, run larger groups first to avoid straggler runners. Use
+ # timeout to give slow tests greater relative weight.
+ -sum(test.timeout for test in group.group),
+ ))
+ for item in groups:
+ test_queue.put(item)
+ cls.add_sentinal(test_queue, processes)
+ return test_queue, processes
+
@abstractmethod
#@classmethod (doesn't compose with @abstractmethod in < 3.3)
- def make_queue(cls, tests_by_type, **kwargs): # noqa: N805
+ def make_groups(cls, tests_by_type, **kwargs): # noqa: N805
pass
@abstractmethod
@@ -442,29 +463,17 @@
raise NotImplementedError
@classmethod
- def make_queue(cls, tests_by_type, **kwargs):
- mp = mpcontext.get_context()
- test_queue = mp.Queue()
- groups = []
-
- state = {}
-
+ def make_groups(cls, tests_by_type, **kwargs):
+ groups, state = [], {}
for test_type, tests in tests_by_type.items():
for test in tests:
if cls.new_group(state, test_type, test, **kwargs):
group_metadata = cls.group_metadata(state)
groups.append(TestGroup(deque(), test_type, group_metadata))
-
group, _, metadata = groups[-1]
group.append(test)
test.update_metadata(metadata)
-
- for item in groups:
- test_queue.put(item)
-
- processes = cls.process_count(kwargs["processes"], len(groups))
- cls.add_sentinal(test_queue, processes)
- return test_queue, processes
+ return groups
@classmethod
def tests_by_group(cls, tests_by_type, **kwargs):
@@ -481,10 +490,8 @@
class SingleTestSource(TestSource):
@classmethod
- def make_queue(cls, tests_by_type, **kwargs):
- mp = mpcontext.get_context()
- test_queue = mp.Queue()
- num_test_groups = 0
+ def make_groups(cls, tests_by_type, **kwargs):
+ groups = []
for test_type, tests in tests_by_type.items():
processes = kwargs["processes"]
queues = [deque([]) for _ in range(processes)]
@@ -498,12 +505,8 @@
for item in zip(queues, itertools.repeat(test_type), metadatas):
if len(item[0]) > 0:
- test_queue.put(TestGroup(*item))
- num_test_groups += 1
-
- processes = cls.process_count(kwargs["processes"], num_test_groups)
- cls.add_sentinal(test_queue, processes)
- return test_queue, processes
+ groups.append(TestGroup(*item))
+ return groups
@classmethod
def tests_by_group(cls, tests_by_type, **kwargs):
@@ -531,32 +534,21 @@
class GroupFileTestSource(TestSource):
@classmethod
- def make_queue(cls, tests_by_type, **kwargs):
- mp = mpcontext.get_context()
- test_queue = mp.Queue()
- num_test_groups = 0
-
+ def make_groups(cls, tests_by_type, **kwargs):
+ groups = []
for test_type, tests in tests_by_type.items():
tests_by_group = cls.tests_by_group({test_type: tests},
**kwargs)
-
ids_to_tests = {test.id: test for test in tests}
-
for group_name, test_ids in tests_by_group.items():
group_metadata = {"scope": group_name}
group = deque()
-
for test_id in test_ids:
test = ids_to_tests[test_id]
group.append(test)
test.update_metadata(group_metadata)
-
- test_queue.put(TestGroup(group, test_type, group_metadata))
- num_test_groups += 1
-
- processes = cls.process_count(kwargs["processes"], num_test_groups)
- cls.add_sentinal(test_queue, processes)
- return test_queue, processes
+ groups.append(TestGroup(group, test_type, group_metadata))
+ return groups
@classmethod
def tests_by_group(cls, tests_by_type, **kwargs):