Split Benchmark and Runner into separate classes

- Move Benchmark into benchmarks/base.py
- Reduce the dependencies of the benchmarks.* modules: the full
  crossbench module is now only imported for typing (under
  TYPE_CHECKING); see the usage sketch below
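
A rough usage sketch of the new split (class names as introduced in this
CL; out_dir/browsers stand for a fresh results directory and a list of
configured browsers, the actual wiring lives in cli.py):

  # The Benchmark only holds and validates its stories ...
  benchmark = PageLoadBenchmark(stories=LivePage.from_names(["all"]))
  # ... while the generic Runner drives browsers, probes and repetitions.
  runner = cb.runner.Runner(out_dir, browsers, benchmark,
                            use_checklist=False)
  runner.run()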

Change-Id: Ibd1bb1a187078fa6f76fdf637ef18fcb22eb9bc9
Reviewed-on: https://chromium-review.googlesource.com/c/crossbench/+/3904552
Reviewed-by: Patrick Thier <pthier@chromium.org>
diff --git a/crossbench/benchmarks/__init__.py b/crossbench/benchmarks/__init__.py
index f6603b0..1d1e32f 100644
--- a/crossbench/benchmarks/__init__.py
+++ b/crossbench/benchmarks/__init__.py
@@ -2,7 +2,8 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-from crossbench.benchmarks.jetstream import JetStream2Runner
-from crossbench.benchmarks.loading import PageLoadRunner
-from crossbench.benchmarks.motionmark import MotionMark12Runner
+from crossbench.benchmarks.base import Benchmark
+from crossbench.benchmarks.jetstream import JetStream2Benchmark
+from crossbench.benchmarks.loading import PageLoadBenchmark
+from crossbench.benchmarks.motionmark import MotionMark12Benchmark
 from crossbench.benchmarks.speedometer import Speedometer20Runner
diff --git a/crossbench/benchmarks/base.py b/crossbench/benchmarks/base.py
new file mode 100644
index 0000000..71c3668
--- /dev/null
+++ b/crossbench/benchmarks/base.py
@@ -0,0 +1,141 @@
+# Copyright 2022 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from __future__ import annotations
+
+import abc
+import argparse
+from typing import Iterable, Sequence, TYPE_CHECKING
+
+if TYPE_CHECKING:
+  import crossbench as cb
+import crossbench.stories as stories
+
+
+class Benchmark(abc.ABC):
+  NAME = None
+  DEFAULT_STORY_CLS = None
+
+  @classmethod
+  def add_cli_parser(cls, subparsers) -> argparse.ArgumentParser:
+    assert cls.__doc__, (
+        f"Benchmark class {cls} must provide a doc string.")
+    doc_title = cls.__doc__.strip().split("\n")[0]
+    parser = subparsers.add_parser(
+        cls.NAME,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        help=doc_title,
+        description=cls.__doc__.strip())
+    return parser
+
+  @classmethod
+  def describe(cls):
+    return {
+        "name": cls.NAME,
+        "description": cls.__doc__.strip(),
+        "stories": [],
+        "probes-default": {
+            probe_cls.NAME: probe_cls.__doc__.strip()
+            for probe_cls in cls.DEFAULT_STORY_CLS.PROBES
+        }
+    }
+
+  def __init__(self, stories: Sequence[cb.stories.Story]):
+    assert self.NAME is not None, f"{self} has no .NAME property"
+    if isinstance(stories, self.DEFAULT_STORY_CLS):
+      stories = [stories]
+    self.stories = stories
+    self._validate_stories()
+
+  def _validate_stories(self):
+    assert self.stories, "No stories provided"
+    first_story = self.stories[0]
+    expected_probes_cls_list = first_story.PROBES
+    for story in self.stories:
+      assert isinstance(story, self.DEFAULT_STORY_CLS), (
+          f"story={story} has not the same class as {self.DEFAULT_STORY_CLS}")
+      assert story.PROBES == expected_probes_cls_list, (
+          f"stroy={story} has different PROBES than {first_story}")
+
+
+class SubStoryBenchmark(Benchmark):
+
+  @classmethod
+  def parse_cli_stories(cls, values):
+    return tuple(story.strip() for story in values.split(","))
+
+  @classmethod
+  def add_cli_parser(cls, subparsers) -> argparse.ArgumentParser:
+    parser = super().add_cli_parser(subparsers)
+    parser.add_argument(
+        "--stories",
+        default="all",
+        type=cls.parse_cli_stories,
+        help="Comma-separated list of story names. Use 'all' as placeholder.")
+    is_combined_group = parser.add_mutually_exclusive_group()
+    is_combined_group.add_argument(
+        "--combined",
+        dest="separate",
+        default=False,
+        action="store_false",
+        help="Run each story in the same session. (default)")
+    is_combined_group.add_argument(
+        "--separate",
+        action="store_true",
+        help="Run each story in a fresh browser.")
+    return parser
+
+  @classmethod
+  def kwargs_from_cli(cls, args) -> dict:
+    return dict(stories=cls.stories_from_cli(args))
+
+  @classmethod
+  def stories_from_cli(cls, args) -> Iterable[cb.stories.Story]:
+    assert issubclass(cls.DEFAULT_STORY_CLS, stories.Story), (
+        f"{cls.__name__}.DEFAULT_STORY_CLS is not a Story class. "
+        f"Got '{cls.DEFAULT_STORY_CLS}' instead.")
+    return cls.DEFAULT_STORY_CLS.from_names(args.stories, args.separate)
+
+  @classmethod
+  def describe(cls) -> dict:
+    data = super().describe()
+    data["stories"] = cls.story_names()
+    return data
+
+  @classmethod
+  def story_names(cls) -> Iterable[str]:
+    return cls.DEFAULT_STORY_CLS.story_names()
+
+
+class PressBenchmark(SubStoryBenchmark):
+
+  @classmethod
+  def add_cli_parser(cls, subparsers) -> argparse.ArgumentParser:
+    parser = super().add_cli_parser(subparsers)
+    is_live_group = parser.add_mutually_exclusive_group()
+    is_live_group.add_argument(
+        "--live",
+        default=True,
+        action="store_true",
+        help="Use live/online benchmark url.")
+    is_live_group.add_argument(
+        "--local",
+        dest="live",
+        action="store_false",
+        help="Use locally hosted benchmark url.")
+    return parser
+
+  @classmethod
+  def stories_from_cli(cls, args) -> Iterable[cb.stories.PressBenchmarkStory]:
+    assert issubclass(cls.DEFAULT_STORY_CLS, stories.PressBenchmarkStory)
+    return cls.DEFAULT_STORY_CLS.from_names(args.stories, args.separate,
+                                            args.live)
+
+  @classmethod
+  def describe(cls) -> dict:
+    data = super().describe()
+    assert issubclass(cls.DEFAULT_STORY_CLS, stories.PressBenchmarkStory)
+    data["url"] = cls.DEFAULT_STORY_CLS.URL
+    data["url-local"] = cls.DEFAULT_STORY_CLS.URL_LOCAL
+    return data
diff --git a/crossbench/benchmarks/jetstream.py b/crossbench/benchmarks/jetstream.py
index c871d1d..c17e642 100644
--- a/crossbench/benchmarks/jetstream.py
+++ b/crossbench/benchmarks/jetstream.py
@@ -5,11 +5,18 @@
 from __future__ import annotations
 
 import pathlib
+from typing import TYPE_CHECKING
 
-import crossbench as cb
+if TYPE_CHECKING:
+  import crossbench as cb
+
+import crossbench.probes as probes
+import crossbench.stories as stories
+import crossbench.helper as helper
+import crossbench.benchmarks.base as benchmarks
 
 
-class JetStream2Probe(cb.probes.JsonResultProbe):
+class JetStream2Probe(probes.JsonResultProbe):
   """
   JetStream2-specific Probe.
   Extracts all JetStream2 times and scores.
@@ -40,7 +47,7 @@
     return data
 
   def merge_stories(self, group: cb.runner.StoriesRunGroup):
-    merged = cb.probes.json.JSONMerger.from_merged_files(
+    merged = probes.json.JSONMerger.from_merged_files(
         story_group.results[self] for story_group in group.repetitions_groups)
     merged_json_file = self.write_group_result(group, merged.to_json())
     merged_csv_file = merged_json_file.with_suffix(".csv")
@@ -50,11 +57,11 @@
   def _json_to_csv(self, merged_data, out_file):
     assert not out_file.exists()
     # "story_name" => [ metric_value_path, ...], ...
-    grouped_by_story = cb.helper.group_by(
+    grouped_by_story = helper.group_by(
         sorted(merged_data.keys(), key=lambda path: str(path).lower()),
         key=lambda path: path.parts[0])
     # ("metric_name", ...) => [ "story_name", ... ], ...
-    grouped_by_metrics = cb.helper.group_by(
+    grouped_by_metrics = helper.group_by(
         grouped_by_story.items(),
         key=lambda item: tuple(sorted(path.name for path in item[1])),
         value=lambda item: item[0])
@@ -78,7 +85,7 @@
           f.write("\n")
 
 
-class JetStream2Story(cb.stories.PressBenchmarkStory):
+class JetStream2Story(stories.PressBenchmarkStory):
   NAME = "jetstream_2"
   PROBES = (JetStream2Probe,)
   URL = "https://browserbench.org/JetStream/"
@@ -157,7 +164,7 @@
       if self._substories != self.SUBSTORIES:
         actions.wait_js_condition(("return JetStream && JetStream.benchmarks "
                                    "&& JetStream.benchmarks.length > 0;"),
-                                  cb.helper.wait_range(0.1, 10))
+                                  helper.wait_range(0.1, 10))
         actions.js(
             """
         let benchmarks = arguments[0];
@@ -168,7 +175,7 @@
       actions.wait_js_condition(
           """
         return document.querySelectorAll("#results>.benchmark").length > 0;
-      """, cb.helper.wait_range(0.5, 10))
+      """, helper.wait_range(0.5, 10))
     with run.actions("Run") as actions:
       actions.js("JetStream.start()")
       actions.wait(2 * len(self._substories))
@@ -176,10 +183,10 @@
           """
         let summaryElement = document.getElementById("result-summary");
         return (summaryElement.classList.contains("done"));
-        """, cb.helper.wait_range(1, 60 * 20))
+        """, helper.wait_range(1, 60 * 20))
 
 
-class JetStream2Runner(cb.runner.PressBenchmarkStoryRunner):
+class JetStream2Benchmark(benchmarks.PressBenchmark):
   """
   Benchmark runner for JetStream 2.
 
diff --git a/crossbench/benchmarks/loading.py b/crossbench/benchmarks/loading.py
index ac9ab79..7a8e7d3 100644
--- a/crossbench/benchmarks/loading.py
+++ b/crossbench/benchmarks/loading.py
@@ -1,18 +1,23 @@
 # Copyright 2022 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import annotations
 
 import abc
 import logging
 import pathlib
 import re
-from typing import Iterable, Optional, Sequence, Union
+from typing import Iterable, Optional, Union, TYPE_CHECKING
 from urllib.parse import urlparse
 
-import crossbench as cb
+if TYPE_CHECKING:
+  import crossbench as cb
+
+import crossbench.stories as stories
+import crossbench.benchmarks.base as benchmarks
 
 
-class Page(cb.stories.Story, metaclass=abc.ABCMeta):
+class Page(stories.Story, metaclass=abc.ABCMeta):
   pass
 
 
@@ -135,7 +140,7 @@
 PAGES = {page.name: page for page in PAGE_LIST}
 
 
-class PageLoadRunner(cb.runner.SubStoryRunner):
+class PageLoadBenchmark(benchmarks.SubStoryBenchmark):
   """
   Benchmark runner for loading pages.
 
@@ -163,14 +168,14 @@
         help="List of urls and durations to load: url,seconds,...")
     return parser
 
-  def __init__(self, out_dir: pathlib.Path,
-               browsers: Sequence[cb.browsers.Browser],
-               stories: Union[Page, Iterable[Page]], *args, **kwargs):
+  def __init__(self,
+               stories: Union[Page, Iterable[Page]],
+               duration: Optional[float] = None):
     if isinstance(stories, Page):
       stories = [stories]
-    duration: Optional[int] = kwargs.pop('duration', None)
     for story in stories:
       assert isinstance(story, Page)
       if duration is not None:
+        assert duration > 0, f"Invalid page duration={duration}s"
         story.duration = duration
-    super().__init__(out_dir, browsers, stories, *args, **kwargs)
+    super().__init__(stories)
diff --git a/crossbench/benchmarks/motionmark.py b/crossbench/benchmarks/motionmark.py
index 9cdc156..2c82d0b 100644
--- a/crossbench/benchmarks/motionmark.py
+++ b/crossbench/benchmarks/motionmark.py
@@ -2,13 +2,21 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import annotations
+
 import itertools
 import pathlib
+from typing import TYPE_CHECKING
 
-import crossbench as cb
+if TYPE_CHECKING:
+  import crossbench as cb
 
+import crossbench.probes as probes
+import crossbench.stories as stories
+import crossbench.helper as helper
+import crossbench.benchmarks.base as benchmarks
 
-class MotionMark12Probe(cb.probes.JsonResultProbe):
+class MotionMark12Probe(probes.JsonResultProbe):
   """
   MotionMark-specific Probe.
   Extracts all MotionMark times and scores.
@@ -31,14 +39,14 @@
     return True
 
   def flatten_json_data(self, json_data):
-    flat_data = cb.probes.json.flatten(*json_data)
+    flat_data = probes.json.flatten(*json_data)
     flat_data = {
         k: v for k, v in flat_data.items() if MotionMark12Probe.filter(k, v)
     }
     return flat_data
 
 
-class MotionMark12Story(cb.stories.PressBenchmarkStory):
+class MotionMark12Story(stories.PressBenchmarkStory):
   NAME = "motionmark_1.2"
   PROBES = (MotionMark12Probe,)
   URL = "https://browserbench.org/MotionMark1.2/developer.html"
@@ -143,7 +151,7 @@
       actions.navigate_to(self._url)
       actions.wait_js_condition(
           """return document.querySelector("tree > li") !== undefined""",
-          cb.helper.wait_range(0.1, 10))
+          helper.wait_range(0.1, 10))
       num_enabled = actions.js(
           """
         let benchmarks = arguments[0];
@@ -171,10 +179,10 @@
       actions.wait_js_condition(
           """
           return window.benchmarkRunnerClient.results._results != undefined
-          """, cb.helper.wait_range(5, 20 * len(self._substories)))
+          """, helper.wait_range(5, 20 * len(self._substories)))
 
 
-class MotionMark12Runner(cb.runner.PressBenchmarkStoryRunner):
+class MotionMark12Benchmark(benchmarks.PressBenchmark):
   """
   Benchmark runner for MotionMark 1.2.
 
@@ -183,10 +191,3 @@
 
   NAME = "motionmark_1.2"
   DEFAULT_STORY_CLS = MotionMark12Story
-
-  def __init__(self, *args, stories=None, **kwargs):
-    if isinstance(stories, self.DEFAULT_STORY_CLS):
-      stories = [stories]
-    for story in stories:
-      assert isinstance(story, self.DEFAULT_STORY_CLS)
-    super().__init__(*args, stories=stories, **kwargs)
diff --git a/crossbench/benchmarks/speedometer.py b/crossbench/benchmarks/speedometer.py
index d14eced..d595090 100644
--- a/crossbench/benchmarks/speedometer.py
+++ b/crossbench/benchmarks/speedometer.py
@@ -2,14 +2,23 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import annotations
+
 import argparse
 import json
 import pathlib
+from typing import TYPE_CHECKING
 
-import crossbench as cb
+if TYPE_CHECKING:
+  import crossbench as cb
+
+import crossbench.probes as probes
+import crossbench.stories as stories
+import crossbench.helper as helper
+import crossbench.benchmarks.base as benchmarks
 
 
-class Speedometer20Probe(cb.probes.JsonResultProbe):
+class Speedometer20Probe(probes.JsonResultProbe):
   """
   Speedometer2-specific probe.
   Extracts all speedometer times and scores.
@@ -24,12 +33,12 @@
   def flatten_json_data(self, json_data):
     # json_data may contain multiple iterations, merge those first
     assert isinstance(json_data, list)
-    merged = cb.probes.json.merge(
+    merged = probes.json.merge(
         *json_data, value=lambda values: values.geomean)
-    return cb.probes.json.flatten(merged)
+    return probes.json.flatten(merged)
 
   def merge_stories(self, group: cb.runner.StoriesRunGroup):
-    merged = cb.probes.json.JSONMerger.from_merged_files(
+    merged = probes.json.JSONMerger.from_merged_files(
         story_group.results[self] for story_group in group.repetitions_groups)
     merged_json_file = group.get_probe_results_file(self)
     with merged_json_file.open("w") as f:
@@ -47,7 +56,7 @@
         for k, v in merged_data.items()
     }
     # "suite_name" => (metric_value_path, ...), ...
-    grouped_by_suite = cb.helper.group_by(
+    grouped_by_suite = helper.group_by(
         sorted(merged_data.keys(), key=lambda path: str(path).lower()),
         key=lambda path: path.parts[0])
     # Sort summary metrics ("total"...) last
@@ -68,7 +77,7 @@
           f.write("\n")
 
 
-class Speedometer20Story(cb.stories.PressBenchmarkStory):
+class Speedometer20Story(stories.PressBenchmarkStory):
   NAME = "speedometer_2.0"
   PROBES = (Speedometer20Probe,)
   URL = "https://browserbench.org/Speedometer2.0/InteractiveRunner.html"
@@ -102,7 +111,7 @@
       actions.wait_js_condition(
           """
         return globalThis.Suites !== undefined;
-      """, cb.helper.wait_range(0.5, 10))
+      """, helper.wait_range(0.5, 10))
       if self._substories != self.SUBSTORIES:
         actions.js(
             """
@@ -135,11 +144,11 @@
       actions.wait(1 * len(self._substories))
       actions.wait_js_condition(
           "return globalThis.testDone",
-          cb.helper.wait_range(1, 10 +
+          helper.wait_range(1, 10 +
                                2 * len(self._substories) * self.iterations))
 
 
-class Speedometer20Runner(cb.runner.PressBenchmarkStoryRunner):
+class Speedometer20Runner(benchmarks.PressBenchmark):
   """
   Benchmark runner for Speedometer 2.0
   """
@@ -166,7 +175,7 @@
     kwargs["iterations"] = args.iterations
     return kwargs
 
-  def __init__(self, *args, stories=None, iterations=None, **kwargs):
+  def __init__(self, stories=None, iterations=None):
     if isinstance(stories, self.DEFAULT_STORY_CLS):
       stories = [stories]
     elif stories is None:
@@ -175,4 +184,4 @@
       assert isinstance(story, self.DEFAULT_STORY_CLS)
       if iterations:
         story.iterations = int(iterations)
-    super().__init__(*args, stories=stories, **kwargs)
+    super().__init__(stories)
diff --git a/crossbench/cli.py b/crossbench/cli.py
index 3d1a931..f7cb34f 100644
--- a/crossbench/cli.py
+++ b/crossbench/cli.py
@@ -223,15 +223,17 @@
 
   BENCHMARKS = (
       cb.benchmarks.Speedometer20Runner,
-      cb.benchmarks.JetStream2Runner,
-      cb.benchmarks.MotionMark12Runner,
-      cb.benchmarks.PageLoadRunner,
+      cb.benchmarks.JetStream2Benchmark,
+      cb.benchmarks.MotionMark12Benchmark,
+      cb.benchmarks.PageLoadBenchmark,
   )
 
   GENERAL_PURPOSE_PROBES_BY_NAME = {
       cls.NAME: cls for cls in cb.probes.GENERAL_PURPOSE_PROBES
   }
 
+  RUNNER_CLS = cb.runner.Runner
+
   def __init__(self):
     self.parser = argparse.ArgumentParser()
     self._setup_parser()
@@ -270,6 +272,7 @@
 
   def _setup_benchmark_subparser(self, benchmark_cls):
     subparser = benchmark_cls.add_cli_parser(self.subparsers)
+    self.RUNNER_CLS.add_cli_parser(subparser)
     assert isinstance(subparser, argparse.ArgumentParser), (
         f"Benchmark class {benchmark_cls}.add_cli_parser did not return "
         f"an ArgumentParser: {subparser}")
@@ -335,16 +338,19 @@
       args.browser_config = BrowserConfig()
       args.browser_config.load_from_args(args)
     args.browsers = args.browser_config.variants
+
     benchmark_cls = args.benchmark_cls
-    assert issubclass(benchmark_cls, cb.runner.Runner), (
-        f"benchmark_cls={benchmark_cls} is not subclass of Runner")
+    assert issubclass(benchmark_cls, cb.benchmarks.Benchmark), (
+        f"benchmark_cls={benchmark_cls} is not a Benchmark subclass")
-    kwargs = benchmark_cls.kwargs_from_cli(args)
-    benchmark = benchmark_cls(**kwargs)
+    benchmark = benchmark_cls(**benchmark_cls.kwargs_from_cli(args))
+
+    runner_kwargs = self.RUNNER_CLS.kwargs_from_cli(args)
+    runner = self.RUNNER_CLS(benchmark=benchmark, **runner_kwargs)
     for probe_name in args.probe:
       probe = self.GENERAL_PURPOSE_PROBES_BY_NAME[probe_name]()
-      benchmark.attach_probe(probe, matching_browser_only=True)
-    benchmark.run(is_dry_run=args.dry_run)
-    results_json = benchmark.out_dir / "results.json"
+      runner.attach_probe(probe, matching_browser_only=True)
+    runner.run(is_dry_run=args.dry_run)
+    results_json = runner.out_dir / "results.json"
     print(f"RESULTS: {results_json}")
 
   def run(self, argv):
diff --git a/crossbench/runner.py b/crossbench/runner.py
index 3b81cc7..0f3a0cc 100644
--- a/crossbench/runner.py
+++ b/crossbench/runner.py
@@ -208,24 +208,7 @@
     return [dict(title=str(e), trace=str(tb)) for tb, e in self._exceptions]
 
 
-class Benchmark:
-  pass
-
-
-class Runner(abc.ABC):
-  NAME = None
-  DEFAULT_STORY_CLS = None
-
-  # @property
-  # @classmethod
-  # def NAME(self) -> str:
-  #   pass
-
-  # @property
-  # @classmethod
-  # def DEFAULT_STORY_CLS(self) -> Type[cb.stories.Story]:
-  #   pass
-
+class Runner:
   @staticmethod
   def get_out_dir(cwd, suffix="", test=False) -> pathlib.Path:
     if test:
@@ -236,15 +219,7 @@
             f"{dt.datetime.now().strftime('%Y-%m-%d_%H%M%S')}{suffix}")
 
   @classmethod
-  def add_cli_parser(cls, subparsers) -> argparse.ArgumentParser:
-    assert cls.__doc__ and cls.__doc__, (
-        f"Benchmark class {cls} must provide a doc string.")
-    doc_title = cls.__doc__.strip().split("\n")[0]
-    parser = subparsers.add_parser(
-        cls.NAME,
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        help=doc_title,
-        description=cls.__doc__.strip())
+  def add_cli_parser(cls, parser) -> argparse.ArgumentParser:
     parser.add_argument(
         "--repeat",
         default=1,
@@ -274,7 +249,7 @@
   @classmethod
   def kwargs_from_cli(cls, args):
     if args.out_dir is None:
-      label = args.label or cls.NAME
+      label = args.label or args.benchmark_cls.NAME
       cli_dir = pathlib.Path(__file__).parent.parent
       args.out_dir = cls.get_out_dir(cli_dir, label)
     return dict(
@@ -284,29 +259,17 @@
         use_checklist=args.use_checklist,
         throw=args.throw)
 
-  @classmethod
-  def describe(cls):
-    return {
-        "name": cls.NAME,
-        "description": cls.__doc__.strip(),
-        "stories": [],
-        "probes-default": {
-            probe_cls.NAME: probe_cls.__doc__.strip()
-            for probe_cls in cls.DEFAULT_STORY_CLS.PROBES
-        }
-    }
 
   def __init__(self,
                out_dir: pathlib.Path,
                browsers: Sequence[cb.browsers.Browser],
-               stories: Sequence[cb.stories.Story],
-               probes: Sequence[cb.probes.Probe] = (),
+               benchmark: cb.benchmarks.Benchmark,
+               additional_probes: Iterable[cb.probes.Probe] = (),
                platform: cb.helper.Platform = cb.helper.platform,
                throttle=True,
                repetitions=1,
                use_checklist=True,
                throw=False):
-    assert self.NAME is not None, f"{self} has no .NAME property"
     self.out_dir = out_dir
     assert not self.out_dir.exists(), f"out_dir={self.out_dir} exists already"
     self.out_dir.mkdir(parents=True)
@@ -314,10 +277,7 @@
     self.browsers = browsers
     self._validate_browsers()
     self._browser_platform = browsers[0].platform
-    self.stories = stories
-    assert stories, "No stories provided"
-    if isinstance(stories, self.DEFAULT_STORY_CLS):
-      stories = [stories]
+    self.stories = benchmark.stories
     self.repetitions = repetitions
     assert self.repetitions > 0, f"Invalid repetitions={self.repetitions}"
     self.throttle = throttle
@@ -326,18 +286,11 @@
     self._runs = []
     self._exceptions = ExceptionHandler(throw)
     self._platform = platform
-    self._attach_default_probes(probes)
+    self._attach_default_probes(additional_probes)
     self._validate_stories()
 
   def _validate_stories(self):
-    first_story = self.stories[0]
-    expected_probes_cls_list = first_story.PROBES
-    for story in self.stories:
-      assert isinstance(story, self.DEFAULT_STORY_CLS), (
-          f"story={story} has not the same class as {self.DEFAULT_STORY_CLS}")
-      assert story.PROBES == expected_probes_cls_list, (
-          f"stroy={story} has different PROBES than {first_story}")
-    for probe_cls in expected_probes_cls_list:
+    for probe_cls in self.stories[0].PROBES:
       assert inspect.isclass(probe_cls), (
           f"Story.PROBES must contain classes only, but got {type(probe_cls)}")
       self.attach_probe(probe_cls())
@@ -424,7 +377,6 @@
     self.collect_hardware_details()
 
   def get_runs(self) -> Iterable[Run]:
-    """Extension point for subclasses."""
     for iteration in range(self.repetitions):
       for story in self.stories:
         for browser in self.browsers:
@@ -484,89 +436,6 @@
       logging.info("COOLDOWN: still hot, waiting some more")
 
 
-class SubStoryRunner(Runner):
-
-  @classmethod
-  def parse_cli_stories(cls, values):
-    return tuple(story.strip() for story in values.split(","))
-
-  @classmethod
-  def add_cli_parser(cls, subparsers) -> argparse.ArgumentParser:
-    parser = super().add_cli_parser(subparsers)
-    parser.add_argument(
-        "--stories",
-        default="all",
-        type=cls.parse_cli_stories,
-        help="Comma-separated list of story names. Use 'all' as placeholder.")
-    is_combined_group = parser.add_mutually_exclusive_group()
-    is_combined_group.add_argument(
-        "--combined",
-        dest="separate",
-        default=False,
-        action="store_false",
-        help="Run each story in the same session. (default)")
-    is_combined_group.add_argument(
-        "--separate",
-        action="store_true",
-        help="Run each story in a fresh browser.")
-    return parser
-
-  @classmethod
-  def kwargs_from_cli(cls, args) -> dict:
-    kwargs = super().kwargs_from_cli(args)
-    kwargs["stories"] = cls.stories_from_cli(args)
-    return kwargs
-
-  @classmethod
-  def stories_from_cli(cls, args) -> Iterable[cb.stories.Story]:
-    assert issubclass(cls.DEFAULT_STORY_CLS, cb.stories.Story), (
-        f"{cls.__name__}.DEFAULT_STORY_CLS is not a Story class. "
-        f"Got '{cls.DEFAULT_STORY_CLS}' instead.")
-    return cls.DEFAULT_STORY_CLS.from_names(args.stories, args.separate)
-
-  @classmethod
-  def describe(cls) -> dict:
-    data = super().describe()
-    data["stories"] = cls.story_names()
-    return data
-
-  @classmethod
-  def story_names(cls) -> Iterable[str]:
-    return cls.DEFAULT_STORY_CLS.story_names()
-
-
-class PressBenchmarkStoryRunner(SubStoryRunner):
-
-  @classmethod
-  def add_cli_parser(cls, subparsers) -> argparse.ArgumentParser:
-    parser = super().add_cli_parser(subparsers)
-    is_live_group = parser.add_mutually_exclusive_group()
-    is_live_group.add_argument(
-        "--live",
-        default=True,
-        action="store_true",
-        help="Use live/online benchmark url.")
-    is_live_group.add_argument(
-        "--local",
-        dest="live",
-        action="store_false",
-        help="Use locally hosted benchmark url.")
-    return parser
-
-  @classmethod
-  def stories_from_cli(cls, args) -> Iterable[cb.stories.PressBenchmarkStory]:
-    assert issubclass(cls.DEFAULT_STORY_CLS, cb.stories.PressBenchmarkStory)
-    return cls.DEFAULT_STORY_CLS.from_names(args.stories, args.separate,
-                                            args.live)
-
-  @classmethod
-  def describe(cls) -> dict:
-    data = super().describe()
-    assert issubclass(cls.DEFAULT_STORY_CLS, cb.stories.PressBenchmarkStory)
-    data["url"] = cls.DEFAULT_STORY_CLS.URL
-    data["url-local"] = cls.DEFAULT_STORY_CLS.URL_LOCAL
-    return data
-
 
 class RunGroup:
 
diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py
index 72cde4a..ec02634 100644
--- a/tests/test_benchmarks.py
+++ b/tests/test_benchmarks.py
@@ -29,8 +29,8 @@
     ]
 
 
-class TestPageLoadRunner(BaseRunnerTest):
-  BENCHMARK = bm.loading.PageLoadRunner
+class TestPageLoadBenchmark(BaseRunnerTest):
+  BENCHMARK = bm.loading.PageLoadBenchmark
 
   def test_default_stories(self):
     stories = bm.loading.LivePage.from_names(["all"])
@@ -76,10 +76,12 @@
 
   def test_run(self):
     stories = bm.loading.PAGE_LIST
-    runner = self.BENCHMARK(
+    benchmark = self.BENCHMARK(stories)
+    self.assertTrue(len(benchmark.describe()) > 0)
+    runner = cb.runner.Runner(
         self.out_dir,
         self.browsers,
-        stories,
+        benchmark,
         use_checklist=False,
         platform=self.platform)
     runner.run()
@@ -92,7 +94,7 @@
 
 
 class JetStream2Test(BaseRunnerTest):
-  BENCHMARK = bm.jetstream.JetStream2Runner
+  BENCHMARK = bm.jetstream.JetStream2Benchmark
 
   def test_run(self):
     stories = bm.jetstream.JetStream2Story.from_names(['WSL'])
@@ -110,10 +112,12 @@
           jetstream_probe_results,
       ]
     repetitions = 3
-    runner = self.BENCHMARK(
+    benchmark = self.BENCHMARK(stories)
+    self.assertTrue(len(benchmark.describe()) > 0)
+    runner = cb.runner.Runner(
         self.out_dir,
         self.browsers,
-        stories,
+        benchmark,
         use_checklist=False,
         platform=self.platform,
         repetitions=repetitions)