[commands/test] Add warnings for long-running tests

Add output to './recipes.py test train' and './recipes.py test run' that
shows any tests that took longer than 10s to complete. Test duration
depends on many things and may not perfectly correlate to how much work
was done in the test, but with this relatively high threshold it should
clearly indicate tests that are probably doing something wrong.

Change-Id: I4b01256ce6b65e36689132658165f0469b3a6657
Reviewed-on: https://chromium-review.googlesource.com/c/infra/luci/recipes-py/+/5465200
Reviewed-by: Benjamin Shai <bshai@google.com>
Reviewed-by: Chan Li <chanli@chromium.org>
Commit-Queue: Rob Mohr <mohrr@google.com>
diff --git a/recipe_engine/internal/commands/test/__init__.py b/recipe_engine/internal/commands/test/__init__.py
index 569c85b..3578d3c 100644
--- a/recipe_engine/internal/commands/test/__init__.py
+++ b/recipe_engine/internal/commands/test/__init__.py
@@ -95,6 +95,10 @@
       '--show-warnings',
       action='store_true', default=False, dest='show_warnings',
       help='Show detailed warnings even on test failures.')
+  run_p.add_argument(
+      '--show-durations',
+      action='store_true', default=False, dest='show_durations',
+      help='Show long-running tests even on test failures.')
 
   helpstr = 'Re-train recipe expectations.'
   train_p = subp.add_parser(
@@ -128,6 +132,10 @@
       '--show-warnings',
       action='store_true', default=False, dest='show_warnings',
       help='Show detailed warnings even on test failures.')
+  train_p.add_argument(
+      '--show-durations',
+      action='store_true', default=False, dest='show_durations',
+      help='Show long-running tests even on test failures.')
 
   helpstr = 'Print all test names.'
   list_p = subp.add_parser(
diff --git a/recipe_engine/internal/commands/test/report.py b/recipe_engine/internal/commands/test/report.py
index 69e7cda..7f5375e 100644
--- a/recipe_engine/internal/commands/test/report.py
+++ b/recipe_engine/internal/commands/test/report.py
@@ -5,11 +5,14 @@
 
 """Internal helpers for reporting test status to stdout."""
 
+from __future__ import annotations
+
 import collections
 import datetime
 import logging
 import os
 import sys
+from typing import TYPE_CHECKING
 
 from collections import defaultdict
 from io import StringIO
@@ -21,6 +24,9 @@
 from .fail_tracker import FailTracker
 from ...warn.cause import CallSite, ImportSite
 
+if TYPE_CHECKING:
+  import PB.recipe_engine.internal.test.runner as runner_pb2
+
 
 @attr.s
 class Reporter:
@@ -31,6 +37,8 @@
   _fail_tracker = attr.ib()
   # If set, will print warning details (even if there are other fatal failures)
   _enable_warning_details = attr.ib()
+  # If set, will print duration details (even if there are fatal failures).
+  _enable_duration_details = attr.ib()
 
   _column_count = attr.ib(default=0)
   _error_buf = attr.ib(factory=StringIO)
@@ -196,6 +204,20 @@
         print('Fix test failures or pass --show-warnings for details.')
       print()
 
+    duration_result = _collect_duration_result(outcome_msg)
+    if duration_result:
+      print('------')
+      if len(duration_result) == 1:
+        print('Found 1 long-running test')
+      else:
+        print(f'Found {len(duration_result)} long-running tests')
+      print()
+      if self._enable_duration_details or not fail:
+        _print_durations(duration_result, self._enable_duration_details)
+      else:
+        print('Fix test failures or pass --show-durations for details.')
+      print()
+
     status_warning_result = _collect_global_warnings_result(outcome_msg)
     if status_warning_result:
       print('------')
@@ -427,3 +449,40 @@
     print_bug_links(definition)
     print_call_sites(causes.call_sites)
     print_import_sites(causes.import_sites)
+
+
+def _collect_duration_result(
+    outcome_msg: runner_pb2.Outcome) -> dict[str, datetime.timedelta]:
+  """Collects durations from all test outcomes saves the long ones."""
+  result = defaultdict(PerWarningResult)
+  for name, test_result in outcome_msg.test_results.items():
+    duration = datetime.timedelta(
+        milliseconds=test_result.duration.ToMilliseconds()
+    )
+    if duration >= datetime.timedelta(seconds=10):
+      result[name] = duration
+
+  return result
+
+
+SOFT_MAX_DURATIONS = 8
+HARD_MAX_DURATIONS = 12
+
+
+def _print_durations(duration_result: dict[str, datetime.timedelta],
+                     full: bool):
+  durations = list(duration_result.items())
+  durations.sort(key=lambda x: (x[1], x[0]))
+
+  # Don't show "x long-running tests hidden" when x is a very small number.
+  # In that case, just show the tests.
+  exit_early = True
+  if full or len(durations) <= HARD_MAX_DURATIONS:
+    exit_early = False
+
+  for i, (name, duration) in enumerate(reversed(durations)):
+    if exit_early and i >= SOFT_MAX_DURATIONS:
+      print(f'{len(durations)-i} long-running tests hidden, use '
+            '--show-durations to show all')
+      break
+    print(f'{duration.total_seconds():8.3f}s  {name}')
diff --git a/recipe_engine/internal/commands/test/run_train.py b/recipe_engine/internal/commands/test/run_train.py
index 023bcdb..d6dc03c 100644
--- a/recipe_engine/internal/commands/test/run_train.py
+++ b/recipe_engine/internal/commands/test/run_train.py
@@ -109,7 +109,7 @@
 
 
 def _run(test_results, recipe_deps, use_emoji, test_filter, is_train,
-         stop, jobs, show_warnings):
+         stop, jobs, show_warnings, show_durations):
   """Run tests in py3 subprocess pools.
   """
   main_repo = recipe_deps.main_repo
@@ -132,7 +132,7 @@
 
   fail_tracker = FailTracker(recipe_deps.previous_test_failures_path)
   reporter = report.Reporter(recipe_deps, use_emoji, is_train, fail_tracker,
-                             show_warnings)
+                             show_warnings, show_durations)
 
   cov_dir = None
   total_cov = coverage.Coverage(config_file=False, data_file='.total_coverage',
@@ -238,7 +238,7 @@
   repo = args.recipe_deps.main_repo
   try:
     _run(ret, args.recipe_deps, args.use_emoji, args.test_filter, is_train,
-         args.stop, args.jobs, args.show_warnings)
+         args.stop, args.jobs, args.show_warnings, args.show_durations)
     _dump()
   except KeyboardInterrupt:
     args.docs = False  # skip docs