# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import fnmatch

try:
  import numpy
  import pandas
except ImportError:
  numpy = None
  pandas = None

SECONDS_IN_A_DAY = 60 * 60 * 24


def ResultsKey():
  """Create a DataFrame with information about result values."""
  return pandas.DataFrame.from_records([
      ('P', 'PASS', '-', 0.0),
      ('N', 'NO DATA', ' ', 0.0),
      ('X', 'SKIP', '~', 0.0),
      ('Q', 'FAIL', 'F', 1.0),
      ('Y', 'NOTRUN', '?', 0.1),
      ('L', 'FLAKY', '!', 0.5),
  ], columns=('code', 'result', 'char', 'badness'), index='code')
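
# A minimal usage sketch (illustrative only): the frame is indexed by the
# single-letter result code, so each column behaves like a lookup table, e.g.
#   key = ResultsKey()
#   key['char']['Q']      # -> 'F'
#   key['badness']['L']   # -> 0.5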


def FilterBy(df, **kwargs):
  """Filter a data frame by applying patterns to columns.

  Args:
    df: A data frame to filter.
    **kwargs: Remaining keyword arguments are interpreted as column=pattern
      specifications. The pattern may contain shell-style wildcards; only rows
      whose value in the specified column matches the pattern are kept in the
      result. If the pattern is None, no filtering is done on that column.

  Returns:
    The filtered data frame (a view on the original data frame).
  """
  for column, pattern in kwargs.items():
    if pattern is not None:
      df = df[df[column].str.match(fnmatch.translate(pattern), case=False)]
  return df
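
# Illustrative usage (assuming the frame has 'builder' and 'test_case'
# columns):
#   FilterBy(df, builder='Mac*', test_case=None)
# keeps only rows whose builder matches the 'Mac*' glob (case-insensitively)
# and applies no filtering on test_case.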


def CompactResults(results):
  """Aggregate results from multiple runs into a single result code."""
  def Compact(result):
    if len(result) == 1:
      return result  # Test ran once; use same value.
    elif all(r == 'Q' for r in result):
      return 'Q'  # All runs failed; test failed.
    else:
      return 'L'  # Sometimes failed, sometimes not; test flaky.
  return results.map({r: Compact(r) for r in results.unique()})
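
# For example (assuming `results` is a pandas Series of per-build result code
# strings, one character per run):
#   'P'  -> 'P'  (single run, kept as-is)
#   'QQ' -> 'Q'  (every retry failed)
#   'QP' -> 'L'  (failed, then passed on retry; flaky)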


def AggregateBuilds(df, half_life):
  """Aggregate results from multiple builds of the same test configuration.

  Also computes the "flakiness" of a test configuration.

  Args:
    df: A data frame with test results per build for a single test
      configuration (i.e. fixed master, builder, test_type).
    half_life: A number of days. Build failures from that many days ago count
      half as much as a build failing today.

  Returns:
    A data frame indexed by (builder, test_suite, test_case) with the weighted
    'flakiness' score as a percentage, the latest 'build_number', and a
    'status' string concatenating the per-build status characters.
  """
  results_key = ResultsKey()
  df = df.copy()
  df['result'] = CompactResults(df['result'])
  df['status'] = df['result'].map(results_key['char'])
  df['flakiness'] = df['result'].map(results_key['badness'])
  # Weight each build by exponential decay: a build half_life days older than
  # the most recent one counts half as much.
  time_ago = df['timestamp'].max() - df['timestamp']
  days_ago = time_ago.dt.total_seconds() / SECONDS_IN_A_DAY
  df['weight'] = numpy.power(0.5, days_ago / half_life)
  df['flakiness'] *= df['weight']
  latest_build = df['build_number'].max()
  grouped = df.groupby(['builder', 'test_suite', 'test_case'])
  # Flakiness is the weighted average badness per group, scaled to 0-100.
  df = grouped['flakiness'].sum().to_frame()
  df['flakiness'] *= 100 / grouped['weight'].sum()
  df['build_number'] = latest_build
  df['status'] = grouped['status'].agg(lambda s: s.str.cat())
  return df
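
# Illustrative numbers for the decay weighting (hypothetical half_life=7):
# a build from 7 days ago gets weight 0.5, one from 14 days ago gets 0.25,
# and the newest build gets 1.0, so recent failures dominate the reported
# flakiness percentage.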