# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
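
# Unit tests for run_rendering_benchmark_with_gated_performance, which gates
# representative rendering benchmark results against per-story upper limits.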
import os
import sys
import unittest

# Add src/testing/scripts/ to sys.path so that the representative perf test
# script can be imported.
PERF_TEST_SCRIPTS_DIR = os.path.join(
    os.path.dirname(__file__), '..', '..', 'testing', 'scripts')
sys.path.append(PERF_TEST_SCRIPTS_DIR)

import run_rendering_benchmark_with_gated_performance as perf_tests  # pylint: disable=wrong-import-position,import-error
BENCHMARK = 'rendering.desktop'
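
# Sample upper-limit data keyed by story: 'ci_095' bounds the allowed noise,
# 'avg' bounds the average value, and 'cpu_wall_time_ratio' is the expected
# minimum CPU to wall-time ratio. 'control' and 'experimental' flag control
# and experimental stories.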
UPPER_LIMIT_DATA_SAMPLE = {
    'story_1': {
        'ci_095': 10,
        'avg': 20,
        'cpu_wall_time_ratio': 0.4,
    },
    'story_2': {
        'ci_095': 10,
        'avg': 16,
        'cpu_wall_time_ratio': 0.3,
    },
    'story_3': {
        'ci_095': 10,
        'avg': 10,
        'cpu_wall_time_ratio': 0.5,
    },
    'story_4': {
        'ci_095': 10,
        'avg': 10,
        'cpu_wall_time_ratio': 0.5,
        'control': True,
    },
    'story_5': {
        'ci_095': 20,
        'avg': 10,
        'cpu_wall_time_ratio': 0.5,
    },
    'story_6': {
        'ci_095': 20,
        'avg': 10,
        'cpu_wall_time_ratio': 0.5,
    },
    'story_7': {
        'ci_095': 20,
        'avg': 10,
        'cpu_wall_time_ratio': 0.5,
        'experimental': True,
    },
}


def create_sample_input(record_list):
  # Converts a list of rows into a list of dicts with the keys 'stories',
  # 'name', 'avg', 'count' and 'ci_095' for the unittests.
  keys = ['stories', 'name', 'avg', 'count', 'ci_095']
  result = []
  for row in record_list:
    result.append(dict(zip(keys, row)))
  return result
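

# Builds a minimal perf results dict in the layout the script consumes
# ('tests' plus 'num_failures_by_type'), marking the given stories as
# PASS or FAIL under the given benchmark.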
def create_sample_perf_results(passed_stories, failed_stories, benchmark):
  perf_results = {
      'tests': {},
      'num_failures_by_type': {
          'FAIL': len(failed_stories),
          'PASS': len(passed_stories)
      }
  }
  perf_results['tests'][benchmark] = {}
  for story in passed_stories:
    perf_results['tests'][benchmark][story] = {
        'actual': 'PASS',
        'is_unexpected': False,
        'expected': 'PASS'
    }
  for story in failed_stories:
    perf_results['tests'][benchmark][story] = {
        'actual': 'FAIL',
        'is_unexpected': True,
        'expected': 'PASS'
    }
  return perf_results
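

# Returns a RenderingRepresentativePerfTest wired up with the sample
# benchmark and upper-limit data used throughout these tests.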
def perf_test_initializer():
  perf_test = perf_tests.RenderingRepresentativePerfTest(True)
  perf_test.benchmark = BENCHMARK
  perf_test.upper_limit_data = UPPER_LIMIT_DATA_SAMPLE
  perf_test.set_platform_specific_attributes()
  return perf_test
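

# Tests for RenderingRepresentativePerfTest.parse_csv_results and
# compare_values.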
class TestRepresentativePerfScript(unittest.TestCase):

  def test_parse_csv_results(self):
    csv_obj = create_sample_input([
        ['story_1', 'frame_times', 16, 10, 1.5],
        ['story_1', 'cpu_wall_time_ratio', 0.5, 1, 1],
        ['story_2', 'latency', 10, 8, 4],  # Record for a different metric.
        ['story_3', 'frame_times', 8, 20, 2],
        ['story_3', 'frame_times', 7, 20, 15],
        ['story_3', 'frame_times', 12, 20, 16],
        ['story_3', 'cpu_wall_time_ratio', 0.3, 1, 1],
        ['story_3', 'cpu_wall_time_ratio', 0.7, 1, 1],
        ['story_3', 'cpu_wall_time_ratio', '', 0, 1],
        ['story_4', 'frame_times', '', 10, 1],  # Record with no avg.
        ['story_5', 'frame_times', 12, 0, 3],  # Record with count of 0.
        ['story_6', 'frame_times', 12, 40, 40],  # High noise record.
        ['story_8', 'frame_times', 12, 40, 4],
    ])
    perf_test = perf_test_initializer()
    values_per_story = perf_test.parse_csv_results(csv_obj)

    # Only frame_times stories that exist in the upper-limit data should be
    # listed: all stories except story_2 and story_8.
    self.assertEquals(len(values_per_story), 5)
    self.assertEquals(values_per_story['story_1']['averages'], [16.0])
    self.assertEquals(values_per_story['story_1']['ci_095'], [1.5])
    self.assertEquals(values_per_story['story_1']['cpu_wall_time_ratio'], [0.5])
    # The record with avg 12 has high noise.
    self.assertEquals(values_per_story['story_3']['averages'], [8.0, 7.0, 12.0])
    self.assertEquals(values_per_story['story_3']['ci_095'], [2.0, 15.0, 16.0])
    self.assertEquals(values_per_story['story_3']['cpu_wall_time_ratio'],
                      [0.3, 0.7])
    self.assertEquals(len(values_per_story['story_4']['averages']), 0)
    self.assertEquals(len(values_per_story['story_4']['ci_095']), 0)
    self.assertEquals(len(values_per_story['story_5']['averages']), 0)
    self.assertEquals(len(values_per_story['story_5']['ci_095']), 0)
    self.assertEquals(values_per_story['story_6']['averages'], [12.0])
    self.assertEquals(values_per_story['story_6']['ci_095'], [40.0])
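
  # story_2 regresses past its average upper limit and remains failed;
  # story_3's regression is invalidated by its low cpu_wall_time_ratio.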
  def test_compare_values_1(self):
    values_per_story = {
        'story_1': {
            'averages': [16.0, 17.0, 21.0],
            'ci_095': [2.0, 15.0, 16.0],
            'cpu_wall_time_ratio': [0.5, 0.52, 0.57]
        },
        'story_2': {
            'averages': [16.0, 17.0, 22.0],
            'ci_095': [1.0, 1.4, 1.2],
            'cpu_wall_time_ratio': [0.3, 0.3, 0.3]
        },
        'story_3': {
            'averages': [20.0, 15.0, 22.0],
            'ci_095': [1.0, 0.8, 1.2],
            'cpu_wall_time_ratio': [0.5, 0.5, 0.49]
        }
    }
    sample_perf_results = create_sample_perf_results(
        ['story_1', 'story_2', 'story_3'], [], BENCHMARK)
    rerun = False
    perf_test = perf_test_initializer()
    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
    perf_test.compare_values(values_per_story, rerun)
    result_recorder = perf_test.result_recorder[rerun]

    self.assertEquals(result_recorder.tests, 3)
    # The failure for story_3 is invalidated (low cpu_wall_time_ratio).
    self.assertEquals(result_recorder.failed_stories, set(['story_2']))

    (output, overall_return_code) = result_recorder.get_output(0)
    self.assertEquals(overall_return_code, 1)
    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 1)
    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_2']['actual'], 'FAIL')
    self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'PASS')
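
  # Rerun where the control story (story_4) is noisy: regressed, noisy and
  # empty stories are recorded as failures and the noisy-control flag is set.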
  def test_compare_values_2(self):
    values_per_story = {
        'story_1': {
            'averages': [16.0, 17.0, 21.0],
            'ci_095': [2.0, 15.0, 16.0],
            'cpu_wall_time_ratio': [0.45, 0.42],
        },
        'story_3': {  # Two of the runs have acceptable CI but high averages.
            'averages': [10, 13],
            'ci_095': [14, 16, 12],
            'cpu_wall_time_ratio': [0.5, 0.52],
        },
        'story_4': {  # All runs have high noise.
            'averages': [],
            'ci_095': [16, 17, 18],
            'cpu_wall_time_ratio': [],
        },
        'story_5': {  # No recorded values.
            'averages': [],
            'ci_095': [],
            'cpu_wall_time_ratio': [],
        }
    }
    sample_perf_results = create_sample_perf_results(
        ['story_1', 'story_3', 'story_4', 'story_5'], ['story_2'], BENCHMARK)
    rerun = True
    perf_test = perf_test_initializer()
    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
    self.assertEquals(perf_test.result_recorder[rerun].fails, 1)
    perf_test.compare_values(values_per_story, rerun)
    result_recorder = perf_test.result_recorder[rerun]

    self.assertEquals(result_recorder.tests, 5)
    self.assertEquals(result_recorder.failed_stories,
                      set(['story_3', 'story_4', 'story_5']))
    self.assertTrue(result_recorder.is_control_stories_noisy)

    result_recorder.invalidate_failures(BENCHMARK)
    (output, overall_return_code) = result_recorder.get_output(0)
    self.assertEquals(overall_return_code, 1)
    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 1)
    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_2']['actual'], 'FAIL')
    self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_4']['actual'], 'PASS')

  # Failures are invalidated as a result of a noisy control test.
  def test_compare_values_3(self):
    values_per_story = {
        'story_1': {
            'averages': [16.0, 17.0, 21.0],
            'ci_095': [2.0, 15.0, 16.0],
            'cpu_wall_time_ratio': [0.45, 0.42],
        },
        'story_3': {  # Two of the runs have acceptable CI but high averages.
            'averages': [10, 13],
            'ci_095': [14, 16, 12],
            'cpu_wall_time_ratio': [0.5, 0.52],
        },
        'story_4': {  # All runs have high noise.
            'averages': [],
            'ci_095': [16, 17, 18],
            'cpu_wall_time_ratio': [],
        },
        'story_5': {  # No recorded values.
            'averages': [],
            'ci_095': [],
            'cpu_wall_time_ratio': [],
        }
    }
    sample_perf_results = create_sample_perf_results(
        ['story_1', 'story_3', 'story_4', 'story_5'], [], BENCHMARK)
    rerun = True
    perf_test = perf_test_initializer()
    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
    self.assertEquals(perf_test.result_recorder[rerun].fails, 0)
    perf_test.compare_values(values_per_story, rerun)
    result_recorder = perf_test.result_recorder[rerun]

    self.assertEquals(result_recorder.tests, 4)
    self.assertEquals(result_recorder.failed_stories,
                      set(['story_3', 'story_4', 'story_5']))
    self.assertTrue(result_recorder.is_control_stories_noisy)

    result_recorder.invalidate_failures(BENCHMARK)
    (output, overall_return_code) = result_recorder.get_output(0)
    self.assertEquals(overall_return_code, 0)
    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_3']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_4']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_5']['actual'], 'PASS')
    self.assertEquals(
        output['tests'][BENCHMARK]['story_3']['invalidation_reason'],
        'Noisy control test')
    self.assertEquals(
        output['tests'][BENCHMARK]['story_4']['invalidation_reason'],
        'Noisy control test')
    self.assertEquals(
        output['tests'][BENCHMARK]['story_5']['invalidation_reason'],
        'Noisy control test')

  # Experimental stories should not fail the test.
  def test_compare_values_4(self):
    values_per_story = {
        'story_1': {
            'averages': [16.0, 17.0, 21.0],
            'ci_095': [2.0, 15.0, 16.0],
            'cpu_wall_time_ratio': [0.45, 0.42, 0.44],
        },
        # Experimental story with a higher value than the upper limit.
        'story_7': {
            'averages': [20, 26],
            'ci_095': [14, 16],
            'cpu_wall_time_ratio': [0.45, 0.42, 0.44],
        }
    }
    sample_perf_results = create_sample_perf_results(
        ['story_1', 'story_7'], [], BENCHMARK)
    rerun = False
    perf_test = perf_test_initializer()
    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
    self.assertEquals(perf_test.result_recorder[rerun].fails, 0)
    perf_test.compare_values(values_per_story, rerun)
    result_recorder = perf_test.result_recorder[rerun]

    self.assertEquals(result_recorder.tests, 2)
    self.assertEquals(result_recorder.failed_stories, set([]))

    (output, overall_return_code) = result_recorder.get_output(0)
    self.assertEquals(overall_return_code, 0)
    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
    self.assertEquals(output['tests'][BENCHMARK]['story_7']['actual'], 'PASS')

  # A low cpu_wall_time_ratio invalidates the failure.
  def test_compare_values_5(self):
    values_per_story = {
        'story_1': {
            # Higher avg than the upper limit, with a low cpu_wall_time_ratio.
            'averages': [26.0, 27.0, 21.0],
            'ci_095': [2.0, 15.0, 16.0],
            'cpu_wall_time_ratio': [0.35, 0.42, 0.34],
        }
    }
    sample_perf_results = create_sample_perf_results(['story_1'], [], BENCHMARK)
    rerun = False
    perf_test = perf_test_initializer()
    perf_test.result_recorder[rerun].set_tests(sample_perf_results)
    self.assertEquals(perf_test.result_recorder[rerun].fails, 0)
    perf_test.compare_values(values_per_story, rerun)
    result_recorder = perf_test.result_recorder[rerun]

    self.assertEquals(result_recorder.tests, 1)
    self.assertEquals(result_recorder.failed_stories, set([]))

    result_recorder.invalidate_failures(BENCHMARK)
    (output, overall_return_code) = result_recorder.get_output(0)
    self.assertEquals(overall_return_code, 0)
    self.assertEquals(output['num_failures_by_type'].get('FAIL', 0), 0)
    self.assertEquals(output['tests'][BENCHMARK]['story_1']['actual'], 'PASS')
    self.assertEquals(
        output['tests'][BENCHMARK]['story_1']['invalidation_reason'],
        'Low cpu_wall_time_ratio')
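

# Standard unittest entry point (an assumed addition) so the tests can also be
# run directly with Python's unittest runner.
if __name__ == '__main__':
  unittest.main()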