| #!/usr/bin/env python3 |
| |
| # Copyright 2021 The Chromium Authors |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
import argparse
import logging
import os
import sys

import numpy
import pandas as pd
from scipy import stats as scipy_stats
| |
| |
def get_diamond_string(diamond_count: int):
    """Map a diamond count to the string displayed for it.

    Counts 1 through 4 produce that many "◆" characters and a count of 0
    produces "~". Any other value yields None.
    """
    labels = {
        0: "~",
        1: "◆",
        2: "◆◆",
        3: "◆◆◆",
        4: "◆◆◆◆",
    }
    # Keys are matched by equality, so a float such as 2.0 finds the key 2.
    return labels.get(diamond_count)
| |
| |
def get_diamonds_count(significance: pd.DataFrame):
    """Convert significance levels into diamond counts.

    This emulates the "diamond" significance representation that is
    familiar to UMA users.

    Args:
      significance: Significance levels; an assertion fails if any of them
        is above 1 or below 0.

    Returns:
      For each significance level, a whole number between 0 and 4.
    """
    above_one = (significance > 1).any().any()
    below_zero = (significance < 0).any().any()
    assert not (above_one or below_zero)

    # Cap just below 1 so the p-value can never be 0: log10(0) is undefined.
    capped = numpy.clip(significance, 0, 0.999999)

    # scipy_stats.norm.cdf(1.96) = 0.975 but a 2 tail test is wanted, where
    # 1.96 gives a 0.05 p-value. Doubling corrects for that.
    two_tailed_p = (1 - capped) * 2

    # absolute() makes the logarithm positive; floor() rounds it without
    # exaggerating the result.
    magnitude = numpy.absolute(numpy.log10(two_tailed_p))
    diamonds = numpy.floor(magnitude)

    # 4 diamonds is the maximum no matter how small the p-value is.
    return numpy.clip(diamonds, 0, 4)
| |
| |
def compute_mean_and_stderr(summary_path: str):
    """Compute the mean and standard error of each column of a summary csv.

    Args:
      summary_path: Path of the csv file to read.

    Returns:
      A dataframe indexed by the csv's column names, with the columns
      "mean" and "stderr".
    """
    samples = pd.read_csv(summary_path)

    # Missing cells are skipped because no line has all measurements. This
    # is because of the different sampling rates of the data sources in
    # power_sampler and power_metrics.
    column_means = samples.mean(skipna=True).rename("mean")

    # Standard error of each column: its standard deviation divided by the
    # square root of its number of non-missing samples.
    column_stderrs = samples.std(skipna=True) / numpy.sqrt(samples.count())

    # Naming both series up front gives the joined frame its final column
    # names directly.
    return column_means.to_frame().join(column_stderrs.rename("stderr"))
| |
| |
def percent_difference(first_value: pd.DataFrame, second_value: pd.DataFrame):
    """Return how much |second_value| differs from |first_value|, in percent.

    The result is to be read as:
      |second_value| is X% smaller/larger than |first_value|.
    A negative result means smaller and a positive result means larger.

    Works on plain numbers as well as on values/columns that support
    arithmetic. With plain numbers, a |first_value| of 0 raises
    ZeroDivisionError.

    Ex: percent_difference(20, 10) --> -50
    Ex: percent_difference(10, 50) --> 400
    """
    change = second_value - first_value
    return (change / first_value) * 100
| |
| |
def compare(data_dir: str, baseline_summary: str, alternative_summary: str):
    """Open two summary files and compare their values.

    Saves the results as comparison_summary.csv in data_dir and logs them.

    Args:
      data_dir: The directory to save the comparison csv in.
      baseline_summary: summary.csv for the baseline.
      alternative_summary: summary.csv for the comparison.
    """

    # Get names of the browsers being compared from the paths: the part of
    # the containing directory's name that precedes the first underscore.
    # NOTE(review): when both directories share that prefix, the two
    # suffixes used for the join below are identical and the joined columns
    # can no longer be told apart -- confirm whether this needs handling.
    baseline_name = os.path.basename(
        os.path.dirname(baseline_summary)).split("_")[0]
    alternative_name = os.path.basename(
        os.path.dirname(alternative_summary)).split("_")[0]

    # Extract the mean and standard error of each column of the summaries
    # into new dataframes.
    baseline_stats = compute_mean_and_stderr(baseline_summary)
    alternative_stats = compute_mean_and_stderr(alternative_summary)

    # Join the calculated values for both browsers into a single dataframe.
    comparison_summary = baseline_stats.join(alternative_stats,
                                             lsuffix=f"_{baseline_name}",
                                             rsuffix=f"_{alternative_name}")

    # Calculate the difference in percent between the baseline and comparison.
    comparison_summary["difference"] = percent_difference(
        baseline_stats["mean"], alternative_stats["mean"])

    # See https://www.cliffsnotes.com/study-guides/statistics/univariate-inferential-tests/two-sample-z-test-for-comparing-two-means
    mean_delta = baseline_stats["mean"] - alternative_stats["mean"]
    combined_stderr = numpy.sqrt(baseline_stats["stderr"]**2 +
                                 alternative_stats["stderr"]**2)
    comparison_summary["z_score"] = mean_delta / combined_stderr

    # See https://machinelearningmastery.com/critical-values-for-statistical-hypothesis-testing/
    comparison_summary["significance_level"] = scipy_stats.norm.cdf(
        abs(comparison_summary["z_score"]))

    diamond_count = get_diamonds_count(
        comparison_summary["significance_level"])
    comparison_summary["diamonds"] = diamond_count.apply(get_diamond_string)

    # Drop results for which comparing the mean makes no sense.
    comparison_summary = comparison_summary.drop([
        'battery_max_capacity', 'battery_current_capacity', 'sample_time',
        'elapsed_ns'
    ])

    # Display and save results.
    logging.info(comparison_summary)
    comparison_summary.to_csv(
        os.path.join(data_dir, "comparison_summary.csv"))
| |
| |
def main():
    """Parse the command line and compare the two benchmark summaries.

    Expects a summary.csv in both the baseline and the alternative
    directory. If either one is missing, an error is logged and the process
    exits with status -1.
    """
    parser = argparse.ArgumentParser(
        description='Compares two summary files for analysis.')
    parser.add_argument("--output_dir",
                        help="Directory where to write the comparison file.",
                        required=True)
    parser.add_argument(
        "--baseline_dir",
        help="Directory containing the baseline benchmark data.",
        required=True)
    parser.add_argument(
        "--alternative_dir",
        help="Directory containing the alternative benchmark data.",
        required=True)
    parser.add_argument('--verbose',
                        action='store_true',
                        help='Print verbose output.')
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    baseline_summary_path = os.path.join(args.baseline_dir, "summary.csv")
    alternative_summary_path = os.path.join(args.alternative_dir,
                                            "summary.csv")

    # Check both inputs before doing any work so a missing file is reported
    # as a clear error instead of a failure while reading the csv.
    for summary in (baseline_summary_path, alternative_summary_path):
        if not os.path.isfile(summary):
            logging.error(f"summary.csv missing in {summary}.")
            # Relies on the module-level `import sys`.
            sys.exit(-1)

    compare(args.output_dir, baseline_summary_path, alternative_summary_path)
| |
| |
if __name__ == "__main__":
    # Print numbers with six fixed decimals rather than in scientific
    # notation.
    pd.set_option("display.float_format", '{:.6f}'.format)

    main()