| // Copyright 2024 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // A utility to compare two runs of a style perftest, in the perftest |
| // output format, and compute confidence intervals. The simplest way |
| // to get the right information is to build a binary before and after |
| // changes and then run it continuously every-other for a while until |
| // you feel you have enough data: |
| // |
| // rm -f old.txt new.txt; \ |
| // while :; do |
| // taskset -c 2,4,6,8 ./out/Release/blink_perf___old \ |
| // --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a old.txt; |
| // taskset -c 2,4,6,8 ./out/Release/blink_perf_tests \ |
| // --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a new.txt; |
| // done |
| // |
| // and then run ./out/Release/compare_blink_perf old.txt new.txt. |
| // (Possibly under watch -n 1 if you want to look at data as it |
| // comes in, though beware of p-hacking.) |
| // |
| // TODO(sesse): Consider whether we should remove the first few |
| // runs, as they are frequently outliers. |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| #include <algorithm> |
| #include <string> |
| #include <unordered_map> |
| #include <utility> |
| #include <vector> |
| |
| #include "base/rand_util.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "testing/perf/confidence/ratio_bootstrap_estimator.h" |
| |
| #ifdef UNSAFE_BUFFERS_BUILD |
| // Not used with untrusted inputs. |
| #pragma allow_unsafe_buffers |
| #endif |
| |
| using std::max; |
| using std::min; |
| using std::numeric_limits; |
| using std::pair; |
| using std::sort; |
| using std::string; |
| using std::unordered_map; |
| using std::vector; |
| |
| namespace { |
| |
| string BeautifyCategory(const string& category) { |
| if (category == "BlinkStyleInitialCalcTime") { |
| return "Initial style (µs)"; |
| } else if (category == "BlinkStyleRecalcTime") { |
| return "Recalc style (µs)"; |
| } else if (category == "BlinkStyleParseTime") { |
| return "Parse (µs)"; |
| } else { |
| return category; |
| } |
| } |
| |
| bool CodeUnitCompareIgnoringASCIICaseLessThan(const string& a, |
| const string& b) { |
| return lexicographical_compare( |
| a.begin(), a.end(), b.begin(), b.end(), |
| [](char c1, char c2) { return std::tolower(c1) < std::tolower(c2); }); |
| } |
| |
| // The structure is e.g. BlinkStyleParseTime -> Video -> [100 us, 90 us, ...] |
| unordered_map<string, unordered_map<string, vector<double>>> ReadFile( |
| const char* filename) { |
| unordered_map<string, unordered_map<string, vector<double>>> measurements; |
| |
| FILE* fp = fopen(filename, "r"); |
| if (fp == nullptr) { |
| perror(filename); |
| exit(1); |
| } |
| while (!feof(fp)) { |
| char buf[4096]; |
| if (fgets(buf, sizeof(buf), fp) == nullptr) { |
| break; |
| } |
| string str(buf); |
| if (str.length() > 1 && str[str.length() - 1] == '\n') { |
| str.resize(str.length() - 1); |
| } |
| if (str.length() > 1 && str[str.length() - 1] == '\r') { |
| str.resize(str.length() - 1); |
| } |
| |
| // A result line looks like: *RESULT BlinkStyleParseTime: Video= 11061 us |
| vector<string> cols{""}; |
| for (char ch : str) { |
| if (ch == ' ') { |
| cols.push_back(""); |
| } else { |
| cols.back().push_back(ch); |
| } |
| } |
| if (cols.size() != 5 || cols[0] != "*RESULT" || !cols[1].ends_with(":") || |
| !cols[2].ends_with("=") || cols[4] != "us") { |
| continue; |
| } |
| |
| string category = cols[1]; |
| category.resize(category.length() - 1); |
| category = BeautifyCategory(category); |
| |
| string benchmark = cols[2]; |
| benchmark.resize(benchmark.length() - 1); |
| |
| double val; |
| if (!base::StringToDouble(cols[3], &val)) { |
| continue; |
| } |
| measurements[category][benchmark].push_back(val); |
| } |
| |
| fclose(fp); |
| return measurements; |
| } |
| |
| // Find the number of trials, for display. |
| void FindNumberOfTrials( |
| const unordered_map<string, unordered_map<string, vector<double>>>& |
| measurements, |
| unsigned& min_num_trials, |
| unsigned& max_num_trials) { |
| for (const auto& [category, entry] : measurements) { |
| for (const auto& [benchmark, samples] : entry) { |
| min_num_trials = min<unsigned>(min_num_trials, samples.size()); |
| max_num_trials = max<unsigned>(max_num_trials, samples.size()); |
| } |
| } |
| } |
| |
| struct Label { |
| string benchmark; |
| size_t data_index; |
| }; |
| |
| } // namespace |
| |
| int main(int argc, char** argv) { |
| if (argc != 3) { |
| fprintf(stderr, "USAGE: compare_blink_perf OLD_LOG NEW_LOG\n"); |
| exit(1); |
| } |
| |
| unordered_map<string, unordered_map<string, vector<double>>> before = |
| ReadFile(argv[1]); |
| unordered_map<string, unordered_map<string, vector<double>>> after = |
| ReadFile(argv[2]); |
| |
| unsigned min_num_trials = numeric_limits<unsigned>::max(); |
| unsigned max_num_trials = numeric_limits<unsigned>::min(); |
| FindNumberOfTrials(before, min_num_trials, max_num_trials); |
| FindNumberOfTrials(after, min_num_trials, max_num_trials); |
| if (min_num_trials == max_num_trials) { |
| printf("%u trial(s) on each side.\n", min_num_trials); |
| } else { |
| printf("%u–%u trial(s) on each side.\n", min_num_trials, max_num_trials); |
| } |
| |
| // Now pair up the data. (The estimator treats them as unpaired, |
| // but currently needs them to be of the same length within a single |
| // benchmark.) We do one run per category, so that we can get |
| // geometric means over them (RatioBootstrapEstimator doesn't support |
| // arbitrary grouping). |
| vector<string> sorted_categories; |
| for (const auto& [category, entry] : before) { |
| sorted_categories.push_back(category); |
| } |
| sort(sorted_categories.begin(), sorted_categories.end(), |
| [](const string& a, const string& b) { |
| return CodeUnitCompareIgnoringASCIICaseLessThan(a, b); |
| }); |
| for (const string& category : sorted_categories) { |
| vector<Label> labels; |
| vector<vector<RatioBootstrapEstimator::Sample>> data; |
| for (const auto& [benchmark, before_samples] : |
| before.find(category)->second) { |
| const auto after_entry = after.find(category); |
| if (after_entry == after.end()) { |
| continue; |
| } |
| const auto after_samples = after_entry->second.find(benchmark); |
| if (after_samples == after_entry->second.end()) { |
| continue; |
| } |
| |
| vector<RatioBootstrapEstimator::Sample> samples; |
| for (unsigned i = 0; |
| i < std::min(before_samples.size(), after_samples->second.size()); |
| ++i) { |
| samples.push_back({before_samples[i], after_samples->second[i]}); |
| } |
| labels.emplace_back(Label{benchmark, data.size()}); |
| data.push_back(std::move(samples)); |
| } |
| |
| RatioBootstrapEstimator estimator(base::RandUint64()); |
| const unsigned kNumResamples = 2000; |
| vector<RatioBootstrapEstimator::Estimate> estimates = |
| estimator.ComputeRatioEstimates(data, kNumResamples, |
| /*confidence_level=*/0.95, |
| /*compute_geometric_mean=*/true); |
| |
| // Sort the labels for display. |
| sort(labels.begin(), labels.end(), [](const Label& a, const Label& b) { |
| return CodeUnitCompareIgnoringASCIICaseLessThan(a.benchmark, b.benchmark); |
| }); |
| |
| printf("\n"); |
| printf("%-20s %9s %9s %7s %17s\n", category.c_str(), "Before", "After", |
| "Perf", "95% CI (BCa)"); |
| printf( |
| "=================== ========= ========= ======= " |
| "=================\n"); |
| for (const Label& label : labels) { |
| // RatioBootstrapEstimator doesn't give us the plain means, so compute |
| // that by hand. |
| double sum_before = 0.0, sum_after = 0.0; |
| for (const RatioBootstrapEstimator::Sample& sample : |
| data[label.data_index]) { |
| sum_before += sample.before; |
| sum_after += sample.after; |
| } |
| double mean_before = sum_before / data[label.data_index].size(); |
| double mean_after = sum_after / data[label.data_index].size(); |
| |
| const RatioBootstrapEstimator::Estimate& estimate = |
| estimates[label.data_index]; |
| printf("%-19s %9.0f %9.0f %+6.1f%% [%+5.1f%%, %+5.1f%%]\n", |
| label.benchmark.c_str(), mean_before, mean_after, |
| 100.0 * (estimate.point_estimate - 1.0), |
| 100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0)); |
| } |
| |
| const RatioBootstrapEstimator::Estimate& estimate = estimates[data.size()]; |
| printf("%-19s %9s %9s %+6.1f%% [%+5.1f%%, %+5.1f%%]\n", "Geometric mean", |
| "", "", 100.0 * (estimate.point_estimate - 1.0), |
| 100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0)); |
| } |
| } |