// testing/perf/confidence/compare_blink_perf.cc - codesearch/chromium/src - Git at Google

 // Copyright 2024 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // A utility to compare two runs of a style perftest, in the perftest
 // output format, and compute confidence intervals. The simplest way
 // to get the right information is to build a binary before and after
 // changes and then run the two binaries alternately for a while until
 // you feel you have enough data:
 //
 //   rm -f old.txt new.txt; \
 //   while :; do
 //     taskset -c 2,4,6,8 ./out/Release/blink_perf___old \
 //       --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a old.txt;
 //     taskset -c 2,4,6,8 ./out/Release/blink_perf_tests \
 //       --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a new.txt;
 //   done
 //
 // and then run ./out/Release/compare_blink_perf old.txt new.txt.
 // (Possibly under watch -n 1 if you want to look at data as it
 // comes in, though beware of p-hacking.)
 //
 // TODO(sesse): Consider whether we should remove the first few
 // runs, as they are frequently outliers.

 #include <stdio.h>
 #include <stdlib.h>

 #include <algorithm>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>

 #include "base/rand_util.h"
 #include "base/strings/string_number_conversions.h"
 #include "testing/perf/confidence/ratio_bootstrap_estimator.h"

 #ifdef UNSAFE_BUFFERS_BUILD
 // Not used with untrusted inputs.
 #pragma allow_unsafe_buffers
 #endif

 using std::max;
 using std::min;
 using std::numeric_limits;
 using std::pair;
 using std::sort;
 using std::string;
 using std::unordered_map;
 using std::vector;

 namespace {

 // Maps a raw perftest category name to the human-readable heading printed
 // above its table. Categories without a dedicated heading are returned
 // unchanged.
 string BeautifyCategory(const string& category) {
   struct Heading {
     const char* raw_name;
     const char* display_name;
   };
   static const Heading kHeadings[] = {
       {"BlinkStyleInitialCalcTime", "Initial style (µs)"},
       {"BlinkStyleRecalcTime", "Recalc style (µs)"},
       {"BlinkStyleParseTime", "Parse (µs)"},
   };
   for (const Heading& heading : kHeadings) {
     if (category == heading.raw_name) {
       return heading.display_name;
     }
   }
   return category;
 }

 // Returns true if `a` sorts strictly before `b` when the two strings are
 // compared code unit by code unit with ASCII letters folded to lower case.
 //
 // The folding is done by hand rather than with std::tolower(): the strings
 // sorted here can contain non-ASCII bytes (e.g. the UTF-8 encoding of "µ" in
 // the category headings), and passing a negative `char` to std::tolower() is
 // undefined behavior. Bytes outside A-Z are left untouched and compared as
 // unsigned values, so the ordering is well-defined and does not depend on
 // the current locale.
 bool CodeUnitCompareIgnoringASCIICaseLessThan(const string& a,
                                               const string& b) {
   const auto fold_ascii = [](char ch) -> unsigned char {
     const unsigned char code_unit = static_cast<unsigned char>(ch);
     return (code_unit >= 'A' && code_unit <= 'Z')
                ? static_cast<unsigned char>(code_unit + ('a' - 'A'))
                : code_unit;
   };
   return std::lexicographical_compare(
       a.begin(), a.end(), b.begin(), b.end(),
       [&fold_ascii](char lhs, char rhs) {
         return fold_ascii(lhs) < fold_ascii(rhs);
       });
 }

 // Parses a perftest log and collects every measurement found in it.
 //
 // The result is keyed first by (beautified) category and then by benchmark,
 // e.g. "Parse (µs)" -> "Video" -> [100, 90, ...], with the samples kept in
 // file order. Lines that are not well-formed result lines, or whose value
 // does not parse as a number, are ignored. Exits the process with status 1
 // if `filename` cannot be opened.
 unordered_map<string, unordered_map<string, vector<double>>> ReadFile(
     const char* filename) {
   FILE* const file = fopen(filename, "r");
   if (file == nullptr) {
     perror(filename);
     exit(1);
   }

   unordered_map<string, unordered_map<string, vector<double>>> measurements;
   char line_buffer[4096];
   while (fgets(line_buffer, sizeof(line_buffer), file) != nullptr) {
     string line(line_buffer);
     // Strip one trailing "\n" and then one trailing "\r"; a line consisting
     // of a single character is left alone.
     for (const char terminator : {'\n', '\r'}) {
       if (line.length() > 1 && line.back() == terminator) {
         line.pop_back();
       }
     }

     // A result line looks like: *RESULT BlinkStyleParseTime: Video= 11061 us
     // Every single space is a separator, so consecutive spaces yield empty
     // fields.
     vector<string> fields;
     size_t field_start = 0;
     for (size_t space = line.find(' '); space != string::npos;
          space = line.find(' ', field_start)) {
       fields.push_back(line.substr(field_start, space - field_start));
       field_start = space + 1;
     }
     fields.push_back(line.substr(field_start));

     const bool is_result_line =
         fields.size() == 5 && fields[0] == "*RESULT" &&
         fields[1].ends_with(":") && fields[2].ends_with("=") &&
         fields[4] == "us";
     if (!is_result_line) {
       continue;
     }

     double value;
     if (!base::StringToDouble(fields[3], &value)) {
       continue;
     }

     // Drop the trailing ':' and '=' that terminate the category and the
     // benchmark name, respectively.
     const string category =
         BeautifyCategory(fields[1].substr(0, fields[1].length() - 1));
     const string benchmark = fields[2].substr(0, fields[2].length() - 1);
     measurements[category][benchmark].push_back(value);
   }

   fclose(file);
   return measurements;
 }

 // Widens [min_num_trials, max_num_trials] so that it covers the sample count
 // of every benchmark in `measurements`. Both bounds are in/out parameters,
 // so the function can be called once per log to accumulate a combined range.
 // The range is only used for display.
 void FindNumberOfTrials(
     const unordered_map<string, unordered_map<string, vector<double>>>&
         measurements,
     unsigned& min_num_trials,
     unsigned& max_num_trials) {
   for (const auto& category_entry : measurements) {
     for (const auto& benchmark_entry : category_entry.second) {
       const unsigned num_trials =
           static_cast<unsigned>(benchmark_entry.second.size());
       if (num_trials < min_num_trials) {
         min_num_trials = num_trials;
       }
       if (num_trials > max_num_trials) {
         max_num_trials = num_trials;
       }
     }
   }
 }

 // Associates a benchmark name with the position of its samples in the
 // per-category data vector, so that labels can be sorted for display
 // without reordering the data itself.
 struct Label {
   // Benchmark name as read from the log.
   string benchmark;
   // Index of this benchmark's samples in the data vector built for the same
   // category; also used to look up the matching estimate.
   size_t data_index;
 };

 }  // namespace

 int main(int argc, char** argv) {
   if (argc != 3) {
     fprintf(stderr, "USAGE: compare_blink_perf OLD_LOG NEW_LOG\n");
     exit(1);
   }

   unordered_map<string, unordered_map<string, vector<double>>> before =
       ReadFile(argv[1]);
   unordered_map<string, unordered_map<string, vector<double>>> after =
       ReadFile(argv[2]);

   unsigned min_num_trials = numeric_limits<unsigned>::max();
   unsigned max_num_trials = numeric_limits<unsigned>::min();
   FindNumberOfTrials(before, min_num_trials, max_num_trials);
   FindNumberOfTrials(after, min_num_trials, max_num_trials);
   if (min_num_trials == max_num_trials) {
     printf("%u trial(s) on each side.\n", min_num_trials);
   } else {
     printf("%u–%u trial(s) on each side.\n", min_num_trials, max_num_trials);
   }

   // Now pair up the data. (The estimator treats them as unpaired,
   // but currently needs them to be of the same length within a single
   // benchmark.) We do one run per category, so that we can get
   // geometric means over them (RatioBootstrapEstimator doesn't support
   // arbitrary grouping).
   vector<string> sorted_categories;
   for (const auto& [category, entry] : before) {
     sorted_categories.push_back(category);
   }
   sort(sorted_categories.begin(), sorted_categories.end(),
        [](const string& a, const string& b) {
          return CodeUnitCompareIgnoringASCIICaseLessThan(a, b);
        });
   for (const string& category : sorted_categories) {
     vector<Label> labels;
     vector<vector<RatioBootstrapEstimator::Sample>> data;
     for (const auto& [benchmark, before_samples] :
          before.find(category)->second) {
       const auto after_entry = after.find(category);
       if (after_entry == after.end()) {
         continue;
       }
       const auto after_samples = after_entry->second.find(benchmark);
       if (after_samples == after_entry->second.end()) {
         continue;
       }

       vector<RatioBootstrapEstimator::Sample> samples;
       for (unsigned i = 0;
            i < std::min(before_samples.size(), after_samples->second.size());
            ++i) {
         samples.push_back({before_samples[i], after_samples->second[i]});
       }
       labels.emplace_back(Label{benchmark, data.size()});
       data.push_back(std::move(samples));
     }

     RatioBootstrapEstimator estimator(base::RandUint64());
     const unsigned kNumResamples = 2000;
     vector<RatioBootstrapEstimator::Estimate> estimates =
         estimator.ComputeRatioEstimates(data, kNumResamples,
                                         /*confidence_level=*/0.95,
                                         /*compute_geometric_mean=*/true);

     // Sort the labels for display.
     sort(labels.begin(), labels.end(), [](const Label& a, const Label& b) {
       return CodeUnitCompareIgnoringASCIICaseLessThan(a.benchmark, b.benchmark);
     });

     printf("\n");
     printf("%-20s %9s %9s %7s %17s\n", category.c_str(), "Before", "After",
            "Perf", "95% CI (BCa)");
     printf(
         "=================== ========= ========= ======= "
         "=================\n");
     for (const Label& label : labels) {
       // RatioBootstrapEstimator doesn't give us the plain means, so compute
       // that by hand.
       double sum_before = 0.0, sum_after = 0.0;
       for (const RatioBootstrapEstimator::Sample& sample :
            data[label.data_index]) {
         sum_before += sample.before;
         sum_after += sample.after;
       }
       double mean_before = sum_before / data[label.data_index].size();
       double mean_after = sum_after / data[label.data_index].size();

       const RatioBootstrapEstimator::Estimate& estimate =
           estimates[label.data_index];
       printf("%-19s %9.0f %9.0f %+6.1f%%  [%+5.1f%%, %+5.1f%%]\n",
              label.benchmark.c_str(), mean_before, mean_after,
              100.0 * (estimate.point_estimate - 1.0),
              100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
     }

     const RatioBootstrapEstimator::Estimate& estimate = estimates[data.size()];
     printf("%-19s %9s %9s %+6.1f%%  [%+5.1f%%, %+5.1f%%]\n", "Geometric mean",
            "", "", 100.0 * (estimate.point_estimate - 1.0),
            100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
   }
 }
	// Copyright 2024 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// A utility to compare two runs of a style perftest, in the perftest
	// output format, and compute confidence intervals. The simplest way
	// to get the right information is to build a binary before and after
	// changes and then run the two binaries alternately for a while until
	// you feel you have enough data:
	//
	//   rm -f old.txt new.txt; \
	//   while :; do
	//     taskset -c 2,4,6,8 ./out/Release/blink_perf___old \
	//       --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a old.txt;
	//     taskset -c 2,4,6,8 ./out/Release/blink_perf_tests \
	//       --gtest_filter=StyleCalcPerfTest.\* 2>&1 | tee -a new.txt;
	//   done
	//
	// and then run ./out/Release/compare_blink_perf old.txt new.txt.
	// (Possibly under watch -n 1 if you want to look at data as it
	// comes in, though beware of p-hacking.)
	//
	// TODO(sesse): Consider whether we should remove the first few
	// runs, as they are frequently outliers.

	#include <stdio.h>
	#include <stdlib.h>

	#include <algorithm>
	#include <string>
	#include <unordered_map>
	#include <utility>
	#include <vector>

	#include "base/rand_util.h"
	#include "base/strings/string_number_conversions.h"
	#include "testing/perf/confidence/ratio_bootstrap_estimator.h"

	#ifdef UNSAFE_BUFFERS_BUILD
	// Not used with untrusted inputs.
	#pragma allow_unsafe_buffers
	#endif

	using std::max;
	using std::min;
	using std::numeric_limits;
	using std::pair;
	using std::sort;
	using std::string;
	using std::unordered_map;
	using std::vector;

	namespace {

	// Maps a raw perftest category name to the human-readable heading printed
	// above its table. Categories without a dedicated heading are returned
	// unchanged.
	string BeautifyCategory(const string& category) {
	  struct Heading {
	    const char* raw_name;
	    const char* display_name;
	  };
	  static const Heading kHeadings[] = {
	      {"BlinkStyleInitialCalcTime", "Initial style (µs)"},
	      {"BlinkStyleRecalcTime", "Recalc style (µs)"},
	      {"BlinkStyleParseTime", "Parse (µs)"},
	  };
	  for (const Heading& heading : kHeadings) {
	    if (category == heading.raw_name) {
	      return heading.display_name;
	    }
	  }
	  return category;
	}

	// Returns true if `a` sorts strictly before `b` when the two strings are
	// compared code unit by code unit with ASCII letters folded to lower case.
	//
	// The folding is done by hand rather than with std::tolower(): the strings
	// sorted here can contain non-ASCII bytes (e.g. the UTF-8 encoding of "µ" in
	// the category headings), and passing a negative `char` to std::tolower() is
	// undefined behavior. Bytes outside A-Z are left untouched and compared as
	// unsigned values, so the ordering is well-defined and does not depend on
	// the current locale.
	bool CodeUnitCompareIgnoringASCIICaseLessThan(const string& a,
	                                              const string& b) {
	  const auto fold_ascii = [](char ch) -> unsigned char {
	    const unsigned char code_unit = static_cast<unsigned char>(ch);
	    return (code_unit >= 'A' && code_unit <= 'Z')
	               ? static_cast<unsigned char>(code_unit + ('a' - 'A'))
	               : code_unit;
	  };
	  return std::lexicographical_compare(
	      a.begin(), a.end(), b.begin(), b.end(),
	      [&fold_ascii](char lhs, char rhs) {
	        return fold_ascii(lhs) < fold_ascii(rhs);
	      });
	}

	// Parses a perftest log and collects every measurement found in it.
	//
	// The result is keyed first by (beautified) category and then by benchmark,
	// e.g. "Parse (µs)" -> "Video" -> [100, 90, ...], with the samples kept in
	// file order. Lines that are not well-formed result lines, or whose value
	// does not parse as a number, are ignored. Exits the process with status 1
	// if `filename` cannot be opened.
	unordered_map<string, unordered_map<string, vector<double>>> ReadFile(
	    const char* filename) {
	  FILE* const file = fopen(filename, "r");
	  if (file == nullptr) {
	    perror(filename);
	    exit(1);
	  }

	  unordered_map<string, unordered_map<string, vector<double>>> measurements;
	  char line_buffer[4096];
	  while (fgets(line_buffer, sizeof(line_buffer), file) != nullptr) {
	    string line(line_buffer);
	    // Strip one trailing "\n" and then one trailing "\r"; a line consisting
	    // of a single character is left alone.
	    for (const char terminator : {'\n', '\r'}) {
	      if (line.length() > 1 && line.back() == terminator) {
	        line.pop_back();
	      }
	    }

	    // A result line looks like: *RESULT BlinkStyleParseTime: Video= 11061 us
	    // Every single space is a separator, so consecutive spaces yield empty
	    // fields.
	    vector<string> fields;
	    size_t field_start = 0;
	    for (size_t space = line.find(' '); space != string::npos;
	         space = line.find(' ', field_start)) {
	      fields.push_back(line.substr(field_start, space - field_start));
	      field_start = space + 1;
	    }
	    fields.push_back(line.substr(field_start));

	    const bool is_result_line =
	        fields.size() == 5 && fields[0] == "*RESULT" &&
	        fields[1].ends_with(":") && fields[2].ends_with("=") &&
	        fields[4] == "us";
	    if (!is_result_line) {
	      continue;
	    }

	    double value;
	    if (!base::StringToDouble(fields[3], &value)) {
	      continue;
	    }

	    // Drop the trailing ':' and '=' that terminate the category and the
	    // benchmark name, respectively.
	    const string category =
	        BeautifyCategory(fields[1].substr(0, fields[1].length() - 1));
	    const string benchmark = fields[2].substr(0, fields[2].length() - 1);
	    measurements[category][benchmark].push_back(value);
	  }

	  fclose(file);
	  return measurements;
	}

	// Widens [min_num_trials, max_num_trials] so that it covers the sample count
	// of every benchmark in `measurements`. Both bounds are in/out parameters,
	// so the function can be called once per log to accumulate a combined range.
	// The range is only used for display.
	void FindNumberOfTrials(
	    const unordered_map<string, unordered_map<string, vector<double>>>&
	        measurements,
	    unsigned& min_num_trials,
	    unsigned& max_num_trials) {
	  for (const auto& category_entry : measurements) {
	    for (const auto& benchmark_entry : category_entry.second) {
	      const unsigned num_trials =
	          static_cast<unsigned>(benchmark_entry.second.size());
	      if (num_trials < min_num_trials) {
	        min_num_trials = num_trials;
	      }
	      if (num_trials > max_num_trials) {
	        max_num_trials = num_trials;
	      }
	    }
	  }
	}

	// Associates a benchmark name with the position of its samples in the
	// per-category data vector, so that labels can be sorted for display
	// without reordering the data itself.
	struct Label {
	// Benchmark name as read from the log.
	string benchmark;
	// Index of this benchmark's samples in the data vector built for the same
	// category; also used to look up the matching estimate.
	size_t data_index;
	};

	} // namespace

	int main(int argc, char** argv) {
	if (argc != 3) {
	fprintf(stderr, "USAGE: compare_blink_perf OLD_LOG NEW_LOG\n");
	exit(1);
	}

	unordered_map<string, unordered_map<string, vector<double>>> before =
	ReadFile(argv[1]);
	unordered_map<string, unordered_map<string, vector<double>>> after =
	ReadFile(argv[2]);

	unsigned min_num_trials = numeric_limits<unsigned>::max();
	unsigned max_num_trials = numeric_limits<unsigned>::min();
	FindNumberOfTrials(before, min_num_trials, max_num_trials);
	FindNumberOfTrials(after, min_num_trials, max_num_trials);
	if (min_num_trials == max_num_trials) {
	printf("%u trial(s) on each side.\n", min_num_trials);
	} else {
	printf("%u–%u trial(s) on each side.\n", min_num_trials, max_num_trials);
	}

	// Now pair up the data. (The estimator treats them as unpaired,
	// but currently needs them to be of the same length within a single
	// benchmark.) We do one run per category, so that we can get
	// geometric means over them (RatioBootstrapEstimator doesn't support
	// arbitrary grouping).
	vector<string> sorted_categories;
	for (const auto& [category, entry] : before) {
	sorted_categories.push_back(category);
	}
	sort(sorted_categories.begin(), sorted_categories.end(),
	[](const string& a, const string& b) {
	return CodeUnitCompareIgnoringASCIICaseLessThan(a, b);
	});
	for (const string& category : sorted_categories) {
	vector<Label> labels;
	vector<vector<RatioBootstrapEstimator::Sample>> data;
	for (const auto& [benchmark, before_samples] :
	before.find(category)->second) {
	const auto after_entry = after.find(category);
	if (after_entry == after.end()) {
	continue;
	}
	const auto after_samples = after_entry->second.find(benchmark);
	if (after_samples == after_entry->second.end()) {
	continue;
	}

	vector<RatioBootstrapEstimator::Sample> samples;
	for (unsigned i = 0;
	i < std::min(before_samples.size(), after_samples->second.size());
	++i) {
	samples.push_back({before_samples[i], after_samples->second[i]});
	}
	labels.emplace_back(Label{benchmark, data.size()});
	data.push_back(std::move(samples));
	}

	RatioBootstrapEstimator estimator(base::RandUint64());
	const unsigned kNumResamples = 2000;
	vector<RatioBootstrapEstimator::Estimate> estimates =
	estimator.ComputeRatioEstimates(data, kNumResamples,
	/confidence_level=/0.95,
	/compute_geometric_mean=/true);

	// Sort the labels for display.
	sort(labels.begin(), labels.end(), [](const Label& a, const Label& b) {
	return CodeUnitCompareIgnoringASCIICaseLessThan(a.benchmark, b.benchmark);
	});

	printf("\n");
	printf("%-20s %9s %9s %7s %17s\n", category.c_str(), "Before", "After",
	"Perf", "95% CI (BCa)");
	printf(
	"=================== ========= ========= ======= "
	"=================\n");
	for (const Label& label : labels) {
	// RatioBootstrapEstimator doesn't give us the plain means, so compute
	// that by hand.
	double sum_before = 0.0, sum_after = 0.0;
	for (const RatioBootstrapEstimator::Sample& sample :
	data[label.data_index]) {
	sum_before += sample.before;
	sum_after += sample.after;
	}
	double mean_before = sum_before / data[label.data_index].size();
	double mean_after = sum_after / data[label.data_index].size();

	const RatioBootstrapEstimator::Estimate& estimate =
	estimates[label.data_index];
	printf("%-19s %9.0f %9.0f %+6.1f%% [%+5.1f%%, %+5.1f%%]\n",
	label.benchmark.c_str(), mean_before, mean_after,
	100.0 * (estimate.point_estimate - 1.0),
	100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
	}

	const RatioBootstrapEstimator::Estimate& estimate = estimates[data.size()];
	printf("%-19s %9s %9s %+6.1f%% [%+5.1f%%, %+5.1f%%]\n", "Geometric mean",
	"", "", 100.0 * (estimate.point_estimate - 1.0),
	100.0 * (estimate.lower - 1.0), 100.0 * (estimate.upper - 1.0));
	}
	}