chrome/browser/privacy_budget/mesa_distribution_unittest.cc - chromium/src - Git at Google

 // Copyright 2020 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "chrome/browser/privacy_budget/mesa_distribution.h"
 #include <math.h>

 #include <array>
 #include <limits>
 #include <random>
 #include <set>

 #include "base/rand_util.h"
 #include "testing/gtest/include/gtest/gtest.h"

 namespace {
 constexpr auto kSeed = 3;
 constexpr auto kPivotPoint = 300;
 constexpr auto kDistRatio = 0.9l;
 constexpr auto kGeometricDistributionParam = 0.5l;
 }  // namespace

 TEST(MesaDistributionTest, Get) {
   MesaDistribution<int> mesa(kPivotPoint, kDistRatio,
                              kGeometricDistributionParam);
   std::mt19937 g(kSeed);
   auto v1 = mesa.Get(g);
   g.seed(kSeed);
   auto v2 = mesa.Get(g);
   EXPECT_EQ(v1, v2);
 }

 // This test asserts that the MesaDistribution produces a near ideal
 // distribution of offset selections.
 //
 // We do this by drawing a very large number of samples from MesaDistribution in
 // the presence of a fairly good PRNG and verifying that the resulting
 // probability distribution is within a close margin of the ideal distribution.
 //
 // In order to pass the test, the simulation must result in an aggregate
 // probability distribution that's within ε of the ideal distribution.
 //
 // The PRNG is MT19937 with a fixed seed.
 TEST(MesaDistributionTest, SpreadTest_MaybeSlow) {
   constexpr auto kTrials = 10'000'000lu;

   // We truncate the distribution at kMaxOffset, or else we'd need to keep
   // occurrence counts for over a very large range.
   //
   // Truncation changes the probability distribution as a side-effect. Observed
   // probability density increases by a factor of 1 / CDF(kMaxOffset) where CDF
   // is the cumulative distribution function for Mesa. However beyond
   // 2 * kPivotPoint the CDF is incalculably close to 1.
   constexpr auto kMaxOffset = kPivotPoint * 3;

   // Lambda corresponds to λ in the description in mesa_distribution.h
   // explaining the distribution. It's the probability density within the linear
   // region of the distribution.
   const auto kLambda = kDistRatio / kPivotPoint;

   // Gamma corresponds to γ in the description in mesa_distribution.h. It's the
   // parameter to the Geometric distribution in the tail region.
   const auto kGamma = kLambda / (1 - kDistRatio);

   // kEpsilon is the maximum absolute error in probability per element. We
   // expect the experiment below to produce values within 5% of the linear
   // region density.
   const double kEpsilon = kLambda * 0.05 /* ±5% envelope */;

   // occurrence[i] := the number of times we've seen `i`.
   std::array<double, kMaxOffset + 1> occurrences = {0.0};

   // The distribution under test:
   MesaDistribution<int> mesa(kPivotPoint, kDistRatio, kGamma);

   std::mt19937 random_bit_generator(kSeed);

   for (auto i = 0u; i < kTrials; ++i) {
     auto v = mesa.Get(random_bit_generator);
     if (v > kMaxOffset)
       continue;
     ++occurrences[v];
   }

   // `occurrences` is a histogram of seen values. This loop converts it to
   // a probability.
   for (double& occurrence : occurrences)
     occurrence /= kTrials;

   // Offsets from 0 thru `kPivotPoint - 1` (inclusive) should have the same
   // probability. I.e. uniform.
   for (int i = 0; i < kPivotPoint; ++i) {
     ASSERT_NEAR(occurrences[i], kLambda, kEpsilon) << "at offset" << i;
   }

   // Offsets from `kPivotPoint` thru infinity should follow a geometric
   // distribution. We compare the observed probabilities with the Geometric PDF.
   double expected_pdf = kLambda;
   for (int i = kPivotPoint; i <= kMaxOffset; ++i) {
     ASSERT_NEAR(occurrences[i], expected_pdf, kEpsilon) << "at offset" << i;
     expected_pdf *= 1.0l - kGamma;
   }
 }
	// Copyright 2020 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "chrome/browser/privacy_budget/mesa_distribution.h"
	#include <math.h>

	#include <array>
	#include <limits>
	#include <random>
	#include <set>

	#include "base/rand_util.h"
	#include "testing/gtest/include/gtest/gtest.h"

	namespace {
	constexpr auto kSeed = 3;
	constexpr auto kPivotPoint = 300;
	constexpr auto kDistRatio = 0.9l;
	constexpr auto kGeometricDistributionParam = 0.5l;
	} // namespace

	TEST(MesaDistributionTest, Get) {
	MesaDistribution<int> mesa(kPivotPoint, kDistRatio,
	kGeometricDistributionParam);
	std::mt19937 g(kSeed);
	auto v1 = mesa.Get(g);
	g.seed(kSeed);
	auto v2 = mesa.Get(g);
	EXPECT_EQ(v1, v2);
	}

	// This test asserts that the MesaDistribution produces a near ideal
	// distribution of offset selections.
	//
	// We do this by drawing a very large number of samples from MesaDistribution in
	// the presence of a fairly good PRNG and verifying that the resulting
	// probability distribution is within a close margin of the ideal distribution.
	//
	// In order to pass the test, the simulation must result in an aggregate
	// probability distribution that's within ε of the ideal distribution.
	//
	// The PRNG is MT19937 with a fixed seed.
	TEST(MesaDistributionTest, SpreadTest_MaybeSlow) {
	constexpr auto kTrials = 10'000'000lu;

	// We truncate the distribution at kMaxOffset, or else we'd need to keep
	// occurrence counts for over a very large range.
	//
	// Truncation changes the probability distribution as a side-effect. Observed
	// probability density increases by a factor of 1 / CDF(kMaxOffset) where CDF
	// is the cumulative distribution function for Mesa. However beyond
	// 2 * kPivotPoint the CDF is incalculably close to 1.
	constexpr auto kMaxOffset = kPivotPoint * 3;

	// Lambda corresponds to λ in the description in mesa_distribution.h
	// explaining the distribution. It's the probability density within the linear
	// region of the distribution.
	const auto kLambda = kDistRatio / kPivotPoint;

	// Gamma corresponds to γ in the description in mesa_distribution.h. It's the
	// parameter to the Geometric distribution in the tail region.
	const auto kGamma = kLambda / (1 - kDistRatio);

	// kEpsilon is the maximum absolute error in probability per element. We
	// expect the experiment below to produce values within 5% of the linear
	// region density.
	const double kEpsilon = kLambda * 0.05 /* ±5% envelope */;

	// occurrence[i] := the number of times we've seen `i`.
	std::array<double, kMaxOffset + 1> occurrences = {0.0};

	// The distribution under test:
	MesaDistribution<int> mesa(kPivotPoint, kDistRatio, kGamma);

	std::mt19937 random_bit_generator(kSeed);

	for (auto i = 0u; i < kTrials; ++i) {
	auto v = mesa.Get(random_bit_generator);
	if (v > kMaxOffset)
	continue;
	++occurrences[v];
	}

	// `occurrences` is a histogram of seen values. This loop converts it to
	// a probability.
	for (double& occurrence : occurrences)
	occurrence /= kTrials;

	// Offsets from 0 thru `kPivotPoint - 1` (inclusive) should have the same
	// probability. I.e. uniform.
	for (int i = 0; i < kPivotPoint; ++i) {
	ASSERT_NEAR(occurrences[i], kLambda, kEpsilon) << "at offset" << i;
	}

	// Offsets from `kPivotPoint` thru infinity should follow a geometric
	// distribution. We compare the observed probabilities with the Geometric PDF.
	double expected_pdf = kLambda;
	for (int i = kPivotPoint; i <= kMaxOffset; ++i) {
	ASSERT_NEAR(occurrences[i], expected_pdf, kEpsilon) << "at offset" << i;
	expected_pdf *= 1.0l - kGamma;
	}
	}