|  | // Copyright 2017 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "components/zucchini/binary_data_histogram.h" | 
|  |  | 
|  | #include <stddef.h> | 
|  |  | 
|  | #include <memory> | 
|  | #include <vector> | 
|  |  | 
|  | #include "components/zucchini/buffer_view.h" | 
|  | #include "testing/gtest/include/gtest/gtest.h" | 
|  |  | 
|  | namespace zucchini { | 
|  |  | 
|  | TEST(OutlierDetectorTest, Basic) { | 
|  | auto make_detector = [](const std::vector<double>& values) { | 
|  | auto detector = std::make_unique<OutlierDetector>(); | 
|  | for (double v : values) | 
|  | detector->Add(v); | 
|  | detector->Prepare(); | 
|  | return detector; | 
|  | }; | 
|  |  | 
|  | std::unique_ptr<OutlierDetector> detector; | 
|  | // No data: Should at least not cause error. | 
|  | detector = make_detector({}); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.0)); | 
|  | // Single point: Trivially inert. | 
|  | detector = make_detector({0.5}); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.1)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.5)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.9)); | 
|  | // Two identical points: StdDev is 0, so falls back to built-in tolerance. | 
|  | detector = make_detector({0.5, 0.5}); | 
|  | EXPECT_EQ(-1, detector->DecideOutlier(0.3)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.499)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.5)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.501)); | 
|  | EXPECT_EQ(1, detector->DecideOutlier(0.7)); | 
|  | // Two separate points: Outliner test is pretty lax. | 
|  | detector = make_detector({0.4, 0.6}); | 
|  | EXPECT_EQ(-1, detector->DecideOutlier(0.2)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.3)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.5)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.7)); | 
|  | EXPECT_EQ(1, detector->DecideOutlier(0.8)); | 
|  | // Sharpen distribution by clustering toward norm: Now test is stricter. | 
|  | detector = make_detector({0.4, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.6}); | 
|  | EXPECT_EQ(-1, detector->DecideOutlier(0.3)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.4)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.5)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.6)); | 
|  | EXPECT_EQ(1, detector->DecideOutlier(0.7)); | 
|  | // Shift numbers around: Mean is 0.3, and data order scrambled. | 
|  | detector = make_detector({0.28, 0.2, 0.31, 0.4, 0.29, 0.32, 0.27, 0.30}); | 
|  | EXPECT_EQ(-1, detector->DecideOutlier(0.0)); | 
|  | EXPECT_EQ(-1, detector->DecideOutlier(0.1)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.2)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.3)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.4)); | 
|  | EXPECT_EQ(1, detector->DecideOutlier(0.5)); | 
|  | EXPECT_EQ(1, detector->DecideOutlier(1.0)); | 
|  | // Typical usage: Potential outlier would be part of original input data! | 
|  | detector = make_detector({0.3, 0.29, 0.31, 0.0, 0.3, 0.32, 0.3, 0.29, 0.6}); | 
|  | EXPECT_EQ(-1, detector->DecideOutlier(0.0)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.28)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.29)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.3)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.31)); | 
|  | EXPECT_EQ(0, detector->DecideOutlier(0.32)); | 
|  | EXPECT_EQ(1, detector->DecideOutlier(0.6)); | 
|  | } | 
|  |  | 
|  | TEST(BinaryDataHistogramTest, Basic) { | 
|  | constexpr double kUninitScore = -1; | 
|  |  | 
|  | constexpr uint8_t kTestData[] = {2, 137, 42, 0, 0, 0, 7, 11, 1, 11, 255}; | 
|  | const size_t n = sizeof(kTestData); | 
|  | ConstBufferView region(kTestData, n); | 
|  |  | 
|  | std::vector<BinaryDataHistogram> prefix_histograms(n + 1);  // Short to long. | 
|  | std::vector<BinaryDataHistogram> suffix_histograms(n + 1);  // Long to short. | 
|  |  | 
|  | for (size_t i = 0; i <= n; ++i) { | 
|  | ConstBufferView prefix(region.begin(), i); | 
|  | ConstBufferView suffix(region.begin() + i, n - i); | 
|  | // If regions are smaller than 2 bytes then it is invalid. Else valid. | 
|  | EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].Compute(prefix)); | 
|  | EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].Compute(suffix)); | 
|  | // IsValid() returns the same results. | 
|  | EXPECT_EQ(prefix.size() >= 2, prefix_histograms[i].IsValid()); | 
|  | EXPECT_EQ(suffix.size() >= 2, suffix_histograms[i].IsValid()); | 
|  | } | 
|  |  | 
|  | // Full-prefix = full-suffix = full data. | 
|  | EXPECT_EQ(0.0, prefix_histograms[n].Distance(suffix_histograms[0])); | 
|  | EXPECT_EQ(0.0, suffix_histograms[0].Distance(prefix_histograms[n])); | 
|  |  | 
|  | // Testing heuristics without overreliance on implementation details. | 
|  |  | 
|  | // Strict prefixes, in increasing size. Compare against full data. | 
|  | double prev_prefix_score = kUninitScore; | 
|  | for (size_t i = 2; i < n; ++i) { | 
|  | double score = prefix_histograms[i].Distance(prefix_histograms[n]); | 
|  | // Positivity. | 
|  | EXPECT_GT(score, 0.0); | 
|  | // Symmetry. | 
|  | EXPECT_EQ(score, prefix_histograms[n].Distance(prefix_histograms[i])); | 
|  | // Distance should decrease as prefix gets nearer to full data. | 
|  | if (prev_prefix_score != kUninitScore) | 
|  | EXPECT_LT(score, prev_prefix_score); | 
|  | prev_prefix_score = score; | 
|  | } | 
|  |  | 
|  | // Strict suffixes, in decreasing size. Compare against full data. | 
|  | double prev_suffix_score = -1; | 
|  | for (size_t i = 1; i <= n - 2; ++i) { | 
|  | double score = suffix_histograms[i].Distance(suffix_histograms[0]); | 
|  | // Positivity. | 
|  | EXPECT_GT(score, 0.0); | 
|  | // Symmetry. | 
|  | EXPECT_EQ(score, suffix_histograms[0].Distance(suffix_histograms[i])); | 
|  | // Distance should increase as suffix gets farther from full data. | 
|  | if (prev_suffix_score != kUninitScore) | 
|  | EXPECT_GT(score, prev_suffix_score); | 
|  | prev_suffix_score = score; | 
|  | } | 
|  | } | 
|  |  | 
|  | }  // namespace zucchini |