// blob: 3491d2af80bc77d9ff8a5eea1f4e5f317ba0bc42 [file] [log] [blame]
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Tests of domain `InRegexp`.
#include <cstddef>
#include <optional>
#include <string>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/container/flat_hash_set.h"
#include "absl/random/random.h"
#include "./fuzztest/domain.h"
#include "./domain_tests/domain_testing.h"
#include "re2/re2.h"
namespace fuzztest {
namespace {
using ::testing::Contains;
using ::testing::Optional;
using ::testing::ResultOf;
using ::testing::StrEq;
// Verifies that initializing an `InRegexp` domain keeps producing new values
// that fully match the regexp, and that the average length of all generated
// strings is within 1 of the expected average size for each test case.
TEST(InRegexp, InitGenerateDifferentValidAndShortValues) {
  absl::BitGen bitgen;
  static constexpr struct {
    const char* regexp_to_test;
    // Number of distinct values to collect before stopping. Unsigned so that it
    // compares against container sizes without a signed/unsigned mismatch.
    size_t n_unique_value;
    int expected_average_size;
  } kTestCases[] = {
      {"a\\w*b", 300, 6},
      {"A{2,10}", 9, 3},
      {"a*\\w*\\W*b+", 400, 5},
  };
  for (const auto& test_case : kTestCases) {
    const char* const regexp = test_case.regexp_to_test;
    absl::flat_hash_set<std::string> unique_values;
    auto domain = InRegexp(regexp);
    size_t total_size = 0;
    size_t generation_times = 0;
    // Keep generating until enough distinct values have been observed; every
    // generated value (duplicate or not) counts towards the average size.
    while (unique_values.size() < test_case.n_unique_value) {
      ++generation_times;
      std::string val = domain.GetValue(domain.Init(bitgen));
      total_size += val.size();
      EXPECT_TRUE(RE2::FullMatch(val, regexp))
          << "regexp: " << regexp << ", generated value: \"" << val << "\"";
      unique_values.insert(val);
    }
    EXPECT_NEAR(test_case.expected_average_size,
                static_cast<double>(total_size) / generation_times, 1)
        << "regexp: " << regexp;
  }
}
// Checks that a seed registered through `WithSeeds()` shows up among the
// initial values generated for the domain.
TEST(InRegexp, InitGeneratesSeeds) {
  static constexpr char kSeed[] = "a_Hello_World_b";
  constexpr int kNumInitialValues = 1000;
  auto seeded_domain = InRegexp(R"re(a\w*b)re").WithSeeds({kSeed});
  EXPECT_THAT(GenerateInitialValues(seeded_domain, kNumInitialValues),
              Contains(Value(seeded_domain, kSeed)));
}
// TODO(changochen): Improve the tests to verify "close" mutation.
// Verifies that repeatedly mutating a value of an `InRegexp` domain only ever
// yields strings that fully match the regexp, and that the mutations reach the
// requested number of distinct strings for each test case.
TEST(InRegexp, MutationGeneratesDifferentValidValues) {
  absl::BitGen bitgen;
  static constexpr struct {
    const char* regexp_to_test;
    // Number of distinct mutated values to collect before stopping. Unsigned so
    // that it compares against container sizes without a signed/unsigned
    // mismatch.
    size_t n_unique_value;
  } kTestCases[] = {
      {"", 1},
      {"aa[ab]b", 2},
      {"^df[a-z]\\wf$", 50},
      {"TestTest", 1},
      {"a\\w*b", 100},
      {"A{2,10}", 9},
      {"Sun[\\W]{3}Cool[\\d]{4}", 40},
  };
  for (const auto& test_case : kTestCases) {
    const char* const regexp = test_case.regexp_to_test;
    absl::flat_hash_set<std::string> unique_values;
    Domain<std::string> domain = InRegexp(regexp);
    Value val(domain, bitgen);
    EXPECT_TRUE(RE2::FullMatch(val.user_value, regexp))
        << "regexp: " << regexp << ", initial value: \"" << val.user_value
        << "\"";
    // Only values produced by mutation are recorded; the initial value is not.
    while (unique_values.size() < test_case.n_unique_value) {
      val.Mutate(domain, bitgen, {}, false);
      EXPECT_TRUE(RE2::FullMatch(val.user_value, regexp))
          << "regexp: " << regexp << ", mutated value: \"" << val.user_value
          << "\"";
      unique_values.insert(val.user_value);
    }
  }
}
// Constructing the domain from a malformed regular expression is expected to
// terminate the process with the error message below (where death tests are
// supported).
TEST(InRegexp, InvalidInputReportsErrors) {
  static constexpr char kMalformedRegexp[] = "[";
  static constexpr char kExpectedError[] = "Invalid RE2 regular expression.";
  EXPECT_DEATH_IF_SUPPORTED(InRegexp(kMalformedRegexp), kExpectedError);
}
// Mutates the value "aaaaa" of the domain `a*` many times (always restarting
// from the same corpus value) and checks the distribution of the resulting
// string lengths.
TEST(InRegexp, MutatingRepetitionCanIncreaseAndDecreaseLength) {
  absl::BitGen bitgen;
  auto domain = InRegexp("a*");
  Value val(domain, bitgen);

  constexpr size_t kSeedLength = 5;
  // A single mutation is expected to add at most one character.
  constexpr size_t kMaxLength = kSeedLength + 1;
  constexpr int kMutateTimes = 50000;

  // The starting corpus value does not change between iterations, so compute it
  // once and fail cleanly if the seed string is not accepted by the domain.
  const std::string seed(kSeedLength, 'a');
  const auto seed_corpus = domain.FromValue(seed);
  ASSERT_TRUE(seed_corpus.has_value()) << seed;

  // size_histogram[n] counts mutations whose result has length n.
  std::vector<int> size_histogram(kMaxLength + 1, 0);
  for (int i = 0; i < kMutateTimes; ++i) {
    val.corpus_value = *seed_corpus;
    val.Mutate(domain, bitgen, {}, false);
    const size_t size_after_mutation = val.user_value.size();
    // Guards the histogram access below against out-of-range lengths.
    ASSERT_LT(size_after_mutation, size_histogram.size()) << val.user_value;
    ++size_histogram[size_after_mutation];
  }

  // The expected ratio for adding a character is 50%, and the expected ratio
  // for removing 0, 1, 2, 3, 4, 5 characters is 50%/6 = 8.3% respectively.
  constexpr double kAbsDiff = 0.01;
  constexpr double kProbAddingOneChar = 0.5;
  constexpr double kProbRemovingCharForEachLength = 0.5 / (kSeedLength + 1);
  for (size_t length = 0; length <= kSeedLength; ++length) {
    EXPECT_NEAR(static_cast<double>(size_histogram[length]) / kMutateTimes,
                kProbRemovingCharForEachLength, kAbsDiff)
        << "resulting length: " << length;
  }
  EXPECT_NEAR(static_cast<double>(size_histogram[kMaxLength]) / kMutateTimes,
              kProbAddingOneChar, kAbsDiff);
}
// Repeatedly applies shrink-only mutations to freshly initialized values until
// each reaches the expected length, and checks that the requested number of
// distinct strings of that length can be found this way.
TEST(InRegexp, OnlyShrinkFindsDifferentValueWithMinimalLength) {
  absl::BitGen bitgen;
  static constexpr struct {
    const char* regexp_to_test;
    // Length at which shrinking is considered finished. Unsigned (like
    // `unique_num`) so that comparisons with `size()` have matching signedness.
    size_t expected_length;
    // Number of distinct fully-shrunk strings to collect.
    size_t unique_num;
    // When true, every shrinking step is additionally asserted not to increase
    // the length of the string.
    bool min_path_length_equal_min_string_length;
  } kTestCases[] = {
      {"a(c|d)+(e|f)+b", 4, 4, true},
      {"A{2,10}D{2,10}", 4, 1, true},
      {"A\\d+B\\w+C{2,5}", 6, 10, true},
      {"a([a|b][c|d]|efghi)o", 7, 1, false},
      {"(superlongstring|[ab][cd])(secondlongstring|[ef][gh])", 31, 1, false},
  };
  for (const auto& test_case : kTestCases) {
    const char* const regexp = test_case.regexp_to_test;
    auto domain = InRegexp(regexp);
    absl::flat_hash_set<std::string> unique_values;
    while (unique_values.size() < test_case.unique_num) {
      Value val(domain, bitgen);
      while (val.user_value.size() != test_case.expected_length) {
        const size_t pre_size = val.user_value.size();
        val.Mutate(domain, bitgen, {}, true);
        if (test_case.min_path_length_equal_min_string_length) {
          ASSERT_LE(val.user_value.size(), pre_size) << val.user_value;
        }
        EXPECT_TRUE(RE2::FullMatch(val.user_value, regexp))
            << "regexp: " << regexp << ", shrunk value: \"" << val.user_value
            << "\"";
      }
      // Holds by construction of the loop above; kept as an explicit statement
      // of the postcondition.
      EXPECT_EQ(val.user_value.size(), test_case.expected_length);
      unique_values.insert(val.user_value);
    }
  }
}
// Checks that a corpus value validates against the domain it was created from
// and is reported as an invalid DFA path by a domain built from a different
// regexp.
TEST(InRegexp, ValidationRejectsInvalidValue) {
  auto domain_a = InRegexp("a(c|d)+(e|f)+b");
  auto domain_b = InRegexp("A{2,10}D{2,10}");
  auto corpus_value_a = domain_a.FromValue("acceeb");
  auto corpus_value_b = domain_b.FromValue("AADD");
  // `FromValue` returns an optional; stop with a clear failure here rather than
  // dereferencing an empty optional below.
  ASSERT_TRUE(corpus_value_a.has_value());
  ASSERT_TRUE(corpus_value_b.has_value());
  ASSERT_OK(domain_a.ValidateCorpusValue(*corpus_value_a));
  ASSERT_OK(domain_b.ValidateCorpusValue(*corpus_value_b));
  EXPECT_THAT(domain_a.ValidateCorpusValue(*corpus_value_b),
              IsInvalid("Invalid value for InRegexp(\"a(c|d)+(e|f)+b\") >> "
                        "Invalid DFA path."));
  EXPECT_THAT(domain_b.ValidateCorpusValue(*corpus_value_a),
              IsInvalid("Invalid value for InRegexp(\"A{2,10}D{2,10}\") >> "
                        "Invalid DFA path."));
}
// Regression coverage for corpus validation, using the values from a real bug
// (b/339306132).
TEST(InRegexp, ValidationRejectsInvalidValueRegression) {
  auto narrow_domain =
      InRegexp(R"re(^(sheet|empty_sheet|email_message|email_thread)$)re");
  auto wide_domain = InRegexp(
      R"re(^(sheet|empty_sheet|email_message|email_thread|chat_message|chat_context)$)re");

  // Corpus values created by the narrower domain.
  auto sheet_corpus = narrow_domain.FromValue("sheet");
  auto empty_sheet_corpus = narrow_domain.FromValue("empty_sheet");
  auto email_message_corpus = narrow_domain.FromValue("email_message");
  auto email_thread_corpus = narrow_domain.FromValue("email_thread");

  ASSERT_TRUE(sheet_corpus.has_value());
  ASSERT_TRUE(empty_sheet_corpus.has_value());
  ASSERT_TRUE(email_message_corpus.has_value());
  ASSERT_TRUE(email_thread_corpus.has_value());

  // Each corpus value must be valid for the domain that produced it.
  EXPECT_OK(narrow_domain.ValidateCorpusValue(*sheet_corpus));
  EXPECT_OK(narrow_domain.ValidateCorpusValue(*empty_sheet_corpus));
  EXPECT_OK(narrow_domain.ValidateCorpusValue(*email_message_corpus));
  EXPECT_OK(narrow_domain.ValidateCorpusValue(*email_thread_corpus));

  // The wider domain accepts every user value of the narrower one, but the
  // underlying DFAs are structurally different, so a corpus value (DFA path)
  // taken from one domain is not necessarily valid for the other. The wider
  // domain must therefore either reject the foreign corpus value or map it
  // back to the very same user value.
  const auto expect_rejected_or_same_value = [&wide_domain](
                                                 const auto& corpus,
                                                 const char* expected_value) {
    EXPECT_TRUE(!wide_domain.ValidateCorpusValue(corpus).ok() ||
                wide_domain.GetValue(corpus) == expected_value)
        << "GetValue() returns \"" << wide_domain.GetValue(corpus) << "\"";
  };
  expect_rejected_or_same_value(*sheet_corpus, "sheet");
  expect_rejected_or_same_value(*empty_sheet_corpus, "empty_sheet");
  expect_rejected_or_same_value(*email_message_corpus, "email_message");
  expect_rejected_or_same_value(*email_thread_corpus, "email_thread");
}
// Parameter for the value-parameterized `InRegexpTest` suite: a regular
// expression paired with a string that the tests convert to a corpus value of
// the corresponding `InRegexp` domain.
struct InRegexString {
// Regular expression used to construct the `InRegexp` domain.
std::string regexp;
// User value passed to the domain's `FromValue()`; the test asserts that the
// conversion succeeds.
std::string string_in_domain;
};
// Test fixture whose parameter is an `InRegexString`.
using InRegexpTest = ::testing::TestWithParam<InRegexString>;
// Round-trips a valid corpus value through `SerializeCorpus()` and
// `ParseCorpus()` and expects the parsed corpus to map back to the original
// user value.
TEST_P(InRegexpTest, SerializationWorksCorrectly) {
  const InRegexString& param = GetParam();
  auto domain = InRegexp(param.regexp);

  const auto original_corpus = domain.FromValue(param.string_in_domain);
  ASSERT_TRUE(original_corpus.has_value());
  ASSERT_OK(domain.ValidateCorpusValue(*original_corpus));

  // Maps a parsed corpus value back to the user-visible string.
  const auto to_user_value = [&domain](const auto& parsed_corpus) {
    return domain.GetValue(parsed_corpus);
  };
  EXPECT_THAT(
      domain.ParseCorpus(domain.SerializeCorpus(*original_corpus)),
      Optional(ResultOf(to_user_value, StrEq(param.string_in_domain))));
}
INSTANTIATE_TEST_SUITE_P(RegexpSet, InRegexpTest,
testing::ValuesIn(std::vector<InRegexString>{
{"a(c|d)+(e|f)+b", "acceeb"},
{"A{2,10}D{2,10}", "AADD"},
{"A\\d+B\\w+C{2,5}", "A1BaCC"}}));
} // namespace
} // namespace fuzztest