blob: abc5f96d35a215536da88d684d2ef7007d2d0396 [file] [log] [blame]
// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/browser/history_fuzzy_provider.h"
#include <algorithm>
#include <vector>
#include "base/logging.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace {
struct TestCase {
int tolerance;
std::u16string input;
bool expect_found;
std::vector<std::u16string> corrected_inputs;
};
template <typename Container, typename Item>
void SwapRemoveElement(Container& container, const Item& item) {
typename Container::iterator it = std::ranges::find(container, item);
if (it == container.end()) {
return;
}
typename Container::iterator last = container.end() - 1;
if (it != last) {
std::iter_swap(it, last);
}
container.pop_back();
}
// These operator implementations are for debugging.
std::ostream& operator<<(std::ostream& os, const fuzzy::Edit& edit) {
os << '{';
switch (edit.kind) {
case fuzzy::Edit::Kind::KEEP: {
os << 'K';
break;
}
case fuzzy::Edit::Kind::DELETE: {
os << 'D';
break;
}
case fuzzy::Edit::Kind::INSERT: {
os << 'I';
break;
}
case fuzzy::Edit::Kind::REPLACE: {
os << 'R';
break;
}
case fuzzy::Edit::Kind::TRANSPOSE: {
os << 'T';
break;
}
default: {
NOTREACHED();
}
}
os << "," << edit.at << "," << static_cast<char>(edit.new_char) << "}";
return os;
}
std::ostream& operator<<(std::ostream& os,
const fuzzy::Correction& correction) {
os << '[';
for (size_t i = 0; i < correction.edit_count; i++) {
os << correction.edits[i];
os << " <- ";
}
os << ']';
return os;
}
// Note: The `test_case` is destroyed in place as it is checked.
void VerifyTestCase(fuzzy::Node* node,
TestCase& test_case,
fuzzy::ToleranceSchedule tolerance_schedule) {
std::vector<fuzzy::Correction> corrections;
bool found =
node->FindCorrections(test_case.input, tolerance_schedule, corrections);
for (const fuzzy::Correction& correction : corrections) {
std::u16string corrected_input = test_case.input;
correction.ApplyTo(corrected_input);
DVLOG(1) << correction << " -> " << corrected_input;
}
CHECK_EQ(found, test_case.expect_found)
<< " input(" << test_case.tolerance << "): " << test_case.input;
CHECK_EQ(test_case.corrected_inputs.size(), corrections.size())
<< " input(" << test_case.tolerance << "): " << test_case.input;
for (const fuzzy::Correction& correction : corrections) {
std::u16string corrected_input = test_case.input;
correction.ApplyTo(corrected_input);
SwapRemoveElement(test_case.corrected_inputs, corrected_input);
}
CHECK_EQ(test_case.corrected_inputs.size(), size_t{0})
<< " input(" << test_case.tolerance << "): " << test_case.input;
}
// Verifies a vector of `cases`, destroying them in the process.
void VerifyCases(fuzzy::Node* node, std::vector<TestCase>& cases) {
for (TestCase& test_case : cases) {
VerifyTestCase(
node, test_case,
{.start_index = 0, .step_length = 0, .limit = test_case.tolerance});
}
}
// This is just like `VerifyCases` but uses a specified `tolerance_schedule`
// for all cases instead of each TestCase `tolerance` value.
void VerifyCasesWithSchedule(fuzzy::Node* node,
std::vector<TestCase>& cases,
fuzzy::ToleranceSchedule tolerance_schedule) {
for (TestCase& test_case : cases) {
VerifyTestCase(node, test_case, tolerance_schedule);
}
}
} // namespace
class HistoryFuzzyProviderTest : public testing::Test {
public:
HistoryFuzzyProviderTest() = default;
HistoryFuzzyProviderTest(const HistoryFuzzyProviderTest&) = delete;
HistoryFuzzyProviderTest& operator=(const HistoryFuzzyProviderTest&) = delete;
void SetUp() override {}
};
TEST_F(HistoryFuzzyProviderTest, AlgorithmIsNotGreedy) {
fuzzy::Node node;
node.Insert(u"wind", 0);
node.Insert(u"wash", 0);
std::vector<TestCase> cases = {
{
1,
u"wand",
false,
{
u"wind",
},
},
{
1,
u"wish",
false,
{
u"wash",
},
},
{
2,
u"xasx",
false,
{
u"wash",
},
},
{
3,
u"xaxsx",
false,
{
u"wash",
},
},
{
2,
u"want",
false,
{
u"wind",
u"wash",
},
},
};
VerifyCases(&node, cases);
}
TEST_F(HistoryFuzzyProviderTest, ReplacementWorksAnywhere) {
fuzzy::Node node;
node.Insert(u"abcdefg", 0);
node.Insert(u"abcdxyz", 0);
node.Insert(u"tuvwxyz", 0);
node.Insert(u"tuvabcd", 0);
// A few things to note about these cases:
// They don't complete to full strings; minimal corrections are supplied.
// Tolerance consumption is currently naive. This should likely change,
// as we may want to start with low (or even no) tolerance and then allow
// incremental tolerance gains as more of the input string is scanned.
// A stepping tolerance schedule like this would greatly increase efficiency
// and allow more tolerance for longer strings without risking odd matches.
std::vector<TestCase> cases = {
{
0,
u"abcdefg",
true,
{},
},
{
0,
u"abc_efg",
false,
{},
},
{
1,
u"abc_efg",
false,
{
u"abcdefg",
},
},
{
1,
u"abc_ef",
false,
{
u"abcdef",
},
},
{
2,
u"abc_e_g",
false,
{
u"abcdefg",
},
},
{
2,
u"a_c_e_g",
false,
{},
},
{
3,
u"a_c_e_",
false,
{
u"abcdef",
},
},
};
VerifyCases(&node, cases);
}
TEST_F(HistoryFuzzyProviderTest, InsertionWorksAnywhereExceptEnd) {
fuzzy::Node node;
node.Insert(u"abc", 0);
std::vector<TestCase> cases = {
{
1,
u"bc",
false,
{
u"abc",
},
},
{
1,
u"ac",
false,
{
u"abc",
},
},
{
1,
u"ab",
true,
{
// Note, we are NOT expecting "abc" here because insertion at
// end of input is generally predictive, and that is the job
// of the whole autocomplete system, not the fuzzy input
// correction algorithm, which seeks only to get inputs back
// on track for good suggestions.
},
},
{
2,
u"b",
false,
{
u"ab",
},
},
};
VerifyCases(&node, cases);
}
TEST_F(HistoryFuzzyProviderTest, DeletionWorksAnywhere) {
fuzzy::Node node;
node.Insert(u"abc", 0);
std::vector<TestCase> cases = {
{
1,
u"xabc",
false,
{
u"abc",
},
},
{
1,
u"abxc",
false,
{
u"abc",
},
},
{
1,
u"abcx",
false,
{
u"abc",
},
},
{
2,
u"axbxc",
false,
{
u"abc",
},
},
};
VerifyCases(&node, cases);
}
// This test ensures a preference for longer results when edit distances are
// equal. This isn't an absolute requirement, and some relevance or probability
// guidance might be better, but this simple heuristic avoids creating shorter
// substring corrections, for example both "was" and "wash".
TEST_F(HistoryFuzzyProviderTest, LongerResultsArePreferred) {
fuzzy::Node node;
node.Insert(u"ao", 0);
node.Insert(u"eeo", 0);
std::vector<TestCase> cases = {
{
1,
u"eo",
false,
{
u"eeo",
},
},
};
VerifyCases(&node, cases);
}
TEST_F(HistoryFuzzyProviderTest, EmptyTrieRespectsToleranceSchedule) {
fuzzy::Node node;
TestCase test_case;
// Blank is produced by deleting at index zero.
test_case = {
0,
u"x",
false,
{
u"",
},
};
VerifyTestCase(&node, test_case, {.start_index = 0, .limit = 1});
// But this is not allowed when `start_index` is one.
test_case = {
0,
u"x",
false,
{},
};
VerifyTestCase(&node, test_case, {.start_index = 1, .limit = 1});
}
TEST_F(HistoryFuzzyProviderTest, ToleranceScheduleIsEnforced) {
fuzzy::Node node;
node.Insert(u"abcdefghijklmnopqrstuv", 0);
std::vector<TestCase> cases = {
{
0,
u"axcdef",
false,
{},
},
{
0,
u"abxdef",
false,
{
u"abcdef",
},
},
{
0,
u"abxxdef",
false,
{},
},
{
0,
u"abxdexghi",
false,
{},
},
{
0,
u"abxdefxhi",
false,
{
u"abcdefghi",
},
},
{
0,
u"abxdefxhijxlmnop",
false,
{
u"abcdefghijklmnop",
},
},
{
0,
u"abxdefxhijxlmnopqx",
false,
{},
},
};
VerifyCasesWithSchedule(&node, cases,
{.start_index = 2, .step_length = 4, .limit = 3});
}
// This test ensures that transposition swaps two adjacent characters with
// a single operation at edit distance one. Only directly adjacent characters
// can be transposed and nonadjacent character swaps still require two edits.
TEST_F(HistoryFuzzyProviderTest, TransposeIsEditDistanceOne) {
fuzzy::Node node;
node.Insert(u"transpose", 0);
std::vector<TestCase> cases = {
{
// Direct transposition 'op' -> 'po'. Finding the correction
// with tolerance 1 implies a single transposition edit was enough.
1,
u"transopse",
false,
{
u"transpose",
},
},
{
// Not a direct transposition, as the 's' is in between,
// so this case requires insert + delete pair (tolerance 2).
2,
u"transpeso",
false,
{
u"transpose",
},
},
};
VerifyCases(&node, cases);
}
// This test covers a subtlety in the algorithm. It ensures we don't take
// replacements at the same position of a previous insertion because we
// only want one of {I,1,e}~{R,0,t} (ler ter) | {R,0,e}~{I,0,t} (er ter).
TEST_F(HistoryFuzzyProviderTest, DoesNotProduceDuplicate) {
fuzzy::Node node;
node.Insert(u"ter", 0);
std::vector<TestCase> cases = {
{
2,
u"lr",
false,
{
u"ter",
},
},
};
VerifyCases(&node, cases);
}
TEST_F(HistoryFuzzyProviderTest, NodesDeleteAndPreserveStructure) {
fuzzy::Node node;
const auto checked_delete = [&](const std::u16string& s) {
node.Delete(s, 0);
return node.TerminalCount() == 0;
};
node.Insert(u"abc", 0);
CHECK(checked_delete(u"abc"));
CHECK(node.next.empty());
node.Insert(u"def", 0);
CHECK(!checked_delete(u"de"));
CHECK(!node.next.empty());
CHECK(checked_delete(u"def"));
node.Insert(u"ghi", 0);
node.Insert(u"ghost", 0);
CHECK(!checked_delete(u"gh"));
CHECK(!node.next.empty());
CHECK(!checked_delete(u"ghi"));
CHECK(checked_delete(u"ghost"));
CHECK(node.next.empty());
}
TEST_F(HistoryFuzzyProviderTest, NodesMaintainRelevanceTotalTerminalCount) {
fuzzy::Node node;
// Start with no terminals.
CHECK_EQ(node.TerminalCount(), 0);
// Support including the empty string as terminal.
node.Insert(u"", 0);
CHECK_EQ(node.TerminalCount(), 1);
// Ensure repeated insertions don't cause growth.
node.Insert(u"abc", 0);
CHECK_EQ(node.TerminalCount(), 2);
node.Insert(u"abc", 0);
CHECK_EQ(node.TerminalCount(), 2);
// Check further additions on different, same, and partially same paths.
node.Insert(u"def", 0);
CHECK_EQ(node.TerminalCount(), 3);
node.Insert(u"abcd", 0);
CHECK_EQ(node.TerminalCount(), 4);
node.Insert(u"ab", 0);
CHECK_EQ(node.next[u'a']->TerminalCount(), 3);
CHECK_EQ(node.next[u'd']->TerminalCount(), 1);
CHECK_EQ(node.TerminalCount(), 5);
// Check deletion, including no-op and empty string deletion.
node.Delete(u"a", 0);
CHECK_EQ(node.TerminalCount(), 5);
node.Delete(u"x", 0);
CHECK_EQ(node.TerminalCount(), 5);
node.Delete(u"", 0);
CHECK_EQ(node.TerminalCount(), 4);
node.Delete(u"abc", 0);
CHECK_EQ(node.TerminalCount(), 3);
node.Delete(u"abcd", 0);
CHECK_EQ(node.TerminalCount(), 2);
node.Delete(u"ab", 0);
CHECK_EQ(node.TerminalCount(), 1);
node.Delete(u"defx", 0);
CHECK_EQ(node.TerminalCount(), 1);
node.Delete(u"def", 0);
CHECK_EQ(node.TerminalCount(), 0);
}