blob: 80856f9603ab784f5865d54af953da6f6333fbcb [file] [log] [blame]
// Copyright (c) 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/components/string_matching/sequence_matcher.h"
#include "base/macros.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace chromeos {
namespace string_matching {
namespace {
constexpr bool kDefaultUseEditDistance = false;
using Match = SequenceMatcher::Match;
bool MatchEqual(const Match& match1, const Match& match2) {
return match1.pos_first_string == match2.pos_first_string &&
match1.pos_second_string == match2.pos_second_string &&
match1.length == match2.length;
}
} // namespace
class SequenceMatcherTest : public testing::Test {};
TEST_F(SequenceMatcherTest, TestEditDistance) {
// Transposition
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("abdc"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
1);
// Deletion
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcde"), base::UTF8ToUTF16("abcd"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
1);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("12"), base::UTF8ToUTF16(""),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
2);
// Insertion
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abc"), base::UTF8ToUTF16("abxbc"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
2);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abxbc"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
5);
// Substitution
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("book"), base::UTF8ToUTF16("back"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
2);
// Combination
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("caclulation"),
base::UTF8ToUTF16("calculator"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
3);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("sunday"),
base::UTF8ToUTF16("saturday"),
kDefaultUseEditDistance, 0.0)
.EditDistance(),
3);
}
TEST_F(SequenceMatcherTest, TestFindLongestMatch) {
SequenceMatcher sequence_match(base::UTF8ToUTF16("miscellanious"),
base::UTF8ToUTF16("miscellaneous"),
kDefaultUseEditDistance, 0.0);
ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(0, 13, 0, 13),
Match(0, 0, 9)));
ASSERT_TRUE(MatchEqual(sequence_match.FindLongestMatch(7, 13, 7, 13),
Match(10, 10, 3)));
ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16(""), base::UTF8ToUTF16("abcd"),
kDefaultUseEditDistance, 0.0)
.FindLongestMatch(0, 0, 0, 4),
Match(0, 0, 0)));
ASSERT_TRUE(MatchEqual(SequenceMatcher(base::UTF8ToUTF16("abababbababa"),
base::UTF8ToUTF16("ababbaba"),
kDefaultUseEditDistance, 0.0)
.FindLongestMatch(0, 12, 0, 8),
Match(2, 0, 8)));
ASSERT_TRUE(MatchEqual(
SequenceMatcher(base::UTF8ToUTF16("aaaaaa"), base::UTF8ToUTF16("aaaaa"),
kDefaultUseEditDistance, 0.0)
.FindLongestMatch(0, 6, 0, 5),
Match(0, 0, 5)));
}
TEST_F(SequenceMatcherTest, TestGetMatchingBlocks) {
SequenceMatcher sequence_match(
base::UTF8ToUTF16("This is a demo sentence!!!"),
base::UTF8ToUTF16("This demo sentence is good!!!"),
kDefaultUseEditDistance, 0.0);
const std::vector<Match> true_matches = {Match(0, 0, 4), Match(9, 4, 14),
Match(23, 26, 3), Match(26, 29, 0)};
const std::vector<Match> matches = sequence_match.GetMatchingBlocks();
ASSERT_EQ(matches.size(), 4ul);
for (int i = 0; i < 4; i++) {
ASSERT_TRUE(MatchEqual(matches[i], true_matches[i]));
}
}
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatio) {
ASSERT_EQ(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("adbc"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.75);
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.5);
}
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatioWithoutPenalty) {
// Two matching blocks, total matching blocks length is 4.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("word"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.533, 0.001);
// One matching block, length is 4.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("worl"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.533, 0.001);
// No matching block at all.
EXPECT_NEAR(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("xyz"),
kDefaultUseEditDistance, 0.0)
.Ratio(),
0.0, 0.001);
}
TEST_F(SequenceMatcherTest, TestSequenceMatcherRatioWithPenalty) {
// Two matching blocks, total matching blocks length is 4.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("word"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.1)
.Ratio(),
0.4825, 0.0001);
// One matching block, length is 4.
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("worl"),
base::UTF8ToUTF16("hello world"),
kDefaultUseEditDistance, 0.1)
.Ratio(),
0.533, 0.001);
// No matching block at all.
EXPECT_NEAR(
SequenceMatcher(base::UTF8ToUTF16("abcd"), base::UTF8ToUTF16("xyz"),
kDefaultUseEditDistance, 0.1)
.Ratio(),
0.0, 0.001);
}
TEST_F(SequenceMatcherTest, TestEditDistanceRatio) {
ASSERT_EQ(SequenceMatcher(base::UTF8ToUTF16("abcd"),
base::UTF8ToUTF16("adbc"), true, 0.0)
.Ratio(),
0.5);
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("white cats"),
base::UTF8ToUTF16("cats white"), true, 0.0)
.Ratio(),
0.2, 0.01);
// Totally different
EXPECT_NEAR(SequenceMatcher(base::UTF8ToUTF16("dog"),
base::UTF8ToUTF16("elphant"), true, 0.0)
.Ratio(),
0.0, 0.01);
}
} // namespace string_matching
} // namespace chromeos