// blob: 4a67a9b90fa60d41060132bb066156ef38e2c572 [file] [log] [blame]
// Copyright (c) 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chromeos/components/string_matching/fuzzy_tokenized_string_match.h"
#include "base/strings/utf_string_conversions.h"
#include "chromeos/components/string_matching/sequence_matcher.h"
#include "chromeos/components/string_matching/tokenized_string.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace chromeos {
namespace string_matching {
namespace {
// Shared value passed by the tests below wherever the functions under test
// take a penalty-rate argument, so all cases run with the same setting.
constexpr double kPartialMatchPenaltyRate = 0.9;
} // namespace
// Empty fixture: it adds no state or setup and only provides the common test
// suite name used by the TEST_F cases in this file.
class FuzzyTokenizedStringMatchTest : public testing::Test {};
// TODO(crbug.com/1018613): update the tests once params are consolidated.
// Checks PartialRatio() against the expected score for several string pairs,
// including one where the second string is empty.
TEST_F(FuzzyTokenizedStringMatchTest, PartialRatioTest) {
  FuzzyTokenizedStringMatch matcher;

  // Every case uses the same trailing arguments; only the two strings vary.
  const auto partial_ratio = [&matcher](const std::u16string& first,
                                        const std::u16string& second) {
    return matcher.PartialRatio(first, second, kPartialMatchPenaltyRate, false,
                                0.0);
  };

  EXPECT_NEAR(partial_ratio(u"abcde", u"ababcXXXbcdeY"), 0.6, 0.01);
  EXPECT_NEAR(partial_ratio(u"big string", u"strength"), 0.71, 0.01);
  // An empty second string must score exactly zero.
  EXPECT_EQ(partial_ratio(u"abc", u""), 0);
  EXPECT_NEAR(partial_ratio(u"different in order", u"order text"), 0.67, 0.01);
}
// Checks TokenSetRatio() for four query/text pairs. Each pair is scored twice:
// once with the boolean third argument set to true and once with it false.
TEST_F(FuzzyTokenizedStringMatchTest, TokenSetRatioTest) {
  FuzzyTokenizedStringMatch matcher;

  // Tokenizes both strings and forwards the fixed trailing arguments so each
  // case below only spells out what actually varies.
  const auto token_set_ratio = [&matcher](const std::u16string& q,
                                          const std::u16string& t, bool flag) {
    return matcher.TokenSetRatio(TokenizedString(q), TokenizedString(t), flag,
                                 kPartialMatchPenaltyRate, false, 0.0);
  };

  {
    const std::u16string query_str = u"order different in";
    const std::u16string text_str = u"text order";
    EXPECT_EQ(token_set_ratio(query_str, text_str, true), 1);
    EXPECT_NEAR(token_set_ratio(query_str, text_str, false), 0.67, 0.01);
  }
  {
    const std::u16string query_str = u"short text";
    const std::u16string text_str = u"this text is really really really long";
    EXPECT_EQ(token_set_ratio(query_str, text_str, true), 1);
    EXPECT_NEAR(token_set_ratio(query_str, text_str, false), 0.57, 0.01);
  }
  {
    const std::u16string query_str = u"common string";
    const std::u16string text_str = u"nothing is shared";
    EXPECT_NEAR(token_set_ratio(query_str, text_str, true), 0.38, 0.01);
    EXPECT_NEAR(token_set_ratio(query_str, text_str, false), 0.33, 0.01);
  }
  {
    const std::u16string query_str = u"token shared token same shared same";
    const std::u16string text_str = u"token shared token text text long";
    EXPECT_EQ(token_set_ratio(query_str, text_str, true), 1);
    EXPECT_NEAR(token_set_ratio(query_str, text_str, false), 0.83, 0.01);
  }
}
// Checks TokenSortRatio() for three query/text pairs. Each pair is scored
// twice: once with the boolean third argument set to true and once with it
// false.
TEST_F(FuzzyTokenizedStringMatchTest, TokenSortRatioTest) {
  FuzzyTokenizedStringMatch match;
  {
    std::u16string query(u"order different in");
    std::u16string text(u"text order");
    EXPECT_NEAR(
        match.TokenSortRatio(TokenizedString(query), TokenizedString(text),
                             true, kPartialMatchPenaltyRate, false, 0.0),
        0.67, 0.01);
    EXPECT_NEAR(
        match.TokenSortRatio(TokenizedString(query), TokenizedString(text),
                             false, kPartialMatchPenaltyRate, false, 0.0),
        0.36, 0.01);
  }
  {
    std::u16string query(u"short text");
    std::u16string text(u"this text is really really really long");
    // Expected value is 0.5 times the penalty rate handed to the call. Using
    // the shared constant keeps the expectation in sync with the argument,
    // and EXPECT_DOUBLE_EQ avoids a bit-exact comparison of computed doubles.
    EXPECT_DOUBLE_EQ(
        match.TokenSortRatio(TokenizedString(query), TokenizedString(text),
                             true, kPartialMatchPenaltyRate, false, 0.0),
        0.5 * kPartialMatchPenaltyRate);
    EXPECT_NEAR(
        match.TokenSortRatio(TokenizedString(query), TokenizedString(text),
                             false, kPartialMatchPenaltyRate, false, 0.0),
        0.33, 0.01);
  }
  {
    std::u16string query(u"common string");
    std::u16string text(u"nothing is shared");
    EXPECT_NEAR(
        match.TokenSortRatio(TokenizedString(query), TokenizedString(text),
                             true, kPartialMatchPenaltyRate, false, 0.0),
        0.38, 0.01);
    EXPECT_NEAR(
        match.TokenSortRatio(TokenizedString(query), TokenizedString(text),
                             false, kPartialMatchPenaltyRate, false, 0.0),
        0.33, 0.01);
  }
}
// Checks WeightedRatio() against the expected score for four query/text
// pairs, the last of which pairs a short query with a much longer text.
TEST_F(FuzzyTokenizedStringMatchTest, WeightedRatio) {
  FuzzyTokenizedStringMatch matcher;

  // Tokenizes both strings and applies the trailing arguments shared by all
  // cases in this test.
  const auto weighted_ratio = [&matcher](const std::u16string& q,
                                         const std::u16string& t) {
    return matcher.WeightedRatio(TokenizedString(q), TokenizedString(t),
                                 kPartialMatchPenaltyRate, false, 0.0);
  };

  {
    const std::u16string query_str = u"anonymous";
    const std::u16string text_str = u"famous";
    EXPECT_NEAR(weighted_ratio(query_str, text_str), 0.67, 0.01);
  }
  {
    const std::u16string query_str = u"Clash.of.clan";
    const std::u16string text_str = u"ClashOfTitan";
    EXPECT_NEAR(weighted_ratio(query_str, text_str), 0.81, 0.01);
  }
  {
    const std::u16string query_str = u"final fantasy";
    const std::u16string text_str = u"finalfantasy";
    EXPECT_NEAR(weighted_ratio(query_str, text_str), 0.96, 0.01);
  }
  {
    const std::u16string query_str = u"short text!!!";
    const std::u16string text_str =
        u"this sentence is much much much much much longer "
        u"than the text before";
    EXPECT_NEAR(weighted_ratio(query_str, text_str), 0.49, 0.01);
  }
}
// Checks the static PrefixMatcher() for several query/text pairs. Some pairs
// are expected to score close to a given value, others exactly 0.
TEST_F(FuzzyTokenizedStringMatchTest, PrefixMatcherTest) {
  // Tokenizes both strings and returns the PrefixMatcher score for the pair.
  const auto prefix_score = [](const std::u16string& q,
                               const std::u16string& t) {
    return FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(q),
                                                    TokenizedString(t));
  };

  {
    const std::u16string query_str = u"clas";
    const std::u16string text_str = u"Clash of Clan";
    EXPECT_NEAR(prefix_score(query_str, text_str), 0.94, 0.01);
  }
  {
    const std::u16string query_str = u"clash clan";
    const std::u16string text_str = u"Clash of Clan";
    EXPECT_EQ(prefix_score(query_str, text_str), 0.0);
  }
  {
    const std::u16string query_str = u"c o c";
    const std::u16string text_str = u"Clash of Clan";
    EXPECT_NEAR(prefix_score(query_str, text_str), 0.84, 0.01);
  }
  {
    const std::u16string query_str = u"wifi";
    const std::u16string text_str = u"wi-fi";
    EXPECT_NEAR(prefix_score(query_str, text_str), 0.91, 0.01);
  }
  {
    const std::u16string query_str = u"clam";
    const std::u16string text_str = u"Clash of Clan";
    EXPECT_EQ(prefix_score(query_str, text_str), 0.0);
  }
  {
    const std::u16string query_str = u"rp";
    const std::u16string text_str = u"Remove Google Play Store";
    EXPECT_EQ(prefix_score(query_str, text_str), 0.0);
  }
  {
    const std::u16string query_str = u"remove play";
    const std::u16string text_str = u"Remove Google Play Store";
    EXPECT_EQ(prefix_score(query_str, text_str), 0.0);
  }
  {
    const std::u16string query_str = u"google play";
    const std::u16string text_str = u"Remove Google Play Store";
    EXPECT_NEAR(prefix_score(query_str, text_str), 0.99, 0.01);
  }
}
// Checks IsRelevant() for three query/text pairs using thresholds of 0.4 and
// 0.25; only the "Clash.of.clan" / "ClashOfTitan" pair is expected to be
// relevant.
TEST_F(FuzzyTokenizedStringMatchTest, ParamThresholdTest1) {
  FuzzyTokenizedStringMatch matcher;

  // Tokenizes both strings and calls IsRelevant() with the given threshold;
  // the remaining arguments are the same for every case in this test.
  const auto is_relevant = [&matcher](const std::u16string& q,
                                      const std::u16string& t,
                                      double threshold) {
    return matcher.IsRelevant(TokenizedString(q), TokenizedString(t), threshold,
                              true, false, kPartialMatchPenaltyRate, 0.0);
  };

  {
    const std::u16string query_str = u"anonymous";
    const std::u16string text_str = u"famous";
    EXPECT_FALSE(is_relevant(query_str, text_str, 0.4));
  }
  {
    const std::u16string query_str = u"CC";
    const std::u16string text_str = u"Clash Of Clan";
    EXPECT_FALSE(is_relevant(query_str, text_str, 0.25));
  }
  {
    const std::u16string query_str = u"Clash.of.clan";
    const std::u16string text_str = u"ClashOfTitan";
    EXPECT_TRUE(is_relevant(query_str, text_str, 0.4));
  }
}
// Checks IsRelevant() for the same three query/text pairs as
// ParamThresholdTest1, with the threshold raised from 0.4 to 0.5 for the
// first and last pair. All three are expected to be not relevant.
TEST_F(FuzzyTokenizedStringMatchTest, ParamThresholdTest2) {
  FuzzyTokenizedStringMatch match;
  {
    std::u16string query(u"anonymous");
    std::u16string text(u"famous");
    EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text),
                                  0.5, true, false, kPartialMatchPenaltyRate,
                                  0.0));
  }
  {
    std::u16string query(u"CC");
    std::u16string text(u"Clash Of Clan");
    // Pass the final argument explicitly, as every other IsRelevant() call in
    // this file does, so this case does not depend on a default value.
    EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text),
                                  0.25, true, false, kPartialMatchPenaltyRate,
                                  0.0));
  }
  {
    std::u16string query(u"Clash.of.clan");
    std::u16string text(u"ClashOfTitan");
    EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text),
                                  0.5, true, false, kPartialMatchPenaltyRate,
                                  0.0));
  }
}
// Calls IsRelevant() with the two boolean arguments set to (false, true),
// the reverse of the ParamThreshold tests, and checks both the returned
// verdict and the relevance() value stored on the matcher afterwards.
TEST_F(FuzzyTokenizedStringMatchTest, OtherParamTest) {
  FuzzyTokenizedStringMatch matcher;
  const std::u16string query_str = u"anonymous";
  const std::u16string text_str = u"famous";

  const bool relevant = matcher.IsRelevant(
      TokenizedString(query_str), TokenizedString(text_str), 0.35, false, true,
      kPartialMatchPenaltyRate, 0.0);

  EXPECT_FALSE(relevant);
  EXPECT_NEAR(matcher.relevance(), 0.33 / 2, 0.01);
}
// Checks a query and text that differ only in letter case ("yat" vs "YaT"):
// IsRelevant() must return true, relevance() must be 1.0, and hits() must
// hold a single hit spanning [0, 3).
TEST_F(FuzzyTokenizedStringMatchTest, ExactTextMatchTest) {
  FuzzyTokenizedStringMatch match;
  std::u16string query(u"yat");
  std::u16string text(u"YaT");
  EXPECT_TRUE(match.IsRelevant(TokenizedString(query), TokenizedString(text),
                               0.35, false, true, kPartialMatchPenaltyRate,
                               0.0));
  EXPECT_DOUBLE_EQ(match.relevance(), 1.0);
  // Fatal assertion: the lines below index hits()[0], which would read out of
  // bounds if the hit list were empty.
  ASSERT_EQ(match.hits().size(), 1u);
  EXPECT_EQ(match.hits()[0].start(), 0u);
  EXPECT_EQ(match.hits()[0].end(), 3u);
}
} // namespace string_matching
} // namespace chromeos