| // Copyright (c) 2019 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "chromeos/components/string_matching/fuzzy_tokenized_string_match.h" |
| |
| #include "base/macros.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "chromeos/components/string_matching/sequence_matcher.h" |
| #include "chromeos/components/string_matching/tokenized_string.h" |
| #include "testing/gtest/include/gtest/gtest.h" |
| |
| namespace chromeos { |
| namespace string_matching { |
| |
namespace {

// Value the tests in this file pass wherever a matcher call takes a
// partial-match penalty rate argument.
constexpr double kPartialMatchPenaltyRate{0.9};

}  // namespace
| |
| class FuzzyTokenizedStringMatchTest : public testing::Test {}; |
| |
| // TODO(crbug.com/1018613): update the tests once params are consolidated. |
| TEST_F(FuzzyTokenizedStringMatchTest, PartialRatioTest) { |
| FuzzyTokenizedStringMatch match; |
| EXPECT_NEAR(match.PartialRatio(base::UTF8ToUTF16("abcde"), |
| base::UTF8ToUTF16("ababcXXXbcdeY"), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.6, 0.01); |
| EXPECT_NEAR(match.PartialRatio(base::UTF8ToUTF16("big string"), |
| base::UTF8ToUTF16("strength"), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.71, 0.01); |
| EXPECT_EQ(match.PartialRatio(base::UTF8ToUTF16("abc"), base::UTF8ToUTF16(""), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0); |
| EXPECT_NEAR(match.PartialRatio(base::UTF8ToUTF16("different in order"), |
| base::UTF8ToUTF16("order text"), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.67, 0.01); |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, TokenSetRatioTest) { |
| FuzzyTokenizedStringMatch match; |
| { |
| base::string16 query(base::UTF8ToUTF16("order different in")); |
| base::string16 text(base::UTF8ToUTF16("text order")); |
| EXPECT_EQ(match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| true, kPartialMatchPenaltyRate, false, 0.0), |
| 1); |
| EXPECT_NEAR( |
| match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.67, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("short text")); |
| base::string16 text( |
| base::UTF8ToUTF16("this text is really really really long")); |
| EXPECT_EQ(match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| true, kPartialMatchPenaltyRate, false, 0.0), |
| 1); |
| EXPECT_NEAR( |
| match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.57, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("common string")); |
| base::string16 text(base::UTF8ToUTF16("nothing is shared")); |
| EXPECT_NEAR( |
| match.TokenSetRatio(TokenizedString(query), TokenizedString(text), true, |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.38, 0.01); |
| EXPECT_NEAR( |
| match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.33, 0.01); |
| } |
| { |
| base::string16 query( |
| base::UTF8ToUTF16("token shared token same shared same")); |
| base::string16 text(base::UTF8ToUTF16("token shared token text text long")); |
| EXPECT_EQ(match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| true, kPartialMatchPenaltyRate, false, 0.0), |
| 1); |
| EXPECT_NEAR( |
| match.TokenSetRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.83, 0.01); |
| } |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, TokenSortRatioTest) { |
| FuzzyTokenizedStringMatch match; |
| { |
| base::string16 query(base::UTF8ToUTF16("order different in")); |
| base::string16 text(base::UTF8ToUTF16("text order")); |
| EXPECT_NEAR( |
| match.TokenSortRatio(TokenizedString(query), TokenizedString(text), |
| true, kPartialMatchPenaltyRate, false, 0.0), |
| 0.67, 0.01); |
| EXPECT_NEAR( |
| match.TokenSortRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.36, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("short text")); |
| base::string16 text( |
| base::UTF8ToUTF16("this text is really really really long")); |
| EXPECT_EQ( |
| match.TokenSortRatio(TokenizedString(query), TokenizedString(text), |
| true, kPartialMatchPenaltyRate, false, 0.0), |
| 0.5 * std::pow(0.9, 1)); |
| EXPECT_NEAR( |
| match.TokenSortRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.33, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("common string")); |
| base::string16 text(base::UTF8ToUTF16("nothing is shared")); |
| EXPECT_NEAR( |
| match.TokenSortRatio(TokenizedString(query), TokenizedString(text), |
| true, kPartialMatchPenaltyRate, false, 0.0), |
| 0.38, 0.01); |
| EXPECT_NEAR( |
| match.TokenSortRatio(TokenizedString(query), TokenizedString(text), |
| false, kPartialMatchPenaltyRate, false, 0.0), |
| 0.33, 0.01); |
| } |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, WeightedRatio) { |
| FuzzyTokenizedStringMatch match; |
| { |
| base::string16 query(base::UTF8ToUTF16("anonymous")); |
| base::string16 text(base::UTF8ToUTF16("famous")); |
| EXPECT_NEAR( |
| match.WeightedRatio(TokenizedString(query), TokenizedString(text), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.67, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("Clash.of.clan")); |
| base::string16 text(base::UTF8ToUTF16("ClashOfTitan")); |
| EXPECT_NEAR( |
| match.WeightedRatio(TokenizedString(query), TokenizedString(text), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.81, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("final fantasy")); |
| base::string16 text(base::UTF8ToUTF16("finalfantasy")); |
| EXPECT_NEAR( |
| match.WeightedRatio(TokenizedString(query), TokenizedString(text), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.96, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("short text!!!")); |
| base::string16 text( |
| base::UTF8ToUTF16("this sentence is much much much much much longer " |
| "than the text before")); |
| EXPECT_NEAR( |
| match.WeightedRatio(TokenizedString(query), TokenizedString(text), |
| kPartialMatchPenaltyRate, false, 0.0), |
| 0.49, 0.01); |
| } |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, PrefixMatcherTest) { |
| { |
| base::string16 query(base::UTF8ToUTF16("clas")); |
| base::string16 text(base::UTF8ToUTF16("Clash of Clan")); |
| EXPECT_NEAR(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.94, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("clash clan")); |
| base::string16 text(base::UTF8ToUTF16("Clash of Clan")); |
| EXPECT_EQ(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.0); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("c o c")); |
| base::string16 text(base::UTF8ToUTF16("Clash of Clan")); |
| EXPECT_NEAR(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.84, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("wifi")); |
| base::string16 text(base::UTF8ToUTF16("wi-fi")); |
| EXPECT_NEAR(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.91, 0.01); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("clam")); |
| base::string16 text(base::UTF8ToUTF16("Clash of Clan")); |
| EXPECT_EQ(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.0); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("rp")); |
| base::string16 text(base::UTF8ToUTF16("Remove Google Play Store")); |
| EXPECT_EQ(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.0); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("remove play")); |
| base::string16 text(base::UTF8ToUTF16("Remove Google Play Store")); |
| EXPECT_EQ(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.0); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("google play")); |
| base::string16 text(base::UTF8ToUTF16("Remove Google Play Store")); |
| EXPECT_NEAR(FuzzyTokenizedStringMatch::PrefixMatcher(TokenizedString(query), |
| TokenizedString(text)), |
| 0.99, 0.01); |
| } |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, ParamThresholdTest1) { |
| FuzzyTokenizedStringMatch match; |
| { |
| base::string16 query(base::UTF8ToUTF16("anonymous")); |
| base::string16 text(base::UTF8ToUTF16("famous")); |
| EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.4, false, true, false, |
| kPartialMatchPenaltyRate, 0.0)); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("CC")); |
| base::string16 text(base::UTF8ToUTF16("Clash Of Clan")); |
| EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.25, false, true, false, |
| kPartialMatchPenaltyRate, 0.0)); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("Clash.of.clan")); |
| base::string16 text(base::UTF8ToUTF16("ClashOfTitan")); |
| EXPECT_TRUE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.4, false, true, false, |
| kPartialMatchPenaltyRate, 0.0)); |
| } |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, ParamThresholdTest2) { |
| FuzzyTokenizedStringMatch match; |
| { |
| base::string16 query(base::UTF8ToUTF16("anonymous")); |
| base::string16 text(base::UTF8ToUTF16("famous")); |
| EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.5, false, true, false, |
| kPartialMatchPenaltyRate, 0.0)); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("CC")); |
| base::string16 text(base::UTF8ToUTF16("Clash Of Clan")); |
| EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.25, false, true, false, |
| kPartialMatchPenaltyRate)); |
| } |
| { |
| base::string16 query(base::UTF8ToUTF16("Clash.of.clan")); |
| base::string16 text(base::UTF8ToUTF16("ClashOfTitan")); |
| EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.5, false, true, false, |
| kPartialMatchPenaltyRate, 0.0)); |
| } |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, OtherParamTest) { |
| FuzzyTokenizedStringMatch match; |
| base::string16 query(base::UTF8ToUTF16("anonymous")); |
| base::string16 text(base::UTF8ToUTF16("famous")); |
| EXPECT_FALSE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.35, false, false, true, |
| kPartialMatchPenaltyRate, 0.0)); |
| EXPECT_NEAR(match.relevance(), 0.33 / 2, 0.01); |
| } |
| |
| TEST_F(FuzzyTokenizedStringMatchTest, ExactTextMatchTest) { |
| FuzzyTokenizedStringMatch match; |
| base::string16 query(base::UTF8ToUTF16("yat")); |
| base::string16 text(base::UTF8ToUTF16("YaT")); |
| EXPECT_TRUE(match.IsRelevant(TokenizedString(query), TokenizedString(text), |
| 0.35, false, false, true, |
| kPartialMatchPenaltyRate, 0.0)); |
| EXPECT_DOUBLE_EQ(match.relevance(), 1.0); |
| EXPECT_EQ(match.hits().size(), 1u); |
| EXPECT_EQ(match.hits()[0].start(), 0u); |
| EXPECT_EQ(match.hits()[0].end(), 3u); |
| } |
| |
| } // namespace string_matching |
| } // namespace chromeos |