blob: 8920820c8f0214dc22204cb2c0af4eb0742f8dbe [file] [log] [blame]
// Copyright 2014 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif
#include "components/bookmarks/browser/titled_url_index.h"
#include <array>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/test/scoped_feature_list.h"
#include "components/bookmarks/browser/titled_url_match.h"
#include "components/bookmarks/browser/titled_url_node.h"
#include "components/bookmarks/browser/typed_count_sorter.h"
#include "components/bookmarks/common/bookmark_features.h"
#include "components/bookmarks/test/test_bookmark_client.h"
#include "components/query_parser/query_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
using base::UTF8ToUTF16;
namespace bookmarks {
namespace {
// Used for sorting in combination with TypedCountSorter.
class BookmarkClientMock : public TestBookmarkClient {
public:
explicit BookmarkClientMock(const std::map<GURL, int>& typed_count_map)
: typed_count_map_(typed_count_map) {}
BookmarkClientMock(const BookmarkClientMock&) = delete;
BookmarkClientMock& operator=(const BookmarkClientMock&) = delete;
bool SupportsTypedCountForUrls() override { return true; }
void GetTypedCountForUrls(UrlTypedCountMap* url_typed_count_map) override {
for (auto& url_typed_count_pair : *url_typed_count_map) {
const GURL* url = url_typed_count_pair.first;
if (!url)
continue;
auto found = typed_count_map_.find(*url);
if (found == typed_count_map_.end())
continue;
url_typed_count_pair.second = found->second;
}
}
private:
const std::map<GURL, int> typed_count_map_;
};
// Minimal implementation of TitledUrlNode.
class TestTitledUrlNode : public TitledUrlNode {
public:
TestTitledUrlNode(const std::u16string& title,
const GURL& url,
const std::u16string& ancestor_title)
: title_(title), url_(url), ancestor_title_(ancestor_title) {}
~TestTitledUrlNode() override = default;
const std::u16string& GetTitledUrlNodeTitle() const override {
return title_;
}
const GURL& GetTitledUrlNodeUrl() const override { return url_; }
std::vector<std::u16string_view> GetTitledUrlNodeAncestorTitles()
const override {
return {ancestor_title_};
}
private:
std::u16string title_;
GURL url_;
std::u16string ancestor_title_;
};
} // namespace
class TitledUrlIndexFake : public TitledUrlIndex {
public:
using TitledUrlIndex::ExtractQueryWords;
using TitledUrlIndex::MatchTitledUrlNodeWithQuery;
using TitledUrlIndex::RetrieveNodesMatchingAllTerms;
using TitledUrlIndex::RetrieveNodesMatchingAnyTerms;
// Helper to call `TitledUrlIndex::MatchTitledUrlNodeWithQuery` with simpler
// parameters, returning a bool indicating success.
bool MatchTitledUrlNodeWithQuery(std::u16string node_title,
std::u16string query) {
TestTitledUrlNode node{node_title, GURL("http://foo.com"), u""};
std::vector<std::u16string> query_terms =
TitledUrlIndexFake::ExtractQueryWords(query);
query_parser::QueryNodeVector query_nodes;
query_parser::QueryParser::ParseQueryNodes(
query, query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH,
&query_nodes);
return MatchTitledUrlNodeWithQuery(&node, query_nodes, query_terms)
.has_value();
}
};
namespace {
class TitledUrlIndexTest : public testing::TestWithParam<bool> {
public:
const GURL kAboutBlankURL = GURL("about:blank");
TitledUrlIndexTest() {
if (GetParam()) {
feature_list_.InitAndEnableFeature(
kBookmarksUseBinaryTreeInTitledUrlIndex);
} else {
feature_list_.InitAndDisableFeature(
kBookmarksUseBinaryTreeInTitledUrlIndex);
}
ResetNodes();
}
~TitledUrlIndexTest() override = default;
void ResetNodes() {
index_ = std::make_unique<TitledUrlIndexFake>();
owned_nodes_.clear();
owned_path_nodes_.clear();
}
std::pair<TitledUrlNode*, TitledUrlNode*> AddNode(
const std::string& title,
const GURL& url,
const std::string& ancestor_title = "") {
return AddNode(UTF8ToUTF16(title), url, UTF8ToUTF16(ancestor_title));
}
std::pair<TitledUrlNode*, TitledUrlNode*> AddNode(
const std::u16string& title,
const GURL& url,
const std::u16string& ancestor_title = std::u16string()) {
// Add the node.
owned_nodes_.push_back(
std::make_unique<TestTitledUrlNode>(title, url, ancestor_title));
index_->Add(owned_nodes_.back().get());
// Add its parent node.
owned_path_nodes_.push_back(
std::make_unique<TestTitledUrlNode>(ancestor_title, GURL{}, u""));
index_->AddPath(owned_path_nodes_.back().get());
return {owned_nodes_.back().get(), owned_path_nodes_.back().get()};
}
std::vector<TitledUrlMatch> GetResultsMatching(const std::string& query,
size_t max_count) {
return index_->GetResultsMatching(UTF8ToUTF16(query), max_count,
query_parser::MatchingAlgorithm::DEFAULT);
}
void ExpectMatches(const std::string& query,
const char** expected_titles,
size_t expected_count) {
std::vector<std::string> title_vector;
for (size_t i = 0; i < expected_count; ++i)
title_vector.push_back(expected_titles[i]);
ExpectMatches(query, query_parser::MatchingAlgorithm::DEFAULT,
title_vector);
}
void ExpectMatches(const std::string& query,
query_parser::MatchingAlgorithm matching_algorithm,
const std::vector<std::string>& expected_titles) {
std::vector<TitledUrlMatch> matches = index_->GetResultsMatching(
UTF8ToUTF16(query), 1000, matching_algorithm);
ASSERT_EQ(expected_titles.size(), matches.size());
for (const std::string& expected_title : expected_titles) {
bool found = false;
for (size_t j = 0; j < matches.size(); ++j) {
const std::u16string& title = matches[j].node->GetTitledUrlNodeTitle();
if (UTF8ToUTF16(expected_title) == title) {
matches.erase(matches.begin() + j);
found = true;
break;
}
}
ASSERT_TRUE(found);
}
}
void ExtractMatchPositions(const std::string& string,
TitledUrlMatch::MatchPositions* matches) {
for (std::string_view match : base::SplitStringPiece(
string, ":", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL)) {
std::vector<std::string_view> chunks = base::SplitStringPiece(
match, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
ASSERT_EQ(2U, chunks.size());
matches->push_back(TitledUrlMatch::MatchPosition());
int chunks0, chunks1;
EXPECT_TRUE(base::StringToInt(chunks[0], &chunks0));
EXPECT_TRUE(base::StringToInt(chunks[1], &chunks1));
matches->back().first = chunks0;
matches->back().second = chunks1;
}
}
void ExpectMatchPositions(
const TitledUrlMatch::MatchPositions& actual_positions,
const TitledUrlMatch::MatchPositions& expected_positions) {
ASSERT_EQ(expected_positions.size(), actual_positions.size());
for (size_t i = 0; i < expected_positions.size(); ++i) {
EXPECT_EQ(expected_positions[i].first, actual_positions[i].first);
EXPECT_EQ(expected_positions[i].second, actual_positions[i].second);
}
}
void VerifyRetrieveNodesMatchingAnyTerms(
const std::string& query,
const std::vector<int> expected_node_indexes) {
SCOPED_TRACE("Query: " + query);
std::vector<std::u16string> terms =
base::SplitString(base::UTF8ToUTF16(query), u" ", base::TRIM_WHITESPACE,
base::SPLIT_WANT_ALL);
auto matches = index()->RetrieveNodesMatchingAnyTerms(
terms, query_parser::MatchingAlgorithm::DEFAULT, 3);
// Verify the correct nodes matched.
ASSERT_EQ(matches.size(), expected_node_indexes.size());
for (int index : expected_node_indexes) {
SCOPED_TRACE(
"node: " +
base::UTF16ToUTF8(owned_nodes_[index]->GetTitledUrlNodeTitle()));
EXPECT_TRUE(matches.contains(owned_nodes_[index].get()));
}
}
TitledUrlIndexFake* index() { return index_.get(); }
private:
base::test::ScopedFeatureList feature_list_;
std::vector<std::unique_ptr<TestTitledUrlNode>> owned_nodes_;
std::vector<std::unique_ptr<TestTitledUrlNode>> owned_path_nodes_;
std::unique_ptr<TitledUrlIndexFake> index_;
};
// Various permutations with differing input, queries and output that exercises
// all query paths.
TEST_P(TitledUrlIndexTest, GetResultsMatching) {
struct TestData {
const std::string titles;
const std::string query;
const std::string expected;
} data[] = {
// Trivial test case of only one term, exact match.
{"a;b", "A", "a"},
// Two terms, exact matches.
{"a b;b", "a b", "a b"},
// Prefix match, one term.
{"abcd;abc;b", "abc", "abcd;abc"},
// Prefix match, multiple terms.
{"abcd cdef;abcd;abcd cdefg", "abc cde", "abcd cdef;abcd cdefg"},
// Exact and prefix match.
{"ab cdef;abcd;abcd cdefg", "ab cdef", "ab cdef"},
// Exact and prefix match.
{"ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab", "ab cde ghi",
"ab cdef ghij"},
// Title with term multiple times.
{"ab ab", "ab", "ab ab"},
// Make sure quotes don't do a prefix match.
{"think", "\"thi\"", ""},
// Prefix matches against multiple candidates.
{"abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},
// Multiple prefix matches (with a lot of redundancy) against multiple
// candidates.
{"abc1 abc2 abc3 abc4 def1 def2 def3 def4",
"abc def abc def abc def abc def abc def",
"abc1 abc2 abc3 abc4 def1 def2 def3 def4"},
// Prefix match on the first term.
{"abc", "a", ""},
// Prefix match on subsequent terms.
{"abc def", "abc d", ""},
};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
ResetNodes();
for (const std::string& title :
base::SplitString(test_data.titles, ";", base::TRIM_WHITESPACE,
base::SPLIT_WANT_ALL)) {
AddNode(title, kAboutBlankURL);
}
std::vector<std::string> expected;
if (!test_data.expected.empty()) {
expected = base::SplitString(test_data.expected, ";",
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
}
ExpectMatches(test_data.query, query_parser::MatchingAlgorithm::DEFAULT,
expected);
}
}
TEST_P(TitledUrlIndexTest, GetResultsMatchingAlwaysPrefixSearch) {
struct TestData {
const std::string titles;
const std::string query;
const std::string expected;
} data[] = {
// Trivial test case of only one term, exact match.
{"z;y", "Z", "z"},
// Prefix match, one term.
{"abcd;abc;b", "abc", "abcd;abc"},
// Prefix match, multiple terms.
{"abcd cdef;abcd;abcd cdefg", "abc cde", "abcd cdef;abcd cdefg"},
// Exact and prefix match.
{"ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab", "ab cde ghi",
"ab cdef ghij"},
// Title with term multiple times.
{"ab ab", "ab", "ab ab"},
// Make sure quotes don't do a prefix match.
{"think", "\"thi\"", ""},
// Prefix matches against multiple candidates.
{"abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},
// Prefix match on the first term.
{"abc", "a", "abc"},
// Prefix match on subsequent terms.
{"abc def", "abc d", "abc def"},
// Exact and prefix match.
{"ab cdef;abcd;abcd cdefg", "ab cdef", "ab cdef;abcd cdefg"},
};
for (const TestData& test_data : data) {
ResetNodes();
for (const std::string& title :
base::SplitString(test_data.titles, ";", base::TRIM_WHITESPACE,
base::SPLIT_WANT_ALL)) {
AddNode(title, kAboutBlankURL);
}
std::vector<std::string> expected;
if (!test_data.expected.empty()) {
expected = base::SplitString(test_data.expected, ";",
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
}
ExpectMatches(test_data.query,
query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH,
expected);
}
}
// Analogous to GetResultsMatching, this test tests various permutations
// of title, URL, and input to see if the title/URL matches the input as
// expected.
TEST_P(TitledUrlIndexTest, GetResultsMatchingWithURLs) {
struct TestData {
const std::string query;
const std::string title;
const std::string url;
const bool should_be_retrieved;
} data[] = {
// Test single-word inputs. Include both exact matches and prefix
// matches.
{"foo", "Foo", "http://www.bar.com/", true},
{"foo", "Foodie", "http://www.bar.com/", true},
{"foo", "Bar", "http://www.foo.com/", true},
{"foo", "Bar", "http://www.foodie.com/", true},
{"foo", "Foo", "http://www.foo.com/", true},
{"foo", "Bar", "http://www.bar.com/", false},
{"foo", "Bar", "http://www.bar.com/blah/foo/blah-again/ ", true},
{"foo", "Bar", "http://www.bar.com/blah/foodie/blah-again/ ", true},
{"foo", "Bar", "http://www.bar.com/blah-foo/blah-again/ ", true},
{"foo", "Bar", "http://www.bar.com/blah-foodie/blah-again/ ", true},
{"foo", "Bar", "http://www.bar.com/blahafoo/blah-again/ ", false},
// Test multi-word inputs.
{"foo bar", "Foo Bar", "http://baz.com/", true},
{"foo bar", "Foodie Bar", "http://baz.com/", true},
{"bar foo", "Foo Bar", "http://baz.com/", true},
{"bar foo", "Foodie Barly", "http://baz.com/", true},
{"foo bar", "Foo Baz", "http://baz.com/", false},
{"foo bar", "Foo Baz", "http://bar.com/", true},
{"foo bar", "Foo Baz", "http://barly.com/", true},
{"foo bar", "Foodie Baz", "http://barly.com/", true},
{"bar foo", "Foo Baz", "http://bar.com/", true},
{"bar foo", "Foo Baz", "http://barly.com/", true},
{"foo bar", "Baz Bar", "http://blah.com/foo", true},
{"foo bar", "Baz Barly", "http://blah.com/foodie", true},
{"foo bar", "Baz Bur", "http://blah.com/foo/bar", true},
{"foo bar", "Baz Bur", "http://blah.com/food/barly", true},
{"foo bar", "Baz Bur", "http://bar.com/blah/foo", true},
{"foo bar", "Baz Bur", "http://barly.com/blah/food", true},
{"foo bar", "Baz Bur", "http://bar.com/blah/flub", false},
{"foo bar", "Baz Bur", "http://foo.com/blah/flub", false}};
for (const TestData& test_data : data) {
ResetNodes();
AddNode(test_data.title, GURL(test_data.url));
std::vector<std::string> expected;
if (test_data.should_be_retrieved)
expected.push_back(test_data.title);
ExpectMatches(test_data.query, query_parser::MatchingAlgorithm::DEFAULT,
expected);
}
}
TEST_P(TitledUrlIndexTest, GetResultsMatchingWithSymbols) {
auto does_query_match_title = [&](std::string query, std::string title) {
ResetNodes();
AddNode(title, kAboutBlankURL);
size_t num_matches = GetResultsMatching(query, 10).size();
EXPECT_LE(num_matches, 1u);
return num_matches > 0;
};
// Symbols should act as word breaks and don't need to match between the query
// and title.
EXPECT_TRUE(does_query_match_title("abc@xyz", "xyz@abc"));
EXPECT_TRUE(does_query_match_title("abc&xyz", "xyz&abc"));
EXPECT_TRUE(does_query_match_title("abc@xyz", "xyz abc"));
EXPECT_TRUE(does_query_match_title("abc xyz", "xyz@abc"));
// Treating symbols as word breaks doesn't mean simply pretending they're not
// there.
EXPECT_FALSE(does_query_match_title("xyz@abc", "xyzabc"));
EXPECT_FALSE(does_query_match_title("xyzabc", "xyz@abc"));
// '@' as the first character of the query should be treated special since it
// indicates the user likely wants search scope.
EXPECT_FALSE(does_query_match_title("@abc", "@abc"));
// Other symbols shouldn't have that exception.
EXPECT_TRUE(does_query_match_title("&abc", "@abc"));
// '@' as the first character of the node shouldn't have that exception.
EXPECT_TRUE(does_query_match_title("abc", "@abc"));
// '@' in other locations in the query shouldn't have that exception.
EXPECT_TRUE(does_query_match_title("abc @abc", "@abc"));
// '@' followed by other symbols shouldn't have that exception.
EXPECT_TRUE(does_query_match_title("@ abc", "@abc"));
EXPECT_TRUE(does_query_match_title("@@abc", "@abc"));
EXPECT_TRUE(does_query_match_title("@&abc", "@abc"));
// '@' input or title shouldn't crash.
EXPECT_FALSE(does_query_match_title("@", "@"));
}
TEST_P(TitledUrlIndexTest, Normalization) {
struct TestData {
const char* const title;
const char* const query;
} data[] = {{"fooa\xcc\x88-test", "foo\xc3\xa4-test"},
{"fooa\xcc\x88-test", "fooa\xcc\x88-test"},
{"fooa\xcc\x88-test", "foo\xc3\xa4"},
{"fooa\xcc\x88-test", "fooa\xcc\x88"},
{"fooa\xcc\x88-test", "foo"},
{"foo\xc3\xa4-test", "foo\xc3\xa4-test"},
{"foo\xc3\xa4-test", "fooa\xcc\x88-test"},
{"foo\xc3\xa4-test", "foo\xc3\xa4"},
{"foo\xc3\xa4-test", "fooa\xcc\x88"},
{"foo\xc3\xa4-test", "foo"},
{"foo", "foo"}};
for (const TestData& test_data : data) {
ResetNodes();
AddNode(test_data.title, kAboutBlankURL);
std::vector<TitledUrlMatch> matches =
GetResultsMatching(test_data.query, 10);
EXPECT_EQ(1u, matches.size());
}
}
// Makes sure match positions are updated appropriately for title matches.
TEST_P(TitledUrlIndexTest, MatchPositionsTitles) {
struct TestData {
const std::string title;
const std::string query;
const std::string expected_title_match_positions;
} data[] = {
// Trivial test case of only one term, exact match.
{"a", "A", "0,1"},
{"foo bar", "bar", "4,7"},
{"fooey bark", "bar foo", "0,3:6,9"},
// Non-trivial tests.
{"foobar foo", "foobar foo", "0,6:7,10"},
{"foobar foo", "foo foobar", "0,6:7,10"},
{"foobar foobar", "foobar foo", "0,6:7,13"},
{"foobar foobar", "foo foobar", "0,6:7,13"},
};
for (const TestData& test_data : data) {
ResetNodes();
AddNode(test_data.title, kAboutBlankURL);
std::vector<TitledUrlMatch> matches =
GetResultsMatching(test_data.query, 1000);
ASSERT_EQ(1U, matches.size());
TitledUrlMatch::MatchPositions expected_title_matches;
ExtractMatchPositions(test_data.expected_title_match_positions,
&expected_title_matches);
ExpectMatchPositions(matches[0].title_match_positions,
expected_title_matches);
}
}
// Makes sure match positions are updated appropriately for URL matches.
TEST_P(TitledUrlIndexTest, MatchPositionsURLs) {
// The encoded stuff between /wiki/ and the # is 第二次世界大戦
const std::string ja_wiki_url =
"http://ja.wikipedia.org/wiki/%E7%AC%AC%E4"
"%BA%8C%E6%AC%A1%E4%B8%96%E7%95%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7"
".E3.83.AB.E3.82.B5.E3.82.A4.E3.83.A6.E4.BD.93.E5.88.B6";
struct TestData {
const std::string query;
const std::string url;
const std::string expected_url_match_positions;
} data[] = {
{"foo", "http://www.foo.com/", "11,14"},
{"foo", "http://www.foodie.com/", "11,14"},
{"foo", "http://www.foofoo.com/", "11,14"},
{"www", "http://www.foo.com/", "7,10"},
{"foo", "http://www.foodie.com/blah/foo/fi", "11,14:27,30"},
{"foo", "http://www.blah.com/blah/foo/fi", "25,28"},
{"foo www", "http://www.foodie.com/blah/foo/fi", "7,10:11,14:27,30"},
{"www foo", "http://www.foodie.com/blah/foo/fi", "7,10:11,14:27,30"},
{"www bla", "http://www.foodie.com/blah/foo/fi", "7,10:22,25"},
{"http", "http://www.foo.com/", "0,4"},
{"http www", "http://www.foo.com/", "0,4:7,10"},
{"http foo", "http://www.foo.com/", "0,4:11,14"},
{"http foo", "http://www.bar.com/baz/foodie/hi", "0,4:23,26"},
{"第二次", ja_wiki_url, "29,56"},
{"ja 第二次", ja_wiki_url, "7,9:29,56"},
{"第二次 E3.8", ja_wiki_url,
"29,56:94,98:103,107:"
"112,116:121,125:"
"130,134:139,143"}};
for (const TestData& test_data : data) {
ResetNodes();
AddNode("123456", GURL(test_data.url));
std::vector<TitledUrlMatch> matches =
GetResultsMatching(test_data.query, 1000);
ASSERT_EQ(1U, matches.size()) << test_data.url << test_data.query;
TitledUrlMatch::MatchPositions expected_url_matches;
ExtractMatchPositions(test_data.expected_url_match_positions,
&expected_url_matches);
ExpectMatchPositions(matches[0].url_match_positions, expected_url_matches);
}
}
// Makes sure index is updated when a node is removed.
TEST_P(TitledUrlIndexTest, Remove) {
TitledUrlNode* n1 = AddNode("foo", GURL("http://foo")).first;
TitledUrlNode* n2 = AddNode("bar", GURL("http://bar")).first;
TitledUrlNode* n3 = AddNode("bar", GURL("http://bar/baz")).first;
ASSERT_EQ(1U, GetResultsMatching("foo", 10).size());
ASSERT_EQ(2U, GetResultsMatching("bar", 10).size());
index()->Remove(n3);
EXPECT_EQ(1U, GetResultsMatching("bar", 10).size());
EXPECT_EQ(1U, GetResultsMatching("foo", 10).size());
index()->Remove(n1);
EXPECT_EQ(1U, GetResultsMatching("bar", 10).size());
EXPECT_EQ(0U, GetResultsMatching("foo", 10).size());
index()->Remove(n2);
EXPECT_EQ(0U, GetResultsMatching("bar", 10).size());
}
// Makes sure index is updated when a node is removed.
TEST_P(TitledUrlIndexTest, Remove_PathIndex) {
auto* parent_dir = AddNode("foo", GURL("http://foo"), "folder").second;
ASSERT_EQ(1U, GetResultsMatching("foo folder", 10).size());
index()->RemovePath(parent_dir);
ASSERT_EQ(0U, GetResultsMatching("foo folder", 10).size());
ASSERT_EQ(1U, GetResultsMatching("foo", 10).size());
}
// Makes sure no more than max queries is returned.
TEST_P(TitledUrlIndexTest, HonorMax) {
AddNode("abcd", kAboutBlankURL);
AddNode("abcde", kAboutBlankURL);
EXPECT_EQ(1U, GetResultsMatching("ABc", 1).size());
}
// Makes sure if the lower case string of a bookmark title is more characters
// than the upper case string no match positions are returned.
TEST_P(TitledUrlIndexTest, EmptyMatchOnMultiwideLowercaseString) {
TitledUrlNode* n1 = AddNode(u"\u0130 i", GURL("http://www.google.com")).first;
std::vector<TitledUrlMatch> matches = GetResultsMatching("i", 100);
ASSERT_EQ(1U, matches.size());
EXPECT_EQ(n1, matches[0].node);
EXPECT_TRUE(matches[0].title_match_positions.empty());
}
TEST_P(TitledUrlIndexTest, GetResultsSortedByTypedCount) {
struct TestData {
const GURL url;
const char* title;
const int typed_count;
};
auto data = std::to_array<TestData>({
{GURL("http://www.google.com/"), "Google", 100},
{GURL("http://maps.google.com/"), "Google Maps", 40},
{GURL("http://docs.google.com/"), "Google Docs", 50},
{GURL("http://reader.google.com/"), "Google Reader", 80},
});
std::map<GURL, int> typed_count_map;
for (const TestData& test_data : data)
typed_count_map.insert(
std::make_pair(test_data.url, test_data.typed_count));
BookmarkClientMock bookmark_client(typed_count_map);
index()->SetNodeSorter(std::make_unique<TypedCountSorter>(&bookmark_client));
for (const TestData& test_data : data)
AddNode(test_data.title, GURL(test_data.url));
// Populate match nodes.
std::vector<TitledUrlMatch> matches = GetResultsMatching("google", 4);
// The resulting order should be:
// 1. Google (google.com) 100
// 2. Google Reader (google.com/reader) 80
// 3. Google Docs (docs.google.com) 50
// 4. Google Maps (maps.google.com) 40
ASSERT_EQ(4U, matches.size());
EXPECT_EQ(data[0].url, matches[0].node->GetTitledUrlNodeUrl());
EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());
EXPECT_EQ(data[2].url, matches[2].node->GetTitledUrlNodeUrl());
EXPECT_EQ(data[1].url, matches[3].node->GetTitledUrlNodeUrl());
// Select top two matches.
matches = GetResultsMatching("google", 2);
ASSERT_EQ(2U, matches.size());
EXPECT_EQ(data[0].url, matches[0].node->GetTitledUrlNodeUrl());
EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());
index()->SetNodeSorter(nullptr);
}
TEST_P(TitledUrlIndexTest, MatchTitledUrlNodeWithQuery) {
// When the query matches the node, should return non `nullopt`.
EXPECT_TRUE(index()->MatchTitledUrlNodeWithQuery(u"matching", u"match"));
// When the query approximately matches the node, should return `nullopt`.
EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"mmmatch", u"match"));
// WHen the query doesn't match the node, should return `nullopt`.
EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"natch", u"match"));
}
TEST_P(TitledUrlIndexTest, MatchTitledUrlNodeWithQuery_ApproximateNodeMatch) {
// When the query matches the node, should return non `nullopt`.
EXPECT_TRUE(index()->MatchTitledUrlNodeWithQuery(u"matching", u"match"));
// When the query approximately matches the node, should return `nullopt`.
EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"mmmatch", u"match"));
// WHen the query doesn't match the node, should return `nullopt`.
EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"natch", u"match"));
}
TEST_P(TitledUrlIndexTest, RetrieveNodesMatchingAllTerms) {
TitledUrlNode* node =
AddNode("term1 term2 other xyz ab", GURL("http://foo.com")).first;
struct TestData {
const std::string query;
const bool should_be_retrieved;
} data[] = {// Should return matches if all input terms match, even if not all
// node terms match.
{"term other", true},
// Should not match midword.
{"term ther", false},
// Short input terms should only return exact matches.
{"xy", false},
{"ab", true}};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
std::vector<std::u16string> terms =
base::SplitString(base::UTF8ToUTF16(test_data.query), u" ",
base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
auto matches = index()->RetrieveNodesMatchingAllTerms(
terms, query_parser::MatchingAlgorithm::DEFAULT);
if (test_data.should_be_retrieved) {
EXPECT_EQ(matches.size(), 1u);
EXPECT_TRUE(matches.contains(node));
} else
EXPECT_TRUE(matches.empty());
};
}
TEST_P(TitledUrlIndexTest, RetrieveNodesMatchingAnyTerms_PathMatch) {
ResetNodes();
AddNode("term1 term2 other xyz ab", GURL("http://foo.com"));
AddNode("no_match", GURL("http://no_match.com"), "path commo");
AddNode("common1", GURL("http://foo.com"));
AddNode("common2", GURL("http://foo.com"));
AddNode("common3", GURL("http://foo.com"));
AddNode("common4", GURL("http://foo.com"));
// Should return match even if an input term does not match, as long as it's
// in the path index.
VerifyRetrieveNodesMatchingAnyTerms("term path", {0});
// Should not return matches if any input term is neither matching nor in the
// path index.
VerifyRetrieveNodesMatchingAnyTerms("term not", {});
// If any input term is not in the path index, should do full matching (i.e.
// all input terms need to be non-path matched).
VerifyRetrieveNodesMatchingAnyTerms("term common", {});
// If all input terms are in the path index, only 1 input term needs to
// non-path match.
VerifyRetrieveNodesMatchingAnyTerms("comm path", {2, 3, 4});
// Should not return duplicate matches.
VerifyRetrieveNodesMatchingAnyTerms("term term1 term2", {0});
// Should not early exit when there are no intermediate matches.
VerifyRetrieveNodesMatchingAnyTerms("path comm", {2, 3, 4});
// Should not match midword ('ther' in 'other').
VerifyRetrieveNodesMatchingAnyTerms("ther ther", {});
// Short input terms should only return exact matches.
VerifyRetrieveNodesMatchingAnyTerms("xy xy", {});
VerifyRetrieveNodesMatchingAnyTerms("ab ab", {0});
// Allows complete title/URL matches to exceed `max_nodes` (3 in tests).
VerifyRetrieveNodesMatchingAnyTerms("commo commo", {2, 3, 4, 5});
}
TEST_P(TitledUrlIndexTest, RetrieveNodesMatchingAnyTerms_PathIndex) {
AddNode("term1 term2 other xyz ab", GURL("http://foo.com"), "parent");
AddNode("term1 term3", GURL("http://foo.com"), "parent2");
// When short-circuiting to matching all terms, should not intersect with path
// matching terms, only non-path matching terms.
// Should intersect 'term1' matches only, returning both nodes, even though
// the 1st node doesn't match 'parent2'.
VerifyRetrieveNodesMatchingAnyTerms("term1 parent2", {0, 1});
// Should intersect 'term1' and 'term2' matches returning the 1st node,
// even though it doesn't match 'parent2'.
VerifyRetrieveNodesMatchingAnyTerms("term1 term2 parent2", {0});
// Should intersect 'term2' and 'term3' matches returning 0 nodes.
VerifyRetrieveNodesMatchingAnyTerms("term2 term3 parent", {});
}
TEST_P(TitledUrlIndexTest, GetResultsMatchingAncestors) {
TitledUrlNode* node =
AddNode("leaf pare", GURL("http://foo.com"), "parent").first;
struct TestData {
const std::string query;
const bool should_be_retrieved;
const bool should_have_ancestor_match;
} data[] = {
// Should allow ancestor matches when `match_ancestor_titles` is true.
{"leaf parent", true, true},
// Should not early exit when there are no accumulated non-ancestor
// matches.
{"parent leaf", true, true},
// Should still require at least 1 non-ancestor match when
// `match_ancestor_titles` is true.
{"parent parent", false, false},
// Should set `has_ancestor_match` to true even if a term matched both an
// ancestor and title/URL.
{"pare", true, true},
// Short inputs should only match exact title or ancestor terms.
{"pa pa", false, false},
// Should not return matches if a term matches neither the title nor
// ancestor.
{"leaf not parent", false, false},
};
for (const TestData& test_data : data) {
SCOPED_TRACE("Query: " + test_data.query);
auto matches = GetResultsMatching(test_data.query, 10);
// Verify whether the match.
if (test_data.should_be_retrieved) {
EXPECT_EQ(matches.size(), 1u);
EXPECT_EQ(matches[0].node, node);
EXPECT_EQ(matches[0].has_ancestor_match,
test_data.should_have_ancestor_match);
} else
EXPECT_TRUE(matches.empty());
};
}
INSTANTIATE_TEST_SUITE_P(ContainerType,
TitledUrlIndexTest,
testing::Bool(),
[](const testing::TestParamInfo<bool>& info) {
return info.param ? "BinaryTree" : "FlatTree";
});
} // namespace
} // namespace bookmarks