components/bookmarks/browser/titled_url_index_unittest.cc - chromium/src - Git at Google

 // Copyright 2014 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifdef UNSAFE_BUFFERS_BUILD
 // TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
 #pragma allow_unsafe_buffers
 #endif

 #include "components/bookmarks/browser/titled_url_index.h"

 #include <array>
 #include <memory>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>

 #include "base/strings/string_number_conversions.h"
 #include "base/strings/string_split.h"
 #include "base/strings/string_util.h"
 #include "base/strings/utf_string_conversions.h"
 #include "base/test/scoped_feature_list.h"
 #include "components/bookmarks/browser/titled_url_match.h"
 #include "components/bookmarks/browser/titled_url_node.h"
 #include "components/bookmarks/browser/typed_count_sorter.h"
 #include "components/bookmarks/common/bookmark_features.h"
 #include "components/bookmarks/test/test_bookmark_client.h"
 #include "components/query_parser/query_parser.h"
 #include "testing/gtest/include/gtest/gtest.h"

 using base::UTF8ToUTF16;

 namespace bookmarks {
 namespace {

 // Used for sorting in combination with TypedCountSorter.
 class BookmarkClientMock : public TestBookmarkClient {
  public:
   explicit BookmarkClientMock(const std::map<GURL, int>& typed_count_map)
       : typed_count_map_(typed_count_map) {}

   BookmarkClientMock(const BookmarkClientMock&) = delete;
   BookmarkClientMock& operator=(const BookmarkClientMock&) = delete;

   bool SupportsTypedCountForUrls() override { return true; }

   void GetTypedCountForUrls(UrlTypedCountMap* url_typed_count_map) override {
     for (auto& url_typed_count_pair : *url_typed_count_map) {
       const GURL* url = url_typed_count_pair.first;
       if (!url)
         continue;

       auto found = typed_count_map_.find(*url);
       if (found == typed_count_map_.end())
         continue;

       url_typed_count_pair.second = found->second;
     }
   }

  private:
   const std::map<GURL, int> typed_count_map_;
 };

 // Minimal implementation of TitledUrlNode.
 class TestTitledUrlNode : public TitledUrlNode {
  public:
   TestTitledUrlNode(const std::u16string& title,
                     const GURL& url,
                     const std::u16string& ancestor_title)
       : title_(title), url_(url), ancestor_title_(ancestor_title) {}

   ~TestTitledUrlNode() override = default;

   const std::u16string& GetTitledUrlNodeTitle() const override {
     return title_;
   }

   const GURL& GetTitledUrlNodeUrl() const override { return url_; }

   std::vector<std::u16string_view> GetTitledUrlNodeAncestorTitles()
       const override {
     return {ancestor_title_};
   }

  private:
   std::u16string title_;
   GURL url_;
   std::u16string ancestor_title_;
 };

 }  // namespace

 class TitledUrlIndexFake : public TitledUrlIndex {
  public:
   using TitledUrlIndex::ExtractQueryWords;
   using TitledUrlIndex::MatchTitledUrlNodeWithQuery;
   using TitledUrlIndex::RetrieveNodesMatchingAllTerms;
   using TitledUrlIndex::RetrieveNodesMatchingAnyTerms;

   // Helper to call `TitledUrlIndex::MatchTitledUrlNodeWithQuery` with simpler
   // parameters, returning a bool indicating success.
   bool MatchTitledUrlNodeWithQuery(std::u16string node_title,
                                    std::u16string query) {
     TestTitledUrlNode node{node_title, GURL("http://foo.com"), u""};
     std::vector<std::u16string> query_terms =
         TitledUrlIndexFake::ExtractQueryWords(query);
     query_parser::QueryNodeVector query_nodes;
     query_parser::QueryParser::ParseQueryNodes(
         query, query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH,
         &query_nodes);
     return MatchTitledUrlNodeWithQuery(&node, query_nodes, query_terms)
         .has_value();
   }
 };

 namespace {

 class TitledUrlIndexTest : public testing::TestWithParam<bool> {
  public:
   const GURL kAboutBlankURL = GURL("about:blank");

   TitledUrlIndexTest() {
     if (GetParam()) {
       feature_list_.InitAndEnableFeature(
           kBookmarksUseBinaryTreeInTitledUrlIndex);
     } else {
       feature_list_.InitAndDisableFeature(
           kBookmarksUseBinaryTreeInTitledUrlIndex);
     }

     ResetNodes();
   }

   ~TitledUrlIndexTest() override = default;

   void ResetNodes() {
     index_ = std::make_unique<TitledUrlIndexFake>();
     owned_nodes_.clear();
     owned_path_nodes_.clear();
   }

   std::pair<TitledUrlNode*, TitledUrlNode*> AddNode(
       const std::string& title,
       const GURL& url,
       const std::string& ancestor_title = "") {
     return AddNode(UTF8ToUTF16(title), url, UTF8ToUTF16(ancestor_title));
   }

   std::pair<TitledUrlNode*, TitledUrlNode*> AddNode(
       const std::u16string& title,
       const GURL& url,
       const std::u16string& ancestor_title = std::u16string()) {
     // Add the node.
     owned_nodes_.push_back(
         std::make_unique<TestTitledUrlNode>(title, url, ancestor_title));
     index_->Add(owned_nodes_.back().get());
     // Add its parent node.
     owned_path_nodes_.push_back(
         std::make_unique<TestTitledUrlNode>(ancestor_title, GURL{}, u""));
     index_->AddPath(owned_path_nodes_.back().get());
     return {owned_nodes_.back().get(), owned_path_nodes_.back().get()};
   }

   std::vector<TitledUrlMatch> GetResultsMatching(const std::string& query,
                                                  size_t max_count) {
     return index_->GetResultsMatching(UTF8ToUTF16(query), max_count,
                                       query_parser::MatchingAlgorithm::DEFAULT);
   }

   void ExpectMatches(const std::string& query,
                      const char** expected_titles,
                      size_t expected_count) {
     std::vector<std::string> title_vector;
     for (size_t i = 0; i < expected_count; ++i)
       title_vector.push_back(expected_titles[i]);
     ExpectMatches(query, query_parser::MatchingAlgorithm::DEFAULT,
                   title_vector);
   }

   void ExpectMatches(const std::string& query,
                      query_parser::MatchingAlgorithm matching_algorithm,
                      const std::vector<std::string>& expected_titles) {
     std::vector<TitledUrlMatch> matches = index_->GetResultsMatching(
         UTF8ToUTF16(query), 1000, matching_algorithm);
     ASSERT_EQ(expected_titles.size(), matches.size());
     for (const std::string& expected_title : expected_titles) {
       bool found = false;
       for (size_t j = 0; j < matches.size(); ++j) {
         const std::u16string& title = matches[j].node->GetTitledUrlNodeTitle();
         if (UTF8ToUTF16(expected_title) == title) {
           matches.erase(matches.begin() + j);
           found = true;
           break;
         }
       }
       ASSERT_TRUE(found);
     }
   }

   void ExtractMatchPositions(const std::string& string,
                              TitledUrlMatch::MatchPositions* matches) {
     for (std::string_view match : base::SplitStringPiece(
              string, ":", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL)) {
       std::vector<std::string_view> chunks = base::SplitStringPiece(
           match, ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
       ASSERT_EQ(2U, chunks.size());
       matches->push_back(TitledUrlMatch::MatchPosition());
       int chunks0, chunks1;
       EXPECT_TRUE(base::StringToInt(chunks[0], &chunks0));
       EXPECT_TRUE(base::StringToInt(chunks[1], &chunks1));
       matches->back().first = chunks0;
       matches->back().second = chunks1;
     }
   }

   void ExpectMatchPositions(
       const TitledUrlMatch::MatchPositions& actual_positions,
       const TitledUrlMatch::MatchPositions& expected_positions) {
     ASSERT_EQ(expected_positions.size(), actual_positions.size());
     for (size_t i = 0; i < expected_positions.size(); ++i) {
       EXPECT_EQ(expected_positions[i].first, actual_positions[i].first);
       EXPECT_EQ(expected_positions[i].second, actual_positions[i].second);
     }
   }

   void VerifyRetrieveNodesMatchingAnyTerms(
       const std::string& query,
       const std::vector<int> expected_node_indexes) {
     SCOPED_TRACE("Query: " + query);
     std::vector<std::u16string> terms =
         base::SplitString(base::UTF8ToUTF16(query), u" ", base::TRIM_WHITESPACE,
                           base::SPLIT_WANT_ALL);
     auto matches = index()->RetrieveNodesMatchingAnyTerms(
         terms, query_parser::MatchingAlgorithm::DEFAULT, 3);

     // Verify the correct nodes matched.
     ASSERT_EQ(matches.size(), expected_node_indexes.size());
     for (int index : expected_node_indexes) {
       SCOPED_TRACE(
           "node: " +
           base::UTF16ToUTF8(owned_nodes_[index]->GetTitledUrlNodeTitle()));
       EXPECT_TRUE(matches.contains(owned_nodes_[index].get()));
     }
   }

   TitledUrlIndexFake* index() { return index_.get(); }

  private:
   base::test::ScopedFeatureList feature_list_;
   std::vector<std::unique_ptr<TestTitledUrlNode>> owned_nodes_;
   std::vector<std::unique_ptr<TestTitledUrlNode>> owned_path_nodes_;
   std::unique_ptr<TitledUrlIndexFake> index_;
 };

 // Various permutations with differing input, queries and output that exercises
 // all query paths.
 TEST_P(TitledUrlIndexTest, GetResultsMatching) {
   struct TestData {
     const std::string titles;
     const std::string query;
     const std::string expected;
   } data[] = {
       // Trivial test case of only one term, exact match.
       {"a;b", "A", "a"},

       // Two terms, exact matches.
       {"a b;b", "a b", "a b"},

       // Prefix match, one term.
       {"abcd;abc;b", "abc", "abcd;abc"},

       // Prefix match, multiple terms.
       {"abcd cdef;abcd;abcd cdefg", "abc cde", "abcd cdef;abcd cdefg"},

       // Exact and prefix match.
       {"ab cdef;abcd;abcd cdefg", "ab cdef", "ab cdef"},

       // Exact and prefix match.
       {"ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab", "ab cde ghi",
        "ab cdef ghij"},

       // Title with term multiple times.
       {"ab ab", "ab", "ab ab"},

       // Make sure quotes don't do a prefix match.
       {"think", "\"thi\"", ""},

       // Prefix matches against multiple candidates.
       {"abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},

       // Multiple prefix matches (with a lot of redundancy) against multiple
       // candidates.
       {"abc1 abc2 abc3 abc4 def1 def2 def3 def4",
        "abc def abc def abc def abc def abc def",
        "abc1 abc2 abc3 abc4 def1 def2 def3 def4"},

       // Prefix match on the first term.
       {"abc", "a", ""},

       // Prefix match on subsequent terms.
       {"abc def", "abc d", ""},
   };
   for (const TestData& test_data : data) {
     SCOPED_TRACE("Query: " + test_data.query);
     ResetNodes();

     for (const std::string& title :
          base::SplitString(test_data.titles, ";", base::TRIM_WHITESPACE,
                            base::SPLIT_WANT_ALL)) {
       AddNode(title, kAboutBlankURL);
     }

     std::vector<std::string> expected;
     if (!test_data.expected.empty()) {
       expected = base::SplitString(test_data.expected, ";",
                                    base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
     }

     ExpectMatches(test_data.query, query_parser::MatchingAlgorithm::DEFAULT,
                   expected);
   }
 }

 TEST_P(TitledUrlIndexTest, GetResultsMatchingAlwaysPrefixSearch) {
   struct TestData {
     const std::string titles;
     const std::string query;
     const std::string expected;
   } data[] = {
       // Trivial test case of only one term, exact match.
       {"z;y", "Z", "z"},

       // Prefix match, one term.
       {"abcd;abc;b", "abc", "abcd;abc"},

       // Prefix match, multiple terms.
       {"abcd cdef;abcd;abcd cdefg", "abc cde", "abcd cdef;abcd cdefg"},

       // Exact and prefix match.
       {"ab cdef ghij;ab;cde;cdef;ghi;cdef ab;ghij ab", "ab cde ghi",
        "ab cdef ghij"},

       // Title with term multiple times.
       {"ab ab", "ab", "ab ab"},

       // Make sure quotes don't do a prefix match.
       {"think", "\"thi\"", ""},

       // Prefix matches against multiple candidates.
       {"abc1 abc2 abc3 abc4", "abc", "abc1 abc2 abc3 abc4"},

       // Prefix match on the first term.
       {"abc", "a", "abc"},

       // Prefix match on subsequent terms.
       {"abc def", "abc d", "abc def"},

       // Exact and prefix match.
       {"ab cdef;abcd;abcd cdefg", "ab cdef", "ab cdef;abcd cdefg"},
   };
   for (const TestData& test_data : data) {
     ResetNodes();

     for (const std::string& title :
          base::SplitString(test_data.titles, ";", base::TRIM_WHITESPACE,
                            base::SPLIT_WANT_ALL)) {
       AddNode(title, kAboutBlankURL);
     }

     std::vector<std::string> expected;
     if (!test_data.expected.empty()) {
       expected = base::SplitString(test_data.expected, ";",
                                    base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
     }

     ExpectMatches(test_data.query,
                   query_parser::MatchingAlgorithm::ALWAYS_PREFIX_SEARCH,
                   expected);
   }
 }

 // Analogous to GetResultsMatching, this test tests various permutations
 // of title, URL, and input to see if the title/URL matches the input as
 // expected.
 TEST_P(TitledUrlIndexTest, GetResultsMatchingWithURLs) {
   struct TestData {
     const std::string query;
     const std::string title;
     const std::string url;
     const bool should_be_retrieved;
   } data[] = {
       // Test single-word inputs.  Include both exact matches and prefix
       // matches.
       {"foo", "Foo", "http://www.bar.com/", true},
       {"foo", "Foodie", "http://www.bar.com/", true},
       {"foo", "Bar", "http://www.foo.com/", true},
       {"foo", "Bar", "http://www.foodie.com/", true},
       {"foo", "Foo", "http://www.foo.com/", true},
       {"foo", "Bar", "http://www.bar.com/", false},
       {"foo", "Bar", "http://www.bar.com/blah/foo/blah-again/ ", true},
       {"foo", "Bar", "http://www.bar.com/blah/foodie/blah-again/ ", true},
       {"foo", "Bar", "http://www.bar.com/blah-foo/blah-again/ ", true},
       {"foo", "Bar", "http://www.bar.com/blah-foodie/blah-again/ ", true},
       {"foo", "Bar", "http://www.bar.com/blahafoo/blah-again/ ", false},

       // Test multi-word inputs.
       {"foo bar", "Foo Bar", "http://baz.com/", true},
       {"foo bar", "Foodie Bar", "http://baz.com/", true},
       {"bar foo", "Foo Bar", "http://baz.com/", true},
       {"bar foo", "Foodie Barly", "http://baz.com/", true},
       {"foo bar", "Foo Baz", "http://baz.com/", false},
       {"foo bar", "Foo Baz", "http://bar.com/", true},
       {"foo bar", "Foo Baz", "http://barly.com/", true},
       {"foo bar", "Foodie Baz", "http://barly.com/", true},
       {"bar foo", "Foo Baz", "http://bar.com/", true},
       {"bar foo", "Foo Baz", "http://barly.com/", true},
       {"foo bar", "Baz Bar", "http://blah.com/foo", true},
       {"foo bar", "Baz Barly", "http://blah.com/foodie", true},
       {"foo bar", "Baz Bur", "http://blah.com/foo/bar", true},
       {"foo bar", "Baz Bur", "http://blah.com/food/barly", true},
       {"foo bar", "Baz Bur", "http://bar.com/blah/foo", true},
       {"foo bar", "Baz Bur", "http://barly.com/blah/food", true},
       {"foo bar", "Baz Bur", "http://bar.com/blah/flub", false},
       {"foo bar", "Baz Bur", "http://foo.com/blah/flub", false}};

   for (const TestData& test_data : data) {
     ResetNodes();
     AddNode(test_data.title, GURL(test_data.url));

     std::vector<std::string> expected;
     if (test_data.should_be_retrieved)
       expected.push_back(test_data.title);

     ExpectMatches(test_data.query, query_parser::MatchingAlgorithm::DEFAULT,
                   expected);
   }
 }

 TEST_P(TitledUrlIndexTest, GetResultsMatchingWithSymbols) {
   auto does_query_match_title = [&](std::string query, std::string title) {
     ResetNodes();
     AddNode(title, kAboutBlankURL);
     size_t num_matches = GetResultsMatching(query, 10).size();
     EXPECT_LE(num_matches, 1u);
     return num_matches > 0;
   };

   // Symbols should act as word breaks and don't need to match between the query
   // and title.
   EXPECT_TRUE(does_query_match_title("abc@xyz", "xyz@abc"));
   EXPECT_TRUE(does_query_match_title("abc&xyz", "xyz&abc"));
   EXPECT_TRUE(does_query_match_title("abc@xyz", "xyz abc"));
   EXPECT_TRUE(does_query_match_title("abc xyz", "xyz@abc"));

   // Treating symbols as word breaks doesn't mean simply pretending they're not
   // there.
   EXPECT_FALSE(does_query_match_title("xyz@abc", "xyzabc"));
   EXPECT_FALSE(does_query_match_title("xyzabc", "xyz@abc"));

   // '@' as the first character of the query should be treated special since it
   // indicates the user likely wants search scope.
   EXPECT_FALSE(does_query_match_title("@abc", "@abc"));
   // Other symbols shouldn't have that exception.
   EXPECT_TRUE(does_query_match_title("&abc", "@abc"));
   // '@' as the first character of the node shouldn't have that exception.
   EXPECT_TRUE(does_query_match_title("abc", "@abc"));
   // '@' in other locations in the query shouldn't have that exception.
   EXPECT_TRUE(does_query_match_title("abc @abc", "@abc"));
   // '@' followed by other symbols shouldn't have that exception.
   EXPECT_TRUE(does_query_match_title("@ abc", "@abc"));
   EXPECT_TRUE(does_query_match_title("@@abc", "@abc"));
   EXPECT_TRUE(does_query_match_title("@&abc", "@abc"));

   // '@' input or title shouldn't crash.
   EXPECT_FALSE(does_query_match_title("@", "@"));
 }

 TEST_P(TitledUrlIndexTest, Normalization) {
   struct TestData {
     const char* const title;
     const char* const query;
   } data[] = {{"fooa\xcc\x88-test", "foo\xc3\xa4-test"},
               {"fooa\xcc\x88-test", "fooa\xcc\x88-test"},
               {"fooa\xcc\x88-test", "foo\xc3\xa4"},
               {"fooa\xcc\x88-test", "fooa\xcc\x88"},
               {"fooa\xcc\x88-test", "foo"},
               {"foo\xc3\xa4-test", "foo\xc3\xa4-test"},
               {"foo\xc3\xa4-test", "fooa\xcc\x88-test"},
               {"foo\xc3\xa4-test", "foo\xc3\xa4"},
               {"foo\xc3\xa4-test", "fooa\xcc\x88"},
               {"foo\xc3\xa4-test", "foo"},
               {"foo", "foo"}};

   for (const TestData& test_data : data) {
     ResetNodes();
     AddNode(test_data.title, kAboutBlankURL);

     std::vector<TitledUrlMatch> matches =
         GetResultsMatching(test_data.query, 10);
     EXPECT_EQ(1u, matches.size());
   }
 }

 // Makes sure match positions are updated appropriately for title matches.
 TEST_P(TitledUrlIndexTest, MatchPositionsTitles) {
   struct TestData {
     const std::string title;
     const std::string query;
     const std::string expected_title_match_positions;
   } data[] = {
       // Trivial test case of only one term, exact match.
       {"a", "A", "0,1"},
       {"foo bar", "bar", "4,7"},
       {"fooey bark", "bar foo", "0,3:6,9"},
       // Non-trivial tests.
       {"foobar foo", "foobar foo", "0,6:7,10"},
       {"foobar foo", "foo foobar", "0,6:7,10"},
       {"foobar foobar", "foobar foo", "0,6:7,13"},
       {"foobar foobar", "foo foobar", "0,6:7,13"},
   };
   for (const TestData& test_data : data) {
     ResetNodes();
     AddNode(test_data.title, kAboutBlankURL);

     std::vector<TitledUrlMatch> matches =
         GetResultsMatching(test_data.query, 1000);
     ASSERT_EQ(1U, matches.size());

     TitledUrlMatch::MatchPositions expected_title_matches;
     ExtractMatchPositions(test_data.expected_title_match_positions,
                           &expected_title_matches);
     ExpectMatchPositions(matches[0].title_match_positions,
                          expected_title_matches);
   }
 }

 // Makes sure match positions are updated appropriately for URL matches.
 TEST_P(TitledUrlIndexTest, MatchPositionsURLs) {
   // The encoded stuff between /wiki/ and the # is 第二次世界大戦
   const std::string ja_wiki_url =
       "http://ja.wikipedia.org/wiki/%E7%AC%AC%E4"
       "%BA%8C%E6%AC%A1%E4%B8%96%E7%95%8C%E5%A4%A7%E6%88%A6#.E3.83.B4.E3.82.A7"
       ".E3.83.AB.E3.82.B5.E3.82.A4.E3.83.A6.E4.BD.93.E5.88.B6";
   struct TestData {
     const std::string query;
     const std::string url;
     const std::string expected_url_match_positions;
   } data[] = {
       {"foo", "http://www.foo.com/", "11,14"},
       {"foo", "http://www.foodie.com/", "11,14"},
       {"foo", "http://www.foofoo.com/", "11,14"},
       {"www", "http://www.foo.com/", "7,10"},
       {"foo", "http://www.foodie.com/blah/foo/fi", "11,14:27,30"},
       {"foo", "http://www.blah.com/blah/foo/fi", "25,28"},
       {"foo www", "http://www.foodie.com/blah/foo/fi", "7,10:11,14:27,30"},
       {"www foo", "http://www.foodie.com/blah/foo/fi", "7,10:11,14:27,30"},
       {"www bla", "http://www.foodie.com/blah/foo/fi", "7,10:22,25"},
       {"http", "http://www.foo.com/", "0,4"},
       {"http www", "http://www.foo.com/", "0,4:7,10"},
       {"http foo", "http://www.foo.com/", "0,4:11,14"},
       {"http foo", "http://www.bar.com/baz/foodie/hi", "0,4:23,26"},
       {"第二次", ja_wiki_url, "29,56"},
       {"ja 第二次", ja_wiki_url, "7,9:29,56"},
       {"第二次 E3.8", ja_wiki_url,
        "29,56:94,98:103,107:"
        "112,116:121,125:"
        "130,134:139,143"}};

   for (const TestData& test_data : data) {
     ResetNodes();
     AddNode("123456", GURL(test_data.url));

     std::vector<TitledUrlMatch> matches =
         GetResultsMatching(test_data.query, 1000);
     ASSERT_EQ(1U, matches.size()) << test_data.url << test_data.query;

     TitledUrlMatch::MatchPositions expected_url_matches;
     ExtractMatchPositions(test_data.expected_url_match_positions,
                           &expected_url_matches);
     ExpectMatchPositions(matches[0].url_match_positions, expected_url_matches);
   }
 }

 // Makes sure index is updated when a node is removed.
 TEST_P(TitledUrlIndexTest, Remove) {
   TitledUrlNode* n1 = AddNode("foo", GURL("http://foo")).first;
   TitledUrlNode* n2 = AddNode("bar", GURL("http://bar")).first;
   TitledUrlNode* n3 = AddNode("bar", GURL("http://bar/baz")).first;

   ASSERT_EQ(1U, GetResultsMatching("foo", 10).size());
   ASSERT_EQ(2U, GetResultsMatching("bar", 10).size());

   index()->Remove(n3);
   EXPECT_EQ(1U, GetResultsMatching("bar", 10).size());
   EXPECT_EQ(1U, GetResultsMatching("foo", 10).size());

   index()->Remove(n1);
   EXPECT_EQ(1U, GetResultsMatching("bar", 10).size());
   EXPECT_EQ(0U, GetResultsMatching("foo", 10).size());

   index()->Remove(n2);
   EXPECT_EQ(0U, GetResultsMatching("bar", 10).size());
 }

 // Makes sure index is updated when a node is removed.
 TEST_P(TitledUrlIndexTest, Remove_PathIndex) {
   auto* parent_dir = AddNode("foo", GURL("http://foo"), "folder").second;
   ASSERT_EQ(1U, GetResultsMatching("foo folder", 10).size());

   index()->RemovePath(parent_dir);
   ASSERT_EQ(0U, GetResultsMatching("foo folder", 10).size());
   ASSERT_EQ(1U, GetResultsMatching("foo", 10).size());
 }

 // Makes sure no more than max queries is returned.
 TEST_P(TitledUrlIndexTest, HonorMax) {
   AddNode("abcd", kAboutBlankURL);
   AddNode("abcde", kAboutBlankURL);

   EXPECT_EQ(1U, GetResultsMatching("ABc", 1).size());
 }

 // Makes sure if the lower case string of a bookmark title is more characters
 // than the upper case string no match positions are returned.
 TEST_P(TitledUrlIndexTest, EmptyMatchOnMultiwideLowercaseString) {
   TitledUrlNode* n1 = AddNode(u"\u0130 i", GURL("http://www.google.com")).first;

   std::vector<TitledUrlMatch> matches = GetResultsMatching("i", 100);
   ASSERT_EQ(1U, matches.size());
   EXPECT_EQ(n1, matches[0].node);
   EXPECT_TRUE(matches[0].title_match_positions.empty());
 }

 TEST_P(TitledUrlIndexTest, GetResultsSortedByTypedCount) {
   struct TestData {
     const GURL url;
     const char* title;
     const int typed_count;
   };
   auto data = std::to_array<TestData>({
       {GURL("http://www.google.com/"), "Google", 100},
       {GURL("http://maps.google.com/"), "Google Maps", 40},
       {GURL("http://docs.google.com/"), "Google Docs", 50},
       {GURL("http://reader.google.com/"), "Google Reader", 80},
   });

   std::map<GURL, int> typed_count_map;
   for (const TestData& test_data : data)
     typed_count_map.insert(
         std::make_pair(test_data.url, test_data.typed_count));

   BookmarkClientMock bookmark_client(typed_count_map);
   index()->SetNodeSorter(std::make_unique<TypedCountSorter>(&bookmark_client));

   for (const TestData& test_data : data)
     AddNode(test_data.title, GURL(test_data.url));

   // Populate match nodes.
   std::vector<TitledUrlMatch> matches = GetResultsMatching("google", 4);

   // The resulting order should be:
   // 1. Google (google.com) 100
   // 2. Google Reader (google.com/reader) 80
   // 3. Google Docs (docs.google.com) 50
   // 4. Google Maps (maps.google.com) 40
   ASSERT_EQ(4U, matches.size());
   EXPECT_EQ(data[0].url, matches[0].node->GetTitledUrlNodeUrl());
   EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());
   EXPECT_EQ(data[2].url, matches[2].node->GetTitledUrlNodeUrl());
   EXPECT_EQ(data[1].url, matches[3].node->GetTitledUrlNodeUrl());

   // Select top two matches.
   matches = GetResultsMatching("google", 2);

   ASSERT_EQ(2U, matches.size());
   EXPECT_EQ(data[0].url, matches[0].node->GetTitledUrlNodeUrl());
   EXPECT_EQ(data[3].url, matches[1].node->GetTitledUrlNodeUrl());

   index()->SetNodeSorter(nullptr);
 }

 TEST_P(TitledUrlIndexTest, MatchTitledUrlNodeWithQuery) {
   // When the query matches the node, should return non `nullopt`.
   EXPECT_TRUE(index()->MatchTitledUrlNodeWithQuery(u"matching", u"match"));
   // When the query approximately matches the node, should return `nullopt`.
   EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"mmmatch", u"match"));
   // WHen the query doesn't match the node, should return `nullopt`.
   EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"natch", u"match"));
 }

 TEST_P(TitledUrlIndexTest, MatchTitledUrlNodeWithQuery_ApproximateNodeMatch) {
   // When the query matches the node, should return non `nullopt`.
   EXPECT_TRUE(index()->MatchTitledUrlNodeWithQuery(u"matching", u"match"));
   // When the query approximately matches the node, should return `nullopt`.
   EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"mmmatch", u"match"));
   // WHen the query doesn't match the node, should return `nullopt`.
   EXPECT_FALSE(index()->MatchTitledUrlNodeWithQuery(u"natch", u"match"));
 }

 TEST_P(TitledUrlIndexTest, RetrieveNodesMatchingAllTerms) {
   TitledUrlNode* node =
       AddNode("term1 term2 other xyz ab", GURL("http://foo.com")).first;

   struct TestData {
     const std::string query;
     const bool should_be_retrieved;
   } data[] = {// Should return matches if all input terms match, even if not all
               // node terms match.
               {"term other", true},
               // Should not match midword.
               {"term ther", false},
               // Short input terms should only return exact matches.
               {"xy", false},
               {"ab", true}};

   for (const TestData& test_data : data) {
     SCOPED_TRACE("Query: " + test_data.query);
     std::vector<std::u16string> terms =
         base::SplitString(base::UTF8ToUTF16(test_data.query), u" ",
                           base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
     auto matches = index()->RetrieveNodesMatchingAllTerms(
         terms, query_parser::MatchingAlgorithm::DEFAULT);
     if (test_data.should_be_retrieved) {
       EXPECT_EQ(matches.size(), 1u);
       EXPECT_TRUE(matches.contains(node));
     } else
       EXPECT_TRUE(matches.empty());
   };
 }

 TEST_P(TitledUrlIndexTest, RetrieveNodesMatchingAnyTerms_PathMatch) {
   ResetNodes();
   AddNode("term1 term2 other xyz ab", GURL("http://foo.com"));
   AddNode("no_match", GURL("http://no_match.com"), "path commo");
   AddNode("common1", GURL("http://foo.com"));
   AddNode("common2", GURL("http://foo.com"));
   AddNode("common3", GURL("http://foo.com"));
   AddNode("common4", GURL("http://foo.com"));

   // Should return match even if an input term does not match, as long as it's
   // in the path index.
   VerifyRetrieveNodesMatchingAnyTerms("term path", {0});
   // Should not return matches if any input term is neither matching nor in the
   // path index.
   VerifyRetrieveNodesMatchingAnyTerms("term not", {});

   // If any input term is not in the path index, should do full matching (i.e.
   // all input terms need to be non-path matched).
   VerifyRetrieveNodesMatchingAnyTerms("term common", {});
   // If all input terms are in the path index, only 1 input term needs to
   // non-path match.
   VerifyRetrieveNodesMatchingAnyTerms("comm path", {2, 3, 4});

   // Should not return duplicate matches.
   VerifyRetrieveNodesMatchingAnyTerms("term term1 term2", {0});

   // Should not early exit when there are no intermediate matches.
   VerifyRetrieveNodesMatchingAnyTerms("path comm", {2, 3, 4});

   // Should not match midword ('ther' in 'other').
   VerifyRetrieveNodesMatchingAnyTerms("ther ther", {});

   // Short input terms should only return exact matches.
   VerifyRetrieveNodesMatchingAnyTerms("xy xy", {});
   VerifyRetrieveNodesMatchingAnyTerms("ab ab", {0});

   // Allows complete title/URL matches to exceed `max_nodes` (3 in tests).
   VerifyRetrieveNodesMatchingAnyTerms("commo commo", {2, 3, 4, 5});
 }

 TEST_P(TitledUrlIndexTest, RetrieveNodesMatchingAnyTerms_PathIndex) {
   AddNode("term1 term2 other xyz ab", GURL("http://foo.com"), "parent");
   AddNode("term1 term3", GURL("http://foo.com"), "parent2");

   // When short-circuiting to matching all terms, should not intersect with path
   // matching terms, only non-path matching terms.

   // Should intersect 'term1' matches only, returning both nodes, even though
   // the 1st node doesn't match 'parent2'.
   VerifyRetrieveNodesMatchingAnyTerms("term1 parent2", {0, 1});
   // Should intersect 'term1' and 'term2' matches returning the 1st node,
   // even though it doesn't match 'parent2'.
   VerifyRetrieveNodesMatchingAnyTerms("term1 term2 parent2", {0});
   // Should intersect 'term2' and 'term3' matches returning 0 nodes.
   VerifyRetrieveNodesMatchingAnyTerms("term2 term3 parent", {});
 }

 TEST_P(TitledUrlIndexTest, GetResultsMatchingAncestors) {
   TitledUrlNode* node =
       AddNode("leaf pare", GURL("http://foo.com"), "parent").first;

   struct TestData {
     const std::string query;
     const bool should_be_retrieved;
     const bool should_have_ancestor_match;
   } data[] = {
       // Should allow ancestor matches when `match_ancestor_titles` is true.
       {"leaf parent", true, true},
       // Should not early exit when there are no accumulated non-ancestor
       // matches.
       {"parent leaf", true, true},
       // Should still require at least 1 non-ancestor match when
       // `match_ancestor_titles` is true.
       {"parent parent", false, false},
       // Should set `has_ancestor_match` to true even if a term matched both an
       // ancestor and title/URL.
       {"pare", true, true},
       // Short inputs should only match exact title or ancestor terms.
       {"pa pa", false, false},
       // Should not return matches if a term matches neither the title nor
       // ancestor.
       {"leaf not parent", false, false},
   };

   for (const TestData& test_data : data) {
     SCOPED_TRACE("Query: " + test_data.query);
     auto matches = GetResultsMatching(test_data.query, 10);

     // Verify whether the match.
     if (test_data.should_be_retrieved) {
       EXPECT_EQ(matches.size(), 1u);
       EXPECT_EQ(matches[0].node, node);
       EXPECT_EQ(matches[0].has_ancestor_match,
                 test_data.should_have_ancestor_match);
     } else
       EXPECT_TRUE(matches.empty());
   };
 }

 INSTANTIATE_TEST_SUITE_P(ContainerType,
                          TitledUrlIndexTest,
                          testing::Bool(),
                          [](const testing::TestParamInfo<bool>& info) {
                            return info.param ? "BinaryTree" : "FlatTree";
                          });

 }  // namespace
 }  // namespace bookmarks