| // Copyright 2014 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |
| #define COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |
| |
| #include <stddef.h> |
| |
| #include <map> |
| #include <memory> |
| #include <optional> |
| #include <set> |
| #include <string> |
| #include <string_view> |
| #include <variant> |
| #include <vector> |
| |
| #include "base/containers/flat_set.h" |
| #include "base/memory/raw_ptr.h" |
| #include "components/bookmarks/browser/titled_url_node_sorter.h" |
| #include "components/query_parser/query_parser.h" |
| |
| namespace bookmarks { |
| |
| class TitledUrlNode; |
| |
| struct TitledUrlMatch; |
| |
| // TitledUrlIndex maintains an index of paired titles and URLs for quick lookup. |
| // |
| // TitledUrlIndex maintains the index (index_) as a map of sets. The map (type |
| // Index) maps from a lower case string to the set (type TitledUrlNodeSet) of |
| // TitledUrlNodes that contain that string in their title or URL. |
| class TitledUrlIndex { |
| public: |
| TitledUrlIndex(); |
| |
| TitledUrlIndex(const TitledUrlIndex&) = delete; |
| TitledUrlIndex& operator=(const TitledUrlIndex&) = delete; |
| |
| ~TitledUrlIndex(); |
| |
| // Sets a node sorter as provided by `sorter`, which is used to construct a |
| // sorted list of matches when matches are returned from the index. If null |
| // (which is also the default before this function is called), matches are |
| // returned unsorted. |
| void SetNodeSorter(std::unique_ptr<TitledUrlNodeSorter> sorter); |
| |
| // Invoked when a title/URL pair has been added to the model. |
| void Add(const TitledUrlNode* node); |
| |
| // Invoked when a title/URL pair has been removed from the model. |
| void Remove(const TitledUrlNode* node); |
| |
| // Invoked when a folder has been added to the model. |
| void AddPath(const TitledUrlNode* node); |
| |
| // Invoked when a folder has been removed from the model. |
| void RemovePath(const TitledUrlNode* node); |
| |
| // Returns up to `max_count` of matches containing each term from the text |
| // `query` in either the title, URL, or the titles of ancestor nodes. |
| // `matching_algorithm` determines the algorithm used by QueryParser |
| // internally to parse `query`. |
| std::vector<TitledUrlMatch> GetResultsMatching( |
| const std::u16string& query, |
| size_t max_count, |
| query_parser::MatchingAlgorithm matching_algorithm); |
| |
| // Returns a normalized version of the UTF16 string `text`. If it fails to |
| // normalize the string, returns `text` itself as a best-effort. |
| static std::u16string Normalize(std::u16string_view text); |
| |
| // The type of container used internally by the index, determined via feature |
| // flags. |
| enum class NodeSetType { |
| kFlat, |
| kBinaryTree, |
| }; |
| |
| private: |
| friend class TitledUrlIndexFake; |
| |
| using NodeVector = std::vector<raw_ptr<const TitledUrlNode, CtnExperimental>>; |
| using FlatNodeSet = |
| base::flat_set<raw_ptr<const TitledUrlNode, CtnExperimental>>; |
| |
| class NodeSet { |
| public: |
| explicit NodeSet(NodeSetType type); |
| NodeSet(const NodeSet&) = delete; |
| NodeSet& operator=(const NodeSet&) = delete; |
| ~NodeSet(); |
| |
| bool empty() const; |
| void InsertSingleNode(const TitledUrlNode* node); |
| void EraseSingleNode(const TitledUrlNode* node); |
| |
| NodeVector DeepCopyToVector() const; |
| void AppendNodesToVector(NodeVector& output) const; |
| |
| private: |
| using BinaryTreeNodeSet = |
| std::set<raw_ptr<const TitledUrlNode, CtnExperimental>>; |
| |
| std::variant<FlatNodeSet, BinaryTreeNodeSet> variant_; |
| }; |
| |
| using Index = std::map<std::u16string, NodeSet>; |
| |
| // Constructs `sorted_nodes` by copying the matches in `matches` and sorting |
| // them. |
| void SortMatches(const FlatNodeSet& matches, NodeVector* sorted_nodes) const; |
| |
| // For each node, calls `MatchTitledUrlNodeWithQuery()` and returns the |
| // aggregated `TitledUrlMatch`s. |
| std::vector<TitledUrlMatch> MatchTitledUrlNodesWithQuery( |
| const NodeVector& nodes, |
| const query_parser::QueryNodeVector& query_nodes, |
| const std::vector<std::u16string>& query_terms, |
| size_t max_count); |
| |
| // Finds `query_nodes` matches in `node` and returns a TitledUrlMatch |
| // containing `node` and the matches. |
| std::optional<TitledUrlMatch> MatchTitledUrlNodeWithQuery( |
| const TitledUrlNode* node, |
| const query_parser::QueryNodeVector& query_nodes, |
| const std::vector<std::u16string>& query_terms); |
| |
| // Return matches for the specified `terms`. This is an intersection of each |
| // term's matches. |
| FlatNodeSet RetrieveNodesMatchingAllTerms( |
| const std::vector<std::u16string>& terms, |
| query_parser::MatchingAlgorithm matching_algorithm) const; |
| |
| // Return matches for the specified `terms`. This is approximately a union of |
| // each term's match, with some limitations to avoid too many nodes being |
| // returned: terms shorter than `term_min_length` or matching more than |
| // `max_nodes_per_term` nodes won't have their nodes accumulated by union; and |
| // accumulation is capped to `max_nodes`. Guaranteed to include any node |
| // `RetrieveNodesMatchingAllTerms()` includes. |
| FlatNodeSet RetrieveNodesMatchingAnyTerms( |
| const std::vector<std::u16string>& terms, |
| query_parser::MatchingAlgorithm matching_algorithm, |
| size_t max_nodes) const; |
| |
| // Return matches for the specified `term`. May return duplicates. |
| NodeVector RetrieveNodesMatchingTerm( |
| const std::u16string& term, |
| query_parser::MatchingAlgorithm matching_algorithm) const; |
| |
| // Return true if `term` matches any path. in `path_index_`. |
| bool DoesTermMatchPath( |
| const std::u16string& term, |
| query_parser::MatchingAlgorithm matching_algorithm) const; |
| |
| // Returns the set of query words from `query`. |
| static std::vector<std::u16string> ExtractQueryWords( |
| const std::u16string& query); |
| |
| // Return the index terms for `node`. |
| static std::vector<std::u16string> ExtractIndexTerms( |
| const TitledUrlNode* node); |
| |
| // Adds `node` to `index_`. |
| void RegisterNode(const std::u16string& term, const TitledUrlNode* node); |
| |
| // Removes `node` from `index_`. |
| void UnregisterNode(const std::u16string& term, const TitledUrlNode* node); |
| |
| // A cached result of evaluating feature flags, to avoid repeated evaluation |
| // via base::FeatureList. |
| const NodeSetType indexed_node_set_type_; |
| |
| // A map of terms and the nodes containing those terms in their titles or |
| // URLs. E.g., given 2 bookmarks titled 'x y x' and 'x z', `index` would |
| // contain: `{ x: set[node1, node2], y: set[node1], z: set[node2] }`. |
| Index index_; |
| // A map of terms and the number of times it occurs in paths. E.g., given |
| // 2 paths 'bookmarks bar/x y x/x' and 'bookmarks bar/x z/x', `path_index_` |
| // would contain `{ bookmarks: 2, bar: 2, x: 4, y: 1, z: 1 }`. Note, 'x' has |
| // count 4, since it occurred twice in each path. Doesn't track actual |
| // bookmark nodes, as the latter would need large updates when moving, |
| // folders. Tracks counts so terms can be unindexed when the last containing |
| // folder is renamed or deleted. Updated on folder rename, creation, and |
| // deletion; not updated on bookmark or folder move. Used to short circuit |
| // unioning per-term matches when matching paths, as intersecting results in |
| // much fewer nodes. |
| std::map<std::u16string, size_t> path_index_; |
| |
| std::unique_ptr<TitledUrlNodeSorter> sorter_; |
| }; |
| |
| } // namespace bookmarks |
| |
| #endif // COMPONENTS_BOOKMARKS_BROWSER_TITLED_URL_INDEX_H_ |