| // Copyright 2021 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef COMPONENTS_HISTORY_CLUSTERS_CORE_HISTORY_CLUSTERS_SERVICE_H_ |
| #define COMPONENTS_HISTORY_CLUSTERS_CORE_HISTORY_CLUSTERS_SERVICE_H_ |
| |
| #include <map> |
| #include <memory> |
| #include <string> |
| #include <vector> |
| |
| #include "base/functional/callback.h" |
| #include "base/memory/scoped_refptr.h" |
| #include "base/memory/weak_ptr.h" |
| #include "base/observer_list.h" |
| #include "base/scoped_observation.h" |
| #include "base/supports_user_data.h" |
| #include "base/task/cancelable_task_tracker.h" |
| #include "base/time/time.h" |
| #include "base/timer/elapsed_timer.h" |
| #include "base/timer/timer.h" |
| #include "components/history/core/browser/history_service.h" |
| #include "components/history/core/browser/history_service_observer.h" |
| #include "components/history/core/browser/history_types.h" |
| #include "components/history_clusters/core/context_clusterer_history_service_observer.h" |
| #include "components/history_clusters/core/history_clusters_types.h" |
| #include "components/keyed_service/core/keyed_service.h" |
| #include "services/network/public/cpp/shared_url_loader_factory.h" |
| |
| class PrefService; |
| class TemplateURLService; |
| |
| namespace optimization_guide { |
| class EntityMetadataProvider; |
| class NewOptimizationGuideDecider; |
| } // namespace optimization_guide |
| |
| namespace site_engagement { |
| class SiteEngagementScoreProvider; |
| } // namespace site_engagement |
| |
| namespace history_clusters { |
| |
| class ClusteringBackend; |
| class HistoryClustersService; |
| class HistoryClustersServiceTask; |
| |
| // This Service provides an API to the History Clusters for UI entry points. |
| class HistoryClustersService : public base::SupportsUserData, |
| public KeyedService, |
| public history::HistoryServiceObserver { |
| public: |
| class Observer : public base::CheckedObserver { |
| public: |
| virtual void OnDebugMessage(const std::string& message) = 0; |
| }; |
| |
| // Use std::unordered_map here because we have ~1000 elements at the 99th |
| // percentile, and we do synchronous lookups as the user types in the omnibox. |
| using KeywordMap = |
| std::unordered_map<std::u16string, history::ClusterKeywordData>; |
| |
| // `url_loader_factory` is allowed to be nullptr, like in unit tests. |
| HistoryClustersService( |
| const std::string& application_locale, |
| history::HistoryService* history_service, |
| optimization_guide::EntityMetadataProvider* entity_metadata_provider, |
| scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory, |
| site_engagement::SiteEngagementScoreProvider* engagement_score_provider, |
| TemplateURLService* template_url_service, |
| optimization_guide::NewOptimizationGuideDecider* |
| optimization_guide_decider, |
| PrefService* pref_service); |
| HistoryClustersService(const HistoryClustersService&) = delete; |
| HistoryClustersService& operator=(const HistoryClustersService&) = delete; |
| ~HistoryClustersService() override; |
| |
| // Gets a weak pointer to this service. Used when UIs want to create a query |
| // state object whose lifetime might exceed the service. |
| base::WeakPtr<HistoryClustersService> GetWeakPtr(); |
| |
| // KeyedService: |
| void Shutdown() override; |
| |
| // Returns true if the Journeys feature is enabled for the current application |
| // locale. This is a cached wrapper of `IsJourneysEnabled()` within features.h |
| // that's already evaluated against the g_browser_process application locale. |
| bool IsJourneysEnabled() const; |
| |
| // Returns true if the Journeys use of Images is enabled. |
| static bool IsJourneysImagesEnabled(); |
| |
| // Used to add and remove observers. |
| void AddObserver(Observer* obs); |
| void RemoveObserver(Observer* obs); |
| |
| // Returns whether observers are registered to notify the debug messages. |
| bool ShouldNotifyDebugMessage() const; |
| |
| // Notifies the observers of a debug message being available. |
| void NotifyDebugMessage(const std::string& message) const; |
| |
| // TODO(manukh) `HistoryClustersService` should be responsible for |
| // constructing the |
| // `AnnotatedVisit`s rather than exposing these methods which are used by |
| // `HistoryClustersTabHelper` to construct the visits. |
| // Gets an `IncompleteVisitContextAnnotations` after DCHECKing it exists; this |
| // saves the call sites the effort. |
| IncompleteVisitContextAnnotations& GetIncompleteVisitContextAnnotations( |
| int64_t nav_id); |
| // Gets or creates an `IncompleteVisitContextAnnotations`. |
| IncompleteVisitContextAnnotations& |
| GetOrCreateIncompleteVisitContextAnnotations(int64_t nav_id); |
| // Returns whether an `IncompleteVisitContextAnnotations` exists. |
| // TODO(manukh): Merge `HasIncompleteVisitContextAnnotations()` and |
| // `GetIncompleteVisitContextAnnotations()`. |
| bool HasIncompleteVisitContextAnnotations(int64_t nav_id); |
| // Completes the `IncompleteVisitContextAnnotations` if the expected metrics |
| // have been recorded. References retrieved prior will no longer be valid. |
| void CompleteVisitContextAnnotationsIfReady(int64_t nav_id); |
| |
| // Returns the freshest clusters created from the user visit history based on |
| // `query`, `filter_params`, `begin_time`, and `continuation_params`. |
| // - `filter_params` represents how the caller wants the clusters to be |
| // filtered. |
| // - `begin_time` is an inclusive lower bound. In the general case where the |
| // caller wants to traverse to the start of history, `base::Time()` should |
| // be used. |
| // - `continuation_params` represents where the previous request left off. It |
| // should be set to the default initialized |
| // `QueryClustersContinuationParams` |
| // if the caller wants the newest visits. |
| // - `recluster`, if true, forces reclustering as if |
| // `persist_clusters_in_history_db` were false. |
| // Virtual for testing. |
| virtual std::unique_ptr<HistoryClustersServiceTask> QueryClusters( |
| ClusteringRequestSource clustering_request_source, |
| QueryClustersFilterParams filter_params, |
| base::Time begin_time, |
| QueryClustersContinuationParams continuation_params, |
| bool recluster, |
| QueryClustersCallback callback); |
| |
| // Entrypoint to the `HistoryClustersServiceTaskUpdateClusters`. Updates the |
| // persisted clusters in the history DB and invokes `callback` when done. |
| void UpdateClusters(); |
| |
| // Returns matched keyword data from cache synchronously if `query` matches a |
| // cluster keyword. This ignores clusters with only one visit to avoid |
| // overtriggering. Note: This depends on the cache state, so this may kick off |
| // a cache refresh request while immediately returning null data. It's |
| // expected that on the next keystroke, the cache may be ready and return the |
| // matched keyword data then. |
| absl::optional<history::ClusterKeywordData> DoesQueryMatchAnyCluster( |
| const std::string& query); |
| |
| // Prints the keyword bag state to the log messages. For example, a button on |
| // chrome://history-clusters-internals triggers this. |
| void PrintKeywordBagStateToLogMessage() const; |
| |
| // history::HistoryServiceObserver: |
| void OnURLVisited(history::HistoryService* history_service, |
| const history::URLRow& url_row, |
| const history::VisitRow& visit_row) override; |
| void OnURLsDeleted(history::HistoryService* history_service, |
| const history::DeletionInfo& deletion_info) override; |
| |
| private: |
| friend class HistoryClustersServiceTestApi; |
| friend class HistoryClustersServiceTestBase; |
| |
| // Invokes `UpdateClusters()` after a short delay, then again periodically. |
| // E.g., might invoke `UpdateClusters()` initially 5 minutes after startup, |
| // then every 1 hour afterwards. |
| void RepeatedlyUpdateClusters(); |
| |
| // Starts a keyword cache refresh, if necessary. |
| // TODO(manukh): `StartKeywordCacheRefresh()` and |
| // `PopulateClusterKeywordCache()` should be encapsulated into their own task |
| // to avoid cluttering `HistoryClusterService` with their callbacks. Similar |
| // to the `HistoryClustersServiceTaskGetMostRecentClusters` and |
| // `HistoryClustersServiceTaskUpdateClusters` tasks. |
| void StartKeywordCacheRefresh(); |
| |
| // This is a callback used for the `QueryClusters()` call from |
| // `DoesQueryMatchAnyCluster()`. Accumulates the keywords in `result` within |
| // `keyword_accumulator`. If History is not yet exhausted, will request |
| // another batch of clusters. Otherwise, will update the keyword cache. |
| void PopulateClusterKeywordCache( |
| base::ElapsedTimer total_latency_timer, |
| base::Time begin_time, |
| std::unique_ptr<KeywordMap> keyword_accumulator, |
| KeywordMap* cache, |
| std::vector<history::Cluster> clusters, |
| QueryClustersContinuationParams continuation_params); |
| |
| // Clears `all_keywords_cache_` and cancels any pending tasks to populate it. |
| void ClearKeywordCache(); |
| |
| // Reads the "all keywords" and short keyword caches from prefs and |
| // deserializes them. |
| void LoadCachesFromPrefs(); |
| // Serializes and writes the short keyword cache to prefs. |
| void WriteShortCacheToPrefs(); |
| // Serializes and writes the "all keywords" cache to prefs. |
| void WriteAllCacheToPrefs(); |
| |
| // Whether keyword caches should persisted via the pref service. |
| const bool persist_caches_to_prefs_; |
| |
| // True if Journeys is enabled based on field trial and locale checks. |
| const bool is_journeys_enabled_; |
| |
| // Non-owning pointer, but never nullptr. |
| history::HistoryService* const history_service_; |
| |
| // `VisitContextAnnotations`s are constructed stepwise; they're initially |
| // placed in `incomplete_visit_context_annotations_` and saved to the history |
| // database once completed (if persistence is enabled). |
| IncompleteVisitMap incomplete_visit_context_annotations_; |
| |
| // The backend used for clustering. Never nullptr. |
| std::unique_ptr<ClusteringBackend> backend_; |
| |
| // In-memory cache of keywords match clusters, so we can query this |
| // synchronously as the user types in the omnibox. Also save the timestamp |
| // the cache was generated so we can periodically re-generate. |
| // TODO(tommycli): Make a smarter mechanism for regenerating the cache. |
| KeywordMap all_keywords_cache_; |
| base::Time all_keywords_cache_timestamp_; |
| |
| // Like above, but will represent the clusters newer than |
| // `all_keywords_cache_timestamp_` I.e., this will contain up to 2 hours of |
| // clusters. This can be up to 10 seconds stale. We use a separate cache that |
| // can repeatedly be cleared and recreated instead of incrementally adding |
| // keywords to `all_keywords_cache_` because doing the latter might: |
| // 1) Give a different set of keywords since cluster keywords aren't |
| // necessarily a union of the individual visits' keywords. |
| // 2) Exclude keywords since keywords of size-1 clusters are not cached. |
| // TODO(manukh) This is a "band aid" fix to missing keywords for recent |
| // visits. |
| KeywordMap short_keyword_cache_; |
| base::Time short_keyword_cache_timestamp_; |
| |
| // Tracks the current keyword task. Will be `nullptr` or |
| // `cache_keyword_query_task_.Done()` will be true if there is no ongoing |
| // task. |
| std::unique_ptr<HistoryClustersServiceTask> cache_keyword_query_task_; |
| |
| // Tracks the current update task. Will be `nullptr` or |
| // `update_clusters_task_.Done()` will be true if there is no ongoing task. |
| std::unique_ptr<HistoryClustersServiceTask> update_clusters_task_; |
| |
| // Used to invoke `UpdateClusters()` on startup after a short delay. See |
| // `RepeatedlyUpdateClusters()`'s comment. |
| base::OneShotTimer update_clusters_after_startup_delay_timer_; |
| |
| // Used to invoke `UpdateClusters()` periodically. See |
| // `RepeatedlyUpdateClusters()`'s comment. |
| base::RepeatingTimer update_clusters_period_timer_; |
| |
| // The time of the last `UpdateClusters()` call. Used for logging and to limit |
| // requests when `persist_on_query` is enabled. |
| base::ElapsedTimer update_clusters_timer_; |
| |
| // Whether a synced visit was received since the last `UpdateClusters()` call. |
| // Used to determine whether the full set of persisted clusters needs to be |
| // iterated through when updating cluster triggerability. Always set this to |
| // true at the beginning of the session, so anything that happened at browser |
| // close gets picked up. |
| bool received_synced_visit_since_last_update_ = true; |
| |
| // A list of observers for this service. |
| base::ObserverList<Observer> observers_; |
| |
| // Tracks the observed history service, for cleanup. |
| base::ScopedObservation<history::HistoryService, |
| history::HistoryServiceObserver> |
| history_service_observation_{this}; |
| |
| std::unique_ptr<ContextClustererHistoryServiceObserver> |
| context_clusterer_observer_; |
| |
| // Used to store keyword caches across restarts. |
| raw_ptr<PrefService> pref_service_ = nullptr; |
| |
| // Weak pointers issued from this factory never get invalidated before the |
| // service is destroyed. |
| base::WeakPtrFactory<HistoryClustersService> weak_ptr_factory_{this}; |
| }; |
| |
| } // namespace history_clusters |
| |
| #endif // COMPONENTS_HISTORY_CLUSTERS_CORE_HISTORY_CLUSTERS_SERVICE_H_ |