|  | // Copyright 2020 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #ifndef CHROME_BROWSER_FEDERATED_LEARNING_FLOC_ID_PROVIDER_IMPL_H_ | 
|  | #define CHROME_BROWSER_FEDERATED_LEARNING_FLOC_ID_PROVIDER_IMPL_H_ | 
|  |  | 
|  | #include "base/gtest_prod_util.h" | 
|  | #include "base/scoped_observation.h" | 
|  | #include "base/task/cancelable_task_tracker.h" | 
|  | #include "base/timer/timer.h" | 
|  | #include "chrome/browser/federated_learning/floc_id_provider.h" | 
|  | #include "chrome/browser/privacy_sandbox/privacy_sandbox_settings.h" | 
|  | #include "components/federated_learning/floc_sorting_lsh_clusters_service.h" | 
|  | #include "components/history/core/browser/history_service.h" | 
|  | #include "components/history/core/browser/history_service_observer.h" | 
|  |  | 
|  | namespace federated_learning { | 
|  |  | 
|  | class FlocEventLogger; | 
|  |  | 
|  | // A service that regularly computes the floc id and logs it in a user event. | 
|  | // | 
|  | // For the first browser session of a profile, we'll start computing the floc | 
|  | // after the sorting-lsh file is loaded, and another computation will be | 
|  | // scheduled every X days. When the browser shuts down and starts up again, it | 
|  | // can remember the last state and can still schedule the computation at X days | 
|  | // after the last compute time. If we've missed a scheduled update due to the | 
|  | // browser not being alive, it'll compute after the next session starts, using | 
|  | // the sorting-lsh-file-loaded as the first compute triggering condition. | 
|  | // | 
|  | // The floc will be computed by: | 
|  | // Step 1: sim-hashing navigation URL domains in the last 7 days. This step aims | 
|  | // to group together users with similar browsing habits. | 
|  | // Step 2: applying the sorting-lsh post processing to the sim-hash value. The | 
|  | // sorting-lsh technique groups similar sim-hash values together to ensure the | 
|  | // smallest group size / K-anonymity. The mappings / group-size is computed | 
|  | // server side in chrome-sync, based on logged sim-hash data, and is pushed to | 
|  | // Chrome on a regular basis through the component updater. | 
|  | // | 
|  | // A computed floc will be valid if: | 
|  | // - 3rd party cookies are NOT blocked. | 
|  | // - There are at least 3 *eligible* history entries in the last 7 days, where | 
|  | // eligible means the IP was publicly routable. | 
|  | // - It's not blocked by the sorting-lsh (with encoded blocklist) file. | 
|  | // | 
|  | // If some of those conditions are not met, an invalid floc will be given. | 
|  | // | 
|  | // In the event of history deletion, the floc will be invalidated immediately if | 
|  | // the time range of the deletion overlaps with the time range used to compute | 
|  | // the existing floc. In the event of cookie deletion, the floc will always be | 
|  | // invalidated. Note that we only invalidate the floc rather than recomputing, | 
|  | // because we don't want the floc to change more frequently than the scheduled | 
|  | // update rate. The exception is rare cases such as a change to the finch | 
|  | // version param, which indicates a new algorithm / experiment and therefore | 
|  | // needs a recompute. | 
|  | class FlocIdProviderImpl : public FlocIdProvider, | 
|  | public FlocSortingLshClustersService::Observer, | 
|  | public PrivacySandboxSettings::Observer, | 
|  | public history::HistoryServiceObserver { | 
|  | public: | 
|  | struct ComputeFlocResult { | 
|  | ComputeFlocResult() = default; | 
|  |  | 
|  | ComputeFlocResult(uint64_t sim_hash, const FlocId& floc_id) | 
|  | : sim_hash_computed(true), sim_hash(sim_hash), floc_id(floc_id) {} | 
|  |  | 
|  | bool sim_hash_computed = false; | 
|  |  | 
|  | // Sim-hash of the browsing history. This is the baseline value where the | 
|  | // |floc_id| field should be derived from. We'll log this field for the | 
|  | // server to calculate the sorting-lsh cutting points. | 
|  | uint64_t sim_hash = 0; | 
|  |  | 
|  | // The floc to be exposed to JS API. It's derived from applying the | 
|  | // sorting-lsh & blocklist post-processing on the |sim_hash|. | 
|  | FlocId floc_id; | 
|  | }; | 
|  |  | 
|  | using CanComputeFlocCallback = base::OnceCallback<void(bool)>; | 
|  | using ComputeFlocCompletedCallback = | 
|  | base::OnceCallback<void(ComputeFlocResult)>; | 
|  | using GetRecentlyVisitedURLsCallback = | 
|  | history::HistoryService::QueryHistoryCallback; | 
|  |  | 
|  | FlocIdProviderImpl(PrefService* prefs, | 
|  | PrivacySandboxSettings* privacy_sandbox_settings, | 
|  | history::HistoryService* history_service, | 
|  | std::unique_ptr<FlocEventLogger> floc_event_logger); | 
|  | ~FlocIdProviderImpl() override; | 
|  | FlocIdProviderImpl(const FlocIdProviderImpl&) = delete; | 
|  | FlocIdProviderImpl& operator=(const FlocIdProviderImpl&) = delete; | 
|  |  | 
|  | blink::mojom::InterestCohortPtr GetInterestCohortForJsApi( | 
|  | const GURL& url, | 
|  | const base::Optional<url::Origin>& top_frame_origin) const override; | 
|  |  | 
|  | void MaybeRecordFlocToUkm(ukm::SourceId source_id) override; | 
|  |  | 
|  | protected: | 
|  | // protected virtual for testing. | 
|  | virtual void OnComputeFlocCompleted(ComputeFlocResult result); | 
|  | virtual void LogFlocComputedEvent(const ComputeFlocResult& result); | 
|  |  | 
|  | private: | 
|  | friend class FlocIdProviderUnitTest; | 
|  | friend class FlocIdProviderBrowserTest; | 
|  |  | 
|  | // KeyedService: | 
|  | void Shutdown() override; | 
|  |  | 
|  | // PrivacySandboxSettings::Observer | 
|  |  | 
|  | // When the floc-accessible-since time is updated (due to e.g. cookies | 
|  | // deletion), we'll either invalidate or keep using the floc. This will | 
|  | // depend on the updated time and the begin time of the history used to | 
|  | // compute the current floc. | 
|  | void OnFlocDataAccessibleSinceUpdated() override; | 
|  |  | 
|  | // On history deletion, we'll either invalidate or keep using the floc. This | 
|  | // will depend on the deletion type and the time range. | 
|  | void OnURLsDeleted(history::HistoryService* history_service, | 
|  | const history::DeletionInfo& deletion_info) override; | 
|  |  | 
|  | // FlocSortingLshClustersService::Observer | 
|  | void OnSortingLshClustersFileReady() override; | 
|  |  | 
|  | void ComputeFloc(); | 
|  |  | 
|  | void CheckCanComputeFloc(CanComputeFlocCallback callback); | 
|  | void OnCheckCanComputeFlocCompleted(ComputeFlocCompletedCallback callback, | 
|  | bool can_compute_floc); | 
|  |  | 
|  | bool IsSyncHistoryEnabled() const; | 
|  | bool IsPrivacySandboxAllowed() const; | 
|  |  | 
|  | void IsSwaaNacAccountEnabled(CanComputeFlocCallback callback); | 
|  |  | 
|  | void GetRecentlyVisitedURLs(GetRecentlyVisitedURLsCallback callback); | 
|  | void OnGetRecentlyVisitedURLsCompleted(ComputeFlocCompletedCallback callback, | 
|  | history::QueryResults results); | 
|  |  | 
|  | void DidApplySortingLshPostProcessing(ComputeFlocCompletedCallback callback, | 
|  | uint64_t sim_hash, | 
|  | base::Time history_begin_time, | 
|  | base::Time history_end_time, | 
|  | base::Optional<uint64_t> final_hash, | 
|  | base::Version version); | 
|  |  | 
|  | // Abandon any scheduled task, and schedule a new compute-floc task with | 
|  | // |delay|. | 
|  | void ScheduleFlocComputation(base::TimeDelta delay); | 
|  |  | 
|  | // The following raw pointer references are guaranteed to outlive this object. | 
|  | // |prefs_| is owned by Profile, and it won't be destroyed until the | 
|  | // destructor of Profile is called, where all the profile-keyed services | 
|  | // including this object will be destroyed. Other services are all created by | 
|  | // profile-keyed service factories, and the dependency declared in | 
|  | // FlocIdProviderFactory::FlocIdProviderFactory() guarantees that this object | 
|  | // will be destroyed first among those services. | 
|  | PrefService* prefs_; | 
|  | PrivacySandboxSettings* privacy_sandbox_settings_; | 
|  | history::HistoryService* history_service_; | 
|  |  | 
|  | std::unique_ptr<FlocEventLogger> floc_event_logger_; | 
|  |  | 
|  | // The id to be exposed to the JS API. It will always be in sync with the one | 
|  | // stored in prefs. | 
|  | FlocId floc_id_; | 
|  |  | 
|  | // When a floc is computed, we'll record it to the UKM on the next page load. | 
|  | // This flag controls whether the recording is needed. Caveat: given that this | 
|  | // info does not persist across browser sessions, we could miss the recording | 
|  | // when the floc is computed and then the browser is closed before the next | 
|  | // page load occurs. | 
|  | bool need_ukm_recording_ = false; | 
|  |  | 
|  | bool floc_computation_in_progress_ = false; | 
|  |  | 
|  | // True if history-delete occurs during an in-progress computation. When the | 
|  | // in-progress one finishes, we would disregard the result (i.e. no loggings | 
|  | // or floc update), and compute again. Potentially we could maintain extra | 
|  | // states to tell if the history-delete would have impact on the in-progress | 
|  | // result, but since this would only happen in rare race situations, we just | 
|  | // always recompute to keep things simple. | 
|  | bool need_recompute_ = false; | 
|  |  | 
|  | // Used for the async tasks querying the HistoryService. | 
|  | base::CancelableTaskTracker history_task_tracker_; | 
|  |  | 
|  | // The timer used to schedule a floc computation. | 
|  | base::OneShotTimer compute_floc_timer_; | 
|  |  | 
|  | base::ScopedObservation<history::HistoryService, | 
|  | history::HistoryServiceObserver> | 
|  | history_service_observation_{this}; | 
|  |  | 
|  | base::WeakPtrFactory<FlocIdProviderImpl> weak_ptr_factory_{this}; | 
|  | }; | 
|  |  | 
|  | }  // namespace federated_learning | 
|  |  | 
|  | #endif  // CHROME_BROWSER_FEDERATED_LEARNING_FLOC_ID_PROVIDER_IMPL_H_ |