// blob: 4e361811f85488f4c8b4291d74dccc11f61fbc2d [file] [log] [blame]
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_CONTENT_ANNOTATIONS_SERVICE_H_
#define COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_CONTENT_ANNOTATIONS_SERVICE_H_
#include <string>
#include "base/callback_forward.h"
#include "base/containers/lru_cache.h"
#include "base/hash/hash.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/task/cancelable_task_tracker.h"
#include "components/continuous_search/browser/search_result_extractor_client.h"
#include "components/continuous_search/browser/search_result_extractor_client_status.h"
#include "components/continuous_search/common/public/mojom/continuous_search.mojom.h"
#include "components/history/core/browser/history_types.h"
#include "components/history/core/browser/url_row.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/optimization_guide/content/browser/page_content_annotator.h"
#include "components/optimization_guide/core/entity_metadata_provider.h"
#include "components/optimization_guide/core/model_info.h"
#include "components/optimization_guide/core/page_content_annotations_common.h"
#include "components/optimization_guide/machine_learning_tflite_buildflags.h"
#include "url/gurl.h"
namespace content {
class WebContents;
} // namespace content
namespace history {
class HistoryService;
} // namespace history
namespace optimization_guide {
class OptimizationGuideModelProvider;
class PageContentAnnotationsModelManager;
class PageContentAnnotationsServiceBrowserTest;
class PageContentAnnotationsWebContentsObserver;
// The information used by HistoryService to identify a visit to a URL.
//
// This is a plain aggregate: it can still be brace-initialized as
// `HistoryVisit{timestamp, url, navigation_id}`.
struct HistoryVisit {
  // Timestamp of the navigation entry the visit belongs to.
  base::Time nav_entry_timestamp;

  // The visited URL.
  GURL url;

  // Identifier of the navigation. Explicitly zero-initialized so that a
  // default-initialized HistoryVisit never carries an indeterminate value.
  int64_t navigation_id = 0;

  // Strict-weak-ordering comparator for using HistoryVisit as an ordered key.
  //
  // Visits are ordered by |nav_entry_timestamp| first and by |url| second.
  // |navigation_id| does not take part in the ordering, so two visits that
  // differ only in |navigation_id| are treated as equivalent keys.
  struct Comp {
    bool operator()(const HistoryVisit& lhs, const HistoryVisit& rhs) const {
      if (lhs.nav_entry_timestamp != rhs.nav_entry_timestamp)
        return lhs.nav_entry_timestamp < rhs.nav_entry_timestamp;
      return lhs.url < rhs.url;
    }
  };
};
// A KeyedService that annotates page content.
//
// External callers annotate text through |BatchAnnotate|. The per-visit
// |Annotate| path is private and only reachable by the friend classes declared
// below. The service also implements EntityMetadataProvider. Instances are
// neither copyable nor assignable.
class PageContentAnnotationsService : public KeyedService,
public EntityMetadataProvider {
public:
// |history_service| is stored in |history_service_|; it is not owned and must
// outlive |this|.
// NOTE(review): how |application_locale| and
// |optimization_guide_model_provider| are consumed is not visible in this
// header - confirm against the .cc file.
PageContentAnnotationsService(
const std::string& application_locale,
OptimizationGuideModelProvider* optimization_guide_model_provider,
history::HistoryService* history_service);
~PageContentAnnotationsService() override;
// Not copyable or assignable.
PageContentAnnotationsService(const PageContentAnnotationsService&) = delete;
PageContentAnnotationsService& operator=(
const PageContentAnnotationsService&) = delete;
// This is the main entry point for page content annotations by external
// callers. Annotates each string in |inputs| for |annotation_type| and
// reports the outcome through |callback|.
void BatchAnnotate(BatchAnnotationCallback callback,
const std::vector<std::string>& inputs,
AnnotationType annotation_type);
// Overrides the PageContentAnnotator for testing. See
// test_page_content_annotator.h for an implementation designed for testing.
void OverridePageContentAnnotatorForTesting(PageContentAnnotator* annotator);
// Returns the model info for the given annotation type, if the model file is
// available.
absl::optional<ModelInfo> GetModelInfoForType(AnnotationType type) const;
// Runs |callback| with true when the model that powers |BatchAnnotate| for
// the given annotation type is ready to execute. If the model is ready now,
// the callback is run immediately. If the model file will never be available,
// the callback is run with false.
void NotifyWhenModelAvailable(AnnotationType type,
base::OnceCallback<void(bool)> callback);
// EntityMetadataProvider:
void GetMetadataForEntityId(
const std::string& entity_id,
EntityMetadataRetrievedCallback callback) override;
private:
// The members in this section only exist when the build includes the TFLite
// library.
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
// Callback invoked when |visit| has been annotated. |content_annotations| is
// an optional, so it may be empty.
void OnPageContentAnnotated(
const HistoryVisit& visit,
const absl::optional<history::VisitContentModelAnnotations>&
content_annotations);
// Owned model manager; see the comment on |annotator_| for how the two
// relate.
std::unique_ptr<PageContentAnnotationsModelManager> model_manager_;
#endif
// The annotator to use for requests to |BatchAnnotate|. In prod, this is
// simply |model_manager_.get()| but is set as a separate pointer here in
// order to be override-able for testing.
raw_ptr<PageContentAnnotator> annotator_;
// The WCO friend class is used to keep the `Annotate` API internal to
// OptGuide. Callers should use `BatchAnnotate` instead. The browser test is
// also a friend so that it can reach the private members of this class.
friend class PageContentAnnotationsWebContentsObserver;
friend class PageContentAnnotationsServiceBrowserTest;
// Requests to annotate |text|, which is associated with |visit|.
//
// When finished annotating, it will store the relevant information in
// History Service.
//
// Virtualized for testing.
virtual void Annotate(const HistoryVisit& visit, const std::string& text);
// Creates a HistoryVisit based on the current state of |web_contents|,
// tagged with |navigation_id|.
static HistoryVisit CreateHistoryVisitFromWebContents(
content::WebContents* web_contents,
int64_t navigation_id);
// Requests |search_result_extractor_client_| to extract related searches from
// the Google SRP DOM associated with |web_contents|.
//
// Once finished, it will store the related searches in History Service.
//
// Virtualized for testing.
virtual void ExtractRelatedSearches(const HistoryVisit& visit,
content::WebContents* web_contents);
// Callback invoked when related searches have been extracted for |visit|.
// |status| reports the outcome of the extraction and |results| carries the
// extracted data.
void OnRelatedSearchesExtracted(
const HistoryVisit& visit,
continuous_search::SearchResultExtractorClientStatus status,
continuous_search::mojom::CategoryResultsPtr results);
// Persist |entities| for |visit| in |history_service_|.
//
// Virtualized for testing.
virtual void PersistRemotePageEntities(
const HistoryVisit& visit,
const std::vector<history::VisitContentModelAnnotations::Category>&
entities);
// One-shot callback that receives the history::VisitID to which annotations
// should be written.
using PersistAnnotationsCallback = base::OnceCallback<void(history::VisitID)>;
// Queries |history_service_| for all the visits to the visited URL of |visit|.
// |callback| will be invoked to write the bound content annotations to
// |history_service_| once the visits to the given URL have returned.
void QueryURL(const HistoryVisit& visit, PersistAnnotationsCallback callback);
// Callback invoked when |history_service_| has returned results for the
// visits to a URL. In turn invokes |callback| to write the bound content
// annotations to |history_service_|.
void OnURLQueried(const HistoryVisit& visit,
PersistAnnotationsCallback callback,
history::QueryURLResult url_result);
// The history service to write content annotations to. Not owned. Guaranteed
// to outlive |this|.
raw_ptr<history::HistoryService> history_service_;
// The task tracker to keep track of tasks to query |history_service_|.
base::CancelableTaskTracker history_service_task_tracker_;
// The client of continuous_search::mojom::SearchResultExtractor interface
// used for extracting data from the main frame of Google SRP |web_contents|.
continuous_search::SearchResultExtractorClient
search_result_extractor_client_;
// A LRU Cache keeping track of the visits that have been requested for
// annotation. If the requested visit is in this cache, the models will not be
// requested for another annotation on the same visit.
//
// Keys are ordered with HistoryVisit::Comp, which compares only the
// navigation entry timestamp and the URL, so |navigation_id| does not
// distinguish cache entries.
// NOTE(review): the bool value looks like a presence-only placeholder -
// confirm against the .cc file.
base::LRUCache<HistoryVisit, bool, HistoryVisit::Comp>
last_annotated_history_visits_;
// Declared last, per the base::WeakPtrFactory guidance, so that outstanding
// weak pointers are invalidated before any other member is destroyed.
base::WeakPtrFactory<PageContentAnnotationsService> weak_ptr_factory_{this};
};
} // namespace optimization_guide
#endif // COMPONENTS_OPTIMIZATION_GUIDE_CONTENT_BROWSER_PAGE_CONTENT_ANNOTATIONS_SERVICE_H_