// blob: 8b054fe1e4112502ad5a6a426130bbbdbaa303ea [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_PASSAGE_EMBEDDINGS_PAGE_EMBEDDINGS_SERVICE_H_
#define CHROME_BROWSER_PASSAGE_EMBEDDINGS_PAGE_EMBEDDINGS_SERVICE_H_
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include "base/functional/callback.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/observer_list.h"
#include "base/scoped_observation.h"
#include "chrome/browser/page_content_annotations/page_content_extraction_service.h"
#include "components/keyed_service/core/keyed_service.h"
#include "components/passage_embeddings/passage_embeddings_types.h"
#include "content/public/browser/visibility.h"
#include "content/public/browser/web_contents_observer.h"
// Forward declaration: this header only refers to content::WebContents through
// pointers and base::WeakPtr, so the full definition is not required here.
namespace content {
class WebContents;
}
// NOTE(review): page_content_extraction_service.h is already included above
// (PageEmbeddingsService derives from the nested
// PageContentExtractionService::Observer, which needs the complete type), so
// this forward declaration looks redundant - confirm before removing.
namespace page_content_annotations {
class PageContentExtractionService;
}
namespace passage_embeddings {
// A passage from a page along with its computed embedding.
struct PassageEmbedding {
std::string passage;
passage_embeddings::Embedding embedding;
};
class PageEmbeddingsService
: public KeyedService,
public page_content_annotations::PageContentExtractionService::Observer {
public:
// The priority to use when computing embeddings. Higher priorities imply more
// performance overhead.
enum Priority {
kUserBlocking,
kUrgent,
kDefault,
kBackground,
};
class Observer : public base::CheckedObserver {
public:
// Gets the default priority to use for computing embeddings.
// Implementations are expected to return the same value over the entire
// lifetime of the observer.
virtual Priority GetDefaultPriority() const = 0;
// Invoked when embeddings become available or are updated for the
// web_contents. The embeddings then can be queried via GetEmbeddings().
virtual void OnPageEmbeddingsAvailable(content::WebContents* web_contents) {
}
};
// ScopedPriority allows observers to temporarily raise the priority of the
// embeddings computation for the lifetime of the object. This can be useful,
// for example, if embeddings are anticipated to be needed urgently to drive
// UI features.
class ScopedPriority {
public:
ScopedPriority(PageEmbeddingsService* service,
Observer* observer,
Priority priority);
~ScopedPriority();
ScopedPriority(ScopedPriority& other) = delete;
ScopedPriority& operator=(ScopedPriority& other) = delete;
ScopedPriority(ScopedPriority&& other);
ScopedPriority& operator=(ScopedPriority&& other);
private:
raw_ptr<PageEmbeddingsService> service_;
raw_ptr<PageEmbeddingsService::Observer> observer_;
};
// A callback to produce the passages for a page for which to generate
// embeddings. This is responsible for generating chunked passages from the
// AnnotatedPageContent and filtering to the top passages_to_generate most
// useful passages.
using EmbeddingCandidatesGenerator =
base::RepeatingCallback<std::vector<std::string>(
const optimization_guide::proto::AnnotatedPageContent&,
int passages_to_generate)>;
PageEmbeddingsService(EmbeddingCandidatesGenerator candidates_generator,
page_content_annotations::PageContentExtractionService*
page_content_extraction_service,
passage_embeddings::Embedder* embedder);
~PageEmbeddingsService() override;
void AddObserver(Observer* observer);
void RemoveObserver(Observer* observer);
ScopedPriority RaisePriority(Observer* observer, Priority priority);
// PageEmbeddingsService computes embeddings lazily for the active tab, on
// backgrounding. ProcessAllEmbeddings() forces the active tab's embeddings to
// be processed.
void ProcessAllEmbeddings();
// Retrieves the embeddings for web_content. Returns the empty vector if
// embeddings have not yet been computed.
std::vector<PassageEmbedding> GetEmbeddings(
content::WebContents* web_content) const;
// PageContentExtractionService:
void OnPageContentExtracted(
content::Page& page,
const optimization_guide::proto::AnnotatedPageContent& page_content)
override;
private:
class WebContentsEventsObserver;
void ComputeEmbeddings(content::WebContents* web_contents);
void OnEmbeddingsComputed(base::WeakPtr<content::WebContents> web_contents,
std::vector<std::string> passages,
std::vector<Embedding> embeddings,
Embedder::TaskId task_id,
ComputeEmbeddingsStatus status);
static Priority GetActivePriority(
const base::ObserverList<Observer>& observers,
const std::map<Observer*, Priority>& temporary_priority);
void UpdateTaskPriorities(Priority priority);
struct WebContentsState;
const EmbeddingCandidatesGenerator candidates_generator_;
const raw_ptr<passage_embeddings::Embedder> embedder_;
base::ScopedObservation<
page_content_annotations::PageContentExtractionService,
PageEmbeddingsService>
page_content_extraction_observation_{this};
base::ObserverList<Observer> observers_;
std::map<Observer*, Priority> temporary_priority_;
Priority current_priority_ = kDefault;
std::map<content::WebContents*, WebContentsState> web_contents_state_;
base::WeakPtrFactory<PageEmbeddingsService> weak_ptr_factory_{this};
};
} // namespace passage_embeddings
#endif // CHROME_BROWSER_PASSAGE_EMBEDDINGS_PAGE_EMBEDDINGS_SERVICE_H_