// blob: 8d1421253f233253625bf0edc3d457f6edfbe396 [file] [log] [blame]
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/optimization_guide/content/browser/page_content_annotations_service.h"
#include "base/metrics/histogram_functions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "components/history/core/browser/history_service.h"
#include "components/optimization_guide/core/noisy_metrics_recorder.h"
#include "components/optimization_guide/core/optimization_guide_enums.h"
#include "components/optimization_guide/core/optimization_guide_features.h"
#include "components/optimization_guide/core/optimization_guide_model_provider.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/web_contents.h"
#include "services/metrics/public/cpp/metrics_utils.h"
#include "services/metrics/public/cpp/ukm_builders.h"
#include "services/metrics/public/cpp/ukm_recorder.h"
#include "services/metrics/public/cpp/ukm_source.h"
#include "services/metrics/public/cpp/ukm_source_id.h"
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
#include "components/optimization_guide/content/browser/page_content_annotations_model_manager.h"
#endif
namespace optimization_guide {
namespace {
// Emits |status| to the ContentAnnotationsStorageStatus UMA enumeration
// histogram. Callers must pass a concrete status; kUnknown is DCHECKed against.
void LogPageContentAnnotationsStorageStatus(
    PageContentAnnotationsStorageStatus status) {
  static constexpr char kStorageStatusHistogram[] =
      "OptimizationGuide.PageContentAnnotationsService."
      "ContentAnnotationsStorageStatus";

  DCHECK_NE(status, PageContentAnnotationsStorageStatus::kUnknown);
  base::UmaHistogramEnumeration(kStorageStatusHistogram, status);
}
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
// Records the visibility score of |visit| to UKM as a RAPPOR-style (noised)
// record. Nothing is recorded when |content_annotations| is empty or when its
// visibility score is negative.
void MaybeRecordVisibilityUKM(
    const HistoryVisit& visit,
    const absl::optional<history::VisitContentModelAnnotations>&
        content_annotations) {
  if (!content_annotations)
    return;

  if (content_annotations->visibility_score < 0)
    return;

  // Scale the score by 100 and truncate to an integer.
  int64_t score =
      static_cast<int64_t>(100 * content_annotations->visibility_score);

  // We want 2^|num_bits| buckets, linearly spaced.
  const auto num_bits = optimization_guide::features::NumBitsForRAPPORMetrics();
  uint32_t num_buckets = std::pow(2, num_bits);
  DCHECK_GT(num_buckets, 0u);
  float bucket_size = 100.0 / num_buckets;
  uint32_t bucketed_score = static_cast<uint32_t>(floor(score / bucket_size));

  // A score at the very top of the range (100) maps to index |num_buckets|,
  // which is one past the last bucket and cannot be represented in |num_bits|
  // bits. Fold such values into the highest bucket. The |num_buckets| > 0
  // guard avoids wrapping around when DCHECKs are compiled out.
  if (num_buckets > 0 && bucketed_score >= num_buckets)
    bucketed_score = num_buckets - 1;
  DCHECK_LT(bucketed_score, num_buckets);

  uint32_t noisy_score = NoisyMetricsRecorder().GetNoisyMetric(
      optimization_guide::features::NoiseProbabilityForRAPPORMetrics(),
      bucketed_score, num_bits);

  ukm::SourceId ukm_source_id = ukm::ConvertToSourceId(
      visit.navigation_id, ukm::SourceIdType::NAVIGATION_ID);
  ukm::builders::PageContentAnnotations(ukm_source_id)
      .SetVisibilityScore(static_cast<int64_t>(noisy_score))
      .Record(ukm::UkmRecorder::Get());
}
#endif /* BUILDFLAG(BUILD_WITH_TFLITE_LIB) */
} // namespace
// Constructs the service.
//
// |optimization_guide_model_provider| and |history_service| must both be
// non-null (DCHECKed). |history_service| is stored as a raw pointer.
// |application_locale| is only consumed when built with the TFLite library,
// where it is forwarded to the model manager.
PageContentAnnotationsService::PageContentAnnotationsService(
const std::string& application_locale,
OptimizationGuideModelProvider* optimization_guide_model_provider,
history::HistoryService* history_service)
// The container of already-requested visits is constructed with the
// feature-configured value (presumably its maximum size -- confirm against the
// member's type in the header).
: last_annotated_history_visits_(
features::MaxContentAnnotationRequestsCached()) {
DCHECK(optimization_guide_model_provider);
DCHECK(history_service);
history_service_ = history_service;
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
// The owned model manager also serves as the default annotator used by
// BatchAnnotate().
model_manager_ = std::make_unique<PageContentAnnotationsModelManager>(
application_locale, optimization_guide_model_provider);
annotator_ = model_manager_.get();
#endif
}
// Defaulted destructor; no explicit teardown logic is needed here.
PageContentAnnotationsService::~PageContentAnnotationsService() = default;
// Requests annotation of |text| on behalf of |visit|. A visit that has already
// been submitted is ignored. The model is only invoked when built with the
// TFLite library; the visit is remembered in either case.
void PageContentAnnotationsService::Annotate(const HistoryVisit& visit,
                                             const std::string& text) {
  // Already asked to annotate this visit: do not submit it again.
  const bool already_requested = last_annotated_history_visits_.Peek(visit) !=
                                 last_annotated_history_visits_.end();
  if (already_requested)
    return;

  last_annotated_history_visits_.Put(visit, true);

#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  // Bound through a weak pointer so the reply is dropped if |this| is gone.
  auto on_annotated =
      base::BindOnce(&PageContentAnnotationsService::OnPageContentAnnotated,
                     weak_ptr_factory_.GetWeakPtr(), visit);
  model_manager_->Annotate(text, std::move(on_annotated));
#endif
}
// Test-only hook: replaces the annotator used by BatchAnnotate() with
// |annotator|. The pointer is stored as-is.
void PageContentAnnotationsService::OverridePageContentAnnotatorForTesting(
    PageContentAnnotator* annotator) {
  annotator_ = annotator;
}
// Annotates every entry of |inputs| for |annotation_type| and reports the
// results through |callback|. Without an annotator, |callback| is run right
// away with the output of CreateEmptyBatchAnnotationResults(|inputs|).
void PageContentAnnotationsService::BatchAnnotate(
    BatchAnnotationCallback callback,
    const std::vector<std::string>& inputs,
    AnnotationType annotation_type) {
  if (annotator_) {
    annotator_->Annotate(std::move(callback), inputs, annotation_type);
    return;
  }

  // No annotator available: answer immediately with empty results.
  std::move(callback).Run(CreateEmptyBatchAnnotationResults(inputs));
}
// Returns the model info the model manager reports for |type|. Builds without
// the TFLite library always return absl::nullopt.
absl::optional<ModelInfo> PageContentAnnotationsService::GetModelInfoForType(
    AnnotationType type) const {
#if !BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  return absl::nullopt;
#else
  DCHECK(model_manager_);
  return model_manager_->GetModelInfoForType(type);
#endif
}
// Forwards |callback| to the model manager's NotifyWhenModelAvailable() for
// |type|. Builds without the TFLite library run |callback| immediately with
// false.
void PageContentAnnotationsService::NotifyWhenModelAvailable(
    AnnotationType type,
    base::OnceCallback<void(bool)> callback) {
#if !BUILDFLAG(BUILD_WITH_TFLITE_LIB)
  std::move(callback).Run(false);
#else
  DCHECK(model_manager_);
  model_manager_->NotifyWhenModelAvailable(type, std::move(callback));
#endif
}
// Asks the search result extractor client for the related searches of
// |web_contents|. The reply is delivered to OnRelatedSearchesExtracted() along
// with |visit|.
void PageContentAnnotationsService::ExtractRelatedSearches(
    const HistoryVisit& visit,
    content::WebContents* web_contents) {
  // Bound through a weak pointer so the reply is dropped if |this| is gone.
  auto on_extracted =
      base::BindOnce(&PageContentAnnotationsService::OnRelatedSearchesExtracted,
                     weak_ptr_factory_.GetWeakPtr(), visit);
  search_result_extractor_client_.RequestData(
      web_contents, {continuous_search::mojom::ResultType::kRelatedSearches},
      std::move(on_extracted));
}
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
// Reply handler for Annotate(). Logs whether annotations were produced,
// records the visibility UKM, and, when the feature permits writing to the
// history service, looks up the matching history visit to persist them.
void PageContentAnnotationsService::OnPageContentAnnotated(
    const HistoryVisit& visit,
    const absl::optional<history::VisitContentModelAnnotations>&
        content_annotations) {
  const bool was_annotated = content_annotations.has_value();
  base::UmaHistogramBoolean(
      "OptimizationGuide.PageContentAnnotationsService.ContentAnnotated",
      was_annotated);
  if (!was_annotated)
    return;

  MaybeRecordVisibilityUKM(visit, content_annotations);

  if (features::ShouldWriteContentAnnotationsToHistoryService()) {
    // The annotations are copied into the callback; it runs once the visit id
    // has been resolved.
    QueryURL(visit,
             base::BindOnce(
                 &history::HistoryService::AddContentModelAnnotationsForVisit,
                 history_service_->AsWeakPtr(), *content_annotations));
  }
}
#endif
// Reply handler for ExtractRelatedSearches(). Logs whether extraction
// succeeded and, on success, collects the result titles of the first
// related-searches group in |results|. If any titles were found and the
// feature permits writing to the history service, they are persisted for the
// history visit matching |visit|.
void PageContentAnnotationsService::OnRelatedSearchesExtracted(
    const HistoryVisit& visit,
    continuous_search::SearchResultExtractorClientStatus status,
    continuous_search::mojom::CategoryResultsPtr results) {
  const bool success =
      status == continuous_search::SearchResultExtractorClientStatus::kSuccess;
  base::UmaHistogramBoolean(
      "OptimizationGuide.PageContentAnnotationsService."
      "RelatedSearchesExtracted",
      success);
  if (!success) {
    return;
  }

  std::vector<std::string> related_searches;
  for (const auto& group : results->groups) {
    if (group->type != continuous_search::mojom::ResultType::kRelatedSearches) {
      continue;
    }

    // Titles are whitespace-collapsed and converted from UTF-16 to UTF-8.
    for (const auto& result : group->results) {
      related_searches.push_back(
          base::UTF16ToUTF8(base::CollapseWhitespace(result->title, true)));
    }

    // Only the first related-searches group is considered.
    break;
  }

  if (related_searches.empty()) {
    return;
  }

  if (!features::ShouldWriteContentAnnotationsToHistoryService()) {
    return;
  }

  // |related_searches| is not used after this point, so hand it over to the
  // callback instead of copying it.
  QueryURL(visit,
           base::BindOnce(&history::HistoryService::AddRelatedSearchesForVisit,
                          history_service_->AsWeakPtr(),
                          std::move(related_searches)));
}
// Queries the history service for |visit.url|, including its visits. The
// result is delivered to OnURLQueried() together with |visit| and |callback|.
void PageContentAnnotationsService::QueryURL(
    const HistoryVisit& visit,
    PersistAnnotationsCallback callback) {
  // Bound through a weak pointer so the reply is dropped if |this| is gone.
  auto on_queried =
      base::BindOnce(&PageContentAnnotationsService::OnURLQueried,
                     weak_ptr_factory_.GetWeakPtr(), visit,
                     std::move(callback));
  history_service_->QueryURL(visit.url, /*want_visits=*/true,
                             std::move(on_queried),
                             &history_service_task_tracker_);
}
// Reply handler for QueryURL(). Searches |url_result.visits| for the visit
// whose visit time equals |visit.nav_entry_timestamp| and, if found, runs
// |callback| with that visit's id. The outcome is logged to the storage status
// histogram in every case; |callback| is dropped without being run when no
// matching visit exists.
void PageContentAnnotationsService::OnURLQueried(
    const HistoryVisit& visit,
    PersistAnnotationsCallback callback,
    history::QueryURLResult url_result) {
  if (!url_result.success) {
    LogPageContentAnnotationsStorageStatus(
        PageContentAnnotationsStorageStatus::kNoVisitsForUrl);
    return;
  }

  bool did_store_content_annotations = false;
  for (const auto& visit_for_url : url_result.visits) {
    if (visit.nav_entry_timestamp != visit_for_url.visit_time)
      continue;

    // |callback| is a once-callback, so stop at the first matching visit.
    std::move(callback).Run(visit_for_url.visit_id);
    did_store_content_annotations = true;
    break;
  }

  // Enumerators are qualified with the enum name, matching the rest of this
  // file.
  LogPageContentAnnotationsStorageStatus(
      did_store_content_annotations
          ? PageContentAnnotationsStorageStatus::kSuccess
          : PageContentAnnotationsStorageStatus::kSpecificVisitForUrlNotFound);
}
void PageContentAnnotationsService::GetMetadataForEntityId(
const std::string& entity_id,
EntityMetadataRetrievedCallback callback) {
#if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
model_manager_->GetMetadataForEntityId(entity_id, std::move(callback));
#else
std::move(callback).Run(absl::nullopt);
#endif
}
void PageContentAnnotationsService::PersistRemotePageEntities(
const HistoryVisit& history_visit,
const std::vector<history::VisitContentModelAnnotations::Category>&
entities) {
history::VisitContentModelAnnotations annotations;
annotations.entities = entities;
QueryURL(history_visit,
base::BindOnce(
&history::HistoryService::AddContentModelAnnotationsForVisit,
history_service_->AsWeakPtr(), annotations));
}
// static
// Builds a HistoryVisit from the last committed navigation entry's timestamp,
// the last committed URL of |web_contents|, and |navigation_id|.
// NOTE(review): the last committed entry is dereferenced without a null check
// -- confirm callers only invoke this after a navigation has committed.
HistoryVisit PageContentAnnotationsService::CreateHistoryVisitFromWebContents(
    content::WebContents* web_contents,
    int64_t navigation_id) {
  auto* last_committed_entry =
      web_contents->GetController().GetLastCommittedEntry();
  return HistoryVisit{last_committed_entry->GetTimestamp(),
                      web_contents->GetLastCommittedURL(), navigation_id};
}
} // namespace optimization_guide