blob: 40c25340ce929cb9ea87e561a9beb9a3e5799c7b [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/contextual_cueing/contextual_cueing_page_data.h"
#include "base/i18n/char_iterator.h"
#include "base/strings/string_util.h"
#include "base/task/single_thread_task_runner.h"
#include "chrome/browser/contextual_cueing/contextual_cueing_features.h"
#include "content/public/browser/web_contents.h"
#include "pdf/buildflags.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace contextual_cueing {
namespace {
bool DidMatchCueingCondition(
const optimization_guide::proto::ContextualCueingConditions& condition,
int64_t value) {
if (!condition.has_cueing_operator()) {
return false;
}
if (!condition.has_int64_threshold()) {
return false;
}
switch (condition.cueing_operator()) {
case optimization_guide::proto::CONTEXTUAL_CUEING_OPERATOR_UNSPECIFIED:
return false;
case optimization_guide::proto::
CONTEXTUAL_CUEING_OPERATOR_GREATER_THAN_OR_EQUAL_TO:
return value >= condition.int64_threshold();
case optimization_guide::proto::
CONTEXTUAL_CUEING_OPERATOR_LESS_THAN_OR_EQUAL_TO:
return value <= condition.int64_threshold();
}
}
void CountWords(const optimization_guide::proto::ContentNode& content_node,
size_t max_word_count_limit,
size_t* word_count) {
bool is_previous_char_whitespace = true;
for (base::i18n::UTF8CharIterator iter(
content_node.content_attributes().text_data().text_content());
*word_count < max_word_count_limit && !iter.end(); iter.Advance()) {
bool is_current_char_whitespace = base::IsUnicodeWhitespace(iter.get());
if (is_previous_char_whitespace && !is_current_char_whitespace) {
// Count the start of the word.
++*word_count;
}
is_previous_char_whitespace = is_current_char_whitespace;
}
for (const auto& child : content_node.children_nodes()) {
CountWords(child, max_word_count_limit, word_count);
}
}
} // namespace
ContextualCueingPageData::ContextualCueingPageData(
content::Page& page,
optimization_guide::proto::GlicContextualCueingMetadata metadata,
CueingDecisionCallback cueing_decision_callback)
: content::PageUserData<ContextualCueingPageData>(page),
metadata_(std::move(metadata)),
cueing_decision_callback_(std::move(cueing_decision_callback)) {
FindMatchingConfig();
}
ContextualCueingPageData::~ContextualCueingPageData() {
if (cueing_decision_callback_) {
std::move(cueing_decision_callback_)
.Run(base::unexpected(NudgeDecision::kNudgeDecisionInterrupted));
}
}
PAGE_USER_DATA_KEY_IMPL(ContextualCueingPageData);
// Attempts to find the matching cueing configuration.
void ContextualCueingPageData::FindMatchingConfig() {
CHECK(cueing_decision_callback_);
bool needs_pdf_page_count = false;
bool needs_page_content = false;
for (const auto& config : metadata_.cueing_configurations()) {
if (!config.has_cue_label()) {
continue;
}
auto decision = DidMatchCueingConditions(config);
if (decision == kAllowed) {
std::move(cueing_decision_callback_)
.Run(base::ok(std::move(config.cue_label())));
return;
} else if (decision == kNeedsPdfPageCount) {
needs_pdf_page_count = true;
} else if (decision == kNeedsPageContent) {
needs_page_content = true;
}
}
if (needs_pdf_page_count) {
#if BUILDFLAG(ENABLE_PDF)
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
base::SingleThreadTaskRunner::GetCurrentDefault()->PostDelayedTask(
FROM_HERE,
base::BindOnce(&ContextualCueingPageData::RequestPdfPageCount,
weak_factory_.GetWeakPtr()),
kPdfPageCountCaptureDelay.Get());
return;
#endif // BUILDFLAG(ENABLE_PDF)
}
if (needs_page_content) {
// Wait till the page content is returned.
return;
}
// None of the config matched, and no client-signals were requested.
std::move(cueing_decision_callback_)
.Run(base::unexpected(NudgeDecision::kClientConditionsUnmet));
}
ContextualCueingPageData::CueingConfigurationDecision
ContextualCueingPageData::DidMatchCueingConditions(
const optimization_guide::proto::GlicCueingConfiguration& config) {
for (const auto& condition : config.conditions()) {
switch (condition.signal()) {
case optimization_guide::proto::
CONTEXTUAL_CUEING_CLIENT_SIGNAL_UNSPECIFIED:
return kDisallowed;
case optimization_guide::proto::
CONTEXTUAL_CUEING_CLIENT_SIGNAL_PDF_PAGE_COUNT:
if (page().GetContentsMimeType() != pdf::kPDFMimeType) {
return kDisallowed;
}
if (!pdf_page_count_) {
return kNeedsPdfPageCount;
}
return DidMatchCueingCondition(condition, *pdf_page_count_)
? kAllowed
: kDisallowed;
case optimization_guide::proto::
CONTEXTUAL_CUEING_CLIENT_SIGNAL_CONTENT_LENGTH_WORD_COUNT:
if (page().GetContentsMimeType() == pdf::kPDFMimeType) {
return kDisallowed;
}
if (page_content_word_count_info_ &&
page_content_word_count_info_->page_contents_words) {
return DidMatchCueingCondition(
condition,
*page_content_word_count_info_->page_contents_words)
? kAllowed
: kDisallowed;
}
if (!page_content_word_count_info_) {
page_content_word_count_info_ = {.max_count_needed = 0};
}
if (page_content_word_count_info_->max_count_needed <
static_cast<size_t>(condition.int64_threshold())) {
page_content_word_count_info_->max_count_needed =
static_cast<size_t>(condition.int64_threshold()) + 1;
}
return kNeedsPageContent;
}
}
return kAllowed;
}
#if BUILDFLAG(ENABLE_PDF)
void ContextualCueingPageData::RequestPdfPageCount() {
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (pdf_helper) {
pdf_helper->RegisterForDocumentLoadComplete(
base::BindOnce(&ContextualCueingPageData::OnPdfDocumentLoadComplete,
weak_factory_.GetWeakPtr()));
}
}
void ContextualCueingPageData::OnPdfDocumentLoadComplete() {
CHECK_EQ(pdf::kPDFMimeType, page().GetContentsMimeType());
auto* pdf_helper = pdf::PDFDocumentHelper::MaybeGetForWebContents(
content::WebContents::FromRenderFrameHost(&page().GetMainDocument()));
if (pdf_helper) {
// Fetch zero PDF bytes to just receive the total page count.
pdf_helper->GetPdfBytes(
/*size_limit=*/0,
base::BindOnce(&ContextualCueingPageData::OnPdfPageCountReceived,
weak_factory_.GetWeakPtr()));
}
}
void ContextualCueingPageData::OnPdfPageCountReceived(
pdf::mojom::PdfListener::GetPdfBytesStatus status,
const std::vector<uint8_t>& bytes,
uint32_t page_count) {
if (status == pdf::mojom::PdfListener::GetPdfBytesStatus::kFailed) {
return;
}
pdf_page_count_ = page_count;
FindMatchingConfig();
}
#endif // BUILDFLAG(ENABLE_PDF)
void ContextualCueingPageData::OnPageContentExtracted(
const optimization_guide::proto::AnnotatedPageContent& page_content) {
if (!cueing_decision_callback_) {
return;
}
if (!page_content_word_count_info_) {
return;
}
size_t word_count = 0;
CountWords(page_content.root_node(),
page_content_word_count_info_->max_count_needed, &word_count);
page_content_word_count_info_->page_contents_words = word_count;
FindMatchingConfig();
}
} // namespace contextual_cueing