blob: 1eac9dc9b3d885e488417b412e640a2844319112 [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/ui/lens/lens_search_contextualization_controller.h"
#include <cmath>
#include "base/functional/bind.h"
#include "base/strings/string_split.h"
#include "base/task/bind_post_task.h"
#include "base/task/thread_pool.h"
#include "chrome/browser/content_extraction/inner_html.h"
#include "chrome/browser/ui/lens/lens_overlay_controller.h"
#include "chrome/browser/ui/lens/lens_overlay_image_helper.h"
#include "chrome/browser/ui/lens/lens_overlay_proto_converter.h"
#include "chrome/browser/ui/lens/lens_overlay_side_panel_coordinator.h"
#include "chrome/browser/ui/lens/lens_search_controller.h"
#include "chrome/browser/ui/lens/lens_search_feature_flag_utils.h"
#include "chrome/browser/ui/lens/lens_searchbox_controller.h"
#include "chrome/browser/ui/lens/lens_session_metrics_logger.h"
#include "components/content_extraction/content/browser/inner_text.h"
#include "components/lens/lens_features.h"
#include "components/tabs/public/tab_interface.h"
#include "components/viz/common/frame_sinks/copy_output_result.h"
#include "components/zoom/zoom_controller.h"
#include "content/public/browser/render_view_host.h"
#include "content/public/browser/render_widget_host.h"
#include "content/public/browser/render_widget_host_view.h"
#include "pdf/buildflags.h"
#include "third_party/blink/public/common/associated_interfaces/associated_interface_provider.h"
#if BUILDFLAG(ENABLE_PDF)
#include "components/pdf/browser/pdf_document_helper.h"
#include "pdf/mojom/pdf.mojom.h"
#endif // BUILDFLAG(ENABLE_PDF)
namespace {
// The relative change in the size of the primary page content (new size vs.
// previously retrieved size) at or above which the content is considered to
// have changed significantly and a page content update request is sent.
// Despite the "Percent" suffix, this value is compared against a ratio, so
// 0.01 corresponds to 1%. This provides tolerance in case there is slight
// variation in the retrieved bytes in between calls.
constexpr float kByteChangeTolerancePercent = 0.01;
// The maximum length of the DOM text to consider for OCR similarity.
// Currently 50 MB (50,000,000 bytes).
constexpr int kMaxDomTextLengthForOcrSimilarity = 50 * 1000 * 1000;
// Returns a copy of `text` with every leading and trailing character that is
// not alphanumeric (as classified by isalnum()) removed. Characters located
// between the first and the last alphanumeric character are preserved as-is.
// Returns an empty string if `text` contains no alphanumeric character.
std::string TrimNonAlphaNumeric(const std::string& text) {
  // The <ctype.h> classification functions have undefined behavior when given
  // a negative value other than EOF, and plain `char` may be signed. Receiving
  // each character as `unsigned char` keeps bytes >= 0x80 (e.g. the bytes of a
  // multi-byte UTF-8 sequence) inside the valid argument range.
  const auto is_alphanumeric = [](unsigned char c) {
    return ::isalnum(c) != 0;
  };

  const auto first = std::find_if(text.begin(), text.end(), is_alphanumeric);
  if (first == text.end()) {
    // Covers both the empty string and strings with no alphanumeric content.
    return "";
  }

  // At least one alphanumeric character exists, so the reverse search always
  // finds a match. `base()` converts the reverse iterator into a forward
  // iterator pointing one past the last alphanumeric character.
  const auto last =
      std::find_if(text.rbegin(), text.rend(), is_alphanumeric).base();
  return std::string(first, last);
}
// Computes which fraction (0.0 - 1.0) of the words in `ocr_text` also occur in
// `dom_text`. Words are compared after lowercasing and trimming
// non-alphanumeric characters from both ends. Each DOM occurrence can match at
// most one OCR word. Returns 0.0 when there are no usable OCR words.
double CalculateWordOverlapSimilarity(std::string dom_text,
                                      lens::mojom::TextPtr ocr_text) {
  // Tally how many times each normalized token appears in the DOM text. The
  // tally is consumed below as OCR words are matched against it.
  std::map<std::string, int> unmatched_dom_words;
  for (const std::string& token :
       base::SplitString(dom_text, " \t\r\n<>", base::TRIM_WHITESPACE,
                         base::SPLIT_WANT_NONEMPTY)) {
    const std::string normalized =
        TrimNonAlphaNumeric(base::ToLowerASCII(token));
    if (!normalized.empty()) {
      ++unmatched_dom_words[normalized];
    }
  }

  // Without OCR text layout there are no words to score.
  if (!ocr_text || !ocr_text->text_layout) {
    return 0.0;
  }

  double matched_word_count = 0;
  double scored_word_count = 0;
  for (const auto& paragraph : ocr_text->text_layout->paragraphs) {
    if (!paragraph) {
      continue;
    }
    for (const auto& line : paragraph->lines) {
      if (!line) {
        continue;
      }
      for (const auto& word : line->words) {
        if (!word) {
          continue;
        }
        const std::string normalized =
            TrimNonAlphaNumeric(base::ToLowerASCII(word->plain_text));
        // Words with no alphanumeric content are not scored at all.
        if (normalized.empty()) {
          continue;
        }
        ++scored_word_count;

        const auto match = unmatched_dom_words.find(normalized);
        if (match == unmatched_dom_words.end() || match->second <= 0) {
          continue;
        }
        // Consume one DOM occurrence so repeated OCR words only match as many
        // times as the word appears in the DOM.
        ++matched_word_count;
        --match->second;
      }
    }
  }

  // Avoid dividing by zero when nothing was scored.
  if (scored_word_count == 0) {
    return 0.0;
  }
  return matched_word_count / scored_word_count;
}
// Returns true only when the protected page feature, the contextual searchbox
// and APC-as-context are all enabled. The checks run in that order and stop at
// the first one that is disabled.
bool IsProtectedPageFeatureEnabled() {
  if (!lens::features::IsLensSearchProtectedPageEnabled()) {
    return false;
  }
  if (!lens::IsLensOverlayContextualSearchboxEnabled()) {
    return false;
  }
  return lens::features::UseApcAsContext();
}
} // namespace
namespace lens {
// Constructs the controller. Only stores `lens_search_controller`; no
// contextualization work is started here.
LensSearchContextualizationController::LensSearchContextualizationController(
LensSearchController* lens_search_controller)
: lens_search_controller_(lens_search_controller) {}
// Defaulted destructor; members clean up after themselves.
LensSearchContextualizationController::
~LensSearchContextualizationController() = default;
// Begins contextualization for a new session. Must be called while the
// controller is off. Records `invocation_source`, moves to the initializing
// state and kicks off the screenshot flow, whose result is delivered to
// OnScreenshotTakenForContextual together with `callback`.
void LensSearchContextualizationController::StartContextualization(
    lens::LensOverlayInvocationSource invocation_source,
    OnPageContextUpdatedCallback callback) {
  // Starting twice without a reset in between is a programming error.
  CHECK(state_ == State::kOff);
  invocation_source_ = invocation_source;
  state_ = State::kInitializing;

  // TODO(crbug.com/403573362): Implement starting the query flow from here if
  // needed.
  auto on_screenshot_taken = base::BindOnce(
      &LensSearchContextualizationController::OnScreenshotTakenForContextual,
      weak_ptr_factory_.GetWeakPtr(), std::move(callback));
  StartScreenshotFlow(std::move(on_screenshot_taken));
}
// Collects the content of the current page and hands it to `callback`.
// PDF documents are served through the PDF helper; other pages go through the
// innerText / annotated page content fetch chain. When nothing can be fetched,
// `callback` is run with empty contents and an unknown MIME type.
void LensSearchContextualizationController::GetPageContextualization(
    PageContentRetrievedCallback callback) {
  // Nothing to collect when the contextual searchbox is disabled.
  if (!lens::IsLensOverlayContextualSearchboxEnabled()) {
    std::move(callback).Run(/*page_contents=*/{}, lens::MimeType::kUnknown,
                            std::nullopt);
    return;
  }

  // Assume eligibility until a later eligibility check says otherwise.
  is_page_context_eligible_ = true;

  auto* web_contents =
      lens_search_controller_->GetTabInterface()->GetContents();
#if BUILDFLAG(ENABLE_PDF)
  // The helper's existence must be confirmed before calling MaybeGetPdfBytes,
  // otherwise `callback` would have been moved without ever being called.
  if (pdf::PDFDocumentHelper* pdf_helper =
          pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents)) {
    // Fetch the PDF bytes then run the callback.
    MaybeGetPdfBytes(pdf_helper, std::move(callback));
    return;
  }
#endif  // BUILDFLAG(ENABLE_PDF)

  auto* render_frame_host = web_contents->GetPrimaryMainFrame();
  std::vector<lens::PageContent> page_contents;

  // A frame and at least one enabled content source are required.
  const bool can_fetch_content =
      render_frame_host && (lens::features::UseInnerTextAsContext() ||
                            lens::features::UseApcAsContext());
  if (!can_fetch_content) {
    std::move(callback).Run(page_contents, lens::MimeType::kUnknown,
                            std::nullopt);
    return;
  }

  // TODO(crbug.com/399610478): The fetches for innerText and APC
  // should be parallelized to fetch all data at once. Currently fetches are
  // sequential to prevent getting stuck in a race condition.
  MaybeGetInnerText(page_contents, render_frame_host, std::move(callback));
}
// Attempts to refresh the page context and notifies `callback` when done.
//  - While initializing: returns without running or storing `callback`.
//  - While off: adopts the overlay controller's initial screenshot as the
//    viewport screenshot and switches to the active state first.
//  - If a page content upload is already in progress: runs `callback`
//    immediately without fetching anything.
//  - Otherwise: stores `callback` and fetches the page content, which is then
//    routed to UpdatePageContextualization.
void LensSearchContextualizationController::TryUpdatePageContextualization(
OnPageContextUpdatedCallback callback) {
if (state_ == State::kInitializing) {
// Will be called again by OnInitialPageContextEligibilityFetched when the
// controller finishes initializing.
return;
}
if (state_ == State::kOff) {
// TODO(crbug.com/418825720): The viewport screenshot should only be set
// in this controller in the future.
viewport_screenshot_ = lens_search_controller_->lens_overlay_controller()
->initial_screenshot();
state_ = State::kActive;
}
// From this point on the controller must be active.
CHECK(state_ == State::kActive);
// If there is already an upload, do not send another request.
// TODO(crbug.com/399154548): Ideally, there could be two uploads in progress
// at a time, however, the current query controller implementation does not
// support this.
if (GetQueryController()->IsPageContentUploadInProgress()) {
std::move(callback).Run();
return;
}
// Stash the callback; it is run once the update flow completes.
on_page_context_updated_callback_ = std::move(callback);
GetPageContextualization(base::BindOnce(
&LensSearchContextualizationController::UpdatePageContextualization,
weak_ptr_factory_.GetWeakPtr()));
}
#if BUILDFLAG(ENABLE_PDF)
// Starts fetching PDF page text, beginning at page 0, for a document with
// `page_count` pages. Subsequent pages are requested from
// GetPartialPdfTextCallback. `callback` is stored and run from there once the
// fetch loop stops.
// Returns without storing or running `callback` when there is no PDF helper
// for the tab, the suggest character target is 0, or `page_count` is 0.
void LensSearchContextualizationController::
FetchVisiblePageIndexAndGetPartialPdfText(
uint32_t page_count,
PdfPartialPageTextRetrievedCallback callback) {
pdf::PDFDocumentHelper* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(
lens_search_controller_->GetTabInterface()->GetContents());
if (!pdf_helper ||
lens::features::GetLensOverlayPdfSuggestCharacterTarget() == 0 ||
page_count == 0) {
return;
}
CHECK(callback);
pdf_partial_page_text_retrieved_callback_ = std::move(callback);
// TODO(387306854): Add logic to grab page text from the visible page index.
// Fetch the first page of text, which will then recursively fetch the
// following pages.
// Discard any text accumulated by a previous fetch.
pdf_pages_text_.clear();
pdf_helper->GetPageText(
/*page_index=*/0,
base::BindOnce(
&LensSearchContextualizationController::GetPartialPdfTextCallback,
weak_ptr_factory_.GetWeakPtr(), /*page_index=*/0, page_count,
/*total_characters_retrieved=*/0));
}
#endif // BUILDFLAG(ENABLE_PDF)
// Returns the controller to the off state: drops pending callbacks without
// running them, and clears the cached page contents, screenshot, PDF text and
// pending eligibility request.
void LensSearchContextualizationController::ResetState() {
on_page_context_updated_callback_.Reset();
is_page_context_eligible_ = false;
ocr_dom_similarity_recorded_in_session_ = false;
page_contents_.clear();
primary_content_type_ = lens::MimeType::kUnknown;
viewport_screenshot_.reset();
last_retrieved_most_visible_page_ = std::nullopt;
pdf_partial_page_text_retrieved_callback_.Reset();
pdf_pages_text_.clear();
// Reset the page context eligibility API state.
page_context_eligibility_callback_.Reset();
pending_context_eligibility_params_.reset();
state_ = State::kOff;
}
// Replaces the cached page contents and the primary content type.
// `page_contents` is taken by value and moved into the member so the
// (potentially large) content bytes are not copied a second time.
void LensSearchContextualizationController::SetPageContent(
    std::vector<lens::PageContent> page_contents,
    lens::MimeType primary_content_type) {
  page_contents_ = std::move(page_contents);
  primary_content_type_ = primary_content_type;
}
void LensSearchContextualizationController::RecordDocumentMetrics(
std::optional<uint32_t> page_count) {
// Record the document size bytes for each lens::PageContent. If there are no
// page contents, then we will record 0.
std::set<lens::MimeType> retrieved_content_types;
if (page_contents_.empty()) {
lens::RecordDocumentSizeBytes(lens::MimeType::kUnknown, 0);
} else {
for (const auto& page_content : page_contents_) {
lens::RecordDocumentSizeBytes(page_content.content_type_,
page_content.bytes_.size());
retrieved_content_types.insert(page_content.content_type_);
}
}
if (page_count.has_value() && primary_content_type_ == lens::MimeType::kPdf) {
lens::RecordPdfPageCount(page_count.value());
return;
}
// Fetch and record the other content type for representing the webpage.
// TODO(crbug.com/398304347): Remove this once innerText metrics are recorded
// as part of the content data.
auto* render_frame_host = lens_search_controller_->GetTabInterface()
->GetContents()
->GetPrimaryMainFrame();
if (!retrieved_content_types.contains(lens::MimeType::kPlainText)) {
// Fetch the innerText to log the size.
content_extraction::GetInnerText(
*render_frame_host, /*node_id=*/std::nullopt,
base::BindOnce(
&LensSearchContextualizationController::RecordInnerTextSize,
weak_ptr_factory_.GetWeakPtr()));
}
// Try and record the OCR DOM similarity since the page content is now
// available.
TryCalculateAndRecordOcrDomSimilarity();
}
void LensSearchContextualizationController::
TryCalculateAndRecordOcrDomSimilarity() {
// Exit early if we do not have all the data needed to calculate the
// similarity.
if (!text_ || page_contents_.empty() ||
ocr_dom_similarity_recorded_in_session_) {
return;
}
ocr_dom_similarity_recorded_in_session_ = true;
const auto& page_content_bytes = page_contents_.front().bytes_;
const auto primary_content_type = primary_content_type_;
bool is_dom = primary_content_type == lens::MimeType::kHtml ||
primary_content_type == lens::MimeType::kPlainText ||
primary_content_type == lens::MimeType::kAnnotatedPageContent;
bool is_dom_too_large =
page_content_bytes.size() > kMaxDomTextLengthForOcrSimilarity;
bool is_english = text_->content_language == "en";
// Exit early if the page content is not from the DOM, the DOM is very large
// and might bog down the thread, or the page is not in English since the
// score is not reliable for other languages.
if (!is_dom || is_dom_too_large || !is_english) {
// If the page content is not from the HTML DOM, the similarity cannot be
// calculated, so reset the text to avoid trying again.
text_.reset();
return;
}
// Post to a background thread to calculate the similarity to avoid slowing
// down the main thread.
base::ThreadPool::PostTaskAndReplyWithResult(
FROM_HERE, {base::TaskPriority::BEST_EFFORT},
base::BindOnce(
&CalculateWordOverlapSimilarity,
std::string(page_content_bytes.begin(), page_content_bytes.end()),
text_.Clone()),
base::BindOnce(&lens::RecordOcrDomSimilarity));
}
// Stores the OCR text, taking ownership of `text`. The stored text is read by
// TryCalculateAndRecordOcrDomSimilarity.
void LensSearchContextualizationController::SetText(lens::mojom::TextPtr text) {
text_ = std::move(text);
}
// Receives freshly fetched page contents and continues the update flow.
//  - Does nothing if the controller is off.
//  - If the contextual searchbox is disabled or the page is not eligible, no
//    content is sent and the pending update callback (if any) is run.
//  - Otherwise continues to UpdatePageContextualizationPart2, either directly
//    with an empty bitmap or after capturing a new viewport screenshot.
// `page_contents` is moved along the chain to avoid copying content bytes.
void LensSearchContextualizationController::UpdatePageContextualization(
    std::vector<lens::PageContent> page_contents,
    lens::MimeType primary_content_type,
    std::optional<uint32_t> page_count) {
  // Exit early if the controller is off.
  if (state_ == State::kOff) {
    return;
  }

  // If the feature is disabled or the page is not eligible, none of the
  // content will be sent, so just report completion.
  if (!lens::IsLensOverlayContextualSearchboxEnabled() ||
      !is_page_context_eligible_) {
    // The callback may have been cleared (e.g. by ResetState) while the fetch
    // was in flight. Running a null callback is fatal, so guard the call the
    // same way the later stages of this flow do.
    if (on_page_context_updated_callback_) {
      std::move(on_page_context_updated_callback_).Run();
    }
    return;
  }

  // Do not capture a new screenshot if the feature param is not enabled or if
  // the overlay is showing over the live page, meaning the viewport cannot have
  // changed.
  if (!lens::features::UpdateViewportEachQueryEnabled() ||
      lens_search_controller_->lens_overlay_controller()->IsOverlayShowing()) {
    UpdatePageContextualizationPart2(std::move(page_contents),
                                     primary_content_type, page_count,
                                     SkBitmap());
    return;
  }

  // Begin the process of grabbing a screenshot.
  CaptureScreenshot(base::BindOnce(
      &LensSearchContextualizationController::UpdatePageContextualizationPart2,
      weak_ptr_factory_.GetWeakPtr(), std::move(page_contents),
      primary_content_type, page_count));
}
// Second stage of the page context update, run once the (possibly empty)
// viewport `bitmap` is available. For PDFs the most visible page index is
// fetched first; otherwise UpdatePageContext is called directly without one.
// `page_contents` is moved into the next stage to avoid copying content bytes.
void LensSearchContextualizationController::UpdatePageContextualizationPart2(
    std::vector<lens::PageContent> page_contents,
    lens::MimeType primary_content_type,
    std::optional<uint32_t> page_count,
    const SkBitmap& bitmap) {
  // It's possible the Lens session could have been closed while updating the
  // page context. Return early and do not run the callback as it should have
  // been cleared.
  if (state_ == State::kOff || !on_page_context_updated_callback_) {
    return;
  }
#if BUILDFLAG(ENABLE_PDF)
  pdf::PDFDocumentHelper* pdf_helper =
      pdf::PDFDocumentHelper::MaybeGetForWebContents(
          lens_search_controller_->GetTabInterface()->GetContents());
  if (pdf_helper) {
    // The most visible page index is appended as the final argument when the
    // helper replies.
    pdf_helper->GetMostVisiblePageIndex(base::BindOnce(
        &LensSearchContextualizationController::UpdatePageContext,
        weak_ptr_factory_.GetWeakPtr(), std::move(page_contents),
        primary_content_type, page_count, bitmap));
    return;
  }
#endif  // BUILDFLAG(ENABLE_PDF)
  UpdatePageContext(std::move(page_contents), primary_content_type, page_count,
                    bitmap,
                    /*most_visible_page=*/std::nullopt);
}
// Final stage of the page context update. Decides what, if anything, needs to
// be sent to the query controller:
//  - Updates the cached viewport screenshot if `bitmap` is non-empty and
//    differs from it.
//  - If the primary content type is unchanged and its byte size changed by
//    less than kByteChangeTolerancePercent, either nothing is sent (screenshot
//    unchanged) or only the screenshot is sent.
//  - Otherwise the cached contents are replaced and the full page content is
//    sent, and document metrics are recorded.
// The pending update callback, if still set, is run on every path that gets
// past the initial state check.
void LensSearchContextualizationController::UpdatePageContext(
    std::vector<lens::PageContent> page_contents,
    lens::MimeType primary_content_type,
    std::optional<uint32_t> page_count,
    const SkBitmap& bitmap,
    std::optional<uint32_t> most_visible_page) {
  // It's possible the Lens session could have been closed while updating the
  // page context. Return early and do not run the callback as it should have
  // been cleared.
  if (state_ == State::kOff) {
    return;
  }

  bool sending_bitmap = false;
  if (!bitmap.drawsNothing() &&
      (viewport_screenshot_.drawsNothing() ||
       !lens::AreBitmapsEqual(viewport_screenshot_, bitmap))) {
    viewport_screenshot_ = bitmap;
    sending_bitmap = true;
    // If the overlay is NOT showing/initializing, then the selections should be
    // cleared so future contextual queries do not include it. The thumbnail
    // will be updated by the query controller on region searches if needed.
    if (!lens_search_controller_->lens_overlay_controller()
             ->IsOverlayShowing() &&
        !lens_search_controller_->lens_overlay_controller()
             ->IsOverlayInitializing()) {
      lens_search_controller_->lens_overlay_controller()->ClearAllSelections();
      lens_search_controller_->HandleThumbnailCreatedBitmap(bitmap);
    }
  }
  last_retrieved_most_visible_page_ = most_visible_page;

  // TODO(crbug.com/399215935): Ideally, this check should ensure that any of
  // the content data has not changed. For now, we only check if the
  // primary_content_type bytes have changed.
  auto old_page_content_it = std::ranges::find_if(
      page_contents_, [&primary_content_type](const auto& page_content) {
        return page_content.content_type_ == primary_content_type;
      });
  auto new_page_content_it = std::ranges::find_if(
      page_contents, [&primary_content_type](const auto& page_content) {
        return page_content.content_type_ == primary_content_type;
      });
  const lens::PageContent* old_page_content =
      old_page_content_it != page_contents_.end() ? &(*old_page_content_it)
                                                  : nullptr;
  const lens::PageContent* new_page_content =
      new_page_content_it != page_contents.end() ? &(*new_page_content_it)
                                                 : nullptr;

  if (primary_content_type_ == primary_content_type && old_page_content &&
      new_page_content) {
    const float old_size = old_page_content->bytes_.size();
    const float new_size = new_page_content->bytes_.size();
    // Use std::abs so the floating point overload is always selected; an
    // unqualified abs() can resolve to the C abs(int) and truncate the ratio.
    // If `old_size` is 0 the ratio is inf or NaN, neither of which compares
    // below the tolerance, so the content is treated as changed.
    const float percent_changed = std::abs((new_size - old_size) / old_size);
    if (percent_changed < kByteChangeTolerancePercent) {
      if (!sending_bitmap) {
        // If the bytes have not changed more than our threshold and the
        // screenshot has not changed, exit early. Notify the query controller
        // that the user may be issuing a search request, and therefore the
        // query should be restarted if TTL expired. If the bytes did change,
        // this will happen automatically as a result of the
        // SendUpdatedPageContent call below.
        GetQueryController()->MaybeRestartQueryFlow();
        if (on_page_context_updated_callback_) {
          std::move(on_page_context_updated_callback_).Run();
        }
        return;
      }
      // If the screenshot has changed but the bytes have not, send only the
      // screenshot. `sending_bitmap` is known to be true here.
      GetQueryController()->SendUpdatedPageContent(
          std::nullopt, std::nullopt, std::nullopt, std::nullopt,
          last_retrieved_most_visible_page_, bitmap);
      // Run the callback that the page context has finished updating.
      if (on_page_context_updated_callback_) {
        std::move(on_page_context_updated_callback_).Run();
      }
      return;
    }
  }

  // Since the page content has changed, let the query controller know to avoid
  // dangling pointers.
  GetQueryController()->ResetPageContentData();
  // Copy rather than move: `new_page_content` points into `page_contents` and
  // is still read below.
  page_contents_ = page_contents;
  primary_content_type_ = primary_content_type;

  // If no bytes were retrieved from the page, the query won't be able to be
  // contextualized. Notify the side panel so the ghost loader isn't shown. No
  // need to update the overlay as this update only happens on navigation
  // where the side panel will already be open.
  if (!new_page_content || new_page_content->bytes_.empty()) {
    lens_search_controller_->lens_overlay_side_panel_coordinator()
        ->SuppressGhostLoader();
  }
#if BUILDFLAG(ENABLE_PDF)
  // If the new page is a PDF, fetch the text from the page to be used as early
  // suggest signals.
  if (new_page_content &&
      new_page_content->content_type_ == lens::MimeType::kPdf) {
    FetchVisiblePageIndexAndGetPartialPdfText(
        page_count.value_or(0),
        base::BindOnce(&LensSearchContextualizationController::
                           OnPdfPartialPageTextRetrieved,
                       weak_ptr_factory_.GetWeakPtr()));
  }
#endif
  GetQueryController()->SendUpdatedPageContent(
      page_contents_, primary_content_type_,
      lens_search_controller_->GetPageURL(),
      lens_search_controller_->GetPageTitle(),
      last_retrieved_most_visible_page_, sending_bitmap ? bitmap : SkBitmap());
  RecordDocumentMetrics(page_count.value_or(0));
  lens_search_controller_->lens_session_metrics_logger()
      ->OnFollowUpPageContentRetrieved(primary_content_type);

  // Run the callback that the page context has finished updating.
  if (on_page_context_updated_callback_) {
    std::move(on_page_context_updated_callback_).Run();
  }
}
// Fetches the innerText of `render_frame_host` if innerText is enabled as a
// context source, then continues to the annotated page content step. When
// innerText is disabled the fetch is skipped and the chain continues
// immediately. `page_contents` is moved along the chain rather than copied.
void LensSearchContextualizationController::MaybeGetInnerText(
    std::vector<lens::PageContent> page_contents,
    content::RenderFrameHost* render_frame_host,
    PageContentRetrievedCallback callback) {
  if (!lens::features::UseInnerTextAsContext()) {
    MaybeGetAnnotatedPageContent(std::move(page_contents), render_frame_host,
                                 std::move(callback));
    return;
  }
  content_extraction::GetInnerText(
      *render_frame_host, /*node_id=*/std::nullopt,
      base::BindOnce(
          &LensSearchContextualizationController::OnInnerTextReceived,
          weak_ptr_factory_.GetWeakPtr(), std::move(page_contents),
          render_frame_host, std::move(callback)));
}
// Handles the innerText fetch result. Appends a plain text entry to
// `page_contents` (the text bytes on success, empty bytes when the fetch
// failed or the text exceeds the file upload limit) and continues to the
// annotated page content step. `page_contents` is moved on rather than copied.
void LensSearchContextualizationController::OnInnerTextReceived(
    std::vector<lens::PageContent> page_contents,
    content::RenderFrameHost* render_frame_host,
    PageContentRetrievedCallback callback,
    std::unique_ptr<content_extraction::InnerTextResult> result) {
  const bool was_successful =
      result && result->inner_text.size() <=
                    lens::features::GetLensOverlayFileUploadLimitBytes();
  // Add the innerText to the page_contents if successful, or empty bytes if
  // not.
  page_contents.emplace_back(
      /*bytes=*/was_successful
          ? std::vector<uint8_t>(result->inner_text.begin(),
                                 result->inner_text.end())
          : std::vector<uint8_t>{},
      lens::MimeType::kPlainText);
  MaybeGetAnnotatedPageContent(std::move(page_contents), render_frame_host,
                               std::move(callback));
}
// Fetches the annotated page content (APC) for the tab if APC is enabled as a
// context source; the result is handled by OnAnnotatedPageContentReceived.
// When APC is disabled, the fetch chain ends here and `callback` is run with
// the contents gathered so far. `page_contents` is moved rather than copied.
// `render_frame_host` is not used by this step.
void LensSearchContextualizationController::MaybeGetAnnotatedPageContent(
    std::vector<lens::PageContent> page_contents,
    content::RenderFrameHost* render_frame_host,
    PageContentRetrievedCallback callback) {
  if (!lens::features::UseApcAsContext()) {
    // Done fetching page contents.
    // Keep legacy behavior consistent by setting the primary content type to
    // plain text if that is the only content type enabled.
    // TODO(crbug.com/401614601): Set primary content type to kHtml in all
    // cases.
    auto primary_content_type = lens::features::UseInnerTextAsContext()
                                    ? lens::MimeType::kPlainText
                                    : lens::MimeType::kHtml;
    std::move(callback).Run(std::move(page_contents), primary_content_type,
                            std::nullopt);
    return;
  }
  blink::mojom::AIPageContentOptionsPtr ai_page_content_options =
      optimization_guide::DefaultAIPageContentOptions(
          /*on_critical_path =*/true);
  ai_page_content_options->max_meta_elements = 20;
  optimization_guide::GetAIPageContent(
      lens_search_controller_->GetTabInterface()->GetContents(),
      std::move(ai_page_content_options),
      base::BindOnce(&LensSearchContextualizationController::
                         OnAnnotatedPageContentReceived,
                     weak_ptr_factory_.GetWeakPtr(), std::move(page_contents),
                     std::move(callback)));
}
// Handles the annotated page content fetch result. Regardless of whether a
// result was produced, the page's context eligibility is checked next; the
// frame metadata extracted from `result` (empty when there is no result) is
// supplied to that check, and `result` itself is forwarded to
// OnPageContextEligibilityFetched.
void LensSearchContextualizationController::OnAnnotatedPageContentReceived(
    std::vector<lens::PageContent> page_contents,
    PageContentRetrievedCallback callback,
    std::optional<optimization_guide::AIPageContentResult> result) {
  // The tab URL is used to check if the page is context eligible.
  const auto& tab_url = lens_search_controller_->GetTabInterface()
                            ->GetContents()
                            ->GetLastCommittedURL();

  // Convert the page metadata to a C struct defined in the optimization_guide
  // component so it can be passed to the shared library. Without a result the
  // metadata stays empty.
  std::vector<optimization_guide::FrameMetadata> frame_metadata;
  if (result) {
    frame_metadata =
        optimization_guide::GetFrameMetadataFromPageContent(result.value());
  }

  // If the page turns out to be protected, the latest page content is not
  // sent to the server.
  IsPageContextEligible(
      tab_url, std::move(frame_metadata),
      base::BindOnce(&LensSearchContextualizationController::
                         OnPageContextEligibilityFetched,
                     weak_ptr_factory_.GetWeakPtr(), std::move(page_contents),
                     std::move(callback), std::move(result)));
}
// Completes the page content fetch once eligibility is known. For eligible
// pages the serialized APC proto (when `result` is present) is appended to
// `page_contents`. For ineligible pages all gathered contents are dropped and
// the side panel is told to show the protected error page. `callback` is
// always run with annotated page content as the primary content type.
void LensSearchContextualizationController::OnPageContextEligibilityFetched(
    std::vector<lens::PageContent> page_contents,
    PageContentRetrievedCallback callback,
    std::optional<optimization_guide::AIPageContentResult> result,
    bool is_page_context_eligible) {
  if (is_page_context_eligible) {
    if (result) {
      std::string apc_bytes;
      result->proto.SerializeToString(&apc_bytes);
      page_contents.emplace_back(
          std::vector<uint8_t>(apc_bytes.begin(), apc_bytes.end()),
          lens::MimeType::kAnnotatedPageContent);
    }
  } else {
    is_page_context_eligible_ = false;
    lens_search_controller_->lens_overlay_side_panel_coordinator()
        ->SetShowProtectedErrorPage(true);
    // Nothing gathered so far may be sent for a protected page.
    page_contents.clear();
  }

  // Done fetching page contents.
  std::move(callback).Run(page_contents, lens::MimeType::kAnnotatedPageContent,
                          std::nullopt);
}
#if BUILDFLAG(ENABLE_PDF)
// Requests the PDF bytes from `pdf_helper`, limited to the file upload size
// limit, and forwards the result together with `callback` to
// OnPdfBytesReceived. `pdf_helper` must be non-null.
void LensSearchContextualizationController::MaybeGetPdfBytes(
pdf::PDFDocumentHelper* pdf_helper,
PageContentRetrievedCallback callback) {
// Try and fetch the PDF bytes if enabled.
CHECK(pdf_helper);
pdf_helper->GetPdfBytes(
/*size_limit=*/lens::features::GetLensOverlayFileUploadLimitBytes(),
base::BindOnce(&LensSearchContextualizationController::OnPdfBytesReceived,
weak_ptr_factory_.GetWeakPtr(), std::move(callback)));
}
// Handles the PDF bytes fetch result and runs `callback` with a single PDF
// page content entry. The entry carries `bytes` only when the fetch succeeded
// and the document has at least one page; otherwise it carries empty bytes.
// `page_count` is passed through in both cases.
void LensSearchContextualizationController::OnPdfBytesReceived(
    PageContentRetrievedCallback callback,
    pdf::mojom::PdfListener::GetPdfBytesStatus status,
    const std::vector<uint8_t>& bytes,
    uint32_t page_count) {
  const bool has_usable_pdf =
      status == pdf::mojom::PdfListener::GetPdfBytesStatus::kSuccess &&
      page_count != 0;
  if (has_usable_pdf) {
    std::move(callback).Run({lens::PageContent(bytes, lens::MimeType::kPdf)},
                            lens::MimeType::kPdf, page_count);
    return;
  }

  // TODO(crbug.com/370530197): Show user error message if status is not
  // success.
  std::move(callback).Run(
      {lens::PageContent(/*bytes=*/{}, lens::MimeType::kPdf)},
      lens::MimeType::kPdf, page_count);
}
// Receives the text of PDF page `page_index` and either requests the next
// page or finishes the fetch loop.
//  - `total_page_count`: number of pages in the document (must be >= 1).
//  - `total_characters_retrieved`: characters accumulated before this page.
//  - `page_text`: text of the page just fetched.
// The loop finishes when the PDF helper is gone, the suggest character target
// has been reached, or the last page was fetched. On finish, the stored
// partial-text callback is run and the accumulated pages are sent to the query
// controller as a partial page content request.
void LensSearchContextualizationController::GetPartialPdfTextCallback(
uint32_t page_index,
uint32_t total_page_count,
uint32_t total_characters_retrieved,
const std::u16string& page_text) {
// Sanity checks that the input is expected. Pages must arrive in order, so
// the number of pages stored so far must equal the index of this page.
CHECK_GE(total_page_count, 1u);
CHECK_LT(page_index, total_page_count);
CHECK_EQ(pdf_pages_text_.size(), page_index);
// Add the page text to the list of pages and update the total characters
// retrieved count.
pdf_pages_text_.push_back(page_text);
// Ensure no integer overflow. If overflow, set the total characters retrieved
// to the max value so the loop will exit.
base::CheckedNumeric<uint32_t> total_characters_retrieved_check =
total_characters_retrieved;
total_characters_retrieved_check += page_text.size();
total_characters_retrieved = total_characters_retrieved_check.ValueOrDefault(
std::numeric_limits<uint32_t>::max());
pdf::PDFDocumentHelper* pdf_helper =
pdf::PDFDocumentHelper::MaybeGetForWebContents(
lens_search_controller_->GetTabInterface()->GetContents());
// Stop the loop if the character limit is reached or if the page index is
// out of bounds or the PDF helper no longer exists.
if (!pdf_helper ||
total_characters_retrieved >=
lens::features::GetLensOverlayPdfSuggestCharacterTarget() ||
page_index + 1 >= total_page_count) {
// NOTE(review): the callback is run without a null check. ResetState()
// clears both this callback and `pdf_pages_text_`; confirm an in-flight
// page text reply cannot arrive after a reset.
std::move(pdf_partial_page_text_retrieved_callback_).Run(pdf_pages_text_);
GetQueryController()->SendPartialPageContentRequest(pdf_pages_text_);
return;
}
// Request the next page, carrying the running character total forward.
pdf_helper->GetPageText(
page_index + 1,
base::BindOnce(
&LensSearchContextualizationController::GetPartialPdfTextCallback,
weak_ptr_factory_.GetWeakPtr(), page_index + 1, total_page_count,
total_characters_retrieved));
}
// Stores the retrieved partial PDF text, replacing any previously stored
// pages.
void LensSearchContextualizationController::OnPdfPartialPageTextRetrieved(
std::vector<std::u16string> pdf_pages_text) {
pdf_pages_text_ = std::move(pdf_pages_text);
}
#endif // BUILDFLAG(ENABLE_PDF)
// Returns whether a screenshot can be taken from `view`: the view must exist
// and report that its surface is available for copy.
bool LensSearchContextualizationController::IsScreenshotPossible(
    content::RenderWidgetHostView* view) {
  if (!view) {
    return false;
  }
  return view->IsSurfaceAvailableForCopy();
}
// Starts capturing a screenshot of the tab's primary main frame view. If a
// capture is not possible, `callback` is run right away with an empty bitmap,
// no bounds and no page index. Otherwise the copy result is posted back to the
// current sequence and handed to FetchViewportImageBoundingBoxes along with
// `callback`.
void LensSearchContextualizationController::StartScreenshotFlow(
OnScreenshotTakenCallback callback) {
// Begin the process of grabbing a screenshot.
content::RenderWidgetHostView* view =
lens_search_controller_->GetTabInterface()
->GetContents()
->GetPrimaryMainFrame()
->GetRenderViewHost()
->GetWidget()
->GetView();
// During initialization and shutdown a capture may not be possible.
if (!IsScreenshotPossible(view)) {
std::move(callback).Run(SkBitmap(), {}, std::nullopt);
return;
}
// Request the surface copy with a default-constructed source rect and output
// size.
view->CopyFromSurface(
/*src_rect=*/gfx::Rect(), /*output_size=*/gfx::Size(),
base::BindPostTask(
base::SequencedTaskRunner::GetCurrentDefault(),
base::BindOnce(&LensSearchContextualizationController::
FetchViewportImageBoundingBoxes,
weak_ptr_factory_.GetWeakPtr(), std::move(callback))));
}
// Captures a screenshot of the tab's primary main frame view and passes the
// resulting bitmap to `callback`. If a capture is not possible, `callback` is
// run right away with an empty bitmap.
void LensSearchContextualizationController::CaptureScreenshot(
base::OnceCallback<void(const SkBitmap&)> callback) {
// Begin the process of grabbing a screenshot.
content::RenderWidgetHostView* view =
lens_search_controller_->GetTabInterface()
->GetContents()
->GetPrimaryMainFrame()
->GetRenderViewHost()
->GetWidget()
->GetView();
if (!IsScreenshotPossible(view)) {
std::move(callback).Run(SkBitmap());
return;
}
// The copy result is posted back to the current sequence; only its bitmap is
// extracted and forwarded to `callback`.
view->CopyFromSurface(
/*src_rect=*/gfx::Rect(), /*output_size=*/gfx::Size(),
base::BindPostTask(
base::SequencedTaskRunner::GetCurrentDefault(),
base::BindOnce([](const viz::CopyOutputBitmapWithMetadata& result) {
return result.bitmap;
}).Then(std::move(callback))));
}
// Final step of a screenshot attempt identified by `attempt_id`.
//  - Stale attempts (id differs from the current attempt id) are dropped
//    without running `callback`.
//  - If `bitmap` is empty, `callback` is run with empty results and the Lens
//    session is closed with a screenshot-creation-failed dismissal source.
//  - Otherwise `callback` receives `bitmap`, `bounds` and `pdf_current_page`.
// `chrome_render_frame` is not used in this function.
void LensSearchContextualizationController::DidCaptureScreenshot(
mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame>
chrome_render_frame,
int attempt_id,
const SkBitmap& bitmap,
const std::vector<gfx::Rect>& bounds,
OnScreenshotTakenCallback callback,
std::optional<uint32_t> pdf_current_page) {
// An id mismatch implies this is not the most recent screenshot attempt.
if (screenshot_attempt_id_ != attempt_id) {
return;
}
if (bitmap.drawsNothing()) {
// Report the failure to the caller first, then close the session.
std::move(callback).Run(SkBitmap(), {}, std::nullopt);
lens_search_controller_->CloseLensSync(
lens::LensOverlayDismissalSource::kErrorScreenshotCreationFailed);
return;
}
std::move(callback).Run(bitmap, bounds, pdf_current_page);
}
// Called when the initial screenshot for contextualization is ready. Checks
// whether the current tab URL is context eligible (with no frame metadata) and
// forwards the screenshot data and `callback` to
// OnInitialPageContextEligibilityFetched along with the eligibility result.
void LensSearchContextualizationController::OnScreenshotTakenForContextual(
OnPageContextUpdatedCallback callback,
const SkBitmap& bitmap,
const std::vector<gfx::Rect>& all_bounds,
std::optional<uint32_t> pdf_current_page) {
// Start the query as soon as the image is ready since it is the only
// critical asynchronous flow. This optimization parallelizes the query flow
// with other async startup processes.
const auto& tab_url = lens_search_controller_->GetTabInterface()
->GetContents()
->GetLastCommittedURL();
// Check if the page is context eligible. This should start the query flow
// after the eligibility is fetched.
IsPageContextEligible(
tab_url, /*frame_metadata=*/{},
base::BindOnce(&LensSearchContextualizationController::
OnInitialPageContextEligibilityFetched,
weak_ptr_factory_.GetWeakPtr(), bitmap, all_bounds,
pdf_current_page, std::move(callback)));
}
// Determines whether the page at `main_frame_url` with `frame_metadata` may be
// used as context and reports the answer through `callback`.
//  - Protected page feature disabled: always eligible.
//  - Eligibility API available: the API's verdict is reported.
//  - Eligibility API finished loading but is unavailable: not eligible.
//  - Eligibility API still loading: the request is parked and `callback` is
//    stored to be answered once loading completes.
void LensSearchContextualizationController::IsPageContextEligible(
    const GURL& main_frame_url,
    std::vector<optimization_guide::FrameMetadata> frame_metadata,
    LensSearchPageContextEligibilityCallback callback) {
  if (!IsProtectedPageFeatureEnabled()) {
    std::move(callback).Run(true);
    return;
  }

  if (page_context_eligibility_) {
    std::move(callback).Run(optimization_guide::IsPageContextEligible(
        main_frame_url.GetHost(), main_frame_url.GetPath(),
        std::move(frame_metadata), page_context_eligibility_));
    return;
  }

  // The API is not available. If loading has already completed, it failed to
  // load, so the page is marked as not eligible.
  if (has_page_context_eligibility_api_loaded_) {
    std::move(callback).Run(false);
    return;
  }

  // Loading is still in progress: park the parameters and the callback so the
  // check can be repeated once the API is loaded.
  pending_context_eligibility_params_.emplace(main_frame_url,
                                              std::move(frame_metadata));
  page_context_eligibility_callback_ = std::move(callback);
}
// Starts loading the page context eligibility API on the thread pool and
// delivers the result to OnPageContextEligibilityAPILoaded().
void LensSearchContextualizationController::CreatePageContextEligibilityAPI() {
  // The lookup may block, so it runs as a best-effort pool task instead of
  // holding up the set up of the overlay.
  auto load_api =
      base::BindOnce(&optimization_guide::PageContextEligibility::Get);

  // The reply is bound through a weak pointer, so it is dropped if this
  // controller no longer exists when the load finishes.
  auto on_api_loaded = base::BindOnce(
      &LensSearchContextualizationController::
          OnPageContextEligibilityAPILoaded,
      weak_ptr_factory_.GetWeakPtr());

  base::ThreadPool::PostTaskAndReplyWithResult(
      FROM_HERE, {base::TaskPriority::BEST_EFFORT, base::MayBlock()},
      std::move(load_api), std::move(on_api_loaded));
}
// Returns the currently known page context eligibility. With the
// protected-page feature disabled every page counts as eligible; otherwise the
// page is eligible only if it has not been marked ineligible and the
// eligibility API load has completed.
bool LensSearchContextualizationController::GetCurrentPageContextEligibility() {
  return !IsProtectedPageFeatureEnabled() ||
         (is_page_context_eligible_ &&
          has_page_context_eligibility_api_loaded_);
}
// Bundles the arguments of an eligibility check so they can be stored while
// the eligibility API is still loading. `main_frame_url` is copied;
// `frame_metadata` is taken by value and moved into the member.
LensSearchContextualizationController::PageContextEligibilityParams::
PageContextEligibilityParams(
const GURL& main_frame_url,
std::vector<optimization_guide::FrameMetadata> frame_metadata)
: main_frame_url(main_frame_url),
frame_metadata(std::move(frame_metadata)) {}
// Defaulted destructor, defined out of line in this translation unit.
LensSearchContextualizationController::PageContextEligibilityParams::
~PageContextEligibilityParams() = default;
// Reply handler for the background load started by
// CreatePageContextEligibilityAPI(). Records the load result (which may be
// null) and, if IsPageContextEligible() stored a request while the load was in
// flight, answers that request now.
//
// A null `page_context_eligibility` is reported to the pending callback as
// "not eligible". This matches IsPageContextEligible(), which reports false
// once the load has completed without producing an API, and avoids handing a
// null API to optimization_guide::IsPageContextEligible().
void LensSearchContextualizationController::OnPageContextEligibilityAPILoaded(
    optimization_guide::PageContextEligibility* page_context_eligibility) {
  page_context_eligibility_ = page_context_eligibility;
  has_page_context_eligibility_api_loaded_ = true;

  // Nothing to do unless a complete request (callback and parameters) is
  // waiting for the API.
  if (!page_context_eligibility_callback_ ||
      !pending_context_eligibility_params_) {
    return;
  }

  // A failed load means the page is not eligible; only consult the API when
  // one is actually available.
  const bool is_eligible =
      page_context_eligibility_ &&
      optimization_guide::IsPageContextEligible(
          pending_context_eligibility_params_->main_frame_url.GetHost(),
          pending_context_eligibility_params_->main_frame_url.GetPath(),
          std::move(pending_context_eligibility_params_->frame_metadata),
          page_context_eligibility_);

  // Clear the stored parameters before running the callback so that no member
  // of this object is touched after the callback returns.
  pending_context_eligibility_params_.reset();
  std::move(page_context_eligibility_callback_).Run(is_eligible);
}
// Continuation of OnScreenshotTakenForContextual() that runs once page context
// eligibility is known. Stores the screenshot, page URL and page title, starts
// the query flow with them, passes the thumbnail on, marks the controller
// active and then attempts a page contextualization update with `callback`.
//
// When the page is not eligible, the screenshot, URL and title are replaced by
// empty values before being stored or sent, and the side panel is told to show
// the protected error page.
void LensSearchContextualizationController::
    OnInitialPageContextEligibilityFetched(
        const SkBitmap& bitmap,
        const std::vector<gfx::Rect>& all_bounds,
        std::optional<uint32_t> pdf_current_page,
        OnPageContextUpdatedCallback callback,
        bool is_page_context_eligible) {
  SkBitmap screenshot = bitmap;
  auto url = lens_search_controller_->GetPageURL();
  auto title = lens_search_controller_->GetPageTitle();

  if (!is_page_context_eligible) {
    is_page_context_eligible_ = false;
    lens_search_controller_->lens_overlay_side_panel_coordinator()
        ->SetShowProtectedErrorPage(true);
    // Blank out everything derived from the page.
    screenshot = SkBitmap();
    url = GURL();
    title = "";
  }

  viewport_screenshot_ = screenshot;
  page_url_ = url;
  page_title_ = title;

  // The initial request carries no page content and an unknown MIME type.
  GetQueryController()->StartQueryFlow(
      viewport_screenshot_, page_url_, page_title_,
      ConvertSignificantRegionBoxes(all_bounds),
      std::vector<lens::PageContent>(), lens::MimeType::kUnknown,
      pdf_current_page, GetUiScaleFactor(), base::TimeTicks::Now());

  // Hand the (possibly blanked) screenshot over as the thumbnail.
  lens_search_controller_->HandleThumbnailCreatedBitmap(screenshot);

  state_ = State::kActive;
  TryUpdatePageContextualization(std::move(callback));
}
// Takes the copied viewport bitmap in `result` and asks the primary main
// frame's renderer for bounds hints of all images. The reply continues in
// GetPdfCurrentPage(). Each call bumps `screenshot_attempt_id_` and tags the
// request with the new value.
void LensSearchContextualizationController::FetchViewportImageBoundingBoxes(
    OnScreenshotTakenCallback callback,
    const viz::CopyOutputBitmapWithMetadata& result) {
  const SkBitmap& screenshot = result.bitmap;

  content::RenderFrameHost* main_frame =
      lens_search_controller_->GetTabInterface()
          ->GetContents()
          ->GetPrimaryMainFrame();

  mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame> remote_frame;
  main_frame->GetRemoteAssociatedInterfaces()->GetInterface(&remote_frame);

  // This request becomes the most recent screenshot attempt.
  const int attempt_id = ++screenshot_attempt_id_;

  // The remote is moved into the reply callback so that it is kept alive until
  // there's either a connection error or a response. Grab the raw interface
  // pointer first, since the remote is moved-from while the call's arguments
  // are being built.
  auto* raw_frame = remote_frame.get();
  raw_frame->RequestBoundsHintForAllImages(base::BindOnce(
      &LensSearchContextualizationController::GetPdfCurrentPage,
      weak_ptr_factory_.GetWeakPtr(), std::move(remote_frame), attempt_id,
      screenshot, std::move(callback)));
}
// Receives the image `bounds` from the renderer and resolves the PDF page
// index before finishing in DidCaptureScreenshot(). When PDF support is
// compiled in and the tab's web contents has a PDF document helper, the most
// visible page index is requested asynchronously; otherwise
// DidCaptureScreenshot() is called directly without a page index.
void LensSearchContextualizationController::GetPdfCurrentPage(
    mojo::AssociatedRemote<chrome::mojom::ChromeRenderFrame>
        chrome_render_frame,
    int attempt_id,
    const SkBitmap& bitmap,
    OnScreenshotTakenCallback callback,
    const std::vector<gfx::Rect>& bounds) {
#if BUILDFLAG(ENABLE_PDF)
  auto* web_contents =
      lens_search_controller_->GetTabInterface()->GetContents();
  if (pdf::PDFDocumentHelper* helper =
          pdf::PDFDocumentHelper::MaybeGetForWebContents(web_contents)) {
    // The page index is supplied by the helper as the final argument of
    // DidCaptureScreenshot().
    auto on_page_index = base::BindOnce(
        &LensSearchContextualizationController::DidCaptureScreenshot,
        weak_ptr_factory_.GetWeakPtr(), std::move(chrome_render_frame),
        attempt_id, bitmap, bounds, std::move(callback));
    helper->GetMostVisiblePageIndex(std::move(on_page_index));
    return;
  }
#endif  // BUILDFLAG(ENABLE_PDF)

  // No PDF helper available: there is no page index to report.
  DidCaptureScreenshot(std::move(chrome_render_frame), attempt_id, bitmap,
                       bounds, std::move(callback),
                       /*pdf_current_page=*/std::nullopt);
}
// Records the size of the extracted inner text as a plain-text document size.
// Does nothing when `result` is null.
void LensSearchContextualizationController::RecordInnerTextSize(
    std::unique_ptr<content_extraction::InnerTextResult> result) {
  if (result) {
    const auto text_size = result->inner_text.size();
    lens::RecordDocumentSizeBytes(lens::MimeType::kPlainText, text_size);
  }
}
// Converts the image bounds in `all_bounds` into center-rotated boxes for the
// significant regions of the viewport.
//
// Only images whose area is at least
// lens::features::GetLensOverlaySignificantRegionMinArea() are converted.
// Boxes with zero width or height are discarded, the remainder is sorted by
// descending box area, and the list is truncated to
// lens::features::GetLensOverlayMaxSignificantRegions() entries. A maximum of
// zero yields an empty result; a negative maximum means no limit.
std::vector<lens::mojom::CenterRotatedBoxPtr>
LensSearchContextualizationController::ConvertSignificantRegionBoxes(
    const std::vector<gfx::Rect>& all_bounds) {
  std::vector<lens::mojom::CenterRotatedBoxPtr> significant_region_boxes;
  const int max_regions =
      lens::features::GetLensOverlayMaxSignificantRegions();
  if (max_regions == 0) {
    return significant_region_boxes;
  }
  content::RenderFrameHost* render_frame_host =
      lens_search_controller_->GetTabInterface()
          ->GetContents()
          ->GetPrimaryMainFrame();
  const auto view_bounds = render_frame_host->GetView()->GetViewBounds();
  for (const auto& image_bounds : all_bounds) {
    // Check the original area of the images against the minimum area. The
    // product is computed in 64 bits because multiplying two large `int`
    // dimensions would overflow, which is undefined behavior for signed
    // integers.
    const int64_t image_area =
        static_cast<int64_t>(image_bounds.width()) * image_bounds.height();
    if (image_area < lens::features::GetLensOverlaySignificantRegionMinArea()) {
      continue;
    }
    // We only have bounds for images in the main frame of the tab (i.e. not
    // in iframes), so view bounds are identical to tab bounds and can be
    // used for both parameters.
    significant_region_boxes.emplace_back(
        lens::GetCenterRotatedBoxFromTabViewAndImageBounds(
            view_bounds, view_bounds, image_bounds));
  }
  // If an image is outside the viewport, the box will have zero area.
  std::erase_if(significant_region_boxes, [](const auto& box) {
    return box->box.height() == 0 || box->box.width() == 0;
  });
  // Sort by descending area.
  std::sort(significant_region_boxes.begin(), significant_region_boxes.end(),
            [](const auto& box1, const auto& box2) {
              return box1->box.height() * box1->box.width() >
                     box2->box.height() * box2->box.width();
            });
  // Treat negative values of max_regions as no limit.
  if (max_regions > 0 &&
      significant_region_boxes.size() > static_cast<size_t>(max_regions)) {
    significant_region_boxes.resize(max_regions);
  }
  return significant_region_boxes;
}
// Returns the UI scale factor of the tab: the device scale factor of the
// tab's render widget host view multiplied by the page zoom (zoom percent /
// 100).
float LensSearchContextualizationController::GetUiScaleFactor() {
  auto* web_contents =
      lens_search_controller_->GetTabInterface()->GetContents();
  // Keep the device scale factor as a float. Storing it in an int would
  // truncate fractional factors (e.g. 1.5 would become 1) before the
  // multiplication below.
  const float device_scale_factor =
      web_contents->GetRenderWidgetHostView()->GetDeviceScaleFactor();
  const float page_scale_factor =
      zoom::ZoomController::FromWebContents(web_contents)->GetZoomPercent() /
      100.0f;
  return device_scale_factor * page_scale_factor;
}
// Returns the query controller owned by the search controller. CHECK-fails if
// there is none, so the returned pointer is never null.
lens::LensOverlayQueryController*
LensSearchContextualizationController::GetQueryController() {
  lens::LensOverlayQueryController* const controller =
      lens_search_controller_->lens_overlay_query_controller();
  CHECK(controller);
  return controller;
}
// Returns the searchbox controller owned by the search controller. CHECK-fails
// if there is none, so the returned pointer is never null.
lens::LensSearchboxController*
LensSearchContextualizationController::GetSearchboxController() {
  lens::LensSearchboxController* const controller =
      lens_search_controller_->lens_searchbox_controller();
  CHECK(controller);
  return controller;
}
} // namespace lens