// blob: 3d565debf290f3263213589c1ce7e2a80cb0d7ca [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#import "ios/chrome/browser/intelligence/proto_wrappers/page_context_wrapper.h"
#import <Foundation/Foundation.h>
#import <memory>
#import <optional>
#import "base/barrier_closure.h"
#import "base/check.h"
#import "base/check_op.h"
#import "base/logging.h"
#import "base/memory/weak_ptr.h"
#import "base/strings/sys_string_conversions.h"
#import "base/strings/utf_string_conversions.h"
#import "components/optimization_guide/proto/features/common_quality_data.pb.h"
#import "ios/chrome/browser/snapshots/model/snapshot_tab_helper.h"
#import "ios/web/find_in_page/find_in_page_java_script_feature.h"
#import "ios/web/public/js_messaging/web_frame.h"
#import "ios/web/public/js_messaging/web_frames_manager.h"
#import "ios/web/public/web_state.h"
namespace {

// The JavaScript executed in each of the WebState's WebFrames to retrieve the
// frame's innerText. Declared as a `constexpr` array (rather than a mutable
// pointer to a literal) so the symbol itself cannot be reassigned at runtime.
constexpr char16_t kInnerTextJavaScript[] = u"document.body.innerText;";

}  // namespace
@implementation PageContextWrapper {
base::WeakPtr<web::WebState> _webState;
// The number of async tasks this specific instance of the PageContext wrapper
// needs to complete before executing the `completionCallback`.
NSInteger _asyncTasksToComplete;
// The accumulated innerTexts from all of the WebState's associated
// WebFrames.
NSMutableArray<NSString*>* _webFramesInnerTexts;
// The callback to execute once all async work is complete, which
// relinquishes ownership of the PageContext proto to the callback's handler.
base::OnceCallback<void(
std::unique_ptr<optimization_guide::proto::PageContext>)>
_completion_callback;
// Unique pointer to the PageContext proto.
std::unique_ptr<optimization_guide::proto::PageContext> _page_context;
}
// Initializes the wrapper for `webState`. Keeps a weak pointer to the
// WebState, takes ownership of `completionCallback`, and creates the
// PageContext proto pre-populated with the WebState's visible URL and title.
// No async task is enabled at this point.
- (instancetype)
      initWithWebState:(web::WebState*)webState
    completionCallback:
        (base::OnceCallback<
            void(std::unique_ptr<optimization_guide::proto::PageContext>)>)
            completionCallback {
  self = [super init];
  if (!self) {
    return nil;
  }

  _webState = webState->GetWeakPtr();
  _asyncTasksToComplete = 0;
  _webFramesInnerTexts = [[NSMutableArray alloc] init];
  _completion_callback = std::move(completionCallback);

  // Build the proto locally, fill in the synchronously available fields, then
  // hand it over to the ivar.
  auto page_context =
      std::make_unique<optimization_guide::proto::PageContext>();
  page_context->set_url(webState->GetVisibleURL().spec());
  page_context->set_title(base::UTF16ToUTF8(webState->GetTitle()));
  _page_context = std::move(page_context);

  return self;
}
// Stops any in-progress text highlighting when the wrapper goes away
// (`-stopTextHighlighting` is a no-op if the WebState or its main frame is
// gone).
- (void)dealloc {
  [self stopTextHighlighting];
}
// Kicks off every enabled async task (snapshot, innerText, full page PDF) and
// executes the completion callback once all of them have reported back. If no
// task is enabled, or if the WebState has already been destroyed, the
// completion callback is executed right away with whatever the PageContext
// currently holds.
- (void)populatePageContextFieldsAsync {
  CHECK_GE(_asyncTasksToComplete, 0);

  // `_webState` is a WeakPtr: every task below dereferences it, so bail out
  // early (while still honoring the completion callback) when it is gone.
  if (_asyncTasksToComplete == 0 || !_webState) {
    [self asyncWorkCompletedForPageContext];
    return;
  }

  // Use a `BarrierClosure` to ensure all async tasks are completed before
  // executing the overall completion callback. The BarrierClosure will wait
  // until the `page_context_barrier` callback is itself run
  // `_asyncTasksToComplete` times.
  __weak PageContextWrapper* weakSelf = self;
  base::RepeatingClosure page_context_barrier =
      base::BarrierClosure(_asyncTasksToComplete, base::BindOnce(^{
                             [weakSelf asyncWorkCompletedForPageContext];
                           }));

  // Asynchronous work. *IMPORTANT NOTES*:
  // When adding async tasks below, an accompanying setter should also be
  // created to follow the disabled-by-default pattern (which
  // increments/decrements `_asyncTasksToComplete` accordingly). Also, if a
  // given task is enabled, every code path for that task should eventually
  // execute the `page_context_barrier` callback, otherwise the `BarrierClosure`
  // will never execute its completion block.
  if (_shouldGetSnapshot) {
    [self processSnapshotWithBarrier:page_context_barrier];
  }

  if (_shouldGetInnerText) {
    [self processInnerTextWithBarrier:page_context_barrier];
  }

  // Create full page PDF representation of the WebState, if enabled.
  if (_shouldGetFullPagePDF) {
    _webState->CreateFullPagePdf(base::BindOnce(^(NSData* PDFData) {
      [weakSelf encodeAndSetFullPagePDF:PDFData];
      page_context_barrier.Run();
    }));
  }
}
#pragma mark - Setters
// Enables/disables the snapshot task. When the flag actually changes, the
// count of async tasks gating the completion callback is adjusted by one in
// the matching direction; setting the same value again is a no-op.
- (void)setShouldGetSnapshot:(BOOL)shouldGetSnapshot {
  if (_shouldGetSnapshot != shouldGetSnapshot) {
    _shouldGetSnapshot = shouldGetSnapshot;
    if (shouldGetSnapshot) {
      ++_asyncTasksToComplete;
    } else {
      --_asyncTasksToComplete;
    }
  }
}
// Enables/disables the full page PDF task. When the flag actually changes,
// the count of async tasks gating the completion callback is adjusted by one
// in the matching direction; setting the same value again is a no-op.
- (void)setShouldGetFullPagePDF:(BOOL)shouldGetFullPagePDF {
  if (_shouldGetFullPagePDF != shouldGetFullPagePDF) {
    _shouldGetFullPagePDF = shouldGetFullPagePDF;
    if (shouldGetFullPagePDF) {
      ++_asyncTasksToComplete;
    } else {
      --_asyncTasksToComplete;
    }
  }
}
// Enables/disables the innerText task. When the flag actually changes, the
// count of async tasks gating the completion callback is adjusted by one in
// the matching direction; setting the same value again is a no-op.
- (void)setShouldGetInnerText:(BOOL)shouldGetInnerText {
  if (_shouldGetInnerText != shouldGetInnerText) {
    _shouldGetInnerText = shouldGetInnerText;
    if (shouldGetInnerText) {
      ++_asyncTasksToComplete;
    } else {
      --_asyncTasksToComplete;
    }
  }
}
#pragma mark - Private
// Retrieves the WebState snapshot and stores it in the PageContext. The
// `barrier` callback is executed exactly once on every code path of this
// method, including when the WebState is destroyed before or during the
// (asynchronous) snapshot work.
- (void)processSnapshotWithBarrier:(base::RepeatingClosure)barrier {
  // The WebState is only weakly held; without it there is nothing to capture.
  if (!_webState) {
    barrier.Run();
    return;
  }

  __weak PageContextWrapper* weakSelf = self;
  auto callback = ^(UIImage* image) {
    if ([weakSelf shouldUpdateSnapshotWithImage:image]) {
      [weakSelf updateSnapshotWithBarrier:barrier];
      return;
    }

    [weakSelf encodeImageAndSetTabScreenshot:image];
    barrier.Run();
  };

  // If the WebState is not currently visible, retrieve the latest snapshot
  // version in cache or on disk.
  if (!_webState->IsVisible()) {
    SnapshotTabHelper::FromWebState(_webState.get())
        ->RetrieveColorSnapshot(callback);
    return;
  }

  // The WebState is visible: update the snapshot in case the user was
  // scrolling. The update may run after an asynchronous find-in-page search,
  // so capture the WebState weakly and look up the tab helper again at that
  // point instead of holding a raw pointer that could dangle.
  base::WeakPtr<web::WebState> weakWebState = _webState;
  auto updateSnapshotCallback =
      base::BindOnce(^(std::optional<int> result_matches) {
        // TODO(crbug.com/401282824): Log the matches count to measure
        // highlighting precision.
        web::WebState* webState = weakWebState.get();
        if (!webState) {
          // The WebState went away while searching; still honor the barrier.
          barrier.Run();
          return;
        }
        SnapshotTabHelper::FromWebState(webState)->UpdateSnapshotWithCallback(
            callback);
      });

  // If there is text to highlight, do it before capturing the screenshot.
  // Highlighting needs a main frame (same null check as
  // `-stopTextHighlighting`); without one, capture the snapshot directly.
  web::WebFrame* main_frame =
      _textToHighlight != nil
          ? _webState->GetPageWorldWebFramesManager()->GetMainWebFrame()
          : nullptr;
  if (!main_frame) {
    std::move(updateSnapshotCallback).Run(std::nullopt);
    return;
  }

  web::FindInPageJavaScriptFeature* find_in_page_feature =
      web::FindInPageJavaScriptFeature::GetInstance();
  find_in_page_feature->Search(main_frame,
                               base::SysNSStringToUTF8(_textToHighlight),
                               std::move(updateSnapshotCallback));
}
// Gets the WebState's innerText by executing JavaScript in every WebFrame
// that is same-origin with the main WebFrame. The `barrier` callback is
// executed for all codepaths in this method, including when the WebState has
// been destroyed or has no frames.
- (void)processInnerTextWithBarrier:(base::RepeatingClosure)barrier {
  // The WebState is only weakly held; without it there is no text to fetch.
  if (!_webState) {
    barrier.Run();
    return;
  }

  web::WebFramesManager* frames_manager =
      _webState->GetPageWorldWebFramesManager();
  std::set<web::WebFrame*> web_frames = frames_manager->GetAllWebFrames();
  web::WebFrame* main_frame = frames_manager->GetMainWebFrame();
  if (web_frames.empty() || !main_frame) {
    barrier.Run();
    return;
  }

  // Use a `BarrierClosure` to ensure the JavaScript is done executing in
  // all WebFrames before executing the page context barrier `barrier`,
  // which in turn signals to the PageContextWrapper that the innerText is
  // done being processed. The BarrierClosure will wait until the
  // `inner_text_barrier` callback is itself run once per WebFrame.
  __weak PageContextWrapper* weakSelf = self;
  base::RepeatingClosure inner_text_barrier =
      base::BarrierClosure(web_frames.size(), base::BindOnce(^{
                             [weakSelf webFramesInnerTextsFetchCompleted];
                             barrier.Run();
                           }));

  auto callback = ^(const base::Value* value, NSError* error) {
    [weakSelf parseAndConcatenateJavaScriptValue:value withError:error];
    inner_text_barrier.Run();
  };

  // The main frame's origin does not change while iterating; read it once.
  const auto main_frame_origin = main_frame->GetSecurityOrigin();

  // Execute the JavaScript on each WebFrame and pass in the callback (which
  // executes the barrier when run).
  for (web::WebFrame* web_frame : web_frames) {
    // Skip WebFrames with different origins from the main WebFrame. Skipped
    // frames must still count toward the barrier.
    if (!web_frame ||
        !web_frame->GetSecurityOrigin().IsSameOriginWith(main_frame_origin)) {
      inner_text_barrier.Run();
      continue;
    }

    web_frame->ExecuteJavaScript(kInnerTextJavaScript,
                                 base::BindOnce(callback));
  }
}
// All async tasks are complete: stop any text highlighting, then execute the
// overall completion callback, relinquishing ownership of the PageContext
// proto to the callback handler. The completion callback is a OnceCallback,
// so any invocation after the first one is ignored rather than running a
// consumed (null) callback.
- (void)asyncWorkCompletedForPageContext {
  [self stopTextHighlighting];

  if (_completion_callback.is_null()) {
    return;
  }
  std::move(_completion_callback).Run(std::move(_page_context));
}
// Returns YES only when forcing the update of missing snapshots is enabled
// and `image` is nil (i.e. the snapshot is missing).
- (BOOL)shouldUpdateSnapshotWithImage:(UIImage*)image {
  if (image) {
    return NO;
  }
  return _shouldForceUpdateMissingSnapshots;
}
// Updates the snapshot for the WebState, stores it in the PageContext, and
// executes the `barrier` callback when finished. This method is reached from
// an asynchronous snapshot callback, so the WebState may already have been
// destroyed; in that case the `barrier` is executed immediately so that the
// overall completion is never blocked.
- (void)updateSnapshotWithBarrier:(base::RepeatingClosure)barrier {
  if (!_webState) {
    barrier.Run();
    return;
  }

  // Capture `self` weakly, like every other async callback in this class, so
  // a pending snapshot update does not extend the wrapper's lifetime.
  __weak PageContextWrapper* weakSelf = self;
  SnapshotTabHelper::FromWebState(_webState.get())
      ->UpdateSnapshotWithCallback(^(UIImage* image) {
        [weakSelf encodeImageAndSetTabScreenshot:image];
        barrier.Run();
      });
}
// Stops text highlighting, then encodes the `image` snapshot as PNG and stores
// its base64 representation as the tab screenshot of the current PageContext.
// Logs a warning and leaves the PageContext untouched if `image` is nil or
// cannot be converted to PNG.
- (void)encodeImageAndSetTabScreenshot:(UIImage*)image {
  // The snapshot has been taken (or failed), highlighting is no longer needed.
  [self stopTextHighlighting];

  if (image == nil) {
    DLOG(WARNING) << "Failed to fetch webpage screenshot.";
    return;
  }

  NSData* pngData = UIImagePNGRepresentation(image);
  if (pngData == nil) {
    DLOG(WARNING) << "Failed to convert the screenshot to PNG.";
    return;
  }

  _page_context->set_tab_screenshot(
      base::SysNSStringToUTF8([pngData base64EncodedStringWithOptions:0]));
}
// Stores the base64 representation of `PDFData` in the PageContext proto.
// Logs a warning and leaves the PageContext untouched if `PDFData` is nil.
- (void)encodeAndSetFullPagePDF:(NSData*)PDFData {
  if (PDFData) {
    NSString* encodedPDF = [PDFData base64EncodedStringWithOptions:0];
    _page_context->set_pdf_data(base::SysNSStringToUTF8(encodedPDF));
  } else {
    DLOG(WARNING) << "Failed to fetch webpage PDF data.";
  }
}
// Handles the result of the innerText JavaScript for one WebFrame. When the
// execution succeeded and `value` is a string, the string is trimmed of
// leading/trailing whitespace and newlines and, if non-empty, appended to
// `_webFramesInnerTexts`. Otherwise a warning is logged (including the
// description of `error` when it is set) and nothing is appended.
- (void)parseAndConcatenateJavaScriptValue:(const base::Value*)value
                                 withError:(NSError*)error {
  const BOOL hasStringResult = !error && value && value->is_string();
  if (!hasStringResult) {
    DLOG(WARNING) << "Failed to fetch webpage innerText.";
    if (error) {
      DLOG(WARNING) << base::SysNSStringToUTF8([error localizedDescription]);
    }
    return;
  }

  NSCharacterSet* whitespace =
      [NSCharacterSet whitespaceAndNewlineCharacterSet];
  NSString* innerText = base::SysUTF8ToNSString(value->GetString());
  NSString* trimmedInnerText =
      [innerText stringByTrimmingCharactersInSet:whitespace];

  // Frames without any visible text do not contribute to the result.
  if (trimmedInnerText.length > 0) {
    [_webFramesInnerTexts addObject:trimmedInnerText];
  }
}
// Joins the innerText collected from the WebFrames with newlines and stores
// the result as the inner text of the PageContext proto.
- (void)webFramesInnerTextsFetchCompleted {
  NSString* joinedInnerText =
      [_webFramesInnerTexts componentsJoinedByString:@"\n"];
  _page_context->set_inner_text(base::SysNSStringToUTF8(joinedInnerText));
}
// Stops the find-in-page text highlighting in the WebState's main frame. Does
// nothing if the WebState has been destroyed or has no main frame.
- (void)stopTextHighlighting {
  web::WebState* webState = _webState.get();
  if (!webState) {
    return;
  }

  web::WebFrame* mainFrame =
      webState->GetPageWorldWebFramesManager()->GetMainWebFrame();
  if (mainFrame) {
    web::FindInPageJavaScriptFeature::GetInstance()->Stop(mainFrame);
  }
}
@end