// Source-viewer residue (not part of the header):
// blob: d46cdb6d6a035354940577900a99656becf1ac4c [file] [log] [blame]
// Copyright 2025 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_
#define CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_
#include <memory>
#include <optional>

#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/time/time.h"
#include "chrome/browser/page_content_annotations/page_content_extraction_types.h"
#include "components/content_extraction/content/browser/inner_text.h"
#include "components/optimization_guide/content/browser/page_content_proto_provider.h"
#include "components/optimization_guide/proto/features/common_quality_data.pb.h"
#include "content/public/browser/web_contents.h"
#include "pdf/buildflags.h"
#include "third_party/blink/public/mojom/content_extraction/ai_page_content.mojom.h"
// Forward declaration only: this header refers to PageContextEligibility
// solely through pointers, so its full definition is not required here.
namespace optimization_guide {
class PageContextEligibility;
} // namespace optimization_guide
namespace page_content_annotations {
// Decides when a page is ready to have its content extracted, and performs
// that extraction.
//
// The class is neither copyable nor assignable.
class AnnotatedPageContentRequest {
 public:
  // Builds a request bound to `web_contents`.
  // NOTE(review): the extraction options are not a parameter here, so they are
  // presumably chosen by the implementation - confirm in the .cc file.
  static std::unique_ptr<AnnotatedPageContentRequest> Create(
      content::WebContents* web_contents);

  // Builds a request bound to `web_contents` that uses the extraction options
  // given in `request`.
  AnnotatedPageContentRequest(content::WebContents* web_contents,
                              blink::mojom::AIPageContentOptionsPtr request);

  AnnotatedPageContentRequest(const AnnotatedPageContentRequest&) = delete;
  AnnotatedPageContentRequest& operator=(const AnnotatedPageContentRequest&) =
      delete;

  ~AnnotatedPageContentRequest();

  // Page lifecycle notifications.
  // NOTE(review): this class has no observer base class, so these are
  // presumably forwarded by its owner - verify against the caller.
  void PrimaryPageChanged();
  void DidFinishNavigation(content::NavigationHandle* navigation_handle);
  void DidStopLoading();
  void OnFirstContentfulPaintInPrimaryMainFrame();

  // Returns the cached annotated page content (APC) together with whether it
  // is eligible for server upload. Returns std::nullopt when no content is
  // available.
  std::optional<ExtractedPageContentResult> GetCachedContentAndEligibility();

 private:
  // Progress of the extraction for the current navigation.
  enum class Lifecycle {
    // A new navigation occurred and an extraction still has to be scheduled.
    // Scheduling is asynchronous because the page must first become ready.
    kPending,

    // The extraction has been scheduled and a response from the renderer is
    // awaited. The IPC that requests the content may be delayed so that the
    // page has reached a stable state.
    kScheduled,

    // The content for the last committed navigation has been extracted.
    kDone
  };

  void ResetForNewNavigation();
  void MaybeScheduleExtraction();
  void RequestAnnotatedPageContentSync();
  bool ShouldScheduleExtraction() const;

  // Receives the extracted page content; `page_content` may be std::nullopt.
  void OnPageContentReceived(
      std::optional<optimization_guide::AIPageContentResult> page_content);

  // Receives the inner-text extraction result.
  // NOTE(review): `start_time` presumably marks when the request was issued -
  // confirm in the .cc file.
  void OnInnerTextReceived(
      base::TimeTicks start_time,
      std::unique_ptr<content_extraction::InnerTextResult> result);

#if BUILDFLAG(ENABLE_PDF)
  void RequestPdfPageCount();

  // Called once the PDF document has finished loading, at which point its
  // metadata can be queried.
  void OnPdfDocumentLoadComplete();
#endif  // BUILDFLAG(ENABLE_PDF)

  void OnPageContextEligibilityAPILoaded(
      optimization_guide::PageContextEligibility* page_context_eligibility);

  // Not owned.
  // NOTE(review): presumably assigned in OnPageContextEligibilityAPILoaded() -
  // confirm in the .cc file.
  raw_ptr<optimization_guide::PageContextEligibility> page_context_eligibility_;

  // Not owned; the pointer itself never changes after construction.
  const raw_ptr<content::WebContents> web_contents_;

  // Fixed at construction time.
  // NOTE(review): `request_` presumably holds the options passed to the
  // constructor - confirm in the .cc file.
  const blink::mojom::AIPageContentOptionsPtr request_;
  const base::TimeDelta delay_;
  const bool include_inner_text_;

  // Starts out as kDone.
  Lifecycle lifecycle_ = Lifecycle::kDone;

  // NOTE(review): presumably track whether the load-stop and first contentful
  // paint signals are still outstanding - verify in the .cc file.
  bool waiting_for_load_ = false;
  bool waiting_for_fcp_ = false;

  // Empty until content has been cached.
  std::optional<ExtractedPageContentResult> cached_content_;

  // Declared last so that it is destroyed first, before any other member.
  base::WeakPtrFactory<AnnotatedPageContentRequest> weak_factory_{this};
};
} // namespace page_content_annotations
#endif // CHROME_BROWSER_PAGE_CONTENT_ANNOTATIONS_PAGE_CONTENT_ANNOTATIONS_ANNOTATE_PAGE_CONTENT_REQUEST_H_