// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_
#define COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_
#include <stdint.h>

#include <vector>

#include "base/containers/queue.h"
#include "base/functional/callback.h"
#include "base/memory/raw_ptr.h"
#include "base/memory/weak_ptr.h"
#include "base/sequence_checker.h"
#include "content/public/renderer/render_frame_observer.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "pdf/accessibility_structs.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "ui/accessibility/ax_node_id_forward.h"
#include "ui/accessibility/ax_tree_update.h"
#include "ui/gfx/geometry/size_f.h"
namespace chrome_pdf {
class PdfAccessibilityImageFetcher;
} // namespace chrome_pdf
namespace content {
class RenderFrame;
} // namespace content
namespace pdf {
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
// LINT.IfChange(PdfOcrRequestStatus)
enum class PdfOcrRequestStatus {
  kRequested = 0,
  kPerformed = 1,

  // Alias of the highest-numbered entry above; it is not a distinct status.
  kMaxValue = kPerformed,
};
// LINT.ThenChange(/tools/metrics/histograms/metadata/accessibility/enums.xml:PdfOcrRequestStatus)
// Describes one OCR job for a single image. Instances are stored both while
// the job is waiting to be performed and after its results have been
// received. Holding on to them allows the work to be scheduled in batches in
// another task, which unblocks the user from reading a partially OCRed PDF
// and avoids sending every image to the OCR Helper at once, in case the PDF
// is closed halfway through the OCR process.
struct PdfOcrRequest {
  PdfOcrRequest(const ui::AXNodeID& image_node_id,
                const chrome_pdf::AccessibilityImageInfo& image,
                const ui::AXNodeID& root_node_id,
                const ui::AXNodeID& parent_node_id,
                const ui::AXNodeID& page_node_id,
                uint32_t page_index);
  PdfOcrRequest(const PdfOcrRequest& other);

  // The members below are `const`, so they cannot change after construction.
  const ui::AXNodeID image_node_id;
  const chrome_pdf::AccessibilityImageInfo image;
  const ui::AXNodeID root_node_id;
  const ui::AXNodeID parent_node_id;
  const ui::AXNodeID page_node_id;
  const uint32_t page_index;

  // Marks the request that corresponds to the last image on its page.
  // Defaults to false.
  bool is_last_on_page = false;

  // Filled in once the image has been extracted from the PDF.
  gfx::SizeF image_pixel_size;
};
// Owns the Mojo connection to the OCR Service. Makes sure that requests are
// sent in order and that responses are handed back in batches.
class PdfOcrHelper : public content::RenderFrameObserver {
 public:
  // Signature of the callback that receives OCR requests together with their
  // accessibility tree updates.
  using OnOcrDataReceivedCallback =
      base::RepeatingCallback<void(std::vector<PdfOcrRequest> ocr_requests,
                                   std::vector<ui::AXTreeUpdate> tree_updates)>;

  PdfOcrHelper(chrome_pdf::PdfAccessibilityImageFetcher* image_fetcher,
               content::RenderFrame& render_frame,
               ui::AXNodeID root_node_id,
               uint32_t page_count,
               OnOcrDataReceivedCallback callback);

  // Not copyable.
  PdfOcrHelper(const PdfOcrHelper&) = delete;
  PdfOcrHelper& operator=(const PdfOcrHelper&) = delete;

  ~PdfOcrHelper() override;

  // Handles the case where the OCR Helper was created before the PDF is
  // loaded or reloaded, i.e. before
  // `PdfAccessibilityTree::SetAccessibilityDocInfo` is called: previous
  // requests are removed and the page count and root node are re-set.
  void Reset(ui::AXNodeID root_node_id, uint32_t page_count);

  // Accepts the OCR requests for a page. See `is_ocr_in_progress_` for how
  // this interacts with `OcrNextImage` and `ReceiveOcrResultsForImage`.
  void OcrPage(base::queue<PdfOcrRequest> page_requests);

  bool AreAllPagesOcred() const;
  bool AreAllPagesInBatchOcred() const;

  // Test-only hooks.
  void SetScreenAIAnnotatorForTesting(
      mojo::PendingRemote<screen_ai::mojom::ScreenAIAnnotator>
          screen_ai_annotator);
  void ResetRemainingPageCountForTesting();
  uint32_t pages_per_batch_for_testing() const { return pages_per_batch_; }

  // content::RenderFrameObserver:
  // No-op override.
  void OnDestruct() override {}

 private:
  static uint32_t ComputePagesPerBatch(uint32_t page_count);

  void OcrNextImage();
  void ReceiveOcrResultsForImage(PdfOcrRequest request,
                                 const ui::AXTreeUpdate& tree_update);

  // Tries to connect `screen_ai_annotator_` to the OCR service, but only when
  // it is not already connected and `render_frame_` is available.
  void MaybeConnectToOcrService();

  // The owner of `this`.
  const raw_ptr<chrome_pdf::PdfAccessibilityImageFetcher> image_fetcher_;

  uint32_t pages_per_batch_;
  uint32_t remaining_page_count_;
  ui::AXNodeID root_node_id_;

  // Set while there are pending OCR requests. It decides whether `OcrPage`
  // should call `OcrNextImage` itself, or whether that is left to the next
  // call to `ReceiveOcrResultsForImage`. This rules out processing requests
  // in the wrong order.
  bool is_ocr_in_progress_ = false;

  // A PDF consists of a number of pages, each of which might have one or more
  // inaccessible images that need to be OCRed. This queue may hold the OCR
  // requests for all the images on several pages: the requests from each page
  // are concatenated into this one queue, and
  // `PdfOcrRequest.is_last_on_page` tells which request is the last on each
  // page.
  base::queue<PdfOcrRequest> all_requests_;

  std::vector<PdfOcrRequest> batch_requests_;
  std::vector<ui::AXTreeUpdate> batch_tree_updates_;
  OnOcrDataReceivedCallback on_ocr_data_received_callback_;
  mojo::Remote<screen_ai::mojom::ScreenAIAnnotator> screen_ai_annotator_;

  SEQUENCE_CHECKER(sequence_checker_);

  // Must stay the last member so that it is destructed first.
  base::WeakPtrFactory<PdfOcrHelper> weak_ptr_factory_{this};
};
} // namespace pdf
#endif // COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_