// components/pdf/renderer/pdf_ocr_helper.h - chromium/src - Git at Google

 // Copyright 2024 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_
 #define COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_

 #include <vector>

 #include "base/containers/queue.h"
 #include "base/memory/weak_ptr.h"
 #include "base/sequence_checker.h"
 #include "content/public/renderer/render_frame_observer.h"
 #include "mojo/public/cpp/bindings/remote.h"
 #include "pdf/accessibility_structs.h"
 #include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
 #include "ui/accessibility/ax_node_id_forward.h"
 #include "ui/accessibility/ax_tree_update.h"

 namespace chrome_pdf {
 class PdfAccessibilityImageFetcher;
 }  // namespace chrome_pdf

 namespace content {
 class RenderFrame;
 }  // namespace content

 namespace pdf {

 // Status of a PDF OCR request as recorded for metrics. `kMaxValue` aliases
 // the highest enumerator (currently `kPerformed`) and must be kept in sync
 // when a new value is appended.
 // NOTE(review): the sites that record these values are not in this header.
 //
 // These values are persisted to logs. Entries should not be renumbered and
 // numeric values should never be reused.
 // LINT.IfChange(PdfOcrRequestStatus)
 enum class PdfOcrRequestStatus {
   kRequested = 0,
   kPerformed = 1,
   kMaxValue = kPerformed,
 };
 // LINT.ThenChange(/tools/metrics/histograms/metadata/accessibility/enums.xml:PdfOcrRequestStatus)

 // Used for storing OCR requests either before performing an OCR job, or after
 // the results have been received. This is for scheduling the work in another
 // task in batches in order to unblock the user from reading a partially
 // OCRed PDF, and in order to avoid sending all the images to the OCR Helper
 // at once, in case the PDF is closed halfway through the OCR process.
 struct PdfOcrRequest {
   // Takes the node ids, the image info and the page index of the image to be
   // OCRed. `is_last_on_page` and `image_pixel_size` are not constructor
   // parameters.
   PdfOcrRequest(const ui::AXNodeID& image_node_id,
                 const chrome_pdf::AccessibilityImageInfo& image,
                 const ui::AXNodeID& root_node_id,
                 const ui::AXNodeID& parent_node_id,
                 const ui::AXNodeID& page_node_id,
                 uint32_t page_index);
   // Copy constructor, declared explicitly and defined out of line. Because
   // the identifying fields below are `const`, a request can be copied but
   // cannot be assigned to.
   PdfOcrRequest(const PdfOcrRequest& other);

   // The fields below are fixed for the lifetime of the request.
   // NOTE(review): presumably these are the accessibility-tree ids of the
   // image node, the tree root, the image's parent node and its page node, as
   // the names suggest; confirm with the code that builds the request.
   const ui::AXNodeID image_node_id;
   const chrome_pdf::AccessibilityImageInfo image;
   const ui::AXNodeID root_node_id;
   const ui::AXNodeID parent_node_id;
   const ui::AXNodeID page_node_id;
   const uint32_t page_index;
   // This boolean indicates which request corresponds to the last image on
   // each page. It defaults to false and is not `const`, so it can be set
   // after construction.
   bool is_last_on_page = false;

   // This field is set after the image is extracted from PDF. Until then it
   // holds a default-constructed `gfx::SizeF`.
   gfx::SizeF image_pixel_size;
 };

 // Manages the connection to the OCR Service via Mojo, and ensures that
 // requests are sent in order and that responses are batched.
 class PdfOcrHelper : public content::RenderFrameObserver {
  public:
   // Callback that receives a batch of OCR requests together with the
   // accessibility tree updates produced for them. Both vectors are passed by
   // value.
   // NOTE(review): presumably `tree_updates[i]` corresponds to
   // `ocr_requests[i]`; confirm in the implementation.
   using OnOcrDataReceivedCallback =
       base::RepeatingCallback<void(std::vector<PdfOcrRequest> ocr_requests,
                                    std::vector<ui::AXTreeUpdate> tree_updates)>;

   // `image_fetcher` is kept as a non-owning pointer (it owns this helper, see
   // `image_fetcher_`). `root_node_id` and `page_count` describe the current
   // document and can later be replaced through `Reset()`. `callback` is the
   // receiver of batched OCR results.
   PdfOcrHelper(chrome_pdf::PdfAccessibilityImageFetcher* image_fetcher,
                content::RenderFrame& render_frame,
                ui::AXNodeID root_node_id,
                uint32_t page_count,
                OnOcrDataReceivedCallback callback);

   // Not copyable.
   PdfOcrHelper(const PdfOcrHelper&) = delete;
   PdfOcrHelper& operator=(const PdfOcrHelper&) = delete;

   ~PdfOcrHelper() override;

   // If the OCR Helper is created before the PDF is loaded or reloaded, i.e.
   // before `PdfAccessibilityTree::SetAccessibilityDocInfo` is called,
   // previous requests are removed and page count and root node are re-set.
   void Reset(ui::AXNodeID root_node_id, uint32_t page_count);
   // Submits the OCR requests for one page; the queue is taken by value.
   // Per the comment on `is_ocr_in_progress_`, this kicks off
   // `OcrNextImage()` itself only when no OCR request is already pending.
   void OcrPage(base::queue<PdfOcrRequest> page_requests);
   // Reports whether every page has been OCRed.
   // NOTE(review): presumably derived from `remaining_page_count_`; confirm in
   // the implementation.
   bool AreAllPagesOcred() const;
   // Reports whether every page of the current batch has been OCRed.
   // NOTE(review): the exact batch-completion rule lives in the
   // implementation, which is not visible here.
   bool AreAllPagesInBatchOcred() const;
   // Test-only hook that supplies the annotator remote.
   // NOTE(review): presumably binds `screen_ai_annotator_`; confirm.
   void SetScreenAIAnnotatorForTesting(
       mojo::PendingRemote<screen_ai::mojom::ScreenAIAnnotator>
           screen_ai_annotator);
   // Test-only hook affecting `remaining_page_count_`.
   // NOTE(review): the value it is reset to is defined in the implementation.
   void ResetRemainingPageCountForTesting();
   // Test-only accessor returning the current `pages_per_batch_`.
   uint32_t pages_per_batch_for_testing() const { return pages_per_batch_; }

   // content::RenderFrameObserver:
   // Intentionally a no-op: the helper does not delete itself here.
   // NOTE(review): presumably because `image_fetcher_` owns this object and
   // controls its lifetime; confirm.
   void OnDestruct() override {}

  private:
   // Derives the number of pages per batch from the document's page count.
   // NOTE(review): the batching policy is defined in the implementation.
   static uint32_t ComputePagesPerBatch(uint32_t page_count);
   // Starts OCR for the next pending image request (see the ordering notes on
   // `is_ocr_in_progress_`).
   void OcrNextImage();
   // Handles the OCR result (`tree_update`) for `request`. Per the comment on
   // `is_ocr_in_progress_`, this is also the place that continues with the
   // next request while OCR is in progress.
   void ReceiveOcrResultsForImage(PdfOcrRequest request,
                                  const ui::AXTreeUpdate& tree_update);

   // If `screen_ai_annotator_` is not connected to OCR service and
   // `render_frame_` is available, tries to connect it to the OCR service.
   // NOTE(review): this class declares no `render_frame_` member; the comment
   // presumably refers to the frame tracked by the
   // `content::RenderFrameObserver` base class.
   void MaybeConnectToOcrService();

   // `image_fetcher_` owns `this`.
   const raw_ptr<chrome_pdf::PdfAccessibilityImageFetcher> image_fetcher_;

   // These three have no in-class initializers, so the constructor must set
   // them; per the comment on `Reset()`, page count and root node are re-set
   // there.
   uint32_t pages_per_batch_;
   uint32_t remaining_page_count_;
   ui::AXNodeID root_node_id_;

   // True if there are pending OCR requests. Used to determine if `OcrPage`
   // should call `OcrNextImage` or if the next call to
   // `ReceiveOcrResultsForImage` should do it instead. This avoids the
   // possibility of processing requests in the wrong order.
   bool is_ocr_in_progress_ = false;

   // A PDF is made up of a number of pages, and each page might have one or
   // more inaccessible images that need to be OCRed. This queue could contain
   // the OCR requests for all the images on several pages, so the requests
   // from each page are concatenated together into a single queue.
   // `PdfOcrRequest.is_last_on_page` indicates which request is the last on
   // each page.
   base::queue<PdfOcrRequest> all_requests_;
   // Requests and tree updates collected for the current batch; their element
   // types match the two parameters of `OnOcrDataReceivedCallback`.
   std::vector<PdfOcrRequest> batch_requests_;
   std::vector<ui::AXTreeUpdate> batch_tree_updates_;
   // Callback supplied at construction for delivering OCR data.
   OnOcrDataReceivedCallback on_ocr_data_received_callback_;
   // Mojo remote to the Screen AI annotator; see `MaybeConnectToOcrService()`.
   mojo::Remote<screen_ai::mojom::ScreenAIAnnotator> screen_ai_annotator_;
   SEQUENCE_CHECKER(sequence_checker_);
   // Needs to be kept last so that it would be destructed first.
   base::WeakPtrFactory<PdfOcrHelper> weak_ptr_factory_{this};
 };

 }  // namespace pdf

 #endif  // COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_
	// Copyright 2024 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_
	#define COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_

	#include <vector>

	#include "base/containers/queue.h"
	#include "base/memory/weak_ptr.h"
	#include "base/sequence_checker.h"
	#include "content/public/renderer/render_frame_observer.h"
	#include "mojo/public/cpp/bindings/remote.h"
	#include "pdf/accessibility_structs.h"
	#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
	#include "ui/accessibility/ax_node_id_forward.h"
	#include "ui/accessibility/ax_tree_update.h"

	namespace chrome_pdf {
	class PdfAccessibilityImageFetcher;
	} // namespace chrome_pdf

	namespace content {
	class RenderFrame;
	} // namespace content

	namespace pdf {

	// Status of a PDF OCR request as recorded for metrics. `kMaxValue` aliases
	// the highest enumerator (currently `kPerformed`) and must be kept in sync
	// when a new value is appended.
	// NOTE(review): the sites that record these values are not in this header.
	//
	// These values are persisted to logs. Entries should not be renumbered and
	// numeric values should never be reused.
	// LINT.IfChange(PdfOcrRequestStatus)
	enum class PdfOcrRequestStatus {
	kRequested = 0,
	kPerformed = 1,
	kMaxValue = kPerformed,
	};
	// LINT.ThenChange(/tools/metrics/histograms/metadata/accessibility/enums.xml:PdfOcrRequestStatus)

	// Used for storing OCR requests either before performing an OCR job, or after
	// the results have been received. This is for scheduling the work in another
	// task in batches in order to unblock the user from reading a partially
	// OCRed PDF, and in order to avoid sending all the images to the OCR Helper
	// at once, in case the PDF is closed halfway through the OCR process.
	struct PdfOcrRequest {
	// Takes the node ids, the image info and the page index of the image to be
	// OCRed. `is_last_on_page` and `image_pixel_size` are not constructor
	// parameters.
	PdfOcrRequest(const ui::AXNodeID& image_node_id,
	const chrome_pdf::AccessibilityImageInfo& image,
	const ui::AXNodeID& root_node_id,
	const ui::AXNodeID& parent_node_id,
	const ui::AXNodeID& page_node_id,
	uint32_t page_index);
	// Copy constructor, declared explicitly and defined out of line. Because
	// the identifying fields below are `const`, a request can be copied but
	// cannot be assigned to.
	PdfOcrRequest(const PdfOcrRequest& other);

	// The fields below are fixed for the lifetime of the request.
	// NOTE(review): presumably these are the accessibility-tree ids of the
	// image node, the tree root, the image's parent node and its page node, as
	// the names suggest; confirm with the code that builds the request.
	const ui::AXNodeID image_node_id;
	const chrome_pdf::AccessibilityImageInfo image;
	const ui::AXNodeID root_node_id;
	const ui::AXNodeID parent_node_id;
	const ui::AXNodeID page_node_id;
	const uint32_t page_index;
	// This boolean indicates which request corresponds to the last image on
	// each page. It defaults to false and is not `const`, so it can be set
	// after construction.
	bool is_last_on_page = false;

	// This field is set after the image is extracted from PDF. Until then it
	// holds a default-constructed `gfx::SizeF`.
	gfx::SizeF image_pixel_size;
	};

	// Manages the connection to the OCR Service via Mojo, and ensures that
	// requests are sent in order and that responses are batched.
	class PdfOcrHelper : public content::RenderFrameObserver {
	public:
	// Callback that receives a batch of OCR requests together with the
	// accessibility tree updates produced for them. Both vectors are passed by
	// value.
	// NOTE(review): presumably `tree_updates[i]` corresponds to
	// `ocr_requests[i]`; confirm in the implementation.
	using OnOcrDataReceivedCallback =
	base::RepeatingCallback<void(std::vector<PdfOcrRequest> ocr_requests,
	std::vector<ui::AXTreeUpdate> tree_updates)>;

	// `image_fetcher` is kept as a non-owning pointer (it owns this helper, see
	// `image_fetcher_`). `root_node_id` and `page_count` describe the current
	// document and can later be replaced through `Reset()`. `callback` is the
	// receiver of batched OCR results.
	PdfOcrHelper(chrome_pdf::PdfAccessibilityImageFetcher* image_fetcher,
	content::RenderFrame& render_frame,
	ui::AXNodeID root_node_id,
	uint32_t page_count,
	OnOcrDataReceivedCallback callback);

	// Not copyable.
	PdfOcrHelper(const PdfOcrHelper&) = delete;
	PdfOcrHelper& operator=(const PdfOcrHelper&) = delete;

	~PdfOcrHelper() override;

	// If the OCR Helper is created before the PDF is loaded or reloaded, i.e.
	// before `PdfAccessibilityTree::SetAccessibilityDocInfo` is called,
	// previous requests are removed and page count and root node are re-set.
	void Reset(ui::AXNodeID root_node_id, uint32_t page_count);
	// Submits the OCR requests for one page; the queue is taken by value.
	// Per the comment on `is_ocr_in_progress_`, this kicks off
	// `OcrNextImage()` itself only when no OCR request is already pending.
	void OcrPage(base::queue<PdfOcrRequest> page_requests);
	// Reports whether every page has been OCRed.
	// NOTE(review): presumably derived from `remaining_page_count_`; confirm in
	// the implementation.
	bool AreAllPagesOcred() const;
	// Reports whether every page of the current batch has been OCRed.
	// NOTE(review): the exact batch-completion rule lives in the
	// implementation, which is not visible here.
	bool AreAllPagesInBatchOcred() const;
	// Test-only hook that supplies the annotator remote.
	// NOTE(review): presumably binds `screen_ai_annotator_`; confirm.
	void SetScreenAIAnnotatorForTesting(
	mojo::PendingRemote<screen_ai::mojom::ScreenAIAnnotator>
	screen_ai_annotator);
	// Test-only hook affecting `remaining_page_count_`.
	// NOTE(review): the value it is reset to is defined in the implementation.
	void ResetRemainingPageCountForTesting();
	// Test-only accessor returning the current `pages_per_batch_`.
	uint32_t pages_per_batch_for_testing() const { return pages_per_batch_; }

	// content::RenderFrameObserver:
	// Intentionally a no-op: the helper does not delete itself here.
	// NOTE(review): presumably because `image_fetcher_` owns this object and
	// controls its lifetime; confirm.
	void OnDestruct() override {}

	private:
	// Derives the number of pages per batch from the document's page count.
	// NOTE(review): the batching policy is defined in the implementation.
	static uint32_t ComputePagesPerBatch(uint32_t page_count);
	// Starts OCR for the next pending image request (see the ordering notes on
	// `is_ocr_in_progress_`).
	void OcrNextImage();
	// Handles the OCR result (`tree_update`) for `request`. Per the comment on
	// `is_ocr_in_progress_`, this is also the place that continues with the
	// next request while OCR is in progress.
	void ReceiveOcrResultsForImage(PdfOcrRequest request,
	const ui::AXTreeUpdate& tree_update);

	// If `screen_ai_annotator_` is not connected to OCR service and
	// `render_frame_` is available, tries to connect it to the OCR service.
	// NOTE(review): this class declares no `render_frame_` member; the comment
	// presumably refers to the frame tracked by the
	// `content::RenderFrameObserver` base class.
	void MaybeConnectToOcrService();

	// `image_fetcher_` owns `this`.
	const raw_ptr<chrome_pdf::PdfAccessibilityImageFetcher> image_fetcher_;

	// These three have no in-class initializers, so the constructor must set
	// them; per the comment on `Reset()`, page count and root node are re-set
	// there.
	uint32_t pages_per_batch_;
	uint32_t remaining_page_count_;
	ui::AXNodeID root_node_id_;

	// True if there are pending OCR requests. Used to determine if `OcrPage`
	// should call `OcrNextImage` or if the next call to
	// `ReceiveOcrResultsForImage` should do it instead. This avoids the
	// possibility of processing requests in the wrong order.
	bool is_ocr_in_progress_ = false;

	// A PDF is made up of a number of pages, and each page might have one or
	// more inaccessible images that need to be OCRed. This queue could contain
	// the OCR requests for all the images on several pages, so the requests
	// from each page are concatenated together into a single queue.
	// `PdfOcrRequest.is_last_on_page` indicates which request is the last on
	// each page.
	base::queue<PdfOcrRequest> all_requests_;
	// Requests and tree updates collected for the current batch; their element
	// types match the two parameters of `OnOcrDataReceivedCallback`.
	std::vector<PdfOcrRequest> batch_requests_;
	std::vector<ui::AXTreeUpdate> batch_tree_updates_;
	// Callback supplied at construction for delivering OCR data.
	OnOcrDataReceivedCallback on_ocr_data_received_callback_;
	// Mojo remote to the Screen AI annotator; see `MaybeConnectToOcrService()`.
	mojo::Remote<screen_ai::mojom::ScreenAIAnnotator> screen_ai_annotator_;
	SEQUENCE_CHECKER(sequence_checker_);
	// Needs to be kept last so that it would be destructed first.
	base::WeakPtrFactory<PdfOcrHelper> weak_ptr_factory_{this};
	};

	} // namespace pdf

	#endif // COMPONENTS_PDF_RENDERER_PDF_OCR_HELPER_H_