pdf/pdfium/pdfium_searchify.cc - chromium/src.git - Git at Google

 // Copyright 2024 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "pdf/pdfium/pdfium_searchify.h"

 #include <math.h>
 #include <stdint.h>

 #include <algorithm>
 #include <array>
 #include <string>
 #include <utility>
 #include <vector>

 #include "base/check.h"
 #include "base/check_op.h"
 #include "base/compiler_specific.h"
 #include "base/containers/span.h"
 #include "base/functional/callback.h"
 #include "base/numerics/angle_conversions.h"
 #include "base/strings/utf_string_conversions.h"
 #include "pdf/pdfium/pdfium_engine.h"
 #include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
 #include "pdf/pdfium/pdfium_ocr.h"
 #include "pdf/pdfium/pdfium_searchify_font.h"
 #include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
 #include "third_party/pdfium/public/cpp/fpdf_scopers.h"
 #include "third_party/pdfium/public/fpdf_edit.h"
 #include "third_party/pdfium/public/fpdf_save.h"
 #include "third_party/pdfium/public/fpdfview.h"
 #include "third_party/skia/include/core/SkBitmap.h"
 #include "third_party/skia/include/core/SkImageInfo.h"
 #include "third_party/skia/include/core/SkPixmap.h"
 #include "ui/gfx/codec/jpeg_codec.h"
 #include "ui/gfx/geometry/point_f.h"
 #include "ui/gfx/geometry/rect.h"
 #include "ui/gfx/geometry/size.h"
 #include "ui/gfx/geometry/size_f.h"

 namespace chrome_pdf {

 namespace {

 std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
   std::u16string utf16_str = base::UTF8ToUTF16(string);
   std::vector<uint32_t> charcodes;
   charcodes.reserve(utf16_str.size());
   for (auto c : utf16_str) {
     charcodes.push_back(c);
   }
   return charcodes;
 }

 // The coordinate systems between OCR and PDF are different. OCR's origin is at
 // top-left, so we need to convert them to PDF's bottom-left.
 SearchifyBoundingBoxOrigin ConvertToPdfOrigin(const gfx::Rect& rect,
                                               float angle,
                                               float coordinate_system_height) {
   const float theta = base::DegToRad(angle);
   const float x = rect.x() - (sinf(theta) * rect.height());
   const float y =
       coordinate_system_height - (rect.y() + cosf(theta) * rect.height());
   return {.point = {x, y}, .theta = -theta};
 }

 // Project the text object's origin to the baseline's origin.
 SearchifyBoundingBoxOrigin ProjectToBaseline(
     const gfx::PointF& origin_point,
     const SearchifyBoundingBoxOrigin& baseline_origin) {
   const float sin_theta = sinf(baseline_origin.theta);
   const float cos_theta = cosf(baseline_origin.theta);
   // The length between `origin` and `baseline_origin`.
   const float length =
       (origin_point.x() - baseline_origin.point.x()) * cos_theta +
       (origin_point.y() - baseline_origin.point.y()) * sin_theta;
   return {.point = {baseline_origin.point.x() + length * cos_theta,
                     baseline_origin.point.y() + length * sin_theta},
           .theta = baseline_origin.theta};
 }

 gfx::SizeF GetRenderedImageSize(FPDF_PAGEOBJECT image) {
   FS_QUADPOINTSF quadpoints;
   if (!FPDFPageObj_GetRotatedBounds(image, &quadpoints)) {
     return gfx::SizeF();
   }

   return gfx::SizeF(
       hypotf(quadpoints.x1 - quadpoints.x2, quadpoints.y1 - quadpoints.y2),
       hypotf(quadpoints.x2 - quadpoints.x3, quadpoints.y2 - quadpoints.y3));
 }

 bool CalculateImageWithoutScalingMatrix(FPDF_PAGEOBJECT image,
                                         const gfx::SizeF& rendered_size,
                                         FS_MATRIX& image_matrix) {
   if (!FPDFPageObj_GetMatrix(image, &image_matrix)) {
     return false;
   }
   image_matrix.a /= rendered_size.width();
   image_matrix.b /= rendered_size.width();
   image_matrix.c /= rendered_size.height();
   image_matrix.d /= rendered_size.height();
   return true;
 }

 // Returns the transformation matrix needed to move a word to where it is
 // positioned on the image.
 FS_MATRIX CalculateWordMoveMatrix(const SearchifyBoundingBoxOrigin& word_origin,
                                   int word_bounding_box_width,
                                   bool word_is_rtl) {
   const float sin_theta = sinf(word_origin.theta);
   const float cos_theta = cosf(word_origin.theta);
   FS_MATRIX move_matrix(cos_theta, sin_theta, -sin_theta, cos_theta,
                         word_origin.point.x(), word_origin.point.y());
   if (word_is_rtl) {
     move_matrix.a = -move_matrix.a;
     move_matrix.b = -move_matrix.b;
     move_matrix.e += cos_theta * word_bounding_box_width;
     move_matrix.f += sin_theta * word_bounding_box_width;
   }
   return move_matrix;
 }

 void AddWordOnImage(FPDF_DOCUMENT document,
                     FPDF_PAGE page,
                     FPDF_FONT font,
                     const screen_ai::mojom::WordBoxPtr& word,
                     base::span<const FS_MATRIX> transform_matrices) {
   ScopedFPDFPageObject text(
       FPDFPageObj_CreateTextObj(document, font, word->bounding_box.height()));
   CHECK(text);

   std::string word_string = word->word;
   // TODO(crbug.com/41487613): A more accurate width would be the distance
   // from current word's origin to next word's origin.
   if (word->has_space_after) {
     word_string.push_back(' ');
   }

   if (word_string.empty()) {
     DLOG(ERROR) << "Got empty word";
     return;
   }

   std::vector<uint32_t> charcodes = Utf8ToCharcodes(word_string);
   if (!FPDFText_SetCharcodes(text.get(), charcodes.data(), charcodes.size())) {
     DLOG(ERROR) << "Failed to set charcodes";
     return;
   }

   // Make text invisible
   if (!FPDFTextObj_SetTextRenderMode(text.get(),
                                      FPDF_TEXTRENDERMODE_INVISIBLE)) {
     DLOG(ERROR) << "Failed to make text invisible";
     return;
   }

   const gfx::SizeF text_object_size = GetImageSize(text.get());
   CHECK_GT(text_object_size.width(), 0);
   CHECK_GT(text_object_size.height(), 0);
   const FS_MATRIX text_scale_matrix(
       word->bounding_box.width() / text_object_size.width(), 0, 0,
       word->bounding_box.height() / text_object_size.height(), 0, 0);
   CHECK(FPDFPageObj_TransformF(text.get(), &text_scale_matrix));

   for (const auto& matrix : transform_matrices) {
     FPDFPageObj_TransformF(text.get(), &matrix);
   }

   FPDFPage_InsertObject(page, text.release());
 }

 void AddTextOnImage(FPDF_DOCUMENT document,
                     FPDF_PAGE page,
                     FPDF_FONT font,
                     FPDF_PAGEOBJECT image,
                     screen_ai::mojom::VisualAnnotationPtr annotation,
                     const gfx::Size& image_pixel_size) {
   const gfx::SizeF image_rendered_size = GetRenderedImageSize(image);
   if (image_rendered_size.IsEmpty()) {
     DLOG(ERROR) << "Failed to get image rendered dimensions";
     return;
   }

   // The transformation matrices is applied as follows:
   std::array<FS_MATRIX, 3> transform_matrices;
   // Move text object to the corresponding text position on the full image.
   FS_MATRIX& move_matrix = transform_matrices[0];
   // Scale from full image size to rendered image size on the PDF.
   FS_MATRIX& image_scale_matrix = transform_matrices[1];
   // Apply the image's transformation matrix on the PDF page without the
   // scaling matrix.
   FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];

   image_scale_matrix = {
       image_rendered_size.width() / image_pixel_size.width(),   0, 0,
       image_rendered_size.height() / image_pixel_size.height(), 0, 0};
   if (!CalculateImageWithoutScalingMatrix(image, image_rendered_size,
                                           image_without_scaling_matrix)) {
     DLOG(ERROR) << "Failed to get image matrix";
     return;
   }

   for (const auto& line : annotation->lines) {
     SearchifyBoundingBoxOrigin baseline_origin =
         ConvertToPdfOrigin(line->baseline_box, line->baseline_box_angle,
                            image_pixel_size.height());

     for (const auto& word : line->words) {
       if (word->bounding_box.IsEmpty()) {
         continue;
       }

       SearchifyBoundingBoxOrigin origin =
           ConvertToPdfOrigin(word->bounding_box, word->bounding_box_angle,
                              image_pixel_size.height());
       move_matrix = CalculateWordMoveMatrix(
           ProjectToBaseline(origin.point, baseline_origin),
           word->bounding_box.width(),
           word->direction ==
               screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT);
       AddWordOnImage(document, page, font, word, transform_matrices);
     }
   }
 }

 ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
   std::vector<uint8_t> cid_to_gid_map(CreateCidToGidMap());
   return ScopedFPDFFont(
       FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
                                 cid_to_gid_map.data(), cid_to_gid_map.size()));
 }

 int GetBlockForJpeg(void* param,
                     unsigned long pos,
                     unsigned char* buf,
                     unsigned long size) {
   auto data_vector = *static_cast<base::span<const uint8_t>*>(param);
   if (pos + size < pos || pos + size > data_vector.size()) {
     return 0;
   }
   // TODO(tsepez): spanify arguments to remove the error.
   base::span<uint8_t> UNSAFE_TODO(buf_span(buf, size));
   buf_span.copy_from(data_vector.subspan(pos, size));
   return 1;
 }

 }  // namespace

 std::vector<uint8_t> PDFiumSearchify(
     base::span<const uint8_t> pdf_buffer,
     base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
         const SkBitmap& bitmap)> perform_ocr_callback) {
   ScopedFPDFDocument document(
       FPDF_LoadMemDocument64(pdf_buffer.data(), pdf_buffer.size(), nullptr));
   if (!document) {
     DLOG(ERROR) << "Failed to load document";
     return {};
   }
   int page_count = FPDF_GetPageCount(document.get());
   if (page_count == 0) {
     DLOG(ERROR) << "Got zero page count";
     return {};
   }
   ScopedFPDFFont font = CreateFont(document.get());
   CHECK(font);
   for (int page_index = 0; page_index < page_count; page_index++) {
     ScopedFPDFPage page(FPDF_LoadPage(document.get(), page_index));
     if (!page) {
       DLOG(ERROR) << "Failed to load page";
       continue;
     }
     int object_count = FPDFPage_CountObjects(page.get());
     for (int object_index = 0; object_index < object_count; object_index++) {
       // GetImageForOcr() checks for null `image`.
       FPDF_PAGEOBJECT image = FPDFPage_GetObject(page.get(), object_index);
       SkBitmap bitmap = GetImageForOcr(document.get(), page.get(), image);
       // The object is not an image or failed to get the bitmap from the image.
       if (bitmap.empty()) {
         continue;
       }
       auto annotation = perform_ocr_callback.Run(bitmap);
       if (!annotation) {
         DLOG(ERROR) << "Failed to get OCR annotation on the image";
         return {};
       }
       AddTextOnImage(document.get(), page.get(), font.get(), image,
                      std::move(annotation),
                      gfx::Size(bitmap.width(), bitmap.height()));
     }
     if (!FPDFPage_GenerateContent(page.get())) {
       DLOG(ERROR) << "Failed to generate content";
       return {};
     }
   }
   PDFiumMemBufferFileWrite output_file_write;
   if (!FPDF_SaveAsCopy(document.get(), &output_file_write, 0)) {
     DLOG(ERROR) << "Failed to save the document";
     return {};
   }
   return output_file_write.TakeBuffer();
 }

 SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
     const gfx::Rect& rect,
     float angle,
     float coordinate_system_height) {
   return ConvertToPdfOrigin(rect, angle, coordinate_system_height);
 }

 FS_MATRIX CalculateWordMoveMatrixForTesting(
     const SearchifyBoundingBoxOrigin& origin,
     int word_bounding_box_width,
     bool word_is_rtl) {
   return CalculateWordMoveMatrix(origin, word_bounding_box_width, word_is_rtl);
 }

 PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
   // TODO(thestig): Check the default value of `use_skia`.
   InitializeSDK(false, false, FontMappingMode::kNoMapping);
 }

 PdfiumProgressiveSearchifier::ScopedSdkInitializer::~ScopedSdkInitializer() {
   ShutdownSDK();
 }

 PdfiumProgressiveSearchifier::PdfiumProgressiveSearchifier()
     : doc_(FPDF_CreateNewDocument()), font_(CreateFont(doc_.get())) {
   CHECK(doc_);
   CHECK(font_);
 }

 PdfiumProgressiveSearchifier::~PdfiumProgressiveSearchifier() = default;

 // TODO(chuhsuan): Return bool instead of crashing on error.
 void PdfiumProgressiveSearchifier::AddPage(
     const SkBitmap& bitmap,
     uint32_t page_index,
     screen_ai::mojom::VisualAnnotationPtr annotation) {
   CHECK(annotation);
   // Replace the page if it already exists.
   DeletePage(page_index);
   int width = bitmap.width();
   int height = bitmap.height();
   ScopedFPDFPage page(FPDFPage_New(doc_.get(), page_index, width, height));
   CHECK(page);
   ScopedFPDFPageObject image(FPDFPageObj_NewImageObj(doc_.get()));
   CHECK(image);
   std::vector<uint8_t> encoded;
   CHECK(gfx::JPEGCodec::Encode(bitmap, 100, &encoded));
   FPDF_FILEACCESS file_access{
       .m_FileLen = static_cast<unsigned long>(encoded.size()),
       .m_GetBlock = &GetBlockForJpeg,
       .m_Param = &encoded};
   CHECK(FPDFImageObj_LoadJpegFileInline(nullptr, 0, image.get(), &file_access));
   CHECK(FPDFImageObj_SetMatrix(image.get(), width, 0, 0, height, 0, 0));
   AddTextOnImage(doc_.get(), page.get(), font_.get(), image.get(),
                  std::move(annotation), gfx::Size(width, height));
   FPDFPage_InsertObject(page.get(), image.release());
   CHECK(FPDFPage_GenerateContent(page.get()));
 }

 void PdfiumProgressiveSearchifier::DeletePage(uint32_t page_index) {
   FPDFPage_Delete(doc_.get(), page_index);
 }

 std::vector<uint8_t> PdfiumProgressiveSearchifier::Save() {
   PDFiumMemBufferFileWrite output_file_write;
   CHECK(FPDF_SaveAsCopy(doc_.get(), &output_file_write, 0));
   return output_file_write.TakeBuffer();
 }

 }  // namespace chrome_pdf
	// Copyright 2024 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "pdf/pdfium/pdfium_searchify.h"

	#include <math.h>
	#include <stdint.h>

	#include <algorithm>
	#include <array>
	#include <string>
	#include <utility>
	#include <vector>

	#include "base/check.h"
	#include "base/check_op.h"
	#include "base/compiler_specific.h"
	#include "base/containers/span.h"
	#include "base/functional/callback.h"
	#include "base/numerics/angle_conversions.h"
	#include "base/strings/utf_string_conversions.h"
	#include "pdf/pdfium/pdfium_engine.h"
	#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
	#include "pdf/pdfium/pdfium_ocr.h"
	#include "pdf/pdfium/pdfium_searchify_font.h"
	#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
	#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
	#include "third_party/pdfium/public/fpdf_edit.h"
	#include "third_party/pdfium/public/fpdf_save.h"
	#include "third_party/pdfium/public/fpdfview.h"
	#include "third_party/skia/include/core/SkBitmap.h"
	#include "third_party/skia/include/core/SkImageInfo.h"
	#include "third_party/skia/include/core/SkPixmap.h"
	#include "ui/gfx/codec/jpeg_codec.h"
	#include "ui/gfx/geometry/point_f.h"
	#include "ui/gfx/geometry/rect.h"
	#include "ui/gfx/geometry/size.h"
	#include "ui/gfx/geometry/size_f.h"

	namespace chrome_pdf {

	namespace {

	std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
	std::u16string utf16_str = base::UTF8ToUTF16(string);
	std::vector<uint32_t> charcodes;
	charcodes.reserve(utf16_str.size());
	for (auto c : utf16_str) {
	charcodes.push_back(c);
	}
	return charcodes;
	}

	// The coordinate systems between OCR and PDF are different. OCR's origin is at
	// top-left, so we need to convert them to PDF's bottom-left.
	SearchifyBoundingBoxOrigin ConvertToPdfOrigin(const gfx::Rect& rect,
	float angle,
	float coordinate_system_height) {
	const float theta = base::DegToRad(angle);
	const float x = rect.x() - (sinf(theta) * rect.height());
	const float y =
	coordinate_system_height - (rect.y() + cosf(theta) * rect.height());
	return {.point = {x, y}, .theta = -theta};
	}

	// Project the text object's origin to the baseline's origin.
	SearchifyBoundingBoxOrigin ProjectToBaseline(
	const gfx::PointF& origin_point,
	const SearchifyBoundingBoxOrigin& baseline_origin) {
	const float sin_theta = sinf(baseline_origin.theta);
	const float cos_theta = cosf(baseline_origin.theta);
	// The length between `origin` and `baseline_origin`.
	const float length =
	(origin_point.x() - baseline_origin.point.x()) * cos_theta +
	(origin_point.y() - baseline_origin.point.y()) * sin_theta;
	return {.point = {baseline_origin.point.x() + length * cos_theta,
	baseline_origin.point.y() + length * sin_theta},
	.theta = baseline_origin.theta};
	}

	gfx::SizeF GetRenderedImageSize(FPDF_PAGEOBJECT image) {
	FS_QUADPOINTSF quadpoints;
	if (!FPDFPageObj_GetRotatedBounds(image, &quadpoints)) {
	return gfx::SizeF();
	}

	return gfx::SizeF(
	hypotf(quadpoints.x1 - quadpoints.x2, quadpoints.y1 - quadpoints.y2),
	hypotf(quadpoints.x2 - quadpoints.x3, quadpoints.y2 - quadpoints.y3));
	}

	bool CalculateImageWithoutScalingMatrix(FPDF_PAGEOBJECT image,
	const gfx::SizeF& rendered_size,
	FS_MATRIX& image_matrix) {
	if (!FPDFPageObj_GetMatrix(image, &image_matrix)) {
	return false;
	}
	image_matrix.a /= rendered_size.width();
	image_matrix.b /= rendered_size.width();
	image_matrix.c /= rendered_size.height();
	image_matrix.d /= rendered_size.height();
	return true;
	}

	// Returns the transformation matrix needed to move a word to where it is
	// positioned on the image.
	FS_MATRIX CalculateWordMoveMatrix(const SearchifyBoundingBoxOrigin& word_origin,
	int word_bounding_box_width,
	bool word_is_rtl) {
	const float sin_theta = sinf(word_origin.theta);
	const float cos_theta = cosf(word_origin.theta);
	FS_MATRIX move_matrix(cos_theta, sin_theta, -sin_theta, cos_theta,
	word_origin.point.x(), word_origin.point.y());
	if (word_is_rtl) {
	move_matrix.a = -move_matrix.a;
	move_matrix.b = -move_matrix.b;
	move_matrix.e += cos_theta * word_bounding_box_width;
	move_matrix.f += sin_theta * word_bounding_box_width;
	}
	return move_matrix;
	}

	void AddWordOnImage(FPDF_DOCUMENT document,
	FPDF_PAGE page,
	FPDF_FONT font,
	const screen_ai::mojom::WordBoxPtr& word,
	base::span<const FS_MATRIX> transform_matrices) {
	ScopedFPDFPageObject text(
	FPDFPageObj_CreateTextObj(document, font, word->bounding_box.height()));
	CHECK(text);

	std::string word_string = word->word;
	// TODO(crbug.com/41487613): A more accurate width would be the distance
	// from current word's origin to next word's origin.
	if (word->has_space_after) {
	word_string.push_back(' ');
	}

	if (word_string.empty()) {
	DLOG(ERROR) << "Got empty word";
	return;
	}

	std::vector<uint32_t> charcodes = Utf8ToCharcodes(word_string);
	if (!FPDFText_SetCharcodes(text.get(), charcodes.data(), charcodes.size())) {
	DLOG(ERROR) << "Failed to set charcodes";
	return;
	}

	// Make text invisible
	if (!FPDFTextObj_SetTextRenderMode(text.get(),
	FPDF_TEXTRENDERMODE_INVISIBLE)) {
	DLOG(ERROR) << "Failed to make text invisible";
	return;
	}

	const gfx::SizeF text_object_size = GetImageSize(text.get());
	CHECK_GT(text_object_size.width(), 0);
	CHECK_GT(text_object_size.height(), 0);
	const FS_MATRIX text_scale_matrix(
	word->bounding_box.width() / text_object_size.width(), 0, 0,
	word->bounding_box.height() / text_object_size.height(), 0, 0);
	CHECK(FPDFPageObj_TransformF(text.get(), &text_scale_matrix));

	for (const auto& matrix : transform_matrices) {
	FPDFPageObj_TransformF(text.get(), &matrix);
	}

	FPDFPage_InsertObject(page, text.release());
	}

	void AddTextOnImage(FPDF_DOCUMENT document,
	FPDF_PAGE page,
	FPDF_FONT font,
	FPDF_PAGEOBJECT image,
	screen_ai::mojom::VisualAnnotationPtr annotation,
	const gfx::Size& image_pixel_size) {
	const gfx::SizeF image_rendered_size = GetRenderedImageSize(image);
	if (image_rendered_size.IsEmpty()) {
	DLOG(ERROR) << "Failed to get image rendered dimensions";
	return;
	}

	// The transformation matrices is applied as follows:
	std::array<FS_MATRIX, 3> transform_matrices;
	// Move text object to the corresponding text position on the full image.
	FS_MATRIX& move_matrix = transform_matrices[0];
	// Scale from full image size to rendered image size on the PDF.
	FS_MATRIX& image_scale_matrix = transform_matrices[1];
	// Apply the image's transformation matrix on the PDF page without the
	// scaling matrix.
	FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];

	image_scale_matrix = {
	image_rendered_size.width() / image_pixel_size.width(), 0, 0,
	image_rendered_size.height() / image_pixel_size.height(), 0, 0};
	if (!CalculateImageWithoutScalingMatrix(image, image_rendered_size,
	image_without_scaling_matrix)) {
	DLOG(ERROR) << "Failed to get image matrix";
	return;
	}

	for (const auto& line : annotation->lines) {
	SearchifyBoundingBoxOrigin baseline_origin =
	ConvertToPdfOrigin(line->baseline_box, line->baseline_box_angle,
	image_pixel_size.height());

	for (const auto& word : line->words) {
	if (word->bounding_box.IsEmpty()) {
	continue;
	}

	SearchifyBoundingBoxOrigin origin =
	ConvertToPdfOrigin(word->bounding_box, word->bounding_box_angle,
	image_pixel_size.height());
	move_matrix = CalculateWordMoveMatrix(
	ProjectToBaseline(origin.point, baseline_origin),
	word->bounding_box.width(),
	word->direction ==
	screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT);
	AddWordOnImage(document, page, font, word, transform_matrices);
	}
	}
	}

	ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
	std::vector<uint8_t> cid_to_gid_map(CreateCidToGidMap());
	return ScopedFPDFFont(
	FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
	cid_to_gid_map.data(), cid_to_gid_map.size()));
	}

	int GetBlockForJpeg(void* param,
	unsigned long pos,
	unsigned char* buf,
	unsigned long size) {
	auto data_vector = static_cast<base::span<const uint8_t>>(param);
	if (pos + size < pos \|\| pos + size > data_vector.size()) {
	return 0;
	}
	// TODO(tsepez): spanify arguments to remove the error.
	base::span<uint8_t> UNSAFE_TODO(buf_span(buf, size));
	buf_span.copy_from(data_vector.subspan(pos, size));
	return 1;
	}

	} // namespace

	std::vector<uint8_t> PDFiumSearchify(
	base::span<const uint8_t> pdf_buffer,
	base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
	const SkBitmap& bitmap)> perform_ocr_callback) {
	ScopedFPDFDocument document(
	FPDF_LoadMemDocument64(pdf_buffer.data(), pdf_buffer.size(), nullptr));
	if (!document) {
	DLOG(ERROR) << "Failed to load document";
	return {};
	}
	int page_count = FPDF_GetPageCount(document.get());
	if (page_count == 0) {
	DLOG(ERROR) << "Got zero page count";
	return {};
	}
	ScopedFPDFFont font = CreateFont(document.get());
	CHECK(font);
	for (int page_index = 0; page_index < page_count; page_index++) {
	ScopedFPDFPage page(FPDF_LoadPage(document.get(), page_index));
	if (!page) {
	DLOG(ERROR) << "Failed to load page";
	continue;
	}
	int object_count = FPDFPage_CountObjects(page.get());
	for (int object_index = 0; object_index < object_count; object_index++) {
	// GetImageForOcr() checks for null `image`.
	FPDF_PAGEOBJECT image = FPDFPage_GetObject(page.get(), object_index);
	SkBitmap bitmap = GetImageForOcr(document.get(), page.get(), image);
	// The object is not an image or failed to get the bitmap from the image.
	if (bitmap.empty()) {
	continue;
	}
	auto annotation = perform_ocr_callback.Run(bitmap);
	if (!annotation) {
	DLOG(ERROR) << "Failed to get OCR annotation on the image";
	return {};
	}
	AddTextOnImage(document.get(), page.get(), font.get(), image,
	std::move(annotation),
	gfx::Size(bitmap.width(), bitmap.height()));
	}
	if (!FPDFPage_GenerateContent(page.get())) {
	DLOG(ERROR) << "Failed to generate content";
	return {};
	}
	}
	PDFiumMemBufferFileWrite output_file_write;
	if (!FPDF_SaveAsCopy(document.get(), &output_file_write, 0)) {
	DLOG(ERROR) << "Failed to save the document";
	return {};
	}
	return output_file_write.TakeBuffer();
	}

	SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
	const gfx::Rect& rect,
	float angle,
	float coordinate_system_height) {
	return ConvertToPdfOrigin(rect, angle, coordinate_system_height);
	}

	FS_MATRIX CalculateWordMoveMatrixForTesting(
	const SearchifyBoundingBoxOrigin& origin,
	int word_bounding_box_width,
	bool word_is_rtl) {
	return CalculateWordMoveMatrix(origin, word_bounding_box_width, word_is_rtl);
	}

	PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
	// TODO(thestig): Check the default value of `use_skia`.
	InitializeSDK(false, false, FontMappingMode::kNoMapping);
	}

	PdfiumProgressiveSearchifier::ScopedSdkInitializer::~ScopedSdkInitializer() {
	ShutdownSDK();
	}

	PdfiumProgressiveSearchifier::PdfiumProgressiveSearchifier()
	: doc_(FPDF_CreateNewDocument()), font_(CreateFont(doc_.get())) {
	CHECK(doc_);
	CHECK(font_);
	}

	PdfiumProgressiveSearchifier::~PdfiumProgressiveSearchifier() = default;

	// TODO(chuhsuan): Return bool instead of crashing on error.
	void PdfiumProgressiveSearchifier::AddPage(
	const SkBitmap& bitmap,
	uint32_t page_index,
	screen_ai::mojom::VisualAnnotationPtr annotation) {
	CHECK(annotation);
	// Replace the page if it already exists.
	DeletePage(page_index);
	int width = bitmap.width();
	int height = bitmap.height();
	ScopedFPDFPage page(FPDFPage_New(doc_.get(), page_index, width, height));
	CHECK(page);
	ScopedFPDFPageObject image(FPDFPageObj_NewImageObj(doc_.get()));
	CHECK(image);
	std::vector<uint8_t> encoded;
	CHECK(gfx::JPEGCodec::Encode(bitmap, 100, &encoded));
	FPDF_FILEACCESS file_access{
	.m_FileLen = static_cast<unsigned long>(encoded.size()),
	.m_GetBlock = &GetBlockForJpeg,
	.m_Param = &encoded};
	CHECK(FPDFImageObj_LoadJpegFileInline(nullptr, 0, image.get(), &file_access));
	CHECK(FPDFImageObj_SetMatrix(image.get(), width, 0, 0, height, 0, 0));
	AddTextOnImage(doc_.get(), page.get(), font_.get(), image.get(),
	std::move(annotation), gfx::Size(width, height));
	FPDFPage_InsertObject(page.get(), image.release());
	CHECK(FPDFPage_GenerateContent(page.get()));
	}

	void PdfiumProgressiveSearchifier::DeletePage(uint32_t page_index) {
	FPDFPage_Delete(doc_.get(), page_index);
	}

	std::vector<uint8_t> PdfiumProgressiveSearchifier::Save() {
	PDFiumMemBufferFileWrite output_file_write;
	CHECK(FPDF_SaveAsCopy(doc_.get(), &output_file_write, 0));
	return output_file_write.TakeBuffer();
	}

	} // namespace chrome_pdf