blob: b56bb5a367555b6a8012415a71665e2d324946cf [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_searchify.h"
#include <math.h>
#include <stdint.h>
#include <array>
#include <string>
#include <utility>
#include <vector>
#include "base/check.h"
#include "base/check_op.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/functional/callback.h"
#include "base/numerics/angle_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "pdf/pdfium/pdfium_api_wrappers.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
#include "pdf/pdfium/pdfium_ocr.h"
#include "pdf/pdfium/pdfium_searchify_font.h"
#include "services/screen_ai/public/cpp/utilities.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_edit.h"
#include "third_party/pdfium/public/fpdf_save.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkPixmap.h"
#include "ui/gfx/geometry/point_f.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/geometry/size_f.h"
namespace chrome_pdf {
namespace {
std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
std::u16string utf16_str = base::UTF8ToUTF16(string);
std::vector<uint32_t> charcodes;
charcodes.reserve(utf16_str.size());
for (auto c : utf16_str) {
charcodes.push_back(c);
}
return charcodes;
}
// The coordinate systems between OCR and PDF are different. OCR's origin is at
// top-left, so we need to convert them to PDF's bottom-left.
SearchifyBoundingBoxOrigin ConvertToPdfOrigin(const gfx::Rect& rect,
float angle,
float coordinate_system_height) {
const float theta = base::DegToRad(angle);
const float x = rect.x() - (sinf(theta) * rect.height());
const float y =
coordinate_system_height - (rect.y() + cosf(theta) * rect.height());
return {.point = {x, y}, .theta = -theta};
}
// Project the text object's origin to the baseline's origin.
SearchifyBoundingBoxOrigin ProjectToBaseline(
const gfx::PointF& origin_point,
const SearchifyBoundingBoxOrigin& baseline_origin) {
const float sin_theta = sinf(baseline_origin.theta);
const float cos_theta = cosf(baseline_origin.theta);
// The length between `origin` and `baseline_origin`.
const float length =
(origin_point.x() - baseline_origin.point.x()) * cos_theta +
(origin_point.y() - baseline_origin.point.y()) * sin_theta;
return {.point = {baseline_origin.point.x() + length * cos_theta,
baseline_origin.point.y() + length * sin_theta},
.theta = baseline_origin.theta};
}
gfx::SizeF GetRenderedImageSize(FPDF_PAGEOBJECT image) {
FS_QUADPOINTSF quadpoints;
if (!FPDFPageObj_GetRotatedBounds(image, &quadpoints)) {
return gfx::SizeF();
}
return gfx::SizeF(
hypotf(quadpoints.x1 - quadpoints.x2, quadpoints.y1 - quadpoints.y2),
hypotf(quadpoints.x2 - quadpoints.x3, quadpoints.y2 - quadpoints.y3));
}
bool CalculateImageWithoutScalingMatrix(FPDF_PAGEOBJECT image,
const gfx::SizeF& rendered_size,
FS_MATRIX& image_matrix) {
if (!FPDFPageObj_GetMatrix(image, &image_matrix)) {
return false;
}
image_matrix.a /= rendered_size.width();
image_matrix.b /= rendered_size.width();
image_matrix.c /= rendered_size.height();
image_matrix.d /= rendered_size.height();
return true;
}
// Returns the transformation matrix needed to move a word to where it is
// positioned on the image.
FS_MATRIX CalculateWordMoveMatrix(const SearchifyBoundingBoxOrigin& word_origin,
int word_bounding_box_width,
bool word_is_rtl) {
const float sin_theta = sinf(word_origin.theta);
const float cos_theta = cosf(word_origin.theta);
FS_MATRIX move_matrix(cos_theta, sin_theta, -sin_theta, cos_theta,
word_origin.point.x(), word_origin.point.y());
if (word_is_rtl) {
move_matrix.a = -move_matrix.a;
move_matrix.b = -move_matrix.b;
move_matrix.e += cos_theta * word_bounding_box_width;
move_matrix.f += sin_theta * word_bounding_box_width;
}
return move_matrix;
}
// Returns whether this function succeeded or not.
bool AddWordOnImage(FPDF_DOCUMENT document,
FPDF_PAGE page,
FPDF_FONT font,
const screen_ai::mojom::WordBox& word,
base::span<const FS_MATRIX> transform_matrices) {
ScopedFPDFPageObject text(
FPDFPageObj_CreateTextObj(document, font, word.bounding_box.height()));
CHECK(text);
std::vector<uint32_t> charcodes = Utf8ToCharcodes(word.word);
if (charcodes.empty()) {
DLOG(ERROR) << "Got empty word";
return false;
}
bool result =
FPDFText_SetCharcodes(text.get(), charcodes.data(), charcodes.size());
CHECK(result);
// Make text invisible
result =
FPDFTextObj_SetTextRenderMode(text.get(), FPDF_TEXTRENDERMODE_INVISIBLE);
CHECK(result);
const gfx::SizeF text_object_size = GetImageSize(text.get());
CHECK_GT(text_object_size.width(), 0);
CHECK_GT(text_object_size.height(), 0);
const FS_MATRIX text_scale_matrix(
word.bounding_box.width() / text_object_size.width(), 0, 0,
word.bounding_box.height() / text_object_size.height(), 0, 0);
CHECK(FPDFPageObj_TransformF(text.get(), &text_scale_matrix));
for (const auto& matrix : transform_matrices) {
FPDFPageObj_TransformF(text.get(), &matrix);
}
FPDFPage_InsertObject(page, text.release());
return true;
}
// If OCR has recognized a space character between two consecutive words,
// inserts a new word between them to represent it, and returns the vector of
// words and spaces.
std::vector<screen_ai::mojom::WordBox> GetWordsAndSpaces(
base::span<const screen_ai::mojom::WordBoxPtr> words) {
std::vector<screen_ai::mojom::WordBox> words_and_spaces;
size_t original_word_count = words.size();
if (original_word_count) {
words_and_spaces.reserve(original_word_count * 2 - 1);
}
gfx::Rect empty_rect;
for (size_t i = 0; i < original_word_count; i++) {
auto& current_word = words[i];
words_and_spaces.push_back(*current_word);
// Add whitespace if it's not empty.
if (current_word->whitespace_bounding_box.width() &&
current_word->whitespace_bounding_box.height()) {
words_and_spaces.emplace_back(
/*word=*/" ", current_word->language,
current_word->whitespace_bounding_box,
current_word->whitespace_bounding_box_angle, current_word->direction,
/*whitespace_bounding_box=*/empty_rect,
/*whitespace_bounding_box_angle=*/0,
/*confidence=*/1);
}
}
return words_and_spaces;
}
} // namespace
std::vector<uint8_t> PDFiumSearchify(
base::span<const uint8_t> pdf_buffer,
base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
const SkBitmap& bitmap)> perform_ocr_callback) {
ScopedFPDFDocument document = LoadPdfData(pdf_buffer);
if (!document) {
DLOG(ERROR) << "Failed to load document";
return {};
}
int page_count = FPDF_GetPageCount(document.get());
if (page_count == 0) {
DLOG(ERROR) << "Got zero page count";
return {};
}
ScopedFPDFFont font = CreateFont(document.get());
CHECK(font);
for (int page_index = 0; page_index < page_count; page_index++) {
ScopedFPDFPage page(FPDF_LoadPage(document.get(), page_index));
if (!page) {
DLOG(ERROR) << "Failed to load page";
continue;
}
int object_count = FPDFPage_CountObjects(page.get());
for (int object_index = 0; object_index < object_count; object_index++) {
// GetImageForOcr() checks for null `image`.
FPDF_PAGEOBJECT image = FPDFPage_GetObject(page.get(), object_index);
SkBitmap bitmap = GetImageForOcr(document.get(), page.get(), image,
screen_ai::GetMaxDimensionForOCR());
// The object is not an image or failed to get the bitmap from the image.
if (bitmap.empty()) {
continue;
}
auto annotation = perform_ocr_callback.Run(bitmap);
if (!annotation) {
DLOG(ERROR) << "Failed to get OCR annotation on the image";
return {};
}
AddTextOnImage(document.get(), page.get(), font.get(), image,
std::move(annotation),
gfx::Size(bitmap.width(), bitmap.height()));
}
if (!FPDFPage_GenerateContent(page.get())) {
DLOG(ERROR) << "Failed to generate content";
return {};
}
}
PDFiumMemBufferFileWrite output_file_write;
if (!FPDF_SaveAsCopy(document.get(), &output_file_write, 0)) {
DLOG(ERROR) << "Failed to save the document";
return {};
}
return output_file_write.TakeBuffer();
}
bool AddTextOnImage(FPDF_DOCUMENT document,
FPDF_PAGE page,
FPDF_FONT font,
FPDF_PAGEOBJECT image,
screen_ai::mojom::VisualAnnotationPtr annotation,
const gfx::Size& image_pixel_size) {
const gfx::SizeF image_rendered_size = GetRenderedImageSize(image);
if (image_rendered_size.IsEmpty()) {
DLOG(ERROR) << "Failed to get image rendered dimensions";
return false;
}
// The transformation matrices is applied as follows:
std::array<FS_MATRIX, 3> transform_matrices;
// Move text object to the corresponding text position on the full image.
FS_MATRIX& move_matrix = transform_matrices[0];
// Scale from full image size to rendered image size on the PDF.
FS_MATRIX& image_scale_matrix = transform_matrices[1];
// Apply the image's transformation matrix on the PDF page without the
// scaling matrix.
FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];
image_scale_matrix = {
image_rendered_size.width() / image_pixel_size.width(), 0, 0,
image_rendered_size.height() / image_pixel_size.height(), 0, 0};
if (!CalculateImageWithoutScalingMatrix(image, image_rendered_size,
image_without_scaling_matrix)) {
DLOG(ERROR) << "Failed to get image matrix";
return false;
}
bool added_text = false;
for (const auto& line : annotation->lines) {
// TODO(crbug.com/398694513): Try to get baseline information from font
// information.
SearchifyBoundingBoxOrigin baseline_origin =
ConvertToPdfOrigin(line->bounding_box, line->bounding_box_angle,
image_pixel_size.height());
std::vector<screen_ai::mojom::WordBox> words_and_spaces =
GetWordsAndSpaces(line->words);
for (const auto& word : words_and_spaces) {
if (word.bounding_box.IsEmpty()) {
continue;
}
SearchifyBoundingBoxOrigin origin =
ConvertToPdfOrigin(word.bounding_box, word.bounding_box_angle,
image_pixel_size.height());
move_matrix = CalculateWordMoveMatrix(
ProjectToBaseline(origin.point, baseline_origin),
word.bounding_box.width(),
word.direction ==
screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT);
added_text |=
AddWordOnImage(document, page, font, word, transform_matrices);
}
}
return added_text;
}
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
const gfx::Rect& rect,
float angle,
float coordinate_system_height) {
return ConvertToPdfOrigin(rect, angle, coordinate_system_height);
}
FS_MATRIX CalculateWordMoveMatrixForTesting(
const SearchifyBoundingBoxOrigin& origin,
int word_bounding_box_width,
bool word_is_rtl) {
return CalculateWordMoveMatrix(origin, word_bounding_box_width, word_is_rtl);
}
std::vector<screen_ai::mojom::WordBox> GetWordsAndSpacesForTesting( // IN-TEST
base::span<const screen_ai::mojom::WordBoxPtr> words) {
return GetWordsAndSpaces(words);
}
ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
std::vector<uint8_t> cid_to_gid_map(CreateCidToGidMap());
return ScopedFPDFFont(
FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
cid_to_gid_map.data(), cid_to_gid_map.size()));
}
} // namespace chrome_pdf