blob: 465287989c1df7e135fc80b425de2c3ffd2c7f9d [file] [log] [blame] [edit]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_searchify.h"
#include <math.h>
#include <stdint.h>
#include <algorithm>
#include <array>
#include <string>
#include <utility>
#include <vector>
#include "base/check.h"
#include "base/check_op.h"
#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/functional/callback.h"
#include "base/numerics/angle_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "pdf/pdfium/pdfium_engine.h"
#include "pdf/pdfium/pdfium_mem_buffer_file_write.h"
#include "pdf/pdfium/pdfium_ocr.h"
#include "pdf/pdfium/pdfium_searchify_font.h"
#include "services/screen_ai/public/mojom/screen_ai_service.mojom.h"
#include "third_party/pdfium/public/cpp/fpdf_scopers.h"
#include "third_party/pdfium/public/fpdf_edit.h"
#include "third_party/pdfium/public/fpdf_save.h"
#include "third_party/pdfium/public/fpdfview.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "third_party/skia/include/core/SkImageInfo.h"
#include "third_party/skia/include/core/SkPixmap.h"
#include "ui/gfx/codec/jpeg_codec.h"
#include "ui/gfx/geometry/point_f.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/size.h"
#include "ui/gfx/geometry/size_f.h"
namespace chrome_pdf {
namespace {
// Converts the UTF-8 `string` to UTF-16 and returns one charcode per UTF-16
// code unit, each widened to 32 bits. No surrogate-pair merging is done here.
std::vector<uint32_t> Utf8ToCharcodes(const std::string& string) {
  const std::u16string code_units = base::UTF8ToUTF16(string);
  return std::vector<uint32_t>(code_units.begin(), code_units.end());
}
// OCR and PDF use different coordinate systems: OCR's origin is at the
// top-left, PDF's is at the bottom-left. Converts the OCR box `rect`, rotated
// by `angle` degrees, into a PDF-space origin. `coordinate_system_height` is
// the height used to flip the y axis. The returned theta is the negated angle
// in radians.
SearchifyBoundingBoxOrigin ConvertToPdfOrigin(const gfx::Rect& rect,
                                              float angle,
                                              float coordinate_system_height) {
  const float radians = base::DegToRad(angle);
  const float box_height = rect.height();
  const float pdf_x = rect.x() - (sinf(radians) * box_height);
  // Distance from the top edge of the coordinate system, before the y flip.
  const float distance_from_top = rect.y() + cosf(radians) * box_height;
  const float pdf_y = coordinate_system_height - distance_from_top;
  return {.point = {pdf_x, pdf_y}, .theta = -radians};
}
// Projects `origin_point` (a text object's origin) onto the baseline that
// passes through `baseline_origin.point` with direction `baseline_origin.theta`.
// The result keeps the baseline's theta.
SearchifyBoundingBoxOrigin ProjectToBaseline(
    const gfx::PointF& origin_point,
    const SearchifyBoundingBoxOrigin& baseline_origin) {
  const auto& base_point = baseline_origin.point;
  // Unit vector along the baseline.
  const float direction_x = cosf(baseline_origin.theta);
  const float direction_y = sinf(baseline_origin.theta);
  // Signed distance along the baseline from `baseline_origin` to the
  // projection of `origin_point`.
  const float length = (origin_point.x() - base_point.x()) * direction_x +
                       (origin_point.y() - base_point.y()) * direction_y;
  return {.point = {base_point.x() + length * direction_x,
                    base_point.y() + length * direction_y},
          .theta = baseline_origin.theta};
}
// Returns the size of the page object `image` measured along its rotated
// bounds: the width is the distance between quad points 1 and 2, the height is
// the distance between quad points 2 and 3. Returns an empty size if the
// rotated bounds cannot be retrieved.
gfx::SizeF GetRenderedImageSize(FPDF_PAGEOBJECT image) {
  FS_QUADPOINTSF quad;
  if (FPDFPageObj_GetRotatedBounds(image, &quad)) {
    const float width = hypotf(quad.x1 - quad.x2, quad.y1 - quad.y2);
    const float height = hypotf(quad.x2 - quad.x3, quad.y2 - quad.y3);
    return gfx::SizeF(width, height);
  }
  return gfx::SizeF();
}
// Fills `image_matrix` with `image`'s matrix after dividing out the rendered
// size: `a` and `b` are divided by the rendered width, `c` and `d` by the
// rendered height. Returns false if the matrix cannot be read from `image`.
// `rendered_size` is used as a divisor; the caller in this file rejects empty
// sizes before calling.
bool CalculateImageWithoutScalingMatrix(FPDF_PAGEOBJECT image,
                                        const gfx::SizeF& rendered_size,
                                        FS_MATRIX& image_matrix) {
  const bool got_matrix = FPDFPageObj_GetMatrix(image, &image_matrix);
  if (!got_matrix) {
    return false;
  }
  const float rendered_width = rendered_size.width();
  const float rendered_height = rendered_size.height();
  image_matrix.a /= rendered_width;
  image_matrix.b /= rendered_width;
  image_matrix.c /= rendered_height;
  image_matrix.d /= rendered_height;
  return true;
}
// Builds the matrix that rotates a word by `word_origin.theta` and translates
// it to `word_origin.point`, i.e. to where the word sits on the image. For
// right-to-left words the x axis is mirrored and the translation is advanced
// by `word_bounding_box_width` along the rotated x axis.
FS_MATRIX CalculateWordMoveMatrix(const SearchifyBoundingBoxOrigin& word_origin,
                                  int word_bounding_box_width,
                                  bool word_is_rtl) {
  const float cos_theta = cosf(word_origin.theta);
  const float sin_theta = sinf(word_origin.theta);
  FS_MATRIX move_matrix(cos_theta, sin_theta, -sin_theta, cos_theta,
                        word_origin.point.x(), word_origin.point.y());
  if (!word_is_rtl) {
    return move_matrix;
  }
  // Mirror the first column and shift the origin to the far end of the word.
  move_matrix.a = -cos_theta;
  move_matrix.b = -sin_theta;
  move_matrix.e += cos_theta * word_bounding_box_width;
  move_matrix.f += sin_theta * word_bounding_box_width;
  return move_matrix;
}
// Creates an invisible text object for `word`, stretches it to the size of the
// word's OCR bounding box, applies `transform_matrices` in order, and inserts
// the result into `page`. Logs and returns without inserting anything when the
// word is empty or when setting the charcodes or render mode fails.
void AddWordOnImage(FPDF_DOCUMENT document,
                    FPDF_PAGE page,
                    FPDF_FONT font,
                    const screen_ai::mojom::WordBoxPtr& word,
                    base::span<const FS_MATRIX> transform_matrices) {
  // The bounding box height is passed as the text object's initial size; the
  // exact size is corrected by `text_scale_matrix` below.
  ScopedFPDFPageObject text(
      FPDFPageObj_CreateTextObj(document, font, word->bounding_box.height()));
  CHECK(text);

  std::string word_string = word->word;
  // TODO(crbug.com/41487613): A more accurate width would be the distance
  // from current word's origin to next word's origin.
  if (word->has_space_after) {
    word_string.push_back(' ');
  }
  if (word_string.empty()) {
    DLOG(ERROR) << "Got empty word";
    return;
  }

  std::vector<uint32_t> charcodes = Utf8ToCharcodes(word_string);
  if (!FPDFText_SetCharcodes(text.get(), charcodes.data(), charcodes.size())) {
    DLOG(ERROR) << "Failed to set charcodes";
    return;
  }

  // Make text invisible
  if (!FPDFTextObj_SetTextRenderMode(text.get(),
                                     FPDF_TEXTRENDERMODE_INVISIBLE)) {
    DLOG(ERROR) << "Failed to make text invisible";
    return;
  }

  // Measure the text object with this file's GetRenderedImageSize() helper so
  // the object can be scaled to exactly cover the OCR bounding box. The
  // previous call named a function that is not defined in this file.
  const gfx::SizeF text_object_size = GetRenderedImageSize(text.get());
  CHECK_GT(text_object_size.width(), 0);
  CHECK_GT(text_object_size.height(), 0);
  const FS_MATRIX text_scale_matrix(
      word->bounding_box.width() / text_object_size.width(), 0, 0,
      word->bounding_box.height() / text_object_size.height(), 0, 0);
  CHECK(FPDFPageObj_TransformF(text.get(), &text_scale_matrix));

  for (const auto& matrix : transform_matrices) {
    FPDFPageObj_TransformF(text.get(), &matrix);
  }

  // Releases the scoped handle when handing the object to the page.
  FPDFPage_InsertObject(page, text.release());
}
// Adds one invisible text object to `page` for every non-empty word box in
// `annotation`, positioned over `image`. `image_pixel_size` is the size of the
// bitmap that OCR ran on. Logs and returns without adding text if the image's
// rendered size or matrix cannot be obtained.
void AddTextOnImage(FPDF_DOCUMENT document,
                    FPDF_PAGE page,
                    FPDF_FONT font,
                    FPDF_PAGEOBJECT image,
                    screen_ai::mojom::VisualAnnotationPtr annotation,
                    const gfx::Size& image_pixel_size) {
  const gfx::SizeF rendered_size = GetRenderedImageSize(image);
  if (rendered_size.IsEmpty()) {
    DLOG(ERROR) << "Failed to get image rendered dimensions";
    return;
  }

  // The transformation matrices are applied in array order:
  //   [0] Move text object to the corresponding text position on the full
  //       image. Recomputed for every word.
  //   [1] Scale from full image size to rendered image size on the PDF.
  //   [2] Apply the image's transformation matrix on the PDF page without the
  //       scaling matrix.
  std::array<FS_MATRIX, 3> transform_matrices;
  FS_MATRIX& move_matrix = transform_matrices[0];
  FS_MATRIX& image_scale_matrix = transform_matrices[1];
  FS_MATRIX& image_without_scaling_matrix = transform_matrices[2];

  if (!CalculateImageWithoutScalingMatrix(image, rendered_size,
                                          image_without_scaling_matrix)) {
    DLOG(ERROR) << "Failed to get image matrix";
    return;
  }
  image_scale_matrix = {rendered_size.width() / image_pixel_size.width(), 0, 0,
                        rendered_size.height() / image_pixel_size.height(), 0,
                        0};

  const int image_height = image_pixel_size.height();
  for (const auto& line : annotation->lines) {
    const SearchifyBoundingBoxOrigin baseline_origin = ConvertToPdfOrigin(
        line->baseline_box, line->baseline_box_angle, image_height);

    for (const auto& word : line->words) {
      if (word->bounding_box.IsEmpty()) {
        continue;
      }

      const SearchifyBoundingBoxOrigin word_origin = ConvertToPdfOrigin(
          word->bounding_box, word->bounding_box_angle, image_height);
      const bool is_rtl =
          word->direction ==
          screen_ai::mojom::Direction::DIRECTION_RIGHT_TO_LEFT;
      // Words are snapped onto their line's baseline before being placed.
      move_matrix = CalculateWordMoveMatrix(
          ProjectToBaseline(word_origin.point, baseline_origin),
          word->bounding_box.width(), is_rtl);
      AddWordOnImage(document, page, font, word, transform_matrices);
    }
  }
}
// Loads the CID Type 2 font used for the invisible text into `document` and
// returns it wrapped in a scoper. The font data, its size and the ToUnicode
// CMap come from `kPdfTtf`, `kPdfTtfSize` and `kToUnicodeCMap`; the CID-to-GID
// map is built by CreateCidToGidMap(). The result is null on failure; callers
// in this file CHECK it.
ScopedFPDFFont CreateFont(FPDF_DOCUMENT document) {
  std::vector<uint8_t> glyph_id_map(CreateCidToGidMap());
  FPDF_FONT loaded_font =
      FPDFText_LoadCidType2Font(document, kPdfTtf, kPdfTtfSize, kToUnicodeCMap,
                                glyph_id_map.data(), glyph_id_map.size());
  return ScopedFPDFFont(loaded_font);
}
int GetBlockForJpeg(void* param,
unsigned long pos,
unsigned char* buf,
unsigned long size) {
auto data_vector = *static_cast<base::span<const uint8_t>*>(param);
if (pos + size < pos || pos + size > data_vector.size()) {
return 0;
}
// TODO(tsepez): spanify arguments to remove the error.
base::span<uint8_t> UNSAFE_TODO(buf_span(buf, size));
buf_span.copy_from(data_vector.subspan(pos, size));
return 1;
}
} // namespace
// Loads the PDF in `pdf_buffer`, runs `perform_ocr_callback` on every page
// object that GetImageForOcr() yields a bitmap for, overlays the recognized
// words as invisible text, and returns the saved copy of the document.
// Returns an empty vector when the document cannot be loaded, reports zero
// pages, OCR returns null, page content generation fails, or saving fails.
// Pages that fail to load are skipped.
std::vector<uint8_t> PDFiumSearchify(
    base::span<const uint8_t> pdf_buffer,
    base::RepeatingCallback<screen_ai::mojom::VisualAnnotationPtr(
        const SkBitmap& bitmap)> perform_ocr_callback) {
  ScopedFPDFDocument document(
      FPDF_LoadMemDocument64(pdf_buffer.data(), pdf_buffer.size(), nullptr));
  if (!document) {
    DLOG(ERROR) << "Failed to load document";
    return {};
  }

  const int num_pages = FPDF_GetPageCount(document.get());
  if (num_pages == 0) {
    DLOG(ERROR) << "Got zero page count";
    return {};
  }

  // One font is shared by every text object added to the document.
  ScopedFPDFFont font = CreateFont(document.get());
  CHECK(font);

  for (int page_index = 0; page_index < num_pages; ++page_index) {
    ScopedFPDFPage page(FPDF_LoadPage(document.get(), page_index));
    if (!page) {
      DLOG(ERROR) << "Failed to load page";
      continue;
    }

    // The count is read once before the loop, so text objects inserted by
    // AddTextOnImage() below are not visited.
    const int num_objects = FPDFPage_CountObjects(page.get());
    for (int object_index = 0; object_index < num_objects; ++object_index) {
      // GetImageForOcr() checks for null `image`.
      FPDF_PAGEOBJECT image = FPDFPage_GetObject(page.get(), object_index);
      SkBitmap bitmap = GetImageForOcr(document.get(), page.get(), image);
      // The object is not an image or failed to get the bitmap from the image.
      if (bitmap.empty()) {
        continue;
      }

      screen_ai::mojom::VisualAnnotationPtr annotation =
          perform_ocr_callback.Run(bitmap);
      if (!annotation) {
        DLOG(ERROR) << "Failed to get OCR annotation on the image";
        return {};
      }

      const gfx::Size bitmap_size(bitmap.width(), bitmap.height());
      AddTextOnImage(document.get(), page.get(), font.get(), image,
                     std::move(annotation), bitmap_size);
    }

    if (!FPDFPage_GenerateContent(page.get())) {
      DLOG(ERROR) << "Failed to generate content";
      return {};
    }
  }

  PDFiumMemBufferFileWrite output_file_write;
  const bool saved = FPDF_SaveAsCopy(document.get(), &output_file_write, 0);
  if (!saved) {
    DLOG(ERROR) << "Failed to save the document";
    return {};
  }
  return output_file_write.TakeBuffer();
}
// Test-only wrapper: forwards its arguments unchanged to the file-local
// ConvertToPdfOrigin() and returns its result.
SearchifyBoundingBoxOrigin ConvertToPdfOriginForTesting(
const gfx::Rect& rect,
float angle,
float coordinate_system_height) {
return ConvertToPdfOrigin(rect, angle, coordinate_system_height);
}
// Test-only wrapper: forwards its arguments unchanged to the file-local
// CalculateWordMoveMatrix() and returns its result.
FS_MATRIX CalculateWordMoveMatrixForTesting(
const SearchifyBoundingBoxOrigin& origin,
int word_bounding_box_width,
bool word_is_rtl) {
return CalculateWordMoveMatrix(origin, word_bounding_box_width, word_is_rtl);
}
// Initializes the SDK on construction with both boolean options set to false
// and font mapping set to FontMappingMode::kNoMapping. The matching shutdown
// is done in the destructor.
// NOTE(review): the TODO below implies one of the booleans is `use_skia`;
// confirm the argument order against the InitializeSDK() declaration.
PdfiumProgressiveSearchifier::ScopedSdkInitializer::ScopedSdkInitializer() {
// TODO(thestig): Check the default value of `use_skia`.
InitializeSDK(false, false, FontMappingMode::kNoMapping);
}
// Shuts the SDK down again, pairing with the InitializeSDK() call made in the
// constructor.
PdfiumProgressiveSearchifier::ScopedSdkInitializer::~ScopedSdkInitializer() {
ShutdownSDK();
}
// Creates a new empty document and loads the searchify font into it. Crashes
// via CHECK if either the document or the font could not be created.
// NOTE(review): `font_` is initialized from `doc_`, so this presumably relies
// on `doc_` being declared before `font_` in the class — confirm in the header.
PdfiumProgressiveSearchifier::PdfiumProgressiveSearchifier()
: doc_(FPDF_CreateNewDocument()), font_(CreateFont(doc_.get())) {
CHECK(doc_);
CHECK(font_);
}
// Defaulted destructor; no explicit cleanup is written here.
PdfiumProgressiveSearchifier::~PdfiumProgressiveSearchifier() = default;
// TODO(chuhsuan): Return bool instead of crashing on error.
void PdfiumProgressiveSearchifier::AddPage(
const SkBitmap& bitmap,
uint32_t page_index,
screen_ai::mojom::VisualAnnotationPtr annotation) {
CHECK(annotation);
// Replace the page if it already exists.
DeletePage(page_index);
int width = bitmap.width();
int height = bitmap.height();
ScopedFPDFPage page(FPDFPage_New(doc_.get(), page_index, width, height));
CHECK(page);
ScopedFPDFPageObject image(FPDFPageObj_NewImageObj(doc_.get()));
CHECK(image);
std::vector<uint8_t> encoded;
CHECK(gfx::JPEGCodec::Encode(bitmap, 100, &encoded));
FPDF_FILEACCESS file_access{
.m_FileLen = static_cast<unsigned long>(encoded.size()),
.m_GetBlock = &GetBlockForJpeg,
.m_Param = &encoded};
CHECK(FPDFImageObj_LoadJpegFileInline(nullptr, 0, image.get(), &file_access));
CHECK(FPDFImageObj_SetMatrix(image.get(), width, 0, 0, height, 0, 0));
AddTextOnImage(doc_.get(), page.get(), font_.get(), image.get(),
std::move(annotation), gfx::Size(width, height));
FPDFPage_InsertObject(page.get(), image.release());
CHECK(FPDFPage_GenerateContent(page.get()));
}
// Deletes the page at `page_index` from `doc_` by forwarding to
// FPDFPage_Delete(). AddPage() calls this unconditionally before creating a
// page, so it is presumably harmless when no page exists at that index —
// confirm against the PDFium documentation.
void PdfiumProgressiveSearchifier::DeletePage(uint32_t page_index) {
FPDFPage_Delete(doc_.get(), page_index);
}
std::vector<uint8_t> PdfiumProgressiveSearchifier::Save() {
PDFiumMemBufferFileWrite output_file_write;
CHECK(FPDF_SaveAsCopy(doc_.get(), &output_file_write, 0));
return output_file_write.TakeBuffer();
}
} // namespace chrome_pdf