blob: c1fb3c33ddd4825760b839d19d6702b45609fc2f [file] [log] [blame]
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdfium/pdfium_document_metadata.h"
#include <stddef.h>
#include <string>
#include "base/check.h"
#include "base/functional/bind.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "pdf/document_metadata.h"
#include "pdf/pdf_utils/dates.h"
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
#include "third_party/pdfium/public/fpdf_doc.h"
#include "third_party/pdfium/public/fpdf_formfill.h"
#include "third_party/pdfium/public/fpdfview.h"
namespace chrome_pdf {
namespace {
// Looks up `field` in the document information dictionary of `doc` through
// FPDF_GetMetaText() and returns the value converted to UTF-8, after trimming
// whitespace with `base::TRIM_ALL`. `doc` must be non-null.
std::string GetTrimmedMetadataByField(FPDF_DOCUMENT doc,
                                      FPDF_BYTESTRING field) {
  CHECK(doc);

  // PDFium hands the text back as UTF-16 through a caller-provided buffer; the
  // adapter drives the bound call and collects the result into a string.
  auto get_meta_text = base::BindRepeating(&FPDF_GetMetaText, doc, field);
  const std::u16string raw_value = CallPDFiumWideStringBufferApi(
      get_meta_text, /*check_expected_size=*/false);

  // The trimmed result refers to `raw_value`, which outlives the conversion.
  const auto trimmed_value = base::TrimWhitespace(raw_value, base::TRIM_ALL);
  return base::UTF16ToUTF8(trimmed_value);
}
// Translates the file version that FPDF_GetFileVersion() reports for `doc`
// (e.g. 14 for PDF 1.4, 20 for PDF 2.0) into the corresponding `PdfVersion`
// enumerator. Returns `PdfVersion::kUnknown` when the version cannot be
// retrieved or is not one of the recognized values. `doc` must be non-null.
PdfVersion GetDocumentVersion(FPDF_DOCUMENT doc) {
  CHECK(doc);

  // Pairs each recognized raw version number with its enumerator.
  struct VersionEntry {
    int raw;
    PdfVersion version;
  };
  static constexpr VersionEntry kKnownVersions[] = {
      {10, PdfVersion::k1_0}, {11, PdfVersion::k1_1}, {12, PdfVersion::k1_2},
      {13, PdfVersion::k1_3}, {14, PdfVersion::k1_4}, {15, PdfVersion::k1_5},
      {16, PdfVersion::k1_6}, {17, PdfVersion::k1_7}, {18, PdfVersion::k1_8},
      {20, PdfVersion::k2_0},
  };

  // Only read after FPDF_GetFileVersion() reports success.
  int raw_version;
  if (!FPDF_GetFileVersion(doc, &raw_version)) {
    return PdfVersion::kUnknown;
  }

  for (const VersionEntry& entry : kKnownVersions) {
    if (entry.raw == raw_version) {
      return entry.version;
    }
  }
  return PdfVersion::kUnknown;
}
} // namespace
// Assembles a `DocumentMetadata` for `doc`. `size_bytes`, `page_count`,
// `linearized` and `has_attachments` are copied from the arguments; the PDF
// version, form type and document information dictionary entries are read
// from `doc`, which must be non-null. The two date entries are passed through
// ParsePdfDate() before being stored.
DocumentMetadata GetPDFiumDocumentMetadata(FPDF_DOCUMENT doc,
                                           size_t size_bytes,
                                           size_t page_count,
                                           bool linearized,
                                           bool has_attachments) {
  CHECK(doc);

  // Shorthand for fetching one trimmed information dictionary entry of `doc`.
  const auto read_field = [doc](FPDF_BYTESTRING field) {
    return GetTrimmedMetadataByField(doc, field);
  };

  DocumentMetadata metadata;

  // Values supplied by the caller or derived from the document as a whole.
  metadata.version = GetDocumentVersion(doc);
  metadata.size_bytes = size_bytes;
  metadata.page_count = page_count;
  metadata.linearized = linearized;
  metadata.has_attachments = has_attachments;
  metadata.form_type = static_cast<FormType>(FPDF_GetFormType(doc));

  // Textual document information dictionary entries.
  metadata.title = read_field("Title");
  metadata.author = read_field("Author");
  metadata.subject = read_field("Subject");
  metadata.keywords = read_field("Keywords");
  metadata.creator = read_field("Creator");
  metadata.producer = read_field("Producer");

  // Date entries are stored in parsed form rather than as raw strings.
  const std::string creation_date_text = read_field("CreationDate");
  metadata.creation_date = ParsePdfDate(creation_date_text);
  const std::string mod_date_text = read_field("ModDate");
  metadata.mod_date = ParsePdfDate(mod_date_text);

  return metadata;
}
} // namespace chrome_pdf