blob: 6031f049be055b7b6d30a9b06d746692e203a1de [file]
// Copyright 2026 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <algorithm>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
#include "base/containers/span.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/json/json_writer.h"
#include "base/path_service.h"
#include "base/strings/string_util.h"
#include "base/threading/thread_restrictions.h"
#include "base/values.h"
#include "content/public/test/browser_test.h"
#include "headless/test/headless_pdf_browsertest.h"
#include "pdf/pdf.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace headless {
namespace {
// Directory holding the headless test data; appended to the path resolved
// from base::DIR_SRC_TEST_DATA_ROOT by the code below.
const base::FilePath kTestDataDir(FILE_PATH_LITERAL("headless/test/data"));

// Sub-directory of |kTestDataDir| that contains the structured document test
// pages.
const base::FilePath kTestDataSubDir(FILE_PATH_LITERAL("structured_doc"));
// Returns files matching 'headless/test/data/structured_doc/*.html'.
std::vector<std::string> GetTestFiles() {
base::FilePath root_path;
CHECK(base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &root_path));
root_path = root_path.Append(kTestDataDir).Append(kTestDataSubDir);
base::FileEnumerator enumerator(root_path, /*recursive=*/false,
/*file_type=*/base::FileEnumerator::FILES,
/*pattern=*/FILE_PATH_LITERAL("*.html"));
std::vector<std::string> test_files;
for (base::FilePath name = enumerator.Next(); !name.empty();
name = enumerator.Next()) {
test_files.push_back(name.BaseName().AsUTF8Unsafe());
}
std::sort(test_files.begin(), test_files.end());
return test_files;
}
// Builds a test name from |test_filename|:
//   * everything from the first '.' onwards is ignored;
//   * '_' is dropped and the next alphanumeric character is upper-cased (the
//     very first alphanumeric character is upper-cased as well);
//   * any other non-alphanumeric character is replaced with '_'.
// For example "my_test_file.html" becomes "MyTestFile".
std::string GetTestName(std::string_view test_filename) {
  // find() yields npos when there is no '.', in which case substr() keeps the
  // whole file name.
  const std::string_view stem =
      test_filename.substr(0, test_filename.find('.'));

  std::string result;
  result.reserve(test_filename.size());

  bool capitalize = true;
  for (const char c : stem) {
    if (c == '_') {
      capitalize = true;
    } else if (!base::IsAsciiAlphaNumeric(c)) {
      // The pending capitalization, if any, is carried over to the next
      // alphanumeric character.
      result += '_';
    } else {
      result += capitalize ? base::ToUpperASCII(c) : c;
      capitalize = false;
    }
  }
  return result;
}
} // namespace
// Parameterized PDF browser test whose parameter is the name of a test file.
// The PDF handed to OnPDFReady() is converted into a pretty-printed JSON dump
// of the first page's structure tree and compared with the contents of the
// matching '<name>_expected.txt' file.
class HeadlessStructuredPDFBrowserTest
    : public HeadlessPDFBrowserTestBase,
      public ::testing::WithParamInterface<std::string> {
 public:
  // The file name this test instance was parameterized with.
  std::string test_filename() const { return GetParam(); }

  // Returns the path computed in SetUp().
  std::string GetTestPath() override { return test_path_; }

  // Computes |test_path_| as '<kTestDataSubDir>/<test file name>' and then
  // delegates to the base class SetUp().
  void SetUp() override {
    test_path_ = kTestDataSubDir.AppendASCII(test_filename()).AsUTF8Unsafe();
    HeadlessPDFBrowserTestBase::SetUp();
  }

  // Returns the absolute path of the expectations file: the test file path
  // under the source test data root with "_expected" inserted before the
  // extension and the extension replaced by ".txt".
  const base::FilePath GetExpectationsFilePath() {
    base::FilePath filepath;
    CHECK(base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &filepath));
    return filepath.Append(kTestDataDir)
        .AppendASCII(test_path_)
        .InsertBeforeExtension(FILE_PATH_LITERAL("_expected"))
        .ReplaceExtension(FILE_PATH_LITERAL(".txt"));
  }

  // Verifies that the PDF has one page and is tagged, then checks the JSON
  // dump of the first page's structure tree against the expectations file.
  // When ShouldUpdateExpectations() is true the expectations file is
  // overwritten with the fresh dump before the comparison.
  void OnPDFReady(base::span<const uint8_t> pdf_span, int num_pages) override {
    EXPECT_THAT(num_pages, testing::Eq(1));

    // Stop here unless the document reports itself as tagged.
    const std::optional<bool> tagged = chrome_pdf::IsPDFDocTagged(pdf_span);
    ASSERT_THAT(tagged, testing::Optional(true));

    // Dump the structure tree of the first page (index 0) as pretty JSON.
    const base::Value first_page_tree =
        chrome_pdf::GetPDFStructTreeForPage(pdf_span, /*first page*/ 0);
    std::string json;
    base::JSONWriter::WriteWithOptions(
        first_page_tree, base::JSONWriter::OPTIONS_PRETTY_PRINT, &json);

    // Strip every '\r' so Windows line endings compare equal to Unix ones.
    base::RemoveChars(json, "\r", &json);

    const base::FilePath expectations_filepath = GetExpectationsFilePath();

    // The file operations below block, which has to be explicitly allowed.
    base::ScopedAllowBlockingForTesting blocking_allowed;
    if (ShouldUpdateExpectations()) {
      LOG(INFO) << "Updating expectations in " << expectations_filepath;
      CHECK(base::WriteFile(expectations_filepath, json));
    }

    std::string expected_json;
    const bool expectations_loaded =
        base::ReadFileToString(expectations_filepath, &expected_json);
    if (!expectations_loaded) {
      ADD_FAILURE() << "Unable to read expectations in "
                    << expectations_filepath;
    }

    // The comparison runs even after a failed read, so the hint below is
    // still printed.
    EXPECT_EQ(expected_json, json)
        << "To update test expectations run the tests with --reset-results "
           "command line switch.\n";
  }

 private:
  // Test file path relative to the test data directory; set in SetUp().
  std::string test_path_;
};
// NOTE(review): presumably declares the parameterized test body for the
// fixture above; the macro is defined outside this file, so confirm there.
HEADLESS_DEVTOOLED_TEST_P(HeadlessStructuredPDFBrowserTest);
// Instantiates the suite once per file name returned by GetTestFiles(). The
// instantiation prefix is left empty and every instance is named by passing
// its file name through GetTestName().
INSTANTIATE_TEST_SUITE_P(
/* no prefix*/,
HeadlessStructuredPDFBrowserTest,
::testing::ValuesIn(GetTestFiles()),
[](const testing::TestParamInfo<
HeadlessStructuredPDFBrowserTest::ParamType>& info) {
return GetTestName(info.param);
}
);
} // namespace headless