blob: 309486f6459aeeef49a998e545c0cf984cbdb2cf [file] [log] [blame]
// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "pdf/pdf_utils/dates.h"
#include <stdint.h>
#include <optional>
#include <string_view>
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/time/time.h"
namespace chrome_pdf {
namespace {
// Consumes a string from the front, a few characters at a time. Once a
// `PopDigits()` call fails, the deserializer enters a "stopped" state in which
// every pop returns `std::nullopt` until `unstop()` is called.
class DateDeserializer final {
 public:
  // The characters viewed by `parsing` must outlive `this` because
  // `std::string_view` has reference semantics.
  explicit DateDeserializer(std::string_view parsing)
      : deserializing_(parsing) {}
  ~DateDeserializer() = default;

  // Pops the first `num_digits` characters from the string and converts them to
  // an int if possible. Popping too many characters, popping zero characters,
  // or popping characters that cannot be converted puts the deserializer in a
  // stopped state and returns `std::nullopt` without consuming anything.
  std::optional<int> PopDigits(size_t num_digits) {
    if (stopped_)
      return std::nullopt;

    // The `num_digits == 0` check must come first: together with the size
    // check it guarantees the view is non-empty before `front()` is called.
    // `base::StringToUint()` allows leading sign characters, so also verify
    // that the front character is a digit.
    uint32_t value;
    if (num_digits == 0 || deserializing_.size() < num_digits ||
        !base::IsAsciiDigit(deserializing_.front()) ||
        !base::StringToUint(deserializing_.substr(0, num_digits), &value)) {
      stopped_ = true;
      return std::nullopt;
    }

    // Pop front characters.
    deserializing_.remove_prefix(num_digits);

    // Make the unsigned-to-signed conversion explicit. The callers in this
    // file request at most 4 digits, so the value always fits in an int.
    return static_cast<int>(value);
  }

  // Pops the front character if it is not a digit. Otherwise (or if the
  // deserializer is stopped or exhausted), does not change the state of the
  // deserializer and returns `std::nullopt`.
  std::optional<char> TryPopNonDigit() {
    if (stopped_ || deserializing_.empty())
      return std::nullopt;

    const char front = deserializing_.front();
    if (base::IsAsciiDigit(front))
      return std::nullopt;

    deserializing_.remove_prefix(1);
    return front;
  }

  // Takes the deserializer out of a stopped state.
  void unstop() { stopped_ = false; }

 private:
  // The not-yet-consumed remainder of the input.
  std::string_view deserializing_;

  // Set when a `PopDigits()` call fails; cleared only by `unstop()`.
  bool stopped_ = false;
};
// Reads the time zone offset that trails a PDF date (section 7.9.4 "Dates" of
// the ISO 32000-1:2008 spec) from `deserializer` and returns it as a signed
// `base::TimeDelta`. The remaining input is expected to look like "+HH'mm" or
// "-HH'mm", where "HH" is the hour and "mm" is the minute. A zero offset (UTC)
// is returned when the input does not begin with '+' or '-'.
base::TimeDelta ParseOffset(DateDeserializer& deserializer) {
  // UTC is assumed when there is no explicit sign, which covers both missing
  // time zone information and any other leading character.
  const char direction = deserializer.TryPopNonDigit().value_or('\0');
  const bool is_ahead_of_utc = direction == '+';
  if (!is_ahead_of_utc && direction != '-')
    return base::TimeDelta();

  // Missing or malformed hour digits count as zero hours.
  base::TimeDelta magnitude =
      base::Hours(deserializer.PopDigits(2).value_or(0));

  // The spec puts an apostrophe between the hours and the minutes, but its
  // absence is tolerated. Only a different non-digit separator ends parsing
  // before the minutes.
  const std::optional<char> separator = deserializer.TryPopNonDigit();
  const bool minutes_may_follow =
      !separator.has_value() || separator.value() == '\'';
  if (minutes_may_follow) {
    // Whatever follows the minutes is ignored; earlier versions of the spec
    // mention an apostrophe after the minutes, so that leniency is intended.
    magnitude += base::Minutes(deserializer.PopDigits(2).value_or(0));
  }

  return is_ahead_of_utc ? magnitude : -magnitude;
}
} // namespace
// Converts a PDF date string into a `base::Time`. Returns a
// default-constructed `base::Time` when the 4-digit year cannot be read or
// when the parsed fields are rejected by `base::Time::FromUTCExploded()`.
base::Time ParsePdfDate(std::string_view date) {
  // The spec requires a "D:" prefix, but earlier versions of the spec weren't
  // strict about it, so it is skipped when present and not demanded otherwise.
  constexpr std::string_view kPrefix = "D:";
  if (date.substr(0, kPrefix.size()) == kPrefix)
    date.remove_prefix(kPrefix.size());

  DateDeserializer reader(date);

  // A date is only valid if it has a year; every other part is optional.
  const std::optional<int> year = reader.PopDigits(4);
  if (!year.has_value())
    return base::Time();

  // Fields are read strictly in the order they appear in the string. A
  // missing month or day defaults to 1; the other missing parts default to 0.
  base::Time::Exploded fields = {};
  fields.year = year.value();
  fields.month = reader.PopDigits(2).value_or(1);
  fields.day_of_month = reader.PopDigits(2).value_or(1);
  fields.hour = reader.PopDigits(2).value_or(0);
  fields.minute = reader.PopDigits(2).value_or(0);
  fields.second = reader.PopDigits(2).value_or(0);

  base::Time local_as_utc;
  if (!base::Time::FromUTCExploded(fields, &local_as_utc))
    return base::Time();

  // A failed optional field above leaves the reader stopped, so resume it
  // before looking for the time zone offset.
  reader.unstop();

  // `base::Time` is in UTC, so the parsed fields must be shifted by the
  // offset, if there is one, to normalize them.
  const base::TimeDelta utc_offset = ParseOffset(reader);
  return local_as_utc - utc_offset;
}
} // namespace chrome_pdf