blob: 5ece2b49591d1e410aa9fa5293a835cdf450e0ad [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "annotator/grammar/dates/utils/date-match.h"
#include <algorithm>
#include "annotator/grammar/dates/utils/date-utils.h"
#include "annotator/types.h"
#include "utils/strings/append.h"
// Values reported for the MERIDIEM datetime component: ante meridiem and post
// meridiem respectively.
static constexpr int kAM = 0;
static constexpr int kPM = 1;
namespace libtextclassifier3 {
namespace dates {
namespace {
// Maps a timespan code onto a meridiem value.
//
// Returns kAM for AM and MIDNIGHT, kPM for PM, NOON and TONIGHT. Every other
// code (including TIMESPAN_CODE_NONE) logs a warning and yields NO_VAL.
static int GetMeridiemValue(const TimespanCode& timespan_code) {
  switch (timespan_code) {
    // Codes that fall into the first half of the day.
    case TimespanCode_AM:
    case TimespanCode_MIDNIGHT:
      return kAM;

    // Codes that fall into the second half of the day.
    case TimespanCode_PM:
    case TimespanCode_NOON:
    case TimespanCode_TONIGHT:
      return kPM;

    default:
      break;
  }

  // Anything else carries no meridiem information.
  TC3_LOG(WARNING) << "Failed to extract time span code.";
  return NO_VAL;
}
// Translates the day-of-week interpretations of `relative_parameter` into a
// signed relative count.
//
// The interpretations are scanned in order and the first recognized one decides
// the result: negative counts point to the past, positive ones to the future
// and 0 means the current period. Unrecognized entries are skipped; if nothing
// is recognized the result is 0.
//
// `relative_parameter` and its day_of_week_interpretation() vector are
// dereferenced unconditionally, so both must be non-null.
static int GetRelativeCount(const RelativeParameter* relative_parameter) {
  const auto& interpretations =
      *relative_parameter->day_of_week_interpretation();
  for (const int candidate : interpretations) {
    switch (candidate) {
      case RelativeParameter_::Interpretation_SECOND_LAST:
        return -2;
      case RelativeParameter_::Interpretation_PREVIOUS:
      case RelativeParameter_::Interpretation_NEAREST_LAST:
        return -1;
      case RelativeParameter_::Interpretation_CURRENT:
        return 0;
      case RelativeParameter_::Interpretation_NEAREST:
      case RelativeParameter_::Interpretation_NEAREST_NEXT:
      case RelativeParameter_::Interpretation_COMING:
      case RelativeParameter_::Interpretation_SOME:
        return 1;
      case RelativeParameter_::Interpretation_SECOND_NEXT:
        return 2;
      default:
        // Not an interpretation we know how to count; try the next one.
        continue;
    }
  }
  return 0;
}
} // namespace
using strings::JoinStrings;
using strings::SStringAppendF;
// Returns a human-readable dump of this match for debugging.
//
// When NDEBUG is defined the whole body is compiled out and an empty string is
// returned. Otherwise the dump consists of, in order: the "[begin,end)" span
// (if both are non-negative), the day of week, "year/month/day
// hour:minute:second" with underscores standing in for missing fields, the
// optional "TS=", "TZ=" and "TZO=" entries, the relative-date fields, the
// priority and score, the candidate hours of an ambiguous hour and finally the
// tags.
//
// All integral values are formatted with "%d". The year is negated for BC
// dates, and the other fields are treated as signed values elsewhere in this
// file (compared against NO_VAL / -1 / 0), so "%u" would render negative values
// as huge unsigned numbers. Enum fields are cast to int so that the argument
// type always matches the conversion specifier.
std::string DateMatch::DebugString() const {
  std::string res;
#if !defined(NDEBUG)
  if (begin >= 0 && end >= 0) {
    SStringAppendF(&res, 0, "[%d,%d)", begin, end);
  }

  if (HasDayOfWeek()) {
    SStringAppendF(&res, 0, "%d", static_cast<int>(day_of_week));
  }

  // Date part: year/month/day.
  if (HasYear()) {
    int year_output = year;
    if (HasBcAd() && bc_ad == BCAD_BC) {
      // BC years are shown as negative numbers.
      year_output = -year;
    }
    SStringAppendF(&res, 0, "%d/", year_output);
  } else {
    SStringAppendF(&res, 0, "____/");
  }
  if (HasMonth()) {
    SStringAppendF(&res, 0, "%d/", month);
  } else {
    SStringAppendF(&res, 0, "__/");
  }
  if (HasDay()) {
    SStringAppendF(&res, 0, "%d ", day);
  } else {
    SStringAppendF(&res, 0, "__ ");
  }

  // Time part: hour:minute:second[.fraction].
  if (HasHour()) {
    SStringAppendF(&res, 0, "%d:", hour);
  } else {
    SStringAppendF(&res, 0, "__:");
  }
  if (HasMinute()) {
    SStringAppendF(&res, 0, "%d:", minute);
  } else {
    SStringAppendF(&res, 0, "__:");
  }
  if (HasSecond()) {
    if (HasFractionSecond()) {
      SStringAppendF(&res, 0, "%d.%lf ", second, fraction_second);
    } else {
      SStringAppendF(&res, 0, "%d ", second);
    }
  } else {
    SStringAppendF(&res, 0, "__ ");
  }

  // Timespan and time zone information.
  if (HasTimeSpanCode() && TimespanCode_TIMESPAN_CODE_NONE < time_span_code &&
      time_span_code <= TimespanCode_MAX) {
    SStringAppendF(&res, 0, "TS=%d ", static_cast<int>(time_span_code));
  }
  if (HasTimeZoneCode() && time_zone_code != -1) {
    SStringAppendF(&res, 0, "TZ= %d ", time_zone_code);
  }
  if (HasTimeZoneOffset()) {
    SStringAppendF(&res, 0, "TZO=%d ", time_zone_offset);
  }

  // Relative date information; only fields that are set are printed.
  if (HasRelativeDate()) {
    const RelativeMatch* rm = relative_match;
    // Pass the label through "%s" instead of using it as the format string.
    SStringAppendF(&res, 0, "%s", rm->is_future_date ? "future " : "past ");
    if (rm->day_of_week != NO_VAL) {
      SStringAppendF(&res, 0, "DOW:%d ", rm->day_of_week);
    }
    if (rm->year != NO_VAL) {
      SStringAppendF(&res, 0, "Y:%d ", rm->year);
    }
    // Note: month and minute intentionally keep their historical "M:" label.
    if (rm->month != NO_VAL) {
      SStringAppendF(&res, 0, "M:%d ", rm->month);
    }
    if (rm->day != NO_VAL) {
      SStringAppendF(&res, 0, "D:%d ", rm->day);
    }
    if (rm->week != NO_VAL) {
      SStringAppendF(&res, 0, "W:%d ", rm->week);
    }
    if (rm->hour != NO_VAL) {
      SStringAppendF(&res, 0, "H:%d ", rm->hour);
    }
    if (rm->minute != NO_VAL) {
      SStringAppendF(&res, 0, "M:%d ", rm->minute);
    }
    if (rm->second != NO_VAL) {
      SStringAppendF(&res, 0, "S:%d ", rm->second);
    }
  }

  SStringAppendF(&res, 0, "prio=%d ", priority);
  SStringAppendF(&res, 0, "conf-score=%lf ", annotator_priority_score);

  // List every hour the match could stand for when the hour is ambiguous.
  if (IsHourAmbiguous()) {
    std::vector<int8> values;
    GetPossibleHourValues(&values);
    std::string str_values;
    for (const int8 value : values) {
      SStringAppendF(&str_values, 0, "%d,", value);
    }
    SStringAppendF(&res, 0, "amb=%s ", str_values.c_str());
  }

  std::vector<std::string> tags;
  if (is_inferred) {
    tags.push_back("inferred");
  }
  if (!tags.empty()) {
    SStringAppendF(&res, 0, "tag=%s ", JoinStrings(",", tags).c_str());
  }
#endif  // !defined(NDEBUG)
  return res;
}
void DateMatch::GetPossibleHourValues(std::vector<int8>* values) const {
TC3_CHECK(values != nullptr);
values->clear();
if (HasHour()) {
int8 possible_hour = hour;
values->push_back(possible_hour);
for (int count = 1; count < ambiguous_hour_count; ++count) {
possible_hour += ambiguous_hour_interval;
if (possible_hour >= 24) {
possible_hour -= 24;
}
values->push_back(possible_hour);
}
}
}
// Returns the relative qualifier of this match.
//
// Matches without a relative date are UNSPECIFIED. A relative date is PAST only
// when its HAS_IS_FUTURE bit is set and is_future_date is false; in every other
// case it is treated as FUTURE.
DatetimeComponent::RelativeQualifier DateMatch::GetRelativeQualifier() const {
  if (!HasRelativeDate()) {
    return DatetimeComponent::RelativeQualifier::UNSPECIFIED;
  }

  const bool explicitly_past =
      (relative_match->existing & RelativeMatch::HAS_IS_FUTURE) &&
      !relative_match->is_future_date;
  return explicitly_past ? DatetimeComponent::RelativeQualifier::PAST
                         : DatetimeComponent::RelativeQualifier::FUTURE;
}
// Folds the relative qualifier into the sign of the relative counter: the
// counter is negated when the qualifier is RelativeQualifier::PAST and returned
// unchanged for every other qualifier.
int GetAdjustedRelativeCounter(
    const DatetimeComponent::RelativeQualifier& relative_qualifier,
    const int relative_counter) {
  const bool is_past =
      (relative_qualifier == DatetimeComponent::RelativeQualifier::PAST);
  return is_past ? -relative_counter : relative_counter;
}
// Builds a datetime component from an absolute and/or a relative value.
//
// Returns an empty Optional when both values are NO_VAL. Otherwise:
//  - the absolute value is used as is, or 0 when it is NO_VAL;
//  - when a relative value is present, the component carries
//    `relative_qualifier` and the relative value with its sign adjusted by
//    GetAdjustedRelativeCounter;
//  - when it is absent, the qualifier is UNSPECIFIED and the relative count 0.
Optional<DatetimeComponent> CreateDatetimeComponent(
    const DatetimeComponent::ComponentType& component_type,
    const DatetimeComponent::RelativeQualifier& relative_qualifier,
    const int absolute_value, const int relative_value) {
  const bool has_absolute = (absolute_value != NO_VAL);
  const bool has_relative = (relative_value != NO_VAL);
  if (!has_absolute && !has_relative) {
    return Optional<DatetimeComponent>();
  }

  DatetimeComponent::RelativeQualifier qualifier =
      DatetimeComponent::RelativeQualifier::UNSPECIFIED;
  int relative_count = 0;
  if (has_relative) {
    qualifier = relative_qualifier;
    relative_count =
        GetAdjustedRelativeCounter(relative_qualifier, relative_value);
  }
  const int value = has_absolute ? absolute_value : 0;

  return Optional<DatetimeComponent>(
      DatetimeComponent(component_type, qualifier, value, relative_count));
}
// Builds the DAY_OF_WEEK component from the absolute day of week and, when
// present, the relative match.
//
// Without a relative match only the absolute day of week is used. With one, the
// relative value starts out as the relative match's day_of_week. If the match
// has the HAS_DAY_OF_WEEK bit set:
//  - a standalone relative day of week supplies the absolute value when no
//    absolute day of week was given;
//  - together with HAS_WEEK the relative value is fixed to 1;
//  - otherwise the relative value comes from the day-of-week interpretations of
//    the nonterminal's relative parameter (if any). Its sign then overrides the
//    qualifier (negative -> PAST, positive -> FUTURE) and only its magnitude is
//    kept. The nonterminal and its relative parameter are CHECKed non-null.
Optional<DatetimeComponent> CreateDayOfWeekComponent(
    const RelativeMatch* relative_match,
    const DatetimeComponent::RelativeQualifier& relative_qualifier,
    const DayOfWeek& absolute_day_of_week) {
  DatetimeComponent::RelativeQualifier qualifier = relative_qualifier;
  int absolute = absolute_day_of_week;
  int relative = NO_VAL;

  if (relative_match) {
    relative = relative_match->day_of_week;
  }

  if (relative_match &&
      (relative_match->existing & RelativeMatch::HAS_DAY_OF_WEEK)) {
    if (relative_match->IsStandaloneRelativeDayOfWeek() &&
        absolute_day_of_week == DayOfWeek_DOW_NONE) {
      absolute = relative_match->day_of_week;
    }

    if (relative_match->existing & RelativeMatch::HAS_WEEK) {
      // Day of week combined with a week period.
      relative = 1;
    } else {
      const NonterminalValue* nonterminal =
          relative_match->day_of_week_nonterminal;
      TC3_CHECK(nonterminal != nullptr);
      TC3_CHECK(nonterminal->relative_parameter());
      const RelativeParameter* parameter = nonterminal->relative_parameter();
      if (parameter->day_of_week_interpretation()) {
        const int signed_count = GetRelativeCount(parameter);
        if (signed_count < 0) {
          // Keep the magnitude; the direction moves into the qualifier.
          relative = -signed_count;
          qualifier = DatetimeComponent::RelativeQualifier::PAST;
        } else {
          relative = signed_count;
          if (signed_count > 0) {
            qualifier = DatetimeComponent::RelativeQualifier::FUTURE;
          }
        }
      }
    }
  }

  return CreateDatetimeComponent(DatetimeComponent::ComponentType::DAY_OF_WEEK,
                                 qualifier, absolute, relative);
}
// Resolves the century of a parsed year.
// A four-digit year (DD/MM/YYYY) is unambiguous and is returned unchanged. A
// two-digit year (e.g. MM/DD/YY as in {April/23/15}) could mean 15, 1915 or
// 2015, so the following heuristic is applied to every value below 100:
//   - values below 50 are resolved to 20YY
//   - values of 50 and above are resolved to 19YY
// NO_VAL is passed through untouched.
static int InterpretYear(int parsed_year) {
  if (parsed_year == NO_VAL || parsed_year >= 100) {
    // Nothing to resolve.
    return parsed_year;
  }
  const int century = (parsed_year < 50) ? 2000 : 1900;
  return parsed_year + century;
}
// Returns the datetime component of the requested type, or an empty Optional
// when this match carries no information for it.
//
//  - YEAR, MONTH, DAY_OF_MONTH, HOUR, MINUTE and SECOND combine the absolute
//    field with the matching relative field (NO_VAL without a relative match);
//    the year additionally goes through InterpretYear.
//  - DAY_OF_WEEK is delegated to CreateDayOfWeekComponent.
//  - MERIDIEM is derived from the timespan code and never has a relative part.
//  - ZONE_OFFSET is emitted only when a time zone offset is present.
//  - WEEK only has a relative part.
//  - Any other type yields an empty Optional.
Optional<DatetimeComponent> DateMatch::GetDatetimeComponent(
    const DatetimeComponent::ComponentType& component_type) const {
  using ComponentType = DatetimeComponent::ComponentType;

  const DatetimeComponent::RelativeQualifier qualifier = GetRelativeQualifier();
  const bool has_relative_match = (relative_match != nullptr);

  switch (component_type) {
    case ComponentType::YEAR:
      return CreateDatetimeComponent(
          component_type, qualifier, InterpretYear(year),
          has_relative_match ? relative_match->year : NO_VAL);

    case ComponentType::MONTH:
      return CreateDatetimeComponent(
          component_type, qualifier, month,
          has_relative_match ? relative_match->month : NO_VAL);

    case ComponentType::DAY_OF_MONTH:
      return CreateDatetimeComponent(
          component_type, qualifier, day,
          has_relative_match ? relative_match->day : NO_VAL);

    case ComponentType::HOUR:
      return CreateDatetimeComponent(
          component_type, qualifier, hour,
          has_relative_match ? relative_match->hour : NO_VAL);

    case ComponentType::MINUTE:
      return CreateDatetimeComponent(
          component_type, qualifier, minute,
          has_relative_match ? relative_match->minute : NO_VAL);

    case ComponentType::SECOND:
      return CreateDatetimeComponent(
          component_type, qualifier, second,
          has_relative_match ? relative_match->second : NO_VAL);

    case ComponentType::DAY_OF_WEEK:
      return CreateDayOfWeekComponent(relative_match, qualifier, day_of_week);

    case ComponentType::MERIDIEM:
      return CreateDatetimeComponent(component_type, qualifier,
                                     GetMeridiemValue(time_span_code), NO_VAL);

    case ComponentType::ZONE_OFFSET:
      if (!HasTimeZoneOffset()) {
        return Optional<DatetimeComponent>();
      }
      return Optional<DatetimeComponent>(DatetimeComponent(
          component_type, DatetimeComponent::RelativeQualifier::UNSPECIFIED,
          time_zone_offset, /*arg_relative_count=*/0));

    case ComponentType::WEEK:
      return CreateDatetimeComponent(
          component_type, qualifier, NO_VAL,
          HasRelativeDate() ? relative_match->week : NO_VAL);

    default:
      return Optional<DatetimeComponent>();
  }
}
bool DateMatch::IsValid() const {
if (!HasYear() && HasBcAd()) {
return false;
}
if (!HasMonth() && HasYear() && (HasDay() || HasDayOfWeek())) {
return false;
}
if (!HasDay() && HasDayOfWeek() && (HasYear() || HasMonth())) {
return false;
}
if (!HasDay() && !HasDayOfWeek() && HasHour() && (HasYear() || HasMonth())) {
return false;
}
if (!HasHour() && (HasMinute() || HasSecond() || HasFractionSecond())) {
return false;
}
if (!HasMinute() && (HasSecond() || HasFractionSecond())) {
return false;
}
if (!HasSecond() && HasFractionSecond()) {
return false;
}
// Check whether day exists in a month, to exclude cases like "April 31".
if (HasDay() && HasMonth() && day > GetLastDayOfMonth(year, month)) {
return false;
}
return (HasDateFields() || HasTimeFields() || HasRelativeDate());
}
void DateMatch::FillDatetimeComponents(
std::vector<DatetimeComponent>* datetime_component) const {
static const std::vector<DatetimeComponent::ComponentType>*
kDatetimeComponents = new std::vector<DatetimeComponent::ComponentType>{
DatetimeComponent::ComponentType::ZONE_OFFSET,
DatetimeComponent::ComponentType::MERIDIEM,
DatetimeComponent::ComponentType::SECOND,
DatetimeComponent::ComponentType::MINUTE,
DatetimeComponent::ComponentType::HOUR,
DatetimeComponent::ComponentType::DAY_OF_MONTH,
DatetimeComponent::ComponentType::DAY_OF_WEEK,
DatetimeComponent::ComponentType::WEEK,
DatetimeComponent::ComponentType::MONTH,
DatetimeComponent::ComponentType::YEAR};
for (const DatetimeComponent::ComponentType& component_type :
*kDatetimeComponents) {
Optional<DatetimeComponent> date_time =
GetDatetimeComponent(component_type);
if (date_time.has_value()) {
datetime_component->emplace_back(date_time.value());
}
}
}
// Returns a human-readable dump of the range for debugging: the "[begin,end)"
// span (only if both are non-negative) followed by the dumps of the `from` and
// `to` matches, one per line. When NDEBUG is defined the body is compiled out
// and the result is empty.
std::string DateRangeMatch::DebugString() const {
  std::string dump;
#if !defined(NDEBUG)
  const std::string from_dump = from.DebugString();
  const std::string to_dump = to.DebugString();

  const bool has_span = (begin >= 0) && (end >= 0);
  if (has_span) {
    SStringAppendF(&dump, 0, "[%u,%u)\n", begin, end);
  }
  SStringAppendF(&dump, 0, "from: %s \n", from_dump.c_str());
  SStringAppendF(&dump, 0, "to: %s\n", to_dump.c_str());
#endif  // !defined(NDEBUG)
  return dump;
}
} // namespace dates
} // namespace libtextclassifier3