// blob: 94552fd8050f4848a321d50a8bef65720db96994 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#pragma GCC diagnostic ignored "-Wunused-function"
#include "annotator/grammar/dates/utils/date-utils.h"
#include <algorithm>
#include <ctime>
#include "annotator/grammar/dates/annotations/annotation-util.h"
#include "annotator/grammar/dates/dates_generated.h"
#include "annotator/grammar/dates/utils/annotation-keys.h"
#include "annotator/grammar/dates/utils/date-match.h"
#include "annotator/types.h"
#include "utils/base/macros.h"
namespace libtextclassifier3 {
namespace dates {
// Returns true if |year| is a leap year under the Gregorian rule: divisible
// by 4, except centuries, which must also be divisible by 400.
//
// Negative years are shifted once by 8000 before the test. 8000 is a multiple
// of 400 (the period of the rule), so the answer is unchanged while the
// remainder operations see a non-negative operand for any year >= -8000.
bool IsLeapYear(int year) {
  const int shifted_year = (year < 0) ? year + 8000 : year;
  // Most specific rule first: every 400th year is a leap year.
  if (shifted_year % 400 == 0) {
    return true;
  }
  // Any other century year is not.
  if (shifted_year % 100 == 0) {
    return false;
  }
  // Everything else follows the plain every-fourth-year rule.
  return shifted_year % 4 == 0;
}
namespace {
// Calendar arithmetic constants. Of these, only MONSPERYEAR is referenced in
// this file (to size kDaysPerMonth).
#define SECSPERMIN (60)
#define MINSPERHOUR (60)
#define HOURSPERDAY (24)
#define DAYSPERWEEK (7)
#define DAYSPERNYEAR (365)
#define DAYSPERLYEAR (366)
#define MONSPERYEAR (12)
// Number of days in each month, indexed as [is_leap_year][month] with month
// running from 1 (January) to 12 (December). Index 0 is a -1 placeholder so
// that month numbers can be used directly as indices. Row 0 is a common year,
// row 1 a leap year (February has 29 days).
const int8 kDaysPerMonth[2][1 + MONSPERYEAR] = {
{-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
{-1, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31},
};
} // namespace
// Returns the last day (i.e. the number of days) of |month| in |year|.
//
// |month| is 1-based (1 = January ... 12 = December). A |year| of 0 means that
// no year was specified; in that case the leap-year table is used so that
// February 29 is accepted.
//
// Returns -1 when |month| is outside [1, 12]. This matches the placeholder
// stored at index 0 of kDaysPerMonth and prevents reading past the table for
// out-of-range input.
int8 GetLastDayOfMonth(int year, int month) {
  if (month < 1 || month > MONSPERYEAR) {
    return -1;
  }
  if (year == 0) {  // No year specified
    return kDaysPerMonth[1][month];
  }
  return kDaysPerMonth[IsLeapYear(year) ? 1 : 0][month];
}
namespace {
// Returns true if |hour| lies inside |segment|.
//
// The segment covers [begin, end). The end hour itself is accepted only when
// the time is exact (|is_exact|) and the segment is marked as closed.
inline bool IsHourInSegment(const TimeSpanSpec_::Segment* segment, int8 hour,
                            bool is_exact) {
  // Before the segment starts.
  if (hour < segment->begin()) {
    return false;
  }
  // Strictly inside the segment.
  if (hour < segment->end()) {
    return true;
  }
  // On or past the end: only the exact end hour of a closed segment counts.
  return hour == segment->end() && is_exact && segment->is_closed();
}
// Returns the kDateTime property of |inst|, creating it first if it does not
// exist yet. A newly created property is a repeated int property holding eight
// -1 values.
Property* FindOrCreateDefaultDateTime(AnnotationData* inst) {
  const int existing_index = GetPropertyIndex(kDateTime, *inst);
  if (existing_index >= 0) {
    return &inst->properties[existing_index];
  }

  // Refer comments for kDateTime in annotation-keys.h to see the format.
  static constexpr int kDefault[] = {-1, -1, -1, -1, -1, -1, -1, -1};
  const int created_index = AddRepeatedIntProperty(
      kDateTime, kDefault, TC3_ARRAYSIZE(kDefault), inst);
  return &inst->properties[created_index];
}
// Advances |*dow| to the following day of the week, wrapping from Sunday back
// to Monday. Values that are not one of the seven week days are left
// untouched.
void IncrementDayOfWeek(DayOfWeek* dow) {
  // Monday appears again at the end so that every one of the seven days has a
  // successor at the next index.
  static const DayOfWeek kWeekRing[] = {
      DayOfWeek_MONDAY,   DayOfWeek_TUESDAY, DayOfWeek_WEDNESDAY,
      DayOfWeek_THURSDAY, DayOfWeek_FRIDAY,  DayOfWeek_SATURDAY,
      DayOfWeek_SUNDAY,   DayOfWeek_MONDAY};
  constexpr int kNumWeekDays = 7;
  for (int i = 0; i < kNumWeekDays; ++i) {
    if (kWeekRing[i] == *dow) {
      *dow = kWeekRing[i + 1];
      return;
    }
  }
}
} // namespace
bool NormalizeHourByTimeSpan(const TimeSpanSpec* ts_spec, DateMatch* date) {
if (ts_spec->segment() == nullptr) {
return false;
}
if (date->HasHour()) {
const bool is_exact =
(!date->HasMinute() ||
(date->minute == 0 &&
(!date->HasSecond() ||
(date->second == 0 &&
(!date->HasFractionSecond() || date->fraction_second == 0.0)))));
for (const TimeSpanSpec_::Segment* segment : *ts_spec->segment()) {
if (IsHourInSegment(segment, date->hour + segment->offset(), is_exact)) {
date->hour += segment->offset();
return true;
}
if (!segment->is_strict() &&
IsHourInSegment(segment, date->hour, is_exact)) {
return true;
}
}
} else {
for (const TimeSpanSpec_::Segment* segment : *ts_spec->segment()) {
if (segment->is_stand_alone()) {
if (segment->begin() == segment->end()) {
date->hour = segment->begin();
}
// Allow stand-alone time-span points and ranges.
return true;
}
}
}
return false;
}
// Returns true if |a| is a refinement of |b|.
//
// Every field that is set in |b| must also be set in |a| with the same value;
// for the hour, |a.hour| must be one of the values reported by
// b.GetPossibleHourValues(). Fields set only in |a| are counted as additional
// information, except that a BC era present only in |a| disqualifies it.
// |a| is a refinement when it carries at least one additional field, or when
// its priority is not lower than that of |b|.
bool IsRefinement(const DateMatch& a, const DateMatch& b) {
  int extra_fields = 0;

// Shared rule for plain fields: a mismatch with a field set in |b| rejects;
// a field set only in |a| counts as extra information.
#define TC3_REFINE_FIELD(HasField, field)                    \
  do {                                                       \
    if (b.HasField()) {                                      \
      if (!a.HasField() || a.field != b.field) return false; \
    } else if (a.HasField()) {                               \
      ++extra_fields;                                        \
    }                                                        \
  } while (false)

  // Era: same rule, but a BC era that |b| does not mention is rejected.
  if (b.HasBcAd()) {
    if (!a.HasBcAd() || a.bc_ad != b.bc_ad) {
      return false;
    }
  } else if (a.HasBcAd()) {
    if (a.bc_ad == BCAD_BC) {
      return false;
    }
    ++extra_fields;
  }

  TC3_REFINE_FIELD(HasYear, year);
  TC3_REFINE_FIELD(HasMonth, month);
  TC3_REFINE_FIELD(HasDay, day);
  TC3_REFINE_FIELD(HasDayOfWeek, day_of_week);

  // Hour: |a.hour| has to be among the hour values |b| may stand for.
  if (b.HasHour()) {
    if (!a.HasHour()) {
      return false;
    }
    std::vector<int8> candidate_hours;
    b.GetPossibleHourValues(&candidate_hours);
    bool hour_is_possible = false;
    for (const auto candidate : candidate_hours) {
      if (candidate == a.hour) {
        hour_is_possible = true;
        break;
      }
    }
    if (!hour_is_possible) {
      return false;
    }
  } else if (a.HasHour()) {
    ++extra_fields;
  }

  TC3_REFINE_FIELD(HasMinute, minute);
  TC3_REFINE_FIELD(HasSecond, second);
  TC3_REFINE_FIELD(HasFractionSecond, fraction_second);
  TC3_REFINE_FIELD(HasTimeSpanCode, time_span_code);
  TC3_REFINE_FIELD(HasTimeZoneCode, time_zone_code);
  TC3_REFINE_FIELD(HasTimeZoneOffset, time_zone_offset);
#undef TC3_REFINE_FIELD

  return extra_fields > 0 || a.priority >= b.priority;
}
// Refinement check for date ranges. Always returns false: neither |a| nor |b|
// is inspected, so one range is never treated as a refinement of another.
bool IsRefinement(const DateRangeMatch& a, const DateRangeMatch& b) {
return false;
}
bool IsPrecedent(const DateMatch& a, const DateMatch& b) {
if (a.HasYear() && b.HasYear()) {
if (a.year < b.year) return true;
if (a.year > b.year) return false;
}
if (a.HasMonth() && b.HasMonth()) {
if (a.month < b.month) return true;
if (a.month > b.month) return false;
}
if (a.HasDay() && b.HasDay()) {
if (a.day < b.day) return true;
if (a.day > b.day) return false;
}
if (a.HasHour() && b.HasHour()) {
if (a.hour < b.hour) return true;
if (a.hour > b.hour) return false;
}
if (a.HasMinute() && b.HasHour()) {
if (a.minute < b.hour) return true;
if (a.minute > b.hour) return false;
}
if (a.HasSecond() && b.HasSecond()) {
if (a.second < b.hour) return true;
if (a.second > b.hour) return false;
}
return false;
}
// Populates |instance| from a single date match: copies the span and the
// annotator priority score of |date| and appends one DatetimeParseResult
// holding the datetime components of |date|.
void FillDateInstance(const DateMatch& date,
                      DatetimeParseResultSpan* instance) {
  instance->span.first = date.begin;
  instance->span.second = date.end;
  instance->priority_score = date.GetAnnotatorPriorityScore();
  // Construct the result directly inside the container and fill it in place,
  // as FillDateRangeInstance does, instead of filling a local object and
  // copying it (including its component list) into the container.
  instance->data.emplace_back();
  date.FillDatetimeComponents(&instance->data.back().datetime_components);
}
// Populates |instance| from a date range match: appends two
// DatetimeParseResult entries (first the "from" end, then the "to" end of
// |range|) and copies the span and the annotator priority score of |range|.
void FillDateRangeInstance(const DateRangeMatch& range,
                           DatetimeParseResultSpan* instance) {
  auto& results = instance->data;

  // Start of the range.
  results.emplace_back();
  range.from.FillDatetimeComponents(&results.back().datetime_components);

  // End of the range.
  results.emplace_back();
  range.to.FillDatetimeComponents(&results.back().datetime_components);

  instance->span.first = range.begin;
  instance->span.second = range.end;
  instance->priority_score = range.GetAnnotatorPriorityScore();
}
namespace {
// Returns true if |prev| and |next| both have the same sub-match set, i.e. if
// at least one of the *_match members is non-null in both of them.
bool AnyOverlappedField(const DateMatch& prev, const DateMatch& next) {
  return (prev.year_match && next.year_match) ||
         (prev.month_match && next.month_match) ||
         (prev.day_match && next.day_match) ||
         (prev.day_of_week_match && next.day_of_week_match) ||
         (prev.time_value_match && next.time_value_match) ||
         (prev.time_span_match && next.time_span_match) ||
         (prev.time_zone_name_match && next.time_zone_name_match) ||
         (prev.time_zone_offset_match && next.time_zone_offset_match) ||
         (prev.relative_match && next.relative_match) ||
         (prev.combined_digits_match && next.combined_digits_match);
}
// Merges |prev| into |*next| without checking whether the merge is allowed.
//
// Every sub-match pointer and every value that is unset in |*next| is taken
// from |prev|; anything already set in |*next| is kept. The priority and the
// annotator priority score become the maximum of both matches. When
// |update_span| is true, the span of |*next| is widened to cover |prev|.
void MergeDateMatchImpl(const DateMatch& prev, DateMatch* next,
                        bool update_span) {
// Takes over prev's sub-match when next has none.
#define TC3_INHERIT_IF_NULL(field)               \
  do {                                           \
    if (!next->field) next->field = prev.field;  \
  } while (false)
// Takes over prev's value when next still holds the given "unset" marker.
#define TC3_INHERIT_IF_EQUAL(field, unset_marker)               \
  do {                                                          \
    if (next->field == unset_marker) next->field = prev.field;  \
  } while (false)

  // Sub-match pointers.
  TC3_INHERIT_IF_NULL(year_match);
  TC3_INHERIT_IF_NULL(month_match);
  TC3_INHERIT_IF_NULL(day_match);
  TC3_INHERIT_IF_NULL(day_of_week_match);
  TC3_INHERIT_IF_NULL(time_value_match);
  TC3_INHERIT_IF_NULL(time_span_match);
  TC3_INHERIT_IF_NULL(time_zone_name_match);
  TC3_INHERIT_IF_NULL(time_zone_offset_match);
  TC3_INHERIT_IF_NULL(relative_match);
  TC3_INHERIT_IF_NULL(combined_digits_match);

  // Numeric values that use NO_VAL as the "unset" marker.
  TC3_INHERIT_IF_EQUAL(year, NO_VAL);
  TC3_INHERIT_IF_EQUAL(month, NO_VAL);
  TC3_INHERIT_IF_EQUAL(day, NO_VAL);
  TC3_INHERIT_IF_EQUAL(hour, NO_VAL);
  TC3_INHERIT_IF_EQUAL(minute, NO_VAL);
  TC3_INHERIT_IF_EQUAL(second, NO_VAL);
  TC3_INHERIT_IF_EQUAL(fraction_second, NO_VAL);

  // Enum values, each with its own "none" enumerator.
  TC3_INHERIT_IF_EQUAL(day_of_week, DayOfWeek_DOW_NONE);
  TC3_INHERIT_IF_EQUAL(bc_ad, BCAD_BCAD_NONE);
  TC3_INHERIT_IF_EQUAL(time_span_code, TimespanCode_TIMESPAN_CODE_NONE);
  TC3_INHERIT_IF_EQUAL(time_zone_code, TimezoneCode_TIMEZONE_CODE_NONE);

  // The time zone offset uses the smallest int16 as its "unset" marker.
  TC3_INHERIT_IF_EQUAL(time_zone_offset, std::numeric_limits<int16>::min());

#undef TC3_INHERIT_IF_EQUAL
#undef TC3_INHERIT_IF_NULL

  next->priority = std::max(next->priority, prev.priority);
  next->annotator_priority_score =
      std::max(next->annotator_priority_score, prev.annotator_priority_score);

  if (!update_span) {
    return;
  }
  next->begin = std::min(next->begin, prev.begin);
  next->end = std::max(next->end, prev.end);
}
} // namespace
bool IsDateMatchMergeable(const DateMatch& prev, const DateMatch& next) {
// Do not merge if they share the same field.
if (AnyOverlappedField(prev, next)) {
return false;
}
// It's impossible that both prev and next have relative date since it's
// excluded by overlapping check before.
if (prev.HasRelativeDate() || next.HasRelativeDate()) {
// If one of them is relative date, then we merge:
// - if relative match shouldn't have time, and always has DOW or day.
// - if not both relative match and non relative match has day.
// - if non relative match has time or day.
const DateMatch* rm = &prev;
const DateMatch* non_rm = &prev;
if (prev.HasRelativeDate()) {
non_rm = &next;
} else {
rm = &next;
}
const RelativeMatch* relative_match = rm->relative_match;
// Relative Match should have day or DOW but no time.
if (!relative_match->HasDayFields() ||
relative_match->HasTimeValueFields()) {
return false;
}
// Check if both relative match and non relative match has day.
if (non_rm->HasDateFields() && relative_match->HasDay()) {
return false;
}
// Non relative match should have either hour (time) or day (date).
if (!non_rm->HasHour() && !non_rm->HasDay()) {
return false;
}
} else {
// Only one match has date and another has time.
if ((prev.HasDateFields() && next.HasDateFields()) ||
(prev.HasTimeFields() && next.HasTimeFields())) {
return false;
}
// DOW never be extracted as a single DateMatch except in RelativeMatch. So
// here, we always merge one with day and another one with hour.
if (!(prev.HasDay() || next.HasDay()) ||
!(prev.HasHour() || next.HasHour())) {
return false;
}
}
return true;
}
// Merges |prev| into |*next| when IsDateMatchMergeable() allows it; otherwise
// leaves |*next| untouched. |update_span| is forwarded to the merge and
// controls whether the span of |*next| is widened to cover |prev|.
void MergeDateMatch(const DateMatch& prev, DateMatch* next, bool update_span) {
  if (!IsDateMatchMergeable(prev, *next)) {
    return;
  }
  MergeDateMatchImpl(prev, next, update_span);
}
} // namespace dates
} // namespace libtextclassifier3