| // Copyright 2020 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #pragma GCC diagnostic ignored "-Wunused-function" |
| |
| #include "annotator/grammar/dates/utils/date-utils.h" |
| |
| #include <algorithm> |
| #include <ctime> |
| |
| #include "annotator/grammar/dates/annotations/annotation-util.h" |
| #include "annotator/grammar/dates/dates_generated.h" |
| #include "annotator/grammar/dates/utils/annotation-keys.h" |
| #include "annotator/grammar/dates/utils/date-match.h" |
| #include "annotator/types.h" |
| #include "utils/base/macros.h" |
| |
| namespace libtextclassifier3 { |
| namespace dates { |
| |
| bool IsLeapYear(int year) { |
| // For the sake of completeness, we want to be able to decide |
| // whether a year is a leap year all the way back to 0 Julian, or |
| // 4714 BCE. But we don't want to take the modulus of a negative |
| // number, because this may not be very well-defined or portable. So |
| // we increment the year by some large multiple of 400, which is the |
| // periodicity of this leap-year calculation. |
| if (year < 0) { |
| year += 8000; |
| } |
| return ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)); |
| } |
| |
| namespace { |
| #define SECSPERMIN (60) |
| #define MINSPERHOUR (60) |
| #define HOURSPERDAY (24) |
| #define DAYSPERWEEK (7) |
| #define DAYSPERNYEAR (365) |
| #define DAYSPERLYEAR (366) |
| #define MONSPERYEAR (12) |
| |
| const int8 kDaysPerMonth[2][1 + MONSPERYEAR] = { |
| {-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, |
| {-1, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, |
| }; |
| } // namespace |
| |
| int8 GetLastDayOfMonth(int year, int month) { |
| if (year == 0) { // No year specified |
| return kDaysPerMonth[1][month]; |
| } |
| return kDaysPerMonth[IsLeapYear(year)][month]; |
| } |
| |
| namespace { |
| inline bool IsHourInSegment(const TimeSpanSpec_::Segment* segment, int8 hour, |
| bool is_exact) { |
| return (hour >= segment->begin() && |
| (hour < segment->end() || |
| (hour == segment->end() && is_exact && segment->is_closed()))); |
| } |
| |
| Property* FindOrCreateDefaultDateTime(AnnotationData* inst) { |
| // Refer comments for kDateTime in annotation-keys.h to see the format. |
| static constexpr int kDefault[] = {-1, -1, -1, -1, -1, -1, -1, -1}; |
| |
| int idx = GetPropertyIndex(kDateTime, *inst); |
| if (idx < 0) { |
| idx = AddRepeatedIntProperty(kDateTime, kDefault, TC3_ARRAYSIZE(kDefault), |
| inst); |
| } |
| return &inst->properties[idx]; |
| } |
| |
| void IncrementDayOfWeek(DayOfWeek* dow) { |
| static const DayOfWeek dow_ring[] = {DayOfWeek_MONDAY, DayOfWeek_TUESDAY, |
| DayOfWeek_WEDNESDAY, DayOfWeek_THURSDAY, |
| DayOfWeek_FRIDAY, DayOfWeek_SATURDAY, |
| DayOfWeek_SUNDAY, DayOfWeek_MONDAY}; |
| const auto& cur_dow = |
| std::find(std::begin(dow_ring), std::end(dow_ring), *dow); |
| if (cur_dow != std::end(dow_ring)) { |
| *dow = *std::next(cur_dow); |
| } |
| } |
| } // namespace |
| |
| bool NormalizeHourByTimeSpan(const TimeSpanSpec* ts_spec, DateMatch* date) { |
| if (ts_spec->segment() == nullptr) { |
| return false; |
| } |
| if (date->HasHour()) { |
| const bool is_exact = |
| (!date->HasMinute() || |
| (date->minute == 0 && |
| (!date->HasSecond() || |
| (date->second == 0 && |
| (!date->HasFractionSecond() || date->fraction_second == 0.0))))); |
| for (const TimeSpanSpec_::Segment* segment : *ts_spec->segment()) { |
| if (IsHourInSegment(segment, date->hour + segment->offset(), is_exact)) { |
| date->hour += segment->offset(); |
| return true; |
| } |
| if (!segment->is_strict() && |
| IsHourInSegment(segment, date->hour, is_exact)) { |
| return true; |
| } |
| } |
| } else { |
| for (const TimeSpanSpec_::Segment* segment : *ts_spec->segment()) { |
| if (segment->is_stand_alone()) { |
| if (segment->begin() == segment->end()) { |
| date->hour = segment->begin(); |
| } |
| // Allow stand-alone time-span points and ranges. |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| bool IsRefinement(const DateMatch& a, const DateMatch& b) { |
| int count = 0; |
| if (b.HasBcAd()) { |
| if (!a.HasBcAd() || a.bc_ad != b.bc_ad) return false; |
| } else if (a.HasBcAd()) { |
| if (a.bc_ad == BCAD_BC) return false; |
| ++count; |
| } |
| if (b.HasYear()) { |
| if (!a.HasYear() || a.year != b.year) return false; |
| } else if (a.HasYear()) { |
| ++count; |
| } |
| if (b.HasMonth()) { |
| if (!a.HasMonth() || a.month != b.month) return false; |
| } else if (a.HasMonth()) { |
| ++count; |
| } |
| if (b.HasDay()) { |
| if (!a.HasDay() || a.day != b.day) return false; |
| } else if (a.HasDay()) { |
| ++count; |
| } |
| if (b.HasDayOfWeek()) { |
| if (!a.HasDayOfWeek() || a.day_of_week != b.day_of_week) return false; |
| } else if (a.HasDayOfWeek()) { |
| ++count; |
| } |
| if (b.HasHour()) { |
| if (!a.HasHour()) return false; |
| std::vector<int8> possible_hours; |
| b.GetPossibleHourValues(&possible_hours); |
| if (std::find(possible_hours.begin(), possible_hours.end(), a.hour) == |
| possible_hours.end()) { |
| return false; |
| } |
| } else if (a.HasHour()) { |
| ++count; |
| } |
| if (b.HasMinute()) { |
| if (!a.HasMinute() || a.minute != b.minute) return false; |
| } else if (a.HasMinute()) { |
| ++count; |
| } |
| if (b.HasSecond()) { |
| if (!a.HasSecond() || a.second != b.second) return false; |
| } else if (a.HasSecond()) { |
| ++count; |
| } |
| if (b.HasFractionSecond()) { |
| if (!a.HasFractionSecond() || a.fraction_second != b.fraction_second) |
| return false; |
| } else if (a.HasFractionSecond()) { |
| ++count; |
| } |
| if (b.HasTimeSpanCode()) { |
| if (!a.HasTimeSpanCode() || a.time_span_code != b.time_span_code) |
| return false; |
| } else if (a.HasTimeSpanCode()) { |
| ++count; |
| } |
| if (b.HasTimeZoneCode()) { |
| if (!a.HasTimeZoneCode() || a.time_zone_code != b.time_zone_code) |
| return false; |
| } else if (a.HasTimeZoneCode()) { |
| ++count; |
| } |
| if (b.HasTimeZoneOffset()) { |
| if (!a.HasTimeZoneOffset() || a.time_zone_offset != b.time_zone_offset) |
| return false; |
| } else if (a.HasTimeZoneOffset()) { |
| ++count; |
| } |
| return (count > 0 || a.priority >= b.priority); |
| } |
| |
| bool IsRefinement(const DateRangeMatch& a, const DateRangeMatch& b) { |
| return false; |
| } |
| |
| bool IsPrecedent(const DateMatch& a, const DateMatch& b) { |
| if (a.HasYear() && b.HasYear()) { |
| if (a.year < b.year) return true; |
| if (a.year > b.year) return false; |
| } |
| |
| if (a.HasMonth() && b.HasMonth()) { |
| if (a.month < b.month) return true; |
| if (a.month > b.month) return false; |
| } |
| |
| if (a.HasDay() && b.HasDay()) { |
| if (a.day < b.day) return true; |
| if (a.day > b.day) return false; |
| } |
| |
| if (a.HasHour() && b.HasHour()) { |
| if (a.hour < b.hour) return true; |
| if (a.hour > b.hour) return false; |
| } |
| |
| if (a.HasMinute() && b.HasHour()) { |
| if (a.minute < b.hour) return true; |
| if (a.minute > b.hour) return false; |
| } |
| |
| if (a.HasSecond() && b.HasSecond()) { |
| if (a.second < b.hour) return true; |
| if (a.second > b.hour) return false; |
| } |
| |
| return false; |
| } |
| |
| void FillDateInstance(const DateMatch& date, |
| DatetimeParseResultSpan* instance) { |
| instance->span.first = date.begin; |
| instance->span.second = date.end; |
| instance->priority_score = date.GetAnnotatorPriorityScore(); |
| DatetimeParseResult datetime_parse_result; |
| date.FillDatetimeComponents(&datetime_parse_result.datetime_components); |
| instance->data.emplace_back(datetime_parse_result); |
| } |
| |
| void FillDateRangeInstance(const DateRangeMatch& range, |
| DatetimeParseResultSpan* instance) { |
| instance->span.first = range.begin; |
| instance->span.second = range.end; |
| instance->priority_score = range.GetAnnotatorPriorityScore(); |
| |
| // Filling from DatetimeParseResult. |
| instance->data.emplace_back(); |
| range.from.FillDatetimeComponents(&instance->data.back().datetime_components); |
| |
| // Filling to DatetimeParseResult. |
| instance->data.emplace_back(); |
| range.to.FillDatetimeComponents(&instance->data.back().datetime_components); |
| } |
| |
| namespace { |
| bool AnyOverlappedField(const DateMatch& prev, const DateMatch& next) { |
| #define Field(f) \ |
| if (prev.f && next.f) return true |
| Field(year_match); |
| Field(month_match); |
| Field(day_match); |
| Field(day_of_week_match); |
| Field(time_value_match); |
| Field(time_span_match); |
| Field(time_zone_name_match); |
| Field(time_zone_offset_match); |
| Field(relative_match); |
| Field(combined_digits_match); |
| #undef Field |
| return false; |
| } |
| |
| void MergeDateMatchImpl(const DateMatch& prev, DateMatch* next, |
| bool update_span) { |
| #define RM(f) \ |
| if (!next->f) next->f = prev.f |
| RM(year_match); |
| RM(month_match); |
| RM(day_match); |
| RM(day_of_week_match); |
| RM(time_value_match); |
| RM(time_span_match); |
| RM(time_zone_name_match); |
| RM(time_zone_offset_match); |
| RM(relative_match); |
| RM(combined_digits_match); |
| #undef RM |
| |
| #define RV(f) \ |
| if (next->f == NO_VAL) next->f = prev.f |
| RV(year); |
| RV(month); |
| RV(day); |
| RV(hour); |
| RV(minute); |
| RV(second); |
| RV(fraction_second); |
| #undef RV |
| |
| #define RE(f, v) \ |
| if (next->f == v) next->f = prev.f |
| RE(day_of_week, DayOfWeek_DOW_NONE); |
| RE(bc_ad, BCAD_BCAD_NONE); |
| RE(time_span_code, TimespanCode_TIMESPAN_CODE_NONE); |
| RE(time_zone_code, TimezoneCode_TIMEZONE_CODE_NONE); |
| #undef RE |
| |
| if (next->time_zone_offset == std::numeric_limits<int16>::min()) { |
| next->time_zone_offset = prev.time_zone_offset; |
| } |
| |
| next->priority = std::max(next->priority, prev.priority); |
| next->annotator_priority_score = |
| std::max(next->annotator_priority_score, prev.annotator_priority_score); |
| if (update_span) { |
| next->begin = std::min(next->begin, prev.begin); |
| next->end = std::max(next->end, prev.end); |
| } |
| } |
| } // namespace |
| |
| bool IsDateMatchMergeable(const DateMatch& prev, const DateMatch& next) { |
| // Do not merge if they share the same field. |
| if (AnyOverlappedField(prev, next)) { |
| return false; |
| } |
| |
| // It's impossible that both prev and next have relative date since it's |
| // excluded by overlapping check before. |
| if (prev.HasRelativeDate() || next.HasRelativeDate()) { |
| // If one of them is relative date, then we merge: |
| // - if relative match shouldn't have time, and always has DOW or day. |
| // - if not both relative match and non relative match has day. |
| // - if non relative match has time or day. |
| const DateMatch* rm = &prev; |
| const DateMatch* non_rm = &prev; |
| if (prev.HasRelativeDate()) { |
| non_rm = &next; |
| } else { |
| rm = &next; |
| } |
| |
| const RelativeMatch* relative_match = rm->relative_match; |
| // Relative Match should have day or DOW but no time. |
| if (!relative_match->HasDayFields() || |
| relative_match->HasTimeValueFields()) { |
| return false; |
| } |
| // Check if both relative match and non relative match has day. |
| if (non_rm->HasDateFields() && relative_match->HasDay()) { |
| return false; |
| } |
| // Non relative match should have either hour (time) or day (date). |
| if (!non_rm->HasHour() && !non_rm->HasDay()) { |
| return false; |
| } |
| } else { |
| // Only one match has date and another has time. |
| if ((prev.HasDateFields() && next.HasDateFields()) || |
| (prev.HasTimeFields() && next.HasTimeFields())) { |
| return false; |
| } |
| // DOW never be extracted as a single DateMatch except in RelativeMatch. So |
| // here, we always merge one with day and another one with hour. |
| if (!(prev.HasDay() || next.HasDay()) || |
| !(prev.HasHour() || next.HasHour())) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| void MergeDateMatch(const DateMatch& prev, DateMatch* next, bool update_span) { |
| if (IsDateMatchMergeable(prev, *next)) { |
| MergeDateMatchImpl(prev, next, update_span); |
| } |
| } |
| |
| } // namespace dates |
| } // namespace libtextclassifier3 |