blob: 07e1964679e05ed152bc5b5cf681bd4daee3dc39 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
include "annotator/grammar/dates/timezone-code.fbs";
include "utils/grammar/rules.fbs";
namespace libtextclassifier3.dates;

// Type identifiers of all non-trivial matches.
// Numeric values are part of the serialized format; do not renumber.
enum MatchType : int {
  UNKNOWN = 0,

  // Match produced by a date extraction rule.
  DATETIME_RULE = 1,

  // Match produced by a date range extraction rule.
  DATETIME_RANGE_RULE = 2,

  // Match defined by an ExtractionRule, e.g. a single time result matched by
  // a time rule that is ready to be output on its own. Having a dedicated
  // match type lets range rules retrieve it.
  DATETIME = 3,

  // Match defined by TermValue.
  TERM_VALUE = 4,

  // Matches defined by Nonterminal.
  NONTERMINAL = 5,

  DIGITS = 6,
  YEAR = 7,
  MONTH = 8,
  DAY = 9,
  HOUR = 10,
  MINUTE = 11,
  SECOND = 12,
  FRACTION_SECOND = 13,
  DAY_OF_WEEK = 14,
  TIME_VALUE = 15,
  TIME_SPAN = 16,
  TIME_ZONE_NAME = 17,
  TIME_ZONE_OFFSET = 18,
  TIME_PERIOD = 19,
  RELATIVE_DATE = 20,
  COMBINED_DIGITS = 21,
}
namespace libtextclassifier3.dates;

// Era marker (presumably "before Christ" / "anno Domini" for years -- confirm
// against the code that consumes it).
// NOTE(review): the "none" sentinel is -1, so the value 0 means BC. A table
// field of this type that should default to "none" must say so explicitly.
enum BCAD : int {
  BCAD_NONE = -1,
  BC = 0,
  AD = 1,
}
namespace libtextclassifier3.dates;

// Day of the week, numbered 1 (SUNDAY) through 7 (SATURDAY), with -1 as the
// "no day" sentinel.
// NOTE(review): no enumerator has the value 0, so a table field of this type
// presumably needs an explicit default -- confirm before adding one.
enum DayOfWeek : int {
  DOW_NONE = -1,
  SUNDAY = 1,
  MONDAY = 2,
  TUESDAY = 3,
  WEDNESDAY = 4,
  THURSDAY = 5,
  FRIDAY = 6,
  SATURDAY = 7,
}
namespace libtextclassifier3.dates;

// Codes identifying a named span of the day. Values 4 through 10 are not
// assigned in this schema.
enum TimespanCode : int {
  TIMESPAN_CODE_NONE = -1,
  AM = 0,
  PM = 1,
  NOON = 2,
  MIDNIGHT = 3,

  // English "tonight".
  TONIGHT = 11,
}
namespace libtextclassifier3.dates;

// The datetime grammar rules: the grammar itself plus the value tables that
// grammar rule matches are associated with.
table DatetimeRules {
  // The context-free grammar rules.
  rules:grammar.RulesSet;

  // Values associated with grammar rule matches.
  extraction_rule:[ExtractionRuleParameter];
  term_value:[TermValue];
  nonterminal_value:[NonterminalValue];
}
namespace libtextclassifier3.dates;

// Value attached to a term match.
table TermValue {
  value:int;

  // A time segment, e.g. 10AM - 12AM.
  time_span_spec:TimeSpanSpec;

  // Representation of time zone information.
  time_zone_name_spec:TimeZoneNameSpec;
}
namespace libtextclassifier3.dates;

// Defines nonterminals from terms or from other nonterminals.
table NonterminalValue {
  // Mapping value.
  value:TermValue;

  // Formatting choices for nonterminal messages.
  nonterminal_parameter:NonterminalParameter;

  // How past/future dates are interpreted (e.g. "last year").
  relative_parameter:RelativeParameter;

  // Format info for nonterminals that represent times.
  time_value_parameter:TimeValueParameter;

  // Format of time-zone offset info, e.g. "UTC-8".
  time_zone_offset_parameter:TimeZoneOffsetParameter;
}
namespace libtextclassifier3.dates.RelativeParameter_;

// Unit selected by RelativeParameter.type. NONE (0) is the default declared
// on that field.
enum RelativeType : int {
  NONE = 0,
  YEAR = 1,
  MONTH = 2,
  DAY = 3,
  WEEK = 4,
  HOUR = 5,
  MINUTE = 6,
  SECOND = 7,
}
namespace libtextclassifier3.dates.RelativeParameter_;

// Direction selected by RelativeParameter.period: past, future, or unknown.
enum Period : int {
  PERIOD_UNKNOWN = 0,
  PERIOD_PAST = 1,
  PERIOD_FUTURE = 2,
}
namespace libtextclassifier3.dates.RelativeParameter_;

// Relative interpretation.
// Indicates which day a day-of-week expression refers to. For example,
// "next Friday" could mean either the closest upcoming Friday or the Friday
// of next week.
enum Interpretation : int {
  UNKNOWN = 0,

  // The closest X in the past.
  NEAREST_LAST = 1,

  // The X before the closest X in the past.
  SECOND_LAST = 2,

  // The closest X in the future.
  NEAREST_NEXT = 3,

  // The X after the closest X in the future.
  SECOND_NEXT = 4,

  // X in the previous one.
  PREVIOUS = 5,

  // X in the coming one.
  COMING = 6,

  // X in the current one; it can be either past or future.
  CURRENT = 7,

  // Some X.
  SOME = 8,

  // The closest X; it can be either past or future.
  NEAREST = 9,
}
namespace libtextclassifier3.dates;

// Describes a relative date/time expression: which unit it refers to, whether
// it points to the past or the future, and how a day of week is interpreted.
table RelativeParameter {
  type:RelativeParameter_.RelativeType = NONE;
  period:RelativeParameter_.Period = PERIOD_UNKNOWN;
  day_of_week_interpretation:[RelativeParameter_.Interpretation];
}
namespace libtextclassifier3.dates.NonterminalParameter_;

// Flag values for nonterminals; NonterminalParameter.flag is documented as a
// bitwise OR of these.
enum Flag : int {
  IS_SPELLED = 1,
}
namespace libtextclassifier3.dates;

// Formatting choices for a nonterminal.
table NonterminalParameter {
  // Bitwise OR of NonterminalParameter_.Flag values.
  flag:uint = 0;

  // NOTE(review): the format syntax is not described in this schema; see the
  // code that reads this field.
  combined_digits_format:string;
}
namespace libtextclassifier3.dates.TimeValueParameter_;

// Validation options for time values. Each enumerator is a single bit.
enum TimeValueValidation : int {
  // 1 << 0: allow extra spaces between sub-components in a time value.
  ALLOW_EXTRA_SPACE = 1,

  // 1 << 1: disallow colon- or dot-context with digits for a time value.
  DISALLOW_COLON_DOT_CONTEXT = 2,
}
namespace libtextclassifier3.dates;

// Format info for nonterminals that represent times.
table TimeValueParameter {
  // Bitwise OR (presumably of TimeValueParameter_.TimeValueValidation
  // values -- confirm against the reader).
  validation:uint = 0;

  // Bitwise OR. NOTE(review): the flag set combined here is not identified
  // in this schema.
  flag:uint = 0;
}
namespace libtextclassifier3.dates.TimeZoneOffsetParameter_;

// Textual layout of a time-zone offset.
enum Format : int {
  // Offset is in an uncategorized format.
  FORMAT_UNKNOWN = 0,

  // 1-digit hour only, e.g. "UTC-8".
  FORMAT_H = 1,

  // 2-digit hour only, e.g. "UTC-08".
  FORMAT_HH = 2,

  // 1-digit hour and minute, e.g. "UTC-8:00".
  FORMAT_H_MM = 3,

  // 2-digit hour and minute, e.g. "UTC-08:00".
  FORMAT_HH_MM = 4,

  // 3-digit hour-and-minute, e.g. "UTC-800".
  FORMAT_HMM = 5,

  // 4-digit hour-and-minute, e.g. "UTC-0800".
  FORMAT_HHMM = 6,
}
namespace libtextclassifier3.dates;

// Describes the format of a time-zone offset expression.
table TimeZoneOffsetParameter {
  format:TimeZoneOffsetParameter_.Format = FORMAT_UNKNOWN;
}
namespace libtextclassifier3.dates.ExtractionRuleParameter_;

// Validation options for extraction rules. Each enumerator is a single bit.
enum ExtractionValidation : int {
  // Boundary checking for the final match.
  LEFT_BOUND = 1,
  RIGHT_BOUND = 2,

  SPELLED_YEAR = 4,
  SPELLED_MONTH = 8,
  SPELLED_DAY = 16,

  // Without this validation flag set, unconfident time-zone expressions
  // (e.g. "-08:00", "+8") are discarded in the output callback.
  ALLOW_UNCONFIDENT_TIME_ZONE = 32,
}
namespace libtextclassifier3.dates;

// Parameter info for an extraction rule; helps explain the rule.
table ExtractionRuleParameter {
  // Bitwise OR of validation flags.
  validation:uint = 0;

  priority_delta:int;
  id:string;

  // Confidence score of the date/time match, set while the grammar rules are
  // created. For example, a rule that detects "22.33" as HH.MM is ambiguous,
  // so the confidence of such a match may be relatively low.
  annotator_priority_score:float;
}
namespace libtextclassifier3.dates.TimeSpanSpec_;

// Internal structure used to describe an hour-mapping segment.
table Segment {
  // Beginning hour of the segment, from 0 to 24; always included.
  begin:int;

  // Ending hour of the segment, from 0 to 24; excluded unless the segment is
  // closed. The value 0 means the beginning of the next day; a value equal
  // to "begin" means a time-point.
  end:int;

  // Mapping offset in hours, from -24 to 24, from spanned expressions to
  // 24-hour expressions. The value 0 means identical mapping.
  offset:int;

  // True if the segment is closed rather than half-open.
  // Always set it to true when describing time-points.
  is_closed:bool = false;

  // True if a strict check should be applied to the segment, which
  // disallows already-offset hours in spanned expressions, e.g. 15:30PM.
  is_strict:bool = false;

  // True if the time-span can be used without an explicitly specified hour
  // value. It can then produce an exact time point (the "begin" o'clock
  // sharp, like "noon") or a time range, like "Tonight".
  is_stand_alone:bool = false;
}
namespace libtextclassifier3.dates;

// A time-span code together with the hour-mapping segments that describe it.
table TimeSpanSpec {
  // NOTE(review): no explicit default is given, so an absent value reads as
  // 0, which is AM rather than TIMESPAN_CODE_NONE -- confirm this is intended.
  code:TimespanCode;

  segment:[TimeSpanSpec_.Segment];
}
namespace libtextclassifier3.dates.TimeZoneNameSpec_;

// Whether a time-zone name denotes standard or daylight-saving time.
enum TimeZoneType : int {
  // The name might denote either a standard or a daylight-saving time-zone,
  // depending on external information such as the date.
  AMBIGUOUS = 0,

  // The name denotes a standard time-zone.
  STANDARD = 1,

  // The name denotes a daylight-saving time-zone.
  DAYLIGHT = 2,
}
namespace libtextclassifier3.dates;

// Representation of a time-zone name.
table TimeZoneNameSpec {
  code:TimezoneCode;
  type:TimeZoneNameSpec_.TimeZoneType = AMBIGUOUS;

  // True if the name is used internationally as an abbreviation (or
  // expression) of UTC, for example "GMT" and "Z".
  is_utc:bool = false;

  // False if the name is not an abbreviation, for example "Pacific Time" and
  // "China Standard Time". Note that the default is true.
  is_abbreviation:bool = true;
}