// blob: cc0a9e58fe3f401d49851c0239ca45b1534d1568 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
#define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
#include <type_traits>

#include "utils/base/integral_types.h"
#include "utils/utf8/unicodetext.h"
namespace libtextclassifier3 {
// Free functions operating on a single Unicode code point (`char32`).
//
// Only the declarations appear in this header; the definitions live elsewhere,
// so the exact set of code points each function accepts cannot be read off
// here. The groupings below follow the function names and should be confirmed
// against the implementation.
//
// Bracket predicates (see also GetPairedBracket below).
bool IsOpeningBracket(char32 codepoint);
bool IsClosingBracket(char32 codepoint);
// General character-class predicates.
bool IsWhitespace(char32 codepoint);
bool IsBidirectional(char32 codepoint);
bool IsDigit(char32 codepoint);
bool IsLower(char32 codepoint);
bool IsUpper(char32 codepoint);
bool IsPunctuation(char32 codepoint);
// Predicates named after individual symbols. NOTE(review): whether each one
// matches only the ASCII character or also its Unicode variants is not visible
// from this header -- confirm against the definitions.
bool IsPercentage(char32 codepoint);
bool IsSlash(char32 codepoint);
bool IsMinus(char32 codepoint);
bool IsNumberSign(char32 codepoint);
bool IsDot(char32 codepoint);
bool IsApostrophe(char32 codepoint);
bool IsQuotation(char32 codepoint);
bool IsAmpersand(char32 codepoint);
// Predicates named after scripts / languages. The covered code point ranges
// are defined by the implementation, not by this header.
bool IsLatinLetter(char32 codepoint);
bool IsArabicLetter(char32 codepoint);
bool IsCyrillicLetter(char32 codepoint);
bool IsChineseLetter(char32 codepoint);
bool IsJapaneseLetter(char32 codepoint);
bool IsKoreanLetter(char32 codepoint);
bool IsThaiLetter(char32 codepoint);
bool IsLetter(char32 codepoint);
// NOTE(review): "CJT" presumably stands for Chinese/Japanese/Thai -- confirm
// against the definition.
bool IsCJTletter(char32 codepoint);
// Case mapping: each takes a code point and returns a code point.
// NOTE(review): the result for code points without a case mapping is not
// visible here (presumably the input is returned unchanged -- confirm).
char32 ToLower(char32 codepoint);
char32 ToUpper(char32 codepoint);
// Takes a code point and returns a code point; presumably the counterpart of a
// bracket (e.g. the closing bracket for an opening one). NOTE(review): the
// result for non-bracket input is not visible here -- confirm.
char32 GetPairedBracket(char32 codepoint);
// Checks if the text format is not likely to be a number. Used to avoid most of
// the java exceptions thrown when fail to parse.
template <class T>
bool PassesIntPreChesks(const UnicodeText& text, const T result) {
if (text.empty() ||
(std::is_same<T, int32>::value && text.size_codepoints() > 10) ||
(std::is_same<T, int64>::value && text.size_codepoints() > 19)) {
return false;
}
for (auto it = text.begin(); it != text.end(); ++it) {
if (!IsDigit(*it)) {
return false;
}
}
return true;
}
} // namespace libtextclassifier3
#endif // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_