// utils/utf8/unilib-common.h - chromiumos/third_party/libtextclassifier - Git at Google

 // Copyright 2020 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //

 #ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
 #define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_

 #include "utils/base/integral_types.h"
 #include "utils/utf8/unicodetext.h"

 namespace libtextclassifier3 {

 // Single-codepoint classification predicates: each takes one char32 and
 // returns a bool. Only the declarations appear in this header, so the exact
 // set of codepoints each predicate accepts has to be read from the
 // definitions.
 bool IsOpeningBracket(char32 codepoint);
 bool IsClosingBracket(char32 codepoint);
 bool IsWhitespace(char32 codepoint);
 bool IsBidirectional(char32 codepoint);
 bool IsDigit(char32 codepoint);
 bool IsLower(char32 codepoint);
 bool IsUpper(char32 codepoint);
 bool IsPunctuation(char32 codepoint);
 bool IsPercentage(char32 codepoint);
 bool IsSlash(char32 codepoint);
 bool IsMinus(char32 codepoint);
 bool IsNumberSign(char32 codepoint);
 bool IsDot(char32 codepoint);
 bool IsApostrophe(char32 codepoint);
 bool IsQuotation(char32 codepoint);
 bool IsAmpersand(char32 codepoint);

 // Letter predicates over a single char32. Going by the names, most of them
 // are script-specific; the ranges actually covered are not visible from this
 // header and must be checked against the definitions.
 bool IsLatinLetter(char32 codepoint);
 bool IsArabicLetter(char32 codepoint);
 bool IsCyrillicLetter(char32 codepoint);
 bool IsChineseLetter(char32 codepoint);
 bool IsJapaneseLetter(char32 codepoint);
 bool IsKoreanLetter(char32 codepoint);
 bool IsThaiLetter(char32 codepoint);
 bool IsLetter(char32 codepoint);
 // NOTE(review): "CJT" presumably abbreviates Chinese/Japanese/Thai - confirm
 // against the definition.
 bool IsCJTletter(char32 codepoint);

 // Codepoint-to-codepoint mappings: each takes one char32 and returns one
 // char32.
 // NOTE(review): what is returned for a codepoint that has no mapping (for
 // example a non-bracket passed to GetPairedBracket) is not visible from this
 // header - check the definitions before relying on it.
 char32 ToLower(char32 codepoint);
 char32 ToUpper(char32 codepoint);
 char32 GetPairedBracket(char32 codepoint);

 // Checks if the text format is not likely to be a number. Used to avoid most of
 // the java exceptions thrown when fail to parse.
 template <class T>
 bool PassesIntPreChesks(const UnicodeText& text, const T result) {
   if (text.empty() ||
       (std::is_same<T, int32>::value && text.size_codepoints() > 10) ||
       (std::is_same<T, int64>::value && text.size_codepoints() > 19)) {
     return false;
   }
   for (auto it = text.begin(); it != text.end(); ++it) {
     if (!IsDigit(*it)) {
       return false;
     }
   }
   return true;
 }

 }  // namespace libtextclassifier3

 #endif  // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
	// Copyright 2020 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//

	#ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_
	#define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_

	#include "utils/base/integral_types.h"
	#include "utils/utf8/unicodetext.h"

	namespace libtextclassifier3 {

	// Single-codepoint classification predicates: each takes one char32 and
	// returns a bool. Only the declarations appear in this header, so the exact
	// set of codepoints each predicate accepts has to be read from the
	// definitions.
	bool IsOpeningBracket(char32 codepoint);
	bool IsClosingBracket(char32 codepoint);
	bool IsWhitespace(char32 codepoint);
	bool IsBidirectional(char32 codepoint);
	bool IsDigit(char32 codepoint);
	bool IsLower(char32 codepoint);
	bool IsUpper(char32 codepoint);
	bool IsPunctuation(char32 codepoint);
	bool IsPercentage(char32 codepoint);
	bool IsSlash(char32 codepoint);
	bool IsMinus(char32 codepoint);
	bool IsNumberSign(char32 codepoint);
	bool IsDot(char32 codepoint);
	bool IsApostrophe(char32 codepoint);
	bool IsQuotation(char32 codepoint);
	bool IsAmpersand(char32 codepoint);

	// Letter predicates over a single char32. Going by the names, most of them
	// are script-specific; the ranges actually covered are not visible from this
	// header and must be checked against the definitions.
	bool IsLatinLetter(char32 codepoint);
	bool IsArabicLetter(char32 codepoint);
	bool IsCyrillicLetter(char32 codepoint);
	bool IsChineseLetter(char32 codepoint);
	bool IsJapaneseLetter(char32 codepoint);
	bool IsKoreanLetter(char32 codepoint);
	bool IsThaiLetter(char32 codepoint);
	bool IsLetter(char32 codepoint);
	// NOTE(review): "CJT" presumably abbreviates Chinese/Japanese/Thai - confirm
	// against the definition.
	bool IsCJTletter(char32 codepoint);

	// Codepoint-to-codepoint mappings: each takes one char32 and returns one
	// char32.
	// NOTE(review): what is returned for a codepoint that has no mapping (for
	// example a non-bracket passed to GetPairedBracket) is not visible from this
	// header - check the definitions before relying on it.
	char32 ToLower(char32 codepoint);
	char32 ToUpper(char32 codepoint);
	char32 GetPairedBracket(char32 codepoint);

	// Checks if the text format is not likely to be a number. Used to avoid most of
	// the java exceptions thrown when fail to parse.
	template <class T>
	bool PassesIntPreChesks(const UnicodeText& text, const T result) {
	if (text.empty() \|\|
	(std::is_same<T, int32>::value && text.size_codepoints() > 10) \|\|
	(std::is_same<T, int64>::value && text.size_codepoints() > 19)) {
	return false;
	}
	for (auto it = text.begin(); it != text.end(); ++it) {
	if (!IsDigit(*it)) {
	return false;
	}
	}
	return true;
	}

	} // namespace libtextclassifier3

	#endif // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_COMMON_H_