/*============================================================================= | |
Copyright (c) 2001-2011 Joel de Guzman | |
Distributed under the Boost Software License, Version 1.0. (See accompanying | |
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
Autogenerated by MultiStageTable.py (Unicode multi-stage | |
table builder) (c) Peter Kankowski, 2008 | |
==============================================================================*/ | |
#if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010) | |
#define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010 | |
#include <boost/cstdint.hpp> | |
# include "category_table.hpp" | |
# include "script_table.hpp" | |
# include "lowercase_table.hpp" | |
# include "uppercase_table.hpp" | |
namespace boost { namespace spirit { namespace ucd | |
{ | |
// This header provides Basic Unicode Support (Level 1) as described in
// UTS #18, Unicode Regular Expressions.
// See http://unicode.org/reports/tr18/ for details
// Constants naming the Unicode properties that the query functions in this
// header report: major categories, general categories, derived boolean
// properties and scripts.
struct properties
{
    // A category value fits in six bits laid out as MMMCCC (xxMMMCCC when
    // viewed as a byte):
    //   MMM - the major_category the category belongs to
    //   CCC - the position of the category inside that major category
    // Shifting a category right by three therefore yields its major category.
    enum major_category
    {
        letter      = 0,
        mark        = 1,
        number      = 2,
        separator   = 3,
        other       = 4,
        punctuation = 5,
        symbol      = 6
    };

    // General categories. The first entry of each group is anchored at
    // (major_category << 3); the remaining entries count up from there.
    enum category
    {
        // Letters (L*)
        uppercase_letter = letter << 3,         // [Lu] uppercase letter
        lowercase_letter,                       // [Ll] lowercase letter
        titlecase_letter,                       // [Lt] digraphic character whose first part is uppercase
        modifier_letter,                        // [Lm] modifier letter
        other_letter,                           // [Lo] any other letter, including syllables and ideographs

        // Marks (M*)
        nonspacing_mark = mark << 3,            // [Mn] nonspacing combining mark (zero advance width)
        enclosing_mark,                         // [Me] enclosing combining mark
        spacing_mark,                           // [Mc] spacing combining mark (positive advance width)

        // Numbers (N*)
        decimal_number = number << 3,           // [Nd] decimal digit
        letter_number,                          // [Nl] letterlike numeric character
        other_number,                           // [No] numeric character of another type

        // Separators (Z*)
        space_separator = separator << 3,       // [Zs] space character (of various non-zero widths)
        line_separator,                         // [Zl] U+2028 LINE SEPARATOR only
        paragraph_separator,                    // [Zp] U+2029 PARAGRAPH SEPARATOR only

        // Other (C*)
        control = other << 3,                   // [Cc] C0 or C1 control code
        format,                                 // [Cf] format control character
        private_use,                            // [Co] private-use character
        surrogate,                              // [Cs] surrogate code point
        unassigned,                             // [Cn] reserved unassigned code point or noncharacter

        // Punctuation (P*)
        dash_punctuation = punctuation << 3,    // [Pd] dash or hyphen punctuation mark
        open_punctuation,                       // [Ps] opening punctuation mark (of a pair)
        close_punctuation,                      // [Pe] closing punctuation mark (of a pair)
        connector_punctuation,                  // [Pc] connecting punctuation mark, like a tie
        other_punctuation,                      // [Po] punctuation mark of another type
        initial_punctuation,                    // [Pi] initial quotation mark
        final_punctuation,                      // [Pf] final quotation mark

        // Symbols (S*)
        math_symbol = symbol << 3,              // [Sm] symbol of primarily mathematical use
        currency_symbol,                        // [Sc] currency sign
        modifier_symbol,                        // [Sk] non-letterlike modifier symbol
        other_symbol                            // [So] symbol of another type
    };

    // Single-bit flags. They start at bit 6, directly above the six bits
    // used by a category value, so a flag never overlaps a category.
    enum derived_properties
    {
        alphabetic                   = 1 << 6,
        uppercase                    = 1 << 7,
        lowercase                    = 1 << 8,
        white_space                  = 1 << 9,
        hex_digit                    = 1 << 10,
        noncharacter_code_point      = 1 << 11,
        default_ignorable_code_point = 1 << 12
    };

    // Script identifiers, numbered 0 through 93 (seven bits are needed to
    // hold the largest value).
    // NOTE(review): the ordering looks like it follows the four-letter
    // ISO 15924 script codes rather than the names used here - confirm
    // against the table generator before relying on it.
    enum script
    {
        arabic                 = 0,
        imperial_aramaic       = 1,
        armenian               = 2,
        avestan                = 3,
        balinese               = 4,
        bamum                  = 5,
        bengali                = 6,
        bopomofo               = 7,
        braille                = 8,
        buginese               = 9,
        buhid                  = 10,
        canadian_aboriginal    = 11,
        carian                 = 12,
        cham                   = 13,
        cherokee               = 14,
        coptic                 = 15,
        cypriot                = 16,
        cyrillic               = 17,
        devanagari             = 18,
        deseret                = 19,
        egyptian_hieroglyphs   = 20,
        ethiopic               = 21,
        georgian               = 22,
        glagolitic             = 23,
        gothic                 = 24,
        greek                  = 25,
        gujarati               = 26,
        gurmukhi               = 27,
        hangul                 = 28,
        han                    = 29,
        hanunoo                = 30,
        hebrew                 = 31,
        hiragana               = 32,
        katakana_or_hiragana   = 33,
        old_italic             = 34,
        javanese               = 35,
        kayah_li               = 36,
        katakana               = 37,
        kharoshthi             = 38,
        khmer                  = 39,
        kannada                = 40,
        kaithi                 = 41,
        tai_tham               = 42,
        lao                    = 43,
        latin                  = 44,
        lepcha                 = 45,
        limbu                  = 46,
        linear_b               = 47,
        lisu                   = 48,
        lycian                 = 49,
        lydian                 = 50,
        malayalam              = 51,
        mongolian              = 52,
        meetei_mayek           = 53,
        myanmar                = 54,
        nko                    = 55,
        ogham                  = 56,
        ol_chiki               = 57,
        old_turkic             = 58,
        oriya                  = 59,
        osmanya                = 60,
        phags_pa               = 61,
        inscriptional_pahlavi  = 62,
        phoenician             = 63,
        inscriptional_parthian = 64,
        rejang                 = 65,
        runic                  = 66,
        samaritan              = 67,
        old_south_arabian      = 68,
        saurashtra             = 69,
        shavian                = 70,
        sinhala                = 71,
        sundanese              = 72,
        syloti_nagri           = 73,
        syriac                 = 74,
        tagbanwa               = 75,
        tai_le                 = 76,
        new_tai_lue            = 77,
        tamil                  = 78,
        tai_viet               = 79,
        telugu                 = 80,
        tifinagh               = 81,
        tagalog                = 82,
        thaana                 = 83,
        thai                   = 84,
        tibetan                = 85,
        ugaritic               = 86,
        vai                    = 87,
        old_persian            = 88,
        cuneiform              = 89,
        yi                     = 90,
        inherited              = 91,
        common                 = 92,
        unknown                = 93
    };
};
// Returns the general category of the code point ch.
// Only the low six bits of the value produced by detail::category_lookup
// are kept; the bits above them are the derived_properties flags.
inline properties::category get_category(::boost::uint32_t ch)
{
    ::boost::uint32_t const category_bits = detail::category_lookup(ch) & 0x3F;
    return static_cast<properties::category>(category_bits);
}
// Returns the major category (letter, mark, number, ...) of the code point ch.
inline properties::major_category get_major_category(::boost::uint32_t ch)
{
    // Category values are laid out as MMMCCC, so discarding the three low
    // (CCC) bits leaves the major category.
    properties::category const cat = get_category(ch);
    return static_cast<properties::major_category>(cat >> 3);
}
// True when the major category of ch is properties::punctuation
// (i.e. any of the *_punctuation categories).
inline bool is_punctuation(::boost::uint32_t ch)
{
    properties::major_category const major = get_major_category(ch);
    return properties::punctuation == major;
}
// True when the general category of ch is properties::decimal_number ([Nd]).
inline bool is_decimal_number(::boost::uint32_t ch)
{
    properties::category const cat = get_category(ch);
    return properties::decimal_number == cat;
}
// True when the properties::hex_digit flag is set in the value that
// detail::category_lookup reports for ch.
inline bool is_hex_digit(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::hex_digit);
}
// True when the general category of ch is properties::control ([Cc]).
inline bool is_control(::boost::uint32_t ch)
{
    properties::category const cat = get_category(ch);
    return properties::control == cat;
}
// True when the properties::alphabetic flag is set in the value that
// detail::category_lookup reports for ch.
inline bool is_alphabetic(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::alphabetic);
}
// True when ch is either a decimal number or alphabetic.
inline bool is_alphanumeric(::boost::uint32_t ch)
{
    // The decimal-number check runs first; the alphabetic check is only
    // evaluated when it fails.
    if (is_decimal_number(ch))
        return true;
    return is_alphabetic(ch);
}
// True when the properties::uppercase flag is set in the value that
// detail::category_lookup reports for ch.
inline bool is_uppercase(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::uppercase);
}
// True when the properties::lowercase flag is set in the value that
// detail::category_lookup reports for ch.
inline bool is_lowercase(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::lowercase);
}
// True when the properties::white_space flag is set in the value that
// detail::category_lookup reports for ch.
inline bool is_white_space(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::white_space);
}
// True when ch is white space that does not break a line: the ASCII line
// breaking controls (\n, \v, \f, \r) and the line/paragraph separator
// categories are rejected, every other white_space code point is accepted.
inline bool is_blank(::boost::uint32_t ch)
{
    // ASCII vertical white space is never blank, whatever the table says.
    if (ch == '\n' || ch == '\v' || ch == '\f' || ch == '\r')
        return false;

    if (!is_white_space(ch))
        return false;

    // Among the remaining white space, only [Zl] and [Zp] are excluded.
    properties::category const cat = get_category(ch);
    return cat != properties::line_separator
        && cat != properties::paragraph_separator;
}
// True when ch is not white space and its general category is none of
// control, surrogate or unassigned.
inline bool is_graph(::boost::uint32_t ch)
{
    if (is_white_space(ch))
        return false;

    switch (get_category(ch))
    {
        case properties::control:
        case properties::surrogate:
        case properties::unassigned:
            return false;
        default:
            return true;
    }
}
// True when ch is a graphic or blank character and is not a control code.
inline bool is_print(::boost::uint32_t ch)
{
    // Control codes are never printable, even those that is_blank accepts.
    if (is_control(ch))
        return false;
    return is_graph(ch) || is_blank(ch);
}
// True when the properties::noncharacter_code_point flag is set in the
// value that detail::category_lookup reports for ch.
inline bool is_noncharacter_code_point(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::noncharacter_code_point);
}
// True when the properties::default_ignorable_code_point flag is set in
// the value that detail::category_lookup reports for ch.
inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
{
    ::boost::uint32_t const entry = detail::category_lookup(ch);
    return 0 != (entry & properties::default_ignorable_code_point);
}
// Returns the script of the code point ch, as reported by
// detail::script_lookup.
inline properties::script get_script(::boost::uint32_t ch)
{
    // properties::script runs from 0 (arabic) to 93 (unknown), so seven
    // bits are required. A six-bit mask (0x3F) would fold every value in
    // 64..93 onto 0..29 - for example common (92) would be reported as
    // hangul (28) - making inherited, common, unknown and all scripts from
    // inscriptional_parthian onwards unreachable.
    // NOTE(review): assumes script_lookup keeps the script id in the low
    // seven bits of its result - confirm against script_table.hpp.
    return static_cast<properties::script>(detail::script_lookup(ch) & 0x7F);
}
// Returns the lowercase mapping of ch, or ch itself when the lowercase
// table has no mapping for it.
inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
{
    ::boost::uint32_t const mapped = detail::lowercase_lookup(ch);
    if (mapped != 0)
        return mapped;
    // A table result of 0 signals that this code maps to itself.
    return ch;
}
// Returns the uppercase mapping of ch, or ch itself when the uppercase
// table has no mapping for it.
inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
{
    ::boost::uint32_t const mapped = detail::uppercase_lookup(ch);
    if (mapped != 0)
        return mapped;
    // A table result of 0 signals that this code maps to itself.
    return ch;
}
}}} | |
#endif |