// third_party/boost/include/boost/spirit/home/support/char_encoding/unicode/query.hpp - webm/webmlive - Git at Google

 /*=============================================================================
     Copyright (c) 2001-2011 Joel de Guzman

     Distributed under the Boost Software License, Version 1.0. (See accompanying
     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

     Autogenerated by MultiStageTable.py (Unicode multi-stage
     table builder) (c) Peter Kankowski, 2008
 ==============================================================================*/
 #if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
 #define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010

 #include <boost/cstdint.hpp>

 # include "category_table.hpp"
 # include "script_table.hpp"
 # include "lowercase_table.hpp"
 # include "uppercase_table.hpp"

 namespace boost { namespace spirit { namespace ucd
 {
     // This header provides Basic (Level 1) Unicode Support
     // See http://unicode.org/reports/tr18/ for details

     struct properties
     {
         // bit pattern: xxMMMCCC
         // MMM: major_category
         // CCC: category

         enum major_category
         {
             letter,
             mark,
             number,
             separator,
             other,
             punctuation,
             symbol
         };

         enum category
         {
             uppercase_letter = 0,   // [Lu] an uppercase letter
             lowercase_letter,       // [Ll] a lowercase letter
             titlecase_letter,       // [Lt] a digraphic character, with first part uppercase
             modifier_letter,        // [Lm] a modifier letter
             other_letter,           // [Lo] other letters, including syllables and ideographs

             nonspacing_mark = 8,    // [Mn] a nonspacing combining mark (zero advance width)
             enclosing_mark,         // [Me] an enclosing combining mark
             spacing_mark,           // [Mc] a spacing combining mark (positive advance width)

             decimal_number = 16,    // [Nd] a decimal digit
             letter_number,          // [Nl] a letterlike numeric character
             other_number,           // [No] a numeric character of other type

             space_separator = 24,   // [Zs] a space character (of various non-zero widths)
             line_separator,         // [Zl] U+2028 LINE SEPARATOR only
             paragraph_separator,    // [Zp] U+2029 PARAGRAPH SEPARATOR only

             control = 32,           // [Cc] a C0 or C1 control code
             format,                 // [Cf] a format control character
             private_use,            // [Co] a private-use character
             surrogate,              // [Cs] a surrogate code point
             unassigned,             // [Cn] a reserved unassigned code point or a noncharacter

             dash_punctuation = 40,  // [Pd] a dash or hyphen punctuation mark
             open_punctuation,       // [Ps] an opening punctuation mark (of a pair)
             close_punctuation,      // [Pe] a closing punctuation mark (of a pair)
             connector_punctuation,  // [Pc] a connecting punctuation mark, like a tie
             other_punctuation,      // [Po] a punctuation mark of other type
             initial_punctuation,    // [Pi] an initial quotation mark
             final_punctuation,      // [Pf] a final quotation mark

             math_symbol = 48,       // [Sm] a symbol of primarily mathematical use
             currency_symbol,        // [Sc] a currency sign
             modifier_symbol,        // [Sk] a non-letterlike modifier symbol
             other_symbol            // [So] a symbol of other type
         };

         enum derived_properties
         {
             alphabetic = 64,
             uppercase = 128,
             lowercase = 256,
             white_space = 512,
             hex_digit = 1024,
             noncharacter_code_point = 2048,
             default_ignorable_code_point = 4096
         };

         enum script
         {
             arabic = 0,
             imperial_aramaic = 1,
             armenian = 2,
             avestan = 3,
             balinese = 4,
             bamum = 5,
             bengali = 6,
             bopomofo = 7,
             braille = 8,
             buginese = 9,
             buhid = 10,
             canadian_aboriginal = 11,
             carian = 12,
             cham = 13,
             cherokee = 14,
             coptic = 15,
             cypriot = 16,
             cyrillic = 17,
             devanagari = 18,
             deseret = 19,
             egyptian_hieroglyphs = 20,
             ethiopic = 21,
             georgian = 22,
             glagolitic = 23,
             gothic = 24,
             greek = 25,
             gujarati = 26,
             gurmukhi = 27,
             hangul = 28,
             han = 29,
             hanunoo = 30,
             hebrew = 31,
             hiragana = 32,
             katakana_or_hiragana = 33,
             old_italic = 34,
             javanese = 35,
             kayah_li = 36,
             katakana = 37,
             kharoshthi = 38,
             khmer = 39,
             kannada = 40,
             kaithi = 41,
             tai_tham = 42,
             lao = 43,
             latin = 44,
             lepcha = 45,
             limbu = 46,
             linear_b = 47,
             lisu = 48,
             lycian = 49,
             lydian = 50,
             malayalam = 51,
             mongolian = 52,
             meetei_mayek = 53,
             myanmar = 54,
             nko = 55,
             ogham = 56,
             ol_chiki = 57,
             old_turkic = 58,
             oriya = 59,
             osmanya = 60,
             phags_pa = 61,
             inscriptional_pahlavi = 62,
             phoenician = 63,
             inscriptional_parthian = 64,
             rejang = 65,
             runic = 66,
             samaritan = 67,
             old_south_arabian = 68,
             saurashtra = 69,
             shavian = 70,
             sinhala = 71,
             sundanese = 72,
             syloti_nagri = 73,
             syriac = 74,
             tagbanwa = 75,
             tai_le = 76,
             new_tai_lue = 77,
             tamil = 78,
             tai_viet = 79,
             telugu = 80,
             tifinagh = 81,
             tagalog = 82,
             thaana = 83,
             thai = 84,
             tibetan = 85,
             ugaritic = 86,
             vai = 87,
             old_persian = 88,
             cuneiform = 89,
             yi = 90,
             inherited = 91,
             common = 92,
             unknown = 93
         };
     };

     inline properties::category get_category(::boost::uint32_t ch)
     {
         return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
     }

     inline properties::major_category get_major_category(::boost::uint32_t ch)
     {
         return static_cast<properties::major_category>(get_category(ch) >> 3);
     }

     inline bool is_punctuation(::boost::uint32_t ch)
     {
         return get_major_category(ch) == properties::punctuation;
     }

     inline bool is_decimal_number(::boost::uint32_t ch)
     {
         return get_category(ch) == properties::decimal_number;
     }

     inline bool is_hex_digit(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::hex_digit) != 0;
     }

     inline bool is_control(::boost::uint32_t ch)
     {
         return get_category(ch) == properties::control;
     }

     inline bool is_alphabetic(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::alphabetic) != 0;
     }

     inline bool is_alphanumeric(::boost::uint32_t ch)
     {
         return is_decimal_number(ch) || is_alphabetic(ch);
     }

     inline bool is_uppercase(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::uppercase) != 0;
     }

     inline bool is_lowercase(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::lowercase) != 0;
     }

     inline bool is_white_space(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::white_space) != 0;
     }

     inline bool is_blank(::boost::uint32_t ch)
     {
         switch (ch)
         {
             case '\n': case '\v': case '\f': case '\r':
                 return false;
             default:
                 return is_white_space(ch)
                 && !(   get_category(ch) == properties::line_separator
                     ||  get_category(ch) == properties::paragraph_separator
                     );
         }
     }

     inline bool is_graph(::boost::uint32_t ch)
     {
         return !(   is_white_space(ch)
                 ||  get_category(ch) == properties::control
                 ||  get_category(ch) == properties::surrogate
                 ||  get_category(ch) == properties::unassigned
                 );
     }

     inline bool is_print(::boost::uint32_t ch)
     {
         return (is_graph(ch) || is_blank(ch)) && !is_control(ch);
     }

     inline bool is_noncharacter_code_point(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
     }

     inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
     {
         return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
     }

     inline properties::script get_script(::boost::uint32_t ch)
     {
         return static_cast<properties::script>(detail::script_lookup(ch) & 0x3F);
     }

     inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
     {
         // The table returns 0 to signal that this code maps to itself
         ::boost::uint32_t r = detail::lowercase_lookup(ch);
         return (r == 0)? ch : r;
     }

     inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
     {
         // The table returns 0 to signal that this code maps to itself
         ::boost::uint32_t r = detail::uppercase_lookup(ch);
         return (r == 0)? ch : r;
     }
 }}}

 #endif
	/*=============================================================================
	Copyright (c) 2001-2011 Joel de Guzman

	Distributed under the Boost Software License, Version 1.0. (See accompanying
	file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

	Autogenerated by MultiStageTable.py (Unicode multi-stage
	table builder) (c) Peter Kankowski, 2008
	==============================================================================*/
	#if !defined(BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010)
	#define BOOST_SPIRIT_UNICODE_QUERY_FEBRUARY_2_2010

	#include <boost/cstdint.hpp>

	# include "category_table.hpp"
	# include "script_table.hpp"
	# include "lowercase_table.hpp"
	# include "uppercase_table.hpp"

	namespace boost { namespace spirit { namespace ucd
	{
	// This header provides Basic (Level 1) Unicode Support
	// See http://unicode.org/reports/tr18/ for details

	struct properties
	{
	// bit pattern: xxMMMCCC
	// MMM: major_category
	// CCC: category

	enum major_category
	{
	letter,
	mark,
	number,
	separator,
	other,
	punctuation,
	symbol
	};

	enum category
	{
	uppercase_letter = 0, // [Lu] an uppercase letter
	lowercase_letter, // [Ll] a lowercase letter
	titlecase_letter, // [Lt] a digraphic character, with first part uppercase
	modifier_letter, // [Lm] a modifier letter
	other_letter, // [Lo] other letters, including syllables and ideographs

	nonspacing_mark = 8, // [Mn] a nonspacing combining mark (zero advance width)
	enclosing_mark, // [Me] an enclosing combining mark
	spacing_mark, // [Mc] a spacing combining mark (positive advance width)

	decimal_number = 16, // [Nd] a decimal digit
	letter_number, // [Nl] a letterlike numeric character
	other_number, // [No] a numeric character of other type

	space_separator = 24, // [Zs] a space character (of various non-zero widths)
	line_separator, // [Zl] U+2028 LINE SEPARATOR only
	paragraph_separator, // [Zp] U+2029 PARAGRAPH SEPARATOR only

	control = 32, // [Cc] a C0 or C1 control code
	format, // [Cf] a format control character
	private_use, // [Co] a private-use character
	surrogate, // [Cs] a surrogate code point
	unassigned, // [Cn] a reserved unassigned code point or a noncharacter

	dash_punctuation = 40, // [Pd] a dash or hyphen punctuation mark
	open_punctuation, // [Ps] an opening punctuation mark (of a pair)
	close_punctuation, // [Pe] a closing punctuation mark (of a pair)
	connector_punctuation, // [Pc] a connecting punctuation mark, like a tie
	other_punctuation, // [Po] a punctuation mark of other type
	initial_punctuation, // [Pi] an initial quotation mark
	final_punctuation, // [Pf] a final quotation mark

	math_symbol = 48, // [Sm] a symbol of primarily mathematical use
	currency_symbol, // [Sc] a currency sign
	modifier_symbol, // [Sk] a non-letterlike modifier symbol
	other_symbol // [So] a symbol of other type
	};

	enum derived_properties
	{
	alphabetic = 64,
	uppercase = 128,
	lowercase = 256,
	white_space = 512,
	hex_digit = 1024,
	noncharacter_code_point = 2048,
	default_ignorable_code_point = 4096
	};

	enum script
	{
	arabic = 0,
	imperial_aramaic = 1,
	armenian = 2,
	avestan = 3,
	balinese = 4,
	bamum = 5,
	bengali = 6,
	bopomofo = 7,
	braille = 8,
	buginese = 9,
	buhid = 10,
	canadian_aboriginal = 11,
	carian = 12,
	cham = 13,
	cherokee = 14,
	coptic = 15,
	cypriot = 16,
	cyrillic = 17,
	devanagari = 18,
	deseret = 19,
	egyptian_hieroglyphs = 20,
	ethiopic = 21,
	georgian = 22,
	glagolitic = 23,
	gothic = 24,
	greek = 25,
	gujarati = 26,
	gurmukhi = 27,
	hangul = 28,
	han = 29,
	hanunoo = 30,
	hebrew = 31,
	hiragana = 32,
	katakana_or_hiragana = 33,
	old_italic = 34,
	javanese = 35,
	kayah_li = 36,
	katakana = 37,
	kharoshthi = 38,
	khmer = 39,
	kannada = 40,
	kaithi = 41,
	tai_tham = 42,
	lao = 43,
	latin = 44,
	lepcha = 45,
	limbu = 46,
	linear_b = 47,
	lisu = 48,
	lycian = 49,
	lydian = 50,
	malayalam = 51,
	mongolian = 52,
	meetei_mayek = 53,
	myanmar = 54,
	nko = 55,
	ogham = 56,
	ol_chiki = 57,
	old_turkic = 58,
	oriya = 59,
	osmanya = 60,
	phags_pa = 61,
	inscriptional_pahlavi = 62,
	phoenician = 63,
	inscriptional_parthian = 64,
	rejang = 65,
	runic = 66,
	samaritan = 67,
	old_south_arabian = 68,
	saurashtra = 69,
	shavian = 70,
	sinhala = 71,
	sundanese = 72,
	syloti_nagri = 73,
	syriac = 74,
	tagbanwa = 75,
	tai_le = 76,
	new_tai_lue = 77,
	tamil = 78,
	tai_viet = 79,
	telugu = 80,
	tifinagh = 81,
	tagalog = 82,
	thaana = 83,
	thai = 84,
	tibetan = 85,
	ugaritic = 86,
	vai = 87,
	old_persian = 88,
	cuneiform = 89,
	yi = 90,
	inherited = 91,
	common = 92,
	unknown = 93
	};
	};

	inline properties::category get_category(::boost::uint32_t ch)
	{
	return static_cast<properties::category>(detail::category_lookup(ch) & 0x3F);
	}

	inline properties::major_category get_major_category(::boost::uint32_t ch)
	{
	return static_cast<properties::major_category>(get_category(ch) >> 3);
	}

	inline bool is_punctuation(::boost::uint32_t ch)
	{
	return get_major_category(ch) == properties::punctuation;
	}

	inline bool is_decimal_number(::boost::uint32_t ch)
	{
	return get_category(ch) == properties::decimal_number;
	}

	inline bool is_hex_digit(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::hex_digit) != 0;
	}

	inline bool is_control(::boost::uint32_t ch)
	{
	return get_category(ch) == properties::control;
	}

	inline bool is_alphabetic(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::alphabetic) != 0;
	}

	inline bool is_alphanumeric(::boost::uint32_t ch)
	{
	return is_decimal_number(ch) \|\| is_alphabetic(ch);
	}

	inline bool is_uppercase(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::uppercase) != 0;
	}

	inline bool is_lowercase(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::lowercase) != 0;
	}

	inline bool is_white_space(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::white_space) != 0;
	}

	inline bool is_blank(::boost::uint32_t ch)
	{
	switch (ch)
	{
	case '\n': case '\v': case '\f': case '\r':
	return false;
	default:
	return is_white_space(ch)
	&& !( get_category(ch) == properties::line_separator
	\|\| get_category(ch) == properties::paragraph_separator
	);
	}
	}

	inline bool is_graph(::boost::uint32_t ch)
	{
	return !( is_white_space(ch)
	\|\| get_category(ch) == properties::control
	\|\| get_category(ch) == properties::surrogate
	\|\| get_category(ch) == properties::unassigned
	);
	}

	inline bool is_print(::boost::uint32_t ch)
	{
	return (is_graph(ch) \|\| is_blank(ch)) && !is_control(ch);
	}

	inline bool is_noncharacter_code_point(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::noncharacter_code_point) != 0;
	}

	inline bool is_default_ignorable_code_point(::boost::uint32_t ch)
	{
	return (detail::category_lookup(ch) & properties::default_ignorable_code_point) != 0;
	}

	inline properties::script get_script(::boost::uint32_t ch)
	{
	return static_cast<properties::script>(detail::script_lookup(ch) & 0x3F);
	}

	inline ::boost::uint32_t to_lowercase(::boost::uint32_t ch)
	{
	// The table returns 0 to signal that this code maps to itself
	::boost::uint32_t r = detail::lowercase_lookup(ch);
	return (r == 0)? ch : r;
	}

	inline ::boost::uint32_t to_uppercase(::boost::uint32_t ch)
	{
	// The table returns 0 to signal that this code maps to itself
	::boost::uint32_t r = detail::uppercase_lookup(ch);
	return (r == 0)? ch : r;
	}
	}}}

	#endif