third_party/boost/include/boost/spirit/home/support/detail/lexer/parser/tokeniser/re_tokeniser_helper.hpp - webm/webmlive - Git at Google

 // tokeniser_helper.hpp
 // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
 //
 // Distributed under the Boost Software License, Version 1.0. (See accompanying
 // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 #ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
 #define BOOST_LEXER_RE_TOKENISER_HELPER_H

 #include "../../char_traits.hpp"
 // strlen()
 #include <cstring>
 #include "../../size_t.hpp"
 #include "re_tokeniser_state.hpp"

 namespace boost
 {
 namespace lexer
 {
 namespace detail
 {
 template<typename CharT, typename Traits = char_traits<CharT> >
 class basic_re_tokeniser_helper
 {
 public:
     typedef basic_re_tokeniser_state<CharT> state;
     typedef std::basic_string<CharT> string;

     // Decodes whatever follows a backslash in the regex.
     // If the next char is a charset shortcut letter (d, D, s, S, w or W) it
     // is consumed and the equivalent bracket expression text is returned,
     // with its length stored in str_len_; ch_ is not written in that case.
     // Otherwise 0 is returned, str_len_ is set to 0 and ch_ receives the
     // single character decoded by chr ().
     // Throws runtime_error if the regex ends directly after the backslash.
     static const CharT *escape_sequence (state &state_, CharT &ch_,
         std::size_t &str_len_)
     {
         if (state_.eos ())
         {
             throw runtime_error ("Unexpected end of regex "
                 "following '\\'.");
         }

         const CharT * const shortcut_ =
             charset_shortcut (*state_._curr, str_len_);

         if (shortcut_ == 0)
         {
             // Not a shortcut: decode one (possibly escaped) character.
             ch_ = chr (state_);
             return 0;
         }

         // Step past the shortcut letter itself.
         state_.increment ();
         return shortcut_;
     }

     // Parses the contents of a bracket expression (the nested call below
     // skips the leading '[', so the opening bracket is expected to have been
     // consumed already). Every character named by the set is appended to
     // chars_ and negated_ reports whether the set started with '^'.
     // This function can call itself: a shortcut such as \d is expanded by
     // parsing its bracket expression text through a temporary state.
     // Throws runtime_error on premature end of input, when a shortcut's
     // negation differs from that of the enclosing set, and when a
     // non-negated set ends up empty.
     static void charset (state &state_, string &chars_, bool &negated_)
     {
         CharT curr_ = 0;
         bool at_end_ = state_.next (curr_);

         if (at_end_)
         {
             // Pointless returning index if at end of string
             throw runtime_error ("Unexpected end of regex following '['.");
         }

         negated_ = (curr_ == '^');

         if (negated_)
         {
             at_end_ = state_.next (curr_);

             if (at_end_)
             {
                 // Pointless returning index if at end of string
                 throw runtime_error ("Unexpected end of regex "
                     "following '^'.");
             }
         }

         // True while the most recent item was a shortcut (\d etc.).
         bool is_shortcut_ = false;
         // Most recent single char; appended once we know no '-' follows.
         CharT pending_ = 0;

         while (curr_ != ']')
         {
             if (curr_ != '\\')
             {
                 // TODO: POSIX charsets ('[' followed by ':') are not
                 // recognised; such input is treated as ordinary chars.
                 is_shortcut_ = false;
                 pending_ = curr_;
             }
             else
             {
                 std::size_t shortcut_len_ = 0;
                 const CharT *shortcut_ =
                     escape_sequence (state_, pending_, shortcut_len_);

                 is_shortcut_ = (shortcut_ != 0);

                 if (is_shortcut_)
                 {
                     // Parse the shortcut's own text, skipping its '['.
                     state nested_state_ (shortcut_ + 1,
                         shortcut_ + shortcut_len_, state_._flags,
                         state_._locale);
                     string nested_chars_;
                     bool nested_negated_ = false;

                     charset (nested_state_, nested_chars_, nested_negated_);

                     if (negated_ != nested_negated_)
                     {
                         std::ostringstream ss_;

                         ss_ << "Mismatch in charset negation preceding index "
                             << state_.index () << '.';
                         throw runtime_error (ss_.str ().c_str ());
                     }

                     chars_ += nested_chars_;
                 }
             }

             at_end_ = state_.next (curr_);

             // Covers every branch above
             if (at_end_)
             {
                 // Pointless returning index if at end of string
                 throw runtime_error ("Unexpected end of regex "
                     "(missing ']').");
             }

             if (curr_ == '-')
             {
                 charset_range (is_shortcut_, state_, at_end_, curr_,
                     pending_, chars_);
                 continue;
             }

             if (is_shortcut_)
             {
                 // The shortcut's chars were appended above.
                 continue;
             }

             const bool fold_case_ = (state_._flags & icase) &&
                 (std::isupper (pending_, state_._locale) ||
                 std::islower (pending_, state_._locale));

             if (!fold_case_)
             {
                 chars_ += pending_;
             }
             else
             {
                 // Case-insensitive: store both cases of the letter.
                 const CharT upper_ = std::toupper (pending_, state_._locale);
                 const CharT lower_ = std::tolower (pending_, state_._locale);

                 chars_ += upper_;
                 chars_ += lower_;
             }
         }

         if (chars_.empty () && !negated_)
         {
             throw runtime_error ("Empty charsets not allowed.");
         }
     }

     // Decodes the character that follows a backslash and consumes it from
     // state_. Octal digits, \c and \x are handed to their dedicated decoders;
     // the single-letter escapes (a b e f n r t v) map to their control
     // codes; any other char stands for itself.
     // The caller must already have checked that state_ is not at the end.
     static CharT chr (state &state_)
     {
         const CharT escape_ = *state_._curr;
         CharT decoded_ = 0;

         switch (escape_)
         {
             // Multi-character escapes: the decoder consumes the input.
             case '0':
             case '1':
             case '2':
             case '3':
             case '4':
             case '5':
             case '6':
             case '7':
                 return decode_octal (state_);
             case 'c':
                 return decode_control_char (state_);
             case 'x':
                 return decode_hex (state_);

             // Single-character escapes.
             case 'a':
                 decoded_ = '\a';
                 break;
             case 'b':
                 decoded_ = '\b';
                 break;
             case 'e':
                 decoded_ = 27; // '\e' not recognised by compiler
                 break;
             case 'f':
                 decoded_ = '\f';
                 break;
             case 'n':
                 decoded_ = '\n';
                 break;
             case 'r':
                 decoded_ = '\r';
                 break;
             case 't':
                 decoded_ = '\t';
                 break;
             case 'v':
                 decoded_ = '\v';
                 break;
             default:
                 // Anything else is taken literally.
                 decoded_ = escape_;
                 break;
         }

         // Every case that reaches here used exactly one input char.
         state_.increment ();
         return decoded_;
     }

 private:
     // Maps a shortcut letter (d, D, s, S, w, W) to the text of the
     // equivalent bracket expression and stores that text's length in
     // str_len_. For any other char, returns 0 and sets str_len_ to 0.
     static const char *charset_shortcut (const char ch_,
         std::size_t &str_len_)
     {
         const char *expansion_ = 0;

         switch (ch_)
         {
         case 'd':
             expansion_ = "[0-9]";
             break;
         case 'D':
             expansion_ = "[^0-9]";
             break;
         case 's':
             expansion_ = "[ \t\n\r\f\v]";
             break;
         case 'S':
             expansion_ = "[^ \t\n\r\f\v]";
             break;
         case 'w':
             expansion_ = "[_0-9A-Za-z]";
             break;
         case 'W':
             expansion_ = "[^_0-9A-Za-z]";
             break;
         }

         str_len_ = 0;

         if (expansion_ != 0)
         {
             // Some systems have strlen in namespace std.
             using namespace std;

             str_len_ = strlen (expansion_);
         }

         return expansion_;
     }

     // Wide-character counterpart of the char overload: maps a shortcut
     // letter (d, D, s, S, w, W) to the text of the equivalent bracket
     // expression and stores that text's length in str_len_. For any other
     // char, returns 0 and sets str_len_ to 0.
     static const wchar_t *charset_shortcut (const wchar_t ch_,
         std::size_t &str_len_)
     {
         const wchar_t *expansion_ = 0;

         switch (ch_)
         {
         case 'd':
             expansion_ = L"[0-9]";
             break;
         case 'D':
             expansion_ = L"[^0-9]";
             break;
         case 's':
             expansion_ = L"[ \t\n\r\f\v]";
             break;
         case 'S':
             expansion_ = L"[^ \t\n\r\f\v]";
             break;
         case 'w':
             expansion_ = L"[_0-9A-Za-z]";
             break;
         case 'W':
             expansion_ = L"[^_0-9A-Za-z]";
             break;
         }

         str_len_ = 0;

         if (expansion_ != 0)
         {
             // Some systems have wcslen in namespace std.
             using namespace std;

             str_len_ = wcslen (expansion_);
         }

         return expansion_;
     }

     // Decodes an octal escape of one to three digits, starting at the
     // current char (which is not validated here; chr () only calls this
     // for '0'..'7'). Consumes the digits it uses and returns the
     // accumulated value converted to CharT.
     static CharT decode_octal (state &state_)
     {
         std::size_t value_ = 0;
         unsigned short digits_left_ = 3;
         CharT digit_ = *state_._curr;
         bool more_ = true;

         do
         {
             value_ = value_ * 8 + (digit_ - '0');
             --digits_left_;
             state_.increment ();

             if (state_.eos () || digits_left_ == 0)
             {
                 more_ = false;
             }
             else
             {
                 digit_ = *state_._curr;
                 // Don't consume invalid chars!
                 more_ = (digit_ >= '0' && digit_ <= '7');
             }
         } while (more_);

         return static_cast<CharT> (value_);
     }

     // Decodes a \cX control escape. The current char is the 'c'; the char
     // after it selects the control code: a-z and A-Z give 1..26, '@' gives
     // 0. Throws runtime_error if the input ends after the 'c' or if the
     // selector is any other char.
     static CharT decode_control_char (state &state_)
     {
         // Skip over 'c'
         state_.increment ();

         CharT selector_ = 0;

         if (state_.next (selector_))
         {
             // Pointless returning index if at end of string
             throw runtime_error ("Unexpected end of regex following \\c.");
         }

         if (selector_ >= 'a' && selector_ <= 'z')
         {
             selector_ -= 'a' - 1;
             return selector_;
         }

         if (selector_ >= 'A' && selector_ <= 'Z')
         {
             selector_ -= 'A' - 1;
             return selector_;
         }

         if (selector_ == '@')
         {
             // Apparently...
             return 0;
         }

         std::ostringstream ss_;

         ss_ << "Invalid control char at index " <<
             state_.index () - 1 << '.';
         throw runtime_error (ss_.str ().c_str ());
     }

     // Decodes a \x hexadecimal escape. The current char is the 'x'; at
     // least one hex digit must follow and every further hex digit is
     // consumed as well (there is no limit on the digit count). Returns the
     // accumulated value converted to CharT.
     // Throws runtime_error if the input ends after the 'x' or if the first
     // char after it is not a hex digit.
     static CharT decode_hex (state &state_)
     {
         // Skip over 'x'
         state_.increment ();

         CharT digit_ = 0;

         if (state_.next (digit_))
         {
             // Pointless returning index if at end of string
             throw runtime_error ("Unexpected end of regex following \\x.");
         }

         const bool first_is_hex_ = (digit_ >= '0' && digit_ <= '9') ||
             (digit_ >= 'a' && digit_ <= 'f') ||
             (digit_ >= 'A' && digit_ <= 'F');

         if (!first_is_hex_)
         {
             std::ostringstream ss_;

             ss_ << "Illegal char following \\x at index " <<
                 state_.index () - 1 << '.';
             throw runtime_error (ss_.str ().c_str ());
         }

         std::size_t value_ = 0;
         bool more_ = true;

         while (more_)
         {
             std::size_t nibble_ = 0;

             if (digit_ >= '0' && digit_ <= '9')
             {
                 nibble_ = digit_ - '0';
             }
             else if (digit_ >= 'a' && digit_ <= 'f')
             {
                 nibble_ = 10 + (digit_ - 'a');
             }
             else
             {
                 nibble_ = 10 + (digit_ - 'A');
             }

             value_ = value_ * 16 + nibble_;

             if (state_.eos ())
             {
                 more_ = false;
             }
             else
             {
                 // Peek first: don't consume invalid chars!
                 digit_ = *state_._curr;
                 more_ = (digit_ >= '0' && digit_ <= '9') ||
                     (digit_ >= 'a' && digit_ <= 'f') ||
                     (digit_ >= 'A' && digit_ <= 'F');

                 if (more_)
                 {
                     state_.increment ();
                 }
             }
         }

         return static_cast<CharT> (value_);
     }

     static void charset_range (const bool chset_, state &state_, bool &eos_,
         CharT &ch_, const CharT prev_, string &chars_)
     {
         if (chset_)
         {
             std::ostringstream ss_;

             ss_ << "Charset cannot form start of range preceding "
                 "index " << state_.index () - 1 << '.';
             throw runtime_error (ss_.str ().c_str ());
         }

         eos_ = state_.next (ch_);

         if (eos_)
         {
             // Pointless returning index if at end of string
             throw runtime_error ("Unexpected end of regex "
                 "following '-'.");
         }

         CharT curr_ = 0;

         if (ch_ == '\\')
         {
             std::size_t str_len_ = 0;

             if (escape_sequence (state_, curr_, str_len_))
             {
                 std::ostringstream ss_;

                 ss_ << "Charset cannot form end of range preceding index "
                     << state_.index () << '.';
                 throw runtime_error (ss_.str ().c_str ());
             }
         }
 /*
         else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
         {
             std::ostringstream ss_;

             ss_ << "POSIX char class cannot form end of range at "
                 "index " << state_.index () - 1 << '.';
             throw runtime_error (ss_.str ().c_str ());
         }
 */
         else
         {
             curr_ = ch_;
         }

         eos_ = state_.next (ch_);

         // Covers preceding if and else
         if (eos_)
         {
             // Pointless returning index if at end of string
             throw runtime_error ("Unexpected end of regex "
                 "(missing ']').");
         }

         std::size_t start_ = static_cast<typename Traits::index_type> (prev_);
         std::size_t end_ = static_cast<typename Traits::index_type> (curr_);

         // Semanic check
         if (end_ < start_)
         {
             std::ostringstream ss_;

             ss_ << "Invalid range in charset preceding index " <<
                 state_.index () - 1 << '.';
             throw runtime_error (ss_.str ().c_str ());
         }

         chars_.reserve (chars_.size () + (end_ + 1 - start_));

         for (; start_ <= end_; ++start_)
         {
             CharT ch_ = static_cast<CharT> (start_);

             if ((state_._flags & icase) &&
                 (std::isupper (ch_, state_._locale) ||
                 std::islower (ch_, state_._locale)))
             {
                 CharT upper_ = std::toupper (ch_, state_._locale);
                 CharT lower_ = std::tolower (ch_, state_._locale);

                 chars_ += (upper_);
                 chars_ += (lower_);
             }
             else
             {
                 chars_ += (ch_);
             }
         }
     }
 };
 }
 }
 }

 #endif
	// tokeniser_helper.hpp
	// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
	//
	// Distributed under the Boost Software License, Version 1.0. (See accompanying
	// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	#ifndef BOOST_LEXER_RE_TOKENISER_HELPER_H
	#define BOOST_LEXER_RE_TOKENISER_HELPER_H

	#include "../../char_traits.hpp"
	// strlen()
	#include <cstring>
	#include "../../size_t.hpp"
	#include "re_tokeniser_state.hpp"

	namespace boost
	{
	namespace lexer
	{
	namespace detail
	{
	template<typename CharT, typename Traits = char_traits<CharT> >
	class basic_re_tokeniser_helper
	{
	public:
	typedef basic_re_tokeniser_state<CharT> state;
	typedef std::basic_string<CharT> string;

	// Decodes whatever follows a backslash in the regex.
	// If the next char is a charset shortcut letter (d, D, s, S, w or W) it
	// is consumed and the equivalent bracket expression text is returned,
	// with its length stored in str_len_; ch_ is not written in that case.
	// Otherwise 0 is returned, str_len_ is set to 0 and ch_ receives the
	// single character decoded by chr ().
	// Throws runtime_error if the regex ends directly after the backslash.
	static const CharT *escape_sequence (state &state_, CharT &ch_,
	    std::size_t &str_len_)
	{
	    bool eos_ = state_.eos ();

	    if (eos_)
	    {
	        throw runtime_error ("Unexpected end of regex "
	            "following '\\'.");
	    }

	    // charset_shortcut () takes the current char (not the pointer to
	    // it) and returns a pointer to the expansion text, or 0.
	    const CharT *str_ = charset_shortcut (*state_._curr, str_len_);

	    if (str_)
	    {
	        // Step past the shortcut letter itself.
	        state_.increment ();
	    }
	    else
	    {
	        // Not a shortcut: decode one (possibly escaped) character.
	        ch_ = chr (state_);
	    }

	    return str_;
	}

	// Parses the contents of a bracket expression (the nested call below
	// skips the leading '[', so the opening bracket is expected to have been
	// consumed already). Every character named by the set is appended to
	// chars_ and negated_ reports whether the set started with '^'.
	// This function can call itself: a shortcut such as \d is expanded by
	// parsing its bracket expression text through a temporary state.
	// Throws runtime_error on premature end of input, when a shortcut's
	// negation differs from that of the enclosing set, and when a
	// non-negated set ends up empty.
	static void charset (state &state_, string &chars_, bool &negated_)
	{
	    CharT ch_ = 0;
	    bool eos_ = state_.next (ch_);

	    if (eos_)
	    {
	        // Pointless returning index if at end of string
	        throw runtime_error ("Unexpected end of regex "
	            "following '['.");
	    }

	    negated_ = ch_ == '^';

	    if (negated_)
	    {
	        eos_ = state_.next (ch_);

	        if (eos_)
	        {
	            // Pointless returning index if at end of string
	            throw runtime_error ("Unexpected end of regex "
	                "following '^'.");
	        }
	    }

	    // True while the most recent item was a shortcut (\d etc.).
	    bool chset_ = false;
	    // Most recent single char; appended once we know no '-' follows.
	    CharT prev_ = 0;

	    while (ch_ != ']')
	    {
	        if (ch_ == '\\')
	        {
	            std::size_t str_len_ = 0;
	            const CharT *str_ = escape_sequence (state_, prev_, str_len_);

	            chset_ = str_ != 0;

	            if (chset_)
	            {
	                // Parse the shortcut's own text, skipping its '['.
	                state temp_state_ (str_ + 1, str_ + str_len_,
	                    state_._flags, state_._locale);
	                string temp_chars_;
	                bool temp_negated_ = false;

	                charset (temp_state_, temp_chars_, temp_negated_);

	                if (negated_ != temp_negated_)
	                {
	                    std::ostringstream ss_;

	                    ss_ << "Mismatch in charset negation preceding "
	                        "index " << state_.index () << '.';
	                    throw runtime_error (ss_.str ().c_str ());
	                }

	                chars_ += temp_chars_;
	            }
	        }
	/*
	        else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
	        {
	            // TODO: POSIX charsets
	        }
	*/
	        else
	        {
	            chset_ = false;
	            prev_ = ch_;
	        }

	        eos_ = state_.next (ch_);

	        // Covers preceding if, else if and else
	        if (eos_)
	        {
	            // Pointless returning index if at end of string
	            throw runtime_error ("Unexpected end of regex "
	                "(missing ']').");
	        }

	        if (ch_ == '-')
	        {
	            charset_range (chset_, state_, eos_, ch_, prev_, chars_);
	        }
	        else if (!chset_)
	        {
	            if ((state_._flags & icase) &&
	                (std::isupper (prev_, state_._locale) ||
	                std::islower (prev_, state_._locale)))
	            {
	                // Case-insensitive: store both cases of the letter.
	                CharT upper_ = std::toupper (prev_, state_._locale);
	                CharT lower_ = std::tolower (prev_, state_._locale);

	                chars_ += upper_;
	                chars_ += lower_;
	            }
	            else
	            {
	                chars_ += prev_;
	            }
	        }
	    }

	    if (!negated_ && chars_.empty ())
	    {
	        throw runtime_error ("Empty charsets not allowed.");
	    }
	}

	// Decodes the character that follows a backslash and consumes it from
	// state_. Octal digits, \c and \x are handed to their dedicated decoders;
	// the single-letter escapes (a b e f n r t v) map to their control
	// codes; any other char stands for itself.
	// The caller must already have checked that state_ is not at the end.
	static CharT chr (state &state_)
	{
	    const CharT escape_ = *state_._curr;
	    CharT decoded_ = 0;

	    switch (escape_)
	    {
	        // Multi-character escapes: the decoder consumes the input.
	        case '0':
	        case '1':
	        case '2':
	        case '3':
	        case '4':
	        case '5':
	        case '6':
	        case '7':
	            return decode_octal (state_);
	        case 'c':
	            return decode_control_char (state_);
	        case 'x':
	            return decode_hex (state_);

	        // Single-character escapes.
	        case 'a':
	            decoded_ = '\a';
	            break;
	        case 'b':
	            decoded_ = '\b';
	            break;
	        case 'e':
	            decoded_ = 27; // '\e' not recognised by compiler
	            break;
	        case 'f':
	            decoded_ = '\f';
	            break;
	        case 'n':
	            decoded_ = '\n';
	            break;
	        case 'r':
	            decoded_ = '\r';
	            break;
	        case 't':
	            decoded_ = '\t';
	            break;
	        case 'v':
	            decoded_ = '\v';
	            break;
	        default:
	            // Anything else is taken literally.
	            decoded_ = escape_;
	            break;
	    }

	    // Every case that reaches here used exactly one input char.
	    state_.increment ();
	    return decoded_;
	}

	private:
	// Maps a shortcut letter (d, D, s, S, w, W) to the text of the
	// equivalent bracket expression and stores that text's length in
	// str_len_. For any other char, returns 0 and sets str_len_ to 0.
	static const char *charset_shortcut (const char ch_,
	    std::size_t &str_len_)
	{
	    const char *expansion_ = 0;

	    switch (ch_)
	    {
	    case 'd':
	        expansion_ = "[0-9]";
	        break;
	    case 'D':
	        expansion_ = "[^0-9]";
	        break;
	    case 's':
	        expansion_ = "[ \t\n\r\f\v]";
	        break;
	    case 'S':
	        expansion_ = "[^ \t\n\r\f\v]";
	        break;
	    case 'w':
	        expansion_ = "[_0-9A-Za-z]";
	        break;
	    case 'W':
	        expansion_ = "[^_0-9A-Za-z]";
	        break;
	    }

	    str_len_ = 0;

	    if (expansion_ != 0)
	    {
	        // Some systems have strlen in namespace std.
	        using namespace std;

	        str_len_ = strlen (expansion_);
	    }

	    return expansion_;
	}

	// Wide-character counterpart of the char overload: maps a shortcut
	// letter (d, D, s, S, w, W) to the text of the equivalent bracket
	// expression and stores that text's length in str_len_. For any other
	// char, returns 0 and sets str_len_ to 0.
	static const wchar_t *charset_shortcut (const wchar_t ch_,
	    std::size_t &str_len_)
	{
	    const wchar_t *expansion_ = 0;

	    switch (ch_)
	    {
	    case 'd':
	        expansion_ = L"[0-9]";
	        break;
	    case 'D':
	        expansion_ = L"[^0-9]";
	        break;
	    case 's':
	        expansion_ = L"[ \t\n\r\f\v]";
	        break;
	    case 'S':
	        expansion_ = L"[^ \t\n\r\f\v]";
	        break;
	    case 'w':
	        expansion_ = L"[_0-9A-Za-z]";
	        break;
	    case 'W':
	        expansion_ = L"[^_0-9A-Za-z]";
	        break;
	    }

	    str_len_ = 0;

	    if (expansion_ != 0)
	    {
	        // Some systems have wcslen in namespace std.
	        using namespace std;

	        str_len_ = wcslen (expansion_);
	    }

	    return expansion_;
	}

	// Decodes an octal escape of one to three digits, starting at the
	// current char (which is not validated here; chr () only calls this
	// for '0'..'7'). Consumes the digits it uses and returns the
	// accumulated value converted to CharT.
	static CharT decode_octal (state &state_)
	{
	    std::size_t accumulator_ = 0;
	    CharT ch_ = *state_._curr;
	    // At most three digits are read.
	    unsigned short count_ = 3;
	    bool eos_ = false;

	    for (;;)
	    {
	        accumulator_ *= 8;
	        accumulator_ += ch_ - '0';
	        --count_;
	        state_.increment ();
	        eos_ = state_.eos ();

	        if (!count_ || eos_) break;

	        ch_ = *state_._curr;

	        // Don't consume invalid chars!
	        if (ch_ < '0' || ch_ > '7')
	        {
	            break;
	        }
	    }

	    return static_cast<CharT> (accumulator_);
	}

	// Decodes a \cX control escape. The current char is the 'c'; the char
	// after it selects the control code: a-z and A-Z give 1..26, '@' gives
	// 0. Throws runtime_error if the input ends after the 'c' or if the
	// selector is any other char.
	static CharT decode_control_char (state &state_)
	{
	    // Skip over 'c'
	    state_.increment ();

	    CharT selector_ = 0;

	    if (state_.next (selector_))
	    {
	        // Pointless returning index if at end of string
	        throw runtime_error ("Unexpected end of regex following \\c.");
	    }

	    if (selector_ >= 'a' && selector_ <= 'z')
	    {
	        selector_ -= 'a' - 1;
	        return selector_;
	    }

	    if (selector_ >= 'A' && selector_ <= 'Z')
	    {
	        selector_ -= 'A' - 1;
	        return selector_;
	    }

	    if (selector_ == '@')
	    {
	        // Apparently...
	        return 0;
	    }

	    std::ostringstream ss_;

	    ss_ << "Invalid control char at index " <<
	        state_.index () - 1 << '.';
	    throw runtime_error (ss_.str ().c_str ());
	}

	// Decodes a \x hexadecimal escape. The current char is the 'x'; at
	// least one hex digit must follow and every further hex digit is
	// consumed as well (there is no limit on the digit count). Returns the
	// accumulated value converted to CharT.
	// Throws runtime_error if the input ends after the 'x' or if the first
	// char after it is not a hex digit.
	static CharT decode_hex (state &state_)
	{
	    // Skip over 'x'
	    state_.increment ();

	    CharT ch_ = 0;
	    bool eos_ = state_.next (ch_);

	    if (eos_)
	    {
	        // Pointless returning index if at end of string
	        throw runtime_error ("Unexpected end of regex following \\x.");
	    }

	    if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') ||
	        (ch_ >= 'A' && ch_ <= 'F')))
	    {
	        std::ostringstream ss_;

	        ss_ << "Illegal char following \\x at index " <<
	            state_.index () - 1 << '.';
	        throw runtime_error (ss_.str ().c_str ());
	    }

	    std::size_t hex_ = 0;

	    do
	    {
	        hex_ *= 16;

	        if (ch_ >= '0' && ch_ <= '9')
	        {
	            hex_ += ch_ - '0';
	        }
	        else if (ch_ >= 'a' && ch_ <= 'f')
	        {
	            hex_ += 10 + (ch_ - 'a');
	        }
	        else
	        {
	            hex_ += 10 + (ch_ - 'A');
	        }

	        eos_ = state_.eos ();

	        if (!eos_)
	        {
	            // Peek at the next char before deciding to consume it.
	            ch_ = *state_._curr;

	            // Don't consume invalid chars!
	            if (((ch_ >= '0' && ch_ <= '9') ||
	                (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F')))
	            {
	                state_.increment ();
	            }
	            else
	            {
	                // Reuse eos_ as the loop's stop flag.
	                eos_ = true;
	            }
	        }
	    } while (!eos_);

	    return static_cast<CharT> (hex_);
	}

	static void charset_range (const bool chset_, state &state_, bool &eos_,
	CharT &ch_, const CharT prev_, string &chars_)
	{
	if (chset_)
	{
	std::ostringstream ss_;

	ss_ << "Charset cannot form start of range preceding "
	"index " << state_.index () - 1 << '.';
	throw runtime_error (ss_.str ().c_str ());
	}

	eos_ = state_.next (ch_);

	if (eos_)
	{
	// Pointless returning index if at end of string
	throw runtime_error ("Unexpected end of regex "
	"following '-'.");
	}

	CharT curr_ = 0;

	if (ch_ == '\\')
	{
	std::size_t str_len_ = 0;

	if (escape_sequence (state_, curr_, str_len_))
	{
	std::ostringstream ss_;

	ss_ << "Charset cannot form end of range preceding index "
	<< state_.index () << '.';
	throw runtime_error (ss_.str ().c_str ());
	}
	}
	/*
	else if (ch_ == '[' && !state_.eos () && *state_._curr == ':')
	{
	std::ostringstream ss_;

	ss_ << "POSIX char class cannot form end of range at "
	"index " << state_.index () - 1 << '.';
	throw runtime_error (ss_.str ().c_str ());
	}
	*/
	else
	{
	curr_ = ch_;
	}

	eos_ = state_.next (ch_);

	// Covers preceding if and else
	if (eos_)
	{
	// Pointless returning index if at end of string
	throw runtime_error ("Unexpected end of regex "
	"(missing ']').");
	}

	std::size_t start_ = static_cast<typename Traits::index_type> (prev_);
	std::size_t end_ = static_cast<typename Traits::index_type> (curr_);

	// Semanic check
	if (end_ < start_)
	{
	std::ostringstream ss_;

	ss_ << "Invalid range in charset preceding index " <<
	state_.index () - 1 << '.';
	throw runtime_error (ss_.str ().c_str ());
	}

	chars_.reserve (chars_.size () + (end_ + 1 - start_));

	for (; start_ <= end_; ++start_)
	{
	CharT ch_ = static_cast<CharT> (start_);

	if ((state_._flags & icase) &&
	(std::isupper (ch_, state_._locale) \|\|
	std::islower (ch_, state_._locale)))
	{
	CharT upper_ = std::toupper (ch_, state_._locale);
	CharT lower_ = std::tolower (ch_, state_._locale);

	chars_ += (upper_);
	chars_ += (lower_);
	}
	else
	{
	chars_ += (ch_);
	}
	}
	}
	};
	}
	}
	}

	#endif