//  Copyright (c) 2001-2011 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM) | |
#define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM | |
#if defined(_MSC_VER) | |
#pragma once | |
#endif | |
#include <boost/detail/iterator.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
#include <boost/spirit/home/support/detail/lexer/size_t.hpp>
#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
#include <cstddef>
#include <iterator>
#include <vector>
namespace boost { namespace spirit { namespace lex { namespace lexertl | |
{ | |
/////////////////////////////////////////////////////////////////////////// | |
template<typename Iterator> | |
class basic_iterator_tokeniser | |
{ | |
public: | |
typedef std::vector<std::size_t> size_t_vector; | |
typedef typename boost::detail::iterator_traits<Iterator>::value_type | |
char_type; | |
static std::size_t next ( | |
boost::lexer::basic_state_machine<char_type> const& state_machine_ | |
, std::size_t &dfa_state_, bool& bol_, Iterator &start_token_ | |
, Iterator const& end_, std::size_t& unique_id_) | |
{ | |
if (start_token_ == end_) | |
{ | |
unique_id_ = boost::lexer::npos; | |
return 0; | |
} | |
bool bol = bol_; | |
again: | |
std::size_t const* lookup_ = &state_machine_.data()._lookup[dfa_state_]-> | |
front (); | |
std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[dfa_state_]; | |
std::size_t const* dfa_ = &state_machine_.data()._dfa[dfa_state_]->front (); | |
std::size_t const* ptr_ = dfa_ + dfa_alphabet_; | |
Iterator curr_ = start_token_; | |
bool end_state_ = *ptr_ != 0; | |
std::size_t id_ = *(ptr_ + boost::lexer::id_index); | |
std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
std::size_t end_start_state_ = dfa_state_; | |
bool end_bol_ = bol_; | |
Iterator end_token_ = start_token_; | |
while (curr_ != end_) | |
{ | |
std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; | |
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
if (BOL_state_ && bol) | |
{ | |
ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; | |
} | |
else if (EOL_state_ && *curr_ == '\n') | |
{ | |
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
} | |
else | |
{ | |
typedef typename | |
boost::detail::iterator_traits<Iterator>::value_type | |
value_type; | |
typedef typename | |
boost::lexer::char_traits<value_type>::index_type | |
index_type; | |
index_type index = | |
boost::lexer::char_traits<value_type>::call(*curr_++); | |
bol = (index == '\n') ? true : false; | |
std::size_t const state_ = ptr_[ | |
lookup_[static_cast<std::size_t>(index)]]; | |
if (state_ == 0) | |
{ | |
break; | |
} | |
ptr_ = &dfa_[state_ * dfa_alphabet_]; | |
} | |
if (*ptr_) | |
{ | |
end_state_ = true; | |
id_ = *(ptr_ + boost::lexer::id_index); | |
uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
end_start_state_ = *(ptr_ + boost::lexer::state_index); | |
end_bol_ = bol; | |
end_token_ = curr_; | |
} | |
} | |
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
if (EOL_state_ && curr_ == end_) | |
{ | |
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
if (*ptr_) | |
{ | |
end_state_ = true; | |
id_ = *(ptr_ + boost::lexer::id_index); | |
uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
end_start_state_ = *(ptr_ + boost::lexer::state_index); | |
end_bol_ = bol; | |
end_token_ = curr_; | |
} | |
} | |
if (end_state_) { | |
// return longest match | |
dfa_state_ = end_start_state_; | |
start_token_ = end_token_; | |
if (id_ == 0) | |
{ | |
bol = end_bol_; | |
goto again; | |
} | |
else | |
{ | |
bol_ = end_bol_; | |
} | |
} | |
else { | |
bol_ = (*start_token_ == '\n') ? true : false; | |
id_ = boost::lexer::npos; | |
uid_ = boost::lexer::npos; | |
} | |
unique_id_ = uid_; | |
return id_; | |
} | |
/////////////////////////////////////////////////////////////////////// | |
static std::size_t next ( | |
boost::lexer::basic_state_machine<char_type> const& state_machine_ | |
, bool& bol_, Iterator &start_token_, Iterator const& end_ | |
, std::size_t& unique_id_) | |
{ | |
if (start_token_ == end_) | |
{ | |
unique_id_ = boost::lexer::npos; | |
return 0; | |
} | |
bool bol = bol_; | |
std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front(); | |
std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0]; | |
std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front (); | |
std::size_t const* ptr_ = dfa_ + dfa_alphabet_; | |
Iterator curr_ = start_token_; | |
bool end_state_ = *ptr_ != 0; | |
std::size_t id_ = *(ptr_ + boost::lexer::id_index); | |
std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
bool end_bol_ = bol_; | |
Iterator end_token_ = start_token_; | |
while (curr_ != end_) | |
{ | |
std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index]; | |
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
if (BOL_state_ && bol) | |
{ | |
ptr_ = &dfa_[BOL_state_ * dfa_alphabet_]; | |
} | |
else if (EOL_state_ && *curr_ == '\n') | |
{ | |
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
} | |
else | |
{ | |
typedef typename | |
boost::detail::iterator_traits<Iterator>::value_type | |
value_type; | |
typedef typename | |
boost::lexer::char_traits<value_type>::index_type | |
index_type; | |
index_type index = | |
boost::lexer::char_traits<value_type>::call(*curr_++); | |
bol = (index == '\n') ? true : false; | |
std::size_t const state_ = ptr_[ | |
lookup_[static_cast<std::size_t>(index)]]; | |
if (state_ == 0) | |
{ | |
break; | |
} | |
ptr_ = &dfa_[state_ * dfa_alphabet_]; | |
} | |
if (*ptr_) | |
{ | |
end_state_ = true; | |
id_ = *(ptr_ + boost::lexer::id_index); | |
uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
end_bol_ = bol; | |
end_token_ = curr_; | |
} | |
} | |
std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index]; | |
if (EOL_state_ && curr_ == end_) | |
{ | |
ptr_ = &dfa_[EOL_state_ * dfa_alphabet_]; | |
if (*ptr_) | |
{ | |
end_state_ = true; | |
id_ = *(ptr_ + boost::lexer::id_index); | |
uid_ = *(ptr_ + boost::lexer::unique_id_index); | |
end_bol_ = bol; | |
end_token_ = curr_; | |
} | |
} | |
if (end_state_) { | |
// return longest match | |
bol_ = end_bol_; | |
start_token_ = end_token_; | |
} | |
else { | |
bol_ = *start_token_ == '\n'; | |
id_ = boost::lexer::npos; | |
uid_ = boost::lexer::npos; | |
} | |
unique_id_ = uid_; | |
return id_; | |
} | |
}; | |
}}}} | |
#endif |