// state_machine.hpp | |
// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/) | |
// | |
// Distributed under the Boost Software License, Version 1.0. (See accompanying | |
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#ifndef BOOST_LEXER_STATE_MACHINE_HPP | |
#define BOOST_LEXER_STATE_MACHINE_HPP | |
#include <algorithm> | |
#include "conversion/char_state_machine.hpp" | |
#include "consts.hpp" | |
#include <deque> | |
#include "internals.hpp" | |
#include <map> | |
#include "containers/ptr_vector.hpp" | |
#include "size_t.hpp" | |
#include <string> | |
namespace boost | |
{ | |
namespace lexer | |
{ | |
template<typename CharT> | |
class basic_state_machine | |
{ | |
public: | |
typedef CharT char_type; | |
class iterator | |
{ | |
public: | |
#if defined _MSC_VER && _MSC_VER <= 1200 | |
friend basic_state_machine; | |
#else | |
friend class basic_state_machine; | |
#endif | |
struct data | |
{ | |
// Current iterator info | |
std::size_t dfa; | |
std::size_t states; | |
std::size_t state; | |
std::size_t transitions; | |
std::size_t transition; | |
// Current state info | |
bool end_state; | |
std::size_t id; | |
std::size_t unique_id; | |
std::size_t goto_dfa; | |
std::size_t bol_index; | |
std::size_t eol_index; | |
// Current transition info | |
basic_string_token<CharT> token; | |
std::size_t goto_state; | |
data () : | |
dfa (npos), | |
states (0), | |
state (npos), | |
transitions (0), | |
transition (npos), | |
end_state (false), | |
id (npos), | |
unique_id (npos), | |
goto_dfa (npos), | |
bol_index (npos), | |
eol_index (npos), | |
goto_state (npos) | |
{ | |
} | |
bool operator == (const data &rhs_) const | |
{ | |
return dfa == rhs_.dfa && | |
states == rhs_.states && | |
state == rhs_.state && | |
transitions == rhs_.transitions && | |
transition == rhs_.transition && | |
end_state == rhs_.end_state && | |
id == rhs_.id && | |
unique_id == rhs_.unique_id && | |
goto_dfa == rhs_.goto_dfa && | |
bol_index == rhs_.bol_index && | |
eol_index == rhs_.eol_index && | |
token == rhs_.token && | |
transition == rhs_.transition; | |
} | |
}; | |
iterator () : | |
_sm (0), | |
_dfas (0), | |
_dfa (npos), | |
_states (0), | |
_state (npos), | |
_transitions (0), | |
_transition (npos) | |
{ | |
} | |
bool operator == (const iterator &rhs_) const | |
{ | |
return _dfas == rhs_._dfas && _dfa == rhs_._dfa && | |
_states == rhs_._states && _state == rhs_._state && | |
_transitions == rhs_._transitions && | |
_transition == rhs_._transition; | |
} | |
bool operator != (const iterator &rhs_) const | |
{ | |
return !(*this == rhs_); | |
} | |
data &operator * () | |
{ | |
return _data; | |
} | |
data *operator -> () | |
{ | |
return &_data; | |
} | |
// Let compiler generate operator = (). | |
// prefix version | |
iterator &operator ++ () | |
{ | |
next (); | |
return *this; | |
} | |
// postfix version | |
iterator operator ++ (int) | |
{ | |
iterator iter_ = *this; | |
next (); | |
return iter_; | |
} | |
void clear () | |
{ | |
_dfas = _states = _transitions = 0; | |
_dfa = _state = _transition = npos; | |
} | |
private: | |
basic_state_machine *_sm; | |
data _data; | |
std::size_t _dfas; | |
std::size_t _dfa; | |
std::size_t _states; | |
std::size_t _state; | |
std::size_t _transitions; | |
std::size_t _transition; | |
typename detail::basic_char_state_machine<CharT>::state:: | |
size_t_string_token_map::const_iterator _token_iter; | |
typename detail::basic_char_state_machine<CharT>::state:: | |
size_t_string_token_map::const_iterator _token_end; | |
void next () | |
{ | |
bool reset_state_ = false; | |
if (_transition >= _transitions) | |
{ | |
_transition = _data.transition = 0; | |
_data.state = ++_state; | |
reset_state_ = true; | |
if (_state >= _states) | |
{ | |
++_dfa; | |
if (_dfa >= _dfas) | |
{ | |
clear (); | |
reset_state_ = false; | |
} | |
else | |
{ | |
_states = _data.states = | |
_sm->_csm._sm_vector[_dfa].size (); | |
_state = _data.state = 0; | |
} | |
} | |
} | |
else | |
{ | |
_data.transition = _transition; | |
} | |
if (reset_state_) | |
{ | |
const typename detail::basic_char_state_machine<CharT>:: | |
state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state]; | |
_transitions = _data.transitions = ptr_->_transitions.size (); | |
_data.end_state = ptr_->_end_state; | |
_data.id = ptr_->_id; | |
_data.unique_id = ptr_->_unique_id; | |
_data.goto_dfa = ptr_->_state; | |
_data.bol_index = ptr_->_bol_index; | |
_data.eol_index = ptr_->_eol_index; | |
_token_iter = ptr_->_transitions.begin (); | |
_token_end = ptr_->_transitions.end (); | |
} | |
if (_token_iter != _token_end) | |
{ | |
_data.token = _token_iter->second; | |
_data.goto_state = _token_iter->first; | |
++_token_iter; | |
++_transition; | |
} | |
else | |
{ | |
_data.token.clear (); | |
_data.goto_state = npos; | |
} | |
} | |
}; | |
#if defined _MSC_VER && _MSC_VER <= 1200 | |
friend iterator; | |
#else | |
friend class iterator; | |
#endif | |
basic_state_machine () | |
{ | |
} | |
void clear () | |
{ | |
_internals.clear (); | |
_csm.clear (); | |
} | |
bool empty () const | |
{ | |
// Don't include _csm in this test, as irrelevant to state. | |
return _internals._lookup->empty () && | |
_internals._dfa_alphabet.empty () && | |
_internals._dfa->empty (); | |
} | |
std::size_t size () const | |
{ | |
return _internals._dfa->size (); | |
} | |
bool operator == (const basic_state_machine &rhs_) const | |
{ | |
// Don't include _csm in this test, as irrelevant to state. | |
return _internals._lookup == rhs_._internals._lookup && | |
_internals._dfa_alphabet == rhs_._internals._dfa_alphabet && | |
_internals._dfa == rhs_._internals._dfa && | |
_internals._seen_BOL_assertion == | |
rhs_._internals._seen_BOL_assertion && | |
_internals._seen_EOL_assertion == | |
rhs_._internals._seen_EOL_assertion; | |
} | |
iterator begin () const | |
{ | |
iterator iter_; | |
iter_._sm = const_cast<basic_state_machine *>(this); | |
check_for_csm (); | |
if (!_csm.empty ()) | |
{ | |
const typename detail::basic_char_state_machine<CharT>:: | |
state_vector *ptr_ = &_csm._sm_vector.front (); | |
iter_._dfas = _csm._sm_vector.size (); | |
iter_._states = iter_._data.states = ptr_->size (); | |
iter_._transitions = iter_._data.transitions = | |
ptr_->front ()._transitions.size (); | |
iter_._dfa = iter_._data.dfa = 0; | |
iter_._state = iter_._data.state = 0; | |
iter_._transition = 0; | |
iter_._data.end_state = ptr_->front ()._end_state; | |
iter_._data.id = ptr_->front ()._id; | |
iter_._data.unique_id = ptr_->front ()._unique_id; | |
iter_._data.goto_dfa = ptr_->front ()._state; | |
iter_._data.bol_index = ptr_->front ()._bol_index; | |
iter_._data.eol_index = ptr_->front ()._eol_index; | |
iter_._token_iter = ptr_->front ()._transitions.begin (); | |
iter_._token_end = ptr_->front ()._transitions.end (); | |
// Deal with case where there is only a bol or eol | |
// but no other transitions. | |
if (iter_._transitions) | |
{ | |
++iter_; | |
} | |
} | |
return iter_; | |
} | |
iterator end () const | |
{ | |
iterator iter_; | |
iter_._sm = const_cast<basic_state_machine *>(this); | |
return iter_; | |
} | |
void swap (basic_state_machine &sm_) | |
{ | |
_internals.swap (sm_._internals); | |
_csm.swap (sm_._csm); | |
} | |
const detail::internals &data () const | |
{ | |
return _internals; | |
} | |
private: | |
detail::internals _internals; | |
mutable detail::basic_char_state_machine<CharT> _csm; | |
void check_for_csm () const | |
{ | |
if (_csm.empty ()) | |
{ | |
human_readable (_csm); | |
} | |
} | |
void human_readable (detail::basic_char_state_machine<CharT> &sm_) const | |
{ | |
const std::size_t max_ = sizeof (CharT) == 1 ? | |
num_chars : num_wchar_ts; | |
const std::size_t start_states_ = _internals._dfa->size (); | |
sm_.clear (); | |
sm_._sm_vector.resize (start_states_); | |
for (std::size_t start_state_index_ = 0; | |
start_state_index_ < start_states_; ++start_state_index_) | |
{ | |
const detail::internals::size_t_vector *lu_ = | |
_internals._lookup[start_state_index_]; | |
const std::size_t alphabet_ = | |
_internals._dfa_alphabet[start_state_index_] - dfa_offset; | |
std::vector<std::basic_string<CharT> > chars_ (alphabet_); | |
const std::size_t states_ = _internals._dfa[start_state_index_]-> | |
size () / (alphabet_ + dfa_offset); | |
const std::size_t *read_ptr_ = &_internals. | |
_dfa[start_state_index_]->front () + alphabet_ + dfa_offset; | |
sm_._sm_vector[start_state_index_].resize (states_ - 1); | |
for (std::size_t alpha_index_ = 0; alpha_index_ < max_; | |
++alpha_index_) | |
{ | |
const std::size_t col_ = lu_->at (alpha_index_); | |
if (col_ != dead_state_index) | |
{ | |
chars_[col_ - dfa_offset] += static_cast<CharT> | |
(alpha_index_); | |
} | |
} | |
for (std::size_t state_index_ = 1; state_index_ < states_; | |
++state_index_) | |
{ | |
typename detail::basic_char_state_machine<CharT>::state | |
*state_ = &sm_._sm_vector[start_state_index_] | |
[state_index_ - 1]; | |
state_->_end_state = *read_ptr_ != 0; | |
state_->_id = *(read_ptr_ + id_index); | |
state_->_unique_id = *(read_ptr_ + unique_id_index); | |
state_->_state = *(read_ptr_ + state_index); | |
state_->_bol_index = *(read_ptr_ + bol_index) - 1; | |
state_->_eol_index = *(read_ptr_ + eol_index) - 1; | |
read_ptr_ += dfa_offset; | |
for (std::size_t col_index_ = 0; col_index_ < alphabet_; | |
++col_index_, ++read_ptr_) | |
{ | |
const std::size_t transition_ = *read_ptr_; | |
if (transition_ != 0) | |
{ | |
const std::size_t i_ = transition_ - 1; | |
typename detail::basic_char_state_machine<CharT>:: | |
state::size_t_string_token_map::iterator iter_ = | |
state_->_transitions.find (i_); | |
if (iter_ == state_->_transitions.end ()) | |
{ | |
basic_string_token<CharT> token_ | |
(false, chars_[col_index_]); | |
typename detail::basic_char_state_machine<CharT>:: | |
state::size_t_string_token_pair pair_ | |
(i_, token_); | |
state_->_transitions.insert (pair_); | |
} | |
else | |
{ | |
iter_->second._charset += chars_[col_index_]; | |
} | |
} | |
} | |
for (typename detail::basic_char_state_machine<CharT>::state:: | |
size_t_string_token_map::iterator iter_ = | |
state_->_transitions.begin (), | |
end_ = state_->_transitions.end (); | |
iter_ != end_; ++iter_) | |
{ | |
std::sort (iter_->second._charset.begin (), | |
iter_->second._charset.end ()); | |
iter_->second.normalise (); | |
} | |
} | |
} | |
} | |
}; | |
typedef basic_state_machine<char> state_machine; | |
typedef basic_state_machine<wchar_t> wstate_machine; | |
} | |
} | |
#endif |