blob: 3f1c62b88ade4cb0588ba386bbfd7936d14b5e7c [file] [log] [blame]
// state_machine.hpp
// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_LEXER_STATE_MACHINE_HPP
#define BOOST_LEXER_STATE_MACHINE_HPP
#include <algorithm>
#include "conversion/char_state_machine.hpp"
#include "consts.hpp"
#include <deque>
#include "internals.hpp"
#include <map>
#include "containers/ptr_vector.hpp"
#include "size_t.hpp"
#include <string>
namespace boost
{
namespace lexer
{
template<typename CharT>
class basic_state_machine
{
public:
typedef CharT char_type;
class iterator
{
public:
#if defined _MSC_VER && _MSC_VER <= 1200
friend basic_state_machine;
#else
friend class basic_state_machine;
#endif
struct data
{
// Current iterator info
std::size_t dfa;
std::size_t states;
std::size_t state;
std::size_t transitions;
std::size_t transition;
// Current state info
bool end_state;
std::size_t id;
std::size_t unique_id;
std::size_t goto_dfa;
std::size_t bol_index;
std::size_t eol_index;
// Current transition info
basic_string_token<CharT> token;
std::size_t goto_state;
data () :
dfa (npos),
states (0),
state (npos),
transitions (0),
transition (npos),
end_state (false),
id (npos),
unique_id (npos),
goto_dfa (npos),
bol_index (npos),
eol_index (npos),
goto_state (npos)
{
}
bool operator == (const data &rhs_) const
{
return dfa == rhs_.dfa &&
states == rhs_.states &&
state == rhs_.state &&
transitions == rhs_.transitions &&
transition == rhs_.transition &&
end_state == rhs_.end_state &&
id == rhs_.id &&
unique_id == rhs_.unique_id &&
goto_dfa == rhs_.goto_dfa &&
bol_index == rhs_.bol_index &&
eol_index == rhs_.eol_index &&
token == rhs_.token &&
transition == rhs_.transition;
}
};
iterator () :
_sm (0),
_dfas (0),
_dfa (npos),
_states (0),
_state (npos),
_transitions (0),
_transition (npos)
{
}
bool operator == (const iterator &rhs_) const
{
return _dfas == rhs_._dfas && _dfa == rhs_._dfa &&
_states == rhs_._states && _state == rhs_._state &&
_transitions == rhs_._transitions &&
_transition == rhs_._transition;
}
bool operator != (const iterator &rhs_) const
{
return !(*this == rhs_);
}
data &operator * ()
{
return _data;
}
data *operator -> ()
{
return &_data;
}
// Let compiler generate operator = ().
// prefix version
iterator &operator ++ ()
{
next ();
return *this;
}
// postfix version
iterator operator ++ (int)
{
iterator iter_ = *this;
next ();
return iter_;
}
void clear ()
{
_dfas = _states = _transitions = 0;
_dfa = _state = _transition = npos;
}
private:
basic_state_machine *_sm;
data _data;
std::size_t _dfas;
std::size_t _dfa;
std::size_t _states;
std::size_t _state;
std::size_t _transitions;
std::size_t _transition;
typename detail::basic_char_state_machine<CharT>::state::
size_t_string_token_map::const_iterator _token_iter;
typename detail::basic_char_state_machine<CharT>::state::
size_t_string_token_map::const_iterator _token_end;
void next ()
{
bool reset_state_ = false;
if (_transition >= _transitions)
{
_transition = _data.transition = 0;
_data.state = ++_state;
reset_state_ = true;
if (_state >= _states)
{
++_dfa;
if (_dfa >= _dfas)
{
clear ();
reset_state_ = false;
}
else
{
_states = _data.states =
_sm->_csm._sm_vector[_dfa].size ();
_state = _data.state = 0;
}
}
}
else
{
_data.transition = _transition;
}
if (reset_state_)
{
const typename detail::basic_char_state_machine<CharT>::
state *ptr_ = &_sm->_csm._sm_vector[_dfa][_state];
_transitions = _data.transitions = ptr_->_transitions.size ();
_data.end_state = ptr_->_end_state;
_data.id = ptr_->_id;
_data.unique_id = ptr_->_unique_id;
_data.goto_dfa = ptr_->_state;
_data.bol_index = ptr_->_bol_index;
_data.eol_index = ptr_->_eol_index;
_token_iter = ptr_->_transitions.begin ();
_token_end = ptr_->_transitions.end ();
}
if (_token_iter != _token_end)
{
_data.token = _token_iter->second;
_data.goto_state = _token_iter->first;
++_token_iter;
++_transition;
}
else
{
_data.token.clear ();
_data.goto_state = npos;
}
}
};
#if defined _MSC_VER && _MSC_VER <= 1200
friend iterator;
#else
friend class iterator;
#endif
basic_state_machine ()
{
}
void clear ()
{
_internals.clear ();
_csm.clear ();
}
bool empty () const
{
// Don't include _csm in this test, as irrelevant to state.
return _internals._lookup->empty () &&
_internals._dfa_alphabet.empty () &&
_internals._dfa->empty ();
}
std::size_t size () const
{
return _internals._dfa->size ();
}
bool operator == (const basic_state_machine &rhs_) const
{
// Don't include _csm in this test, as irrelevant to state.
return _internals._lookup == rhs_._internals._lookup &&
_internals._dfa_alphabet == rhs_._internals._dfa_alphabet &&
_internals._dfa == rhs_._internals._dfa &&
_internals._seen_BOL_assertion ==
rhs_._internals._seen_BOL_assertion &&
_internals._seen_EOL_assertion ==
rhs_._internals._seen_EOL_assertion;
}
iterator begin () const
{
iterator iter_;
iter_._sm = const_cast<basic_state_machine *>(this);
check_for_csm ();
if (!_csm.empty ())
{
const typename detail::basic_char_state_machine<CharT>::
state_vector *ptr_ = &_csm._sm_vector.front ();
iter_._dfas = _csm._sm_vector.size ();
iter_._states = iter_._data.states = ptr_->size ();
iter_._transitions = iter_._data.transitions =
ptr_->front ()._transitions.size ();
iter_._dfa = iter_._data.dfa = 0;
iter_._state = iter_._data.state = 0;
iter_._transition = 0;
iter_._data.end_state = ptr_->front ()._end_state;
iter_._data.id = ptr_->front ()._id;
iter_._data.unique_id = ptr_->front ()._unique_id;
iter_._data.goto_dfa = ptr_->front ()._state;
iter_._data.bol_index = ptr_->front ()._bol_index;
iter_._data.eol_index = ptr_->front ()._eol_index;
iter_._token_iter = ptr_->front ()._transitions.begin ();
iter_._token_end = ptr_->front ()._transitions.end ();
// Deal with case where there is only a bol or eol
// but no other transitions.
if (iter_._transitions)
{
++iter_;
}
}
return iter_;
}
iterator end () const
{
iterator iter_;
iter_._sm = const_cast<basic_state_machine *>(this);
return iter_;
}
void swap (basic_state_machine &sm_)
{
_internals.swap (sm_._internals);
_csm.swap (sm_._csm);
}
const detail::internals &data () const
{
return _internals;
}
private:
detail::internals _internals;
mutable detail::basic_char_state_machine<CharT> _csm;
void check_for_csm () const
{
if (_csm.empty ())
{
human_readable (_csm);
}
}
void human_readable (detail::basic_char_state_machine<CharT> &sm_) const
{
const std::size_t max_ = sizeof (CharT) == 1 ?
num_chars : num_wchar_ts;
const std::size_t start_states_ = _internals._dfa->size ();
sm_.clear ();
sm_._sm_vector.resize (start_states_);
for (std::size_t start_state_index_ = 0;
start_state_index_ < start_states_; ++start_state_index_)
{
const detail::internals::size_t_vector *lu_ =
_internals._lookup[start_state_index_];
const std::size_t alphabet_ =
_internals._dfa_alphabet[start_state_index_] - dfa_offset;
std::vector<std::basic_string<CharT> > chars_ (alphabet_);
const std::size_t states_ = _internals._dfa[start_state_index_]->
size () / (alphabet_ + dfa_offset);
const std::size_t *read_ptr_ = &_internals.
_dfa[start_state_index_]->front () + alphabet_ + dfa_offset;
sm_._sm_vector[start_state_index_].resize (states_ - 1);
for (std::size_t alpha_index_ = 0; alpha_index_ < max_;
++alpha_index_)
{
const std::size_t col_ = lu_->at (alpha_index_);
if (col_ != dead_state_index)
{
chars_[col_ - dfa_offset] += static_cast<CharT>
(alpha_index_);
}
}
for (std::size_t state_index_ = 1; state_index_ < states_;
++state_index_)
{
typename detail::basic_char_state_machine<CharT>::state
*state_ = &sm_._sm_vector[start_state_index_]
[state_index_ - 1];
state_->_end_state = *read_ptr_ != 0;
state_->_id = *(read_ptr_ + id_index);
state_->_unique_id = *(read_ptr_ + unique_id_index);
state_->_state = *(read_ptr_ + state_index);
state_->_bol_index = *(read_ptr_ + bol_index) - 1;
state_->_eol_index = *(read_ptr_ + eol_index) - 1;
read_ptr_ += dfa_offset;
for (std::size_t col_index_ = 0; col_index_ < alphabet_;
++col_index_, ++read_ptr_)
{
const std::size_t transition_ = *read_ptr_;
if (transition_ != 0)
{
const std::size_t i_ = transition_ - 1;
typename detail::basic_char_state_machine<CharT>::
state::size_t_string_token_map::iterator iter_ =
state_->_transitions.find (i_);
if (iter_ == state_->_transitions.end ())
{
basic_string_token<CharT> token_
(false, chars_[col_index_]);
typename detail::basic_char_state_machine<CharT>::
state::size_t_string_token_pair pair_
(i_, token_);
state_->_transitions.insert (pair_);
}
else
{
iter_->second._charset += chars_[col_index_];
}
}
}
for (typename detail::basic_char_state_machine<CharT>::state::
size_t_string_token_map::iterator iter_ =
state_->_transitions.begin (),
end_ = state_->_transitions.end ();
iter_ != end_; ++iter_)
{
std::sort (iter_->second._charset.begin (),
iter_->second._charset.end ());
iter_->second.normalise ();
}
}
}
}
};
typedef basic_state_machine<char> state_machine;
typedef basic_state_machine<wchar_t> wstate_machine;
}
}
#endif