// blob: b80176f98657bc1044da42dcd4e6e612a49a3ef8 [file] [log] [blame]
// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM)
#define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/support/detail/lexer/generator.hpp>
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
#include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
#include <boost/mpl/bool.hpp>
namespace boost { namespace spirit { namespace lex { namespace lexertl
{
namespace detail
{
///////////////////////////////////////////////////////////////////////
// Primary template of the data object used by the lexer functor. It is
// only declared here; the partial specializations that follow in this
// file supply the implementations.
//
//   Iterator    the underlying input iterator type
//   HasActors   mpl::true_ if lexer semantic actions are supported,
//               mpl::false_ otherwise
//   HasState    mpl::true_ if lexer states are supported, mpl::false_
//               otherwise
//   TokenValue  the type used to hold a token value when semantic
//               actions are supported
template <typename Iterator, typename HasActors, typename HasState
, typename TokenValue>
class data; // no default specialization
///////////////////////////////////////////////////////////////////////
// neither supports state, nor actors
// Data object for the plain configuration: no lexer states and no lexer
// semantic actions.
//
// An instance keeps a reference to the caller's start iterator, a copy of
// the end iterator, and references to the state machine and the rule set
// taken from the IterData object handed to the constructor. All entry
// points related to semantic actions (less(), more(), lookahead()) only
// assert, because they must never be reached in this configuration.
template <typename Iterator, typename TokenValue>
class data<Iterator, mpl::false_, mpl::false_, TokenValue>
{
protected:
    typedef typename
        boost::detail::iterator_traits<Iterator>::value_type
    char_type;

public:
    typedef Iterator base_iterator_type;
    typedef iterator_range<Iterator> token_value_type;
    typedef token_value_type get_value_type;
    typedef std::size_t state_type;
    typedef char_type const* state_name_type;
    typedef unused_type semantic_actions_type;
    typedef detail::wrap_action<unused_type, Iterator, data, std::size_t>
        wrap_action_type;

    typedef unused_type next_token_functor;
    typedef unused_type get_state_name_type;

    // Initialize the shared data.
    //
    //   data_   object exposing the members state_machine_ and rules_
    //   first   start iterator; stored by reference
    //   last    end of input; stored by value
    //
    // The begin-of-line flag starts out as the state machine's
    // _seen_BOL_assertion value.
    template <typename IterData>
    data (IterData const& data_, Iterator& first, Iterator const& last)
      : first_(first), last_(last)
      , state_machine_(data_.state_machine_)
      , rules_(data_.rules_)
      , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}

    // The following functions are used by the implementation of the
    // placeholder '_state'.
    template <typename Char>
    void set_state_name (Char const*)
    {
// some (random) versions of gcc instantiate this function even if it's not
// needed leading to false static asserts
#if !defined(__GNUC__)
        // If you see a compile time assertion below you're probably
        // using a token type not supporting lexer states (the 3rd
        // template parameter of the token is mpl::false_), but your
        // code uses state changes anyways.
        //
        // The condition is made dependent on Char on purpose: it is
        // always false, but it is evaluated only when this member
        // template is actually instantiated. A plain 'false' may be
        // diagnosed as soon as the header is parsed.
        BOOST_STATIC_ASSERT(sizeof(Char) == 0);
#endif
    }

    // Returns the name of the initial state as reported by the rule set.
    char_type const* get_state_name() const { return rules_.initial(); }

    // Always returns 0; the name passed in is ignored.
    std::size_t get_state_id (char_type const*) const
    {
        return 0;
    }

    // The function get_eoi() is used by the implementation of the
    // placeholder '_eoi'.
    Iterator const& get_eoi() const { return last_; }

    // The function less() is used by the implementation of the support
    // function lex::less(). Its functionality is equivalent to flex'
    // function yyless(): it returns an iterator positioned to the
    // nth input character beyond the current start iterator (i.e. by
    // assigning the return value to the placeholder '_end' it is
    // possible to return all but the first n characters of the current
    // token back to the input stream).
    //
    // This function does nothing as long as no semantic actions are
    // used: it asserts and hands back 'it' unchanged. The count is
    // deliberately left unnamed because it is not used here.
    Iterator const& less(Iterator const& it, int /*n*/)
    {
        // The following assertion fires most likely because you are
        // using lexer semantic actions without using the actor_lexer
        // as the base class for your token definition class.
        BOOST_ASSERT(false &&
            "Are you using lexer semantic actions without using the "
            "actor_lexer base?");
        return it;
    }

    // The function more() is used by the implementation of the support
    // function lex::more(). Its functionality is equivalent to flex'
    // function yymore(): it tells the lexer that the next time it
    // matches a rule, the corresponding token should be appended onto
    // the current token value rather than replacing it.
    //
    // These functions do nothing as long as no semantic actions are
    // used.
    void more()
    {
        // The following assertion fires most likely because you are
        // using lexer semantic actions without using the actor_lexer
        // as the base class for your token definition class.
        BOOST_ASSERT(false &&
            "Are you using lexer semantic actions without using the "
            "actor_lexer base?");
    }

    // Helpers backing lex::more(); nothing is ever on hold here.
    bool adjust_start() { return false; }
    void revert_adjust_start() {}

    // The function lookahead() is used by the implementation of the
    // support function lex::lookahead. It can be used to implement
    // lookahead for lexer engines not supporting constructs like flex'
    // a/b (match a, but only when followed by b):
    //
    // This function does nothing as long as no semantic actions are
    // used: it asserts and returns false. Both arguments are unused and
    // therefore left unnamed.
    bool lookahead(std::size_t /*id*/
      , std::size_t /*state*/ = std::size_t(~0))
    {
        // The following assertion fires most likely because you are
        // using lexer semantic actions without using the actor_lexer
        // as the base class for your token definition class.
        BOOST_ASSERT(false &&
            "Are you using lexer semantic actions without using the "
            "actor_lexer base?");
        return false;
    }

    // the functions next, invoke_actions, and get_state are used by
    // the functor implementation below

    // The function next() tries to match the next token from the
    // underlying input sequence. The begin-of-line flag as it was
    // before the call is reported through prev_bol; the tokenizer
    // receives bol_ and end by reference and the result of the
    // tokenizer is returned unchanged.
    std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
    {
        prev_bol = bol_;

        typedef basic_iterator_tokeniser<Iterator> tokenizer;
        return tokenizer::next(state_machine_, bol_, end, last_
          , unique_id);
    }

    // nothing to invoke, so this is empty
    BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
      , std::size_t, std::size_t, Iterator const&)
    {
        return pass_flags::pass_normal;    // always accept
    }

    // Without state support the state is always 0 and cannot be changed.
    std::size_t get_state() const { return 0; }
    void set_state(std::size_t) {}

    // Access to the stored start iterator (a reference to the caller's
    // iterator) and to the stored end iterator.
    Iterator& get_first() { return first_; }
    Iterator const& get_first() const { return first_; }
    Iterator const& get_last() const { return last_; }

    // Returns the range [first_, last_).
    iterator_range<Iterator> get_value() const
    {
        return iterator_range<Iterator>(first_, last_);
    }

    // No separate token value is ever stored in this configuration.
    bool has_value() const { return false; }
    void reset_value() {}

    // Overwrites the begin-of-line flag.
    void reset_bol(bool bol) { bol_ = bol; }

protected:
    Iterator& first_;
    Iterator last_;

    boost::lexer::basic_state_machine<char_type> const& state_machine_;
    boost::lexer::basic_rules<char_type> const& rules_;

    bool bol_;      // helper storing whether last character was \n

private:
    // silence MSVC warning C4512: assignment operator could not be generated
    data& operator= (data const&);
};
///////////////////////////////////////////////////////////////////////
// doesn't support lexer semantic actions, but supports state
// Data object for lexers with state support but without semantic
// actions. It extends the stateless variant by the id of the current
// lexer state and replaces the state related accessors as well as next().
template <typename Iterator, typename TokenValue>
class data<Iterator, mpl::false_, mpl::true_, TokenValue>
  : public data<Iterator, mpl::false_, mpl::false_, TokenValue>
{
protected:
    typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
    typedef typename base_type::char_type char_type;

public:
    typedef Iterator base_iterator_type;
    typedef iterator_range<Iterator> token_value_type;
    typedef token_value_type get_value_type;
    typedef typename base_type::state_type state_type;
    typedef typename base_type::state_name_type state_name_type;
    typedef typename base_type::semantic_actions_type
        semantic_actions_type;

    // Forwards all arguments to the base class and starts in state 0.
    template <typename IterData>
    data (IterData const& data_, Iterator& first, Iterator const& last)
      : base_type(data_, first, last)
      , state_(0)
    {}

    // Support for the placeholder '_state': switch to the state with the
    // given name.
    void set_state_name (char_type const* new_state)
    {
        std::size_t const state_id = this->rules_.state(new_state);

        // An assertion failure here most likely means that the lexer
        // state name has never been defined in the token definition.
        BOOST_ASSERT(state_id != boost::lexer::npos);

        // With assertions disabled an unknown name leaves the current
        // state untouched.
        if (state_id == boost::lexer::npos)
            return;

        state_ = state_id;
    }

    // Support for the placeholder '_state': name of the current state,
    // looked up in the rule set.
    char_type const* get_state_name() const
    {
        return this->rules_.state(state_);
    }

    // Id of the state with the given name, looked up in the rule set.
    std::size_t get_state_id (char_type const* state) const
    {
        return this->rules_.state(state);
    }

    // next() and get_state() are called from the functor implementation.

    // Tries to match the next token of the underlying input sequence,
    // starting in the current lexer state. The begin-of-line flag as it
    // was before the call is reported through prev_bol.
    std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
    {
        typedef basic_iterator_tokeniser<Iterator> tokenizer;

        prev_bol = this->bol_;
        std::size_t const token_id = tokenizer::next(
            this->state_machine_, state_, this->bol_, end
          , this->get_eoi(), unique_id);
        return token_id;
    }

    // The current state is handed out by reference.
    std::size_t& get_state() { return state_; }

    void set_state(std::size_t state)
    {
        state_ = state;
    }

protected:
    std::size_t state_;     // id of the current lexer state

private:
    // declared but not defined; silences MSVC warning C4512 (assignment
    // operator could not be generated)
    data& operator= (data const&);
};
///////////////////////////////////////////////////////////////////////
// does support lexer semantic actions, may support state
// Data object for lexers with semantic action support. Depending on
// HasState it builds on either the stateless or the stateful variant
// without actions and adds what the semantic action support functions
// need: the attached actions, the end of the matched token, an optional
// token value and the iterator put on hold by lex::more().
template <typename Iterator, typename HasState, typename TokenValue>
class data<Iterator, mpl::true_, HasState, TokenValue>
  : public data<Iterator, mpl::false_, HasState, TokenValue>
{
public:
    typedef semantic_actions<Iterator, HasState, data>
        semantic_actions_type;

protected:
    typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
    typedef typename base_type::char_type char_type;
    typedef typename semantic_actions_type::functor_wrapper_type
        functor_wrapper_type;

public:
    typedef Iterator base_iterator_type;
    typedef TokenValue token_value_type;
    typedef TokenValue const& get_value_type;
    typedef typename base_type::state_type state_type;
    typedef typename base_type::state_name_type state_name_type;

    typedef detail::wrap_action<functor_wrapper_type
      , Iterator, data, std::size_t> wrap_action_type;

    // Forwards the arguments to the base class and keeps a reference to
    // the actions_ member of data_.
    //
    // end_ is value-initialized explicitly. Without this it would stay
    // indeterminate for iterator types such as raw pointers until
    // set_end() is called, although lookahead() and get_value() read it.
    template <typename IterData>
    data (IterData const& data_, Iterator& first, Iterator const& last)
      : base_type(data_, first, last)
      , actions_(data_.actions_), hold_(), end_()
      , value_(iterator_range<Iterator>(first, last))
      , has_value_(false), has_hold_(false) {}

    // invoke attached semantic actions, if defined
    BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
      , std::size_t& id, std::size_t unique_id, Iterator& end)
    {
        return actions_.invoke_actions(state, id, unique_id, end, *this);
    }

    // The function less() is used by the implementation of the support
    // function lex::less(). Its functionality is equivalent to flex'
    // function yyless(): it returns an iterator positioned to the
    // nth input character beyond the current start iterator (i.e. by
    // assigning the return value to the placeholder '_end' it is
    // possible to return all but the first n characters of the current
    // token back to the input stream).
    //
    // The iterator passed in is overwritten with the result and a
    // reference to it is returned.
    Iterator const& less(Iterator& it, int n)
    {
        it = this->get_first();
        std::advance(it, n);
        return it;
    }

    // The function more() is used by the implementation of the support
    // function lex::more(). Its functionality is equivalent to flex'
    // function yymore(): it tells the lexer that the next time it
    // matches a rule, the corresponding token should be appended onto
    // the current token value rather than replacing it.
    //
    // Here this means: remember the current start iterator in hold_.
    void more()
    {
        hold_ = this->get_first();
        has_hold_ = true;
    }

    // The function lookahead() is used by the implementation of the
    // support function lex::lookahead. It can be used to implement
    // lookahead for lexer engines not supporting constructs like flex'
    // a/b (match a, but only when followed by b)
    //
    // The tokenizer is run on local copies of the end-of-token iterator
    // and of the begin-of-line flag, so the members end_ and bol_ are
    // not modified. Returns true if the token id found equals 'id'.
    // If 'state' is left at its default, the current state is used.
    bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
    {
        Iterator end = end_;
        std::size_t unique_id = boost::lexer::npos;
        bool bol = this->bol_;

        // get_state() is provided by both possible base classes (it
        // yields 0 without state support), whereas a state_ member only
        // exists if HasState is mpl::true_.
        if (std::size_t(~0) == state)
            state = this->get_state();

        typedef basic_iterator_tokeniser<Iterator> tokenizer;
        return id == tokenizer::next(this->state_machine_, state,
            bol, end, this->get_eoi(), unique_id);
    }

    // The adjust_start() and revert_adjust_start() are helper
    // functions needed to implement the functionality required for
    // lex::more(). It is called from the functor body below.
    //
    // adjust_start() exchanges the start iterator with the iterator on
    // hold (if there is one) and reports whether it did so.
    bool adjust_start()
    {
        if (!has_hold_)
            return false;

        std::swap(this->get_first(), hold_);
        has_hold_ = false;
        return true;
    }

    // Undoes the exchange performed by adjust_start().
    void revert_adjust_start()
    {
        // this will be called only if adjust_start above returned true
        std::swap(this->get_first(), hold_);
        has_hold_ = true;
    }

    // Returns the token value. If none has been set, it is created on
    // demand from the range [start iterator, end_) and cached, which is
    // why value_ and has_value_ are mutable.
    TokenValue const& get_value() const
    {
        if (!has_value_) {
            value_ = iterator_range<Iterator>(this->get_first(), end_);
            has_value_ = true;
        }
        return value_;
    }

    // Stores an explicit token value.
    template <typename Value>
    void set_value(Value const& val)
    {
        value_ = val;
        has_value_ = true;
    }

    // Records the end of the matched token.
    void set_end(Iterator const& it)
    {
        end_ = it;
    }

    bool has_value() const { return has_value_; }
    void reset_value() { has_value_ = false; }

protected:
    semantic_actions_type const& actions_;
    Iterator hold_;     // iterator needed to support lex::more()
    Iterator end_;      // iterator pointing to end of matched token
    mutable TokenValue value_;  // token value to use
    mutable bool has_value_;    // 'true' if value_ is valid
    bool has_hold_;     // 'true' if hold_ is valid

private:
    // silence MSVC warning C4512: assignment operator could not be generated
    data& operator= (data const&);
};
}
}}}}
#endif