// Copyright (c) 2001-2011 Hartmut Kaiser | |
// | |
// Distributed under the Boost Software License, Version 1.0. (See accompanying | |
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM) | |
#define BOOST_SPIRIT_LEX_LEXER_MAR_17_2007_0139PM | |
#if defined(_MSC_VER) | |
#pragma once | |
#endif | |
#include <iosfwd> | |
#include <boost/spirit/home/support/detail/lexer/generator.hpp> | |
#include <boost/spirit/home/support/detail/lexer/rules.hpp> | |
#include <boost/spirit/home/support/detail/lexer/consts.hpp> | |
#include <boost/spirit/home/support/unused.hpp> | |
#include <boost/spirit/home/lex/lexer/lexertl/token.hpp> | |
#include <boost/spirit/home/lex/lexer/lexertl/functor.hpp> | |
#include <boost/spirit/home/lex/lexer/lexertl/functor_data.hpp> | |
#include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp> | |
#if defined(BOOST_SPIRIT_LEXERTL_DEBUG) | |
#include <boost/spirit/home/support/detail/lexer/debug.hpp> | |
#endif | |
#include <boost/foreach.hpp> | |
namespace boost { namespace spirit { namespace lex { namespace lexertl | |
{ | |
/////////////////////////////////////////////////////////////////////////// | |
namespace detail | |
{ | |
/////////////////////////////////////////////////////////////////////// | |
// The must_escape function checks if the given character value needs | |
// to be preceded by a backslash character to disable its special | |
// meaning in the context of a regular expression | |
/////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////
//  The must_escape function checks if the given character value needs
//  to be preceded by a backslash character to disable its special
//  meaning in the context of a regular expression
///////////////////////////////////////////////////////////////////////
template <typename Char>
inline bool must_escape(Char c)
{
    // FIXME: more needed?
    // Table of characters carrying special meaning in a lexertl
    // regular expression.
    static char const special_chars[] = "+/*?|()[]{}.^$\\\"";
    for (char const* p = special_chars; *p; ++p) {
        if (c == static_cast<Char>(*p))
            return true;
    }
    return false;
}
/////////////////////////////////////////////////////////////////////// | |
// The escape function returns the string representation of the given | |
// character value, possibly escaped with a backslash character, to | |
// allow it being safely used in a regular expression definition. | |
/////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////
//  The escape function returns the string representation of the given
//  character value, possibly escaped with a backslash character, to
//  allow it being safely used in a regular expression definition.
///////////////////////////////////////////////////////////////////////
template <typename Char>
inline std::basic_string<Char> escape(Char ch)
{
    // Characters which carry a special meaning inside a lexertl
    // regular expression and hence need a preceding backslash to be
    // matched literally (same set as recognized by must_escape).
    static char const special_chars[] = "+/*?|()[]{}.^$\\\"";

    std::basic_string<Char> result;
    for (char const* p = special_chars; *p; ++p) {
        if (ch == static_cast<Char>(*p)) {
            result.push_back(static_cast<Char>('\\'));
            break;
        }
    }
    result.push_back(ch);
    return result;
}
/////////////////////////////////////////////////////////////////////// | |
// | |
/////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////
//  Translate Spirit match_flags into the equivalent lexertl
//  regex_flags value. Flags lexertl does not understand are silently
//  dropped.
///////////////////////////////////////////////////////////////////////
inline boost::lexer::regex_flags map_flags(unsigned int flags)
{
    unsigned int mapped = boost::lexer::none;

    // '.' should not match newline characters
    if (flags & match_flags::match_not_dot_newline)
        mapped |= boost::lexer::dot_not_newline;

    // perform case insensitive matching
    if (flags & match_flags::match_icase)
        mapped |= boost::lexer::icase;

    return boost::lexer::regex_flags(mapped);
}
} | |
/////////////////////////////////////////////////////////////////////////// | |
template <typename Lexer, typename F> | |
bool generate_static(Lexer const&, std::ostream&, char const*, F); | |
/////////////////////////////////////////////////////////////////////////// | |
// | |
// Every lexer type to be used as a lexer for Spirit has to conform to | |
// the following public interface: | |
// | |
// typedefs: | |
// iterator_type The type of the iterator exposed by this lexer. | |
// token_type The type of the tokens returned from the exposed | |
// iterators. | |
// | |
// functions: | |
// default constructor | |
// Since lexers are instantiated as base classes | |
// only it might be a good idea to make this | |
// constructor protected. | |
// begin, end Return a pair of iterators, when dereferenced | |
// returning the sequence of tokens recognized in | |
// the input stream given as the parameters to the | |
// begin() function. | |
// add_token Should add the definition of a token to be | |
// recognized by this lexer. | |
// clear Should delete all current token definitions | |
// associated with the given state of this lexer | |
// object. | |
// | |
// template parameters: | |
// Iterator The type of the iterator used to access the | |
// underlying character stream. | |
// Token The type of the tokens to be returned from the | |
// exposed token iterator. | |
// Functor The type of the InputPolicy to use to instantiate | |
// the multi_pass iterator type to be used as the | |
// token iterator (returned from begin()/end()). | |
// | |
/////////////////////////////////////////////////////////////////////////// | |
/////////////////////////////////////////////////////////////////////////// | |
// | |
// The lexer class is a implementation of a Spirit.Lex lexer on | |
// top of Ben Hanson's lexertl library as outlined above (For more | |
// information about lexertl go here: http://www.benhanson.net/lexertl.html). | |
// | |
// This class is supposed to be used as the first and only template | |
// parameter while instantiating instances of a lex::lexer class. | |
// | |
/////////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////////
//  Dynamic (runtime-built) lexer: token definitions are collected into a
//  lexertl rules object and compiled into a DFA lazily on the first call
//  to begin() (see init_dfa() below).
///////////////////////////////////////////////////////////////////////////
template <typename Token = token<>
  , typename Iterator = typename Token::iterator_type
  , typename Functor = functor<Token, lexertl::detail::data, Iterator> >
class lexer
{
private:
    // safe-bool idiom (pre-C++11): allows testing the lexer in boolean
    // context without an implicit conversion to an integral type
    struct dummy { void true_() {} };
    typedef void (dummy::*safe_bool)();

    // sentinel state id used to address all lexer states at once
    // (returned by add_state() for the special "all states" name)
    static std::size_t const all_states_id = static_cast<std::size_t>(-2);

public:
    // evaluates to 'true' once the DFA has been successfully generated
    operator safe_bool() const
    { return initialized_dfa_ ? &dummy::true_ : 0; }

    // character type of the underlying input stream
    typedef typename boost::detail::iterator_traits<Iterator>::value_type
        char_type;
    typedef std::basic_string<char_type> string_type;
    typedef boost::lexer::basic_rules<char_type> basic_rules_type;

    //  Every lexer type to be used as a lexer for Spirit has to conform to
    //  a public interface .
    typedef Token token_type;
    typedef typename Token::id_type id_type;
    typedef iterator<Functor> iterator_type;

private:
    // this type is purely used for the iterator_type construction below;
    // it bundles (by reference) everything the token iterator needs:
    // the compiled state machine, the rules, and the semantic actions
    struct iterator_data_type
    {
        typedef typename Functor::semantic_actions_type semantic_actions_type;

        iterator_data_type(
              boost::lexer::basic_state_machine<char_type> const& sm
            , boost::lexer::basic_rules<char_type> const& rules
            , semantic_actions_type const& actions)
          : state_machine_(sm), rules_(rules), actions_(actions)
        {}

        boost::lexer::basic_state_machine<char_type> const& state_machine_;
        boost::lexer::basic_rules<char_type> const& rules_;
        semantic_actions_type const& actions_;

    private:
        // silence MSVC warning C4512: assignment operator could not be
        // generated
        iterator_data_type& operator= (iterator_data_type const&);
    };

public:
    //  Return the start iterator usable for iterating over the generated
    //  tokens. The DFA is (re)built here if any token/pattern/state was
    //  added since the last build; on failure an end iterator is returned.
    iterator_type begin(Iterator& first, Iterator const& last
      , char_type const* initial_state = 0) const
    {
        if (!init_dfa())    // never minimize DFA for dynamic lexers
            return iterator_type();

        iterator_data_type iterator_data(state_machine_, rules_, actions_);
        return iterator_type(iterator_data, first, last, initial_state);
    }

    //  Return the end iterator usable to stop iterating over the generated
    //  tokens.
    iterator_type end() const
    {
        return iterator_type();
    }

protected:
    //  Lexer instances can be created by means of a derived class only.
    //  'flags' are Spirit match_flags, mapped to lexertl regex_flags.
    lexer(unsigned int flags)
      : flags_(detail::map_flags(flags))
      , rules_(flags_)
      , initialized_dfa_(false)
    {}

public:
    //  Interface for token definition management. This overload takes a
    //  single character token definition, which is escaped before being
    //  handed to lexertl so it is always matched literally. Returns the
    //  unique id of the added rule and invalidates the cached DFA.
    std::size_t add_token(char_type const* state, char_type tokendef,
        std::size_t token_id, char_type const* targetstate)
    {
        add_state(state);
        initialized_dfa_ = false;

        // a definition added to the "all states" pseudo state keeps the
        // current state on match (lexertl 'dot' target)
        if (state == all_states())
            return rules_.add(state, detail::escape(tokendef), token_id, rules_.dot());

        // a null target state means: stay in the current state
        if (0 == targetstate)
            targetstate = state;
        else
            add_state(targetstate);
        return rules_.add(state, detail::escape(tokendef), token_id, targetstate);
    }

    //  Same as above, but takes a full regular expression as the token
    //  definition (passed through to lexertl unescaped).
    std::size_t add_token(char_type const* state, string_type const& tokendef,
        std::size_t token_id, char_type const* targetstate)
    {
        add_state(state);
        initialized_dfa_ = false;

        if (state == all_states())
            return rules_.add(state, tokendef, token_id, rules_.dot());

        if (0 == targetstate)
            targetstate = state;
        else
            add_state(targetstate);
        return rules_.add(state, tokendef, token_id, targetstate);
    }

    //  Interface for pattern definition management: registers a named
    //  sub-pattern (lexertl macro) usable as {name} inside later token
    //  definitions. Invalidates the cached DFA.
    void add_pattern (char_type const* state, string_type const& name,
        string_type const& patterndef)
    {
        add_state(state);
        rules_.add_macro(name.c_str(), patterndef);
        initialized_dfa_ = false;
    }

    // read-only access to the underlying lexertl rules
    boost::lexer::rules const& get_rules() const { return rules_; }

    //  Remove all token definitions associated with the given lexer state
    //  (no-op on the rules if the state is unknown). Invalidates the
    //  cached DFA.
    void clear(char_type const* state)
    {
        std::size_t s = rules_.state(state);
        if (boost::lexer::npos != s)
            rules_.clear(state);
        initialized_dfa_ = false;
    }

    //  Return the id of the given lexer state, registering it first if it
    //  is not known yet. The special "all states" name maps to the
    //  all_states_id sentinel and is never added to the rules.
    std::size_t add_state(char_type const* state)
    {
        if (state == all_states())
            return all_states_id;

        std::size_t stateid = rules_.state(state);
        if (boost::lexer::npos == stateid) {
            stateid = rules_.add_state(state);
            initialized_dfa_ = false;
        }
        return stateid;
    }

    // name of the initial lexer state
    string_type initial_state() const
    {
        return string_type(rules_.initial());
    }

    // name of the "all states" pseudo state
    string_type all_states() const
    {
        return string_type(rules_.all_states());
    }

    //  Register a semantic action with the given id; 'state' is a state
    //  id as returned from add_state() (possibly all_states_id).
    template <typename F>
    void add_action(std::size_t unique_id, std::size_t state, F act)
    {
        // If you see an error here stating add_action is not a member of
        // fusion::unused_type then you are probably having semantic actions
        // attached to at least one token in the lexer definition without
        // using the lex::lexertl::actor_lexer<> as its base class.
        typedef typename Functor::wrap_action_type wrapper_type;
        if (state == all_states_id) {
            // add the action to all known states
            typedef typename
                basic_rules_type::string_size_t_map::value_type
            state_type;

            BOOST_FOREACH(state_type const& s, rules_.statemap())
                actions_.add_action(unique_id, s.second, wrapper_type::call(act));
        }
        else {
            actions_.add_action(unique_id, state, wrapper_type::call(act));
        }
    }

//     template <typename F>
//     void add_action(std::size_t unique_id, char_type const* state, F act)
//     {
//         typedef typename Functor::wrap_action_type wrapper_type;
//         actions_.add_action(unique_id, add_state(state), wrapper_type::call(act));
//     }

    //  We do not minimize the state machine by default anymore because
    //  Ben said: "If you can afford to generate a lexer at runtime, there
    //  is little point in calling minimise."
    //  Go figure.
    //
    //  Rebuild the DFA from the current rules if anything changed since
    //  the last build; otherwise this is a cheap no-op. Always returns
    //  true (a failed build would throw from lexertl).
    bool init_dfa(bool minimize = false) const
    {
        if (!initialized_dfa_) {
            state_machine_.clear();
            typedef boost::lexer::basic_generator<char_type> generator;
            generator::build (rules_, state_machine_);
            if (minimize)
                generator::minimise (state_machine_);

#if defined(BOOST_SPIRIT_LEXERTL_DEBUG)
            boost::lexer::debug::dump(state_machine_, std::cerr);
#endif
            initialized_dfa_ = true;
        }
        return true;
    }

private:
    // lexertl specific data
    // state_machine_ and initialized_dfa_ are mutable so the DFA can be
    // built lazily from the const member functions begin()/init_dfa()
    mutable boost::lexer::basic_state_machine<char_type> state_machine_;
    boost::lexer::regex_flags flags_;
    basic_rules_type rules_;
    typename Functor::semantic_actions_type actions_;
    mutable bool initialized_dfa_;

    // generator functions must be able to access members directly
    template <typename Lexer, typename F>
    friend bool generate_static(Lexer const&, std::ostream&, char const*, F);
};
/////////////////////////////////////////////////////////////////////////// | |
// | |
// The actor_lexer class is another implementation of a Spirit.Lex | |
// lexer on top of Ben Hanson's lexertl library as outlined above (For | |
// more information about lexertl go here: | |
// http://www.benhanson.net/lexertl.html). | |
// | |
// The only difference to the lexer class above is that token_def | |
// definitions may have semantic (lexer) actions attached while being | |
// defined: | |
// | |
// int w; | |
// token_def word = "[^ \t\n]+"; | |
// self = word[++ref(w)]; // see example: word_count_lexer | |
// | |
// This class is supposed to be used as the first and only template | |
// parameter while instantiating instances of a lex::lexer class. | |
// | |
/////////////////////////////////////////////////////////////////////////// | |
///////////////////////////////////////////////////////////////////////////
//  Identical to lexer<> above, except that the Functor is instantiated
//  with mpl::true_ as its fourth template parameter, enabling support
//  for semantic (lexer) actions attached to token definitions.
///////////////////////////////////////////////////////////////////////////
template <typename Token = token<>
  , typename Iterator = typename Token::iterator_type
  , typename Functor = functor<Token, lexertl::detail::data, Iterator, mpl::true_> >
class actor_lexer : public lexer<Token, Iterator, Functor>
{
protected:
    //  Lexer instances can be created by means of a derived class only.
    //  'flags' are Spirit match_flags, forwarded to the base lexer.
    actor_lexer(unsigned int flags)
      : lexer<Token, Iterator, Functor>(flags) {}
};
}}}} | |
#endif |