// Copyright (c) 2001-2011 Hartmut Kaiser | |
// | |
// Distributed under the Boost Software License, Version 1.0. (See accompanying | |
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) | |
#define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM | |
#if defined(_MSC_VER) | |
#pragma once | |
#endif | |
#include <boost/spirit/home/support/info.hpp> | |
#include <boost/spirit/home/qi/skip_over.hpp> | |
#include <boost/spirit/home/qi/parser.hpp> | |
#include <boost/spirit/home/qi/detail/assign_to.hpp> | |
#include <boost/spirit/home/lex/reference.hpp> | |
#include <boost/spirit/home/lex/meta_compiler.hpp> | |
#include <boost/spirit/home/lex/lexer_type.hpp> | |
#include <boost/spirit/home/lex/lexer/token_def.hpp> | |
#include <boost/assert.hpp> | |
#include <boost/noncopyable.hpp> | |
#include <boost/detail/iterator.hpp> | |
#include <boost/fusion/include/vector.hpp> | |
#include <boost/mpl/assert.hpp> | |
#include <boost/range/iterator_range.hpp> | |
#include <string> | |
namespace boost { namespace spirit { namespace lex | |
{ | |
/////////////////////////////////////////////////////////////////////////// | |
namespace detail | |
{ | |
/////////////////////////////////////////////////////////////////////// | |
template <typename LexerDef> | |
struct lexer_def_ | |
: proto::extends< | |
typename proto::terminal< | |
lex::reference<lexer_def_<LexerDef> const> | |
>::type | |
, lexer_def_<LexerDef> > | |
, qi::parser<lexer_def_<LexerDef> > | |
, lex::lexer_type<lexer_def_<LexerDef> > | |
{ | |
private: | |
// avoid warnings about using 'this' in constructor | |
lexer_def_& this_() { return *this; } | |
typedef typename LexerDef::char_type char_type; | |
typedef typename LexerDef::string_type string_type; | |
typedef typename LexerDef::id_type id_type; | |
typedef lex::reference<lexer_def_ const> reference_; | |
typedef typename proto::terminal<reference_>::type terminal_type; | |
typedef proto::extends<terminal_type, lexer_def_> proto_base_type; | |
reference_ alias() const | |
{ | |
return reference_(*this); | |
} | |
public: | |
// Qi interface: metafunction calculating parser attribute type | |
template <typename Context, typename Iterator> | |
struct attribute | |
{ | |
// the return value of a token set contains the matched token | |
// id, and the corresponding pair of iterators | |
typedef typename Iterator::base_iterator_type iterator_type; | |
typedef | |
fusion::vector2<id_type, iterator_range<iterator_type> > | |
type; | |
}; | |
// Qi interface: parse functionality | |
template <typename Iterator, typename Context | |
, typename Skipper, typename Attribute> | |
bool parse(Iterator& first, Iterator const& last | |
, Context& /*context*/, Skipper const& skipper | |
, Attribute& attr) const | |
{ | |
qi::skip_over(first, last, skipper); // always do a pre-skip | |
if (first != last) { | |
typedef typename | |
boost::detail::iterator_traits<Iterator>::value_type | |
token_type; | |
token_type const& t = *first; | |
if (token_is_valid(t) && t.state() == first.get_state()) { | |
// any of the token definitions matched | |
spirit::traits::assign_to(t, attr); | |
++first; | |
return true; | |
} | |
} | |
return false; | |
} | |
// Qi interface: 'what' functionality | |
template <typename Context> | |
info what(Context& /*context*/) const | |
{ | |
return info("lexer"); | |
} | |
private: | |
// allow to use the lexer.self.add("regex1", id1)("regex2", id2); | |
// syntax | |
struct adder | |
{ | |
adder(lexer_def_& def_) | |
: def(def_) {} | |
// Add a token definition based on a single character as given | |
// by the first parameter, the second parameter allows to | |
// specify the token id to use for the new token. If no token | |
// id is given the character code is used. | |
adder const& operator()(char_type c | |
, id_type token_id = id_type()) const | |
{ | |
if (id_type() == token_id) | |
token_id = static_cast<id_type>(c); | |
def.def.add_token (def.state.c_str(), c, token_id | |
, def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
return *this; | |
} | |
// Add a token definition based on a character sequence as | |
// given by the first parameter, the second parameter allows to | |
// specify the token id to use for the new token. If no token | |
// id is given this function will generate a unique id to be | |
// used as the token's id. | |
adder const& operator()(string_type const& s | |
, id_type token_id = id_type()) const | |
{ | |
if (id_type() == token_id) | |
token_id = def.def.get_next_id(); | |
def.def.add_token (def.state.c_str(), s, token_id | |
, def.targetstate.empty() ? 0 : def.targetstate.c_str()); | |
return *this; | |
} | |
template <typename Attribute> | |
adder const& operator()( | |
token_def<Attribute, char_type, id_type>& tokdef | |
, id_type token_id = id_type()) const | |
{ | |
// make sure we have a token id | |
if (id_type() == token_id) { | |
if (id_type() == tokdef.id()) { | |
token_id = def.def.get_next_id(); | |
tokdef.id(token_id); | |
} | |
else { | |
token_id = tokdef.id(); | |
} | |
} | |
else { | |
// the following assertion makes sure that the token_def | |
// instance has not been assigned a different id earlier | |
BOOST_ASSERT(id_type() == tokdef.id() | |
|| token_id == tokdef.id()); | |
tokdef.id(token_id); | |
} | |
def.define(tokdef); | |
return *this; | |
} | |
// template <typename F> | |
// adder const& operator()(char_type c, id_type token_id, F act) const | |
// { | |
// if (id_type() == token_id) | |
// token_id = def.def.get_next_id(); | |
// std::size_t unique_id = | |
// def.def.add_token (def.state.c_str(), s, token_id); | |
// def.def.add_action(unique_id, def.state.c_str(), act); | |
// return *this; | |
// } | |
lexer_def_& def; | |
private: | |
// silence MSVC warning C4512: assignment operator could not be generated | |
adder& operator= (adder const&); | |
}; | |
friend struct adder; | |
// allow to use lexer.self.add_pattern("pattern1", "regex1")(...); | |
// syntax | |
struct pattern_adder | |
{ | |
pattern_adder(lexer_def_& def_) | |
: def(def_) {} | |
pattern_adder const& operator()(string_type const& p | |
, string_type const& s) const | |
{ | |
def.def.add_pattern (def.state.c_str(), p, s); | |
return *this; | |
} | |
lexer_def_& def; | |
private: | |
// silence MSVC warning C4512: assignment operator could not be generated | |
pattern_adder& operator= (pattern_adder const&); | |
}; | |
friend struct pattern_adder; | |
private: | |
// Helper function to invoke the necessary 2 step compilation | |
// process on token definition expressions | |
template <typename TokenExpr> | |
void compile2pass(TokenExpr const& expr) | |
{ | |
expr.collect(def, state, targetstate); | |
expr.add_actions(def); | |
} | |
public: | |
/////////////////////////////////////////////////////////////////// | |
template <typename Expr> | |
void define(Expr const& expr) | |
{ | |
compile2pass(compile<lex::domain>(expr)); | |
} | |
lexer_def_(LexerDef& def_, string_type const& state_ | |
, string_type const& targetstate_ = string_type()) | |
: proto_base_type(terminal_type::make(alias())) | |
, add(this_()), add_pattern(this_()), def(def_) | |
, state(state_), targetstate(targetstate_) | |
{} | |
// allow to switch states | |
lexer_def_ operator()(char_type const* state) const | |
{ | |
return lexer_def_(def, state); | |
} | |
lexer_def_ operator()(char_type const* state | |
, char_type const* targetstate) const | |
{ | |
return lexer_def_(def, state, targetstate); | |
} | |
lexer_def_ operator()(string_type const& state | |
, string_type const& targetstate = string_type()) const | |
{ | |
return lexer_def_(def, state, targetstate); | |
} | |
// allow to assign a token definition expression | |
template <typename Expr> | |
lexer_def_& operator= (Expr const& xpr) | |
{ | |
// Report invalid expression error as early as possible. | |
// If you got an error_invalid_expression error message here, | |
// then the expression (expr) is not a valid spirit lex | |
// expression. | |
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
def.clear(state.c_str()); | |
define(xpr); | |
return *this; | |
} | |
adder add; | |
pattern_adder add_pattern; | |
private: | |
LexerDef& def; | |
string_type state; | |
string_type targetstate; | |
private: | |
// silence MSVC warning C4512: assignment operator could not be generated | |
lexer_def_& operator= (lexer_def_ const&); | |
}; | |
// allow to assign a token definition expression | |
template <typename LexerDef, typename Expr> | |
inline lexer_def_<LexerDef>& | |
operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) | |
{ | |
// Report invalid expression error as early as possible. | |
// If you got an error_invalid_expression error message here, | |
// then the expression (expr) is not a valid spirit lex | |
// expression. | |
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
lexdef.define(xpr); | |
return lexdef; | |
} | |
template <typename LexerDef, typename Expr> | |
inline lexer_def_<LexerDef>& | |
operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) | |
{ | |
// Report invalid expression error as early as possible. | |
// If you got an error_invalid_expression error message here, | |
// then the expression (expr) is not a valid spirit lex | |
// expression. | |
BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); | |
lexdef.define(xpr); | |
return lexdef; | |
} | |
} | |
/////////////////////////////////////////////////////////////////////////// | |
// The match_flags flags are used to influence different matching | |
// modes of the lexer | |
struct match_flags | |
{ | |
enum enum_type | |
{ | |
match_default = 0, // no flags | |
match_not_dot_newline = 1, // the regex '.' doesn't match newlines | |
match_icase = 2 // all matching operations are case insensitive | |
}; | |
}; | |
/////////////////////////////////////////////////////////////////////////// | |
// This represents a lexer object | |
/////////////////////////////////////////////////////////////////////////// | |
/////////////////////////////////////////////////////////////////////////// | |
// This is the first token id automatically assigned by the library | |
// if needed | |
enum tokenids | |
{ | |
min_token_id = 0x10000 | |
}; | |
template <typename Lexer> | |
class lexer : public Lexer | |
{ | |
private: | |
// avoid warnings about using 'this' in constructor | |
lexer& this_() { return *this; } | |
std::size_t next_token_id; // has to be an integral type | |
public: | |
typedef Lexer lexer_type; | |
typedef typename Lexer::id_type id_type; | |
typedef typename Lexer::char_type char_type; | |
typedef typename Lexer::iterator_type iterator_type; | |
typedef lexer base_type; | |
typedef detail::lexer_def_<lexer> lexer_def; | |
typedef std::basic_string<char_type> string_type; | |
lexer(unsigned int flags = match_flags::match_default | |
, id_type first_id = id_type(min_token_id)) | |
: lexer_type(flags) | |
, next_token_id(first_id) | |
, self(this_(), lexer_type::initial_state()) | |
{} | |
// access iterator interface | |
template <typename Iterator> | |
iterator_type begin(Iterator& first, Iterator const& last | |
, char_type const* initial_state = 0) const | |
{ return this->lexer_type::begin(first, last, initial_state); } | |
iterator_type end() const | |
{ return this->lexer_type::end(); } | |
std::size_t map_state(char_type const* state) | |
{ return this->lexer_type::add_state(state); } | |
// create a unique token id | |
id_type get_next_id() { return id_type(next_token_id++); } | |
lexer_def self; // allow for easy token definition | |
}; | |
}}} | |
#endif |