/////////////////////////////////////////////////////////////////////////////// | |
/// \file regex_token_iterator.hpp | |
/// Contains the definition of regex_token_iterator, and STL-compatible iterator | |
/// for tokenizing a string using a regular expression. | |
// | |
// Copyright 2008 Eric Niebler. Distributed under the Boost | |
// Software License, Version 1.0. (See accompanying file | |
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#ifndef BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005 | |
#define BOOST_XPRESSIVE_REGEX_TOKEN_ITERATOR_HPP_EAN_10_04_2005 | |
// MS compatible compilers support #pragma once | |
#if defined(_MSC_VER) && (_MSC_VER >= 1020) | |
# pragma once | |
#endif | |
#include <vector> | |
#include <boost/assert.hpp> | |
#include <boost/mpl/assert.hpp> | |
#include <boost/type_traits/is_same.hpp> | |
#include <boost/type_traits/is_convertible.hpp> | |
#include <boost/xpressive/regex_iterator.hpp> | |
namespace boost { namespace xpressive { namespace detail | |
{ | |
////////////////////////////////////////////////////////////////////////// | |
// regex_token_iterator_impl | |
// | |
template<typename BidiIter> | |
struct regex_token_iterator_impl | |
: counted_base<regex_token_iterator_impl<BidiIter> > | |
{ | |
typedef sub_match<BidiIter> value_type; | |
regex_token_iterator_impl | |
( | |
BidiIter begin | |
, BidiIter cur | |
, BidiIter end | |
, BidiIter next_search | |
, basic_regex<BidiIter> const &rex | |
, regex_constants::match_flag_type flags = regex_constants::match_default | |
, std::vector<int> subs = std::vector<int>(1, 0) | |
, int n = -2 | |
, bool not_null = false | |
) | |
: iter_(begin, cur, end, next_search, rex, flags, not_null) | |
, result_() | |
, n_((-2 == n) ? (int)subs.size() - 1 : n) | |
, subs_() | |
{ | |
BOOST_ASSERT(0 != subs.size()); | |
this->subs_.swap(subs); | |
} | |
bool next() | |
{ | |
if(-1 != this->n_) | |
{ | |
BidiIter cur = this->iter_.state_.cur_; | |
if(0 != (++this->n_ %= (int)this->subs_.size()) || this->iter_.next()) | |
{ | |
this->result_ = (-1 == this->subs_[ this->n_ ]) | |
? this->iter_.what_.prefix() | |
: this->iter_.what_[ this->subs_[ this->n_ ] ]; | |
return true; | |
} | |
else if(-1 == this->subs_[ this->n_-- ] && cur != this->iter_.state_.end_) | |
{ | |
this->result_ = value_type(cur, this->iter_.state_.end_, true); | |
return true; | |
} | |
} | |
return false; | |
} | |
bool equal_to(regex_token_iterator_impl<BidiIter> const &that) const | |
{ | |
return this->iter_.equal_to(that.iter_) && this->n_ == that.n_; | |
} | |
regex_iterator_impl<BidiIter> iter_; | |
value_type result_; | |
int n_; | |
std::vector<int> subs_; | |
}; | |
inline int get_mark_number(int i) | |
{ | |
return i; | |
} | |
inline std::vector<int> to_vector(int subs) | |
{ | |
return std::vector<int>(1, subs); | |
} | |
inline std::vector<int> const &to_vector(std::vector<int> const &subs) | |
{ | |
return subs; | |
} | |
template<typename Int, std::size_t Size> | |
inline std::vector<int> to_vector(Int const (&sub_matches)[ Size ]) | |
{ | |
// so that people can specify sub-match indices inline with | |
// string literals, like "\1\2\3", leave off the trailing '\0' | |
std::size_t const size = Size - is_same<Int, char>::value; | |
std::vector<int> vect(size); | |
for(std::size_t i = 0; i < size; ++i) | |
{ | |
vect[i] = get_mark_number(sub_matches[i]); | |
} | |
return vect; | |
} | |
template<typename Int> | |
inline std::vector<int> to_vector(std::vector<Int> const &sub_matches) | |
{ | |
BOOST_MPL_ASSERT((is_convertible<Int, int>)); | |
return std::vector<int>(sub_matches.begin(), sub_matches.end()); | |
} | |
} // namespace detail | |
////////////////////////////////////////////////////////////////////////// | |
// regex_token_iterator | |
// | |
template<typename BidiIter> | |
struct regex_token_iterator | |
{ | |
typedef basic_regex<BidiIter> regex_type; | |
typedef typename iterator_value<BidiIter>::type char_type; | |
typedef sub_match<BidiIter> value_type; | |
typedef std::ptrdiff_t difference_type; | |
typedef value_type const *pointer; | |
typedef value_type const &reference; | |
typedef std::forward_iterator_tag iterator_category; | |
/// INTERNAL ONLY | |
typedef detail::regex_token_iterator_impl<BidiIter> impl_type_; | |
/// \post \c *this is the end of sequence iterator. | |
regex_token_iterator() | |
: impl_() | |
{ | |
} | |
/// \param begin The beginning of the character range to search. | |
/// \param end The end of the character range to search. | |
/// \param rex The regex pattern to search for. | |
/// \pre \c [begin,end) is a valid range. | |
regex_token_iterator | |
( | |
BidiIter begin | |
, BidiIter end | |
, basic_regex<BidiIter> const &rex | |
) | |
: impl_() | |
{ | |
if(0 != rex.regex_id()) | |
{ | |
this->impl_ = new impl_type_(begin, begin, end, begin, rex); | |
this->next_(); | |
} | |
} | |
/// \param begin The beginning of the character range to search. | |
/// \param end The end of the character range to search. | |
/// \param rex The regex pattern to search for. | |
/// \param args A let() expression with argument bindings for semantic actions. | |
/// \pre \c [begin,end) is a valid range. | |
template<typename LetExpr> | |
regex_token_iterator | |
( | |
BidiIter begin | |
, BidiIter end | |
, basic_regex<BidiIter> const &rex | |
, detail::let_<LetExpr> const &args | |
) | |
: impl_() | |
{ | |
if(0 != rex.regex_id()) | |
{ | |
this->impl_ = new impl_type_(begin, begin, end, begin, rex); | |
detail::bind_args(args, this->impl_->iter_.what_); | |
this->next_(); | |
} | |
} | |
/// \param begin The beginning of the character range to search. | |
/// \param end The end of the character range to search. | |
/// \param rex The regex pattern to search for. | |
/// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.) | |
/// \pre \c [begin,end) is a valid range. | |
/// \pre \c subs is either an integer greater or equal to -1, | |
/// or else an array or non-empty \c std::vector\<\> of such integers. | |
template<typename Subs> | |
regex_token_iterator | |
( | |
BidiIter begin | |
, BidiIter end | |
, basic_regex<BidiIter> const &rex | |
, Subs const &subs | |
, regex_constants::match_flag_type flags = regex_constants::match_default | |
) | |
: impl_() | |
{ | |
if(0 != rex.regex_id()) | |
{ | |
this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs)); | |
this->next_(); | |
} | |
} | |
/// \param begin The beginning of the character range to search. | |
/// \param end The end of the character range to search. | |
/// \param rex The regex pattern to search for. | |
/// \param args A let() expression with argument bindings for semantic actions. | |
/// \param flags Optional match flags, used to control how the expression is matched against the sequence. (See match_flag_type.) | |
/// \pre \c [begin,end) is a valid range. | |
/// \pre \c subs is either an integer greater or equal to -1, | |
/// or else an array or non-empty \c std::vector\<\> of such integers. | |
template<typename Subs, typename LetExpr> | |
regex_token_iterator | |
( | |
BidiIter begin | |
, BidiIter end | |
, basic_regex<BidiIter> const &rex | |
, Subs const &subs | |
, detail::let_<LetExpr> const &args | |
, regex_constants::match_flag_type flags = regex_constants::match_default | |
) | |
: impl_() | |
{ | |
if(0 != rex.regex_id()) | |
{ | |
this->impl_ = new impl_type_(begin, begin, end, begin, rex, flags, detail::to_vector(subs)); | |
detail::bind_args(args, this->impl_->iter_.what_); | |
this->next_(); | |
} | |
} | |
/// \post <tt>*this == that</tt> | |
regex_token_iterator(regex_token_iterator<BidiIter> const &that) | |
: impl_(that.impl_) // COW | |
{ | |
} | |
/// \post <tt>*this == that</tt> | |
regex_token_iterator<BidiIter> &operator =(regex_token_iterator<BidiIter> const &that) | |
{ | |
this->impl_ = that.impl_; // COW | |
return *this; | |
} | |
friend bool operator ==(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right) | |
{ | |
if(!left.impl_ || !right.impl_) | |
{ | |
return !left.impl_ && !right.impl_; | |
} | |
return left.impl_->equal_to(*right.impl_); | |
} | |
friend bool operator !=(regex_token_iterator<BidiIter> const &left, regex_token_iterator<BidiIter> const &right) | |
{ | |
return !(left == right); | |
} | |
value_type const &operator *() const | |
{ | |
return this->impl_->result_; | |
} | |
value_type const *operator ->() const | |
{ | |
return &this->impl_->result_; | |
} | |
/// If N == -1 then sets *this equal to the end of sequence iterator. | |
/// Otherwise if N+1 \< subs.size(), then increments N and sets result equal to | |
/// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())). | |
/// Otherwise if what.prefix().first != what[0].second and if the element match_prev_avail is | |
/// not set in flags then sets it. Then locates the next match as if by calling | |
/// regex_search(what[0].second, end, what, *pre, flags), with the following variation: | |
/// in the event that the previous match found was of zero length (what[0].length() == 0) | |
/// then attempts to find a non-zero length match starting at what[0].second, only if that | |
/// fails and provided what[0].second != suffix().second does it look for a (possibly zero | |
/// length) match starting from what[0].second + 1. If such a match is found then sets N | |
/// equal to zero, and sets result equal to | |
/// ((subs[N] == -1) ? value_type(what.prefix().str()) : value_type(what[subs[N]].str())). | |
/// Otherwise if no further matches were found, then let last_end be the endpoint of the last | |
/// match that was found. Then if last_end != end and subs[0] == -1 sets N equal to -1 and | |
/// sets result equal to value_type(last_end, end). Otherwise sets *this equal to the end | |
/// of sequence iterator. | |
regex_token_iterator<BidiIter> &operator ++() | |
{ | |
this->fork_(); // un-share the implementation | |
this->next_(); | |
return *this; | |
} | |
regex_token_iterator<BidiIter> operator ++(int) | |
{ | |
regex_token_iterator<BidiIter> tmp(*this); | |
++*this; | |
return tmp; | |
} | |
private: | |
/// INTERNAL ONLY | |
void fork_() | |
{ | |
if(1 != this->impl_->use_count()) | |
{ | |
intrusive_ptr<impl_type_> clone = new impl_type_ | |
( | |
this->impl_->iter_.state_.begin_ | |
, this->impl_->iter_.state_.cur_ | |
, this->impl_->iter_.state_.end_ | |
, this->impl_->iter_.state_.next_search_ | |
, this->impl_->iter_.rex_ | |
, this->impl_->iter_.flags_ | |
, this->impl_->subs_ | |
, this->impl_->n_ | |
, this->impl_->iter_.not_null_ | |
); | |
// only copy the match_results struct if we have to. Note: if the next call | |
// to impl_->next() will return false or call regex_search, we don't need to | |
// copy the match_results struct. | |
if(-1 != this->impl_->n_ && this->impl_->n_ + 1 != static_cast<int>(this->impl_->subs_.size())) | |
{ | |
// BUGBUG This is expensive -- it causes the sequence_stack to be cleared. | |
// Find a better way | |
clone->iter_.what_ = this->impl_->iter_.what_; | |
} | |
else | |
{ | |
// At the very least, copy the action args | |
detail::core_access<BidiIter>::get_action_args(clone->iter_.what_) | |
= detail::core_access<BidiIter>::get_action_args(this->impl_->iter_.what_); | |
} | |
this->impl_.swap(clone); | |
} | |
} | |
/// INTERNAL ONLY | |
void next_() | |
{ | |
BOOST_ASSERT(this->impl_ && 1 == this->impl_->use_count()); | |
if(!this->impl_->next()) | |
{ | |
this->impl_ = 0; | |
} | |
} | |
intrusive_ptr<impl_type_> impl_; | |
}; | |
}} // namespace boost::xpressive | |
#endif |