/* | |
* | |
* Copyright (c) 2002 | |
* John Maddock | |
* | |
* Use, modification and distribution are subject to the | |
* Boost Software License, Version 1.0. (See accompanying file | |
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
* | |
*/ | |
#ifndef BOOST_REGEX_MATCHER_HPP | |
#define BOOST_REGEX_MATCHER_HPP | |
#include <boost/regex/v4/iterator_category.hpp> | |
#ifdef BOOST_MSVC | |
#pragma warning(push) | |
#pragma warning(disable: 4103) | |
#endif | |
#ifdef BOOST_HAS_ABI_HEADERS | |
# include BOOST_ABI_PREFIX | |
#endif | |
#ifdef BOOST_MSVC | |
#pragma warning(pop) | |
#endif | |
#ifdef BOOST_MSVC | |
# pragma warning(push) | |
# pragma warning(disable: 4800) | |
#endif | |
namespace boost{ | |
namespace re_detail{ | |
// | |
// error checking API: | |
// | |
BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf); | |
// | |
// function can_start: | |
// | |
template <class charT> | |
inline bool can_start(charT c, const unsigned char* map, unsigned char mask) | |
{ | |
return ((c < static_cast<charT>(0)) ? true : ((c >= static_cast<charT>(1 << CHAR_BIT)) ? true : map[c] & mask)); | |
} | |
inline bool can_start(char c, const unsigned char* map, unsigned char mask) | |
{ | |
return map[(unsigned char)c] & mask; | |
} | |
inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) | |
{ | |
return map[(unsigned char)c] & mask; | |
} | |
inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) | |
{ | |
return map[c] & mask; | |
} | |
inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) | |
{ | |
return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); | |
} | |
#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives. | |
#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) | |
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) | |
{ | |
return ((c >= static_cast<wchar_t>(1u << CHAR_BIT)) ? true : map[c] & mask); | |
} | |
#endif | |
#endif | |
#if !defined(BOOST_NO_INTRINSIC_WCHAR_T) | |
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) | |
{ | |
return (((c >= static_cast<unsigned int>(1u << CHAR_BIT)) ? true : map[c] & mask)); | |
} | |
#endif | |
// | |
// Unfortunately Rogue Waves standard library appears to have a bug | |
// in std::basic_string::compare that results in eroneous answers | |
// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version | |
// 0x020101) the test case was: | |
// {39135,0} < {0xff,0} | |
// which succeeds when it should not. | |
// | |
#ifndef _RWSTD_VER | |
#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) | |
template <class C, class T, class A> | |
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p) | |
{ | |
if(0 == *p) | |
{ | |
if(s.empty() || ((s.size() == 1) && (s[0] == 0))) | |
return 0; | |
} | |
return s.compare(p); | |
} | |
#endif | |
#else | |
#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) | |
template <class C, class T, class A> | |
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p) | |
{ | |
if(0 == *p) | |
{ | |
if(s.empty() || ((s.size() == 1) && (s[0] == 0))) | |
return 0; | |
} | |
return s.compare(p); | |
} | |
#endif | |
inline int string_compare(const std::string& s, const char* p) | |
{ return std::strcmp(s.c_str(), p); } | |
# ifndef BOOST_NO_WREGEX | |
inline int string_compare(const std::wstring& s, const wchar_t* p) | |
{ return std::wcscmp(s.c_str(), p); } | |
#endif | |
#endif | |
template <class Seq, class C> | |
inline int string_compare(const Seq& s, const C* p) | |
{ | |
std::size_t i = 0; | |
while((i < s.size()) && (p[i] == s[i])) | |
{ | |
++i; | |
} | |
return (i == s.size()) ? -p[i] : s[i] - p[i]; | |
} | |
# define STR_COMP(s,p) string_compare(s,p) | |
template<class charT> | |
inline const charT* re_skip_past_null(const charT* p) | |
{ | |
while (*p != static_cast<charT>(0)) ++p; | |
return ++p; | |
} | |
template <class iterator, class charT, class traits_type, class char_classT> | |
iterator BOOST_REGEX_CALL re_is_set_member(iterator next, | |
iterator last, | |
const re_set_long<char_classT>* set_, | |
const regex_data<charT, traits_type>& e, bool icase) | |
{ | |
const charT* p = reinterpret_cast<const charT*>(set_+1); | |
iterator ptr; | |
unsigned int i; | |
//bool icase = e.m_flags & regex_constants::icase; | |
if(next == last) return next; | |
typedef typename traits_type::string_type traits_string_type; | |
const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits); | |
// dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never | |
// referenced | |
(void)traits_inst; | |
// try and match a single character, could be a multi-character | |
// collating element... | |
for(i = 0; i < set_->csingles; ++i) | |
{ | |
ptr = next; | |
if(*p == static_cast<charT>(0)) | |
{ | |
// treat null string as special case: | |
if(traits_inst.translate(*ptr, icase) != *p) | |
{ | |
while(*p == static_cast<charT>(0))++p; | |
continue; | |
} | |
return set_->isnot ? next : (ptr == next) ? ++next : ptr; | |
} | |
else | |
{ | |
while(*p && (ptr != last)) | |
{ | |
if(traits_inst.translate(*ptr, icase) != *p) | |
break; | |
++p; | |
++ptr; | |
} | |
if(*p == static_cast<charT>(0)) // if null we've matched | |
return set_->isnot ? next : (ptr == next) ? ++next : ptr; | |
p = re_skip_past_null(p); // skip null | |
} | |
} | |
charT col = traits_inst.translate(*next, icase); | |
if(set_->cranges || set_->cequivalents) | |
{ | |
traits_string_type s1; | |
// | |
// try and match a range, NB only a single character can match | |
if(set_->cranges) | |
{ | |
if((e.m_flags & regex_constants::collate) == 0) | |
s1.assign(1, col); | |
else | |
{ | |
charT a[2] = { col, charT(0), }; | |
s1 = traits_inst.transform(a, a + 1); | |
} | |
for(i = 0; i < set_->cranges; ++i) | |
{ | |
if(STR_COMP(s1, p) >= 0) | |
{ | |
do{ ++p; }while(*p); | |
++p; | |
if(STR_COMP(s1, p) <= 0) | |
return set_->isnot ? next : ++next; | |
} | |
else | |
{ | |
// skip first string | |
do{ ++p; }while(*p); | |
++p; | |
} | |
// skip second string | |
do{ ++p; }while(*p); | |
++p; | |
} | |
} | |
// | |
// try and match an equivalence class, NB only a single character can match | |
if(set_->cequivalents) | |
{ | |
charT a[2] = { col, charT(0), }; | |
s1 = traits_inst.transform_primary(a, a +1); | |
for(i = 0; i < set_->cequivalents; ++i) | |
{ | |
if(STR_COMP(s1, p) == 0) | |
return set_->isnot ? next : ++next; | |
// skip string | |
do{ ++p; }while(*p); | |
++p; | |
} | |
} | |
} | |
if(traits_inst.isctype(col, set_->cclasses) == true) | |
return set_->isnot ? next : ++next; | |
if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) | |
return set_->isnot ? next : ++next; | |
return set_->isnot ? ++next : next; | |
} | |
template <class BidiIterator> | |
class repeater_count | |
{ | |
repeater_count** stack; | |
repeater_count* next; | |
int state_id; | |
std::size_t count; // the number of iterations so far | |
BidiIterator start_pos; // where the last repeat started | |
public: | |
repeater_count(repeater_count** s) | |
{ | |
stack = s; | |
next = 0; | |
state_id = -1; | |
count = 0; | |
} | |
repeater_count(int i, repeater_count** s, BidiIterator start) | |
: start_pos(start) | |
{ | |
state_id = i; | |
stack = s; | |
next = *stack; | |
*stack = this; | |
if(state_id > next->state_id) | |
count = 0; | |
else | |
{ | |
repeater_count* p = next; | |
while(p && (p->state_id != state_id)) | |
p = p->next; | |
if(p) | |
{ | |
count = p->count; | |
start_pos = p->start_pos; | |
} | |
else | |
count = 0; | |
} | |
} | |
~repeater_count() | |
{ | |
if(next) | |
*stack = next; | |
} | |
std::size_t get_count() { return count; } | |
int get_id() { return state_id; } | |
std::size_t operator++() { return ++count; } | |
bool check_null_repeat(const BidiIterator& pos, std::size_t max) | |
{ | |
// this is called when we are about to start a new repeat, | |
// if the last one was NULL move our count to max, | |
// otherwise save the current position. | |
bool result = (count == 0) ? false : (pos == start_pos); | |
if(result) | |
count = max; | |
else | |
start_pos = pos; | |
return result; | |
} | |
}; | |
struct saved_state; | |
enum saved_state_type | |
{ | |
saved_type_end = 0, | |
saved_type_paren = 1, | |
saved_type_recurse = 2, | |
saved_type_assertion = 3, | |
saved_state_alt = 4, | |
saved_state_repeater_count = 5, | |
saved_state_extra_block = 6, | |
saved_state_greedy_single_repeat = 7, | |
saved_state_rep_slow_dot = 8, | |
saved_state_rep_fast_dot = 9, | |
saved_state_rep_char = 10, | |
saved_state_rep_short_set = 11, | |
saved_state_rep_long_set = 12, | |
saved_state_non_greedy_long_repeat = 13, | |
saved_state_count = 14 | |
}; | |
template <class Results> | |
struct recursion_info | |
{ | |
typedef typename Results::value_type value_type; | |
typedef typename value_type::iterator iterator; | |
int idx; | |
const re_syntax_base* preturn_address; | |
Results results; | |
repeater_count<iterator>* repeater_stack; | |
}; | |
#ifdef BOOST_MSVC | |
#pragma warning(push) | |
#pragma warning(disable : 4251 4231 4660) | |
#endif | |
template <class BidiIterator, class Allocator, class traits> | |
class perl_matcher | |
{ | |
public: | |
typedef typename traits::char_type char_type; | |
typedef perl_matcher<BidiIterator, Allocator, traits> self_type; | |
typedef bool (self_type::*matcher_proc_type)(void); | |
typedef std::size_t traits_size_type; | |
typedef typename is_byte<char_type>::width_type width_type; | |
typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type; | |
typedef match_results<BidiIterator, Allocator> results_type; | |
perl_matcher(BidiIterator first, BidiIterator end, | |
match_results<BidiIterator, Allocator>& what, | |
const basic_regex<char_type, traits>& e, | |
match_flag_type f, | |
BidiIterator l_base) | |
: m_result(what), base(first), last(end), | |
position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), | |
m_independent(false), next_count(&rep_obj), rep_obj(&next_count) | |
{ | |
construct_init(e, f); | |
} | |
bool match(); | |
bool find(); | |
void setf(match_flag_type f) | |
{ m_match_flags |= f; } | |
void unsetf(match_flag_type f) | |
{ m_match_flags &= ~f; } | |
private: | |
void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f); | |
bool find_imp(); | |
bool match_imp(); | |
#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD | |
typedef bool (perl_matcher::*protected_proc_type)(); | |
bool protected_call(protected_proc_type); | |
#endif | |
void estimate_max_state_count(std::random_access_iterator_tag*); | |
void estimate_max_state_count(void*); | |
bool match_prefix(); | |
bool match_all_states(); | |
// match procs, stored in s_match_vtable: | |
bool match_startmark(); | |
bool match_endmark(); | |
bool match_literal(); | |
bool match_start_line(); | |
bool match_end_line(); | |
bool match_wild(); | |
bool match_match(); | |
bool match_word_boundary(); | |
bool match_within_word(); | |
bool match_word_start(); | |
bool match_word_end(); | |
bool match_buffer_start(); | |
bool match_buffer_end(); | |
bool match_backref(); | |
bool match_long_set(); | |
bool match_set(); | |
bool match_jump(); | |
bool match_alt(); | |
bool match_rep(); | |
bool match_combining(); | |
bool match_soft_buffer_end(); | |
bool match_restart_continue(); | |
bool match_long_set_repeat(); | |
bool match_set_repeat(); | |
bool match_char_repeat(); | |
bool match_dot_repeat_fast(); | |
bool match_dot_repeat_slow(); | |
bool match_dot_repeat_dispatch() | |
{ | |
return ::boost::is_random_access_iterator<BidiIterator>::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); | |
} | |
bool match_backstep(); | |
bool match_assert_backref(); | |
bool match_toggle_case(); | |
#ifdef BOOST_REGEX_RECURSIVE | |
bool backtrack_till_match(std::size_t count); | |
#endif | |
bool match_recursion(); | |
// find procs stored in s_find_vtable: | |
bool find_restart_any(); | |
bool find_restart_word(); | |
bool find_restart_line(); | |
bool find_restart_buf(); | |
bool find_restart_lit(); | |
private: | |
// final result structure to be filled in: | |
match_results<BidiIterator, Allocator>& m_result; | |
// temporary result for POSIX matches: | |
scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match; | |
// pointer to actual result structure to fill in: | |
match_results<BidiIterator, Allocator>* m_presult; | |
// start of sequence being searched: | |
BidiIterator base; | |
// end of sequence being searched: | |
BidiIterator last; | |
// current character being examined: | |
BidiIterator position; | |
// where to restart next search after failed match attempt: | |
BidiIterator restart; | |
// where the current search started from, acts as base for $` during grep: | |
BidiIterator search_base; | |
// how far we can go back when matching lookbehind: | |
BidiIterator backstop; | |
// the expression being examined: | |
const basic_regex<char_type, traits>& re; | |
// the expression's traits class: | |
const ::boost::regex_traits_wrapper<traits>& traits_inst; | |
// the next state in the machine being matched: | |
const re_syntax_base* pstate; | |
// matching flags in use: | |
match_flag_type m_match_flags; | |
// how many states we have examined so far: | |
std::ptrdiff_t state_count; | |
// max number of states to examine before giving up: | |
std::ptrdiff_t max_state_count; | |
// whether we should ignore case or not: | |
bool icase; | |
// set to true when (position == last), indicates that we may have a partial match: | |
bool m_has_partial_match; | |
// set to true whenever we get a match: | |
bool m_has_found_match; | |
// set to true whenever we're inside an independent sub-expression: | |
bool m_independent; | |
// the current repeat being examined: | |
repeater_count<BidiIterator>* next_count; | |
// the first repeat being examined (top of linked list): | |
repeater_count<BidiIterator> rep_obj; | |
// the mask to pass when matching word boundaries: | |
typename traits::char_class_type m_word_mask; | |
// the bitmask to use when determining whether a match_any matches a newline or not: | |
unsigned char match_any_mask; | |
// recursion information: | |
std::vector<recursion_info<results_type> > recursion_stack; | |
#ifdef BOOST_REGEX_NON_RECURSIVE | |
// | |
// additional members for non-recursive version: | |
// | |
typedef bool (self_type::*unwind_proc_type)(bool); | |
void extend_stack(); | |
bool unwind(bool); | |
bool unwind_end(bool); | |
bool unwind_paren(bool); | |
bool unwind_recursion_stopper(bool); | |
bool unwind_assertion(bool); | |
bool unwind_alt(bool); | |
bool unwind_repeater_counter(bool); | |
bool unwind_extra_block(bool); | |
bool unwind_greedy_single_repeat(bool); | |
bool unwind_slow_dot_repeat(bool); | |
bool unwind_fast_dot_repeat(bool); | |
bool unwind_char_repeat(bool); | |
bool unwind_short_set_repeat(bool); | |
bool unwind_long_set_repeat(bool); | |
bool unwind_non_greedy_repeat(bool); | |
bool unwind_recursion(bool); | |
bool unwind_recursion_pop(bool); | |
void destroy_single_repeat(); | |
void push_matched_paren(int index, const sub_match<BidiIterator>& sub); | |
void push_recursion_stopper(); | |
void push_assertion(const re_syntax_base* ps, bool positive); | |
void push_alt(const re_syntax_base* ps); | |
void push_repeater_count(int i, repeater_count<BidiIterator>** s); | |
void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); | |
void push_non_greedy_repeat(const re_syntax_base* ps); | |
void push_recursion(int idx, const re_syntax_base* p, results_type* presults); | |
void push_recursion_pop(); | |
// pointer to base of stack: | |
saved_state* m_stack_base; | |
// pointer to current stack position: | |
saved_state* m_backup_state; | |
// determines what value to return when unwinding from recursion, | |
// allows for mixed recursive/non-recursive algorithm: | |
bool m_recursive_result; | |
// how many memory blocks have we used up?: | |
unsigned used_block_count; | |
#endif | |
// these operations aren't allowed, so are declared private, | |
// bodies are provided to keep explicit-instantiation requests happy: | |
perl_matcher& operator=(const perl_matcher&) | |
{ | |
return *this; | |
} | |
perl_matcher(const perl_matcher& that) | |
: m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} | |
}; | |
#ifdef BOOST_MSVC | |
#pragma warning(pop) | |
#endif | |
} // namespace re_detail | |
#ifdef BOOST_MSVC | |
#pragma warning(push) | |
#pragma warning(disable: 4103) | |
#endif | |
#ifdef BOOST_HAS_ABI_HEADERS | |
# include BOOST_ABI_SUFFIX | |
#endif | |
#ifdef BOOST_MSVC | |
#pragma warning(pop) | |
#endif | |
} // namespace boost | |
#ifdef BOOST_MSVC | |
# pragma warning(pop) | |
#endif | |
// | |
// include the implementation of perl_matcher: | |
// | |
#ifdef BOOST_REGEX_RECURSIVE | |
#include <boost/regex/v4/perl_matcher_recursive.hpp> | |
#else | |
#include <boost/regex/v4/perl_matcher_non_recursive.hpp> | |
#endif | |
// this one has to be last: | |
#include <boost/regex/v4/perl_matcher_common.hpp> | |
#endif | |