/////////////////////////////////////////////////////////////////////////////// | |
/// \file boyer_moore.hpp | |
/// Contains the boyer-moore implementation. Note: this is *not* a general- | |
/// purpose boyer-moore implementation. It truncates the search string at | |
/// 256 characters, but it is sufficient for the needs of xpressive. | |
// | |
// Copyright 2008 Eric Niebler. Distributed under the Boost | |
// Software License, Version 1.0. (See accompanying file | |
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
#ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 | |
#define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 | |
// MS compatible compilers support #pragma once | |
#if defined(_MSC_VER) && (_MSC_VER >= 1020) | |
# pragma once | |
# pragma warning(push) | |
# pragma warning(disable : 4100) // unreferenced formal parameter | |
#endif | |
#include <climits> // for UCHAR_MAX | |
#include <cstddef> // for std::ptrdiff_t | |
#include <utility> // for std::max | |
#include <vector> | |
#include <boost/mpl/bool.hpp> | |
#include <boost/noncopyable.hpp> | |
#include <boost/iterator/iterator_traits.hpp> | |
#include <boost/type_traits/is_convertible.hpp> | |
#include <boost/xpressive/detail/detail_fwd.hpp> | |
namespace boost { namespace xpressive { namespace detail | |
{ | |
/////////////////////////////////////////////////////////////////////////////// | |
// boyer_moore | |
// | |
template<typename BidiIter, typename Traits> | |
struct boyer_moore | |
: noncopyable | |
{ | |
typedef typename iterator_value<BidiIter>::type char_type; | |
typedef Traits traits_type; | |
typedef has_fold_case<Traits> case_fold; | |
typedef typename Traits::string_type string_type; | |
// initialize the Boyer-Moore search data structure, using the | |
// search sub-sequence to prime the pump. | |
boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase) | |
: begin_(begin) | |
, last_(begin) | |
, fold_() | |
, find_fun_( | |
icase | |
? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_) | |
: &boyer_moore::find_ | |
) | |
{ | |
std::ptrdiff_t const uchar_max = UCHAR_MAX; | |
std::ptrdiff_t diff = std::distance(begin, end); | |
this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max)); | |
std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_); | |
--this->length_; | |
icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_()); | |
} | |
BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const | |
{ | |
return (this->*this->find_fun_)(begin, end, tr); | |
} | |
private: | |
void init_(Traits const &tr, mpl::false_) | |
{ | |
for(unsigned char offset = this->length_; offset; --offset, ++this->last_) | |
{ | |
this->offsets_[tr.hash(*this->last_)] = offset; | |
} | |
} | |
void init_(Traits const &tr, mpl::true_) | |
{ | |
this->fold_.reserve(this->length_ + 1); | |
for(unsigned char offset = this->length_; offset; --offset, ++this->last_) | |
{ | |
this->fold_.push_back(tr.fold_case(*this->last_)); | |
for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end(); | |
beg != end; ++beg) | |
{ | |
this->offsets_[tr.hash(*beg)] = offset; | |
} | |
} | |
this->fold_.push_back(tr.fold_case(*this->last_)); | |
} | |
// case-sensitive Boyer-Moore search | |
BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const | |
{ | |
typedef typename boost::iterator_difference<BidiIter>::type diff_type; | |
diff_type const endpos = std::distance(begin, end); | |
diff_type offset = static_cast<diff_type>(this->length_); | |
for(diff_type curpos = offset; curpos < endpos; curpos += offset) | |
{ | |
std::advance(begin, offset); | |
char_type const *pat_tmp = this->last_; | |
BidiIter str_tmp = begin; | |
for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) | |
{ | |
if(pat_tmp == this->begin_) | |
{ | |
return str_tmp; | |
} | |
} | |
offset = this->offsets_[tr.hash(tr.translate(*begin))]; | |
} | |
return end; | |
} | |
// case-insensitive Boyer-Moore search | |
BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const | |
{ | |
typedef typename boost::iterator_difference<BidiIter>::type diff_type; | |
diff_type const endpos = std::distance(begin, end); | |
diff_type offset = static_cast<diff_type>(this->length_); | |
for(diff_type curpos = offset; curpos < endpos; curpos += offset) | |
{ | |
std::advance(begin, offset); | |
char_type const *pat_tmp = this->last_; | |
BidiIter str_tmp = begin; | |
for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) | |
{ | |
if(pat_tmp == this->begin_) | |
{ | |
return str_tmp; | |
} | |
} | |
offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))]; | |
} | |
return end; | |
} | |
// case-insensitive Boyer-Moore search with case-folding | |
BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const | |
{ | |
typedef typename boost::iterator_difference<BidiIter>::type diff_type; | |
diff_type const endpos = std::distance(begin, end); | |
diff_type offset = static_cast<diff_type>(this->length_); | |
for(diff_type curpos = offset; curpos < endpos; curpos += offset) | |
{ | |
std::advance(begin, offset); | |
string_type const *pat_tmp = &this->fold_.back(); | |
BidiIter str_tmp = begin; | |
for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp); | |
--pat_tmp, --str_tmp) | |
{ | |
if(pat_tmp == &this->fold_.front()) | |
{ | |
return str_tmp; | |
} | |
} | |
offset = this->offsets_[tr.hash(*begin)]; | |
} | |
return end; | |
} | |
private: | |
char_type const *begin_; | |
char_type const *last_; | |
std::vector<string_type> fold_; | |
BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const; | |
unsigned char length_; | |
unsigned char offsets_[UCHAR_MAX + 1]; | |
}; | |
}}} // namespace boost::xpressive::detail | |
#if defined(_MSC_VER) && (_MSC_VER >= 1020) | |
# pragma warning(pop) | |
#endif | |
#endif |