blob: 13c47ecd6336c432da04e6304568c174b92e9a4f [file] [log] [blame]
///////////////////////////////////////////////////////////////////////////////
/// \file symbols.hpp
/// Contains the Ternary Search Trie implementation.
/// Based on the following papers:
/// J. Bentley and R. Sedgewick. (1998) Ternary search trees. Dr. Dobbs Journal
/// G. Badr and B.J. Oommen. (2005) Self-Adjusting of Ternary Search Tries Using
/// Conditional Rotations and Randomized Heuristics
//
// Copyright 2007 David Jenkins.
// Copyright 2007 Eric Niebler.
//
// Distributed under the Boost Software License, Version 1.0. (See
// accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
#define BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
// MS compatible compilers support #pragma once
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
# pragma once
#endif
#include <boost/noncopyable.hpp>
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <boost/range/value_type.hpp>
#include <boost/range/const_iterator.hpp>
#include <boost/shared_ptr.hpp>
namespace boost { namespace xpressive { namespace detail
{
///////////////////////////////////////////////////////////////////////////////
// symbols (using a ternary search trie)
//
template<typename Map>
struct symbols
{
typedef typename range_value<Map>::type::first_type key_type;
typedef typename range_value<Map>::type::second_type value_type;
typedef typename range_value<key_type>::type char_type;
typedef typename range_const_iterator<Map>::type iterator;
typedef typename range_const_iterator<key_type>::type key_iterator;
typedef value_type const *result_type;
// copies of this symbol table share the TST
template<typename Trans>
void load(Map const &map, Trans trans)
{
iterator begin = boost::begin(map);
iterator end = boost::end(map);
node* root_p = this->root.get();
for(; begin != end; ++begin)
{
key_iterator kbegin = boost::begin(begin->first);
key_iterator kend = boost::end(begin->first);
root_p = this->insert(root_p, kbegin, kend, &begin->second, trans);
}
this->root.reset(root_p);
}
template<typename BidiIter, typename Trans>
result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const
{
return this->search(begin, end, trans, this->root.get());
}
template<typename Sink>
void peek(Sink const &sink) const
{
this->peek_(this->root.get(), sink);
}
private:
///////////////////////////////////////////////////////////////////////////////
// struct node : a node in the TST.
// The "eq" field stores the result pointer when ch is zero.
//
struct node
: boost::noncopyable
{
node(char_type c)
: ch(c)
, lo(0)
, eq(0)
, hi(0)
#ifdef BOOST_DISABLE_THREADS
, tau(0)
#endif
{}
~node()
{
delete lo;
if (ch)
delete eq;
delete hi;
}
void swap(node& that)
{
std::swap(ch, that.ch);
std::swap(lo, that.lo);
std::swap(eq, that.eq);
std::swap(hi, that.hi);
#ifdef BOOST_DISABLE_THREADS
std::swap(tau, that.tau);
#endif
}
char_type ch;
node* lo;
union
{
node* eq;
result_type result;
};
node* hi;
#ifdef BOOST_DISABLE_THREADS
long tau;
#endif
};
///////////////////////////////////////////////////////////////////////////////
// insert : insert a string into the TST
//
template<typename Trans>
node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const
{
char_type c1 = 0;
if(begin != end)
{
c1 = trans(*begin);
}
if(!p)
{
p = new node(c1);
}
if(c1 < p->ch)
{
p->lo = this->insert(p->lo, begin, end, r, trans);
}
else if(c1 == p->ch)
{
if(0 == c1)
{
p->result = r;
}
else
{
p->eq = this->insert(p->eq, ++begin, end, r, trans);
}
}
else
{
p->hi = this->insert(p->hi, begin, end, r, trans);
}
return p;
}
#ifdef BOOST_DISABLE_THREADS
///////////////////////////////////////////////////////////////////////////////
// conditional rotation : the goal is to minimize the overall
// weighted path length of each binary search tree
//
bool cond_rotation(bool left, node* const i, node* const j) const
{
// don't rotate top node in binary search tree
if (i == j)
return false;
// calculate psi (the rotation condition)
node* const k = (left ? i->hi : i->lo);
long psi = 2*i->tau - j->tau - (k ? k->tau : 0);
if (psi <= 0)
return false;
// recalculate the tau values
j->tau += -i->tau + (k ? k->tau : 0);
i->tau += j->tau - (k ? k->tau : 0);
// fixup links and swap nodes
if (left)
{
j->lo = k;
i->hi = i;
}
else
{
j->hi = k;
i->lo = i;
}
(*i).swap(*j);
return true;
}
#endif
///////////////////////////////////////////////////////////////////////////////
// search : find a string in the TST
//
template<typename BidiIter, typename Trans>
result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const
{
result_type r = 0;
node* p2 = p;
bool left = false;
char_type c1 = (begin != end ? trans(*begin) : 0);
while(p)
{
#ifdef BOOST_DISABLE_THREADS
++p->tau;
#endif
if(c1 == p->ch)
{
// conditional rotation test
#ifdef BOOST_DISABLE_THREADS
if (this->cond_rotation(left, p, p2))
p = p2;
#endif
if (0 == p->ch)
{
// it's a match!
r = p->result;
}
if(begin == end)
break;
++begin;
p = p->eq;
// search for the longest match first
r = search(begin,end,trans,p);
if (0 == r)
{
// search for a match ending here
r = search(end,end,trans,p);
if (0 == r)
{
--begin;
}
}
break;
}
else if(c1 < p->ch)
{
left = true;
p2 = p;
p = p->lo;
}
else // (c1 > p->ch)
{
left = false;
p2 = p;
p = p->hi;
}
}
return r;
}
template<typename Sink>
void peek_(node const *const &p, Sink const &sink) const
{
if(p)
{
sink(p->ch);
this->peek_(p->lo, sink);
this->peek_(p->hi, sink);
}
}
boost::shared_ptr<node> root;
};
}}} // namespace boost::xpressive::detail
#endif